3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul
11 * An implementation of the Slab Allocator as described in outline in;
12 * UNIX Internals: The New Frontiers by Uresh Vahalia
13 * Pub: Prentice Hall ISBN 0-13-101908-2
14 * or with a little more detail in;
15 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
16 * Jeff Bonwick (Sun Microsystems).
17 * Presented at: USENIX Summer 1994 Technical Conference
20 * The memory is organized in caches, one cache for each object type.
21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
22 * Each cache consists of many slabs (they are small (usually one
23 * page long) and always contiguous), and each slab contains multiple
24 * initialized objects.
26 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
27 * normal). If you need a special memory type, then you must create a new
28 * cache for that memory type.
30 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
31 * full slabs with 0 free objects
32 * partial slabs
33 * empty slabs with no allocated objects
35 * If partial slabs exist, then new allocations come from these slabs,
36 * otherwise from empty slabs or new slabs are allocated.
38 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
39 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
41 * On SMP systems, each cache has a short per-cpu head array, most allocs
42 * and frees go into that array, and if that array overflows, then 1/2
43 * of the entries in the array are given back into the global cache.
44 * This reduces the number of spinlock operations.
46 * The c_cpuarray can be changed with a smp_call_function call,
47 * it may not be read with enabled local interrupts.
49 * SMP synchronization:
50 * constructors and destructors are called without any locking.
51 * Several members in kmem_cache_t and slab_t never change, they
52 * are accessed without any locking.
53 * The per-cpu arrays are never accessed from the wrong cpu, no locking.
54 * smp_call_function() is used if one cpu must flush the arrays from
55 * other cpus.
56 * The non-constant members are protected with a per-cache irq spinlock.
58 * Further notes from the original documentation:
60 * 11 April '97. Started multi-threading - markhe
61 * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
62 * The sem is only needed when accessing/extending the cache-chain, which
63 * can never happen inside an interrupt (kmem_cache_create(),
64 * kmem_cache_shrink() and kmem_cache_reap()).
66 * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
67 * may be sleeping and therefore not holding the semaphore/lock), the
68 * growing field is used. This also prevents reaping from a cache.
70 * At present, each engine can be growing a cache. This should be blocked.
74 #include <linux/config.h>
75 #include <linux/slab.h>
76 #include <linux/interrupt.h>
77 #include <linux/init.h>
78 #include <asm/uaccess.h>
81 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
82 * SLAB_RED_ZONE & SLAB_POISON.
83 * 0 for faster, smaller code (especially in the critical paths).
85 * STATS - 1 to collect stats for /proc/slabinfo.
86 * 0 for faster, smaller code (especially in the critical paths).
88 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
93 #define FORCED_DEBUG 0
96 * Parameters for kmem_cache_reap
98 #define REAP_SCANLEN 10
99 #define REAP_PERFECT 10
101 /* Shouldn't this be in a header file somewhere? */
102 #define BYTES_PER_WORD sizeof(void *)
104 /* Legal flag mask for kmem_cache_create(). */
106 # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
107 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
108 SLAB_NO_REAP | SLAB_CACHE_DMA)
110 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
116 * Bufctl's are used for linking objs within a slab
119 * This implementation relies on "struct page" for locating the cache &
120 * slab an object belongs to.
121 * This allows the bufctl structure to be small (one int), but limits
122 * the number of objects a slab (not a cache) can contain when off-slab
123 * bufctls are used. The limit is the size of the largest general cache
124 * that does not use off-slab slabs.
125 * For 32bit archs with 4 kB pages, this is 56.
126 * This is not serious, as it is only for large objects, when it is unwise
127 * to have too many per slab.
128 * Note: This limit can be raised by introducing a general cache whose size
129 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
/*
 * kmem_bufctl_t is an index into a slab's bufctl array; each entry holds
 * the index of the next free object of that slab (kmem_cache_init_objs()
 * initialises entry i to i+1).
 * BUFCTL_END terminates that free list: it is stored in the last entry,
 * and a slab whose 'free' index equals BUFCTL_END has no free object left.
 * NOTE(review): SLAB_LIMIT is not referenced in the visible code;
 * presumably it is the largest usable object index (one below
 * BUFCTL_END) - confirm against kmem_cache_create().
 */
132 #define BUFCTL_END 0xffffFFFF
133 #define SLAB_LIMIT 0xffffFFFE
134 typedef unsigned int kmem_bufctl_t
;
136 /* Max number of objs-per-slab for caches which use off-slab slabs.
137 * Needed to avoid a possible looping condition in kmem_cache_grow().
139 static unsigned long offslab_limit
;
144 * Manages the objs in a slab. Placed either at the beginning of mem allocated
145 * for a slab, or allocated from a general cache.
146 * Slabs are chained into one ordered list: fully used, partial, then fully
147 * free slabs.
149 typedef struct slab_s
{
150 struct list_head list
;
151 unsigned long colouroff
;
152 void *s_mem
; /* including colour offset */
153 unsigned int inuse
; /* num of objs active in slab */
/*
 * slab_bufctl - return the bufctl array that belongs to a slab.
 * The array is not a named member of slab_t: the macro steps one slab_t
 * past @slabp and treats that address as a kmem_bufctl_t array, i.e. the
 * array sits directly behind the slab descriptor in memory.
 */
157 #define slab_bufctl(slabp) \
158 ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
164 * The limit is stored in the per-cpu structure to reduce the data cache
167 typedef struct cpucache_s
{
/*
 * cc_entry - the array of object pointers of a per-cpu cache; it starts
 *	directly behind the cpucache_t header (one cpucache_t past @cpucache).
 * cc_data - the cpucache_t pointer that @cachep keeps for the CPU this
 *	code is currently running on (cpudata[] indexed by
 *	smp_processor_id()).
 */
172 #define cc_entry(cpucache) \
173 ((void **)(((cpucache_t*)cpucache)+1))
174 #define cc_data(cachep) \
175 ((cachep)->cpudata[smp_processor_id()])
182 #define CACHE_NAMELEN 20 /* max name length for a slab cache */
184 struct kmem_cache_s
{
185 /* 1) each alloc & free */
186 /* full, partial first, then free */
187 struct list_head slabs
;
188 struct list_head
*firstnotfull
;
189 unsigned int objsize
;
190 unsigned int flags
; /* constant flags */
191 unsigned int num
; /* # of objs per slab */
194 unsigned int batchcount
;
197 /* 2) slab additions /removals */
198 /* order of pgs per slab (2^n) */
199 unsigned int gfporder
;
201 /* force GFP flags, e.g. GFP_DMA */
202 unsigned int gfpflags
;
204 size_t colour
; /* cache colouring range */
205 unsigned int colour_off
; /* colour offset */
206 unsigned int colour_next
; /* cache colouring */
207 kmem_cache_t
*slabp_cache
;
208 unsigned int growing
;
209 unsigned int dflags
; /* dynamic flags */
211 /* constructor func */
212 void (*ctor
)(void *, kmem_cache_t
*, unsigned long);
214 /* de-constructor func */
215 void (*dtor
)(void *, kmem_cache_t
*, unsigned long);
217 unsigned long failures
;
219 /* 3) cache creation/removal */
220 char name
[CACHE_NAMELEN
];
221 struct list_head next
;
223 /* 4) per-cpu data */
224 cpucache_t
*cpudata
[NR_CPUS
];
227 unsigned long num_active
;
228 unsigned long num_allocations
;
229 unsigned long high_mark
;
231 unsigned long reaped
;
232 unsigned long errors
;
/* internal c_flags */
#define	CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	CFLGS_OPTIMIZE	0x020000UL	/* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define	DFLGS_GROWN	0x000001UL	/* don't reap a recently grown */

/*
 * Flag tests on a cache descriptor; each yields non-zero when the flag is
 * set.  OFF_SLAB() and OPTIMIZE() read the constant 'flags' member,
 * GROWN() reads the dynamic 'dflags' member (so the cache spinlock must
 * be held, see above).
 *
 * GROWN() used to dereference a non-existent member 'dlags'; the member
 * is called 'dflags'.  The typo only went unnoticed because the macro is
 * never expanded when it has no users.
 */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
#define	OPTIMIZE(x)	((x)->flags & CFLGS_OPTIMIZE)
#define	GROWN(x)	((x)->dflags & DFLGS_GROWN)
254 #define STATS_INC_ACTIVE(x) ((x)->num_active++)
255 #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
256 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
257 #define STATS_INC_GROWN(x) ((x)->grown++)
258 #define STATS_INC_REAPED(x) ((x)->reaped++)
259 #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
260 (x)->high_mark = (x)->num_active; \
262 #define STATS_INC_ERR(x) ((x)->errors++)
264 #define STATS_INC_ACTIVE(x) do { } while (0)
265 #define STATS_DEC_ACTIVE(x) do { } while (0)
266 #define STATS_INC_ALLOCED(x) do { } while (0)
267 #define STATS_INC_GROWN(x) do { } while (0)
268 #define STATS_INC_REAPED(x) do { } while (0)
269 #define STATS_SET_HIGH(x) do { } while (0)
270 #define STATS_INC_ERR(x) do { } while (0)
273 #if STATS && defined(CONFIG_SMP)
274 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
275 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
276 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
277 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
279 #define STATS_INC_ALLOCHIT(x) do { } while (0)
280 #define STATS_INC_ALLOCMISS(x) do { } while (0)
281 #define STATS_INC_FREEHIT(x) do { } while (0)
282 #define STATS_INC_FREEMISS(x) do { } while (0)
286 /* Magic nums for obj red zoning.
287 * Placed in the first word before and the first word after an obj.
/*
 * RED_MAGIC1 is the guard value of an object that is NOT handed out:
 * kmem_cache_init_objs() writes it around every freshly constructed
 * object, kmem_slab_destroy() expects it on every object, and the
 * allocation path expects to find it before swapping in RED_MAGIC2.
 * RED_MAGIC2 is therefore the guard value of an allocated object.
 */
289 #define RED_MAGIC1 0x5A2CF071UL /* when obj is inactive (free) */
290 #define RED_MAGIC2 0x170FC2A5UL /* when obj is active (allocated) */
292 /* ...and for poisoning */
293 #define POISON_BYTE 0x5a /* byte value for poisoning */
294 #define POISON_END 0xa5 /* end-byte of poisoning */
298 /* maximum size of an obj (in 2^order pages) */
299 #define MAX_OBJ_ORDER 5 /* 32 pages */
302 * Do not go above this order unless 0 objects fit into the slab.
304 #define BREAK_GFP_ORDER_HI 2
305 #define BREAK_GFP_ORDER_LO 1
306 static int slab_break_gfp_order
= BREAK_GFP_ORDER_LO
;
309 * Absolute limit for the gfp order
311 #define MAX_GFP_ORDER 5 /* 32 pages */
314 /* Macros for storing/retrieving the cachep and or slab from the
315 * global 'mem_map'. These are used to find the slab an obj belongs to.
316 * With kfree(), these are used to find the cache which an obj belongs to.
/*
 * The two pointers of page->list are reused as back-pointers:
 *   list.next holds the owning cache   (kmem_cache_t *),
 *   list.prev holds the slab descriptor (slab_t *).
 * The SET_ macros store the pointer cast to struct list_head *, the GET_
 * macros cast the stored value back to its real type.
 */
318 #define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
319 #define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
320 #define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
321 #define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
323 /* Size description struct for general caches. */
324 typedef struct cache_sizes
{
326 kmem_cache_t
*cs_cachep
;
327 kmem_cache_t
*cs_dmacachep
;
330 static cache_sizes_t cache_sizes
[] = {
331 #if PAGE_SIZE == 4096
342 { 16384, NULL
, NULL
},
343 { 32768, NULL
, NULL
},
344 { 65536, NULL
, NULL
},
345 {131072, NULL
, NULL
},
349 /* internal cache of cache description objs */
350 static kmem_cache_t cache_cache
= {
351 slabs
: LIST_HEAD_INIT(cache_cache
.slabs
),
352 firstnotfull
: &cache_cache
.slabs
,
353 objsize
: sizeof(kmem_cache_t
),
355 spinlock
: SPIN_LOCK_UNLOCKED
,
356 colour_off
: L1_CACHE_BYTES
,
360 /* Guard access to the cache-chain. */
361 static struct semaphore cache_chain_sem
;
363 /* Place maintainer for reaping. */
364 static kmem_cache_t
*clock_searchp
= &cache_cache
;
366 #define cache_chain (cache_cache.next)
370 * chicken and egg problem: delay the per-cpu array allocation
371 * until the general caches are up.
373 static int g_cpucache_up
;
375 static void drain_cache (void *__cachep
);
376 static void enable_cpucache (kmem_cache_t
*cachep
);
377 static void enable_all_cpucaches (void);
380 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
381 static void kmem_cache_estimate (unsigned long gfporder
, size_t size
,
382 int flags
, size_t *left_over
, unsigned int *num
)
385 size_t wastage
= PAGE_SIZE
<<gfporder
;
389 if (!(flags
& CFLGS_OFF_SLAB
)) {
390 base
= sizeof(slab_t
);
391 extra
= sizeof(kmem_bufctl_t
);
394 while (i
*size
+ L1_CACHE_ALIGN(base
+i
*extra
) <= wastage
)
404 wastage
-= L1_CACHE_ALIGN(base
+i
*extra
);
405 *left_over
= wastage
;
408 /* Initialisation - setup the `cache' cache. */
409 void __init
kmem_cache_init(void)
413 init_MUTEX(&cache_chain_sem
);
414 INIT_LIST_HEAD(&cache_chain
);
416 kmem_cache_estimate(0, cache_cache
.objsize
, 0,
417 &left_over
, &cache_cache
.num
);
418 if (!cache_cache
.num
)
421 cache_cache
.colour
= left_over
/cache_cache
.colour_off
;
422 cache_cache
.colour_next
= 0;
426 /* Initialisation - setup remaining internal and general caches.
427 * Called after the gfp() functions have been enabled, and before smp_init().
429 void __init
kmem_cache_sizes_init(void)
431 cache_sizes_t
*sizes
= cache_sizes
;
434 * Fragmentation resistance on low memory - only use bigger
435 * page orders on machines with more than 32MB of memory.
437 if (num_physpages
> (32 << 20) >> PAGE_SHIFT
)
438 slab_break_gfp_order
= BREAK_GFP_ORDER_HI
;
440 /* For performance, all the general caches are L1 aligned.
441 * This should be particularly beneficial on SMP boxes, as it
442 * eliminates "false sharing".
443 * Note for systems short on memory removing the alignment will
444 * allow tighter packing of the smaller caches. */
445 sprintf(name
,"size-%Zd",sizes
->cs_size
);
446 if (!(sizes
->cs_cachep
=
447 kmem_cache_create(name
, sizes
->cs_size
,
448 0, SLAB_HWCACHE_ALIGN
, NULL
, NULL
))) {
452 /* Inc off-slab bufctl limit until the ceiling is hit. */
453 if (!(OFF_SLAB(sizes
->cs_cachep
))) {
454 offslab_limit
= sizes
->cs_size
-sizeof(slab_t
);
457 sprintf(name
, "size-%Zd(DMA)",sizes
->cs_size
);
458 sizes
->cs_dmacachep
= kmem_cache_create(name
, sizes
->cs_size
, 0,
459 SLAB_CACHE_DMA
|SLAB_HWCACHE_ALIGN
, NULL
, NULL
);
460 if (!sizes
->cs_dmacachep
)
463 } while (sizes
->cs_size
);
466 void __init
kmem_cpucache_init(void)
470 enable_all_cpucaches();
474 /* Interface to system's page allocator. No need to hold the cache-lock.
476 static inline void * kmem_getpages (kmem_cache_t
*cachep
, unsigned long flags
)
481 * If we requested dmaable memory, we will get it. Even if we
482 * did not request dmaable memory, we might get it, but that
483 * would be relatively rare and ignorable.
485 flags
|= cachep
->gfpflags
;
486 addr
= (void*) __get_free_pages(flags
, cachep
->gfporder
);
487 /* Assume that now we have the pages no one else can legally
488 * mess with the 'struct page's.
489 * However vm_scan() might try to test the structure to see if
490 * it is a named-page or buffer-page. The members it tests are
491 * of no interest here.....
496 /* Interface to system's page release. */
497 static inline void kmem_freepages (kmem_cache_t
*cachep
, void *addr
)
499 unsigned long i
= (1<<cachep
->gfporder
);
500 struct page
*page
= mem_map
+ MAP_NR(addr
);
502 /* free_pages() does not clear the type bit - we do that.
503 * The pages have been unlinked from their cache-slab,
504 * but their 'struct page's might be accessed in
505 * vm_scan(). Shouldn't be a worry.
511 free_pages((unsigned long)addr
, cachep
->gfporder
);
515 static inline void kmem_poison_obj (kmem_cache_t
*cachep
, void *addr
)
517 int size
= cachep
->objsize
;
518 if (cachep
->flags
& SLAB_RED_ZONE
) {
519 addr
+= BYTES_PER_WORD
;
520 size
-= 2*BYTES_PER_WORD
;
522 memset(addr
, POISON_BYTE
, size
);
523 *(unsigned char *)(addr
+size
-1) = POISON_END
;
526 static inline int kmem_check_poison_obj (kmem_cache_t
*cachep
, void *addr
)
528 int size
= cachep
->objsize
;
530 if (cachep
->flags
& SLAB_RED_ZONE
) {
531 addr
+= BYTES_PER_WORD
;
532 size
-= 2*BYTES_PER_WORD
;
534 end
= memchr(addr
, POISON_END
, size
);
535 if (end
!= (addr
+size
-1))
541 /* Destroy all the objs in a slab, and release the mem back to the system.
542 * Before calling the slab must have been unlinked from the cache.
543 * The cache-lock is not held/needed.
545 static void kmem_slab_destroy (kmem_cache_t
*cachep
, slab_t
*slabp
)
549 || cachep
->flags
& (SLAB_POISON
| SLAB_RED_ZONE
)
553 for (i
= 0; i
< cachep
->num
; i
++) {
554 void* objp
= slabp
->s_mem
+cachep
->objsize
*i
;
556 if (cachep
->flags
& SLAB_RED_ZONE
) {
557 if (*((unsigned long*)(objp
)) != RED_MAGIC1
)
559 if (*((unsigned long*)(objp
+ cachep
->objsize
560 -BYTES_PER_WORD
)) != RED_MAGIC1
)
562 objp
+= BYTES_PER_WORD
;
566 (cachep
->dtor
)(objp
, cachep
, 0);
568 if (cachep
->flags
& SLAB_RED_ZONE
) {
569 objp
-= BYTES_PER_WORD
;
571 if ((cachep
->flags
& SLAB_POISON
) &&
572 kmem_check_poison_obj(cachep
, objp
))
578 kmem_freepages(cachep
, slabp
->s_mem
-slabp
->colouroff
);
579 if (OFF_SLAB(cachep
))
580 kmem_cache_free(cachep
->slabp_cache
, slabp
);
585 * kmem_cache_create - Create a cache.
586 * @name: A string which is used in /proc/slabinfo to identify this cache.
587 * @size: The size of objects to be created in this cache.
588 * @offset: The offset to use within the page.
590 * @ctor: A constructor for the objects.
591 * @dtor: A destructor for the objects.
593 * Returns a ptr to the cache on success, NULL on failure.
594 * Cannot be called within an interrupt, but can be interrupted.
595 * The @ctor is run when new pages are allocated by the cache
596 * and the @dtor is run before the pages are handed back.
599 * %SLAB_POISON - Poison the slab with a known test pattern (0x5a bytes,
600 * terminated by 0xa5) to catch references to uninitialised memory.
602 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
603 * for buffer overruns.
605 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
608 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
609 * cacheline. This can be beneficial if you're counting cycles as closely
613 kmem_cache_create (const char *name
, size_t size
, size_t offset
,
614 unsigned long flags
, void (*ctor
)(void*, kmem_cache_t
*, unsigned long),
615 void (*dtor
)(void*, kmem_cache_t
*, unsigned long))
617 const char *func_nm
= KERN_ERR
"kmem_create: ";
618 size_t left_over
, align
, slab_size
;
619 kmem_cache_t
*cachep
= NULL
;
622 * Sanity checks... these are all serious usage bugs.
625 ((strlen(name
) >= CACHE_NAMELEN
- 1)) ||
627 (size
< BYTES_PER_WORD
) ||
628 (size
> (1<<MAX_OBJ_ORDER
)*PAGE_SIZE
) ||
630 (offset
< 0 || offset
> size
))
634 if ((flags
& SLAB_DEBUG_INITIAL
) && !ctor
) {
635 /* No constructor, but initial state check requested */
636 printk("%sNo con, but init state check requested - %s\n", func_nm
, name
);
637 flags
&= ~SLAB_DEBUG_INITIAL
;
640 if ((flags
& SLAB_POISON
) && ctor
) {
641 /* request for poisoning, but we can't do that with a constructor */
642 printk("%sPoisoning requested, but con given - %s\n", func_nm
, name
);
643 flags
&= ~SLAB_POISON
;
646 if (size
< (PAGE_SIZE
>>3))
648 * do not red zone large object, causes severe
651 flags
|= SLAB_RED_ZONE
;
653 flags
|= SLAB_POISON
;
658 * Always checks flags, a caller might be expecting debug
659 * support which isn't available.
661 if (flags
& ~CREATE_MASK
)
664 /* Get cache's description obj. */
665 cachep
= (kmem_cache_t
*) kmem_cache_alloc(&cache_cache
, SLAB_KERNEL
);
668 memset(cachep
, 0, sizeof(kmem_cache_t
));
670 /* Check that size is in terms of words. This is needed to avoid
671 * unaligned accesses for some archs when redzoning is used, and makes
672 * sure any on-slab bufctl's are also correctly aligned.
674 if (size
& (BYTES_PER_WORD
-1)) {
675 size
+= (BYTES_PER_WORD
-1);
676 size
&= ~(BYTES_PER_WORD
-1);
677 printk("%sForcing size word alignment - %s\n", func_nm
, name
);
681 if (flags
& SLAB_RED_ZONE
) {
683 * There is no point trying to honour cache alignment
686 flags
&= ~SLAB_HWCACHE_ALIGN
;
687 size
+= 2*BYTES_PER_WORD
; /* words for redzone */
690 align
= BYTES_PER_WORD
;
691 if (flags
& SLAB_HWCACHE_ALIGN
)
692 align
= L1_CACHE_BYTES
;
694 /* Determine if the slab management is 'on' or 'off' slab. */
695 if (size
>= (PAGE_SIZE
>>3))
697 * Size is large, assume best to place the slab management obj
698 * off-slab (should allow better packing of objs).
700 flags
|= CFLGS_OFF_SLAB
;
702 if (flags
& SLAB_HWCACHE_ALIGN
) {
703 /* Need to adjust size so that objs are cache aligned. */
704 /* Small obj size, can get at least two per cache line. */
705 /* FIXME: only power of 2 supported, was better */
706 while (size
< align
/2)
708 size
= (size
+align
-1)&(~(align
-1));
711 /* Cal size (in pages) of slabs, and the num of objs per slab.
712 * This could be made much more intelligent. For now, try to avoid
713 * using high page-orders for slabs. When the gfp() funcs are more
714 * friendly towards high-order requests, this should be changed.
717 unsigned int break_flag
= 0;
719 kmem_cache_estimate(cachep
->gfporder
, size
, flags
,
720 &left_over
, &cachep
->num
);
723 if (cachep
->gfporder
>= MAX_GFP_ORDER
)
727 if (flags
& CFLGS_OFF_SLAB
&& cachep
->num
> offslab_limit
) {
728 /* Oops, this num of objs will cause problems. */
735 * Large num of objs is good, but v. large slabs are currently
736 * bad for the gfp()s.
738 if (cachep
->gfporder
>= slab_break_gfp_order
)
741 if ((left_over
*8) <= (PAGE_SIZE
<<cachep
->gfporder
))
742 break; /* Acceptable internal fragmentation. */
748 printk("kmem_cache_create: couldn't create cache %s.\n", name
);
749 kmem_cache_free(&cache_cache
, cachep
);
753 slab_size
= L1_CACHE_ALIGN(cachep
->num
*sizeof(kmem_bufctl_t
)+sizeof(slab_t
));
756 * If the slab has been placed off-slab, and we have enough space then
757 * move it on-slab. This is at the expense of any extra colouring.
759 if (flags
& CFLGS_OFF_SLAB
&& left_over
>= slab_size
) {
760 flags
&= ~CFLGS_OFF_SLAB
;
761 left_over
-= slab_size
;
764 /* Offset must be a multiple of the alignment. */
766 offset
&= ~(align
-1);
768 offset
= L1_CACHE_BYTES
;
769 cachep
->colour_off
= offset
;
770 cachep
->colour
= left_over
/offset
;
772 /* init remaining fields */
773 if (!cachep
->gfporder
&& !(flags
& CFLGS_OFF_SLAB
))
774 flags
|= CFLGS_OPTIMIZE
;
776 cachep
->flags
= flags
;
777 cachep
->gfpflags
= 0;
778 if (flags
& SLAB_CACHE_DMA
)
779 cachep
->gfpflags
|= GFP_DMA
;
780 spin_lock_init(&cachep
->spinlock
);
781 cachep
->objsize
= size
;
782 INIT_LIST_HEAD(&cachep
->slabs
);
783 cachep
->firstnotfull
= &cachep
->slabs
;
785 if (flags
& CFLGS_OFF_SLAB
)
786 cachep
->slabp_cache
= kmem_find_general_cachep(slab_size
,0);
789 /* Copy name over so we don't have problems with unloaded modules */
790 strcpy(cachep
->name
, name
);
794 enable_cpucache(cachep
);
796 /* Need the semaphore to access the chain. */
797 down(&cache_chain_sem
);
801 list_for_each(p
, &cache_chain
) {
802 kmem_cache_t
*pc
= list_entry(p
, kmem_cache_t
, next
);
804 /* The name field is constant - no lock needed. */
805 if (!strcmp(pc
->name
, name
))
810 /* There is no reason to lock our new cache before we
811 * link it in - no one knows about it yet...
813 list_add(&cachep
->next
, &cache_chain
);
814 up(&cache_chain_sem
);
820 * This checks if the kmem_cache_t pointer is chained in the cache_cache
823 static int is_chained_kmem_cache(kmem_cache_t
* cachep
)
828 /* Find the cache in the chain of caches. */
829 down(&cache_chain_sem
);
830 list_for_each(p
, &cache_chain
) {
831 if (p
== &cachep
->next
) {
836 up(&cache_chain_sem
);
841 static int __kmem_cache_shrink(kmem_cache_t
*cachep
)
847 smp_call_function(drain_cache
, cachep
, 1, 1);
852 spin_lock_irq(&cachep
->spinlock
);
854 /* If the cache is growing, stop shrinking. */
855 while (!cachep
->growing
) {
858 p
= cachep
->slabs
.prev
;
859 if (p
== &cachep
->slabs
)
862 slabp
= list_entry(cachep
->slabs
.prev
, slab_t
, list
);
866 list_del(&slabp
->list
);
867 if (cachep
->firstnotfull
== &slabp
->list
)
868 cachep
->firstnotfull
= &cachep
->slabs
;
870 spin_unlock_irq(&cachep
->spinlock
);
871 kmem_slab_destroy(cachep
, slabp
);
872 spin_lock_irq(&cachep
->spinlock
);
874 ret
= !list_empty(&cachep
->slabs
);
875 spin_unlock_irq(&cachep
->spinlock
);
880 * kmem_cache_shrink - Shrink a cache.
881 * @cachep: The cache to shrink.
883 * Releases as many slabs as possible for a cache.
884 * To help debugging, a zero exit status indicates all slabs were released.
886 int kmem_cache_shrink(kmem_cache_t
*cachep
)
888 if (!cachep
|| in_interrupt() || !is_chained_kmem_cache(cachep
))
891 return __kmem_cache_shrink(cachep
);
895 * kmem_cache_destroy - delete a cache
896 * @cachep: the cache to destroy
898 * Remove a kmem_cache_t object from the slab cache.
899 * Returns 0 on success.
901 * It is expected this function will be called by a module when it is
902 * unloaded. This will remove the cache completely, and avoid a duplicate
903 * cache being allocated each time a module is loaded and unloaded, if the
904 * module doesn't have persistent in-kernel storage across loads and unloads.
906 * The caller must guarantee that no one will allocate memory from the cache
907 * during the kmem_cache_destroy().
909 int kmem_cache_destroy (kmem_cache_t
* cachep
)
911 if (!cachep
|| in_interrupt() || cachep
->growing
)
914 /* Find the cache in the chain of caches. */
915 down(&cache_chain_sem
);
916 /* the chain is never empty, cache_cache is never destroyed */
917 if (clock_searchp
== cachep
)
918 clock_searchp
= list_entry(cachep
->next
.next
,
920 list_del(&cachep
->next
);
921 up(&cache_chain_sem
);
923 if (__kmem_cache_shrink(cachep
)) {
924 printk(KERN_ERR
"kmem_cache_destroy: Can't free all objects %p\n",
926 down(&cache_chain_sem
);
927 list_add(&cachep
->next
,&cache_chain
);
928 up(&cache_chain_sem
);
934 for (i
= 0; i
< NR_CPUS
; i
++)
935 kfree(cachep
->cpudata
[i
]);
938 kmem_cache_free(&cache_cache
, cachep
);
943 /* Get the memory for a slab management obj. */
944 static inline slab_t
* kmem_cache_slabmgmt (kmem_cache_t
*cachep
,
945 void *objp
, int colour_off
, int local_flags
)
949 if (OFF_SLAB(cachep
)) {
950 /* Slab management obj is off-slab. */
951 slabp
= kmem_cache_alloc(cachep
->slabp_cache
, local_flags
);
957 * if you enable OPTIMIZE
959 slabp
= objp
+colour_off
;
960 colour_off
+= L1_CACHE_ALIGN(cachep
->num
*
961 sizeof(kmem_bufctl_t
) + sizeof(slab_t
));
964 slabp
->colouroff
= colour_off
;
965 slabp
->s_mem
= objp
+colour_off
;
970 static inline void kmem_cache_init_objs (kmem_cache_t
* cachep
,
971 slab_t
* slabp
, unsigned long ctor_flags
)
975 for (i
= 0; i
< cachep
->num
; i
++) {
976 void* objp
= slabp
->s_mem
+cachep
->objsize
*i
;
978 if (cachep
->flags
& SLAB_RED_ZONE
) {
979 *((unsigned long*)(objp
)) = RED_MAGIC1
;
980 *((unsigned long*)(objp
+ cachep
->objsize
-
981 BYTES_PER_WORD
)) = RED_MAGIC1
;
982 objp
+= BYTES_PER_WORD
;
987 * Constructors are not allowed to allocate memory from
988 * the same cache which they are a constructor for.
989 * Otherwise, deadlock. They must also be threaded.
992 cachep
->ctor(objp
, cachep
, ctor_flags
);
994 if (cachep
->flags
& SLAB_RED_ZONE
)
995 objp
-= BYTES_PER_WORD
;
996 if (cachep
->flags
& SLAB_POISON
)
997 /* need to poison the objs */
998 kmem_poison_obj(cachep
, objp
);
999 if (cachep
->flags
& SLAB_RED_ZONE
) {
1000 if (*((unsigned long*)(objp
)) != RED_MAGIC1
)
1002 if (*((unsigned long*)(objp
+ cachep
->objsize
-
1003 BYTES_PER_WORD
)) != RED_MAGIC1
)
1007 slab_bufctl(slabp
)[i
] = i
+1;
1009 slab_bufctl(slabp
)[i
-1] = BUFCTL_END
;
1014 * Grow (by 1) the number of slabs within a cache. This is called by
1015 * kmem_cache_alloc() when there are no active objs left in a cache.
1017 static int kmem_cache_grow (kmem_cache_t
* cachep
, int flags
)
1023 unsigned int i
, local_flags
;
1024 unsigned long ctor_flags
;
1025 unsigned long save_flags
;
1027 /* Be lazy and only check for valid flags here,
1028 * keeping it out of the critical path in kmem_cache_alloc().
1030 if (flags
& ~(SLAB_DMA
|SLAB_LEVEL_MASK
|SLAB_NO_GROW
))
1032 if (flags
& SLAB_NO_GROW
)
1036 * The test for missing atomic flag is performed here, rather than
1037 * the more obvious place, simply to reduce the critical path length
1038 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1039 * will eventually be caught here (where it matters).
1041 if (in_interrupt() && (flags
& SLAB_LEVEL_MASK
) != SLAB_ATOMIC
)
1044 ctor_flags
= SLAB_CTOR_CONSTRUCTOR
;
1045 local_flags
= (flags
& SLAB_LEVEL_MASK
);
1046 if (local_flags
== SLAB_ATOMIC
)
1048 * Not allowed to sleep. Need to tell a constructor about
1049 * this - it might need to know...
1051 ctor_flags
|= SLAB_CTOR_ATOMIC
;
1053 /* About to mess with non-constant members - lock. */
1054 spin_lock_irqsave(&cachep
->spinlock
, save_flags
);
1056 /* Get colour for the slab, and cal the next value. */
1057 offset
= cachep
->colour_next
;
1058 cachep
->colour_next
++;
1059 if (cachep
->colour_next
>= cachep
->colour
)
1060 cachep
->colour_next
= 0;
1061 offset
*= cachep
->colour_off
;
1062 cachep
->dflags
|= DFLGS_GROWN
;
1065 spin_unlock_irqrestore(&cachep
->spinlock
, save_flags
);
1067 /* A series of memory allocations for a new slab.
1068 * Neither the cache-chain semaphore, or cache-lock, are
1069 * held, but the incrementing c_growing prevents this
1070 * cache from being reaped or shrunk.
1071 * Note: The cache could be selected for reaping in
1072 * kmem_cache_reap(), but when the final test is made the
1073 * growing value will be seen.
1076 /* Get mem for the objs. */
1077 if (!(objp
= kmem_getpages(cachep
, flags
)))
1080 /* Get slab management. */
1081 if (!(slabp
= kmem_cache_slabmgmt(cachep
, objp
, offset
, local_flags
)))
1084 /* Nasty!!!!!! I hope this is OK. */
1085 i
= 1 << cachep
->gfporder
;
1086 page
= mem_map
+ MAP_NR(objp
);
1088 SET_PAGE_CACHE(page
, cachep
);
1089 SET_PAGE_SLAB(page
, slabp
);
1094 kmem_cache_init_objs(cachep
, slabp
, ctor_flags
);
1096 spin_lock_irqsave(&cachep
->spinlock
, save_flags
);
1099 /* Make slab active. */
1100 list_add_tail(&slabp
->list
,&cachep
->slabs
);
1101 if (cachep
->firstnotfull
== &cachep
->slabs
)
1102 cachep
->firstnotfull
= &slabp
->list
;
1103 STATS_INC_GROWN(cachep
);
1104 cachep
->failures
= 0;
1106 spin_unlock_irqrestore(&cachep
->spinlock
, save_flags
);
1109 kmem_freepages(cachep
, objp
);
1111 spin_lock_irqsave(&cachep
->spinlock
, save_flags
);
1113 spin_unlock_irqrestore(&cachep
->spinlock
, save_flags
);
1118 * Perform extra freeing checks:
1119 * - detect double free
1120 * - detect bad pointers.
1121 * Called with the cache-lock held.
1125 static int kmem_extra_free_checks (kmem_cache_t
* cachep
,
1126 slab_t
*slabp
, void * objp
)
1129 unsigned int objnr
= (objp
-slabp
->s_mem
)/cachep
->objsize
;
1131 if (objnr
>= cachep
->num
)
1133 if (objp
!= slabp
->s_mem
+ objnr
*cachep
->objsize
)
1136 /* Check slab's freelist to see if this obj is there. */
1137 for (i
= slabp
->free
; i
!= BUFCTL_END
; i
= slab_bufctl(slabp
)[i
]) {
1145 static inline void kmem_cache_alloc_head(kmem_cache_t
*cachep
, int flags
)
1148 if (flags
& SLAB_DMA
) {
1149 if (!(cachep
->gfpflags
& GFP_DMA
))
1152 if (cachep
->gfpflags
& GFP_DMA
)
1158 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t
*cachep
,
1163 STATS_INC_ALLOCED(cachep
);
1164 STATS_INC_ACTIVE(cachep
);
1165 STATS_SET_HIGH(cachep
);
1167 /* get obj pointer */
1169 objp
= slabp
->s_mem
+ slabp
->free
*cachep
->objsize
;
1170 slabp
->free
=slab_bufctl(slabp
)[slabp
->free
];
1172 if (slabp
->free
== BUFCTL_END
)
1173 /* slab now full: move to next slab for next alloc */
1174 cachep
->firstnotfull
= slabp
->list
.next
;
1176 if (cachep
->flags
& SLAB_POISON
)
1177 if (kmem_check_poison_obj(cachep
, objp
))
1179 if (cachep
->flags
& SLAB_RED_ZONE
) {
1180 /* Set alloc red-zone, and check old one. */
1181 if (xchg((unsigned long *)objp
, RED_MAGIC2
) !=
1184 if (xchg((unsigned long *)(objp
+cachep
->objsize
-
1185 BYTES_PER_WORD
), RED_MAGIC2
) != RED_MAGIC1
)
1187 objp
+= BYTES_PER_WORD
;
1194 * Returns a ptr to an obj in the given cache.
1195 * caller must guarantee synchronization
1196 * #define for the goto optimization 8-)
1198 #define kmem_cache_alloc_one(cachep) \
1202 /* Get slab alloc is to come from. */ \
1204 struct list_head* p = cachep->firstnotfull; \
1205 if (p == &cachep->slabs) \
1206 goto alloc_new_slab; \
1207 slabp = list_entry(p,slab_t, list); \
1209 kmem_cache_alloc_one_tail(cachep, slabp); \
// kmem_cache_alloc_batch: refill the per-cpu array of cachep from its slabs.
// While holding cachep->spinlock it loops up to cachep->batchcount times,
// each time taking one object from the slab at cachep->firstnotfull via
// kmem_cache_alloc_one_tail() and appending it to cc_entry(cc).
// The visible return pops the most recently stored entry.
// flags is not used by any statement visible here.
// NOTE(review): the statement guarded by the list-head test and any
// handling of an array that stayed empty are not visible in this listing.
1213 void* kmem_cache_alloc_batch(kmem_cache_t
* cachep
, int flags
)
1215 int batchcount
= cachep
->batchcount
;
1216 cpucache_t
* cc
= cc_data(cachep
);
1218 spin_lock(&cachep
->spinlock
);
1219 while (batchcount
--) {
1220 /* Get slab alloc is to come from. */
1221 struct list_head
*p
= cachep
->firstnotfull
;
// firstnotfull equal to the list head means no slab has a free object.
1224 if (p
== &cachep
->slabs
)
1226 slabp
= list_entry(p
,slab_t
, list
);
1227 cc_entry(cc
)[cc
->avail
++] =
1228 kmem_cache_alloc_one_tail(cachep
, slabp
);
1230 spin_unlock(&cachep
->spinlock
);
1233 return cc_entry(cc
)[--cc
->avail
];
// __kmem_cache_alloc: allocation path behind kmem_cache_alloc() and kmalloc().
// Visible steps: run kmem_cache_alloc_head(), disable local interrupts with
// local_irq_save(), then obtain objp either from the per-cpu array
// (cc_entry), from kmem_cache_alloc_batch(), or from kmem_cache_alloc_one()
// (once bracketed by cachep->spinlock, once without it). Interrupts are
// restored before the cache is grown with kmem_cache_grow().
// NOTE(review): the conditionals selecting between these paths, the
// alloc_new_slab label used by kmem_cache_alloc_one() and the return
// statements are missing from this listing - check the full source before
// relying on the ordering shown here.
1238 static inline void * __kmem_cache_alloc (kmem_cache_t
*cachep
, int flags
)
1240 unsigned long save_flags
;
1243 kmem_cache_alloc_head(cachep
, flags
);
1245 local_irq_save(save_flags
);
1248 cpucache_t
*cc
= cc_data(cachep
);
// Per-cpu array path: pop the newest entry.
1252 STATS_INC_ALLOCHIT(cachep
);
1253 objp
= cc_entry(cc
)[--cc
->avail
];
// Array path missed: refill from the slabs.
1255 STATS_INC_ALLOCMISS(cachep
);
1256 objp
= kmem_cache_alloc_batch(cachep
,flags
);
1258 goto alloc_new_slab_nolock
;
// Direct slab path taken under the cache spinlock.
1261 spin_lock(&cachep
->spinlock
);
1262 objp
= kmem_cache_alloc_one(cachep
);
1263 spin_unlock(&cachep
->spinlock
);
// Direct slab path without the spinlock calls.
1267 objp
= kmem_cache_alloc_one(cachep
);
1269 local_irq_restore(save_flags
);
1273 spin_unlock(&cachep
->spinlock
);
1274 alloc_new_slab_nolock
:
// Interrupts are re-enabled before kmem_cache_grow() is called.
1276 local_irq_restore(save_flags
);
1277 if (kmem_cache_grow(cachep
, flags
))
1278 /* Someone may have stolen our objs. Doesn't matter, we'll
1279 * just come back here again.
1286 * Release an obj back to its cache. If the obj has a constructed
1287 * state, it should be in this state _before_ it is released.
1288 * - caller is responsible for the synchronization
// CHECK_NR / CHECK_PAGE: sanity checks applied to a pointer that is freed.
// First pair of definitions: CHECK_NR logs an out-of-range message when
// nr >= max_mapnr; CHECK_PAGE logs a bad-pointer message when the page
// fails PageSlab(). Both print objp, which must be in scope where the
// macro is used.
// Second pair of definitions: empty do-while statements.
// NOTE(review): the preprocessor conditional that selects between the two
// pairs and the tails of the first pair are not visible in this listing.
1292 # define CHECK_NR(nr) \
1294 if (nr >= max_mapnr) { \
1295 printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
1296 (unsigned long)objp); \
1300 # define CHECK_PAGE(page) \
1302 if (!PageSlab(page)) { \
1303 printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
1304 (unsigned long)objp); \
1310 # define CHECK_NR(nr) do { } while (0)
1311 # define CHECK_PAGE(nr) do { } while (0)
// kmem_cache_free_one: give one object back to the slab it belongs to.
// Visible steps:
//  - CHECK_NR / CHECK_PAGE on the page that backs objp;
//  - slabp is looked up with GET_PAGE_SLAB();
//  - with SLAB_DEBUG_INITIAL the constructor is invoked in verify mode;
//  - with SLAB_RED_ZONE objp is moved back by BYTES_PER_WORD and xchg()
//    stores RED_MAGIC1 in both guard words, comparing the old values with
//    RED_MAGIC2;
//  - with SLAB_POISON the object is poisoned;
//  - the object index is pushed on the slab free list (slab_bufctl);
//  - the slab is repositioned on cachep->slabs relative to firstnotfull.
// NOTE(review): the declaration of slabp, the statements guarded by the
// debug tests, the labels of the list fix-up code and the test that
// detects an empty slab are not visible in this listing.
1314 static inline void kmem_cache_free_one(kmem_cache_t
*cachep
, void *objp
)
1318 CHECK_NR(MAP_NR(objp
));
1319 CHECK_PAGE(mem_map
+ MAP_NR(objp
));
1320 /* reduces memory footprint
1322 if (OPTIMIZE(cachep))
1323 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1326 slabp
= GET_PAGE_SLAB(mem_map
+ MAP_NR(objp
));
1329 if (cachep
->flags
& SLAB_DEBUG_INITIAL
)
1330 /* Need to call the slab's constructor so the
1331 * caller can perform a verify of its state (debugging).
1332 * Called without the cache-lock held.
1334 cachep
->ctor(objp
, cachep
, SLAB_CTOR_CONSTRUCTOR
|SLAB_CTOR_VERIFY
);
1336 if (cachep
->flags
& SLAB_RED_ZONE
) {
// Step back onto the leading guard word.
1337 objp
-= BYTES_PER_WORD
;
1338 if (xchg((unsigned long *)objp
, RED_MAGIC1
) != RED_MAGIC2
)
1339 /* Either write before start, or a double free. */
1341 if (xchg((unsigned long *)(objp
+cachep
->objsize
-
1342 BYTES_PER_WORD
), RED_MAGIC1
) != RED_MAGIC2
)
1343 /* Either write past end, or a double free. */
1346 if (cachep
->flags
& SLAB_POISON
)
1347 kmem_poison_obj(cachep
, objp
);
1348 if (kmem_extra_free_checks(cachep
, slabp
, objp
))
// Index of the object inside its slab.
1352 unsigned int objnr
= (objp
-slabp
->s_mem
)/cachep
->objsize
;
// Push objnr on the free list: its slot takes the old head, then it
// becomes the new head.
1354 slab_bufctl(slabp
)[objnr
] = slabp
->free
;
1355 slabp
->free
= objnr
;
1357 STATS_DEC_ACTIVE(cachep
);
1359 /* fixup slab chain */
// inuse is compared before the decrement: equality means the slab was
// completely allocated until this free.
1360 if (slabp
->inuse
-- == cachep
->num
)
1361 goto moveslab_partial
;
1368 * Even if the page is now empty, we can set c_firstnotfull to
1369 * slabp: there are no partial slabs in this case
1372 struct list_head
*t
= cachep
->firstnotfull
;
1374 cachep
->firstnotfull
= &slabp
->list
;
// Test whether slabp already sits directly in front of the old
// firstnotfull entry.
1375 if (slabp
->list
.next
== t
)
// Relink slabp immediately before the old firstnotfull entry.
1377 list_del(&slabp
->list
);
1378 list_add_tail(&slabp
->list
, t
);
1383 * was partial, now empty.
1384 * c_firstnotfull might point to slabp
// Remember the predecessor of firstnotfull before slabp is unlinked.
1388 struct list_head
*t
= cachep
->firstnotfull
->prev
;
// Move slabp to the tail of the slab list.
1390 list_del(&slabp
->list
);
1391 list_add_tail(&slabp
->list
, &cachep
->slabs
);
// If firstnotfull referred to slabp, re-derive it from the saved
// predecessor.
1392 if (cachep
->firstnotfull
== &slabp
->list
)
1393 cachep
->firstnotfull
= t
->next
;
1399 static inline void __free_block (kmem_cache_t
* cachep
,
1400 void** objpp
, int len
)
1402 for ( ; len
> 0; len
--, objpp
++)
1403 kmem_cache_free_one(cachep
, *objpp
);
1406 static void free_block (kmem_cache_t
* cachep
, void** objpp
, int len
)
1408 spin_lock(&cachep
->spinlock
);
1409 __free_block(cachep
, objpp
, len
);
1410 spin_unlock(&cachep
->spinlock
);
1416 * called with disabled ints
// __kmem_cache_free: free path shared by kmem_cache_free() and kfree().
// Visible steps: CHECK_NR / CHECK_PAGE on objp; if the per-cpu array has
// room (avail < limit) the pointer is stored there; otherwise batchcount
// entries are removed from the top of the array (the name of the call that
// receives them is cut off in this listing) and objp is stored afterwards.
// free_block(cachep, &objp, 1) and kmem_cache_free_one() appear as
// alternative paths.
// NOTE(review): the conditionals choosing between the per-cpu path and the
// two direct paths, and the declaration of batchcount, are not visible.
1418 static inline void __kmem_cache_free (kmem_cache_t
*cachep
, void* objp
)
1421 cpucache_t
*cc
= cc_data(cachep
);
1423 CHECK_NR(MAP_NR(objp
));
1424 CHECK_PAGE(mem_map
+ MAP_NR(objp
));
// Room left in the per-cpu array: keep the object there.
1427 if (cc
->avail
< cc
->limit
) {
1428 STATS_INC_FREEHIT(cachep
);
1429 cc_entry(cc
)[cc
->avail
++] = objp
;
// Array is at its limit: drop batchcount entries from the top first.
1432 STATS_INC_FREEMISS(cachep
);
1433 batchcount
= cachep
->batchcount
;
1434 cc
->avail
-= batchcount
;
// Arguments of the call that receives the removed entries.
1436 &cc_entry(cc
)[cc
->avail
],batchcount
);
1437 cc_entry(cc
)[cc
->avail
++] = objp
;
1440 free_block(cachep
, &objp
, 1);
1443 kmem_cache_free_one(cachep
, objp
);
1448 * kmem_cache_alloc - Allocate an object
1449 * @cachep: The cache to allocate from.
1450 * @flags: See kmalloc().
1452 * Allocate an object from this cache. The flags are only relevant
1453 * if the cache has no available objects.
1455 void * kmem_cache_alloc (kmem_cache_t
*cachep
, int flags
)
1457 return __kmem_cache_alloc(cachep
, flags
);
1461 * kmalloc - allocate memory
1462 * @size: how many bytes of memory are required.
1463 * @flags: the type of memory to allocate.
1465 * kmalloc is the normal method of allocating memory
1466 * in the kernel. The @flags argument may be one of:
1470 * %GFP_ATOMIC - allocation will not sleep. Use inside interrupt handlers.
1472 * %GFP_USER - allocate memory on behalf of user. May sleep.
1474 * %GFP_KERNEL - allocate normal kernel ram. May sleep.
1476 * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1477 * Don't use unless you're in the NFS code.
1479 * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
// kmalloc: serve a request from the table of general size classes.
// Walks cache_sizes until an entry with cs_size == 0 and allocates with
// __kmem_cache_alloc() from the cs_dmacachep or cs_cachep cache of a size
// class, chosen by flags & GFP_DMA. BUG() is reached when the loop ends
// without having returned.
// NOTE(review): the statement guarded by the size comparison and whatever
// follows BUG() are not visible in this listing; presumably classes smaller
// than size are skipped - confirm.
1481 void * kmalloc (size_t size
, int flags
)
1483 cache_sizes_t
*csizep
= cache_sizes
;
1485 for (; csizep
->cs_size
; csizep
++) {
// True when this size class cannot hold the request.
1486 if (size
> csizep
->cs_size
)
1488 return __kmem_cache_alloc(flags
& GFP_DMA
?
1489 csizep
->cs_dmacachep
: csizep
->cs_cachep
, flags
);
1491 BUG(); // too big size
1496 * kmem_cache_free - Deallocate an object
1497 * @cachep: The cache the allocation was from.
1498 * @objp: The previously allocated object.
1500 * Free an object which was previously allocated from this
// kmem_cache_free: exported free routine for objects of a specific cache.
// Runs CHECK_NR / CHECK_PAGE, compares cachep with the cache recorded for
// the page of objp (GET_PAGE_CACHE), then calls __kmem_cache_free() with
// local interrupts disabled through local_irq_save / local_irq_restore.
// NOTE(review): the statement guarded by the cache comparison and any
// preprocessor conditional around the checks are not visible here.
1503 void kmem_cache_free (kmem_cache_t
*cachep
, void *objp
)
1505 unsigned long flags
;
1507 CHECK_NR(MAP_NR(objp
));
1508 CHECK_PAGE(mem_map
+ MAP_NR(objp
));
// True when objp does not belong to the cache the caller named.
1509 if (cachep
!= GET_PAGE_CACHE(mem_map
+ MAP_NR(objp
)))
1513 local_irq_save(flags
);
1514 __kmem_cache_free(cachep
, objp
);
1515 local_irq_restore(flags
);
1519 * kfree - free previously allocated memory
1520 * @objp: pointer returned by kmalloc.
1522 * Don't free memory not originally allocated by kmalloc()
1523 * or you will run into trouble.
// kfree: free memory whose owning cache is not known to the caller.
// With local interrupts disabled it runs CHECK_NR / CHECK_PAGE, looks up
// the owning cache c with GET_PAGE_CACHE() on the page of objp, and passes
// the pointer to __kmem_cache_free(); the cast removes the const qualifier.
// NOTE(review): the declaration of c and any early exit before
// local_irq_save() are not visible in this listing.
1525 void kfree (const void *objp
)
1528 unsigned long flags
;
1532 local_irq_save(flags
);
1533 CHECK_NR(MAP_NR(objp
));
1534 CHECK_PAGE(mem_map
+ MAP_NR(objp
));
1535 c
= GET_PAGE_CACHE(mem_map
+ MAP_NR(objp
));
1536 __kmem_cache_free(c
, (void*)objp
);
1537 local_irq_restore(flags
);
// kmem_find_general_cachep: look up a general cache for a request size.
// Walks cache_sizes until an entry with cs_size == 0; the visible return
// yields cs_dmacachep when gfpflags has GFP_DMA set and cs_cachep otherwise.
// NOTE(review): the statement guarded by the size comparison and the end of
// the loop are not visible in this listing; presumably smaller size classes
// are skipped - confirm.
1540 kmem_cache_t
* kmem_find_general_cachep (size_t size
, int gfpflags
)
1542 cache_sizes_t
*csizep
= cache_sizes
;
1544 /* This function could be moved to the header file, and
1545 * made inline so consumers can quickly determine what
1546 * cache pointer they require.
1548 for ( ; csizep
->cs_size
; csizep
++) {
// True when this size class cannot hold the request.
1549 if (size
> csizep
->cs_size
)
1553 return (gfpflags
& GFP_DMA
) ? csizep
->cs_dmacachep
: csizep
->cs_cachep
;
1558 * called with local interrupts disabled
// drain_cache: return the entries of the per-cpu array of one cache to its
// slabs. The argument is an untyped pointer that is used as kmem_cache_t.
// When the array exists and holds entries, all cc->avail of them are passed
// to free_block().
// NOTE(review): the rest of the if body is not visible in this listing.
1560 static void drain_cache (void* __cachep
)
1562 kmem_cache_t
*cachep
= __cachep
;
1563 cpucache_t
*cc
= cc_data(cachep
);
1565 if (cc
&& cc
->avail
) {
1566 free_block(cachep
, cc_entry(cc
), cc
->avail
);
1571 typedef struct ccupdate_struct_s
1573 kmem_cache_t
* cachep
;
1574 cpucache_t
* new[NR_CPUS
];
1575 } ccupdate_struct_t
;
1578 * called with local interrupts disabled
1580 static void ccupdate_callback (void* __new
)
1582 ccupdate_struct_t
* new = __new
;
1583 cpucache_t
*old
= cc_data(new->cachep
);
1585 cc_data(new->cachep
) = new->new[smp_processor_id()];
1586 new->new[smp_processor_id()] = old
;
1589 /* called with cache_chain_sem acquired. */
// kmem_tune_cpucache: install per-cpu arrays of a new size for one cache.
// Visible steps:
//  - argument tests (batchcount > limit; limit != 0 with batchcount == 0);
//  - new.new[] is zeroed, then for each of the smp_num_cpus CPUs an array
//    of limit pointers plus a cpucache_t header is obtained with kmalloc()
//    and stored at index cpu_logical_map(i);
//  - cachep->batchcount is updated under spin_lock_irq;
//  - ccupdate_callback() is run through smp_call_function() and then called
//    directly with local interrupts disabled; it leaves the previous arrays
//    in new.new[];
//  - the objects held by each previous array are released with free_block();
//  - a final loop walks i downwards and passes new.new[] entries to kfree().
// NOTE(review): the outcome of the argument tests, the check of the
// kmalloc() result, the label that starts the final loop and all return
// statements are not visible in this listing.
1590 static int kmem_tune_cpucache (kmem_cache_t
* cachep
, int limit
, int batchcount
)
1592 ccupdate_struct_t
new;
1596 * These are admin-provided, so we are more graceful.
1602 if (batchcount
> limit
)
1604 if (limit
!= 0 && !batchcount
)
1607 memset(&new.new,0,sizeof(new.new));
1609 for (i
= 0; i
< smp_num_cpus
; i
++) {
// One header followed by limit pointer slots.
1613 ccnew
= kmalloc(sizeof(void*)*limit
+
1614 sizeof(cpucache_t
), GFP_KERNEL
);
1617 ccnew
->limit
= limit
;
1619 new.new[cpu_logical_map(i
)] = ccnew
;
1622 new.cachep
= cachep
;
1623 spin_lock_irq(&cachep
->spinlock
);
1624 cachep
->batchcount
= batchcount
;
1625 spin_unlock_irq(&cachep
->spinlock
);
// Exchange the arrays through smp_call_function(), then on this CPU.
1627 smp_call_function(ccupdate_callback
,&new,1,1);
1628 local_irq_disable();
1629 ccupdate_callback(&new);
// new.new[] now holds the arrays that were installed before the exchange.
1632 for (i
= 0; i
< smp_num_cpus
; i
++) {
1633 cpucache_t
* ccold
= new.new[cpu_logical_map(i
)];
1636 local_irq_disable();
1637 free_block(cachep
, cc_entry(ccold
), ccold
->avail
);
// Walks back from i-1 to 0 freeing the arrays stored so far.
1643 for (i
--; i
>= 0; i
--)
1644 kfree(new.new[cpu_logical_map(i
)]);
// enable_cpucache: choose a per-cpu array size for one cache and apply it.
// The object size is compared with PAGE_SIZE, 1024 and 256; the result is
// passed to kmem_tune_cpucache() as limit, with limit/2 as batchcount.
// A failure is logged together with the cache name and the negated error.
// NOTE(review): the values assigned to limit in each size band and the test
// that guards the printk() are not visible in this listing.
1648 static void enable_cpucache (kmem_cache_t
*cachep
)
1653 /* FIXME: optimize */
1654 if (cachep
->objsize
> PAGE_SIZE
)
1656 if (cachep
->objsize
> 1024)
1658 else if (cachep
->objsize
> 256)
1663 err
= kmem_tune_cpucache(cachep
, limit
, limit
/2);
1665 printk(KERN_ERR
"enable_cpucache failed for %s, error %d.\n",
1666 cachep
->name
, -err
);
1669 static void enable_all_cpucaches (void)
1671 struct list_head
* p
;
1673 down(&cache_chain_sem
);
1675 p
= &cache_cache
.next
;
1677 kmem_cache_t
* cachep
= list_entry(p
, kmem_cache_t
, next
);
1679 enable_cpucache(cachep
);
1680 p
= cachep
->next
.next
;
1681 } while (p
!= &cache_cache
.next
);
1683 up(&cache_chain_sem
);
1688 * kmem_cache_reap - Reclaim memory from caches.
1689 * @gfp_mask: the type of memory required.
1691 * Called from try_to_free_page().
// kmem_cache_reap: pick one cache and destroy part of its unused slabs.
// Visible steps:
//  - acquire cache_chain_sem: down() when gfp_mask has __GFP_WAIT,
//    down_trylock() otherwise;
//  - scan up to REAP_SCANLEN caches starting at clock_searchp; tests on
//    SLAB_NO_REAP, on the GFP_DMA bits, on searchp->growing and on
//    DFLGS_GROWN (which is cleared) are applied to each one;
//  - per-cpu array entries are handed back with __free_block();
//  - the slab list is walked from its tail and a page estimate is computed
//    as full_free * (1 << gfporder), scaled by (x*4+1)/5;
//  - the cache with the largest estimate becomes best_cachep;
//  - for that cache about 80% of best_len slabs are unlinked from the tail
//    of the list and destroyed, with the spinlock dropped around
//    kmem_slab_destroy();
//  - cache_chain_sem is released.
// NOTE(review): loop openers, the statements guarded by most tests, the
// counting of full_free and the labels are not visible in this listing.
1693 void kmem_cache_reap (int gfp_mask
)
1696 kmem_cache_t
*searchp
;
1697 kmem_cache_t
*best_cachep
;
1698 unsigned int best_pages
;
1699 unsigned int best_len
;
// Callers that may wait block on the semaphore; the others only try it.
1702 if (gfp_mask
& __GFP_WAIT
)
1703 down(&cache_chain_sem
);
1705 if (down_trylock(&cache_chain_sem
))
1708 scan
= REAP_SCANLEN
;
1712 searchp
= clock_searchp
;
1715 struct list_head
* p
;
1716 unsigned int full_free
;
1718 /* It's safe to test this without holding the cache-lock. */
1719 if (searchp
->flags
& SLAB_NO_REAP
)
1721 /* FIXME: is this really a good idea? */
1722 if (gfp_mask
& GFP_DMA
) {
1723 if (!(searchp
->gfpflags
& GFP_DMA
))
1726 if (searchp
->gfpflags
& GFP_DMA
)
1729 spin_lock_irq(&searchp
->spinlock
);
1730 if (searchp
->growing
)
1732 if (searchp
->dflags
& DFLGS_GROWN
) {
1733 searchp
->dflags
&= ~DFLGS_GROWN
;
// Give the per-cpu array entries back to the slabs; the spinlock of
// searchp is held here, hence the variant that takes no lock.
1738 cpucache_t
*cc
= cc_data(searchp
);
1739 if (cc
&& cc
->avail
) {
1740 __free_block(searchp
, cc_entry(cc
), cc
->avail
);
// Walk the slab list backwards from its tail.
1747 p
= searchp
->slabs
.prev
;
1748 while (p
!= &searchp
->slabs
) {
1749 slabp
= list_entry(p
, slab_t
, list
);
1757 * Try to avoid slabs with constructors and/or
1758 * more than one page per slab (as it can be difficult
1759 * to get high orders from gfp()).
1761 pages
= full_free
* (1<<searchp
->gfporder
);
// Scale the estimate to roughly four fifths.
1763 pages
= (pages
*4+1)/5;
1764 if (searchp
->gfporder
)
1765 pages
= (pages
*4+1)/5;
1766 if (pages
> best_pages
) {
1767 best_cachep
= searchp
;
1768 best_len
= full_free
;
1770 if (full_free
>= REAP_PERFECT
) {
1771 clock_searchp
= list_entry(searchp
->next
.next
,
1777 spin_unlock_irq(&searchp
->spinlock
);
1779 searchp
= list_entry(searchp
->next
.next
,kmem_cache_t
,next
);
1780 } while (--scan
&& searchp
!= clock_searchp
);
1782 clock_searchp
= searchp
;
1785 /* couldn't find anything to reap */
1788 spin_lock_irq(&best_cachep
->spinlock
);
1790 /* free only 80% of the free slabs */
1791 best_len
= (best_len
*4 + 1)/5;
1792 for (scan
= 0; scan
< best_len
; scan
++) {
1793 struct list_head
*p
;
1795 if (best_cachep
->growing
)
// Candidate slab is the one at the tail of the list.
1797 p
= best_cachep
->slabs
.prev
;
1798 if (p
== &best_cachep
->slabs
)
1800 slabp
= list_entry(p
,slab_t
,list
);
// Unlink the slab; firstnotfull is reset to the list head if it
// referred to this slab.
1803 list_del(&slabp
->list
);
1804 if (best_cachep
->firstnotfull
== &slabp
->list
)
1805 best_cachep
->firstnotfull
= &best_cachep
->slabs
;
1806 STATS_INC_REAPED(best_cachep
);
1808 /* Safe to drop the lock. The slab is no longer linked to the
1811 spin_unlock_irq(&best_cachep
->spinlock
);
1812 kmem_slab_destroy(best_cachep
, slabp
);
1813 spin_lock_irq(&best_cachep
->spinlock
);
1815 spin_unlock_irq(&best_cachep
->spinlock
);
1817 up(&cache_chain_sem
);
1821 #ifdef CONFIG_PROC_FS
1823 * cache-name num-active-objs total-objs
1824 * obj-size num-active-slabs total-slabs
1825 * num-pages-per-slab
1833 if (len-off > count) \
// proc_getdata: format the slabinfo text into the buffer at page.
// Visible steps:
//  - write a version header line;
//  - with cache_chain_sem held, visit every cache starting at
//    cache_cache.next until the walk is back at that node;
//  - for each cache, under spin_lock_irq, walk its slab list adding
//    slabp->inuse to active_objs, then derive num_slabs and num_objs;
//  - print name, object counts, object size, slab counts and pages per
//    slab (1 << gfporder), followed by optional statistics columns and a
//    newline.
// NOTE(review): the declarations of len, slabp and limit, the start value
// of several counters, the counting of active_slabs, the preprocessor
// conditionals around the statistics columns, the use of start, off and
// count, and the return statement are not visible in this listing.
1838 static int proc_getdata (char*page
, char**start
, off_t off
, int count
)
1840 struct list_head
*p
;
1843 /* Output format version, so at least we can change it without _too_
1846 len
+= sprintf(page
+len
, "slabinfo - version: 1.1"
1856 down(&cache_chain_sem
);
1857 p
= &cache_cache
.next
;
1859 kmem_cache_t
*cachep
;
1860 struct list_head
*q
;
1862 unsigned long active_objs
;
1863 unsigned long num_objs
;
1864 unsigned long active_slabs
= 0;
1865 unsigned long num_slabs
;
1866 cachep
= list_entry(p
, kmem_cache_t
, next
);
1868 spin_lock_irq(&cachep
->spinlock
);
// Accumulate per-slab figures over the whole slab list of this cache.
1871 list_for_each(q
,&cachep
->slabs
) {
1872 slabp
= list_entry(q
, slab_t
, list
);
1873 active_objs
+= slabp
->inuse
;
1874 num_objs
+= cachep
->num
;
1880 num_slabs
+=active_slabs
;
// num_objs is recomputed here from the slab count.
1881 num_objs
= num_slabs
*cachep
->num
;
1883 len
+= sprintf(page
+len
, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1884 cachep
->name
, active_objs
, num_objs
, cachep
->objsize
,
1885 active_slabs
, num_slabs
, (1<<cachep
->gfporder
));
// Optional columns: high mark, allocations, grown, reaped, errors.
1889 unsigned long errors
= cachep
->errors
;
1890 unsigned long high
= cachep
->high_mark
;
1891 unsigned long grown
= cachep
->grown
;
1892 unsigned long reaped
= cachep
->reaped
;
1893 unsigned long allocs
= cachep
->num_allocations
;
1895 len
+= sprintf(page
+len
, " : %6lu %7lu %5lu %4lu %4lu",
1896 high
, allocs
, grown
, reaped
, errors
);
// Optional columns: per-cpu array limit and batchcount.
1901 unsigned int batchcount
= cachep
->batchcount
;
1904 if (cc_data(cachep
))
1905 limit
= cc_data(cachep
)->limit
;
1908 len
+= sprintf(page
+len
, " : %4u %4u",
1912 #if STATS && defined(CONFIG_SMP)
1914 unsigned long allochit
= atomic_read(&cachep
->allochit
);
1915 unsigned long allocmiss
= atomic_read(&cachep
->allocmiss
);
1916 unsigned long freehit
= atomic_read(&cachep
->freehit
);
1917 unsigned long freemiss
= atomic_read(&cachep
->freemiss
);
1918 len
+= sprintf(page
+len
, " : %6lu %6lu %6lu %6lu",
1919 allochit
, allocmiss
, freehit
, freemiss
);
1922 len
+= sprintf(page
+len
,"\n");
1923 spin_unlock_irq(&cachep
->spinlock
);
1925 p
= cachep
->next
.next
;
1926 } while (p
!= &cache_cache
.next
);
1928 up(&cache_chain_sem
);
1936 * slabinfo_read_proc - generates /proc/slabinfo
1937 * @page: scratch area, one page long
1938 * @start: pointer to the pointer to the output buffer
1939 * @off: offset within /proc/slabinfo the caller is interested in
1940 * @count: requested len in bytes
1944 * The contents of the buffer are
1951 * num-pages-per-slab
1952 * + further values on SMP and with statistics enabled
// slabinfo_read_proc: read handler that produces the slabinfo text.
// Calls proc_getdata() to fill page, subtracts the distance between *start
// and page from the returned length, and caps the result at count.
// eof and data are not used by any statement visible here.
// NOTE(review): the statements after the cap, including the return, are
// not visible in this listing.
1954 int slabinfo_read_proc (char *page
, char **start
, off_t off
,
1955 int count
, int *eof
, void *data
)
1957 int len
= proc_getdata(page
, start
, off
, count
);
// Discount the part of the text that lies before *start.
1958 len
-= (*start
-page
);
1961 if (len
>count
) len
= count
;
// Size in bytes of the on-stack buffer kbuf in slabinfo_write_proc(), which
// also compares the length of a write against this value.
1966 #define MAX_SLABINFO_WRITE 128
1968 * slabinfo_write_proc - SMP tuning for the slab allocator
1970 * @buffer: user buffer
1974 int slabinfo_write_proc (struct file
*file
, const char *buffer
,
1975 unsigned long count
, void *data
)
1978 char kbuf
[MAX_SLABINFO_WRITE
], *tmp
;
1979 int limit
, batchcount
, res
;
1980 struct list_head
*p
;
1982 if (count
> MAX_SLABINFO_WRITE
)
1984 if (copy_from_user(&kbuf
, buffer
, count
))
1987 tmp
= strchr(kbuf
, ' ');
1992 limit
= simple_strtol(tmp
, &tmp
, 10);
1995 batchcount
= simple_strtol(tmp
, &tmp
, 10);
1997 /* Find the cache in the chain of caches. */
1998 down(&cache_chain_sem
);
2000 list_for_each(p
,&cache_chain
) {
2001 kmem_cache_t
*cachep
= list_entry(p
, kmem_cache_t
, next
);
2003 if (!strcmp(cachep
->name
, kbuf
)) {
2004 res
= kmem_tune_cpucache(cachep
, limit
, batchcount
);
2008 up(&cache_chain_sem
);