[PATCH] Slab name cleanup - last try
[linux-2.6/history.git] / mm / slab.c
blob9835c232b06664820379c660a6f008cafb4cc39d
/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *	(c) 2000 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in;
 *	UNIX Internals: The New Frontiers by Uresh Vahalia
 *	Pub: Prentice Hall	ISBN 0-13-101908-2
 * or with a little more detail in;
 *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *	Jeff Bonwick (Sun Microsystems).
 *	Presented at: USENIX Summer 1994 Technical Conference
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs,
 * otherwise from empty slabs or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * On SMP systems, each cache has a short per-cpu head array, most allocs
 * and frees go into that array, and if that array overflows, then 1/2
 * of the entries in the array are given back into the global cache.
 * This reduces the number of spinlock operations.
 *
 * The c_cpuarray may not be read with enabled local interrupts.
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in kmem_cache_t and slab_t never change, they
 *	are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *	The global cache-chain is protected by the semaphore 'cache_chain_sem'.
 *	The sem is only needed when accessing/extending the cache-chain, which
 *	can never happen inside an interrupt (kmem_cache_create(),
 *	kmem_cache_shrink() and kmem_cache_reap()).
 *
 *	To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
 *	may be sleeping and therefore not holding the semaphore/lock), the
 *	growing field is used.  This also prevents reaping from a cache.
 *
 *	At present, each engine can be growing a cache.  This should be blocked.
 */
71 #include <linux/config.h>
72 #include <linux/slab.h>
73 #include <linux/mm.h>
74 #include <linux/cache.h>
75 #include <linux/interrupt.h>
76 #include <linux/init.h>
77 #include <linux/compiler.h>
78 #include <asm/uaccess.h>
/*
 * Build-time switches, all driven by CONFIG_DEBUG_SLAB.
 *
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
 *		  SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif
/*
 * Parameters for kmem_cache_reap
 */
#define REAP_SCANLEN	10
#define REAP_PERFECT	10

/* Shouldn't this be in a header file somewhere? */
/* Size of one machine word; object sizes are rounded up to a multiple of it. */
#define	BYTES_PER_WORD		sizeof(void *)
/*
 * Legal flag mask for kmem_cache_create().
 * The debugging flags are only accepted when DEBUG is enabled;
 * kmem_cache_create() BUG()s on any flag outside this mask.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_NO_REAP | SLAB_CACHE_DMA | \
			 SLAB_MUST_HWCACHE_ALIGN)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
			 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
#endif
/*
 * kmem_bufctl_t:
 *
 * Bufctl's are used for linking objs within a slab
 * linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 */

/* BUFCTL_END terminates a slab's free list; SLAB_LIMIT caps objs per slab. */
#define BUFCTL_END 0xffffFFFF
#define	SLAB_LIMIT 0xffffFFFE
typedef unsigned int kmem_bufctl_t;
/* Max number of objs-per-slab for caches which use off-slab slabs.
 * Needed to avoid a possible looping condition in kmem_cache_grow().
 * Raised by kmem_cache_sizes_init() as on-slab general caches are created.
 */
static unsigned long offslab_limit;
150 * slab_t
152 * Manages the objs in a slab. Placed either at the beginning of mem allocated
153 * for a slab, or allocated from an general cache.
154 * Slabs are chained into three list: fully used, partial, fully free slabs.
156 typedef struct slab_s {
157 struct list_head list;
158 unsigned long colouroff;
159 void *s_mem; /* including colour offset */
160 unsigned int inuse; /* num of objs active in slab */
161 kmem_bufctl_t free;
162 } slab_t;
164 #define slab_bufctl(slabp) \
165 ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
/*
 * cpucache_t
 *
 * Per cpu structures
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
typedef struct cpucache_s {
	unsigned int avail;
	unsigned int limit;
} cpucache_t;

/* The array of cached object pointers starts right behind the header. */
#define cc_entry(cpucache) \
	((void **)(((cpucache_t*)(cpucache))+1))
/* The cpucache of the cpu this code is currently running on. */
#define cc_data(cachep) \
	((cachep)->cpudata[smp_processor_id()])
184 * kmem_cache_t
186 * manages a cache.
189 struct kmem_cache_s {
190 /* 1) each alloc & free */
191 /* full, partial first, then free */
192 struct list_head slabs_full;
193 struct list_head slabs_partial;
194 struct list_head slabs_free;
195 unsigned int objsize;
196 unsigned int flags; /* constant flags */
197 unsigned int num; /* # of objs per slab */
198 spinlock_t spinlock;
199 #ifdef CONFIG_SMP
200 unsigned int batchcount;
201 #endif
203 /* 2) slab additions /removals */
204 /* order of pgs per slab (2^n) */
205 unsigned int gfporder;
207 /* force GFP flags, e.g. GFP_DMA */
208 unsigned int gfpflags;
210 size_t colour; /* cache colouring range */
211 unsigned int colour_off; /* colour offset */
212 unsigned int colour_next; /* cache colouring */
213 kmem_cache_t *slabp_cache;
214 unsigned int growing;
215 unsigned int dflags; /* dynamic flags */
217 /* constructor func */
218 void (*ctor)(void *, kmem_cache_t *, unsigned long);
220 /* de-constructor func */
221 void (*dtor)(void *, kmem_cache_t *, unsigned long);
223 unsigned long failures;
225 /* 3) cache creation/removal */
226 const char *name;
227 struct list_head next;
228 #ifdef CONFIG_SMP
229 /* 4) per-cpu data */
230 cpucache_t *cpudata[NR_CPUS];
231 #endif
232 #if STATS
233 unsigned long num_active;
234 unsigned long num_allocations;
235 unsigned long high_mark;
236 unsigned long grown;
237 unsigned long reaped;
238 unsigned long errors;
239 #ifdef CONFIG_SMP
240 atomic_t allochit;
241 atomic_t allocmiss;
242 atomic_t freehit;
243 atomic_t freemiss;
244 #endif
245 #endif
/* internal c_flags */
#define	CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	CFLGS_OPTIMIZE	0x020000UL	/* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define	DFLGS_GROWN	0x000001UL	/* don't reap a recently grown */

/*
 * Flag tests on a cache pointer. Each yields non-zero when the flag is set.
 * GROWN() reads the dynamic flags word, which is the 'dflags' member.
 */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
#define	OPTIMIZE(x)	((x)->flags & CFLGS_OPTIMIZE)
#define	GROWN(x)	((x)->dflags & DFLGS_GROWN)
/*
 * Statistics hooks. With STATS disabled every hook expands to an empty
 * statement, so the call sites need no conditionals of their own.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_INC_REAPED(x)	((x)->reaped++)
#define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
					(x)->high_mark = (x)->num_active; \
				} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_INC_REAPED(x)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#endif

/* The per-cpu hit/miss counters exist only with both STATS and SMP. */
#if STATS && defined(CONFIG_SMP)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
#if DEBUG
/* Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 */
#define	RED_MAGIC1	0x5A2CF071UL	/* when obj is active */
#define	RED_MAGIC2	0x170FC2A5UL	/* when obj is inactive */

/* ...and for poisoning */
#define	POISON_BYTE	0x5a		/* byte value for poisoning */
#define	POISON_END	0xa5		/* end-byte of poisoning */

#endif

/* maximum size of an obj (in 2^order pages) */
#define	MAX_OBJ_ORDER	5	/* 32 pages */

/*
 * Do not go above this order unless 0 objects fit into the slab.
 */
#define	BREAK_GFP_ORDER_HI	2
#define	BREAK_GFP_ORDER_LO	1
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Absolute limit for the gfp order
 */
#define	MAX_GFP_ORDER	5	/* 32 pages */
/* Macros for storing/retrieving the cachep and or slab from the
 * global 'mem_map'. These are used to find the slab an obj belongs to.
 * With kfree(), these are used to find the cache which an obj belongs to.
 *
 * The cache pointer is kept in page->list.next and the slab pointer in
 * page->list.prev.
 */
#define	SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
#define	SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
#define	GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
329 /* Size description struct for general caches. */
330 typedef struct cache_sizes {
331 size_t cs_size;
332 kmem_cache_t *cs_cachep;
333 kmem_cache_t *cs_dmacachep;
334 } cache_sizes_t;
336 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
337 static cache_sizes_t cache_sizes[] = {
338 #if PAGE_SIZE == 4096
339 { 32, NULL, NULL},
340 #endif
341 { 64, NULL, NULL},
342 { 128, NULL, NULL},
343 { 256, NULL, NULL},
344 { 512, NULL, NULL},
345 { 1024, NULL, NULL},
346 { 2048, NULL, NULL},
347 { 4096, NULL, NULL},
348 { 8192, NULL, NULL},
349 { 16384, NULL, NULL},
350 { 32768, NULL, NULL},
351 { 65536, NULL, NULL},
352 {131072, NULL, NULL},
353 { 0, NULL, NULL}
/*
 * Names of the general caches, index for index with cache_sizes
 * (without its terminating sentinel).
 * Must match cache_sizes above. Out of line to keep cache footprint low.
 */
#define CN(x) { x, x " (DMA)" }
static struct {
	char *name;
	char *name_dma;
} cache_names[] = {
#if PAGE_SIZE == 4096
	CN("size-32"),
#endif
	CN("size-64"),
	CN("size-128"),
	CN("size-256"),
	CN("size-512"),
	CN("size-1024"),
	CN("size-2048"),
	CN("size-4096"),
	CN("size-8192"),
	CN("size-16384"),
	CN("size-32768"),
	CN("size-65536"),
	CN("size-131072")
};
#undef CN
379 /* internal cache of cache description objs */
380 static kmem_cache_t cache_cache = {
381 slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
382 slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
383 slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
384 objsize: sizeof(kmem_cache_t),
385 flags: SLAB_NO_REAP,
386 spinlock: SPIN_LOCK_UNLOCKED,
387 colour_off: L1_CACHE_BYTES,
388 name: "kmem_cache",
391 /* Guard access to the cache-chain. */
392 static struct semaphore cache_chain_sem;
394 /* Place maintainer for reaping. */
395 static kmem_cache_t *clock_searchp = &cache_cache;
397 #define cache_chain (cache_cache.next)
399 #ifdef CONFIG_SMP
401 * chicken and egg problem: delay the per-cpu array allocation
402 * until the general caches are up.
404 static int g_cpucache_up;
406 static void enable_cpucache (kmem_cache_t *cachep);
407 static void enable_all_cpucaches (void);
408 #endif
410 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
411 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
412 int flags, size_t *left_over, unsigned int *num)
414 int i;
415 size_t wastage = PAGE_SIZE<<gfporder;
416 size_t extra = 0;
417 size_t base = 0;
419 if (!(flags & CFLGS_OFF_SLAB)) {
420 base = sizeof(slab_t);
421 extra = sizeof(kmem_bufctl_t);
423 i = 0;
424 while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
425 i++;
426 if (i > 0)
427 i--;
429 if (i > SLAB_LIMIT)
430 i = SLAB_LIMIT;
432 *num = i;
433 wastage -= i*size;
434 wastage -= L1_CACHE_ALIGN(base+i*extra);
435 *left_over = wastage;
438 /* Initialisation - setup the `cache' cache. */
439 void __init kmem_cache_init(void)
441 size_t left_over;
443 init_MUTEX(&cache_chain_sem);
444 INIT_LIST_HEAD(&cache_chain);
446 kmem_cache_estimate(0, cache_cache.objsize, 0,
447 &left_over, &cache_cache.num);
448 if (!cache_cache.num)
449 BUG();
451 cache_cache.colour = left_over/cache_cache.colour_off;
452 cache_cache.colour_next = 0;
456 /* Initialisation - setup remaining internal and general caches.
457 * Called after the gfp() functions have been enabled, and before smp_init().
459 void __init kmem_cache_sizes_init(void)
461 cache_sizes_t *sizes = cache_sizes;
463 * Fragmentation resistance on low memory - only use bigger
464 * page orders on machines with more than 32MB of memory.
466 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
467 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
468 do {
469 /* For performance, all the general caches are L1 aligned.
470 * This should be particularly beneficial on SMP boxes, as it
471 * eliminates "false sharing".
472 * Note for systems short on memory removing the alignment will
473 * allow tighter packing of the smaller caches. */
474 if (!(sizes->cs_cachep =
475 kmem_cache_create(cache_names[sizes-cache_sizes].name,
476 sizes->cs_size,
477 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
478 BUG();
481 /* Inc off-slab bufctl limit until the ceiling is hit. */
482 if (!(OFF_SLAB(sizes->cs_cachep))) {
483 offslab_limit = sizes->cs_size-sizeof(slab_t);
484 offslab_limit /= 2;
486 sizes->cs_dmacachep = kmem_cache_create(
487 cache_names[sizes-cache_sizes].name_dma,
488 sizes->cs_size, 0,
489 SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
490 if (!sizes->cs_dmacachep)
491 BUG();
492 sizes++;
493 } while (sizes->cs_size);
496 int __init kmem_cpucache_init(void)
498 #ifdef CONFIG_SMP
499 g_cpucache_up = 1;
500 enable_all_cpucaches();
501 #endif
502 return 0;
505 __initcall(kmem_cpucache_init);
507 /* Interface to system's page allocator. No need to hold the cache-lock.
509 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
511 void *addr;
514 * If we requested dmaable memory, we will get it. Even if we
515 * did not request dmaable memory, we might get it, but that
516 * would be relatively rare and ignorable.
518 flags |= cachep->gfpflags;
519 addr = (void*) __get_free_pages(flags, cachep->gfporder);
520 /* Assume that now we have the pages no one else can legally
521 * messes with the 'struct page's.
522 * However vm_scan() might try to test the structure to see if
523 * it is a named-page or buffer-page. The members it tests are
524 * of no interest here.....
526 return addr;
529 /* Interface to system's page release. */
530 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
532 unsigned long i = (1<<cachep->gfporder);
533 struct page *page = virt_to_page(addr);
535 /* free_pages() does not clear the type bit - we do that.
536 * The pages have been unlinked from their cache-slab,
537 * but their 'struct page's might be accessed in
538 * vm_scan(). Shouldn't be a worry.
540 while (i--) {
541 PageClearSlab(page);
542 page++;
544 free_pages((unsigned long)addr, cachep->gfporder);
547 #if DEBUG
548 static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
550 int size = cachep->objsize;
551 if (cachep->flags & SLAB_RED_ZONE) {
552 addr += BYTES_PER_WORD;
553 size -= 2*BYTES_PER_WORD;
555 memset(addr, POISON_BYTE, size);
556 *(unsigned char *)(addr+size-1) = POISON_END;
559 static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
561 int size = cachep->objsize;
562 void *end;
563 if (cachep->flags & SLAB_RED_ZONE) {
564 addr += BYTES_PER_WORD;
565 size -= 2*BYTES_PER_WORD;
567 end = memchr(addr, POISON_END, size);
568 if (end != (addr+size-1))
569 return 1;
570 return 0;
572 #endif
574 /* Destroy all the objs in a slab, and release the mem back to the system.
575 * Before calling the slab must have been unlinked from the cache.
576 * The cache-lock is not held/needed.
578 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
580 if (cachep->dtor
581 #if DEBUG
582 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
583 #endif
585 int i;
586 for (i = 0; i < cachep->num; i++) {
587 void* objp = slabp->s_mem+cachep->objsize*i;
588 #if DEBUG
589 if (cachep->flags & SLAB_RED_ZONE) {
590 if (*((unsigned long*)(objp)) != RED_MAGIC1)
591 BUG();
592 if (*((unsigned long*)(objp + cachep->objsize
593 -BYTES_PER_WORD)) != RED_MAGIC1)
594 BUG();
595 objp += BYTES_PER_WORD;
597 #endif
598 if (cachep->dtor)
599 (cachep->dtor)(objp, cachep, 0);
600 #if DEBUG
601 if (cachep->flags & SLAB_RED_ZONE) {
602 objp -= BYTES_PER_WORD;
604 if ((cachep->flags & SLAB_POISON) &&
605 kmem_check_poison_obj(cachep, objp))
606 BUG();
607 #endif
611 kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
612 if (OFF_SLAB(cachep))
613 kmem_cache_free(cachep->slabp_cache, slabp);
617 * kmem_cache_create - Create a cache.
618 * @name: A string which is used in /proc/slabinfo to identify this cache.
619 * @size: The size of objects to be created in this cache.
620 * @offset: The offset to use within the page.
621 * @flags: SLAB flags
622 * @ctor: A constructor for the objects.
623 * @dtor: A destructor for the objects.
625 * Returns a ptr to the cache on success, NULL on failure.
626 * Cannot be called within a int, but can be interrupted.
627 * The @ctor is run when new pages are allocated by the cache
628 * and the @dtor is run before the pages are handed back.
630 * @name must be valid until the cache is destroyed. This implies that
631 * the module calling this has to destroy the cache before getting
632 * unloaded.
634 * The flags are
636 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
637 * to catch references to uninitialised memory.
639 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
640 * for buffer overruns.
642 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
643 * memory pressure.
645 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
646 * cacheline. This can be beneficial if you're counting cycles as closely
647 * as davem.
649 kmem_cache_t *
650 kmem_cache_create (const char *name, size_t size, size_t offset,
651 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
652 void (*dtor)(void*, kmem_cache_t *, unsigned long))
654 const char *func_nm = KERN_ERR "kmem_create: ";
655 size_t left_over, align, slab_size;
656 kmem_cache_t *cachep = NULL;
659 * Sanity checks... these are all serious usage bugs.
661 if ((!name) ||
662 in_interrupt() ||
663 (size < BYTES_PER_WORD) ||
664 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
665 (dtor && !ctor) ||
666 (offset < 0 || offset > size))
667 BUG();
669 #if DEBUG
670 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
671 /* No constructor, but inital state check requested */
672 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
673 flags &= ~SLAB_DEBUG_INITIAL;
676 if ((flags & SLAB_POISON) && ctor) {
677 /* request for poisoning, but we can't do that with a constructor */
678 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
679 flags &= ~SLAB_POISON;
681 #if FORCED_DEBUG
682 if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN))
684 * do not red zone large object, causes severe
685 * fragmentation.
687 flags |= SLAB_RED_ZONE;
688 if (!ctor)
689 flags |= SLAB_POISON;
690 #endif
691 #endif
694 * Always checks flags, a caller might be expecting debug
695 * support which isn't available.
697 if (flags & ~CREATE_MASK)
698 BUG();
700 /* Get cache's description obj. */
701 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
702 if (!cachep)
703 goto opps;
704 memset(cachep, 0, sizeof(kmem_cache_t));
706 /* Check that size is in terms of words. This is needed to avoid
707 * unaligned accesses for some archs when redzoning is used, and makes
708 * sure any on-slab bufctl's are also correctly aligned.
710 if (size & (BYTES_PER_WORD-1)) {
711 size += (BYTES_PER_WORD-1);
712 size &= ~(BYTES_PER_WORD-1);
713 printk("%sForcing size word alignment - %s\n", func_nm, name);
716 #if DEBUG
717 if (flags & SLAB_RED_ZONE) {
719 * There is no point trying to honour cache alignment
720 * when redzoning.
722 flags &= ~SLAB_HWCACHE_ALIGN;
723 size += 2*BYTES_PER_WORD; /* words for redzone */
725 #endif
726 align = BYTES_PER_WORD;
727 if (flags & SLAB_HWCACHE_ALIGN)
728 align = L1_CACHE_BYTES;
730 /* Determine if the slab management is 'on' or 'off' slab. */
731 if (size >= (PAGE_SIZE>>3))
733 * Size is large, assume best to place the slab management obj
734 * off-slab (should allow better packing of objs).
736 flags |= CFLGS_OFF_SLAB;
738 if (flags & SLAB_HWCACHE_ALIGN) {
739 /* Need to adjust size so that objs are cache aligned. */
740 /* Small obj size, can get at least two per cache line. */
741 /* FIXME: only power of 2 supported, was better */
742 while (size < align/2)
743 align /= 2;
744 size = (size+align-1)&(~(align-1));
747 /* Cal size (in pages) of slabs, and the num of objs per slab.
748 * This could be made much more intelligent. For now, try to avoid
749 * using high page-orders for slabs. When the gfp() funcs are more
750 * friendly towards high-order requests, this should be changed.
752 do {
753 unsigned int break_flag = 0;
754 cal_wastage:
755 kmem_cache_estimate(cachep->gfporder, size, flags,
756 &left_over, &cachep->num);
757 if (break_flag)
758 break;
759 if (cachep->gfporder >= MAX_GFP_ORDER)
760 break;
761 if (!cachep->num)
762 goto next;
763 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
764 /* Oops, this num of objs will cause problems. */
765 cachep->gfporder--;
766 break_flag++;
767 goto cal_wastage;
771 * Large num of objs is good, but v. large slabs are currently
772 * bad for the gfp()s.
774 if (cachep->gfporder >= slab_break_gfp_order)
775 break;
777 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
778 break; /* Acceptable internal fragmentation. */
779 next:
780 cachep->gfporder++;
781 } while (1);
783 if (!cachep->num) {
784 printk("kmem_cache_create: couldn't create cache %s.\n", name);
785 kmem_cache_free(&cache_cache, cachep);
786 cachep = NULL;
787 goto opps;
789 slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
792 * If the slab has been placed off-slab, and we have enough space then
793 * move it on-slab. This is at the expense of any extra colouring.
795 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
796 flags &= ~CFLGS_OFF_SLAB;
797 left_over -= slab_size;
800 /* Offset must be a multiple of the alignment. */
801 offset += (align-1);
802 offset &= ~(align-1);
803 if (!offset)
804 offset = L1_CACHE_BYTES;
805 cachep->colour_off = offset;
806 cachep->colour = left_over/offset;
808 /* init remaining fields */
809 if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
810 flags |= CFLGS_OPTIMIZE;
812 cachep->flags = flags;
813 cachep->gfpflags = 0;
814 if (flags & SLAB_CACHE_DMA)
815 cachep->gfpflags |= GFP_DMA;
816 spin_lock_init(&cachep->spinlock);
817 cachep->objsize = size;
818 INIT_LIST_HEAD(&cachep->slabs_full);
819 INIT_LIST_HEAD(&cachep->slabs_partial);
820 INIT_LIST_HEAD(&cachep->slabs_free);
822 if (flags & CFLGS_OFF_SLAB)
823 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
824 cachep->ctor = ctor;
825 cachep->dtor = dtor;
826 cachep->name = name;
828 #ifdef CONFIG_SMP
829 if (g_cpucache_up)
830 enable_cpucache(cachep);
831 #endif
832 /* Need the semaphore to access the chain. */
833 down(&cache_chain_sem);
835 struct list_head *p;
837 list_for_each(p, &cache_chain) {
838 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
839 char tmp;
840 /* This happens when the module gets unloaded and doesn't
841 destroy its slab cache and noone else reuses the vmalloc
842 area of the module. Print a warning. */
843 if (__get_user(tmp,pc->name)) {
844 printk("SLAB: cache with size %d has lost its name\n",
845 pc->objsize);
846 continue;
848 if (!strcmp(pc->name,name)) {
849 printk("kmem_cache_create: duplicate cache %s\n",name);
850 up(&cache_chain_sem);
851 BUG();
856 /* There is no reason to lock our new cache before we
857 * link it in - no one knows about it yet...
859 list_add(&cachep->next, &cache_chain);
860 up(&cache_chain_sem);
861 opps:
862 return cachep;
866 #if DEBUG
868 * This check if the kmem_cache_t pointer is chained in the cache_cache
869 * list. -arca
871 static int is_chained_kmem_cache(kmem_cache_t * cachep)
873 struct list_head *p;
874 int ret = 0;
876 /* Find the cache in the chain of caches. */
877 down(&cache_chain_sem);
878 list_for_each(p, &cache_chain) {
879 if (p == &cachep->next) {
880 ret = 1;
881 break;
884 up(&cache_chain_sem);
886 return ret;
888 #else
889 #define is_chained_kmem_cache(x) 1
890 #endif
892 #ifdef CONFIG_SMP
/*
 * Waits for all CPUs to execute func().
 *
 * func() is first run on the calling cpu with local interrupts disabled,
 * then handed to smp_call_function(); a non-zero result from that call
 * is a BUG().
 */
static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
{
	local_irq_disable();
	func(arg);
	local_irq_enable();

	if (smp_call_function(func, arg, 1, 1))
		BUG();
}
905 typedef struct ccupdate_struct_s
907 kmem_cache_t *cachep;
908 cpucache_t *new[NR_CPUS];
909 } ccupdate_struct_t;
911 static void do_ccupdate_local(void *info)
913 ccupdate_struct_t *new = (ccupdate_struct_t *)info;
914 cpucache_t *old = cc_data(new->cachep);
916 cc_data(new->cachep) = new->new[smp_processor_id()];
917 new->new[smp_processor_id()] = old;
920 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
922 static void drain_cpu_caches(kmem_cache_t *cachep)
924 ccupdate_struct_t new;
925 int i;
927 memset(&new.new,0,sizeof(new.new));
929 new.cachep = cachep;
931 down(&cache_chain_sem);
932 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
934 for (i = 0; i < smp_num_cpus; i++) {
935 cpucache_t* ccold = new.new[cpu_logical_map(i)];
936 if (!ccold || (ccold->avail == 0))
937 continue;
938 local_irq_disable();
939 free_block(cachep, cc_entry(ccold), ccold->avail);
940 local_irq_enable();
941 ccold->avail = 0;
943 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
944 up(&cache_chain_sem);
947 #else
948 #define drain_cpu_caches(cachep) do { } while (0)
949 #endif
951 static int __kmem_cache_shrink(kmem_cache_t *cachep)
953 slab_t *slabp;
954 int ret;
956 drain_cpu_caches(cachep);
958 spin_lock_irq(&cachep->spinlock);
960 /* If the cache is growing, stop shrinking. */
961 while (!cachep->growing) {
962 struct list_head *p;
964 p = cachep->slabs_free.prev;
965 if (p == &cachep->slabs_free)
966 break;
968 slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
969 #if DEBUG
970 if (slabp->inuse)
971 BUG();
972 #endif
973 list_del(&slabp->list);
975 spin_unlock_irq(&cachep->spinlock);
976 kmem_slab_destroy(cachep, slabp);
977 spin_lock_irq(&cachep->spinlock);
979 ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
980 spin_unlock_irq(&cachep->spinlock);
981 return ret;
985 * kmem_cache_shrink - Shrink a cache.
986 * @cachep: The cache to shrink.
988 * Releases as many slabs as possible for a cache.
989 * To help debugging, a zero exit status indicates all slabs were released.
991 int kmem_cache_shrink(kmem_cache_t *cachep)
993 if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
994 BUG();
996 return __kmem_cache_shrink(cachep);
1000 * kmem_cache_destroy - delete a cache
1001 * @cachep: the cache to destroy
1003 * Remove a kmem_cache_t object from the slab cache.
1004 * Returns 0 on success.
1006 * It is expected this function will be called by a module when it is
1007 * unloaded. This will remove the cache completely, and avoid a duplicate
1008 * cache being allocated each time a module is loaded and unloaded, if the
1009 * module doesn't have persistent in-kernel storage across loads and unloads.
1011 * The caller must guarantee that noone will allocate memory from the cache
1012 * during the kmem_cache_destroy().
1014 int kmem_cache_destroy (kmem_cache_t * cachep)
1016 if (!cachep || in_interrupt() || cachep->growing)
1017 BUG();
1019 /* Find the cache in the chain of caches. */
1020 down(&cache_chain_sem);
1021 /* the chain is never empty, cache_cache is never destroyed */
1022 if (clock_searchp == cachep)
1023 clock_searchp = list_entry(cachep->next.next,
1024 kmem_cache_t, next);
1025 list_del(&cachep->next);
1026 up(&cache_chain_sem);
1028 if (__kmem_cache_shrink(cachep)) {
1029 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
1030 cachep);
1031 down(&cache_chain_sem);
1032 list_add(&cachep->next,&cache_chain);
1033 up(&cache_chain_sem);
1034 return 1;
1036 #ifdef CONFIG_SMP
1038 int i;
1039 for (i = 0; i < NR_CPUS; i++)
1040 kfree(cachep->cpudata[i]);
1042 #endif
1043 kmem_cache_free(&cache_cache, cachep);
1045 return 0;
1048 /* Get the memory for a slab management obj. */
1049 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
1050 void *objp, int colour_off, int local_flags)
1052 slab_t *slabp;
1054 if (OFF_SLAB(cachep)) {
1055 /* Slab management obj is off-slab. */
1056 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1057 if (!slabp)
1058 return NULL;
1059 } else {
1060 /* FIXME: change to
1061 slabp = objp
1062 * if you enable OPTIMIZE
1064 slabp = objp+colour_off;
1065 colour_off += L1_CACHE_ALIGN(cachep->num *
1066 sizeof(kmem_bufctl_t) + sizeof(slab_t));
1068 slabp->inuse = 0;
1069 slabp->colouroff = colour_off;
1070 slabp->s_mem = objp+colour_off;
1072 return slabp;
1075 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1076 slab_t * slabp, unsigned long ctor_flags)
1078 int i;
1080 for (i = 0; i < cachep->num; i++) {
1081 void* objp = slabp->s_mem+cachep->objsize*i;
1082 #if DEBUG
1083 if (cachep->flags & SLAB_RED_ZONE) {
1084 *((unsigned long*)(objp)) = RED_MAGIC1;
1085 *((unsigned long*)(objp + cachep->objsize -
1086 BYTES_PER_WORD)) = RED_MAGIC1;
1087 objp += BYTES_PER_WORD;
1089 #endif
1092 * Constructors are not allowed to allocate memory from
1093 * the same cache which they are a constructor for.
1094 * Otherwise, deadlock. They must also be threaded.
1096 if (cachep->ctor)
1097 cachep->ctor(objp, cachep, ctor_flags);
1098 #if DEBUG
1099 if (cachep->flags & SLAB_RED_ZONE)
1100 objp -= BYTES_PER_WORD;
1101 if (cachep->flags & SLAB_POISON)
1102 /* need to poison the objs */
1103 kmem_poison_obj(cachep, objp);
1104 if (cachep->flags & SLAB_RED_ZONE) {
1105 if (*((unsigned long*)(objp)) != RED_MAGIC1)
1106 BUG();
1107 if (*((unsigned long*)(objp + cachep->objsize -
1108 BYTES_PER_WORD)) != RED_MAGIC1)
1109 BUG();
1111 #endif
1112 slab_bufctl(slabp)[i] = i+1;
1114 slab_bufctl(slabp)[i-1] = BUFCTL_END;
1115 slabp->free = 0;
1119 * Grow (by 1) the number of slabs within a cache. This is called by
1120 * kmem_cache_alloc() when there are no active objs left in a cache.
1122 static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1124 slab_t *slabp;
1125 struct page *page;
1126 void *objp;
1127 size_t offset;
1128 unsigned int i, local_flags;
1129 unsigned long ctor_flags;
1130 unsigned long save_flags;
1132 /* Be lazy and only check for valid flags here,
1133 * keeping it out of the critical path in kmem_cache_alloc().
1135 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1136 BUG();
1137 if (flags & SLAB_NO_GROW)
1138 return 0;
1141 * The test for missing atomic flag is performed here, rather than
1142 * the more obvious place, simply to reduce the critical path length
1143 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1144 * will eventually be caught here (where it matters).
1146 if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1147 BUG();
1149 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1150 local_flags = (flags & SLAB_LEVEL_MASK);
1151 if (local_flags == SLAB_ATOMIC)
1153 * Not allowed to sleep. Need to tell a constructor about
1154 * this - it might need to know...
1156 ctor_flags |= SLAB_CTOR_ATOMIC;
1158 /* About to mess with non-constant members - lock. */
1159 spin_lock_irqsave(&cachep->spinlock, save_flags);
1161 /* Get colour for the slab, and cal the next value. */
1162 offset = cachep->colour_next;
1163 cachep->colour_next++;
1164 if (cachep->colour_next >= cachep->colour)
1165 cachep->colour_next = 0;
1166 offset *= cachep->colour_off;
1167 cachep->dflags |= DFLGS_GROWN;
1169 cachep->growing++;
1170 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1172 /* A series of memory allocations for a new slab.
1173 * Neither the cache-chain semaphore, or cache-lock, are
1174 * held, but the incrementing c_growing prevents this
1175 * cache from being reaped or shrunk.
1176 * Note: The cache could be selected in for reaping in
1177 * kmem_cache_reap(), but when the final test is made the
1178 * growing value will be seen.
1181 /* Get mem for the objs. */
1182 if (!(objp = kmem_getpages(cachep, flags)))
1183 goto failed;
1185 /* Get slab management. */
1186 if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1187 goto opps1;
1189 /* Nasty!!!!!! I hope this is OK. */
1190 i = 1 << cachep->gfporder;
1191 page = virt_to_page(objp);
1192 do {
1193 SET_PAGE_CACHE(page, cachep);
1194 SET_PAGE_SLAB(page, slabp);
1195 PageSetSlab(page);
1196 page++;
1197 } while (--i);
1199 kmem_cache_init_objs(cachep, slabp, ctor_flags);
1201 spin_lock_irqsave(&cachep->spinlock, save_flags);
1202 cachep->growing--;
1204 /* Make slab active. */
1205 list_add_tail(&slabp->list, &cachep->slabs_free);
1206 STATS_INC_GROWN(cachep);
1207 cachep->failures = 0;
1209 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1210 return 1;
1211 opps1:
1212 kmem_freepages(cachep, objp);
1213 failed:
1214 spin_lock_irqsave(&cachep->spinlock, save_flags);
1215 cachep->growing--;
1216 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1217 return 0;
/*
 * Perform extra freeing checks:
 * - detect double free
 * - detect bad pointers.
 * Called with the cache-lock held.
 *
 * Every failed check ends in BUG(); otherwise 0 is returned.
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
			slab_t *slabp, void * objp)
{
	unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
	int cur;

	/* The pointer must name an object slot inside this slab ... */
	if (objnr >= cachep->num)
		BUG();
	/* ... and must point exactly at the start of that slot. */
	if (slabp->s_mem + objnr*cachep->objsize != objp)
		BUG();

	/* Check slab's freelist to see if this obj is there. */
	cur = slabp->free;
	while (cur != BUFCTL_END) {
		if (cur == objnr)
			BUG();
		cur = slab_bufctl(slabp)[cur];
	}
	return 0;
}
#endif
1248 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1250 if (flags & SLAB_DMA) {
1251 if (!(cachep->gfpflags & GFP_DMA))
1252 BUG();
1253 } else {
1254 if (cachep->gfpflags & GFP_DMA)
1255 BUG();
1259 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1260 slab_t *slabp)
1262 void *objp;
1264 STATS_INC_ALLOCED(cachep);
1265 STATS_INC_ACTIVE(cachep);
1266 STATS_SET_HIGH(cachep);
1268 /* get obj pointer */
1269 slabp->inuse++;
1270 objp = slabp->s_mem + slabp->free*cachep->objsize;
1271 slabp->free=slab_bufctl(slabp)[slabp->free];
1273 if (unlikely(slabp->free == BUFCTL_END)) {
1274 list_del(&slabp->list);
1275 list_add(&slabp->list, &cachep->slabs_full);
1277 #if DEBUG
1278 if (cachep->flags & SLAB_POISON)
1279 if (kmem_check_poison_obj(cachep, objp))
1280 BUG();
1281 if (cachep->flags & SLAB_RED_ZONE) {
1282 /* Set alloc red-zone, and check old one. */
1283 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1284 RED_MAGIC1)
1285 BUG();
1286 if (xchg((unsigned long *)(objp+cachep->objsize -
1287 BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1288 BUG();
1289 objp += BYTES_PER_WORD;
1291 #endif
1292 return objp;
/*
 * Returns a ptr to an obj in the given cache.
 * caller must guarantee synchronization
 * #define for the goto optimization 8-)
 *
 * A partial slab is preferred; if there is none, the first free slab is
 * moved onto the partial list and used.  When both lists are empty the
 * macro jumps to the label alloc_new_slab, which the expanding function
 * has to provide.
 */
#define kmem_cache_alloc_one(cachep)					\
({									\
	struct list_head *ao_partial = &(cachep)->slabs_partial;	\
	struct list_head *ao_entry = ao_partial->next;			\
									\
	if (unlikely(ao_entry == ao_partial)) {				\
		struct list_head *ao_free = &(cachep)->slabs_free;	\
									\
		ao_entry = ao_free->next;				\
		if (unlikely(ao_entry == ao_free))			\
			goto alloc_new_slab;				\
		list_del(ao_entry);					\
		list_add(ao_entry, ao_partial);				\
	}								\
									\
	kmem_cache_alloc_one_tail(cachep,				\
			list_entry(ao_entry, slab_t, list));		\
})
#ifdef CONFIG_SMP
/*
 * Refill the current CPU's object array from the cache's slabs.
 *
 * Up to cachep->batchcount objects are pulled from partial slabs (or from
 * free slabs, which are then moved to the partial list) while holding the
 * cache spinlock.  Refilling stops early when no slab has a free object.
 *
 * Returns one object popped from the array, or NULL if the array is
 * still empty afterwards.  @flags is not used here.
 */
void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
{
	cpucache_t *cpu_arr = cc_data(cachep);
	int remaining;

	spin_lock(&cachep->spinlock);
	for (remaining = cachep->batchcount; remaining != 0; remaining--) {
		struct list_head *partial = &(cachep)->slabs_partial;
		struct list_head *pos = partial->next;
		void *obj;

		/* Get slab alloc is to come from. */
		if (unlikely(pos == partial)) {
			struct list_head *empty = &(cachep)->slabs_free;

			pos = empty->next;
			if (unlikely(pos == empty))
				break;
			list_del(pos);
			list_add(pos, partial);
		}

		obj = kmem_cache_alloc_one_tail(cachep,
					list_entry(pos, slab_t, list));
		cc_entry(cpu_arr)[cpu_arr->avail++] = obj;
	}
	spin_unlock(&cachep->spinlock);

	if (!cpu_arr->avail)
		return NULL;
	return cc_entry(cpu_arr)[--cpu_arr->avail];
}
#endif
1356 static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1358 unsigned long save_flags;
1359 void* objp;
1361 kmem_cache_alloc_head(cachep, flags);
1362 try_again:
1363 local_irq_save(save_flags);
1364 #ifdef CONFIG_SMP
1366 cpucache_t *cc = cc_data(cachep);
1368 if (cc) {
1369 if (cc->avail) {
1370 STATS_INC_ALLOCHIT(cachep);
1371 objp = cc_entry(cc)[--cc->avail];
1372 } else {
1373 STATS_INC_ALLOCMISS(cachep);
1374 objp = kmem_cache_alloc_batch(cachep,flags);
1375 if (!objp)
1376 goto alloc_new_slab_nolock;
1378 } else {
1379 spin_lock(&cachep->spinlock);
1380 objp = kmem_cache_alloc_one(cachep);
1381 spin_unlock(&cachep->spinlock);
1384 #else
1385 objp = kmem_cache_alloc_one(cachep);
1386 #endif
1387 local_irq_restore(save_flags);
1388 return objp;
1389 alloc_new_slab:
1390 #ifdef CONFIG_SMP
1391 spin_unlock(&cachep->spinlock);
1392 alloc_new_slab_nolock:
1393 #endif
1394 local_irq_restore(save_flags);
1395 if (kmem_cache_grow(cachep, flags))
1396 /* Someone may have stolen our objs. Doesn't matter, we'll
1397 * just come back here again.
1399 goto try_again;
1400 return NULL;
/*
 * Release an obj back to its cache. If the obj has a constructed
 * state, it should be in this state _before_ it is released.
 * - caller is responsible for the synchronization
 */

/*
 * Debug-only pointer validation used on the free paths.
 *
 * CHECK_NR() complains and BUG()s when the page is not a valid page;
 * CHECK_PAGE() additionally requires the page to be marked as a slab page.
 * Both macros print the variable "objp", which must exist in the scope
 * they are expanded in.  Without DEBUG, CHECK_PAGE() expands to nothing.
 */
#if DEBUG
# define CHECK_NR(pg)							\
	do {								\
		if (VALID_PAGE(pg))					\
			break;						\
		printk(KERN_ERR "kfree: out of range ptr %lxh.\n",	\
			(unsigned long)objp);				\
		BUG();							\
	} while (0)
# define CHECK_PAGE(page)						\
	do {								\
		CHECK_NR(page);						\
		if (PageSlab(page))					\
			break;						\
		printk(KERN_ERR "kfree: bad ptr %lxh.\n",		\
			(unsigned long)objp);				\
		BUG();							\
	} while (0)

#else
# define CHECK_PAGE(pg)	do { } while (0)
#endif
1432 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1434 slab_t* slabp;
1436 CHECK_PAGE(virt_to_page(objp));
1437 /* reduces memory footprint
1439 if (OPTIMIZE(cachep))
1440 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1441 else
1443 slabp = GET_PAGE_SLAB(virt_to_page(objp));
1445 #if DEBUG
1446 if (cachep->flags & SLAB_DEBUG_INITIAL)
1447 /* Need to call the slab's constructor so the
1448 * caller can perform a verify of its state (debugging).
1449 * Called without the cache-lock held.
1451 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1453 if (cachep->flags & SLAB_RED_ZONE) {
1454 objp -= BYTES_PER_WORD;
1455 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1456 /* Either write before start, or a double free. */
1457 BUG();
1458 if (xchg((unsigned long *)(objp+cachep->objsize -
1459 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1460 /* Either write past end, or a double free. */
1461 BUG();
1463 if (cachep->flags & SLAB_POISON)
1464 kmem_poison_obj(cachep, objp);
1465 if (kmem_extra_free_checks(cachep, slabp, objp))
1466 return;
1467 #endif
1469 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1471 slab_bufctl(slabp)[objnr] = slabp->free;
1472 slabp->free = objnr;
1474 STATS_DEC_ACTIVE(cachep);
1476 /* fixup slab chains */
1478 int inuse = slabp->inuse;
1479 if (unlikely(!--slabp->inuse)) {
1480 /* Was partial or full, now empty. */
1481 list_del(&slabp->list);
1482 list_add(&slabp->list, &cachep->slabs_free);
1483 } else if (unlikely(inuse == cachep->num)) {
1484 /* Was full. */
1485 list_del(&slabp->list);
1486 list_add(&slabp->list, &cachep->slabs_partial);
#ifdef CONFIG_SMP
/*
 * Return @len objects from the array @objpp to their slabs.
 * Takes no lock itself; nothing is done when @len is not positive.
 */
static inline void __free_block (kmem_cache_t* cachep,
							void** objpp, int len)
{
	int n;

	for (n = 0; n < len; n++)
		kmem_cache_free_one(cachep, objpp[n]);
}

/*
 * Same as __free_block(), but wrapped in the cache spinlock.
 */
static void free_block (kmem_cache_t* cachep, void** objpp, int len)
{
	spin_lock(&cachep->spinlock);
	__free_block(cachep, objpp, len);
	spin_unlock(&cachep->spinlock);
}
#endif
1508 * __kmem_cache_free
1509 * called with disabled ints
1511 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1513 #ifdef CONFIG_SMP
1514 cpucache_t *cc = cc_data(cachep);
1516 CHECK_PAGE(virt_to_page(objp));
1517 if (cc) {
1518 int batchcount;
1519 if (cc->avail < cc->limit) {
1520 STATS_INC_FREEHIT(cachep);
1521 cc_entry(cc)[cc->avail++] = objp;
1522 return;
1524 STATS_INC_FREEMISS(cachep);
1525 batchcount = cachep->batchcount;
1526 cc->avail -= batchcount;
1527 free_block(cachep,
1528 &cc_entry(cc)[cc->avail],batchcount);
1529 cc_entry(cc)[cc->avail++] = objp;
1530 return;
1531 } else {
1532 free_block(cachep, &objp, 1);
1534 #else
1535 kmem_cache_free_one(cachep, objp);
1536 #endif
1540 * kmem_cache_alloc - Allocate an object
1541 * @cachep: The cache to allocate from.
1542 * @flags: See kmalloc().
1544 * Allocate an object from this cache. The flags are only relevant
1545 * if the cache has no available objects.
1547 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1549 return __kmem_cache_alloc(cachep, flags);
1553 * kmalloc - allocate memory
1554 * @size: how many bytes of memory are required.
1555 * @flags: the type of memory to allocate.
1557 * kmalloc is the normal method of allocating memory
1558 * in the kernel.
1560 * The @flags argument may be one of:
1562 * %GFP_USER - Allocate memory on behalf of user. May sleep.
1564 * %GFP_KERNEL - Allocate normal kernel ram. May sleep.
1566 * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers.
1568 * Additionally, the %GFP_DMA flag may be set to indicate the memory
1569 * must be suitable for DMA. This can mean different things on different
1570 * platforms. For example, on i386, it means that the memory must come
1571 * from the first 16MB.
1573 void * kmalloc (size_t size, int flags)
1575 cache_sizes_t *csizep = cache_sizes;
1577 for (; csizep->cs_size; csizep++) {
1578 if (size > csizep->cs_size)
1579 continue;
1580 return __kmem_cache_alloc(flags & GFP_DMA ?
1581 csizep->cs_dmacachep : csizep->cs_cachep, flags);
1583 return NULL;
1587 * kmem_cache_free - Deallocate an object
1588 * @cachep: The cache the allocation was from.
1589 * @objp: The previously allocated object.
1591 * Free an object which was previously allocated from this
1592 * cache.
1594 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1596 unsigned long flags;
1597 #if DEBUG
1598 CHECK_PAGE(virt_to_page(objp));
1599 if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1600 BUG();
1601 #endif
1603 local_irq_save(flags);
1604 __kmem_cache_free(cachep, objp);
1605 local_irq_restore(flags);
1609 * kfree - free previously allocated memory
1610 * @objp: pointer returned by kmalloc.
1612 * Don't free memory not originally allocated by kmalloc()
1613 * or you will run into trouble.
1615 void kfree (const void *objp)
1617 kmem_cache_t *c;
1618 unsigned long flags;
1620 if (!objp)
1621 return;
1622 local_irq_save(flags);
1623 CHECK_PAGE(virt_to_page(objp));
1624 c = GET_PAGE_CACHE(virt_to_page(objp));
1625 __kmem_cache_free(c, (void*)objp);
1626 local_irq_restore(flags);
1629 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1631 cache_sizes_t *csizep = cache_sizes;
1633 /* This function could be moved to the header file, and
1634 * made inline so consumers can quickly determine what
1635 * cache pointer they require.
1637 for ( ; csizep->cs_size; csizep++) {
1638 if (size > csizep->cs_size)
1639 continue;
1640 break;
1642 return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1645 #ifdef CONFIG_SMP
1647 /* called with cache_chain_sem acquired. */
1648 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1650 ccupdate_struct_t new;
1651 int i;
1654 * These are admin-provided, so we are more graceful.
1656 if (limit < 0)
1657 return -EINVAL;
1658 if (batchcount < 0)
1659 return -EINVAL;
1660 if (batchcount > limit)
1661 return -EINVAL;
1662 if (limit != 0 && !batchcount)
1663 return -EINVAL;
1665 memset(&new.new,0,sizeof(new.new));
1666 if (limit) {
1667 for (i = 0; i< smp_num_cpus; i++) {
1668 cpucache_t* ccnew;
1670 ccnew = kmalloc(sizeof(void*)*limit+
1671 sizeof(cpucache_t), GFP_KERNEL);
1672 if (!ccnew)
1673 goto oom;
1674 ccnew->limit = limit;
1675 ccnew->avail = 0;
1676 new.new[cpu_logical_map(i)] = ccnew;
1679 new.cachep = cachep;
1680 spin_lock_irq(&cachep->spinlock);
1681 cachep->batchcount = batchcount;
1682 spin_unlock_irq(&cachep->spinlock);
1684 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1686 for (i = 0; i < smp_num_cpus; i++) {
1687 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1688 if (!ccold)
1689 continue;
1690 local_irq_disable();
1691 free_block(cachep, cc_entry(ccold), ccold->avail);
1692 local_irq_enable();
1693 kfree(ccold);
1695 return 0;
1696 oom:
1697 for (i--; i >= 0; i--)
1698 kfree(new.new[cpu_logical_map(i)]);
1699 return -ENOMEM;
1702 static void enable_cpucache (kmem_cache_t *cachep)
1704 int err;
1705 int limit;
1707 /* FIXME: optimize */
1708 if (cachep->objsize > PAGE_SIZE)
1709 return;
1710 if (cachep->objsize > 1024)
1711 limit = 60;
1712 else if (cachep->objsize > 256)
1713 limit = 124;
1714 else
1715 limit = 252;
1717 err = kmem_tune_cpucache(cachep, limit, limit/2);
1718 if (err)
1719 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1720 cachep->name, -err);
1723 static void enable_all_cpucaches (void)
1725 struct list_head* p;
1727 down(&cache_chain_sem);
1729 p = &cache_cache.next;
1730 do {
1731 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1733 enable_cpucache(cachep);
1734 p = cachep->next.next;
1735 } while (p != &cache_cache.next);
1737 up(&cache_chain_sem);
1739 #endif
1742 * kmem_cache_reap - Reclaim memory from caches.
1743 * @gfp_mask: the type of memory required.
1745 * Called from do_try_to_free_pages() and __alloc_pages()
1747 int kmem_cache_reap (int gfp_mask)
1749 slab_t *slabp;
1750 kmem_cache_t *searchp;
1751 kmem_cache_t *best_cachep;
1752 unsigned int best_pages;
1753 unsigned int best_len;
1754 unsigned int scan;
1755 int ret = 0;
1757 if (gfp_mask & __GFP_WAIT)
1758 down(&cache_chain_sem);
1759 else
1760 if (down_trylock(&cache_chain_sem))
1761 return 0;
1763 scan = REAP_SCANLEN;
1764 best_len = 0;
1765 best_pages = 0;
1766 best_cachep = NULL;
1767 searchp = clock_searchp;
1768 do {
1769 unsigned int pages;
1770 struct list_head* p;
1771 unsigned int full_free;
1773 /* It's safe to test this without holding the cache-lock. */
1774 if (searchp->flags & SLAB_NO_REAP)
1775 goto next;
1776 spin_lock_irq(&searchp->spinlock);
1777 if (searchp->growing)
1778 goto next_unlock;
1779 if (searchp->dflags & DFLGS_GROWN) {
1780 searchp->dflags &= ~DFLGS_GROWN;
1781 goto next_unlock;
1783 #ifdef CONFIG_SMP
1785 cpucache_t *cc = cc_data(searchp);
1786 if (cc && cc->avail) {
1787 __free_block(searchp, cc_entry(cc), cc->avail);
1788 cc->avail = 0;
1791 #endif
1793 full_free = 0;
1794 p = searchp->slabs_free.next;
1795 while (p != &searchp->slabs_free) {
1796 slabp = list_entry(p, slab_t, list);
1797 #if DEBUG
1798 if (slabp->inuse)
1799 BUG();
1800 #endif
1801 full_free++;
1802 p = p->next;
1806 * Try to avoid slabs with constructors and/or
1807 * more than one page per slab (as it can be difficult
1808 * to get high orders from gfp()).
1810 pages = full_free * (1<<searchp->gfporder);
1811 if (searchp->ctor)
1812 pages = (pages*4+1)/5;
1813 if (searchp->gfporder)
1814 pages = (pages*4+1)/5;
1815 if (pages > best_pages) {
1816 best_cachep = searchp;
1817 best_len = full_free;
1818 best_pages = pages;
1819 if (pages >= REAP_PERFECT) {
1820 clock_searchp = list_entry(searchp->next.next,
1821 kmem_cache_t,next);
1822 goto perfect;
1825 next_unlock:
1826 spin_unlock_irq(&searchp->spinlock);
1827 next:
1828 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1829 } while (--scan && searchp != clock_searchp);
1831 clock_searchp = searchp;
1833 if (!best_cachep)
1834 /* couldn't find anything to reap */
1835 goto out;
1837 spin_lock_irq(&best_cachep->spinlock);
1838 perfect:
1839 /* free only 50% of the free slabs */
1840 best_len = (best_len + 1)/2;
1841 for (scan = 0; scan < best_len; scan++) {
1842 struct list_head *p;
1844 if (best_cachep->growing)
1845 break;
1846 p = best_cachep->slabs_free.prev;
1847 if (p == &best_cachep->slabs_free)
1848 break;
1849 slabp = list_entry(p,slab_t,list);
1850 #if DEBUG
1851 if (slabp->inuse)
1852 BUG();
1853 #endif
1854 list_del(&slabp->list);
1855 STATS_INC_REAPED(best_cachep);
1857 /* Safe to drop the lock. The slab is no longer linked to the
1858 * cache.
1860 spin_unlock_irq(&best_cachep->spinlock);
1861 kmem_slab_destroy(best_cachep, slabp);
1862 spin_lock_irq(&best_cachep->spinlock);
1864 spin_unlock_irq(&best_cachep->spinlock);
1865 ret = scan * (1 << best_cachep->gfporder);
1866 out:
1867 up(&cache_chain_sem);
1868 return ret;
1871 #ifdef CONFIG_PROC_FS
/* /proc/slabinfo
 *	cache-name num-active-objs total-objs
 *	obj-size num-active-slabs total-slabs
 *	num-pages-per-slab
 */

/*
 * Bookkeeping helper for proc_getdata(); it operates on the variables
 * len, off and count of the expanding function.  While the generated
 * text still lies before the requested offset it is discarded (off is
 * reduced, len reset to 0); once more than count bytes past the offset
 * have been produced, control jumps to label t.
 */
#define FIXUP(t)				\
	do {					\
		if (len > off) {		\
			if (len-off > count)	\
				goto t;		\
		} else {			\
			off -= len;		\
			len = 0;		\
		}				\
	} while (0)
1888 static int proc_getdata (char*page, char**start, off_t off, int count)
1890 struct list_head *p;
1891 int len = 0;
1893 /* Output format version, so at least we can change it without _too_
1894 * many complaints.
1896 len += sprintf(page+len, "slabinfo - version: 1.1"
1897 #if STATS
1898 " (statistics)"
1899 #endif
1900 #ifdef CONFIG_SMP
1901 " (SMP)"
1902 #endif
1903 "\n");
1904 FIXUP(got_data);
1906 down(&cache_chain_sem);
1907 p = &cache_cache.next;
1908 do {
1909 kmem_cache_t *cachep;
1910 struct list_head *q;
1911 slab_t *slabp;
1912 unsigned long active_objs;
1913 unsigned long num_objs;
1914 unsigned long active_slabs = 0;
1915 unsigned long num_slabs;
1916 const char *name;
1917 cachep = list_entry(p, kmem_cache_t, next);
1919 spin_lock_irq(&cachep->spinlock);
1920 active_objs = 0;
1921 num_slabs = 0;
1922 list_for_each(q,&cachep->slabs_full) {
1923 slabp = list_entry(q, slab_t, list);
1924 if (slabp->inuse != cachep->num)
1925 BUG();
1926 active_objs += cachep->num;
1927 active_slabs++;
1929 list_for_each(q,&cachep->slabs_partial) {
1930 slabp = list_entry(q, slab_t, list);
1931 if (slabp->inuse == cachep->num || !slabp->inuse)
1932 BUG();
1933 active_objs += slabp->inuse;
1934 active_slabs++;
1936 list_for_each(q,&cachep->slabs_free) {
1937 slabp = list_entry(q, slab_t, list);
1938 if (slabp->inuse)
1939 BUG();
1940 num_slabs++;
1942 num_slabs+=active_slabs;
1943 num_objs = num_slabs*cachep->num;
1945 name = cachep->name;
1947 char tmp;
1948 if (__get_user(tmp, name))
1949 name = "broken";
1952 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1953 name, active_objs, num_objs, cachep->objsize,
1954 active_slabs, num_slabs, (1<<cachep->gfporder));
1956 #if STATS
1958 unsigned long errors = cachep->errors;
1959 unsigned long high = cachep->high_mark;
1960 unsigned long grown = cachep->grown;
1961 unsigned long reaped = cachep->reaped;
1962 unsigned long allocs = cachep->num_allocations;
1964 len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1965 high, allocs, grown, reaped, errors);
1967 #endif
1968 #ifdef CONFIG_SMP
1970 unsigned int batchcount = cachep->batchcount;
1971 unsigned int limit;
1973 if (cc_data(cachep))
1974 limit = cc_data(cachep)->limit;
1975 else
1976 limit = 0;
1977 len += sprintf(page+len, " : %4u %4u",
1978 limit, batchcount);
1980 #endif
1981 #if STATS && defined(CONFIG_SMP)
1983 unsigned long allochit = atomic_read(&cachep->allochit);
1984 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1985 unsigned long freehit = atomic_read(&cachep->freehit);
1986 unsigned long freemiss = atomic_read(&cachep->freemiss);
1987 len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1988 allochit, allocmiss, freehit, freemiss);
1990 #endif
1991 len += sprintf(page+len,"\n");
1992 spin_unlock_irq(&cachep->spinlock);
1993 FIXUP(got_data_up);
1994 p = cachep->next.next;
1995 } while (p != &cache_cache.next);
1996 got_data_up:
1997 up(&cache_chain_sem);
1999 got_data:
2000 *start = page+off;
2001 return len;
2005 * slabinfo_read_proc - generates /proc/slabinfo
2006 * @page: scratch area, one page long
2007 * @start: pointer to the pointer to the output buffer
2008 * @off: offset within /proc/slabinfo the caller is interested in
2009 * @count: requested len in bytes
2010 * @eof: eof marker
2011 * @data: unused
2013 * The contents of the buffer are
2014 * cache-name
2015 * num-active-objs
2016 * total-objs
2017 * object size
2018 * num-active-slabs
2019 * total-slabs
2020 * num-pages-per-slab
2021 * + further values on SMP and with statistics enabled
2023 int slabinfo_read_proc (char *page, char **start, off_t off,
2024 int count, int *eof, void *data)
2026 int len = proc_getdata(page, start, off, count);
2027 len -= (*start-page);
2028 if (len <= count)
2029 *eof = 1;
2030 if (len>count) len = count;
2031 if (len<0) len = 0;
2032 return len;
2035 #define MAX_SLABINFO_WRITE 128
2037 * slabinfo_write_proc - SMP tuning for the slab allocator
2038 * @file: unused
2039 * @buffer: user buffer
2040 * @count: data len
2041 * @data: unused
2043 int slabinfo_write_proc (struct file *file, const char *buffer,
2044 unsigned long count, void *data)
2046 #ifdef CONFIG_SMP
2047 char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
2048 int limit, batchcount, res;
2049 struct list_head *p;
2051 if (count > MAX_SLABINFO_WRITE)
2052 return -EINVAL;
2053 if (copy_from_user(&kbuf, buffer, count))
2054 return -EFAULT;
2055 kbuf[MAX_SLABINFO_WRITE] = '\0';
2057 tmp = strchr(kbuf, ' ');
2058 if (!tmp)
2059 return -EINVAL;
2060 *tmp = '\0';
2061 tmp++;
2062 limit = simple_strtol(tmp, &tmp, 10);
2063 while (*tmp == ' ')
2064 tmp++;
2065 batchcount = simple_strtol(tmp, &tmp, 10);
2067 /* Find the cache in the chain of caches. */
2068 down(&cache_chain_sem);
2069 res = -EINVAL;
2070 list_for_each(p,&cache_chain) {
2071 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2073 if (!strcmp(cachep->name, kbuf)) {
2074 res = kmem_tune_cpucache(cachep, limit, batchcount);
2075 break;
2078 up(&cache_chain_sem);
2079 if (res >= 0)
2080 res = count;
2081 return res;
2082 #else
2083 return -EINVAL;
2084 #endif
2086 #endif