Kill junk from recent merge.
[linux-2.6/linux-mips.git] / mm / slab.c
blob09fbaa7bf6b8a341d945cca510f544851873c957
1 /*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul
11 * An implementation of the Slab Allocator as described in outline in;
12 * UNIX Internals: The New Frontiers by Uresh Vahalia
13 * Pub: Prentice Hall ISBN 0-13-101908-2
14 * or with a little more detail in;
15 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
16 * Jeff Bonwick (Sun Microsystems).
17 * Presented at: USENIX Summer 1994 Technical Conference
20 * The memory is organized in caches, one cache for each object type.
21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
22 * Each cache consists of many slabs (they are small (usually one
23 * page long) and always contiguous), and each slab contains multiple
24 * initialized objects.
26 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
27 * normal). If you need a special memory type, then you must create a new
28 * cache for that memory type.
30 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
31 * full slabs with 0 free objects
32 * partial slabs
33 * empty slabs with no allocated objects
35 * If partial slabs exist, then new allocations come from these slabs,
36 * otherwise from empty slabs or new slabs are allocated.
38 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
39 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
41 * On SMP systems, each cache has a short per-cpu head array, most allocs
42 * and frees go into that array, and if that array overflows, then 1/2
43 * of the entries in the array are given back into the global cache.
44 * This reduces the number of spinlock operations.
46 * The c_cpuarray can be changed with a smp_call_function call,
47 * it may not be read with enabled local interrupts.
49 * SMP synchronization:
50 * constructors and destructors are called without any locking.
51 * Several members in kmem_cache_t and slab_t never change, they
52 * are accessed without any locking.
53 * The per-cpu arrays are never accessed from the wrong cpu, no locking.
54 * smp_call_function() is used if one cpu must flush the arrays from
55 * other cpus.
56 * The non-constant members are protected with a per-cache irq spinlock.
58 * Further notes from the original documentation:
60 * 11 April '97. Started multi-threading - markhe
61 * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
62 * The sem is only needed when accessing/extending the cache-chain, which
63 * can never happen inside an interrupt (kmem_cache_create(),
64 * kmem_cache_shrink() and kmem_cache_reap()).
66 * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
67 * may be sleeping and therefore not holding the semaphore/lock), the
68 * growing field is used. This also prevents reaping from a cache.
70 * At present, each engine can be growing a cache. This should be blocked.
74 #include <linux/config.h>
75 #include <linux/slab.h>
76 #include <linux/interrupt.h>
77 #include <linux/init.h>
78 #include <asm/uaccess.h>
81 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
82 * SLAB_RED_ZONE & SLAB_POISON.
83 * 0 for faster, smaller code (especially in the critical paths).
85 * STATS - 1 to collect stats for /proc/slabinfo.
86 * 0 for faster, smaller code (especially in the critical paths).
88 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
91 #define DEBUG 0
92 #define STATS 0
93 #define FORCED_DEBUG 0
96 * Parameters for kmem_cache_reap
98 #define REAP_SCANLEN 10
99 #define REAP_PERFECT 10
101 /* Shouldn't this be in a header file somewhere? */
102 #define BYTES_PER_WORD sizeof(void *)
104 /* Legal flag mask for kmem_cache_create(). */
105 #if DEBUG
106 # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
107 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
108 SLAB_NO_REAP | SLAB_CACHE_DMA)
109 #else
110 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
111 #endif
114 * kmem_bufctl_t:
116 * Bufctl's are used for linking objs within a slab
117 * linked offsets.
119 * This implementation relies on "struct page" for locating the cache &
120 * slab an object belongs to.
121 * This allows the bufctl structure to be small (one int), but limits
122 * the number of objects a slab (not a cache) can contain when off-slab
123 * bufctls are used. The limit is the size of the largest general cache
124 * that does not use off-slab slabs.
125 * For 32bit archs with 4 kB pages, this is 56.
126 * This is not serious, as it is only for large objects, when it is unwise
127 * to have too many per slab.
128 * Note: This limit can be raised by introducing a general cache whose size
129 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
132 #define BUFCTL_END 0xffffFFFF
133 #define SLAB_LIMIT 0xffffFFFE
134 typedef unsigned int kmem_bufctl_t;
136 /* Max number of objs-per-slab for caches which use off-slab slabs.
137 * Needed to avoid a possible looping condition in kmem_cache_grow().
139 static unsigned long offslab_limit;
142 * slab_t
144 * Manages the objs in a slab. Placed either at the beginning of mem allocated
145 * for a slab, or allocated from a general cache.
146 * Slabs are chained into one ordered list: fully used, partial, then fully
147 * free slabs.
149 typedef struct slab_s {
150 struct list_head list;
151 unsigned long colouroff;
152 void *s_mem; /* including colour offset */
153 unsigned int inuse; /* num of objs active in slab */
154 kmem_bufctl_t free;
155 } slab_t;
157 #define slab_bufctl(slabp) \
158 ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
161 * cpucache_t
163 * Per cpu structures
164 * The limit is stored in the per-cpu structure to reduce the data cache
165 * footprint.
/*
 * Header of a per-cpu object array.  cc_entry() treats the memory directly
 * behind this header as an array of object pointers.
 * NOTE(review): the users of avail/limit are outside this chunk; presumably
 * avail counts the cached pointers and limit is the array capacity.
 */
typedef struct cpucache_s {
	unsigned int	avail;
	unsigned int	limit;
} cpucache_t;

/* Pointer array stored directly behind the cpucache_t header. */
#define cc_entry(cpucache) \
	((void **)(((cpucache_t*)cpucache)+1))

/* The cpudata[] slot of @cachep that belongs to the executing cpu. */
#define cc_data(cachep) \
	((cachep)->cpudata[smp_processor_id()])
177 * kmem_cache_t
179 * manages a cache.
182 #define CACHE_NAMELEN 20 /* max name length for a slab cache */
184 struct kmem_cache_s {
185 /* 1) each alloc & free */
186 /* full, partial first, then free */
187 struct list_head slabs;
188 struct list_head *firstnotfull;
189 unsigned int objsize;
190 unsigned int flags; /* constant flags */
191 unsigned int num; /* # of objs per slab */
192 spinlock_t spinlock;
193 #ifdef CONFIG_SMP
194 unsigned int batchcount;
195 #endif
197 /* 2) slab additions /removals */
198 /* order of pgs per slab (2^n) */
199 unsigned int gfporder;
201 /* force GFP flags, e.g. GFP_DMA */
202 unsigned int gfpflags;
204 size_t colour; /* cache colouring range */
205 unsigned int colour_off; /* colour offset */
206 unsigned int colour_next; /* cache colouring */
207 kmem_cache_t *slabp_cache;
208 unsigned int growing;
209 unsigned int dflags; /* dynamic flags */
211 /* constructor func */
212 void (*ctor)(void *, kmem_cache_t *, unsigned long);
214 /* de-constructor func */
215 void (*dtor)(void *, kmem_cache_t *, unsigned long);
217 unsigned long failures;
219 /* 3) cache creation/removal */
220 char name[CACHE_NAMELEN];
221 struct list_head next;
222 #ifdef CONFIG_SMP
223 /* 4) per-cpu data */
224 cpucache_t *cpudata[NR_CPUS];
225 #endif
226 #if STATS
227 unsigned long num_active;
228 unsigned long num_allocations;
229 unsigned long high_mark;
230 unsigned long grown;
231 unsigned long reaped;
232 unsigned long errors;
233 #ifdef CONFIG_SMP
234 atomic_t allochit;
235 atomic_t allocmiss;
236 atomic_t freehit;
237 atomic_t freemiss;
238 #endif
239 #endif
/* internal c_flags */
#define	CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	CFLGS_OPTIMIZE	0x020000UL	/* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define	DFLGS_GROWN	0x000001UL	/* don't reap a recently grown */

/*
 * Flag test helpers; @x is a pointer to a cache descriptor.
 * GROWN() must read the 'dflags' member: the descriptor has no member
 * called 'dlags', so the previous spelling broke any user of the macro.
 */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
#define	OPTIMIZE(x)	((x)->flags & CFLGS_OPTIMIZE)
#define	GROWN(x)	((x)->dflags & DFLGS_GROWN)
/*
 * Statistics hooks.  With STATS enabled they update the counters in the
 * cache descriptor @x; otherwise each one expands to an empty statement.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_INC_REAPED(x)	((x)->reaped++)
/* Raise the high-water mark when the active count exceeds it. */
#define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
					(x)->high_mark = (x)->num_active; \
				} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_INC_REAPED(x)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#endif

/* Hit/miss counters exist only when both STATS and CONFIG_SMP are set. */
#if STATS && defined(CONFIG_SMP)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
285 #if DEBUG
286 /* Magic nums for obj red zoning.
287 * Placed in the first word before and the first word after an obj.
289 #define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
290 #define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
292 /* ...and for poisoning */
293 #define POISON_BYTE 0x5a /* byte value for poisoning */
294 #define POISON_END 0xa5 /* end-byte of poisoning */
296 #endif
298 /* maximum size of an obj (in 2^order pages) */
299 #define MAX_OBJ_ORDER 5 /* 32 pages */
302 * Do not go above this order unless 0 objects fit into the slab.
304 #define BREAK_GFP_ORDER_HI 2
305 #define BREAK_GFP_ORDER_LO 1
306 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
309 * Absolute limit for the gfp order
311 #define MAX_GFP_ORDER 5 /* 32 pages */
/*
 * Macros for storing/retrieving the cachep and/or slab from the global
 * 'mem_map'.  These are used to find the slab an obj belongs to; with
 * kfree() they are used to find the cache an obj belongs to.
 *
 * The cache pointer is kept in page->list.next and the slab pointer in
 * page->list.prev.
 */
#define	SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
#define	SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
#define	GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
323 /* Size description struct for general caches. */
324 typedef struct cache_sizes {
325 size_t cs_size;
326 kmem_cache_t *cs_cachep;
327 kmem_cache_t *cs_dmacachep;
328 } cache_sizes_t;
330 static cache_sizes_t cache_sizes[] = {
331 #if PAGE_SIZE == 4096
332 { 32, NULL, NULL},
333 #endif
334 { 64, NULL, NULL},
335 { 128, NULL, NULL},
336 { 256, NULL, NULL},
337 { 512, NULL, NULL},
338 { 1024, NULL, NULL},
339 { 2048, NULL, NULL},
340 { 4096, NULL, NULL},
341 { 8192, NULL, NULL},
342 { 16384, NULL, NULL},
343 { 32768, NULL, NULL},
344 { 65536, NULL, NULL},
345 {131072, NULL, NULL},
346 { 0, NULL, NULL}
349 /* internal cache of cache description objs */
350 static kmem_cache_t cache_cache = {
351 slabs: LIST_HEAD_INIT(cache_cache.slabs),
352 firstnotfull: &cache_cache.slabs,
353 objsize: sizeof(kmem_cache_t),
354 flags: SLAB_NO_REAP,
355 spinlock: SPIN_LOCK_UNLOCKED,
356 colour_off: L1_CACHE_BYTES,
357 name: "kmem_cache",
358 next: LIST_HEAD_INIT(cache_cache.next)
361 /* Guard access to the cache-chain. */
362 static struct semaphore cache_chain_sem;
364 /* Place maintainer for reaping. */
365 static kmem_cache_t *clock_searchp = &cache_cache;
367 #define cache_chain (cache_cache.next)
369 #ifdef CONFIG_SMP
371 * chicken and egg problem: delay the per-cpu array allocation
372 * until the general caches are up.
374 static int g_cpucache_up;
376 static void drain_cache (void *__cachep);
377 static void enable_cpucache (kmem_cache_t *cachep);
378 static void enable_all_cpucaches (void);
379 #endif
381 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
382 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
383 int flags, size_t *left_over, unsigned int *num)
385 int i;
386 size_t wastage = PAGE_SIZE<<gfporder;
387 size_t extra = 0;
388 size_t base = 0;
390 if (!(flags & CFLGS_OFF_SLAB)) {
391 base = sizeof(slab_t);
392 extra = sizeof(kmem_bufctl_t);
394 i = 0;
395 while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
396 i++;
397 if (i > 0)
398 i--;
400 if (i > SLAB_LIMIT)
401 i = SLAB_LIMIT;
403 *num = i;
404 wastage -= i*size;
405 wastage -= L1_CACHE_ALIGN(base+i*extra);
406 *left_over = wastage;
409 /* Initialisation - setup the `cache' cache. */
410 void __init kmem_cache_init(void)
412 size_t left_over;
414 init_MUTEX(&cache_chain_sem);
415 list_add(&cache_cache.next,&cache_chain);
417 kmem_cache_estimate(0, cache_cache.objsize, 0,
418 &left_over, &cache_cache.num);
419 if (!cache_cache.num)
420 BUG();
422 cache_cache.colour = left_over/cache_cache.colour_off;
423 cache_cache.colour_next = 0;
427 /* Initialisation - setup remaining internal and general caches.
428 * Called after the gfp() functions have been enabled, and before smp_init().
430 void __init kmem_cache_sizes_init(void)
432 cache_sizes_t *sizes = cache_sizes;
433 char name[20];
435 * Fragmentation resistance on low memory - only use bigger
436 * page orders on machines with more than 32MB of memory.
438 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
439 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
440 do {
441 /* For performance, all the general caches are L1 aligned.
442 * This should be particularly beneficial on SMP boxes, as it
443 * eliminates "false sharing".
444 * Note for systems short on memory removing the alignment will
445 * allow tighter packing of the smaller caches. */
446 sprintf(name,"size-%ld", (unsigned long) sizes->cs_size);
447 if (!(sizes->cs_cachep =
448 kmem_cache_create(name, sizes->cs_size,
449 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
450 BUG();
453 /* Inc off-slab bufctl limit until the ceiling is hit. */
454 if (!(OFF_SLAB(sizes->cs_cachep))) {
455 offslab_limit = sizes->cs_size-sizeof(slab_t);
456 offslab_limit /= 2;
458 sprintf(name, "size-%ld(DMA)", (unsigned long) sizes->cs_size);
459 sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
460 SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
461 if (!sizes->cs_dmacachep)
462 BUG();
463 sizes++;
464 } while (sizes->cs_size);
467 void __init kmem_cpucache_init(void)
469 #ifdef CONFIG_SMP
470 g_cpucache_up = 1;
471 enable_all_cpucaches();
472 #endif
475 /* Interface to system's page allocator. No need to hold the cache-lock.
477 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
479 void *addr;
482 * If we requested dmaable memory, we will get it. Even if we
483 * did not request dmaable memory, we might get it, but that
484 * would be relatively rare and ignorable.
486 flags |= cachep->gfpflags;
487 addr = (void*) __get_free_pages(flags, cachep->gfporder);
488 /* Assume that now we have the pages no one else can legally
489 * messes with the 'struct page's.
490 * However vm_scan() might try to test the structure to see if
491 * it is a named-page or buffer-page. The members it tests are
492 * of no interest here.....
494 return addr;
497 /* Interface to system's page release. */
498 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
500 unsigned long i = (1<<cachep->gfporder);
501 struct page *page = mem_map + MAP_NR(addr);
503 /* free_pages() does not clear the type bit - we do that.
504 * The pages have been unlinked from their cache-slab,
505 * but their 'struct page's might be accessed in
506 * vm_scan(). Shouldn't be a worry.
508 while (i--) {
509 PageClearSlab(page);
510 page++;
512 free_pages((unsigned long)addr, cachep->gfporder);
515 #if DEBUG
516 static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
518 int size = cachep->objsize;
519 if (cachep->flags & SLAB_RED_ZONE) {
520 addr += BYTES_PER_WORD;
521 size -= 2*BYTES_PER_WORD;
523 memset(addr, POISON_BYTE, size);
524 *(unsigned char *)(addr+size-1) = POISON_END;
527 static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
529 int size = cachep->objsize;
530 void *end;
531 if (cachep->flags & SLAB_RED_ZONE) {
532 addr += BYTES_PER_WORD;
533 size -= 2*BYTES_PER_WORD;
535 end = memchr(addr, POISON_END, size);
536 if (end != (addr+size-1))
537 return 1;
538 return 0;
540 #endif
542 /* Destroy all the objs in a slab, and release the mem back to the system.
543 * Before calling the slab must have been unlinked from the cache.
544 * The cache-lock is not held/needed.
546 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
548 if (cachep->dtor
549 #if DEBUG
550 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
551 #endif
553 int i;
554 for (i = 0; i < cachep->num; i++) {
555 void* objp = slabp->s_mem+cachep->objsize*i;
556 #if DEBUG
557 if (cachep->flags & SLAB_RED_ZONE) {
558 if (*((unsigned long*)(objp)) != RED_MAGIC1)
559 BUG();
560 if (*((unsigned long*)(objp + cachep->objsize
561 -BYTES_PER_WORD)) != RED_MAGIC1)
562 BUG();
563 objp += BYTES_PER_WORD;
565 #endif
566 if (cachep->dtor)
567 (cachep->dtor)(objp, cachep, 0);
568 #if DEBUG
569 if (cachep->flags & SLAB_RED_ZONE) {
570 objp -= BYTES_PER_WORD;
572 if ((cachep->flags & SLAB_POISON) &&
573 kmem_check_poison_obj(cachep, objp))
574 BUG();
575 #endif
579 kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
580 if (OFF_SLAB(cachep))
581 kmem_cache_free(cachep->slabp_cache, slabp);
586 * kmem_cache_create - Create a cache.
587 * @name: A string which is used in /proc/slabinfo to identify this cache.
588 * @size: The size of objects to be created in this cache.
589 * @offset: The offset to use within the page.
590 * @flags: SLAB flags
591 * @ctor: A constructor for the objects.
592 * @dtor: A destructor for the objects.
594 * Returns a ptr to the cache on success, NULL on failure.
595 * Cannot be called within a int, but can be interrupted.
596 * The @ctor is run when new pages are allocated by the cache
597 * and the @dtor is run before the pages are handed back.
598 * The flags are
600 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
601 * to catch references to uninitialised memory.
603 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
604 * for buffer overruns.
606 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
607 * memory pressure.
609 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
610 * cacheline. This can be beneficial if you're counting cycles as closely
611 * as davem.
613 kmem_cache_t *
614 kmem_cache_create (const char *name, size_t size, size_t offset,
615 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
616 void (*dtor)(void*, kmem_cache_t *, unsigned long))
618 const char *func_nm = KERN_ERR "kmem_create: ";
619 size_t left_over, align, slab_size;
620 kmem_cache_t *cachep = NULL;
623 * Sanity checks... these are all serious usage bugs.
625 if ((!name) ||
626 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
627 in_interrupt() ||
628 (size < BYTES_PER_WORD) ||
629 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
630 (dtor && !ctor) ||
631 (offset < 0 || offset > size))
632 BUG();
634 #if DEBUG
635 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
636 /* No constructor, but inital state check requested */
637 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
638 flags &= ~SLAB_DEBUG_INITIAL;
641 if ((flags & SLAB_POISON) && ctor) {
642 /* request for poisoning, but we can't do that with a constructor */
643 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
644 flags &= ~SLAB_POISON;
646 #if FORCED_DEBUG
647 if (size < (PAGE_SIZE>>3))
649 * do not red zone large object, causes severe
650 * fragmentation.
652 flags |= SLAB_RED_ZONE;
653 if (!ctor)
654 flags |= SLAB_POISON;
655 #endif
656 #endif
659 * Always checks flags, a caller might be expecting debug
660 * support which isn't available.
662 if (flags & ~CREATE_MASK)
663 BUG();
665 /* Get cache's description obj. */
666 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
667 if (!cachep)
668 goto opps;
669 memset(cachep, 0, sizeof(kmem_cache_t));
671 /* Check that size is in terms of words. This is needed to avoid
672 * unaligned accesses for some archs when redzoning is used, and makes
673 * sure any on-slab bufctl's are also correctly aligned.
675 if (size & (BYTES_PER_WORD-1)) {
676 size += (BYTES_PER_WORD-1);
677 size &= ~(BYTES_PER_WORD-1);
678 printk("%sForcing size word alignment - %s\n", func_nm, name);
681 #if DEBUG
682 if (flags & SLAB_RED_ZONE) {
684 * There is no point trying to honour cache alignment
685 * when redzoning.
687 flags &= ~SLAB_HWCACHE_ALIGN;
688 size += 2*BYTES_PER_WORD; /* words for redzone */
690 #endif
691 align = BYTES_PER_WORD;
692 if (flags & SLAB_HWCACHE_ALIGN)
693 align = L1_CACHE_BYTES;
695 /* Determine if the slab management is 'on' or 'off' slab. */
696 if (size >= (PAGE_SIZE>>3))
698 * Size is large, assume best to place the slab management obj
699 * off-slab (should allow better packing of objs).
701 flags |= CFLGS_OFF_SLAB;
703 if (flags & SLAB_HWCACHE_ALIGN) {
704 /* Need to adjust size so that objs are cache aligned. */
705 /* Small obj size, can get at least two per cache line. */
706 /* FIXME: only power of 2 supported, was better */
707 while (size < align/2)
708 align /= 2;
709 size = (size+align-1)&(~(align-1));
712 /* Cal size (in pages) of slabs, and the num of objs per slab.
713 * This could be made much more intelligent. For now, try to avoid
714 * using high page-orders for slabs. When the gfp() funcs are more
715 * friendly towards high-order requests, this should be changed.
717 do {
718 unsigned int break_flag = 0;
719 cal_wastage:
720 kmem_cache_estimate(cachep->gfporder, size, flags,
721 &left_over, &cachep->num);
722 if (break_flag)
723 break;
724 if (cachep->gfporder >= MAX_GFP_ORDER)
725 break;
726 if (!cachep->num)
727 goto next;
728 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
729 /* Oops, this num of objs will cause problems. */
730 cachep->gfporder--;
731 break_flag++;
732 goto cal_wastage;
736 * Large num of objs is good, but v. large slabs are currently
737 * bad for the gfp()s.
739 if (cachep->gfporder >= slab_break_gfp_order)
740 break;
742 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
743 break; /* Acceptable internal fragmentation. */
744 next:
745 cachep->gfporder++;
746 } while (1);
748 if (!cachep->num) {
749 printk("kmem_cache_create: couldn't create cache %s.\n", name);
750 kmem_cache_free(&cache_cache, cachep);
751 cachep = NULL;
752 goto opps;
754 slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
757 * If the slab has been placed off-slab, and we have enough space then
758 * move it on-slab. This is at the expense of any extra colouring.
760 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
761 flags &= ~CFLGS_OFF_SLAB;
762 left_over -= slab_size;
765 /* Offset must be a multiple of the alignment. */
766 offset += (align-1);
767 offset &= ~(align-1);
768 if (!offset)
769 offset = L1_CACHE_BYTES;
770 cachep->colour_off = offset;
771 cachep->colour = left_over/offset;
773 /* init remaining fields */
774 if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
775 flags |= CFLGS_OPTIMIZE;
777 cachep->flags = flags;
778 cachep->gfpflags = 0;
779 if (flags & SLAB_CACHE_DMA)
780 cachep->gfpflags |= GFP_DMA;
781 spin_lock_init(&cachep->spinlock);
782 cachep->objsize = size;
783 INIT_LIST_HEAD(&cachep->slabs);
784 cachep->firstnotfull = &cachep->slabs;
786 if (flags & CFLGS_OFF_SLAB)
787 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
788 cachep->ctor = ctor;
789 cachep->dtor = dtor;
790 /* Copy name over so we don't have problems with unloaded modules */
791 strcpy(cachep->name, name);
793 #ifdef CONFIG_SMP
794 if (g_cpucache_up)
795 enable_cpucache(cachep);
796 #endif
797 /* Need the semaphore to access the chain. */
798 down(&cache_chain_sem);
800 struct list_head *p;
802 list_for_each(p, &cache_chain) {
803 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
805 /* The name field is constant - no lock needed. */
806 if (!strcmp(pc->name, name))
807 BUG();
811 /* There is no reason to lock our new cache before we
812 * link it in - no one knows about it yet...
814 list_add(&cachep->next, &cache_chain);
815 up(&cache_chain_sem);
816 opps:
817 return cachep;
821 * This check if the kmem_cache_t pointer is chained in the cache_cache
822 * list. -arca
824 static int is_chained_kmem_cache(kmem_cache_t * cachep)
826 struct list_head *p;
827 int ret = 0;
829 /* Find the cache in the chain of caches. */
830 down(&cache_chain_sem);
831 list_for_each(p, &cache_chain) {
832 if (p == &cachep->next) {
833 ret = 1;
834 break;
837 up(&cache_chain_sem);
839 return ret;
842 static int __kmem_cache_shrink(kmem_cache_t *cachep)
844 slab_t *slabp;
845 int ret;
847 #ifdef CONFIG_SMP
848 smp_call_function(drain_cache, cachep, 1, 1);
849 local_irq_disable();
850 drain_cache(cachep);
851 local_irq_enable();
852 #endif
853 spin_lock_irq(&cachep->spinlock);
855 /* If the cache is growing, stop shrinking. */
856 while (!cachep->growing) {
857 struct list_head *p;
859 p = cachep->slabs.prev;
860 if (p == &cachep->slabs)
861 break;
863 slabp = list_entry(cachep->slabs.prev, slab_t, list);
864 if (slabp->inuse)
865 break;
867 list_del(&slabp->list);
868 if (cachep->firstnotfull == &slabp->list)
869 cachep->firstnotfull = &cachep->slabs;
871 spin_unlock_irq(&cachep->spinlock);
872 kmem_slab_destroy(cachep, slabp);
873 spin_lock_irq(&cachep->spinlock);
875 ret = !list_empty(&cachep->slabs);
876 spin_unlock_irq(&cachep->spinlock);
877 return ret;
881 * kmem_cache_shrink - Shrink a cache.
882 * @cachep: The cache to shrink.
884 * Releases as many slabs as possible for a cache.
885 * To help debugging, a zero exit status indicates all slabs were released.
887 int kmem_cache_shrink(kmem_cache_t *cachep)
889 if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
890 BUG();
892 return __kmem_cache_shrink(cachep);
896 * kmem_cache_destroy - delete a cache
897 * @cachep: the cache to destroy
899 * Remove a kmem_cache_t object from the slab cache.
900 * Returns 0 on success.
902 * It is expected this function will be called by a module when it is
903 * unloaded. This will remove the cache completely, and avoid a duplicate
904 * cache being allocated each time a module is loaded and unloaded, if the
905 * module doesn't have persistent in-kernel storage across loads and unloads.
907 * The caller must guarantee that noone will allocate memory from the cache
908 * during the kmem_cache_destroy().
910 int kmem_cache_destroy (kmem_cache_t * cachep)
912 if (!cachep || in_interrupt() || cachep->growing)
913 BUG();
915 /* Find the cache in the chain of caches. */
916 down(&cache_chain_sem);
917 /* the chain is never empty, cache_cache is never destroyed */
918 if (clock_searchp == cachep)
919 clock_searchp = list_entry(cachep->next.next,
920 kmem_cache_t, next);
921 list_del(&cachep->next);
922 up(&cache_chain_sem);
924 if (__kmem_cache_shrink(cachep)) {
925 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
926 cachep);
927 down(&cache_chain_sem);
928 list_add(&cachep->next,&cache_chain);
929 up(&cache_chain_sem);
930 return 1;
932 #ifdef CONFIG_SMP
934 int i;
935 for (i = 0; i < NR_CPUS; i++)
936 kfree(cachep->cpudata[i]);
938 #endif
939 kmem_cache_free(&cache_cache, cachep);
941 return 0;
944 /* Get the memory for a slab management obj. */
945 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
946 void *objp, int colour_off, int local_flags)
948 slab_t *slabp;
950 if (OFF_SLAB(cachep)) {
951 /* Slab management obj is off-slab. */
952 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
953 if (!slabp)
954 return NULL;
955 } else {
956 /* FIXME: change to
957 slabp = objp
958 * if you enable OPTIMIZE
960 slabp = objp+colour_off;
961 colour_off += L1_CACHE_ALIGN(cachep->num *
962 sizeof(kmem_bufctl_t) + sizeof(slab_t));
964 slabp->inuse = 0;
965 slabp->colouroff = colour_off;
966 slabp->s_mem = objp+colour_off;
968 return slabp;
971 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
972 slab_t * slabp, unsigned long ctor_flags)
974 int i;
976 for (i = 0; i < cachep->num; i++) {
977 void* objp = slabp->s_mem+cachep->objsize*i;
978 #if DEBUG
979 if (cachep->flags & SLAB_RED_ZONE) {
980 *((unsigned long*)(objp)) = RED_MAGIC1;
981 *((unsigned long*)(objp + cachep->objsize -
982 BYTES_PER_WORD)) = RED_MAGIC1;
983 objp += BYTES_PER_WORD;
985 #endif
988 * Constructors are not allowed to allocate memory from
989 * the same cache which they are a constructor for.
990 * Otherwise, deadlock. They must also be threaded.
992 if (cachep->ctor)
993 cachep->ctor(objp, cachep, ctor_flags);
994 #if DEBUG
995 if (cachep->flags & SLAB_RED_ZONE)
996 objp -= BYTES_PER_WORD;
997 if (cachep->flags & SLAB_POISON)
998 /* need to poison the objs */
999 kmem_poison_obj(cachep, objp);
1000 if (cachep->flags & SLAB_RED_ZONE) {
1001 if (*((unsigned long*)(objp)) != RED_MAGIC1)
1002 BUG();
1003 if (*((unsigned long*)(objp + cachep->objsize -
1004 BYTES_PER_WORD)) != RED_MAGIC1)
1005 BUG();
1007 #endif
1008 slab_bufctl(slabp)[i] = i+1;
1010 slab_bufctl(slabp)[i-1] = BUFCTL_END;
1011 slabp->free = 0;
1015 * Grow (by 1) the number of slabs within a cache. This is called by
1016 * kmem_cache_alloc() when there are no active objs left in a cache.
1018 static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1020 slab_t *slabp;
1021 struct page *page;
1022 void *objp;
1023 size_t offset;
1024 unsigned int i, local_flags;
1025 unsigned long ctor_flags;
1026 unsigned long save_flags;
1028 /* Be lazy and only check for valid flags here,
1029 * keeping it out of the critical path in kmem_cache_alloc().
1031 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1032 BUG();
1033 if (flags & SLAB_NO_GROW)
1034 return 0;
1037 * The test for missing atomic flag is performed here, rather than
1038 * the more obvious place, simply to reduce the critical path length
1039 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1040 * will eventually be caught here (where it matters).
1042 if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1043 BUG();
1045 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1046 local_flags = (flags & SLAB_LEVEL_MASK);
1047 if (local_flags == SLAB_ATOMIC)
1049 * Not allowed to sleep. Need to tell a constructor about
1050 * this - it might need to know...
1052 ctor_flags |= SLAB_CTOR_ATOMIC;
1054 /* About to mess with non-constant members - lock. */
1055 spin_lock_irqsave(&cachep->spinlock, save_flags);
1057 /* Get colour for the slab, and cal the next value. */
1058 offset = cachep->colour_next;
1059 cachep->colour_next++;
1060 if (cachep->colour_next >= cachep->colour)
1061 cachep->colour_next = 0;
1062 offset *= cachep->colour_off;
1063 cachep->dflags |= DFLGS_GROWN;
1065 cachep->growing++;
1066 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1068 /* A series of memory allocations for a new slab.
1069 * Neither the cache-chain semaphore, or cache-lock, are
1070 * held, but the incrementing c_growing prevents this
1071 * cache from being reaped or shrunk.
1072 * Note: The cache could be selected in for reaping in
1073 * kmem_cache_reap(), but when the final test is made the
1074 * growing value will be seen.
1077 /* Get mem for the objs. */
1078 if (!(objp = kmem_getpages(cachep, flags)))
1079 goto failed;
1081 /* Get slab management. */
1082 if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1083 goto opps1;
1085 /* Nasty!!!!!! I hope this is OK. */
1086 i = 1 << cachep->gfporder;
1087 page = mem_map + MAP_NR(objp);
1088 do {
1089 SET_PAGE_CACHE(page, cachep);
1090 SET_PAGE_SLAB(page, slabp);
1091 PageSetSlab(page);
1092 page++;
1093 } while (--i);
1095 kmem_cache_init_objs(cachep, slabp, ctor_flags);
1097 spin_lock_irqsave(&cachep->spinlock, save_flags);
1098 cachep->growing--;
1100 /* Make slab active. */
1101 list_add_tail(&slabp->list,&cachep->slabs);
1102 if (cachep->firstnotfull == &cachep->slabs)
1103 cachep->firstnotfull = &slabp->list;
1104 STATS_INC_GROWN(cachep);
1105 cachep->failures = 0;
1107 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1108 return 1;
1109 opps1:
1110 kmem_freepages(cachep, objp);
1111 failed:
1112 spin_lock_irqsave(&cachep->spinlock, save_flags);
1113 cachep->growing--;
1114 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1115 return 0;
/*
 * Extra checks performed when an object is freed (DEBUG builds only):
 * - the pointer must be the exact start of an object inside the slab
 * - the object must not already be on the slab's free list (double free)
 * Called with the cache-lock held.  Returns 0; any violation is a BUG().
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
			slab_t *slabp, void * objp)
{
	unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
	int cur;

	/* Bad pointer: outside the slab or not on an object boundary. */
	if (objnr >= cachep->num)
		BUG();
	if (objp != slabp->s_mem + objnr*cachep->objsize)
		BUG();

	/* Walk the free list; finding the object there is a double free. */
	cur = slabp->free;
	while (cur != BUFCTL_END) {
		if (cur == objnr)
			BUG();
		cur = slab_bufctl(slabp)[cur];
	}
	return 0;
}
#endif
1146 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1148 #if DEBUG
1149 if (flags & SLAB_DMA) {
1150 if (!(cachep->gfpflags & GFP_DMA))
1151 BUG();
1152 } else {
1153 if (cachep->gfpflags & GFP_DMA)
1154 BUG();
1156 #endif
1159 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1160 slab_t *slabp)
1162 void *objp;
1164 STATS_INC_ALLOCED(cachep);
1165 STATS_INC_ACTIVE(cachep);
1166 STATS_SET_HIGH(cachep);
1168 /* get obj pointer */
1169 slabp->inuse++;
1170 objp = slabp->s_mem + slabp->free*cachep->objsize;
1171 slabp->free=slab_bufctl(slabp)[slabp->free];
1173 if (slabp->free == BUFCTL_END)
1174 /* slab now full: move to next slab for next alloc */
1175 cachep->firstnotfull = slabp->list.next;
1176 #if DEBUG
1177 if (cachep->flags & SLAB_POISON)
1178 if (kmem_check_poison_obj(cachep, objp))
1179 BUG();
1180 if (cachep->flags & SLAB_RED_ZONE) {
1181 /* Set alloc red-zone, and check old one. */
1182 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1183 RED_MAGIC1)
1184 BUG();
1185 if (xchg((unsigned long *)(objp+cachep->objsize -
1186 BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1187 BUG();
1188 objp += BYTES_PER_WORD;
1190 #endif
1191 return objp;
/*
 * Returns a ptr to an obj in the given cache.
 * caller must guarantee synchronization
 * #define for the goto optimization 8-)
 *
 * When the cache has no slab with free objects this jumps to the
 * 'alloc_new_slab' label, which the calling function must provide.
 * The argument is evaluated exactly once.
 */
#define kmem_cache_alloc_one(cachep)					\
({									\
	kmem_cache_t *__cachep = (cachep);				\
	/* Get slab alloc is to come from. */				\
	struct list_head *__p = __cachep->firstnotfull;			\
									\
	if (__p == &__cachep->slabs)					\
		goto alloc_new_slab;					\
	kmem_cache_alloc_one_tail(__cachep,				\
				  list_entry(__p, slab_t, list));	\
})
#ifdef CONFIG_SMP
/*
 * Refill this CPU's object array with up to cachep->batchcount objects
 * taken from the slabs (under the cache spinlock), then hand back one
 * of them.  Returns NULL when the array is still empty afterwards.
 * @flags is not used here.
 */
void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
{
	cpucache_t *cc = cc_data(cachep);
	int todo;

	spin_lock(&cachep->spinlock);
	for (todo = cachep->batchcount; todo; todo--) {
		/* Get slab alloc is to come from. */
		struct list_head *entry = cachep->firstnotfull;
		void *obj;

		/* No slab with free objects left. */
		if (entry == &cachep->slabs)
			break;
		obj = kmem_cache_alloc_one_tail(cachep,
					list_entry(entry, slab_t, list));
		cc_entry(cc)[cc->avail++] = obj;
	}
	spin_unlock(&cachep->spinlock);

	if (!cc->avail)
		return NULL;
	return cc_entry(cc)[--cc->avail];
}
#endif
1239 static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1241 unsigned long save_flags;
1242 void* objp;
1244 kmem_cache_alloc_head(cachep, flags);
1245 try_again:
1246 local_irq_save(save_flags);
1247 #ifdef CONFIG_SMP
1249 cpucache_t *cc = cc_data(cachep);
1251 if (cc) {
1252 if (cc->avail) {
1253 STATS_INC_ALLOCHIT(cachep);
1254 objp = cc_entry(cc)[--cc->avail];
1255 } else {
1256 STATS_INC_ALLOCMISS(cachep);
1257 objp = kmem_cache_alloc_batch(cachep,flags);
1258 if (!objp)
1259 goto alloc_new_slab_nolock;
1261 } else {
1262 spin_lock(&cachep->spinlock);
1263 objp = kmem_cache_alloc_one(cachep);
1264 spin_unlock(&cachep->spinlock);
1267 #else
1268 objp = kmem_cache_alloc_one(cachep);
1269 #endif
1270 local_irq_restore(save_flags);
1271 return objp;
1272 alloc_new_slab:
1273 #ifdef CONFIG_SMP
1274 spin_unlock(&cachep->spinlock);
1275 alloc_new_slab_nolock:
1276 #endif
1277 local_irq_restore(save_flags);
1278 if (kmem_cache_grow(cachep, flags))
1279 /* Someone may have stolen our objs. Doesn't matter, we'll
1280 * just come back here again.
1282 goto try_again;
1283 return NULL;
/*
 * Release an obj back to its cache. If the obj has a constructed
 * state, it should be in this state _before_ it is released.
 * - caller is responsible for the synchronization
 */

/*
 * Pointer sanity checks used on the free paths.  Both macros print the
 * variable 'objp' taken from the scope of the caller, so they can only
 * be used where such a variable exists.  They expand to nothing unless
 * DEBUG is set.
 */
#if DEBUG
# define CHECK_NR(nr)						\
	do {							\
		if ((nr) >= max_mapnr) {			\
			printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
				(unsigned long)objp);		\
			BUG();					\
		}						\
	} while (0)
# define CHECK_PAGE(page)					\
	do {							\
		if (!PageSlab(page)) {				\
			printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
				(unsigned long)objp);		\
			BUG();					\
		}						\
	} while (0)

#else
# define CHECK_NR(nr)	do { } while (0)
# define CHECK_PAGE(page)	do { } while (0)
#endif
1315 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1317 slab_t* slabp;
1319 CHECK_NR(MAP_NR(objp));
1320 CHECK_PAGE(mem_map + MAP_NR(objp));
1321 /* reduces memory footprint
1323 if (OPTIMIZE(cachep))
1324 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1325 else
1327 slabp = GET_PAGE_SLAB(mem_map + MAP_NR(objp));
1329 #if DEBUG
1330 if (cachep->flags & SLAB_DEBUG_INITIAL)
1331 /* Need to call the slab's constructor so the
1332 * caller can perform a verify of its state (debugging).
1333 * Called without the cache-lock held.
1335 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1337 if (cachep->flags & SLAB_RED_ZONE) {
1338 objp -= BYTES_PER_WORD;
1339 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1340 /* Either write before start, or a double free. */
1341 BUG();
1342 if (xchg((unsigned long *)(objp+cachep->objsize -
1343 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1344 /* Either write past end, or a double free. */
1345 BUG();
1347 if (cachep->flags & SLAB_POISON)
1348 kmem_poison_obj(cachep, objp);
1349 if (kmem_extra_free_checks(cachep, slabp, objp))
1350 return;
1351 #endif
1353 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1355 slab_bufctl(slabp)[objnr] = slabp->free;
1356 slabp->free = objnr;
1358 STATS_DEC_ACTIVE(cachep);
1360 /* fixup slab chain */
1361 if (slabp->inuse-- == cachep->num)
1362 goto moveslab_partial;
1363 if (!slabp->inuse)
1364 goto moveslab_free;
1365 return;
1367 moveslab_partial:
1368 /* was full.
1369 * Even if the page is now empty, we can set c_firstnotfull to
1370 * slabp: there are no partial slabs in this case
1373 struct list_head *t = cachep->firstnotfull;
1375 cachep->firstnotfull = &slabp->list;
1376 if (slabp->list.next == t)
1377 return;
1378 list_del(&slabp->list);
1379 list_add_tail(&slabp->list, t);
1380 return;
1382 moveslab_free:
1384 * was partial, now empty.
1385 * c_firstnotfull might point to slabp
1386 * FIXME: optimize
1389 struct list_head *t = cachep->firstnotfull->prev;
1391 list_del(&slabp->list);
1392 list_add_tail(&slabp->list, &cachep->slabs);
1393 if (cachep->firstnotfull == &slabp->list)
1394 cachep->firstnotfull = t->next;
1395 return;
1399 #ifdef CONFIG_SMP
1400 static inline void __free_block (kmem_cache_t* cachep,
1401 void** objpp, int len)
1403 for ( ; len > 0; len--, objpp++)
1404 kmem_cache_free_one(cachep, *objpp);
1407 static void free_block (kmem_cache_t* cachep, void** objpp, int len)
1409 spin_lock(&cachep->spinlock);
1410 __free_block(cachep, objpp, len);
1411 spin_unlock(&cachep->spinlock);
1413 #endif
1416 * __kmem_cache_free
1417 * called with disabled ints
1419 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1421 #ifdef CONFIG_SMP
1422 cpucache_t *cc = cc_data(cachep);
1424 CHECK_NR(MAP_NR(objp));
1425 CHECK_PAGE(mem_map + MAP_NR(objp));
1426 if (cc) {
1427 int batchcount;
1428 if (cc->avail < cc->limit) {
1429 STATS_INC_FREEHIT(cachep);
1430 cc_entry(cc)[cc->avail++] = objp;
1431 return;
1433 STATS_INC_FREEMISS(cachep);
1434 batchcount = cachep->batchcount;
1435 cc->avail -= batchcount;
1436 free_block(cachep,
1437 &cc_entry(cc)[cc->avail],batchcount);
1438 cc_entry(cc)[cc->avail++] = objp;
1439 return;
1440 } else {
1441 free_block(cachep, &objp, 1);
1443 #else
1444 kmem_cache_free_one(cachep, objp);
1445 #endif
1449 * kmem_cache_alloc - Allocate an object
1450 * @cachep: The cache to allocate from.
1451 * @flags: See kmalloc().
1453 * Allocate an object from this cache. The flags are only relevant
1454 * if the cache has no available objects.
1456 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1458 return __kmem_cache_alloc(cachep, flags);
1462 * kmalloc - allocate memory
1463 * @size: how many bytes of memory are required.
1464 * @flags: the type of memory to allocate.
1466 * kmalloc is the normal method of allocating memory
1467 * in the kernel. The @flags argument may be one of:
1469 * %GFP_BUFFER - XXX
1471 * %GFP_ATOMIC - allocation will not sleep. Use inside interrupt handlers.
1473 * %GFP_USER - allocate memory on behalf of user. May sleep.
1475 * %GFP_KERNEL - allocate normal kernel ram. May sleep.
1477 * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1478 * Don't use unless you're in the NFS code.
1480 * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
1482 void * kmalloc (size_t size, int flags)
1484 cache_sizes_t *csizep = cache_sizes;
1486 for (; csizep->cs_size; csizep++) {
1487 if (size > csizep->cs_size)
1488 continue;
1489 return __kmem_cache_alloc(flags & GFP_DMA ?
1490 csizep->cs_dmacachep : csizep->cs_cachep, flags);
1492 BUG(); // too big size
1493 return NULL;
1497 * kmem_cache_free - Deallocate an object
1498 * @cachep: The cache the allocation was from.
1499 * @objp: The previously allocated object.
1501 * Free an object which was previously allocated from this
1502 * cache.
1504 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1506 unsigned long flags;
1507 #if DEBUG
1508 CHECK_NR(MAP_NR(objp));
1509 CHECK_PAGE(mem_map + MAP_NR(objp));
1510 if (cachep != GET_PAGE_CACHE(mem_map + MAP_NR(objp)))
1511 BUG();
1512 #endif
1514 local_irq_save(flags);
1515 __kmem_cache_free(cachep, objp);
1516 local_irq_restore(flags);
1520 * kfree - free previously allocated memory
1521 * @objp: pointer returned by kmalloc.
1523 * Don't free memory not originally allocated by kmalloc()
1524 * or you will run into trouble.
1526 void kfree (const void *objp)
1528 kmem_cache_t *c;
1529 unsigned long flags;
1531 if (!objp)
1532 return;
1533 local_irq_save(flags);
1534 CHECK_NR(MAP_NR(objp));
1535 CHECK_PAGE(mem_map + MAP_NR(objp));
1536 c = GET_PAGE_CACHE(mem_map + MAP_NR(objp));
1537 __kmem_cache_free(c, (void*)objp);
1538 local_irq_restore(flags);
1541 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1543 cache_sizes_t *csizep = cache_sizes;
1545 /* This function could be moved to the header file, and
1546 * made inline so consumers can quickly determine what
1547 * cache pointer they require.
1549 for ( ; csizep->cs_size; csizep++) {
1550 if (size > csizep->cs_size)
1551 continue;
1552 break;
1554 return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1557 #ifdef CONFIG_SMP
1559 * called with local interrupts disabled
1561 static void drain_cache (void* __cachep)
1563 kmem_cache_t *cachep = __cachep;
1564 cpucache_t *cc = cc_data(cachep);
1566 if (cc && cc->avail) {
1567 free_block(cachep, cc_entry(cc), cc->avail);
1568 cc->avail = 0;
1572 typedef struct ccupdate_struct_s
1574 kmem_cache_t* cachep;
1575 cpucache_t* new[NR_CPUS];
1576 } ccupdate_struct_t;
1579 * called with local interrupts disabled
1581 static void ccupdate_callback (void* __new)
1583 ccupdate_struct_t* new = __new;
1584 cpucache_t *old = cc_data(new->cachep);
1586 cc_data(new->cachep) = new->new[smp_processor_id()];
1587 new->new[smp_processor_id()] = old;
1590 /* called with cache_chain_sem acquired. */
1591 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1593 ccupdate_struct_t new;
1594 int i;
1597 * These are admin-provided, so we are more graceful.
1599 if (limit < 0)
1600 return -EINVAL;
1601 if (batchcount < 0)
1602 return -EINVAL;
1603 if (batchcount > limit)
1604 return -EINVAL;
1605 if (limit != 0 && !batchcount)
1606 return -EINVAL;
1608 memset(&new.new,0,sizeof(new.new));
1609 if (limit) {
1610 for (i = 0; i< smp_num_cpus; i++) {
1611 cpucache_t* ccnew;
1614 ccnew = kmalloc(sizeof(void*)*limit+
1615 sizeof(cpucache_t), GFP_KERNEL);
1616 if (!ccnew)
1617 goto oom;
1618 ccnew->limit = limit;
1619 ccnew->avail = 0;
1620 new.new[cpu_logical_map(i)] = ccnew;
1623 new.cachep = cachep;
1624 spin_lock_irq(&cachep->spinlock);
1625 cachep->batchcount = batchcount;
1626 spin_unlock_irq(&cachep->spinlock);
1628 smp_call_function(ccupdate_callback,&new,1,1);
1629 local_irq_disable();
1630 ccupdate_callback(&new);
1631 local_irq_enable();
1633 for (i = 0; i < smp_num_cpus; i++) {
1634 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1635 if (!ccold)
1636 continue;
1637 local_irq_disable();
1638 free_block(cachep, cc_entry(ccold), ccold->avail);
1639 local_irq_enable();
1640 kfree(ccold);
1642 return 0;
1643 oom:
1644 for (i--; i >= 0; i--)
1645 kfree(new.new[cpu_logical_map(i)]);
1646 return -ENOMEM;
1649 static void enable_cpucache (kmem_cache_t *cachep)
1651 int err;
1652 int limit;
1654 /* FIXME: optimize */
1655 if (cachep->objsize > PAGE_SIZE)
1656 return;
1657 if (cachep->objsize > 1024)
1658 limit = 60;
1659 else if (cachep->objsize > 256)
1660 limit = 124;
1661 else
1662 limit = 252;
1664 err = kmem_tune_cpucache(cachep, limit, limit/2);
1665 if (err)
1666 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1667 cachep->name, -err);
1670 static void enable_all_cpucaches (void)
1672 struct list_head* p;
1674 down(&cache_chain_sem);
1676 p = &cache_cache.next;
1677 do {
1678 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1680 enable_cpucache(cachep);
1681 p = cachep->next.next;
1682 } while (p != &cache_cache.next);
1684 up(&cache_chain_sem);
1686 #endif
1689 * kmem_cache_reap - Reclaim memory from caches.
1690 * @gfp_mask: the type of memory required.
1692 * Called from try_to_free_page().
1694 void kmem_cache_reap (int gfp_mask)
1696 slab_t *slabp;
1697 kmem_cache_t *searchp;
1698 kmem_cache_t *best_cachep;
1699 unsigned int best_pages;
1700 unsigned int best_len;
1701 unsigned int scan;
1703 if (gfp_mask & __GFP_WAIT)
1704 down(&cache_chain_sem);
1705 else
1706 if (down_trylock(&cache_chain_sem))
1707 return;
1709 scan = REAP_SCANLEN;
1710 best_len = 0;
1711 best_pages = 0;
1712 best_cachep = NULL;
1713 searchp = clock_searchp;
1714 do {
1715 unsigned int pages;
1716 struct list_head* p;
1717 unsigned int full_free;
1719 /* It's safe to test this without holding the cache-lock. */
1720 if (searchp->flags & SLAB_NO_REAP)
1721 goto next;
1722 /* FIXME: is this really a good idea? */
1723 if (gfp_mask & GFP_DMA) {
1724 if (!(searchp->gfpflags & GFP_DMA))
1725 goto next;
1726 } else {
1727 if (searchp->gfpflags & GFP_DMA)
1728 goto next;
1730 spin_lock_irq(&searchp->spinlock);
1731 if (searchp->growing)
1732 goto next_unlock;
1733 if (searchp->dflags & DFLGS_GROWN) {
1734 searchp->dflags &= ~DFLGS_GROWN;
1735 goto next_unlock;
1737 #ifdef CONFIG_SMP
1739 cpucache_t *cc = cc_data(searchp);
1740 if (cc && cc->avail) {
1741 __free_block(searchp, cc_entry(cc), cc->avail);
1742 cc->avail = 0;
1745 #endif
1747 full_free = 0;
1748 p = searchp->slabs.prev;
1749 while (p != &searchp->slabs) {
1750 slabp = list_entry(p, slab_t, list);
1751 if (slabp->inuse)
1752 break;
1753 full_free++;
1754 p = p->prev;
1758 * Try to avoid slabs with constructors and/or
1759 * more than one page per slab (as it can be difficult
1760 * to get high orders from gfp()).
1762 pages = full_free * (1<<searchp->gfporder);
1763 if (searchp->ctor)
1764 pages = (pages*4+1)/5;
1765 if (searchp->gfporder)
1766 pages = (pages*4+1)/5;
1767 if (pages > best_pages) {
1768 best_cachep = searchp;
1769 best_len = full_free;
1770 best_pages = pages;
1771 if (full_free >= REAP_PERFECT) {
1772 clock_searchp = list_entry(searchp->next.next,
1773 kmem_cache_t,next);
1774 goto perfect;
1777 next_unlock:
1778 spin_unlock_irq(&searchp->spinlock);
1779 next:
1780 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1781 } while (--scan && searchp != clock_searchp);
1783 clock_searchp = searchp;
1785 if (!best_cachep)
1786 /* couldn't find anything to reap */
1787 goto out;
1789 spin_lock_irq(&best_cachep->spinlock);
1790 perfect:
1791 /* free only 80% of the free slabs */
1792 best_len = (best_len*4 + 1)/5;
1793 for (scan = 0; scan < best_len; scan++) {
1794 struct list_head *p;
1796 if (best_cachep->growing)
1797 break;
1798 p = best_cachep->slabs.prev;
1799 if (p == &best_cachep->slabs)
1800 break;
1801 slabp = list_entry(p,slab_t,list);
1802 if (slabp->inuse)
1803 break;
1804 list_del(&slabp->list);
1805 if (best_cachep->firstnotfull == &slabp->list)
1806 best_cachep->firstnotfull = &best_cachep->slabs;
1807 STATS_INC_REAPED(best_cachep);
1809 /* Safe to drop the lock. The slab is no longer linked to the
1810 * cache.
1812 spin_unlock_irq(&best_cachep->spinlock);
1813 kmem_slab_destroy(best_cachep, slabp);
1814 spin_lock_irq(&best_cachep->spinlock);
1816 spin_unlock_irq(&best_cachep->spinlock);
1817 out:
1818 up(&cache_chain_sem);
1819 return;
1822 #ifdef CONFIG_PROC_FS
/* /proc/slabinfo
 *	cache-name num-active-objs total-objs
 *	obj-size num-active-slabs total-slabs
 *	num-pages-per-slab
 */
/*
 * Helper for proc_getdata().  Operates on the variables 'len', 'off'
 * and 'count' of the enclosing function: output that lies entirely
 * before the requested offset is dropped (and the offset reduced
 * accordingly); once more than 'count' bytes past the offset have been
 * produced, control jumps to label @t.
 */
#define FIXUP(t)				\
	do {					\
		if (len <= off) {		\
			off -= len;		\
			len = 0;		\
		} else if (len-off > count) {	\
			goto t;			\
		}				\
	} while (0)
1839 static int proc_getdata (char*page, char**start, off_t off, int count)
1841 struct list_head *p;
1842 int len = 0;
1844 /* Output format version, so at least we can change it without _too_
1845 * many complaints.
1847 len += sprintf(page+len, "slabinfo - version: 1.1"
1848 #if STATS
1849 " (statistics)"
1850 #endif
1851 #ifdef CONFIG_SMP
1852 " (SMP)"
1853 #endif
1854 "\n");
1855 FIXUP(got_data);
1857 down(&cache_chain_sem);
1858 p = &cache_cache.next;
1859 do {
1860 kmem_cache_t *cachep;
1861 struct list_head *q;
1862 slab_t *slabp;
1863 unsigned long active_objs;
1864 unsigned long num_objs;
1865 unsigned long active_slabs = 0;
1866 unsigned long num_slabs;
1867 cachep = list_entry(p, kmem_cache_t, next);
1869 spin_lock_irq(&cachep->spinlock);
1870 active_objs = 0;
1871 num_slabs = 0;
1872 list_for_each(q,&cachep->slabs) {
1873 slabp = list_entry(q, slab_t, list);
1874 active_objs += slabp->inuse;
1875 num_objs += cachep->num;
1876 if (slabp->inuse)
1877 active_slabs++;
1878 else
1879 num_slabs++;
1881 num_slabs+=active_slabs;
1882 num_objs = num_slabs*cachep->num;
1884 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1885 cachep->name, active_objs, num_objs, cachep->objsize,
1886 active_slabs, num_slabs, (1<<cachep->gfporder));
1888 #if STATS
1890 unsigned long errors = cachep->errors;
1891 unsigned long high = cachep->high_mark;
1892 unsigned long grown = cachep->grown;
1893 unsigned long reaped = cachep->reaped;
1894 unsigned long allocs = cachep->num_allocations;
1896 len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1897 high, allocs, grown, reaped, errors);
1899 #endif
1900 #ifdef CONFIG_SMP
1902 unsigned int batchcount = cachep->batchcount;
1903 unsigned int limit;
1905 if (cc_data(cachep))
1906 limit = cc_data(cachep)->limit;
1907 else
1908 limit = 0;
1909 len += sprintf(page+len, " : %4u %4u",
1910 limit, batchcount);
1912 #endif
1913 #if STATS && defined(CONFIG_SMP)
1915 unsigned long allochit = atomic_read(&cachep->allochit);
1916 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1917 unsigned long freehit = atomic_read(&cachep->freehit);
1918 unsigned long freemiss = atomic_read(&cachep->freemiss);
1919 len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1920 allochit, allocmiss, freehit, freemiss);
1922 #endif
1923 len += sprintf(page+len,"\n");
1924 spin_unlock_irq(&cachep->spinlock);
1925 FIXUP(got_data_up);
1926 p = cachep->next.next;
1927 } while (p != &cache_cache.next);
1928 got_data_up:
1929 up(&cache_chain_sem);
1931 got_data:
1932 *start = page+off;
1933 return len;
1937 * slabinfo_read_proc - generates /proc/slabinfo
1938 * @page: scratch area, one page long
1939 * @start: pointer to the pointer to the output buffer
1940 * @off: offset within /proc/slabinfo the caller is interested in
1941 * @count: requested len in bytes
1942 * @eof: eof marker
1943 * @data: unused
1945 * The contents of the buffer are
1946 * cache-name
1947 * num-active-objs
1948 * total-objs
1949 * object size
1950 * num-active-slabs
1951 * total-slabs
1952 * num-pages-per-slab
1953 * + further values on SMP and with statistics enabled
1955 int slabinfo_read_proc (char *page, char **start, off_t off,
1956 int count, int *eof, void *data)
1958 int len = proc_getdata(page, start, off, count);
1959 len -= (*start-page);
1960 if (len <= count)
1961 *eof = 1;
1962 if (len>count) len = count;
1963 if (len<0) len = 0;
1964 return len;
1967 #define MAX_SLABINFO_WRITE 128
1969 * slabinfo_write_proc - SMP tuning for the slab allocator
1970 * @file:
1971 * @buffer: user buffer
1972 * @count: data len
1973 * @data: unused
1975 int slabinfo_write_proc (struct file *file, const char *buffer,
1976 unsigned long count, void *data)
1978 #ifdef CONFIG_SMP
1979 char kbuf[MAX_SLABINFO_WRITE], *tmp;
1980 int limit, batchcount, res;
1981 struct list_head *p;
1983 if (count > MAX_SLABINFO_WRITE)
1984 return -EINVAL;
1985 if (copy_from_user(&kbuf, buffer, count))
1986 return -EFAULT;
1988 tmp = strchr(kbuf, ' ');
1989 if (!tmp)
1990 return -EINVAL;
1991 *tmp = '\0';
1992 tmp++;
1993 limit = simple_strtol(tmp, &tmp, 10);
1994 while (*tmp == ' ')
1995 tmp++;
1996 batchcount = simple_strtol(tmp, &tmp, 10);
1998 /* Find the cache in the chain of caches. */
1999 down(&cache_chain_sem);
2000 res = -EINVAL;
2001 list_for_each(p,&cache_chain) {
2002 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2004 if (!strcmp(cachep->name, kbuf)) {
2005 res = kmem_tune_cpucache(cachep, limit, batchcount);
2006 break;
2009 up(&cache_chain_sem);
2010 if (res >= 0)
2011 res = count;
2012 return res;
2013 #else
2014 return -EINVAL;
2015 #endif
2017 #endif