1 /*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
8 * 11 April '97. Started multi-threading - markhe
9 * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
10 * The sem is only needed when accessing/extending the cache-chain, which
11 * can never happen inside an interrupt (kmem_cache_create(),
12 * kmem_cache_shrink() and kmem_cache_reap()).
13 * This is a medium-term exclusion lock.
15 * Each cache has its own lock; 'c_spinlock'. This lock is needed only
16 * when accessing non-constant members of a cache-struct.
17 * Note: 'constant members' are assigned a value in kmem_cache_create() before
18 * the cache is linked into the cache-chain. The values never change, so not
19 * even a multi-reader lock is needed for these members.
20 * The c_spinlock is only ever held for a few cycles.
22 * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
23 * may be sleeping and therefore not holding the semaphore/lock), the
24 * c_growing field is used. This also prevents reaping from a cache.
26 * Note, caches may be shrunk or destroyed. When a sub-system (eg module) has
27 * finished with a cache it can shrink it (leaving the cache empty but enabled
28 * for re-use, eg. during a module re-load), or destroy it (see kmem_cache_destroy()).
30 * Notes:
31 * o Constructors/destructors are called while the cache-lock
32 * is _not_ held. Therefore they _must_ be threaded.
33 * o Constructors must not attempt to allocate memory from the
34 * same cache that they are a constructor for - infinite loop!
35 * (There is no easy way to trap this.)
36 * o The per-cache locks must be obtained with local-interrupts disabled.
37 * o When compiled with debug support, and an object-verify (upon release)
38 * is requested for a cache, the verify-function is called with the cache
39 * lock held. This helps debugging.
40 * o The functions called from try_to_free_page() must not attempt
41 * to allocate memory from a cache which is being grown.
42 * The buffer sub-system might try to allocate memory, via buffer_cachep.
43 * As this priority is passed to the SLAB, and then (if necessary) onto the
44 * gfp() funcs (which avoid calling try_to_free_page()), no deadlock
45 * should happen.
47 * The positioning of the per-cache lock is tricky. If the lock is
48 * placed on the same h/w cache line as commonly accessed members
49 * the number of L1 cache-line faults is reduced. However, this can
50 * lead to the cache-line ping-ponging between processors when the
51 * lock is in contention (and the common members are being accessed).
52 * Decided to keep it away from common members.
54 * More fine-graining is possible, with per-slab locks...but this might be
55 * taking fine-graining too far. It would have the advantage that
56 * during most allocs/frees no writes occur to the cache-struct.
57 * Therefore a multi-reader/one writer lock could be used (the writer
58 * needed when the slab chain is being link/unlinked).
59 * As we would not have an exclusion lock for the cache-structure, one
60 * would be needed per-slab (for updating s_free ptr, and/or the contents
61 * of s_index).
62 * The above locking would allow parallel operations to different slabs within
63 * the same cache with reduced spinning.
65 * Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
66 * would allow most allocations from the same cache to execute in parallel.
68 * At present, each engine can be growing a cache. This should be blocked.
70 * It is not currently 100% safe to examine the page_struct outside of a kernel
71 * or global cli lock. The risk is v. small, and non-fatal.
73 * Calls to printk() are not 100% safe (the function is not threaded). However,
74 * printk() is only used under an error condition, and the risk is v. small (not
75 * sure if the console write functions 'enjoy' executing multiple contexts in
76 * parallel. I guess they don't...).
77 * Note, for most calls to printk() any held cache-lock is dropped. This is not
78 * always done, for text size reasons - having *_unlock() everywhere is bloat.
82 * An implementation of the Slab Allocator as described in outline in;
83 * UNIX Internals: The New Frontiers by Uresh Vahalia
84 * Pub: Prentice Hall ISBN 0-13-101908-2
85 * or with a little more detail in;
86 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
87 * Jeff Bonwick (Sun Microsystems).
88 * Presented at: USENIX Summer 1994 Technical Conference
92 * This implementation deviates from Bonwick's paper as it
93 * does not use a hash-table for large objects, but rather a per slab
94 * index to hold the bufctls. This allows the bufctl structure to
95 * be small (one word), but limits the number of objects a slab (not
96 * a cache) can contain when off-slab bufctls are used. The limit is the
97 * size of the largest general cache that does not use off-slab bufctls,
98 * divided by the size of a bufctl. For 32bit archs, this is 256/4 = 64.
99 * This is not serious, as it is only for large objects, when it is unwise
100 * to have too many per slab.
101 * Note: This limit can be raised by introducing a general cache whose size
102 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
105 #include <linux/config.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/init.h>
110 /* If there is a different PAGE_SIZE around, and it works with this allocator,
111 * then change the following.
113 #if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096 && PAGE_SIZE != 16384 && PAGE_SIZE != 32768)
114 #error Your page size is probably not correctly supported - please check
115 #endif
117 /* SLAB_MGMT_CHECKS - 1 to enable extra checks in kmem_cache_create().
118 * 0 if you wish to reduce memory usage.
120 * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
121 * SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
122 * 0 for faster, smaller, code (especially in the critical paths).
124 * SLAB_STATS - 1 to collect stats for /proc/slabinfo.
125 * 0 for faster, smaller, code (especially in the critical paths).
127 * SLAB_SELFTEST - 1 to perform a few tests, mainly for development.
129 #define SLAB_MGMT_CHECKS 1
130 #define SLAB_DEBUG_SUPPORT 1
131 #define SLAB_STATS 0
132 #define SLAB_SELFTEST 0
134 /* Shouldn't this be in a header file somewhere? */
135 #define BYTES_PER_WORD sizeof(void *)
137 /* Legal flag mask for kmem_cache_create(). */
138 #if SLAB_DEBUG_SUPPORT
139 #if 0
140 #define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
141 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
142 SLAB_HIGH_PACK)
143 #endif
144 #define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
145 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
146 #else
147 #if 0
148 #define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
149 #endif
150 #define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
151 #endif /* SLAB_DEBUG_SUPPORT */
153 /* Slab management struct.
154 * Manages the objs in a slab. Placed either at the end of mem allocated
155 * for a slab, or from an internal obj cache (cache_slabp).
156 * Slabs are chained into a partially ordered list; fully used first, partial
157 * next, and then fully free slabs.
158 * The first 4 members are referenced during an alloc/free operation, and
159 * should always appear on the same cache line.
160 * Note: The offset between some members _must_ match offsets within
161 * the kmem_cache_t - see kmem_cache_init() for the checks. */
163 #define SLAB_OFFSET_BITS 16 /* could make this larger for 64bit archs */
165 typedef struct kmem_slab_s {
166 struct kmem_bufctl_s *s_freep; /* ptr to first inactive obj in slab */
167 struct kmem_bufctl_s *s_index;
168 unsigned long s_magic;
169 unsigned long s_inuse; /* num of objs active in slab */
171 struct kmem_slab_s *s_nextp;
172 struct kmem_slab_s *s_prevp;
173 void *s_mem; /* addr of first obj in slab */
174 unsigned long s_offset:SLAB_OFFSET_BITS,
175 s_dma:1;
176 } kmem_slab_t;
178 /* When the slab management is on-slab, this gives the size to use. */
179 #define slab_align_size (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
181 /* Test for end of slab chain. */
182 #define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_offset))
184 /* s_magic */
185 #define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* slab is alive */
186 #define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* slab has been destroyed */
188 /* Bufctl's are used for linking objs within a slab, identifying what slab an obj
189 * is in, and holding the address of the associated obj (for sanity checking with off-slab
190 * bufctls). What a bufctl contains depends upon the state of the obj and
191 * the organisation of the cache.
193 typedef struct kmem_bufctl_s {
194 union {
195 struct kmem_bufctl_s *buf_nextp;
196 kmem_slab_t *buf_slabp; /* slab for obj */
197 void * buf_objp;
198 } u;
199 } kmem_bufctl_t;
201 /* ...shorthand... */
202 #define buf_nextp u.buf_nextp
203 #define buf_slabp u.buf_slabp
204 #define buf_objp u.buf_objp
206 #if SLAB_DEBUG_SUPPORT
207 /* Magic nums for obj red zoning.
208 * Placed in the first word before and the first word after an obj.
210 #define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */
211 #define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
213 /* ...and for poisoning */
214 #define SLAB_POISON_BYTE 0x5a /* byte value for poisoning */
215 #define SLAB_POISON_END 0xa5 /* end-byte of poisoning */
217 #endif /* SLAB_DEBUG_SUPPORT */
219 #define SLAB_CACHE_NAME_LEN 20 /* max name length for a slab cache */
221 /* Cache struct - manages a cache.
222 * First four members are commonly referenced during an alloc/free operation.
224 struct kmem_cache_s {
225 kmem_slab_t *c_freep; /* first slab w. free objs */
226 unsigned long c_flags; /* constant flags */
227 unsigned long c_offset;
228 unsigned long c_num; /* # of objs per slab */
230 unsigned long c_magic;
231 unsigned long c_inuse; /* kept at zero */
232 kmem_slab_t *c_firstp; /* first slab in chain */
233 kmem_slab_t *c_lastp; /* last slab in chain */
235 spinlock_t c_spinlock;
236 unsigned long c_growing;
237 unsigned long c_dflags; /* dynamic flags */
238 size_t c_org_size;
239 unsigned long c_gfporder; /* order of pgs per slab (2^n) */
240 void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
241 void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* destructor func */
242 unsigned long c_align; /* alignment of objs */
243 size_t c_colour; /* cache colouring range */
244 size_t c_colour_next;/* cache colouring */
245 unsigned long c_failures;
246 char c_name[SLAB_CACHE_NAME_LEN];
247 struct kmem_cache_s *c_nextp;
248 kmem_cache_t *c_index_cachep;
249 #if SLAB_STATS
250 unsigned long c_num_active;
251 unsigned long c_num_allocations;
252 unsigned long c_high_mark;
253 unsigned long c_grown;
254 unsigned long c_reaped;
255 atomic_t c_errors;
256 #endif /* SLAB_STATS */
259 /* internal c_flags */
260 #define SLAB_CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
261 #define SLAB_CFLGS_BUFCTL 0x020000UL /* bufctls in own cache */
262 #define SLAB_CFLGS_GENERAL 0x080000UL /* a general cache */
264 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
265 #define SLAB_CFLGS_GROWN 0x000002UL /* don't reap a recently grown */
267 #define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB)
268 #define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL)
269 #define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN)
271 #if SLAB_STATS
272 #define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++)
273 #define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--)
274 #define SLAB_STATS_INC_ALLOCED(x) ((x)->c_num_allocations++)
275 #define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++)
276 #define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++)
277 #define SLAB_STATS_SET_HIGH(x) do { if ((x)->c_num_active > (x)->c_high_mark) \
278 (x)->c_high_mark = (x)->c_num_active; \
279 } while (0)
280 #define SLAB_STATS_INC_ERR(x) (atomic_inc(&(x)->c_errors))
281 #else
282 #define SLAB_STATS_INC_ACTIVE(x)
283 #define SLAB_STATS_DEC_ACTIVE(x)
284 #define SLAB_STATS_INC_ALLOCED(x)
285 #define SLAB_STATS_INC_GROWN(x)
286 #define SLAB_STATS_INC_REAPED(x)
287 #define SLAB_STATS_SET_HIGH(x)
288 #define SLAB_STATS_INC_ERR(x)
289 #endif /* SLAB_STATS */
291 #if SLAB_SELFTEST
292 #if !SLAB_DEBUG_SUPPORT
293 #error Debug support needed for self-test
294 #endif
295 static void kmem_self_test(void);
296 #endif /* SLAB_SELFTEST */
298 /* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
299 #define SLAB_C_MAGIC 0x4F17A36DUL
301 /* maximum size of an obj (in 2^order pages) */
302 #define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */
304 /* maximum num of pages for a slab (prevents large requests to the VM layer) */
305 #define SLAB_MAX_GFP_ORDER 5 /* 32 pages */
307 /* the 'preferred' minimum num of objs per slab - may be less for large objs */
308 #define SLAB_MIN_OBJS_PER_SLAB 4
310 /* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
311 * then the page order must be less than this before trying the next order.
313 #define SLAB_BREAK_GFP_ORDER_HI 2
314 #define SLAB_BREAK_GFP_ORDER_LO 1
315 static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
317 /* Macros for storing/retrieving the cachep and/or slab from the
318 * global 'mem_map'. With off-slab bufctls, these are used to find the
319 * slab an obj belongs to. With kmalloc(), and kfree(), these are used
320 * to find the cache which an obj belongs to.
322 #define SLAB_SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
323 #define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
324 #define SLAB_SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
325 #define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->list.prev)
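/* An illustrative sketch of how these macros are used to map an obj address
 * back to its owning cache/slab - see kfree() and __kmem_cache_free() below.
 * 'objp', 'pg', 'owner_cachep' and 'owner_slabp' are example names only.
 */
#if 0
struct page *pg = &mem_map[MAP_NR(objp)];
kmem_cache_t *owner_cachep = SLAB_GET_PAGE_CACHE(pg);
kmem_slab_t *owner_slabp = SLAB_GET_PAGE_SLAB(pg);
#endif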
327 /* Size description struct for general caches. */
328 typedef struct cache_sizes {
329 size_t cs_size;
330 kmem_cache_t *cs_cachep;
331 } cache_sizes_t;
333 static cache_sizes_t cache_sizes[] = {
334 #if PAGE_SIZE == 4096
335 { 32, NULL},
336 #endif
337 { 64, NULL},
338 { 128, NULL},
339 { 256, NULL},
340 { 512, NULL},
341 {1024, NULL},
342 {2048, NULL},
343 {4096, NULL},
344 {8192, NULL},
345 {16384, NULL},
346 {32768, NULL},
347 {65536, NULL},
348 {131072, NULL},
349 {0, NULL}
352 /* Names for the general caches. Not placed into the sizes struct for
353 * a good reason; the string ptr is not needed while searching in kmalloc(),
354 * and would 'get-in-the-way' in the h/w cache.
356 static char *cache_sizes_name[] = {
357 #if PAGE_SIZE == 4096
358 "size-32",
359 #endif
360 "size-64",
361 "size-128",
362 "size-256",
363 "size-512",
364 "size-1024",
365 "size-2048",
366 "size-4096",
367 "size-8192",
368 "size-16384",
369 "size-32768",
370 "size-65536",
371 "size-131072"
374 /* internal cache of cache description objs */
375 static kmem_cache_t cache_cache = {
376 /* freep, flags */ kmem_slab_end(&cache_cache), SLAB_NO_REAP,
377 /* offset, num */ sizeof(kmem_cache_t), 0,
378 /* c_magic, c_inuse */ SLAB_C_MAGIC, 0,
379 /* firstp, lastp */ kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
380 /* spinlock */ SPIN_LOCK_UNLOCKED,
381 /* growing */ 0,
382 /* dflags */ 0,
383 /* org_size, gfp */ 0, 0,
384 /* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES,
385 /* colour, colour_next */ 0, 0,
386 /* failures */ 0,
387 /* name */ "kmem_cache",
388 /* nextp */ &cache_cache,
389 /* index */ NULL,
392 /* Guard access to the cache-chain. */
393 static struct semaphore cache_chain_sem;
395 /* Place marker (roving pointer) for reaping. */
396 static kmem_cache_t *clock_searchp = &cache_cache;
398 /* Internal slab management cache, for when slab management is off-slab. */
399 static kmem_cache_t *cache_slabp;
401 /* Max number of objs-per-slab for caches which use bufctl's.
402 * Needed to avoid a possible looping condition in kmem_cache_grow().
404 static unsigned long bufctl_limit;
406 /* Initialisation - setup the `cache' cache. */
407 void __init kmem_cache_init(void)
409 size_t size, i;
411 #define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x)
412 #define kmem_slab_diff(a,b) (kmem_slab_offset(a) - kmem_slab_offset(b))
413 #define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
414 #define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
416 /* Sanity checks... */
417 if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
418 kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
419 ((kmem_cache_offset(c_lastp) -
420 ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
421 kmem_slab_offset(s_prevp)) ||
422 kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
423 /* Offsets to the magic are incorrect, either the structures have
424 * been incorrectly changed, or adjustments are needed for your
425 * architecture.
427 panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
428 /* NOTREACHED */
430 #undef kmem_cache_offset
431 #undef kmem_cache_diff
432 #undef kmem_slab_offset
433 #undef kmem_slab_diff
435 init_MUTEX(&cache_chain_sem);
437 size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
438 size += (L1_CACHE_BYTES-1);
439 size &= ~(L1_CACHE_BYTES-1);
440 cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
442 i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
443 cache_cache.c_num = i / size; /* num of objs per slab */
445 /* Cache colouring. */
446 cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
447 cache_cache.c_colour_next = cache_cache.c_colour;
450 * Fragmentation resistance on low memory - only use bigger
451 * page orders on machines with more than 32MB of memory.
453 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
454 slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
457 /* Initialisation - setup remaining internal and general caches.
458 * Called after the gfp() functions have been enabled, and before smp_init().
460 void __init kmem_cache_sizes_init(void)
462 unsigned int found = 0;
464 cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
465 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
466 if (cache_slabp) {
467 char **names = cache_sizes_name;
468 cache_sizes_t *sizes = cache_sizes;
469 do {
470 /* For performance, all the general caches are L1 aligned.
471 * This should be particularly beneficial on SMP boxes, as it
472 * eliminates "false sharing".
473 * Note, for systems short on memory, removing the alignment will
474 * allow tighter packing of the smaller caches. */
475 if (!(sizes->cs_cachep =
476 kmem_cache_create(*names++, sizes->cs_size,
477 0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
478 goto panic_time;
479 if (!found) {
480 /* Inc off-slab bufctl limit until the ceiling is hit. */
481 if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
482 found++;
483 else
484 bufctl_limit =
485 (sizes->cs_size/sizeof(kmem_bufctl_t));
487 sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
488 sizes++;
489 } while (sizes->cs_size);
490 #if SLAB_SELFTEST
491 kmem_self_test();
492 #endif /* SLAB_SELFTEST */
493 return;
495 panic_time:
496 panic("kmem_cache_sizes_init: Error creating caches");
497 /* NOTREACHED */
500 /* Interface to system's page allocator. *dma is set non-zero if the
501 * allocated memory is known to be DMAable. No need to hold the cache-lock.
503 static inline void *
504 kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
506 void *addr;
509 * If we requested dmaable memory, we will get it. Even if we
510 * did not request dmaable memory, we might get it, but that
511 * would be relatively rare and ignorable.
513 *dma = flags & SLAB_DMA;
514 addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
515 /* Assume that now we have the pages no one else can legally
516 * mess with the 'struct page's.
517 * However vm_scan() might try to test the structure to see if
518 * it is a named-page or buffer-page. The members it tests are
519 * of no interest here.....
521 return addr;
524 /* Interface to system's page release. */
525 static inline void
526 kmem_freepages(kmem_cache_t *cachep, void *addr)
528 unsigned long i = (1<<cachep->c_gfporder);
529 struct page *page = &mem_map[MAP_NR(addr)];
531 /* free_pages() does not clear the type bit - we do that.
532 * The pages have been unlinked from their cache-slab,
533 * but their 'struct page's might be accessed in
534 * vm_scan(). Shouldn't be a worry.
536 while (i--) {
537 PageClearSlab(page);
538 page++;
540 free_pages((unsigned long)addr, cachep->c_gfporder);
543 #if SLAB_DEBUG_SUPPORT
544 static inline void
545 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
547 memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
548 *(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISON_END;
551 static inline int
552 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
554 void *end;
555 end = memchr(addr, SLAB_POISON_END, cachep->c_org_size);
556 if (end != (addr+cachep->c_org_size-1))
557 return 1;
558 return 0;
560 #endif /* SLAB_DEBUG_SUPPORT */
562 /* Three slab chain funcs - all called with ints disabled and the appropriate
563 * cache-lock held.
565 static inline void
566 kmem_slab_unlink(kmem_slab_t *slabp)
568 kmem_slab_t *prevp = slabp->s_prevp;
569 kmem_slab_t *nextp = slabp->s_nextp;
570 prevp->s_nextp = nextp;
571 nextp->s_prevp = prevp;
574 static inline void
575 kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
577 kmem_slab_t *lastp = cachep->c_lastp;
578 slabp->s_nextp = kmem_slab_end(cachep);
579 slabp->s_prevp = lastp;
580 cachep->c_lastp = slabp;
581 lastp->s_nextp = slabp;
584 static inline void
585 kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
587 kmem_slab_t *nextp = cachep->c_freep;
588 kmem_slab_t *prevp = nextp->s_prevp;
589 slabp->s_nextp = nextp;
590 slabp->s_prevp = prevp;
591 nextp->s_prevp = slabp;
592 slabp->s_prevp->s_nextp = slabp;
595 /* Destroy all the objs in a slab, and release the mem back to the system.
596 * Before calling, the slab must have been unlinked from the cache.
597 * The cache-lock is not held/needed.
599 static void
600 kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
602 if (cachep->c_dtor
603 #if SLAB_DEBUG_SUPPORT
604 || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
605 #endif /*SLAB_DEBUG_SUPPORT*/
607 /* Doesn't use the bufctl ptrs to find objs. */
608 unsigned long num = cachep->c_num;
609 void *objp = slabp->s_mem;
610 do {
611 #if SLAB_DEBUG_SUPPORT
612 if (cachep->c_flags & SLAB_RED_ZONE) {
613 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
614 printk(KERN_ERR "kmem_slab_destroy: "
615 "Bad front redzone - %s\n",
616 cachep->c_name);
617 objp += BYTES_PER_WORD;
618 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
619 SLAB_RED_MAGIC1)
620 printk(KERN_ERR "kmem_slab_destroy: "
621 "Bad rear redzone - %s\n",
622 cachep->c_name);
624 if (cachep->c_dtor)
625 #endif /*SLAB_DEBUG_SUPPORT*/
626 (cachep->c_dtor)(objp, cachep, 0);
627 #if SLAB_DEBUG_SUPPORT
628 else if (cachep->c_flags & SLAB_POISON) {
629 if (kmem_check_poison_obj(cachep, objp))
630 printk(KERN_ERR "kmem_slab_destroy: "
631 "Bad poison - %s\n", cachep->c_name);
633 if (cachep->c_flags & SLAB_RED_ZONE)
634 objp -= BYTES_PER_WORD;
635 #endif /* SLAB_DEBUG_SUPPORT */
636 objp += cachep->c_offset;
637 if (!slabp->s_index)
638 objp += sizeof(kmem_bufctl_t);
639 } while (--num);
642 slabp->s_magic = SLAB_MAGIC_DESTROYED;
643 if (slabp->s_index)
644 kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
645 kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
646 if (SLAB_OFF_SLAB(cachep->c_flags))
647 kmem_cache_free(cache_slabp, slabp);
650 /* Calculate the num objs, wastage, and bytes left over for a given slab size. */
651 static inline size_t
652 kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
653 unsigned long flags, size_t *left_over, unsigned long *num)
655 size_t wastage = PAGE_SIZE<<gfporder;
657 if (SLAB_OFF_SLAB(flags))
658 gfporder = 0;
659 else
660 gfporder = slab_align_size;
661 wastage -= gfporder;
662 *num = wastage / size;
663 wastage -= (*num * size);
664 *left_over = wastage;
666 return (wastage + gfporder + (extra * *num));
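/* A worked example, purely illustrative (assumes PAGE_SIZE 4096, gfporder 0,
 * on-slab management with slab_align_size 32, a 128 byte obj plus a 4 byte
 * on-slab bufctl, ie size=132 and extra=4):
 *   usable bytes = 4096 - 32       = 4064
 *   *num         = 4064 / 132      = 30 objs per slab
 *   *left_over   = 4064 - 30*132   = 104 (available for colouring)
 *   returned     = 104 + 32 + 4*30 = 256 bytes of management overhead/waste
 */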
670 * kmem_cache_create - Create a cache.
671 * @name: A string which is used in /proc/slabinfo to identify this cache.
672 * @size: The size of objects to be created in this cache.
673 * @offset: The offset to use within the page.
674 * @flags: SLAB flags
675 * @ctor: A constructor for the objects.
676 * @dtor: A destructor for the objects.
678 * Returns a ptr to the cache on success, NULL on failure.
679 * Cannot be called within an int, but can be interrupted.
680 * The @ctor is run when new pages are allocated by the cache
681 * and the @dtor is run before the pages are handed back.
682 * The flags are
684 * %SLAB_POISON - Poison the slab with a known test pattern (5a5a5a5a)
685 * to catch references to uninitialised memory.
687 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
688 * for buffer overruns.
690 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
691 * memory pressure.
693 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
694 * cacheline. This can be beneficial if you're counting cycles as closely
695 * as davem.
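/* An illustrative (hypothetical) use of kmem_cache_create(); 'foo_t',
 * 'foo_cachep' and 'foo_ctor' are example names, not part of this file.
 */
#if 0
static kmem_cache_t *foo_cachep;

void foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(foo_t), 0,
					SLAB_HWCACHE_ALIGN, foo_ctor, NULL);
	if (!foo_cachep)
		panic("foo_cache_init: cache creation failed");
}
#endif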
697 kmem_cache_t *
698 kmem_cache_create(const char *name, size_t size, size_t offset,
699 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
700 void (*dtor)(void*, kmem_cache_t *, unsigned long))
702 const char *func_nm= KERN_ERR "kmem_create: ";
703 kmem_cache_t *searchp;
704 kmem_cache_t *cachep=NULL;
705 size_t extra;
706 size_t left_over;
707 size_t align;
709 #if SLAB_DEBUG_SUPPORT
710 flags |= SLAB_POISON;
711 #endif
712 /* Sanity checks... */
713 #if SLAB_MGMT_CHECKS
714 if (!name) {
715 printk("%sNULL ptr\n", func_nm);
716 goto opps;
718 if (strlen(name) >= SLAB_CACHE_NAME_LEN) {
719 printk("%sname too long\n", func_nm);
720 goto opps;
722 if (in_interrupt()) {
723 printk("%sCalled during int - %s\n", func_nm, name);
724 goto opps;
727 if (size < BYTES_PER_WORD) {
728 printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
729 size = BYTES_PER_WORD;
732 if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
733 printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
734 goto opps;
737 if (dtor && !ctor) {
738 /* Decon, but no con - doesn't make sense */
739 printk("%sDecon but no con - %s\n", func_nm, name);
740 goto opps;
743 if (offset < 0 || offset > size) {
744 printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
745 offset = 0;
748 #if SLAB_DEBUG_SUPPORT
749 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
750 /* No constructor, but initial state check requested */
751 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
752 flags &= ~SLAB_DEBUG_INITIAL;
755 if ((flags & SLAB_POISON) && ctor) {
756 /* request for poisoning, but we can't do that with a constructor */
757 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
758 flags &= ~SLAB_POISON;
760 #if 0
761 if ((flags & SLAB_HIGH_PACK) && ctor) {
762 printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
763 flags &= ~SLAB_HIGH_PACK;
765 if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
766 printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
767 func_nm, name);
768 flags &= ~SLAB_HIGH_PACK;
770 #endif
771 #endif /* SLAB_DEBUG_SUPPORT */
772 #endif /* SLAB_MGMT_CHECKS */
774 /* Always check flags; a caller might be expecting debug
775 * support which isn't available.
777 if (flags & ~SLAB_C_MASK) {
778 printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
779 flags &= SLAB_C_MASK;
782 /* Get cache's description obj. */
783 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
784 if (!cachep)
785 goto opps;
786 memset(cachep, 0, sizeof(kmem_cache_t));
788 /* Check that size is in terms of words. This is needed to avoid
789 * unaligned accesses for some archs when redzoning is used, and makes
790 * sure any on-slab bufctl's are also correctly aligned.
792 if (size & (BYTES_PER_WORD-1)) {
793 size += (BYTES_PER_WORD-1);
794 size &= ~(BYTES_PER_WORD-1);
795 printk("%sForcing size word alignment - %s\n", func_nm, name);
798 cachep->c_org_size = size;
799 #if SLAB_DEBUG_SUPPORT
800 if (flags & SLAB_RED_ZONE) {
801 /* There is no point trying to honour cache alignment when redzoning. */
802 flags &= ~SLAB_HWCACHE_ALIGN;
803 size += 2*BYTES_PER_WORD; /* words for redzone */
805 #endif /* SLAB_DEBUG_SUPPORT */
807 align = BYTES_PER_WORD;
808 if (flags & SLAB_HWCACHE_ALIGN)
809 align = L1_CACHE_BYTES;
811 /* Determine if the slab management and/or bufctls are 'on' or 'off' slab. */
812 extra = sizeof(kmem_bufctl_t);
813 if (size < (PAGE_SIZE>>3)) {
814 /* Size is small(ish). Use packing where bufctl size per
815 * obj is low, and slab management is on-slab.
817 #if 0
818 if ((flags & SLAB_HIGH_PACK)) {
819 /* Special high packing for small objects
820 * (mainly for vm_mapping structs, but
821 * others can use it).
823 if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
824 size == L1_CACHE_BYTES) {
825 /* The bufctl is stored with the object. */
826 extra = 0;
827 } else
828 flags &= ~SLAB_HIGH_PACK;
830 #endif
831 } else {
832 /* Size is large, assume best to place the slab management obj
833 * off-slab (should allow better packing of objs).
835 flags |= SLAB_CFLGS_OFF_SLAB;
836 if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
837 || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
838 /* To avoid waste the bufctls are off-slab... */
839 flags |= SLAB_CFLGS_BUFCTL;
840 extra = 0;
841 } /* else slab management is off-slab, but freelist pointers are on. */
843 size += extra;
845 if (flags & SLAB_HWCACHE_ALIGN) {
846 /* Need to adjust size so that objs are cache aligned. */
847 if (size > (L1_CACHE_BYTES/2)) {
848 size_t words = size % L1_CACHE_BYTES;
849 if (words)
850 size += (L1_CACHE_BYTES-words);
851 } else {
852 /* Small obj size, can get at least two per cache line. */
853 int num_per_line = L1_CACHE_BYTES/size;
854 left_over = L1_CACHE_BYTES - (num_per_line*size);
855 if (left_over) {
856 /* Need to adjust size so objs cache align. */
857 if (left_over%num_per_line) {
858 /* Odd num of objs per line - fixup. */
859 num_per_line--;
860 left_over += size;
862 size += (left_over/num_per_line);
865 } else if (!(size%L1_CACHE_BYTES)) {
866 /* Size happens to cache align... */
867 flags |= SLAB_HWCACHE_ALIGN;
868 align = L1_CACHE_BYTES;
871 /* Calculate the size (in pages) of slabs, and the num of objs per slab.
872 * This could be made much more intelligent. For now, try to avoid
873 * using high page-orders for slabs. When the gfp() funcs are more
874 * friendly towards high-order requests, this should be changed.
876 do {
877 size_t wastage;
878 unsigned int break_flag = 0;
879 cal_wastage:
880 wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
881 flags, &left_over, &cachep->c_num);
882 if (!cachep->c_num)
883 goto next;
884 if (break_flag)
885 break;
886 if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
887 /* Oops, this num of objs will cause problems. */
888 cachep->c_gfporder--;
889 break_flag++;
890 goto cal_wastage;
892 if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
893 break;
895 /* Large num of objs is good, but v. large slabs are currently
896 * bad for the gfp()s.
898 if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
899 if (cachep->c_gfporder < slab_break_gfp_order)
900 goto next;
903 /* Stop caches with small objs having a large num of pages. */
904 if (left_over <= slab_align_size)
905 break;
906 if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
907 break; /* Acceptable internal fragmentation. */
908 next:
909 cachep->c_gfporder++;
910 } while (1);
912 /* If the slab management has been placed off-slab, and we have enough space, then
913 * move it on-slab. This is at the expense of any extra colouring.
915 if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
916 left_over >= slab_align_size) {
917 flags &= ~SLAB_CFLGS_OFF_SLAB;
918 left_over -= slab_align_size;
921 /* Offset must be a multiple of the alignment. */
922 offset += (align-1);
923 offset &= ~(align-1);
925 /* Mess around with the offset alignment. */
926 if (!left_over) {
927 offset = 0;
928 } else if (left_over < offset) {
929 offset = align;
930 if (flags & SLAB_HWCACHE_ALIGN) {
931 if (left_over < offset)
932 offset = 0;
933 } else {
934 /* Offset is BYTES_PER_WORD, and left_over is at
935 * least BYTES_PER_WORD.
937 if (left_over >= (BYTES_PER_WORD*2)) {
938 offset >>= 1;
939 if (left_over >= (BYTES_PER_WORD*4))
940 offset >>= 1;
943 } else if (!offset) {
944 /* No offset requested, but space enough - give one. */
945 offset = left_over/align;
946 if (flags & SLAB_HWCACHE_ALIGN) {
947 if (offset >= 8) {
948 /* A large number of colours - use a larger alignment. */
949 align <<= 1;
951 } else {
952 if (offset >= 10) {
953 align <<= 1;
954 if (offset >= 16)
955 align <<= 1;
958 offset = align;
961 #if 0
962 printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
963 #endif
965 if ((cachep->c_align = (unsigned long) offset))
966 cachep->c_colour = (left_over/offset);
967 cachep->c_colour_next = cachep->c_colour;
969 /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
970 if (!SLAB_BUFCTL(flags))
971 size -= sizeof(kmem_bufctl_t);
972 else
973 cachep->c_index_cachep =
974 kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
975 cachep->c_offset = (unsigned long) size;
976 cachep->c_freep = kmem_slab_end(cachep);
977 cachep->c_firstp = kmem_slab_end(cachep);
978 cachep->c_lastp = kmem_slab_end(cachep);
979 cachep->c_flags = flags;
980 cachep->c_ctor = ctor;
981 cachep->c_dtor = dtor;
982 cachep->c_magic = SLAB_C_MAGIC;
983 /* Copy name over so we don't have problems with unloaded modules */
984 strcpy(cachep->c_name, name);
985 spin_lock_init(&cachep->c_spinlock);
987 /* Need the semaphore to access the chain. */
988 down(&cache_chain_sem);
989 searchp = &cache_cache;
990 do {
991 /* The name field is constant - no lock needed. */
992 if (!strcmp(searchp->c_name, name)) {
993 printk("%sDup name - %s\n", func_nm, name);
994 break;
996 searchp = searchp->c_nextp;
997 } while (searchp != &cache_cache);
999 /* There is no reason to lock our new cache before we
1000 * link it in - no one knows about it yet...
1002 cachep->c_nextp = cache_cache.c_nextp;
1003 cache_cache.c_nextp = cachep;
1004 up(&cache_chain_sem);
1005 opps:
1006 return cachep;
1010 * This checks if the kmem_cache_t pointer is chained in the cache_cache
1011 * list. -arca
1013 static int is_chained_kmem_cache(kmem_cache_t * cachep)
1015 kmem_cache_t * searchp;
1016 int ret = 0;
1018 /* Find the cache in the chain of caches. */
1019 down(&cache_chain_sem);
1020 for (searchp = &cache_cache; searchp->c_nextp != &cache_cache;
1021 searchp = searchp->c_nextp) {
1022 if (searchp->c_nextp != cachep)
1023 continue;
1025 /* Accessing clock_searchp is safe - we hold the mutex. */
1026 if (cachep == clock_searchp)
1027 clock_searchp = cachep->c_nextp;
1028 ret = 1;
1029 break;
1031 up(&cache_chain_sem);
1033 return ret;
1036 /* returns 0 if every slab has been freed -arca */
1037 static int __kmem_cache_shrink(kmem_cache_t *cachep)
1039 kmem_slab_t *slabp;
1040 int ret;
1042 spin_lock_irq(&cachep->c_spinlock);
1044 /* If the cache is growing, stop shrinking. */
1045 while (!cachep->c_growing) {
1046 slabp = cachep->c_lastp;
1047 if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
1048 break;
1050 * If this slab is the first slab with free objects
1051 * (c_freep), and as we are walking the slab chain
1052 * backwards, it is also the last slab with free
1053 * objects. After unlinking it, there will be no
1054 * slabs with free objects, so point c_freep into the
1055 * cache structure.
1057 if (cachep->c_freep == slabp)
1058 cachep->c_freep = kmem_slab_end(cachep);
1059 kmem_slab_unlink(slabp);
1060 spin_unlock_irq(&cachep->c_spinlock);
1061 kmem_slab_destroy(cachep, slabp);
1062 spin_lock_irq(&cachep->c_spinlock);
1064 ret = 1;
1065 if (cachep->c_lastp == kmem_slab_end(cachep))
1066 ret = 0; /* Cache is empty. */
1067 spin_unlock_irq(&cachep->c_spinlock);
1068 return ret;
1072 * kmem_cache_shrink - Shrink a cache.
1073 * @cachep: The cache to shrink.
1075 * Releases as many slabs as possible for a cache.
1076 * To help debugging, a zero exit status indicates all slabs were released.
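/* An illustrative (hypothetical) caller: a module that keeps its cache across
 * loads can release unused slabs on unload; 'foo_cachep' is an example name.
 */
#if 0
if (kmem_cache_shrink(foo_cachep))
	printk(KERN_INFO "foo: cache still has active objects\n");
#endif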
1079 kmem_cache_shrink(kmem_cache_t *cachep)
1081 if (!cachep)
1082 BUG();
1083 if (in_interrupt())
1084 BUG();
1085 if (!is_chained_kmem_cache(cachep))
1086 BUG();
1088 return __kmem_cache_shrink(cachep);
1092 * kmem_cache_destroy - delete a cache
1093 * @cachep: the cache to destroy
1095 * Remove a kmem_cache_t object from the slab cache.
1096 * Returns 0 on success.
1098 * It is expected this function will be called by a module when it is
1099 * unloaded. This will remove the cache completely, and avoid a duplicate
1100 * cache being allocated each time a module is loaded and unloaded, if the
1101 * module doesn't have persistent in-kernel storage across loads and unloads.
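/* An illustrative (hypothetical) module-unload path; 'foo_cachep' is an
 * example name.
 */
#if 0
void foo_cache_exit(void)
{
	if (kmem_cache_destroy(foo_cachep))
		printk(KERN_ERR "foo: couldn't destroy cache (objects still in use?)\n");
}
#endif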
1104 int kmem_cache_destroy(kmem_cache_t * cachep)
1106 kmem_cache_t * prev;
1107 int ret;
1109 if (!cachep) {
1110 printk(KERN_ERR "kmem_destroy: NULL ptr\n");
1111 return 1;
1113 if (in_interrupt()) {
1114 printk(KERN_ERR "kmem_destroy: Called during int - %s\n",
1115 cachep->c_name);
1116 return 1;
1119 ret = 0;
1120 /* Find the cache in the chain of caches. */
1121 down(&cache_chain_sem);
1122 for (prev = &cache_cache; prev->c_nextp != &cache_cache;
1123 prev = prev->c_nextp) {
1124 if (prev->c_nextp != cachep)
1125 continue;
1127 /* Accessing clock_searchp is safe - we hold the mutex. */
1128 if (cachep == clock_searchp)
1129 clock_searchp = cachep->c_nextp;
1131 /* remove the cachep from the cache_cache list. -arca */
1132 prev->c_nextp = cachep->c_nextp;
1134 ret = 1;
1135 break;
1137 up(&cache_chain_sem);
1139 if (!ret) {
1140 printk(KERN_ERR "kmem_destroy: Invalid cache addr %p\n",
1141 cachep);
1142 return 1;
1145 if (__kmem_cache_shrink(cachep)) {
1146 printk(KERN_ERR "kmem_destroy: Can't free all objects %p\n",
1147 cachep);
1148 down(&cache_chain_sem);
1149 cachep->c_nextp = cache_cache.c_nextp;
1150 cache_cache.c_nextp = cachep;
1151 up(&cache_chain_sem);
1152 return 1;
1155 kmem_cache_free(&cache_cache, cachep);
1157 return 0;
1160 /* Get the memory for a slab management obj. */
1161 static inline kmem_slab_t *
1162 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
1164 kmem_slab_t *slabp;
1166 if (SLAB_OFF_SLAB(cachep->c_flags)) {
1167 /* Slab management obj is off-slab. */
1168 slabp = kmem_cache_alloc(cache_slabp, local_flags);
1169 } else {
1170 /* Slab management at end of slab memory, placed so that
1171 * the position is 'coloured'.
1173 void *end;
1174 end = objp + (cachep->c_num * cachep->c_offset);
1175 if (!SLAB_BUFCTL(cachep->c_flags))
1176 end += (cachep->c_num * sizeof(kmem_bufctl_t));
1177 slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
1180 if (slabp) {
1181 slabp->s_inuse = 0;
1182 slabp->s_dma = 0;
1183 slabp->s_index = NULL;
1186 return slabp;
1189 static inline void
1190 kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
1191 unsigned long ctor_flags)
1193 kmem_bufctl_t **bufpp = &slabp->s_freep;
1194 unsigned long num = cachep->c_num-1;
1196 do {
1197 #if SLAB_DEBUG_SUPPORT
1198 if (cachep->c_flags & SLAB_RED_ZONE) {
1199 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1200 objp += BYTES_PER_WORD;
1201 *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
1203 #endif /* SLAB_DEBUG_SUPPORT */
1205 /* Constructors are not allowed to allocate memory from the same cache
1206 * which they are a constructor for. Otherwise, deadlock.
1207 * They must also be threaded.
1209 if (cachep->c_ctor)
1210 cachep->c_ctor(objp, cachep, ctor_flags);
1211 #if SLAB_DEBUG_SUPPORT
1212 else if (cachep->c_flags & SLAB_POISON) {
1213 /* need to poison the objs */
1214 kmem_poison_obj(cachep, objp);
1217 if (cachep->c_flags & SLAB_RED_ZONE) {
1218 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
1219 SLAB_RED_MAGIC1) {
1220 *((unsigned long*)(objp+cachep->c_org_size)) =
1221 SLAB_RED_MAGIC1;
1222 printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
1223 "after constructor - %s\n", cachep->c_name);
1225 objp -= BYTES_PER_WORD;
1226 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
1227 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1228 printk(KERN_ERR "kmem_init_obj: Bad front redzone "
1229 "after constructor - %s\n", cachep->c_name);
1232 #endif /* SLAB_DEBUG_SUPPORT */
1234 objp += cachep->c_offset;
1235 if (!slabp->s_index) {
1236 *bufpp = objp;
1237 objp += sizeof(kmem_bufctl_t);
1238 } else
1239 *bufpp = &slabp->s_index[num];
1240 bufpp = &(*bufpp)->buf_nextp;
1241 } while (num--);
1243 *bufpp = NULL;
1246 /* Grow (by 1) the number of slabs within a cache. This is called by
1247 * kmem_cache_alloc() when there are no free objs left in a cache.
1249 static int
1250 kmem_cache_grow(kmem_cache_t * cachep, int flags)
1252 kmem_slab_t *slabp;
1253 struct page *page;
1254 void *objp;
1255 size_t offset;
1256 unsigned int dma, local_flags;
1257 unsigned long ctor_flags;
1258 unsigned long save_flags;
1260 /* Be lazy and only check for valid flags here,
1261 * keeping it out of the critical path in kmem_cache_alloc().
1263 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
1264 printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
1265 flags, cachep->c_name);
1266 flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
1269 if (flags & SLAB_NO_GROW)
1270 return 0;
1272 /* The test for missing atomic flag is performed here, rather than
1273 * the more obvious place, simply to reduce the critical path length
1274 * in kmem_cache_alloc(). If a caller is slightly mis-behaving they
1275 * will eventually be caught here (where it matters).
1277 if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
1278 printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
1279 cachep->c_name);
1280 flags &= ~SLAB_LEVEL_MASK;
1281 flags |= SLAB_ATOMIC;
1283 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1284 local_flags = (flags & SLAB_LEVEL_MASK);
1285 if (local_flags == SLAB_ATOMIC) {
1286 /* Not allowed to sleep. Need to tell a constructor about
1287 * this - it might need to know...
1289 ctor_flags |= SLAB_CTOR_ATOMIC;
1292 /* About to mess with non-constant members - lock. */
1293 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1295 /* Get colour for the slab, and calculate the next value. */
1296 if (!(offset = cachep->c_colour_next--))
1297 cachep->c_colour_next = cachep->c_colour;
1298 offset *= cachep->c_align;
1299 cachep->c_dflags = SLAB_CFLGS_GROWN;
1301 cachep->c_growing++;
1302 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1304 /* A series of memory allocations for a new slab.
1305 * Neither the cache-chain semaphore, nor the cache-lock, is
1306 * held, but the incremented c_growing prevents this
1307 * cache from being reaped or shrunk.
1308 * Note: The cache could be selected for reaping in
1309 * kmem_cache_reap(), but when the final test is made the
1310 * growing value will be seen.
1313 /* Get mem for the objs. */
1314 if (!(objp = kmem_getpages(cachep, flags, &dma)))
1315 goto failed;
1317 /* Get slab management. */
1318 if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
1319 goto opps1;
1320 if (dma)
1321 slabp->s_dma = 1;
1322 if (SLAB_BUFCTL(cachep->c_flags)) {
1323 slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
1324 if (!slabp->s_index)
1325 goto opps2;
1328 /* Nasty!!!!!! I hope this is OK. */
1329 dma = 1 << cachep->c_gfporder;
1330 page = &mem_map[MAP_NR(objp)];
1331 do {
1332 SLAB_SET_PAGE_CACHE(page, cachep);
1333 SLAB_SET_PAGE_SLAB(page, slabp);
1334 PageSetSlab(page);
1335 page++;
1336 } while (--dma);
1338 slabp->s_offset = offset; /* It will fit... */
1339 objp += offset; /* Address of first object. */
1340 slabp->s_mem = objp;
1342 /* For on-slab bufctls, c_offset is the distance between the start of
1343 * an obj and its related bufctl. For off-slab bufctls, c_offset is
1344 * the distance between objs in the slab.
1346 kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
1348 spin_lock_irq(&cachep->c_spinlock);
1350 /* Make slab active. */
1351 slabp->s_magic = SLAB_MAGIC_ALLOC;
1352 kmem_slab_link_end(cachep, slabp);
1353 if (cachep->c_freep == kmem_slab_end(cachep))
1354 cachep->c_freep = slabp;
1355 SLAB_STATS_INC_GROWN(cachep);
1356 cachep->c_failures = 0;
1357 cachep->c_growing--;
1359 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1360 return 1;
1361 opps2:
1362 if (SLAB_OFF_SLAB(cachep->c_flags))
1363 kmem_cache_free(cache_slabp, slabp);
1364 opps1:
1365 kmem_freepages(cachep, objp);
1366 failed:
1367 spin_lock_irq(&cachep->c_spinlock);
1368 cachep->c_growing--;
1369 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1370 return 0;
1373 static void
1374 kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
1376 if (cachep)
1377 SLAB_STATS_INC_ERR(cachep); /* this is atomic */
1378 printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
1379 str, cachep ? cachep->c_name : "unknown");
1382 static void
1383 kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
1385 if (cachep)
1386 SLAB_STATS_INC_ERR(cachep);
1387 printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
1388 str, objp, cachep ? cachep->c_name : "unknown");
1391 /* Search for a slab whose objs are suitable for DMA.
1392 * Note: since testing the first free slab (in __kmem_cache_alloc()),
1393 * ints must not have been enabled, or the cache-lock released!
1395 static inline kmem_slab_t *
1396 kmem_cache_search_dma(kmem_cache_t * cachep)
1398 kmem_slab_t *slabp = cachep->c_freep->s_nextp;
1400 for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
1401 if (!(slabp->s_dma))
1402 continue;
1403 kmem_slab_unlink(slabp);
1404 kmem_slab_link_free(cachep, slabp);
1405 cachep->c_freep = slabp;
1406 break;
1408 return slabp;
1411 #if SLAB_DEBUG_SUPPORT
1412 /* Perform extra freeing checks. Currently, this check is only for caches
1413 * that use bufctl structures within the slab. Those which use bufctl's
1414 * from the internal cache have a reasonable check when the address is
1415 * searched for. Called with the cache-lock held.
1417 static void *
1418 kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
1419 kmem_bufctl_t *bufp, void * objp)
1421 if (SLAB_BUFCTL(cachep->c_flags))
1422 return objp;
1424 /* Check slab's freelist to see if this obj is there. */
1425 for (; search_bufp; search_bufp = search_bufp->buf_nextp) {
1426 if (search_bufp != bufp)
1427 continue;
1428 return NULL;
1430 return objp;
1432 #endif /* SLAB_DEBUG_SUPPORT */
1434 /* Called with cache lock held. */
1435 static inline void
1436 kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1438 if (slabp->s_nextp->s_inuse) {
1439 /* Not at correct position. */
1440 if (cachep->c_freep == slabp)
1441 cachep->c_freep = slabp->s_nextp;
1442 kmem_slab_unlink(slabp);
1443 kmem_slab_link_end(cachep, slabp);
1447 /* Called with cache lock held. */
1448 static inline void
1449 kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1451 if (slabp->s_nextp->s_inuse == cachep->c_num) {
1452 kmem_slab_unlink(slabp);
1453 kmem_slab_link_free(cachep, slabp);
1455 cachep->c_freep = slabp;
1458 /* Returns a ptr to an obj in the given cache. */
1459 static inline void *
1460 __kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1462 kmem_slab_t *slabp;
1463 kmem_bufctl_t *bufp;
1464 void *objp;
1465 unsigned long save_flags;
1467 /* Sanity check. */
1468 if (!cachep)
1469 goto nul_ptr;
1470 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1471 try_again:
1472 /* Get slab alloc is to come from. */
1473 slabp = cachep->c_freep;
1475 /* Magic is a sanity check _and_ says if we need a new slab. */
1476 if (slabp->s_magic != SLAB_MAGIC_ALLOC)
1477 goto alloc_new_slab;
1478 /* DMA requests are 'rare' - keep out of the critical path. */
1479 if (flags & SLAB_DMA)
1480 goto search_dma;
1481 try_again_dma:
1482 SLAB_STATS_INC_ALLOCED(cachep);
1483 SLAB_STATS_INC_ACTIVE(cachep);
1484 SLAB_STATS_SET_HIGH(cachep);
1485 slabp->s_inuse++;
1486 bufp = slabp->s_freep;
1487 slabp->s_freep = bufp->buf_nextp;
1488 if (slabp->s_freep) {
1489 ret_obj:
1490 if (!slabp->s_index) {
1491 bufp->buf_slabp = slabp;
1492 objp = ((void*)bufp) - cachep->c_offset;
1493 finished:
1494 /* The lock is not needed by the red-zone or poison ops, and the
1495 * obj has been removed from the slab. Should be safe to drop
1496 * the lock here.
1498 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1499 #if SLAB_DEBUG_SUPPORT
1500 if (cachep->c_flags & SLAB_RED_ZONE)
1501 goto red_zone;
1502 ret_red:
1503 if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
1504 kmem_report_alloc_err("Bad poison", cachep);
1505 #endif /* SLAB_DEBUG_SUPPORT */
1506 return objp;
1508 /* Update index ptr. */
1509 objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
1510 bufp->buf_objp = objp;
1511 goto finished;
1513 cachep->c_freep = slabp->s_nextp;
1514 goto ret_obj;
1516 #if SLAB_DEBUG_SUPPORT
1517 red_zone:
1518 /* Set alloc red-zone, and check old one. */
1519 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1520 kmem_report_alloc_err("Bad front redzone", cachep);
1521 objp += BYTES_PER_WORD;
1522 if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1523 kmem_report_alloc_err("Bad rear redzone", cachep);
1524 goto ret_red;
1525 #endif /* SLAB_DEBUG_SUPPORT */
1527 search_dma:
1528 if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
1529 goto try_again_dma;
1530 alloc_new_slab:
1531 /* Either out of slabs, or magic number corruption. */
1532 if (slabp == kmem_slab_end(cachep)) {
1533 /* Need a new slab. Release the lock before calling kmem_cache_grow().
1534 * This allows objs to be released back into the cache while growing.
1536 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1537 if (kmem_cache_grow(cachep, flags)) {
1538 /* Someone may have stolen our objs. Doesn't matter, we'll
1539 * just come back here again.
1541 spin_lock_irq(&cachep->c_spinlock);
1542 goto try_again;
1544 /* Couldn't grow, but some objs may have been freed. */
1545 spin_lock_irq(&cachep->c_spinlock);
1546 if (cachep->c_freep != kmem_slab_end(cachep)) {
1547 if ((flags & SLAB_ATOMIC) == 0)
1548 goto try_again;
1550 } else {
1551 /* Very serious error - maybe panic() here? */
1552 kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
1554 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1555 err_exit:
1556 return NULL;
1557 nul_ptr:
1558 kmem_report_alloc_err("NULL ptr", NULL);
1559 goto err_exit;
1562 /* Release an obj back to its cache. If the obj has a constructed state,
1563 * it should be in this state _before_ it is released.
1565 static inline void
1566 __kmem_cache_free(kmem_cache_t *cachep, void *objp)
1568 kmem_slab_t *slabp;
1569 kmem_bufctl_t *bufp;
1570 unsigned long save_flags;
1572 /* Basic sanity checks. */
1573 if (!cachep || !objp)
1574 goto null_addr;
1576 #if SLAB_DEBUG_SUPPORT
1577 /* A verify func is called without the cache-lock held. */
1578 if (cachep->c_flags & SLAB_DEBUG_INITIAL)
1579 goto init_state_check;
1580 finished_initial:
1582 if (cachep->c_flags & SLAB_RED_ZONE)
1583 goto red_zone;
1584 return_red:
1585 #endif /* SLAB_DEBUG_SUPPORT */
1587 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1589 if (SLAB_BUFCTL(cachep->c_flags))
1590 goto bufctl;
1591 bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
1593 /* Get slab for the object. */
1594 #if 0
1595 /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
1596 * Is this worth while? XXX
1598 if (cachep->c_flags & SLAB_HIGH_PACK)
1599 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
1600 else
1601 #endif
1602 slabp = bufp->buf_slabp;
1604 check_magic:
1605 if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* Sanity check. */
1606 goto bad_slab;
1608 #if SLAB_DEBUG_SUPPORT
1609 if (cachep->c_flags & SLAB_DEBUG_FREE)
1610 goto extra_checks;
1611 passed_extra:
1612 #endif /* SLAB_DEBUG_SUPPORT */
1614 if (slabp->s_inuse) { /* Sanity check. */
1615 SLAB_STATS_DEC_ACTIVE(cachep);
1616 slabp->s_inuse--;
1617 bufp->buf_nextp = slabp->s_freep;
1618 slabp->s_freep = bufp;
1619 if (bufp->buf_nextp) {
1620 if (slabp->s_inuse) {
1621 /* (hopefully) The most common case. */
1622 finished:
1623 #if SLAB_DEBUG_SUPPORT
1624 if (cachep->c_flags & SLAB_POISON) {
1625 if (cachep->c_flags & SLAB_RED_ZONE)
1626 objp += BYTES_PER_WORD;
1627 kmem_poison_obj(cachep, objp);
1629 #endif /* SLAB_DEBUG_SUPPORT */
1630 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1631 return;
1633 kmem_cache_full_free(cachep, slabp);
1634 goto finished;
1636 kmem_cache_one_free(cachep, slabp);
1637 goto finished;
1640 /* Don't add to freelist. */
1641 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1642 kmem_report_free_err("free with no active objs", objp, cachep);
1643 return;
1644 bufctl:
1645 /* No 'extra' checks are performed for objs stored this way, finding
1646 * the obj is check enough.
1648 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
1649 bufp = &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
1650 if (bufp->buf_objp == objp)
1651 goto check_magic;
1652 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1653 kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
1654 return;
1655 #if SLAB_DEBUG_SUPPORT
1656 init_state_check:
1657 /* Need to call the slab's constructor so the
1658 * caller can perform a verify of its state (debugging).
1660 cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1661 goto finished_initial;
1662 extra_checks:
1663 if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
1664 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1665 kmem_report_free_err("Double free detected during checks", objp, cachep);
1666 return;
1668 goto passed_extra;
1669 red_zone:
1670 /* We do not hold the cache-lock while checking the red-zone.
1672 objp -= BYTES_PER_WORD;
1673 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1674 /* Either write before start of obj, or a double free. */
1675 kmem_report_free_err("Bad front redzone", objp, cachep);
1677 if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1678 /* Either write past end of obj, or a double free. */
1679 kmem_report_free_err("Bad rear redzone", objp, cachep);
1681 goto return_red;
1682 #endif /* SLAB_DEBUG_SUPPORT */
1684 bad_slab:
1685 /* Slab doesn't contain the correct magic num. */
1686 if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
1687 /* Magic num says this is a destroyed slab. */
1688 kmem_report_free_err("free from inactive slab", objp, cachep);
1689 } else
1690 kmem_report_free_err("Bad obj addr", objp, cachep);
1691 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1693 #if 1
1694 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1695 BUG();
1696 #endif
1698 return;
1699 null_addr:
1700 kmem_report_free_err("NULL ptr", objp, cachep);
1701 return;
1705 * kmem_cache_alloc - Allocate an object
1706 * @cachep: The cache to allocate from.
1707 * @flags: See kmalloc().
1709 * Allocate an object from this cache. The flags are only relevant
1710 * if the cache has no available objects.
1712 void *
1713 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1715 return __kmem_cache_alloc(cachep, flags);
1719 * kmem_cache_free - Deallocate an object
1720 * @cachep: The cache the allocation was from.
1721 * @objp: The previously allocated object.
1723 * Free an object which was previously allocated from this
1724 * cache.
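/* An illustrative (hypothetical) alloc/free pairing; 'foo_cachep' is an
 * example name, and the obj is handed back in its constructed state.
 */
#if 0
static void foo_use_cache(void)
{
	void *obj = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
	if (!obj)
		return;
	/* ... use obj ... */
	kmem_cache_free(foo_cachep, obj);
}
#endif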
1726 void
1727 kmem_cache_free(kmem_cache_t *cachep, void *objp)
1729 __kmem_cache_free(cachep, objp);
1733 * kmalloc - allocate memory
1734 * @size: how many bytes of memory are required.
1735 * @flags: the type of memory to allocate.
1737 * kmalloc is the normal method of allocating memory
1738 * in the kernel. The @flags argument may be one of:
1740 * %GFP_BUFFER - XXX
1742 * %GFP_ATOMIC - allocation will not sleep. Use inside interrupt handlers.
1744 * %GFP_USER - allocate memory on behalf of user. May sleep.
1746 * %GFP_KERNEL - allocate normal kernel ram. May sleep.
1748 * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1749 * Don't use unless you're in the NFS code.
1751 * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
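/* An illustrative (hypothetical) kmalloc()/kfree() pairing; 'foo_alloc_buffer'
 * and 'nbytes' are example names.
 */
#if 0
static int foo_alloc_buffer(size_t nbytes)
{
	char *buf = kmalloc(nbytes, GFP_KERNEL);	/* may sleep */
	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	kfree(buf);
	return 0;
}
#endif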
1753 void *
1754 kmalloc(size_t size, int flags)
1756 cache_sizes_t *csizep = cache_sizes;
1758 for (; csizep->cs_size; csizep++) {
1759 if (size > csizep->cs_size)
1760 continue;
1761 return __kmem_cache_alloc(csizep->cs_cachep, flags);
1762 }
1763 printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
1764 return NULL;
1765 }
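/*
 * Editor's sketch (illustrative, not part of the original file): typical
 * kmalloc()/kfree() use with the flags documented above; GFP_ATOMIC would
 * replace GFP_KERNEL in contexts that must not sleep.
 */
#if 0
static int example(void)
{
	void *buf = kmalloc(128, GFP_KERNEL);	/* may sleep */

	if (!buf)
		return -ENOMEM;
	/* ... use buf ... */
	kfree(buf);
	return 0;
}
#endif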
1767 /**
1768 * kfree - free previously allocated memory
1769 * @objp: pointer returned by kmalloc.
1771 * Don't free memory not originally allocated by kmalloc()
1772 * or you will run into trouble.
1773 */
1774 void
1775 kfree(const void *objp)
1776 {
1777 struct page *page;
1778 int nr;
1780 if (!objp)
1781 goto null_ptr;
1782 nr = MAP_NR(objp);
1783 if (nr >= max_mapnr)
1784 goto bad_ptr;
1786 /* Assume we own the page structure - hence no locking.
1787 * If someone is misbehaving (for example, calling us with a bad
1788 * address), then access to the page structure can race with the
1789 * kmem_slab_destroy() code. Need to add a spin_lock to each page
1790 * structure, which would be useful in threading the gfp() functions....
1791 */
1792 page = &mem_map[nr];
1793 if (PageSlab(page)) {
1794 kmem_cache_t *cachep;
1796 /* Here, we again assume the obj address is good.
1797 * If it isn't, and happens to map onto another
1798 * general cache page which has no active objs, then
1799 * we race.
1800 */
1801 cachep = SLAB_GET_PAGE_CACHE(page);
1802 if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
1803 __kmem_cache_free(cachep, (void *)objp);
1804 return;
1805 }
1806 }
1807 bad_ptr:
1808 printk(KERN_ERR "kfree: Bad obj %p\n", objp);
1810 #if 1
1811 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1812 BUG();
1813 #endif
1815 null_ptr:
1816 return;
1817 }
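/*
 * Editor's sketch (illustrative, not part of the original file): the
 * pointer-to-cache lookup used by kfree() above, shown in isolation.
 * Only objects from general (SLAB_CFLGS_GENERAL) caches may be kfree()d;
 * private-cache objects must go back through kmem_cache_free().
 */
#if 0
struct page *page = &mem_map[MAP_NR(objp)];
kmem_cache_t *cachep = PageSlab(page) ? SLAB_GET_PAGE_CACHE(page) : NULL;
#endif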
1819 /**
1820 * kfree_s - free previously allocated memory
1821 * @objp: pointer returned by kmalloc.
1822 * @size: size of object which is being freed.
1824 * This function performs the same task as kfree() except
1825 * that it can use the extra information to speed up deallocation
1826 * or perform additional tests.
1827 * Don't free memory not originally allocated by kmalloc()
1828 * or allocated with a different size, or you will run into trouble.
1829 */
1830 void
1831 kfree_s(const void *objp, size_t size)
1832 {
1833 struct page *page;
1834 int nr;
1836 if (!objp)
1837 goto null_ptr;
1838 nr = MAP_NR(objp);
1839 if (nr >= max_mapnr)
1840 goto null_ptr;
1841 /* See comment in kfree() */
1842 page = &mem_map[nr];
1843 if (PageSlab(page)) {
1844 kmem_cache_t *cachep;
1845 /* See comment in kfree() */
1846 cachep = SLAB_GET_PAGE_CACHE(page);
1847 if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
1848 if (size <= cachep->c_org_size) { /* XXX better check */
1849 __kmem_cache_free(cachep, (void *)objp);
1850 return;
1851 }
1852 }
1853 }
1854 null_ptr:
1855 printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
1856 return;
1857 }
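/*
 * Editor's sketch (illustrative, not part of the original file): kfree_s()
 * passes the allocation size back so the free can be checked and sped up.
 * 'struct foo' is hypothetical.
 */
#if 0
static void example(void)
{
	struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

	if (f) {
		/* ... use f ... */
		kfree_s(f, sizeof(*f));
	}
}
#endif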
1859 kmem_cache_t *
1860 kmem_find_general_cachep(size_t size)
1861 {
1862 cache_sizes_t *csizep = cache_sizes;
1864 /* This function could be moved to the header file, and
1865 * made inline so consumers can quickly determine what
1866 * cache pointer they require.
1867 */
1868 for (; csizep->cs_size; csizep++) {
1869 if (size > csizep->cs_size)
1870 continue;
1871 break;
1872 }
1873 return csizep->cs_cachep;
1874 }
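/*
 * Editor's sketch (illustrative, not part of the original file): a caller
 * that makes many same-sized allocations can look up the matching general
 * cache once and then allocate from it directly.
 */
#if 0
static void *alloc_256(void)
{
	kmem_cache_t *cp = kmem_find_general_cachep(256);

	return cp ? kmem_cache_alloc(cp, SLAB_KERNEL) : NULL;
}
#endif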
1877 /**
1878 * kmem_cache_reap - Reclaim memory from caches.
1879 * @gfp_mask: the type of memory required.
1881 * Called from try_to_free_page().
1882 * This function _cannot_ be called from within an interrupt, but it
1883 * can be interrupted.
1884 */
1885 void
1886 kmem_cache_reap(int gfp_mask)
1887 {
1888 kmem_slab_t *slabp;
1889 kmem_cache_t *searchp;
1890 kmem_cache_t *best_cachep;
1891 unsigned int scan;
1892 unsigned int reap_level;
1894 if (in_interrupt()) {
1895 printk("kmem_cache_reap() called within int!\n");
1896 return;
1897 }
1899 /* We really need a test semaphore op so we can avoid sleeping when
1900 * !wait is true.
1901 */
1902 down(&cache_chain_sem);
1904 scan = 10;
1905 reap_level = 0;
1907 best_cachep = NULL;
1908 searchp = clock_searchp;
1909 do {
1910 unsigned int full_free;
1911 unsigned int dma_flag;
1913 /* It's safe to test this without holding the cache-lock. */
1914 if (searchp->c_flags & SLAB_NO_REAP)
1915 goto next;
1916 spin_lock_irq(&searchp->c_spinlock);
1917 if (searchp->c_growing)
1918 goto next_unlock;
1919 if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
1920 searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
1921 goto next_unlock;
1922 }
1923 /* Sanity check for corruption of static values. */
1924 if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
1925 spin_unlock_irq(&searchp->c_spinlock);
1926 printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
1927 goto next;
1928 }
1929 dma_flag = 0;
1930 full_free = 0;
1932 /* Count the fully free slabs. There should not be many,
1933 * since we are holding the cache lock.
1934 */
1935 slabp = searchp->c_lastp;
1936 while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
1937 slabp = slabp->s_prevp;
1938 full_free++;
1939 if (slabp->s_dma)
1940 dma_flag++;
1941 }
1942 spin_unlock_irq(&searchp->c_spinlock);
1944 if ((gfp_mask & GFP_DMA) && !dma_flag)
1945 goto next;
1947 if (full_free) {
1948 if (full_free >= 10) {
1949 best_cachep = searchp;
1950 break;
1951 }
1953 /* Try to avoid slabs with constructors and/or
1954 * more than one page per slab (as it can be difficult
1955 * to get high orders from gfp()).
1956 */
1957 if (full_free >= reap_level) {
1958 reap_level = full_free;
1959 best_cachep = searchp;
1960 }
1961 }
1962 goto next;
1963 next_unlock:
1964 spin_unlock_irq(&searchp->c_spinlock);
1965 next:
1966 searchp = searchp->c_nextp;
1967 } while (--scan && searchp != clock_searchp);
1969 clock_searchp = searchp;
1971 if (!best_cachep) {
1972 /* couldn't find anything to reap */
1973 goto out;
1974 }
1976 spin_lock_irq(&best_cachep->c_spinlock);
1977 while (!best_cachep->c_growing &&
1978 !(slabp = best_cachep->c_lastp)->s_inuse &&
1979 slabp != kmem_slab_end(best_cachep)) {
1980 if (gfp_mask & GFP_DMA) {
1981 do {
1982 if (slabp->s_dma)
1983 goto good_dma;
1984 slabp = slabp->s_prevp;
1985 } while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));
1987 /* Didn't find a DMA slab (there was a free one -
1988 * it must have become active).
1989 */
1990 goto dma_fail;
1991 good_dma:
1992 }
1993 if (slabp == best_cachep->c_freep)
1994 best_cachep->c_freep = slabp->s_nextp;
1995 kmem_slab_unlink(slabp);
1996 SLAB_STATS_INC_REAPED(best_cachep);
1998 /* Safe to drop the lock. The slab is no longer linked to the
1999 * cache.
2000 */
2001 spin_unlock_irq(&best_cachep->c_spinlock);
2002 kmem_slab_destroy(best_cachep, slabp);
2003 spin_lock_irq(&best_cachep->c_spinlock);
2004 }
2005 dma_fail:
2006 spin_unlock_irq(&best_cachep->c_spinlock);
2007 out:
2008 up(&cache_chain_sem);
2009 return;
2010 }
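/*
 * Editor's sketch (illustrative, not part of the original file): how the
 * page-reclaim path is expected to invoke the reaper, passing its gfp mask
 * so that GFP_DMA requests only reap slabs backed by DMA-capable pages.
 */
#if 0
if (!in_interrupt())
	kmem_cache_reap(gfp_mask);
#endif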
2012 #if SLAB_SELFTEST
2013 /* A few v. simple tests */
2014 static void
2015 kmem_self_test(void)
2016 {
2017 kmem_cache_t *test_cachep;
2019 printk(KERN_INFO "kmem_test() - start\n");
2020 test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
2021 if (test_cachep) {
2022 char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
2023 if (objp) {
2024 /* Write in front and past end, red-zone test. */
2025 *(objp-1) = 1;
2026 *(objp+16) = 1;
2027 kmem_cache_free(test_cachep, objp);
2029 /* Mess up poisoning. */
2030 *objp = 10;
2031 objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
2032 kmem_cache_free(test_cachep, objp);
2034 /* Mess up poisoning (again). */
2035 *objp = 10;
2036 kmem_cache_shrink(test_cachep);
2037 }
2038 }
2039 printk(KERN_INFO "kmem_test() - finished\n");
2040 }
2041 #endif /* SLAB_SELFTEST */
2043 #if defined(CONFIG_PROC_FS)
2044 /**
2045 * get_slabinfo - generates /proc/slabinfo
2046 * @buf: the buffer to write it into
2048 * The contents of the buffer are
2049 * cache-name
2050 * num-active-objs
2051 * total-objs
2052 * num-active-slabs
2053 * total-slabs
2054 * num-pages-per-slab
2055 */
2056 int
2057 get_slabinfo(char *buf)
2058 {
2059 kmem_cache_t *cachep;
2060 kmem_slab_t *slabp;
2061 unsigned long active_objs;
2062 unsigned long save_flags;
2063 unsigned long num_slabs;
2064 unsigned long num_objs;
2065 int len=0;
2066 #if SLAB_STATS
2067 unsigned long active_slabs;
2068 #endif /* SLAB_STATS */
2070 __save_flags(save_flags);
2072 /* Output format version, so at least we can change it without _too_
2073 * many complaints.
2074 */
2075 #if SLAB_STATS
2076 len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
2077 #else
2078 len = sprintf(buf, "slabinfo - version: 1.0\n");
2079 #endif /* SLAB_STATS */
2080 down(&cache_chain_sem);
2081 cachep = &cache_cache;
2082 do {
2083 #if SLAB_STATS
2084 active_slabs = 0;
2085 #endif /* SLAB_STATS */
2086 num_slabs = active_objs = 0;
2087 spin_lock_irq(&cachep->c_spinlock);
2088 for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
2089 active_objs += slabp->s_inuse;
2090 num_slabs++;
2091 #if SLAB_STATS
2092 if (slabp->s_inuse)
2093 active_slabs++;
2094 #endif /* SLAB_STATS */
2095 }
2096 num_objs = cachep->c_num*num_slabs;
2097 #if SLAB_STATS
2098 {
2099 unsigned long errors;
2100 unsigned long high = cachep->c_high_mark;
2101 unsigned long grown = cachep->c_grown;
2102 unsigned long reaped = cachep->c_reaped;
2103 unsigned long allocs = cachep->c_num_allocations;
2104 errors = (unsigned long) atomic_read(&cachep->c_errors);
2105 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
2106 len += sprintf(buf+len, "%-16s %6lu %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
2107 cachep->c_name, active_objs, num_objs, cachep->c_offset, active_slabs, num_slabs,
2108 (1<<cachep->c_gfporder)*num_slabs,
2109 high, allocs, grown, reaped, errors);
2110 }
2111 #else
2112 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
2113 len += sprintf(buf+len, "%-17s %6lu %6lu %6lu\n", cachep->c_name, active_objs, num_objs, cachep->c_offset);
2114 #endif /* SLAB_STATS */
2115 } while ((cachep = cachep->c_nextp) != &cache_cache);
2116 up(&cache_chain_sem);
2118 return len;
2119 }
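/*
 * Editor's note (illustrative, not part of the original file): with
 * SLAB_STATS disabled each line written above has the form
 *
 *	<cache-name> <active-objs> <total-objs> <c_offset>
 *
 * while the SLAB_STATS build also reports active/total slabs, pages,
 * the high-water mark and the alloc/grown/reaped/error counters.
 */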
2120 #endif /* CONFIG_PROC_FS */