1 /*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
6 * 11 April '97. Started multi-threading - markhe
7 * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
8 * The sem is only needed when accessing/extending the cache-chain, which
9 * can never happen inside an interrupt (kmem_cache_create(),
10 * kmem_cache_shrink() and kmem_cache_reap()).
11 * This is a medium-term exclusion lock.
13 * Each cache has its own lock; 'c_spinlock'. This lock is needed only
14 * when accessing non-constant members of a cache-struct.
15 * Note: 'constant members' are assigned a value in kmem_cache_create() before
16 * the cache is linked into the cache-chain. The values never change, so not
17 * even a multi-reader lock is needed for these members.
18 * The c_spinlock is only ever held for a few cycles.
20 * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
21 * may be sleeping and therefore not holding the semaphore/lock), the
22 * c_growing field is used. This also prevents reaping from a cache.
24 * Note, caches can _never_ be destroyed. When a sub-system (eg module) has
25 * finished with a cache, it can only be shrunk. This leaves the cache empty,
26 * but already enabled for re-use, eg. during a module re-load.
28 * Notes:
29 * o Constructors/destructors are called while the cache-lock
30 * is _not_ held. Therefore they _must_ be threaded.
31 * o Constructors must not attempt to allocate memory from the
32 * same cache that they are a constructor for - infinite loop!
33 * (There is no easy way to trap this.)
34 * o The per-cache locks must be obtained with local-interrupts disabled.
35 * o When compiled with debug support, and an object-verify (upon release)
36 * is requested for a cache, the verify-function is called with the cache
37 * lock held. This helps debugging.
38 * o The functions called from try_to_free_page() must not attempt
39 * to allocate memory from a cache which is being grown.
40 * The buffer sub-system might try to allocate memory, via buffer_cachep.
41 * As this priority is passed to the SLAB, and then (if necessary) onto the
42 * gfp() funcs (which avoid calling try_to_free_page()), no deadlock
43 * should happen.
45 * The positioning of the per-cache lock is tricky. If the lock is
46 * placed on the same h/w cache line as commonly accessed members
47 * the number of L1 cache-line faults is reduced. However, this can
48 * lead to the cache-line ping-ponging between processors when the
49 * lock is in contention (and the common members are being accessed).
50 * Decided to keep it away from common members.
52 * More fine-graining is possible, with per-slab locks... but this might be
53 * taking fine-graining too far. It would, however, have the advantage that
54 * during most allocs/frees no writes occur to the cache-struct.
55 * Therefore a multi-reader/one writer lock could be used (the writer
56 * needed when the slab chain is being linked/unlinked).
57 * As we would not have an exclusion lock for the cache-structure, one
58 * would be needed per-slab (for updating s_free ptr, and/or the contents
59 * of s_index).
60 * The above locking would allow parallel operations to different slabs within
61 * the same cache with reduced spinning.
63 * Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
64 * would allow most allocations from the same cache to execute in parallel.
66 * At present, each engine can be growing a cache. This should be blocked.
68 * It is not currently 100% safe to examine the page_struct outside of a kernel
69 * or global cli lock. The risk is v. small, and non-fatal.
71 * Calls to printk() are not 100% safe (the function is not threaded). However,
72 * printk() is only used under an error condition, and the risk is v. small (not
73 * sure if the console write functions 'enjoy' executing multiple contexts in
74 * parallel. I guess they don't...).
75 * Note, for most calls to printk() any held cache-lock is dropped. This is not
76 * always done for text size reasons - having *_unlock() everywhere is bloat.
80 * An implementation of the Slab Allocator as described in outline in;
81 * UNIX Internals: The New Frontiers by Uresh Vahalia
82 * Pub: Prentice Hall ISBN 0-13-101908-2
83 * or with a little more detail in;
84 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
85 * Jeff Bonwick (Sun Microsystems).
86 * Presented at: USENIX Summer 1994 Technical Conference
90 * This implementation deviates from Bonwick's paper as it
91 * does not use a hash-table for large objects, but rather a per slab
92 * index to hold the bufctls. This allows the bufctl structure to
93 * be small (one word), but limits the number of objects a slab (not
94 * a cache) can contain when off-slab bufctls are used. The limit is the
95 * size of the largest general cache that does not use off-slab bufctls,
96 * divided by the size of a bufctl. For 32bit archs, this is 256/4 = 64.
97 * This is not serious, as it is only for large objects, when it is unwise
98 * to have too many per slab.
99 * Note: This limit can be raised by introducing a general cache whose size
100 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
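 *
 * Typical use of the interface below (an illustrative sketch only - the
 * 'foo' names are not part of this file):
 *
 *	static kmem_cache_t *foo_cachep;
 *	struct foo *fp;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL, NULL);
 *	fp = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, fp);
 *	kmem_cache_shrink(foo_cachep);	(when the sub-system is finished with it)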
103 #include <linux/mm.h>
104 #include <linux/slab.h>
105 #include <linux/interrupt.h>
106 #include <linux/config.h>
107 #include <linux/init.h>
108 #include <linux/smp.h>
110 #include <asm/system.h>
111 #include <asm/atomic.h>
112 #include <asm/spinlock.h>
114 /* If there is a different PAGE_SIZE around, and it works with this allocator,
115 * then change the following.
117 #if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
118 #error Your page size is probably not correctly supported - please check
119 #endif
121 /* SLAB_MGMT_CHECKS - 1 to enable extra checks in kmem_cache_create().
122 * 0 if you wish to reduce memory usage.
124 * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
125 * SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
126 * 0 for faster, smaller, code (especially in the critical paths).
128 * SLAB_STATS - 1 to collect stats for /proc/slabinfo.
129 * 0 for faster, smaller, code (especially in the critical paths).
131 * SLAB_SELFTEST - 1 to perform a few tests, mainly for development.
133 #define SLAB_MGMT_CHECKS 1
134 #define SLAB_DEBUG_SUPPORT 0
135 #define SLAB_STATS 0
136 #define SLAB_SELFTEST 0
138 /* Shouldn't this be in a header file somewhere? */
139 #define BYTES_PER_WORD sizeof(void *)
141 /* Legal flag mask for kmem_cache_create(). */
142 #if SLAB_DEBUG_SUPPORT
143 #if 0
144 #define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
145 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
146 SLAB_HIGH_PACK)
147 #endif
148 #define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
149 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
150 #else
151 #if 0
152 #define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
153 #endif
154 #define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
155 #endif /* SLAB_DEBUG_SUPPORT */
157 /* Slab management struct.
158 * Manages the objs in a slab. Placed either at the end of mem allocated
159 * for a slab, or from an internal obj cache (cache_slabp).
160 * Slabs are chained into a partially ordered list; fully used first, partial
161 * next, and then fully free slabs.
162 * The first 4 members are referenced during an alloc/free operation, and
163 * should always appear on the same cache line.
164 * Note: The offset between some members _must_ match offsets within
165 * the kmem_cache_t - see kmem_cache_init() for the checks. */
167 #define SLAB_OFFSET_BITS 16 /* could make this larger for 64bit archs */
169 typedef struct kmem_slab_s {
170 struct kmem_bufctl_s *s_freep; /* ptr to first inactive obj in slab */
171 struct kmem_bufctl_s *s_index;
172 unsigned long s_magic;
173 unsigned long s_inuse; /* num of objs active in slab */
175 struct kmem_slab_s *s_nextp;
176 struct kmem_slab_s *s_prevp;
177 void *s_mem; /* addr of first obj in slab */
178 unsigned long s_offset:SLAB_OFFSET_BITS,
179 s_dma:1;
180 } kmem_slab_t;
182 /* When the slab management is on-slab, this gives the size to use. */
183 #define slab_align_size (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
185 /* Test for end of slab chain. */
186 #define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_offset))
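/* Note: kmem_slab_end() works because, from &c_offset onwards, a kmem_cache_t
 * mirrors the layout of a kmem_slab_t: c_firstp/c_lastp overlay the sentinel's
 * s_nextp/s_prevp, and c_magic overlays s_magic (so an exhausted free list is
 * caught by the magic check in __kmem_cache_alloc()). The offset checks in
 * kmem_cache_init() verify this.
 */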
188 /* s_magic */
189 #define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* slab is alive */
190 #define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* slab has been destroyed */
192 /* Bufctl's are used for linking objs within a slab, identifying what slab an obj
193 * is in, and the address of the associated obj (for sanity checking with off-slab
194 * bufctls). What a bufctl contains depends upon the state of the obj and
195 * the organisation of the cache.
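 * (In the code below: a free on-slab obj has its bufctl's buf_nextp linking
 * the slab's freelist; an allocated on-slab obj has buf_slabp pointing back
 * at its slab; an off-slab (indexed) bufctl holds the obj's address in
 * buf_objp, used as a sanity check on free.)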
197 typedef struct kmem_bufctl_s {
198 union {
199 struct kmem_bufctl_s *buf_nextp;
200 kmem_slab_t *buf_slabp; /* slab for obj */
201 void * buf_objp;
202 } u;
203 } kmem_bufctl_t;
205 /* ...shorthand... */
206 #define buf_nextp u.buf_nextp
207 #define buf_slabp u.buf_slabp
208 #define buf_objp u.buf_objp
210 #if SLAB_DEBUG_SUPPORT
211 /* Magic nums for obj red zoning.
212 * Placed in the first word before and the first word after an obj.
214 #define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */
215 #define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
217 /* ...and for poisoning */
218 #define SLAB_POISON_BYTE 0x5a /* byte value for poisoning */
219 #define SLAB_POISON_END 0xa5 /* end-byte of poisoning */
221 #endif /* SLAB_DEBUG_SUPPORT */
223 /* Cache struct - manages a cache.
224 * First four members are commonly referenced during an alloc/free operation.
226 struct kmem_cache_s {
227 kmem_slab_t *c_freep; /* first slab w. free objs */
228 unsigned long c_flags; /* constant flags */
229 unsigned long c_offset;
230 unsigned long c_num; /* # of objs per slab */
232 unsigned long c_magic;
233 unsigned long c_inuse; /* kept at zero */
234 kmem_slab_t *c_firstp; /* first slab in chain */
235 kmem_slab_t *c_lastp; /* last slab in chain */
237 spinlock_t c_spinlock;
238 unsigned long c_growing;
239 unsigned long c_dflags; /* dynamic flags */
240 size_t c_org_size;
241 unsigned long c_gfporder; /* order of pgs per slab (2^n) */
242 void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
243 void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */
244 unsigned long c_align; /* alignment of objs */
245 size_t c_colour; /* cache colouring range */
246 size_t c_colour_next;/* cache colouring */
247 unsigned long c_failures;
248 const char *c_name;
249 struct kmem_cache_s *c_nextp;
250 kmem_cache_t *c_index_cachep;
251 #if SLAB_STATS
252 unsigned long c_num_active;
253 unsigned long c_num_allocations;
254 unsigned long c_high_mark;
255 unsigned long c_grown;
256 unsigned long c_reaped;
257 atomic_t c_errors;
258 #endif /* SLAB_STATS */
261 /* internal c_flags */
262 #define SLAB_CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
263 #define SLAB_CFLGS_BUFCTL 0x020000UL /* bufctls in own cache */
264 #define SLAB_CFLGS_GENERAL 0x080000UL /* a general cache */
266 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
267 #define SLAB_CFLGS_GROWN 0x000002UL /* don't reap a recently grown */
269 #define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB)
270 #define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL)
271 #define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN)
273 #if SLAB_STATS
274 #define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++)
275 #define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--)
276 #define SLAB_STATS_INC_ALLOCED(x) ((x)->c_num_allocations++)
277 #define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++)
278 #define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++)
279 #define SLAB_STATS_SET_HIGH(x) do { if ((x)->c_num_active > (x)->c_high_mark) \
280 (x)->c_high_mark = (x)->c_num_active; \
281 } while (0)
282 #define SLAB_STATS_INC_ERR(x) (atomic_inc(&(x)->c_errors))
283 #else
284 #define SLAB_STATS_INC_ACTIVE(x)
285 #define SLAB_STATS_DEC_ACTIVE(x)
286 #define SLAB_STATS_INC_ALLOCED(x)
287 #define SLAB_STATS_INC_GROWN(x)
288 #define SLAB_STATS_INC_REAPED(x)
289 #define SLAB_STATS_SET_HIGH(x)
290 #define SLAB_STATS_INC_ERR(x)
291 #endif /* SLAB_STATS */
293 #if SLAB_SELFTEST
294 #if !SLAB_DEBUG_SUPPORT
295 #error Debug support needed for self-test
296 #endif
297 static void kmem_self_test(void);
298 #endif /* SLAB_SELFTEST */
300 /* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
301 #define SLAB_C_MAGIC 0x4F17A36DUL
303 /* maximum size of an obj (in 2^order pages) */
304 #define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */
306 /* maximum num of pages for a slab (prevents large requests to the VM layer) */
307 #define SLAB_MAX_GFP_ORDER 5 /* 32 pages */
309 /* the 'preferred' minimum num of objs per slab - maybe less for large objs */
310 #define SLAB_MIN_OBJS_PER_SLAB 4
312 /* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
313 * then the page order must be less than this before trying the next order.
315 #define SLAB_BREAK_GFP_ORDER_HI 2
316 #define SLAB_BREAK_GFP_ORDER_LO 1
317 static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
319 /* Macros for storing/retrieving the cachep and/or slab from the
320 * global 'mem_map'. With off-slab bufctls, these are used to find the
321 * slab an obj belongs to. With kmalloc(), and kfree(), these are used
322 * to find the cache which an obj belongs to.
324 #define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x))
325 #define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next)
326 #define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x))
327 #define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev)
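/* (These reuse the page's next/prev fields while the page is owned by the
 * slab allocator - PageSetSlab() is set in kmem_cache_grow() - and kfree()
 * and __kmem_cache_free() rely on them to map an obj address back to its
 * cache and slab.)
 */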
329 /* Size description struct for general caches. */
330 typedef struct cache_sizes {
331 size_t cs_size;
332 kmem_cache_t *cs_cachep;
333 } cache_sizes_t;
335 static cache_sizes_t cache_sizes[] = {
336 #if PAGE_SIZE == 4096
337 { 32, NULL},
338 #endif
339 { 64, NULL},
340 { 128, NULL},
341 { 256, NULL},
342 { 512, NULL},
343 {1024, NULL},
344 {2048, NULL},
345 {4096, NULL},
346 {8192, NULL},
347 {16384, NULL},
348 {32768, NULL},
349 {65536, NULL},
350 {131072, NULL},
351 {0, NULL}
354 /* Names for the general caches. Not placed into the sizes struct for
355 * a good reason; the string ptr is not needed while searching in kmalloc(),
356 * and would 'get-in-the-way' in the h/w cache.
358 static char *cache_sizes_name[] = {
359 #if PAGE_SIZE == 4096
360 "size-32",
361 #endif
362 "size-64",
363 "size-128",
364 "size-256",
365 "size-512",
366 "size-1024",
367 "size-2048",
368 "size-4096",
369 "size-8192",
370 "size-16384",
371 "size-32768",
372 "size-65536",
373 "size-131072"
376 /* internal cache of cache description objs */
377 static kmem_cache_t cache_cache = {
378 /* freep, flags */ kmem_slab_end(&cache_cache), SLAB_NO_REAP,
379 /* offset, num */ sizeof(kmem_cache_t), 0,
380 /* c_magic, c_inuse */ SLAB_C_MAGIC, 0,
381 /* firstp, lastp */ kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
382 /* spinlock */ SPIN_LOCK_UNLOCKED,
383 /* growing */ 0,
384 /* dflags */ 0,
385 /* org_size, gfp */ 0, 0,
386 /* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES,
387 /* colour, colour_next */ 0, 0,
388 /* failures */ 0,
389 /* name */ "kmem_cache",
390 /* nextp */ &cache_cache,
391 /* index */ NULL,
394 /* Guard access to the cache-chain. */
395 static struct semaphore cache_chain_sem;
397 /* Place marker ('clock hand') maintained for reaping. */
398 static kmem_cache_t *clock_searchp = &cache_cache;
400 /* Internal slab management cache, for when slab management is off-slab. */
401 static kmem_cache_t *cache_slabp = NULL;
403 /* Max number of objs-per-slab for caches which use bufctl's.
404 * Needed to avoid a possible looping condition in kmem_cache_grow().
406 static unsigned long bufctl_limit = 0;
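/* (With the general caches created by kmem_cache_sizes_init() below, this
 * ends up as 256/sizeof(kmem_bufctl_t) - i.e. 64 on 32bit archs - as
 * described in the header comment.)
 */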
408 /* Initialisation - setup the `cache' cache. */
409 long __init kmem_cache_init(long start, long end)
411 size_t size, i;
413 #define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x)
414 #define kmem_slab_diff(a,b) (kmem_slab_offset(a) - kmem_slab_offset(b))
415 #define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
416 #define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
418 /* Sanity checks... */
419 if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
420 kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
421 ((kmem_cache_offset(c_lastp) -
422 ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
423 kmem_slab_offset(s_prevp)) ||
424 kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
425 /* Offsets to the magic are incorrect: either the structures have
426 * been incorrectly changed, or adjustments are needed for your
427 * architecture.
429 panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
430 /* NOTREACHED */
432 #undef kmem_cache_offset
433 #undef kmem_cache_diff
434 #undef kmem_slab_offset
435 #undef kmem_slab_diff
437 cache_chain_sem = MUTEX;
439 size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
440 size += (L1_CACHE_BYTES-1);
441 size &= ~(L1_CACHE_BYTES-1);
442 cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
444 i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
445 cache_cache.c_num = i / size; /* num of objs per slab */
447 /* Cache colouring. */
448 cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
449 cache_cache.c_colour_next = cache_cache.c_colour;
452 * Fragmentation resistance on low memory - only use bigger
453 * page orders on machines with more than 32MB of memory.
455 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
456 slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
457 return start;
460 /* Initialisation - setup remaining internal and general caches.
461 * Called after the gfp() functions have been enabled, and before smp_init().
463 void __init kmem_cache_sizes_init(void)
465 unsigned int found = 0;
467 cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
468 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
469 if (cache_slabp) {
470 char **names = cache_sizes_name;
471 cache_sizes_t *sizes = cache_sizes;
472 do {
473 /* For performance, all the general caches are L1 aligned.
474 * This should be particularly beneficial on SMP boxes, as it
475 * eliminates "false sharing".
476 * Note: for systems short on memory, removing the alignment will
477 * allow tighter packing of the smaller caches. */
478 if (!(sizes->cs_cachep =
479 kmem_cache_create(*names++, sizes->cs_size,
480 0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
481 goto panic_time;
482 if (!found) {
483 /* Inc off-slab bufctl limit until the ceiling is hit. */
484 if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
485 found++;
486 else
487 bufctl_limit =
488 (sizes->cs_size/sizeof(kmem_bufctl_t));
490 sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
491 sizes++;
492 } while (sizes->cs_size);
493 #if SLAB_SELFTEST
494 kmem_self_test();
495 #endif /* SLAB_SELFTEST */
496 return;
498 panic_time:
499 panic("kmem_cache_sizes_init: Error creating caches");
500 /* NOTREACHED */
503 /* Interface to the system's page allocator. *dma is set non-zero if all
504 * of the memory is DMAable. No need to hold the cache-lock.
506 static inline void *
507 kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
509 void *addr;
511 *dma = flags & SLAB_DMA;
512 addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
513 /* Assume that now we have the pages, no one else can legally
514 * mess with the 'struct page's.
515 * However vm_scan() might try to test the structure to see if
516 * it is a named-page or buffer-page. The members it tests are
517 * of no interest here.....
519 if (!*dma && addr) {
520 /* Need to check if can dma. */
521 struct page *page = mem_map + MAP_NR(addr);
522 *dma = 1<<cachep->c_gfporder;
523 while ((*dma)--) {
524 if (!PageDMA(page)) {
525 *dma = 0;
526 break;
528 page++;
531 return addr;
534 /* Interface to system's page release. */
535 static inline void
536 kmem_freepages(kmem_cache_t *cachep, void *addr)
538 unsigned long i = (1<<cachep->c_gfporder);
539 struct page *page = &mem_map[MAP_NR(addr)];
541 /* free_pages() does not clear the type bit - we do that.
542 * The pages have been unlinked from their cache-slab,
543 * but their 'struct page's might be accessed in
544 * vm_scan(). Shouldn't be a worry.
546 while (i--) {
547 PageClearSlab(page);
548 page++;
550 free_pages((unsigned long)addr, cachep->c_gfporder);
553 #if SLAB_DEBUG_SUPPORT
554 static inline void
555 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
557 memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
558 *(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISON_END;
561 static inline int
562 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
564 void *end;
565 end = memchr(addr, SLAB_POISON_END, cachep->c_org_size);
566 if (end != (addr+cachep->c_org_size-1))
567 return 1;
568 return 0;
570 #endif /* SLAB_DEBUG_SUPPORT */
572 /* Three slab chain funcs - all called with ints disabled and the appropriate
573 * cache-lock held.
575 static inline void
576 kmem_slab_unlink(kmem_slab_t *slabp)
578 kmem_slab_t *prevp = slabp->s_prevp;
579 kmem_slab_t *nextp = slabp->s_nextp;
580 prevp->s_nextp = nextp;
581 nextp->s_prevp = prevp;
584 static inline void
585 kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
587 kmem_slab_t *lastp = cachep->c_lastp;
588 slabp->s_nextp = kmem_slab_end(cachep);
589 slabp->s_prevp = lastp;
590 cachep->c_lastp = slabp;
591 lastp->s_nextp = slabp;
594 static inline void
595 kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
597 kmem_slab_t *nextp = cachep->c_freep;
598 kmem_slab_t *prevp = nextp->s_prevp;
599 slabp->s_nextp = nextp;
600 slabp->s_prevp = prevp;
601 nextp->s_prevp = slabp;
602 slabp->s_prevp->s_nextp = slabp;
605 /* Destroy all the objs in a slab, and release the mem back to the system.
606 * Before calling, the slab must have been unlinked from the cache.
607 * The cache-lock is not held/needed.
609 static void
610 kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
612 if (cachep->c_dtor
613 #if SLAB_DEBUG_SUPPORT
614 || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
615 #endif /*SLAB_DEBUG_SUPPORT*/
617 /* Doesn't use the bufctl ptrs to find objs. */
618 unsigned long num = cachep->c_num;
619 void *objp = slabp->s_mem;
620 do {
621 #if SLAB_DEBUG_SUPPORT
622 if (cachep->c_flags & SLAB_RED_ZONE) {
623 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
624 printk(KERN_ERR "kmem_slab_destroy: "
625 "Bad front redzone - %s\n",
626 cachep->c_name);
627 objp += BYTES_PER_WORD;
628 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
629 SLAB_RED_MAGIC1)
630 printk(KERN_ERR "kmem_slab_destroy: "
631 "Bad rear redzone - %s\n",
632 cachep->c_name);
634 if (cachep->c_dtor)
635 #endif /*SLAB_DEBUG_SUPPORT*/
636 (cachep->c_dtor)(objp, cachep, 0);
637 #if SLAB_DEBUG_SUPPORT
638 else if (cachep->c_flags & SLAB_POISON) {
639 if (kmem_check_poison_obj(cachep, objp))
640 printk(KERN_ERR "kmem_slab_destroy: "
641 "Bad poison - %s\n", cachep->c_name);
643 if (cachep->c_flags & SLAB_RED_ZONE)
644 objp -= BYTES_PER_WORD;
645 #endif /* SLAB_DEBUG_SUPPORT */
646 objp += cachep->c_offset;
647 if (!slabp->s_index)
648 objp += sizeof(kmem_bufctl_t);
649 } while (--num);
652 slabp->s_magic = SLAB_MAGIC_DESTROYED;
653 kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
654 if (slabp->s_index)
655 kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
656 if (SLAB_OFF_SLAB(cachep->c_flags))
657 kmem_cache_free(cache_slabp, slabp);
660 /* Calculate the num of objs, wastage, and bytes left over for a given slab size. */
661 static inline size_t
662 kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
663 unsigned long flags, size_t *left_over, unsigned long *num)
665 size_t wastage = PAGE_SIZE<<gfporder;
667 if (SLAB_OFF_SLAB(flags))
668 gfporder = 0;
669 else
670 gfporder = slab_align_size;
671 wastage -= gfporder;
672 *num = wastage / size;
673 wastage -= (*num * size);
674 *left_over = wastage;
676 return (wastage + gfporder + (extra * *num));
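/* A worked example (sketch, assuming PAGE_SIZE 4096 and slab_align_size 32):
 * an on-slab cache with size 64 (obj plus its bufctl) at gfporder 0 gives
 * num = (4096-32)/64 = 63 objs, left_over = 4064 - 63*64 = 32 bytes, and a
 * returned wastage of 32 + 32 + extra*63.
 */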
679 /* Create a cache:
680 * Returns a ptr to the cache on success, NULL on failure.
681 * Cannot be called within an int, but can be interrupted.
682 * NOTE: The 'name' is assumed to be memory that is _not_ going to disappear.
684 kmem_cache_t *
685 kmem_cache_create(const char *name, size_t size, size_t offset,
686 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
687 void (*dtor)(void*, kmem_cache_t *, unsigned long))
689 const char *func_nm= KERN_ERR "kmem_create: ";
690 kmem_cache_t *searchp;
691 kmem_cache_t *cachep=NULL;
692 size_t extra;
693 size_t left_over;
694 size_t align;
696 /* Sanity checks... */
697 #if SLAB_MGMT_CHECKS
698 if (!name) {
699 printk("%sNULL ptr\n", func_nm);
700 goto opps;
702 if (in_interrupt()) {
703 printk("%sCalled during int - %s\n", func_nm, name);
704 goto opps;
707 if (size < BYTES_PER_WORD) {
708 printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
709 size = BYTES_PER_WORD;
712 if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
713 printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
714 goto opps;
717 if (dtor && !ctor) {
718 /* Decon, but no con - doesn't make sense */
719 printk("%sDecon but no con - %s\n", func_nm, name);
720 goto opps;
723 if (offset < 0 || offset > size) {
724 printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
725 offset = 0;
728 #if SLAB_DEBUG_SUPPORT
729 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
730 /* No constructor, but initial state check requested */
731 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
732 flags &= ~SLAB_DEBUG_INITIAL;
735 if ((flags & SLAB_POISON) && ctor) {
736 /* request for poisoning, but we can't do that with a constructor */
737 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
738 flags &= ~SLAB_POISON;
740 #if 0
741 if ((flags & SLAB_HIGH_PACK) && ctor) {
742 printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
743 flags &= ~SLAB_HIGH_PACK;
745 if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
746 printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
747 func_nm, name);
748 flags &= ~SLAB_HIGH_PACK;
750 #endif
751 #endif /* SLAB_DEBUG_SUPPORT */
752 #endif /* SLAB_MGMT_CHECKS */
754 /* Always check the flags; a caller might be expecting debug
755 * support which isn't available.
757 if (flags & ~SLAB_C_MASK) {
758 printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
759 flags &= SLAB_C_MASK;
762 /* Get cache's description obj. */
763 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
764 if (!cachep)
765 goto opps;
766 memset(cachep, 0, sizeof(kmem_cache_t));
768 /* Check that size is in terms of words. This is needed to avoid
769 * unaligned accesses for some archs when redzoning is used, and makes
770 * sure any on-slab bufctl's are also correctly aligned.
772 if (size & (BYTES_PER_WORD-1)) {
773 size += (BYTES_PER_WORD-1);
774 size &= ~(BYTES_PER_WORD-1);
775 printk("%sForcing size word alignment - %s\n", func_nm, name);
778 cachep->c_org_size = size;
779 #if SLAB_DEBUG_SUPPORT
780 if (flags & SLAB_RED_ZONE) {
781 /* There is no point trying to honour cache alignment when redzoning. */
782 flags &= ~SLAB_HWCACHE_ALIGN;
783 size += 2*BYTES_PER_WORD; /* words for redzone */
785 #endif /* SLAB_DEBUG_SUPPORT */
787 align = BYTES_PER_WORD;
788 if (flags & SLAB_HWCACHE_ALIGN)
789 align = L1_CACHE_BYTES;
791 /* Determine if the slab management and/or bufctls are 'on' or 'off' slab. */
792 extra = sizeof(kmem_bufctl_t);
793 if (size < (PAGE_SIZE>>3)) {
794 /* Size is small(ish). Use packing where bufctl size per
795 * obj is low, and slab management is on-slab.
797 #if 0
798 if ((flags & SLAB_HIGH_PACK)) {
799 /* Special high packing for small objects
800 * (mainly for vm_mapping structs, but
801 * others can use it).
803 if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
804 size == L1_CACHE_BYTES) {
805 /* The bufctl is stored with the object. */
806 extra = 0;
807 } else
808 flags &= ~SLAB_HIGH_PACK;
810 #endif
811 } else {
812 /* Size is large, assume best to place the slab management obj
813 * off-slab (should allow better packing of objs).
815 flags |= SLAB_CFLGS_OFF_SLAB;
816 if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
817 || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
818 /* To avoid waste the bufctls are off-slab... */
819 flags |= SLAB_CFLGS_BUFCTL;
820 extra = 0;
821 } /* else slab management is off-slab, but freelist pointers are on. */
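/* (e.g. with 4096 byte pages, sizes of 512, 1024, 2048 and any multiple of
 * 4096 take the SLAB_CFLGS_BUFCTL path above - which is how the larger
 * general caches are created by kmem_cache_sizes_init().)
 */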
823 size += extra;
825 if (flags & SLAB_HWCACHE_ALIGN) {
826 /* Need to adjust size so that objs are cache aligned. */
827 if (size > (L1_CACHE_BYTES/2)) {
828 size_t words = size % L1_CACHE_BYTES;
829 if (words)
830 size += (L1_CACHE_BYTES-words);
831 } else {
832 /* Small obj size, can get at least two per cache line. */
833 int num_per_line = L1_CACHE_BYTES/size;
834 left_over = L1_CACHE_BYTES - (num_per_line*size);
835 if (left_over) {
836 /* Need to adjust size so objs cache align. */
837 if (left_over%num_per_line) {
838 /* Odd num of objs per line - fixup. */
839 num_per_line--;
840 left_over += size;
842 size += (left_over/num_per_line);
845 } else if (!(size%L1_CACHE_BYTES)) {
846 /* Size happens to cache align... */
847 flags |= SLAB_HWCACHE_ALIGN;
848 align = L1_CACHE_BYTES;
851 /* Calculate the size (in pages) of slabs, and the num of objs per slab.
852 * This could be made much more intelligent. For now, try to avoid
853 * using high page-orders for slabs. When the gfp() funcs are more
854 * friendly towards high-order requests, this should be changed.
856 do {
857 size_t wastage;
858 unsigned int break_flag = 0;
859 cal_wastage:
860 wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
861 flags, &left_over, &cachep->c_num);
862 if (!cachep->c_num)
863 goto next;
864 if (break_flag)
865 break;
866 if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
867 /* Oops, this num of objs will cause problems. */
868 cachep->c_gfporder--;
869 break_flag++;
870 goto cal_wastage;
872 if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
873 break;
875 /* Large num of objs is good, but v. large slabs are currently
876 * bad for the gfp()s.
878 if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
879 if (cachep->c_gfporder < slab_break_gfp_order)
880 goto next;
883 /* Stop caches with small objs having a large num of pages. */
884 if (left_over <= slab_align_size)
885 break;
886 if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
887 break; /* Acceptable internal fragmentation. */
888 next:
889 cachep->c_gfporder++;
890 } while (1);
892 /* If the slab management has been placed off-slab, and we have enough space then
893 * move it on-slab. This is at the expense of any extra colouring.
895 if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
896 left_over >= slab_align_size) {
897 flags &= ~SLAB_CFLGS_OFF_SLAB;
898 left_over -= slab_align_size;
901 /* Offset must be a multiple of the alignment. */
902 offset += (align-1);
903 offset &= ~(align-1);
905 /* Mess around with the offset alignment. */
906 if (!left_over) {
907 offset = 0;
908 } else if (left_over < offset) {
909 offset = align;
910 if (flags & SLAB_HWCACHE_ALIGN) {
911 if (left_over < offset)
912 offset = 0;
913 } else {
914 /* Offset is BYTES_PER_WORD, and left_over is at
915 * least BYTES_PER_WORD.
917 if (left_over >= (BYTES_PER_WORD*2)) {
918 offset >>= 1;
919 if (left_over >= (BYTES_PER_WORD*4))
920 offset >>= 1;
923 } else if (!offset) {
924 /* No offset requested, but space enough - give one. */
925 offset = left_over/align;
926 if (flags & SLAB_HWCACHE_ALIGN) {
927 if (offset >= 8) {
928 /* A large number of colours - use a larger alignment. */
929 align <<= 1;
931 } else {
932 if (offset >= 10) {
933 align <<= 1;
934 if (offset >= 16)
935 align <<= 1;
938 offset = align;
941 #if 0
942 printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
943 #endif
945 if ((cachep->c_align = (unsigned long) offset))
946 cachep->c_colour = (left_over/offset);
947 cachep->c_colour_next = cachep->c_colour;
949 /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
950 if (!SLAB_BUFCTL(flags))
951 size -= sizeof(kmem_bufctl_t);
952 else
953 cachep->c_index_cachep =
954 kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
955 cachep->c_offset = (unsigned long) size;
956 cachep->c_freep = kmem_slab_end(cachep);
957 cachep->c_firstp = kmem_slab_end(cachep);
958 cachep->c_lastp = kmem_slab_end(cachep);
959 cachep->c_flags = flags;
960 cachep->c_ctor = ctor;
961 cachep->c_dtor = dtor;
962 cachep->c_magic = SLAB_C_MAGIC;
963 cachep->c_name = name; /* Simply point to the name. */
964 spin_lock_init(&cachep->c_spinlock);
966 /* Need the semaphore to access the chain. */
967 down(&cache_chain_sem);
968 searchp = &cache_cache;
969 do {
970 /* The name field is constant - no lock needed. */
971 if (!strcmp(searchp->c_name, name)) {
972 printk("%sDup name - %s\n", func_nm, name);
973 break;
975 searchp = searchp->c_nextp;
976 } while (searchp != &cache_cache);
978 /* There is no reason to lock our new cache before we
979 * link it in - no one knows about it yet...
981 cachep->c_nextp = cache_cache.c_nextp;
982 cache_cache.c_nextp = cachep;
983 up(&cache_chain_sem);
984 opps:
985 return cachep;
988 /* Shrink a cache. Releases as many slabs as possible for a cache.
989 * It is expected this function will be called by a module when it is
990 * unloaded. The cache is _not_ removed, this creates too many problems and
991 * the cache-structure does not take up much room. A module should keep its
992 * cache pointer(s) somewhere that persists across the unload, so when it is
993 * reloaded it knows the cache is still available. To help debugging, a zero
994 * exit status indicates that all slabs were released.
997 kmem_cache_shrink(kmem_cache_t *cachep)
999 kmem_cache_t *searchp;
1000 kmem_slab_t *slabp;
1001 int ret;
1003 if (!cachep) {
1004 printk(KERN_ERR "kmem_shrink: NULL ptr\n");
1005 return 2;
1007 if (in_interrupt()) {
1008 printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
1009 return 2;
1012 /* Find the cache in the chain of caches. */
1013 down(&cache_chain_sem); /* Semaphore is needed. */
1014 searchp = &cache_cache;
1015 for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
1016 if (searchp->c_nextp != cachep)
1017 continue;
1019 /* Accessing clock_searchp is safe - we hold the mutex. */
1020 if (cachep == clock_searchp)
1021 clock_searchp = cachep->c_nextp;
1022 goto found;
1024 up(&cache_chain_sem);
1025 printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
1026 return 2;
1027 found:
1028 /* Release the semaphore before getting the cache-lock. This could
1029 * mean multiple engines are shrinking the cache, but so what.
1031 up(&cache_chain_sem);
1032 spin_lock_irq(&cachep->c_spinlock);
1034 /* If the cache is growing, stop shrinking. */
1035 while (!cachep->c_growing) {
1036 slabp = cachep->c_lastp;
1037 if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
1038 break;
1039 kmem_slab_unlink(slabp);
1040 spin_unlock_irq(&cachep->c_spinlock);
1041 kmem_slab_destroy(cachep, slabp);
1042 spin_lock_irq(&cachep->c_spinlock);
1044 ret = 1;
1045 if (cachep->c_lastp == kmem_slab_end(cachep))
1046 ret--; /* Cache is empty. */
1047 spin_unlock_irq(&cachep->c_spinlock);
1048 return ret;
1051 /* Get the memory for a slab management obj. */
1052 static inline kmem_slab_t *
1053 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
1055 kmem_slab_t *slabp;
1057 if (SLAB_OFF_SLAB(cachep->c_flags)) {
1058 /* Slab management obj is off-slab. */
1059 slabp = kmem_cache_alloc(cache_slabp, local_flags);
1060 } else {
1061 /* Slab management at end of slab memory, placed so that
1062 * the position is 'coloured'.
1064 void *end;
1065 end = objp + (cachep->c_num * cachep->c_offset);
1066 if (!SLAB_BUFCTL(cachep->c_flags))
1067 end += (cachep->c_num * sizeof(kmem_bufctl_t));
1068 slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
1071 if (slabp) {
1072 slabp->s_inuse = 0;
1073 slabp->s_dma = 0;
1074 slabp->s_index = NULL;
1077 return slabp;
1080 static inline void
1081 kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
1082 unsigned long ctor_flags)
1084 kmem_bufctl_t **bufpp = &slabp->s_freep;
1085 unsigned long num = cachep->c_num-1;
1087 do {
1088 #if SLAB_DEBUG_SUPPORT
1089 if (cachep->c_flags & SLAB_RED_ZONE) {
1090 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1091 objp += BYTES_PER_WORD;
1092 *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
1094 #endif /* SLAB_DEBUG_SUPPORT */
1096 /* Constructors are not allowed to allocate memory from the same cache
1097 * which they are a constructor for. Otherwise, deadlock.
1098 * They must also be threaded.
1100 if (cachep->c_ctor)
1101 cachep->c_ctor(objp, cachep, ctor_flags);
1102 #if SLAB_DEBUG_SUPPORT
1103 else if (cachep->c_flags & SLAB_POISON) {
1104 /* need to poison the objs */
1105 kmem_poison_obj(cachep, objp);
1108 if (cachep->c_flags & SLAB_RED_ZONE) {
1109 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
1110 SLAB_RED_MAGIC1) {
1111 *((unsigned long*)(objp+cachep->c_org_size)) =
1112 SLAB_RED_MAGIC1;
1113 printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
1114 "after constructor - %s\n", cachep->c_name);
1116 objp -= BYTES_PER_WORD;
1117 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
1118 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1119 printk(KERN_ERR "kmem_init_obj: Bad front redzone "
1120 "after constructor - %s\n", cachep->c_name);
1123 #endif /* SLAB_DEBUG_SUPPORT */
1125 objp += cachep->c_offset;
1126 if (!slabp->s_index) {
1127 *bufpp = objp;
1128 objp += sizeof(kmem_bufctl_t);
1129 } else
1130 *bufpp = &slabp->s_index[num];
1131 bufpp = &(*bufpp)->buf_nextp;
1132 } while (num--);
1134 *bufpp = NULL;
1137 /* Grow (by 1) the number of slabs within a cache. This is called by
1138 * kmem_cache_alloc() when there are no active objs left in a cache.
1140 static int
1141 kmem_cache_grow(kmem_cache_t * cachep, int flags)
1143 kmem_slab_t *slabp;
1144 struct page *page;
1145 void *objp;
1146 size_t offset;
1147 unsigned int dma, local_flags;
1148 unsigned long ctor_flags;
1149 unsigned long save_flags;
1151 /* Be lazy and only check for valid flags here,
1152 * keeping it out of the critical path in kmem_cache_alloc().
1154 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
1155 printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
1156 flags, cachep->c_name);
1157 flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
1160 if (flags & SLAB_NO_GROW)
1161 return 0;
1163 /* The test for missing atomic flag is performed here, rather than
1164 * the more obvious place, simply to reduce the critical path length
1165 * in kmem_cache_alloc(). If a caller is slightly mis-behaving they
1166 * will eventually be caught here (where it matters).
1168 if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
1169 printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
1170 cachep->c_name);
1171 flags &= ~SLAB_LEVEL_MASK;
1172 flags |= SLAB_ATOMIC;
1174 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1175 local_flags = (flags & SLAB_LEVEL_MASK);
1176 if (local_flags == SLAB_ATOMIC) {
1177 /* Not allowed to sleep. Need to tell a constructor about
1178 * this - it might need to know...
1180 ctor_flags |= SLAB_CTOR_ATOMIC;
1183 /* About to mess with non-constant members - lock. */
1184 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1186 /* Get the colour for the slab, and calculate the next value. */
1187 if (!(offset = cachep->c_colour_next--))
1188 cachep->c_colour_next = cachep->c_colour;
1189 offset *= cachep->c_align;
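/* (e.g. with c_colour == 3 and c_align == 32 - the values depend on the
 * cache geometry - successive slabs place their first obj at offsets
 * 96, 64, 32, 0, 96, ... so equivalent objs in different slabs fall on
 * different L1 cache lines.)
 */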
1190 cachep->c_dflags = SLAB_CFLGS_GROWN;
1192 cachep->c_growing++;
1193 re_try:
1194 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1196 /* A series of memory allocations for a new slab.
1197 * Neither the cache-chain semaphore nor the cache-lock is
1198 * held, but the incremented c_growing prevents this
1199 * cache from being reaped or shrunk.
1200 * Note: The cache could be selected for reaping in
1201 * kmem_cache_reap(), but when the final test is made the
1202 * growing value will be seen.
1205 /* Get mem for the objs. */
1206 if (!(objp = kmem_getpages(cachep, flags, &dma)))
1207 goto failed;
1209 /* Get slab management. */
1210 if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
1211 goto opps1;
1212 if (dma)
1213 slabp->s_dma = 1;
1214 if (SLAB_BUFCTL(cachep->c_flags)) {
1215 slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
1216 if (!slabp->s_index)
1217 goto opps2;
1220 /* Nasty!!!!!! I hope this is OK. */
1221 dma = 1 << cachep->c_gfporder;
1222 page = &mem_map[MAP_NR(objp)];
1223 do {
1224 SLAB_SET_PAGE_CACHE(page, cachep);
1225 SLAB_SET_PAGE_SLAB(page, slabp);
1226 PageSetSlab(page);
1227 page++;
1228 } while (--dma);
1230 slabp->s_offset = offset; /* It will fit... */
1231 objp += offset; /* Address of first object. */
1232 slabp->s_mem = objp;
1234 /* For on-slab bufctls, c_offset is the distance between the start of
1235 * an obj and its related bufctl. For off-slab bufctls, c_offset is
1236 * the distance between objs in the slab.
1238 kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
1240 spin_lock_irq(&cachep->c_spinlock);
1242 /* Make slab active. */
1243 slabp->s_magic = SLAB_MAGIC_ALLOC;
1244 kmem_slab_link_end(cachep, slabp);
1245 if (cachep->c_freep == kmem_slab_end(cachep))
1246 cachep->c_freep = slabp;
1247 SLAB_STATS_INC_GROWN(cachep);
1248 cachep->c_failures = 0;
1249 cachep->c_growing--;
1251 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1252 return 1;
1253 opps2:
1254 if (SLAB_OFF_SLAB(cachep->c_flags))
1255 kmem_cache_free(cache_slabp, slabp);
1256 opps1:
1257 kmem_freepages(cachep, objp);
1258 failed:
1259 spin_lock_irq(&cachep->c_spinlock);
1260 if (local_flags != SLAB_ATOMIC && cachep->c_gfporder) {
1261 /* For large order (>0) slabs, we try again.
1262 * Needed because the gfp() functions are not good at giving
1263 * out contiguous pages unless pushed (but do not push too hard).
1265 if (cachep->c_failures++ < 4 && cachep->c_freep == kmem_slab_end(cachep))
1266 goto re_try;
1267 cachep->c_failures = 1; /* Memory is low, don't try as hard next time. */
1269 cachep->c_growing--;
1270 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1271 return 0;
1274 static void
1275 kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
1277 if (cachep)
1278 SLAB_STATS_INC_ERR(cachep); /* this is atomic */
1279 printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
1280 str, cachep ? cachep->c_name : "unknown");
1283 static void
1284 kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
1286 if (cachep)
1287 SLAB_STATS_INC_ERR(cachep);
1288 printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
1289 str, objp, cachep ? cachep->c_name : "unknown");
1292 /* Search for a slab whose objs are suitable for DMA.
1293 * Note: since testing the first free slab (in __kmem_cache_alloc()),
1294 * ints must not have been enabled, or the cache-lock released!
1296 static inline kmem_slab_t *
1297 kmem_cache_search_dma(kmem_cache_t * cachep)
1299 kmem_slab_t *slabp = cachep->c_freep->s_nextp;
1301 for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
1302 if (!(slabp->s_dma))
1303 continue;
1304 kmem_slab_unlink(slabp);
1305 kmem_slab_link_free(cachep, slabp);
1306 cachep->c_freep = slabp;
1307 break;
1309 return slabp;
1312 #if SLAB_DEBUG_SUPPORT
1313 /* Perform extra freeing checks. Currently, this check is only for caches
1314 * that use bufctl structures within the slab. Those which use bufctl's
1315 * from the internal cache have a reasonable check when the address is
1316 * searched for. Called with the cache-lock held.
1318 static void *
1319 kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
1320 kmem_bufctl_t *bufp, void * objp)
1322 if (SLAB_BUFCTL(cachep->c_flags))
1323 return objp;
1325 /* Check slab's freelist to see if this obj is there. */
1326 for (; search_bufp; search_bufp = search_bufp->buf_nextp) {
1327 if (search_bufp != bufp)
1328 continue;
1329 return NULL;
1331 return objp;
1333 #endif /* SLAB_DEBUG_SUPPORT */
1335 /* Called with cache lock held. */
1336 static inline void
1337 kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1339 if (slabp->s_nextp->s_inuse) {
1340 /* Not at correct position. */
1341 if (cachep->c_freep == slabp)
1342 cachep->c_freep = slabp->s_nextp;
1343 kmem_slab_unlink(slabp);
1344 kmem_slab_link_end(cachep, slabp);
1348 /* Called with cache lock held. */
1349 static inline void
1350 kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1352 if (slabp->s_nextp->s_inuse == cachep->c_num) {
1353 kmem_slab_unlink(slabp);
1354 kmem_slab_link_free(cachep, slabp);
1356 cachep->c_freep = slabp;
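/* (Together these keep the chain partially ordered, as described above for
 * kmem_slab_t: a slab that becomes fully free is moved towards the end of
 * the chain, and a previously full slab that now has one free obj becomes
 * the new c_freep.)
 */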
1359 /* Returns a ptr to an obj in the given cache. */
1360 static inline void *
1361 __kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1363 kmem_slab_t *slabp;
1364 kmem_bufctl_t *bufp;
1365 void *objp;
1366 unsigned long save_flags;
1368 /* Sanity check. */
1369 if (!cachep)
1370 goto nul_ptr;
1371 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1372 try_again:
1373 /* Get slab alloc is to come from. */
1374 slabp = cachep->c_freep;
1376 /* Magic is a sanity check _and_ says if we need a new slab. */
1377 if (slabp->s_magic != SLAB_MAGIC_ALLOC)
1378 goto alloc_new_slab;
1379 /* DMA requests are 'rare' - keep out of the critical path. */
1380 if (flags & SLAB_DMA)
1381 goto search_dma;
1382 try_again_dma:
1383 SLAB_STATS_INC_ALLOCED(cachep);
1384 SLAB_STATS_INC_ACTIVE(cachep);
1385 SLAB_STATS_SET_HIGH(cachep);
1386 slabp->s_inuse++;
1387 bufp = slabp->s_freep;
1388 slabp->s_freep = bufp->buf_nextp;
1389 if (slabp->s_freep) {
1390 ret_obj:
1391 if (!slabp->s_index) {
1392 bufp->buf_slabp = slabp;
1393 objp = ((void*)bufp) - cachep->c_offset;
1394 finished:
1395 /* The lock is not needed by the red-zone or poison ops, and the
1396 * obj has been removed from the slab. Should be safe to drop
1397 * the lock here.
1399 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1400 #if SLAB_DEBUG_SUPPORT
1401 if (cachep->c_flags & SLAB_RED_ZONE)
1402 goto red_zone;
1403 ret_red:
1404 if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
1405 kmem_report_alloc_err("Bad poison", cachep);
1406 #endif /* SLAB_DEBUG_SUPPORT */
1407 return objp;
1409 /* Update index ptr. */
1410 objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
1411 bufp->buf_objp = objp;
1412 goto finished;
1414 cachep->c_freep = slabp->s_nextp;
1415 goto ret_obj;
1417 #if SLAB_DEBUG_SUPPORT
1418 red_zone:
1419 /* Set alloc red-zone, and check old one. */
1420 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1421 kmem_report_alloc_err("Bad front redzone", cachep);
1422 objp += BYTES_PER_WORD;
1423 if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1424 kmem_report_alloc_err("Bad rear redzone", cachep);
1425 goto ret_red;
1426 #endif /* SLAB_DEBUG_SUPPORT */
1428 search_dma:
1429 if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
1430 goto try_again_dma;
1431 alloc_new_slab:
1432 /* Either out of slabs, or magic number corruption. */
1433 if (slabp == kmem_slab_end(cachep)) {
1434 /* Need a new slab. Release the lock before calling kmem_cache_grow().
1435 * This allows objs to be released back into the cache while growing.
1437 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1438 if (kmem_cache_grow(cachep, flags)) {
1439 /* Someone may have stolen our objs. Doesn't matter, we'll
1440 * just come back here again.
1442 spin_lock_irq(&cachep->c_spinlock);
1443 goto try_again;
1445 /* Couldn't grow, but some objs may have been freed. */
1446 spin_lock_irq(&cachep->c_spinlock);
1447 if (cachep->c_freep != kmem_slab_end(cachep))
1448 goto try_again;
1449 } else {
1450 /* Very serious error - maybe panic() here? */
1451 kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
1453 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1454 err_exit:
1455 return NULL;
1456 nul_ptr:
1457 kmem_report_alloc_err("NULL ptr", NULL);
1458 goto err_exit;
1461 /* Release an obj back to its cache. If the obj has a constructed state,
1462 * it should be in this state _before_ it is released.
1464 static inline void
1465 __kmem_cache_free(kmem_cache_t *cachep, const void *objp)
1467 kmem_slab_t *slabp;
1468 kmem_bufctl_t *bufp;
1469 unsigned long save_flags;
1471 /* Basic sanity checks. */
1472 if (!cachep || !objp)
1473 goto null_addr;
1475 #if SLAB_DEBUG_SUPPORT
1476 /* A verify func is called without the cache-lock held. */
1477 if (cachep->c_flags & SLAB_DEBUG_INITIAL)
1478 goto init_state_check;
1479 finished_initial:
1481 if (cachep->c_flags & SLAB_RED_ZONE)
1482 goto red_zone;
1483 return_red:
1484 #endif /* SLAB_DEBUG_SUPPORT */
1486 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1488 if (SLAB_BUFCTL(cachep->c_flags))
1489 goto bufctl;
1490 bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
1492 /* Get slab for the object. */
1493 #if 0
1494 /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
1495 * Is this worth while? XXX
1497 if (cachep->c_flags & SLAB_HIGH_PACK)
1498 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
1499 else
1500 #endif
1501 slabp = bufp->buf_slabp;
1503 check_magic:
1504 if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* Sanity check. */
1505 goto bad_slab;
1507 #if SLAB_DEBUG_SUPPORT
1508 if (cachep->c_flags & SLAB_DEBUG_FREE)
1509 goto extra_checks;
1510 passed_extra:
1511 #endif /* SLAB_DEBUG_SUPPORT */
1513 if (slabp->s_inuse) { /* Sanity check. */
1514 SLAB_STATS_DEC_ACTIVE(cachep);
1515 slabp->s_inuse--;
1516 bufp->buf_nextp = slabp->s_freep;
1517 slabp->s_freep = bufp;
1518 if (bufp->buf_nextp) {
1519 if (slabp->s_inuse) {
1520 /* (hopefully) The most common case. */
1521 finished:
1522 #if SLAB_DEBUG_SUPPORT
1523 if (cachep->c_flags & SLAB_POISON) {
1524 if (cachep->c_flags & SLAB_RED_ZONE)
1525 objp += BYTES_PER_WORD;
1526 kmem_poison_obj(cachep, objp);
1528 #endif /* SLAB_DEBUG_SUPPORT */
1529 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1530 return;
1532 kmem_cache_full_free(cachep, slabp);
1533 goto finished;
1535 kmem_cache_one_free(cachep, slabp);
1536 goto finished;
1539 /* Don't add to freelist. */
1540 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1541 kmem_report_free_err("free with no active objs", objp, cachep);
1542 return;
1543 bufctl:
1544 /* No 'extra' checks are performed for objs stored this way; finding
1545 * the obj is check enough.
1547 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
1548 bufp = &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
1549 if (bufp->buf_objp == objp)
1550 goto check_magic;
1551 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1552 kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
1553 return;
1554 #if SLAB_DEBUG_SUPPORT
1555 init_state_check:
1556 /* Need to call the object's constructor so the
1557 * caller can perform a verify of its state (debugging).
1559 cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1560 goto finished_initial;
1561 extra_checks:
1562 if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
1563 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1564 kmem_report_free_err("Double free detected during checks", objp, cachep);
1565 return;
1567 goto passed_extra;
1568 red_zone:
1569 /* We do not hold the cache-lock while checking the red-zone.
1571 objp -= BYTES_PER_WORD;
1572 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1573 /* Either write before start of obj, or a double free. */
1574 kmem_report_free_err("Bad front redzone", objp, cachep);
1576 if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1577 /* Either write past end of obj, or a double free. */
1578 kmem_report_free_err("Bad rear redzone", objp, cachep);
1580 goto return_red;
1581 #endif /* SLAB_DEBUG_SUPPORT */
1583 bad_slab:
1584 /* Slab doesn't contain the correct magic num. */
1585 if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
1586 /* Magic num says this is a destroyed slab. */
1587 kmem_report_free_err("free from inactive slab", objp, cachep);
1588 } else
1589 kmem_report_free_err("Bad obj addr", objp, cachep);
1590 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1592 #if 1
1593 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1594 *(int *) 0 = 0;
1595 #endif
1597 return;
1598 null_addr:
1599 kmem_report_free_err("NULL ptr", objp, cachep);
1600 return;
1603 void *
1604 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1606 return __kmem_cache_alloc(cachep, flags);
1609 void
1610 kmem_cache_free(kmem_cache_t *cachep, void *objp)
1612 __kmem_cache_free(cachep, objp);
1615 void *
1616 kmalloc(size_t size, int flags)
1618 cache_sizes_t *csizep = cache_sizes;
1620 for (; csizep->cs_size; csizep++) {
1621 if (size > csizep->cs_size)
1622 continue;
1623 return __kmem_cache_alloc(csizep->cs_cachep, flags);
1625 printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
1626 return NULL;
1629 void
1630 kfree(const void *objp)
1632 struct page *page;
1633 int nr;
1635 if (!objp)
1636 goto null_ptr;
1637 nr = MAP_NR(objp);
1638 if (nr >= max_mapnr)
1639 goto bad_ptr;
1641 /* Assume we own the page structure - hence no locking.
1642 * If someone is misbehaving (for example, calling us with a bad
1643 * address), then access to the page structure can race with the
1644 * kmem_slab_destroy() code. Need to add a spin_lock to each page
1645 * structure, which would be useful in threading the gfp() functions....
1647 page = &mem_map[nr];
1648 if (PageSlab(page)) {
1649 kmem_cache_t *cachep;
1651 /* Here, we again assume the obj address is good.
1652 * If it isn't, and happens to map onto another
1653 * general cache page which has no active objs, then
1654 * we race.
1656 cachep = SLAB_GET_PAGE_CACHE(page);
1657 if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
1658 __kmem_cache_free(cachep, objp);
1659 return;
1662 bad_ptr:
1663 printk(KERN_ERR "kfree: Bad obj %p\n", objp);
1665 #if 1
1666 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1667 *(int *) 0 = 0;
1668 #endif
1670 null_ptr:
1671 return;
1674 void
1675 kfree_s(const void *objp, size_t size)
1677 struct page *page;
1678 int nr;
1680 if (!objp)
1681 goto null_ptr;
1682 nr = MAP_NR(objp);
1683 if (nr >= max_mapnr)
1684 goto null_ptr;
1685 /* See comment in kfree() */
1686 page = &mem_map[nr];
1687 if (PageSlab(page)) {
1688 kmem_cache_t *cachep;
1689 /* See comment in kfree() */
1690 cachep = SLAB_GET_PAGE_CACHE(page);
1691 if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
1692 if (size <= cachep->c_org_size) { /* XXX better check */
1693 __kmem_cache_free(cachep, objp);
1694 return;
1698 null_ptr:
1699 printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
1700 return;
1703 kmem_cache_t *
1704 kmem_find_general_cachep(size_t size)
1706 cache_sizes_t *csizep = cache_sizes;
1708 /* This function could be moved to the header file, and
1709 * made inline so consumers can quickly determine what
1710 * cache pointer they require.
1712 for (; csizep->cs_size; csizep++) {
1713 if (size > csizep->cs_size)
1714 continue;
1715 break;
1717 return csizep->cs_cachep;
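/* (e.g. kmem_find_general_cachep(100) returns the "size-128" cache: the
 * first general cache whose cs_size is >= the requested size.)
 */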
1721 /* Called from try_to_free_page().
1722 * This function _cannot_ be called within an int, but it
1723 * can be interrupted.
1725 void
1726 kmem_cache_reap(int gfp_mask)
1728 kmem_slab_t *slabp;
1729 kmem_cache_t *searchp;
1730 kmem_cache_t *best_cachep;
1731 unsigned int scan;
1732 unsigned int reap_level;
1734 if (in_interrupt()) {
1735 printk("kmem_cache_reap() called within int!\n");
1736 return;
1739 /* We really need a test semaphore op so we can avoid sleeping when
1740 * !wait is true.
1742 down(&cache_chain_sem);
1744 scan = 10;
1745 reap_level = 0;
1747 best_cachep = NULL;
1748 searchp = clock_searchp;
1749 do {
1750 unsigned int full_free;
1751 unsigned int dma_flag;
1753 /* It's safe to test this without holding the cache-lock. */
1754 if (searchp->c_flags & SLAB_NO_REAP)
1755 goto next;
1756 spin_lock_irq(&searchp->c_spinlock);
1757 if (searchp->c_growing)
1758 goto next_unlock;
1759 if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
1760 searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
1761 goto next_unlock;
1763 /* Sanity check for corruption of static values. */
1764 if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
1765 spin_unlock_irq(&searchp->c_spinlock);
1766 printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
1767 goto next;
1769 dma_flag = 0;
1770 full_free = 0;
1772 /* Count the fully free slabs. There should not be many,
1773 * since we are holding the cache lock.
1775 slabp = searchp->c_lastp;
1776 while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
1777 slabp = slabp->s_prevp;
1778 full_free++;
1779 if (slabp->s_dma)
1780 dma_flag++;
1782 spin_unlock_irq(&searchp->c_spinlock);
1784 if ((gfp_mask & GFP_DMA) && !dma_flag)
1785 goto next;
1787 if (full_free) {
1788 if (full_free >= 10) {
1789 best_cachep = searchp;
1790 break;
1793 /* Try to avoid slabs with constructors and/or
1794 * more than one page per slab (as it can be difficult
1795 * to get high orders from gfp()).
1797 if (full_free >= reap_level) {
1798 reap_level = full_free;
1799 best_cachep = searchp;
1802 goto next;
1803 next_unlock:
1804 spin_unlock_irq(&searchp->c_spinlock);
1805 next:
1806 searchp = searchp->c_nextp;
1807 } while (--scan && searchp != clock_searchp);
1809 clock_searchp = searchp;
1810 up(&cache_chain_sem);
1812 if (!best_cachep) {
1813 /* couldn't find anything to reap */
1814 return;
1817 spin_lock_irq(&best_cachep->c_spinlock);
1818 while (!best_cachep->c_growing &&
1819 !(slabp = best_cachep->c_lastp)->s_inuse &&
1820 slabp != kmem_slab_end(best_cachep)) {
1821 if (gfp_mask & GFP_DMA) {
1822 do {
1823 if (slabp->s_dma)
1824 goto good_dma;
1825 slabp = slabp->s_prevp;
1826 } while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));
1828 /* Didn't find a DMA slab (there was a free one -
1829 * it must have become active).
1831 goto dma_fail;
1832 good_dma:
1834 if (slabp == best_cachep->c_freep)
1835 best_cachep->c_freep = slabp->s_nextp;
1836 kmem_slab_unlink(slabp);
1837 SLAB_STATS_INC_REAPED(best_cachep);
1839 /* Safe to drop the lock. The slab is no longer linked to the
1840 * cache.
1842 spin_unlock_irq(&best_cachep->c_spinlock);
1843 kmem_slab_destroy(best_cachep, slabp);
1844 spin_lock_irq(&best_cachep->c_spinlock);
1846 dma_fail:
1847 spin_unlock_irq(&best_cachep->c_spinlock);
1848 return;
1851 #if SLAB_SELFTEST
1852 /* A few v. simple tests */
1853 static void
1854 kmem_self_test(void)
1856 kmem_cache_t *test_cachep;
1858 printk(KERN_INFO "kmem_test() - start\n");
1859 test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
1860 if (test_cachep) {
1861 char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
1862 if (objp) {
1863 /* Write in front and past end, red-zone test. */
1864 *(objp-1) = 1;
1865 *(objp+16) = 1;
1866 kmem_cache_free(test_cachep, objp);
1868 /* Mess up poisoning. */
1869 *objp = 10;
1870 objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
1871 kmem_cache_free(test_cachep, objp);
1873 /* Mess up poisoning (again). */
1874 *objp = 10;
1875 kmem_cache_shrink(test_cachep);
1878 printk(KERN_INFO "kmem_test() - finished\n");
1880 #endif /* SLAB_SELFTEST */
1882 #if defined(CONFIG_PROC_FS)
1883 /* /proc/slabinfo
1884 * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab
1887 get_slabinfo(char *buf)
1889 kmem_cache_t *cachep;
1890 kmem_slab_t *slabp;
1891 unsigned long active_objs;
1892 unsigned long save_flags;
1893 unsigned long num_slabs;
1894 unsigned long num_objs;
1895 int len=0;
1896 #if SLAB_STATS
1897 unsigned long active_slabs;
1898 #endif /* SLAB_STATS */
1900 __save_flags(save_flags);
1902 /* Output format version, so at least we can change it without _too_
1903 * many complaints.
1905 #if SLAB_STATS
1906 len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
1907 #else
1908 len = sprintf(buf, "slabinfo - version: 1.0\n");
1909 #endif /* SLAB_STATS */
1910 down(&cache_chain_sem);
1911 cachep = &cache_cache;
1912 do {
1913 #if SLAB_STATS
1914 active_slabs = 0;
1915 #endif /* SLAB_STATS */
1916 num_slabs = active_objs = 0;
1917 spin_lock_irq(&cachep->c_spinlock);
1918 for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
1919 active_objs += slabp->s_inuse;
1920 num_slabs++;
1921 #if SLAB_STATS
1922 if (slabp->s_inuse)
1923 active_slabs++;
1924 #endif /* SLAB_STATS */
1926 num_objs = cachep->c_num*num_slabs;
1927 #if SLAB_STATS
1929 unsigned long errors;
1930 unsigned long high = cachep->c_high_mark;
1931 unsigned long grown = cachep->c_grown;
1932 unsigned long reaped = cachep->c_reaped;
1933 unsigned long allocs = cachep->c_num_allocations;
1934 errors = (unsigned long) atomic_read(&cachep->c_errors);
1935 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1936 len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
1937 cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
1938 (1<<cachep->c_gfporder)*num_slabs,
1939 high, allocs, grown, reaped, errors);
1941 #else
1942 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1943 len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
1944 #endif /* SLAB_STATS */
1945 } while ((cachep = cachep->c_nextp) != &cache_cache);
1946 up(&cache_chain_sem);
1948 return len;
1950 #endif /* CONFIG_PROC_FS */