mm/slab.c

   1 /*
   2  * linux/mm/slab.c
   3  * Written by Mark Hemment, 1996/97.
   4  * (markhe@nextd.demon.co.uk)
   5  *
   6  * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   7  *
   8  * Major cleanup, different bufctl logic, per-cpu arrays
   9  *      (c) 2000 Manfred Spraul
  10  *
  11  * An implementation of the Slab Allocator as described in outline in;
  12  *      UNIX Internals: The New Frontiers by Uresh Vahalia
  13  *      Pub: Prentice Hall      ISBN 0-13-101908-2
  14  * or with a little more detail in;
  15  *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  16  *      Jeff Bonwick (Sun Microsystems).
  17  *      Presented at: USENIX Summer 1994 Technical Conference
  18  *
  19  *
  20  * The memory is organized in caches, one cache for each object type.
  21  * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
  22  * Each cache consists of many slabs (they are small (usually one
  23  * page long) and always contiguous), and each slab contains multiple
  24  * initialized objects.
  25  *
  26  * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
  27  * normal). If you need a special memory type, then you must create a new
  28  * cache for that memory type.
  29  *
  30  * In order to reduce fragmentation, the slabs are sorted in 3 groups:
  31  *   full slabs with 0 free objects
  32  *   partial slabs
  33  *   empty slabs with no allocated objects
  34  *
  35  * If partial slabs exist, then new allocations come from these slabs,
  36  * otherwise from empty slabs or new slabs are allocated.
  37  *
  38  * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
  39  * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
  40  *
  41  * On SMP systems, each cache has a short per-cpu head array, most allocs
  42  * and frees go into that array, and if that array overflows, then 1/2
  43  * of the entries in the array are given back into the global cache.
  44  * This reduces the number of spinlock operations.
  45  *
  46  * The c_cpuarray can be changed with a smp_call_function call,
  47  * it may not be read with enabled local interrupts.
  48  *
  49  * SMP synchronization:
  50  *  constructors and destructors are called without any locking.
  51  *  Several members in kmem_cache_t and slab_t never change, they
  52  *      are accessed without any locking.
  53  *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
  54  *      smp_call_function() is used if one cpu must flush the arrays from
  55  *      other cpus.
  56  *  The non-constant members are protected with a per-cache irq spinlock.
  57  *
  58  * Further notes from the original documentation:
  59  *
  60  * 11 April '97.  Started multi-threading - markhe
  61  *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
  62  *      The sem is only needed when accessing/extending the cache-chain, which
  63  *      can never happen inside an interrupt (kmem_cache_create(),
  64  *      kmem_cache_shrink() and kmem_cache_reap()).
  65  *
  66  *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  67  *      may be sleeping and therefore not holding the semaphore/lock), the
  68  *      growing field is used.  This also prevents reaping from a cache.
  69  *
  70  *      At present, each engine can be growing a cache.  This should be blocked.
  71  *
  72  */
  73
  74 #include        <linux/config.h>
  75 #include        <linux/slab.h>
  76 #include        <linux/interrupt.h>
  77 #include        <linux/init.h>
  78 #include        <asm/uaccess.h>
  79
  80 /*
  81  * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
  82  *                SLAB_RED_ZONE & SLAB_POISON.
  83  *                0 for faster, smaller code (especially in the critical paths).
  84  *
  85  * STATS        - 1 to collect stats for /proc/slabinfo.
  86  *                0 for faster, smaller code (especially in the critical paths).
  87  *
  88  * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
  89  */
  90
  91 #define DEBUG           0
  92 #define STATS           0
  93 #define FORCED_DEBUG    0
  94
  95 /*
  96  * Parameters for kmem_cache_reap
  97  */
  98 #define REAP_SCANLEN    10
  99 #define REAP_PERFECT    10
 100
 101 /* Shouldn't this be in a header file somewhere? */
 102 #define BYTES_PER_WORD          sizeof(void *)
 103
 104 /* Legal flag mask for kmem_cache_create(). */
 105 #if DEBUG
 106 # define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 107                          SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 108                          SLAB_NO_REAP | SLAB_CACHE_DMA)
 109 #else
 110 # define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
 111 #endif
 112
 113 /*
 114  * kmem_bufctl_t:
 115  *
 116  * Bufctls are used for linking the objs within a slab by means of
 117  * linked offsets.
 118  *
 119  * This implementation relies on "struct page" for locating the cache &
 120  * slab an object belongs to.
 121  * This allows the bufctl structure to be small (one int), but limits
 122  * the number of objects a slab (not a cache) can contain when off-slab
 123  * bufctls are used. The limit is the size of the largest general cache
 124  * that does not use off-slab slabs.
 125  * For 32bit archs with 4 kB pages, this is 56.
 126  * This is not serious, as it is only for large objects, when it is unwise
 127  * to have too many per slab.
 128  * Note: This limit can be raised by introducing a general cache whose size
 129  * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 130  */
 131
 132 #define BUFCTL_END 0xffffFFFF
 133 #define SLAB_LIMIT 0xffffFFFE
 134 typedef unsigned int kmem_bufctl_t;
 135
 136 /* Max number of objs-per-slab for caches which use off-slab slabs.
 137  * Needed to avoid a possible looping condition in kmem_cache_grow().
 138  */
 139 static unsigned long offslab_limit;
 140
 141 /*
 142  * slab_t
 143  *
 144  * Manages the objs in a slab. Placed either at the beginning of mem allocated
 145  * for a slab, or allocated from a general cache.
 146  * Slabs are chained into one ordered list: fully used, partial, then fully
 147  * free slabs.
 148  */
 149 typedef struct slab_s {
 150         struct list_head        list;           /* links the slab into its cache's slab list */
 151         unsigned long           colouroff;      /* s_mem minus colouroff is the address given back to kmem_freepages() */
 152         void                    *s_mem;         /* including colour offset */
 153         unsigned int            inuse;          /* num of objs active in slab */
 154         kmem_bufctl_t           free;           /* NOTE(review): presumably the index of the first free obj - confirm */
 155 } slab_t;
 156 /* slab_bufctl(): the kmem_bufctl_t array that starts directly behind the slab_t. */
 157 #define slab_bufctl(slabp) \
 158         ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
 159
 160 /*
 161  * cpucache_t
 162  *
 163  * Per cpu structures
 164  * The limit is stored in the per-cpu structure to reduce the data cache
 165  * footprint.
 166  */
 167 typedef struct cpucache_s {
 168         unsigned int avail;     /* NOTE(review): presumably the number of entries currently held - confirm */
 169         unsigned int limit;     /* NOTE(review): presumably the capacity of the entry array - confirm */
 170 } cpucache_t;
 171 /* cc_entry(): the void* array directly behind a cpucache_t; cc_data(): a cache's cpudata[] slot for the executing CPU. */
 172 #define cc_entry(cpucache) \
 173         ((void **)(((cpucache_t*)cpucache)+1))
 174 #define cc_data(cachep) \
 175         ((cachep)->cpudata[smp_processor_id()])
 176 /*
 177  * kmem_cache_t
 178  *
 179  * manages a cache.
      *
      * Descriptors are allocated from cache_cache by kmem_cache_create() and
      * linked on the global cache chain through 'next'.  The members are
      * grouped by how often they are touched.
 180  */
 181
 182 #define CACHE_NAMELEN   20      /* max name length for a slab cache */
 183
 184 struct kmem_cache_s {
 185 /* 1) each alloc & free */
 186         /* full, partial first, then free */
 187         struct list_head        slabs;
 188         struct list_head        *firstnotfull;  /* position in 'slabs'; initialised to &slabs (empty list) */
 189         unsigned int            objsize;        /* obj size after word/cache-line alignment and red-zone padding */
 190         unsigned int            flags;  /* constant flags */
 191         unsigned int            num;    /* # of objs per slab */
 192         spinlock_t              spinlock;       /* per-cache irq spinlock for the non-constant members */
 193 #ifdef CONFIG_SMP
 194         unsigned int            batchcount;
 195 #endif
 196
 197 /* 2) slab additions /removals */
 198         /* order of pgs per slab (2^n) */
 199         unsigned int            gfporder;
 200
 201         /* force GFP flags, e.g. GFP_DMA */
 202         unsigned int            gfpflags;
 203
 204         size_t                  colour;         /* cache colouring range */
 205         unsigned int            colour_off;     /* colour offset */
 206         unsigned int            colour_next;    /* cache colouring */
 207         kmem_cache_t            *slabp_cache;   /* general cache that supplies off-slab slab_t's */
 208         unsigned int            growing;        /* non-zero while the cache is being grown; stops shrinking */
 209         unsigned int            dflags;         /* dynamic flags */
 210
 211         /* constructor func */
 212         void (*ctor)(void *, kmem_cache_t *, unsigned long);
 213
 214         /* de-constructor func */
 215         void (*dtor)(void *, kmem_cache_t *, unsigned long);
 216
 217         unsigned long           failures;
 218
 219 /* 3) cache creation/removal */
 220         char                    name[CACHE_NAMELEN];
 221         struct list_head        next;           /* link in the global cache chain */
 222 #ifdef CONFIG_SMP
 223 /* 4) per-cpu data */
 224         cpucache_t              *cpudata[NR_CPUS];
 225 #endif
 226 #if STATS
      /* counters behind the STATS_* macros; only present when STATS is set */
 227         unsigned long           num_active;
 228         unsigned long           num_allocations;
 229         unsigned long           high_mark;
 230         unsigned long           grown;
 231         unsigned long           reaped;
 232         unsigned long           errors;
 233 #ifdef CONFIG_SMP
 234         atomic_t                allochit;
 235         atomic_t                allocmiss;
 236         atomic_t                freehit;
 237         atomic_t                freemiss;
 238 #endif
 239 #endif
 240 };
 241
 242 /* internal c_flags */
 243 #define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
 244 #define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
 245
 246 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
 247 #define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown cache */
 248 /* Flag tests on a kmem_cache_t pointer: the first two read 'flags', GROWN() reads 'dflags'. */
 249 #define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
 250 #define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
 251 #define GROWN(x)        ((x)->dflags & DFLGS_GROWN)
 252 /* Statistics hooks: real counter updates when STATS is set, empty statements otherwise. */
 253 #if STATS
 254 #define STATS_INC_ACTIVE(x)     ((x)->num_active++)
 255 #define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 256 #define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 257 #define STATS_INC_GROWN(x)      ((x)->grown++)
 258 #define STATS_INC_REAPED(x)     ((x)->reaped++)
 259 #define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
 260                                         (x)->high_mark = (x)->num_active; \
 261                                 } while (0)
 262 #define STATS_INC_ERR(x)        ((x)->errors++)
 263 #else
 264 #define STATS_INC_ACTIVE(x)     do { } while (0)
 265 #define STATS_DEC_ACTIVE(x)     do { } while (0)
 266 #define STATS_INC_ALLOCED(x)    do { } while (0)
 267 #define STATS_INC_GROWN(x)      do { } while (0)
 268 #define STATS_INC_REAPED(x)     do { } while (0)
 269 #define STATS_SET_HIGH(x)       do { } while (0)
 270 #define STATS_INC_ERR(x)        do { } while (0)
 271 #endif
 272 /* The hit/miss counters are atomic_t members that exist only with both STATS and CONFIG_SMP. */
 273 #if STATS && defined(CONFIG_SMP)
 274 #define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
 275 #define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
 276 #define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
 277 #define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
 278 #else
 279 #define STATS_INC_ALLOCHIT(x)   do { } while (0)
 280 #define STATS_INC_ALLOCMISS(x)  do { } while (0)
 281 #define STATS_INC_FREEHIT(x)    do { } while (0)
 282 #define STATS_INC_FREEMISS(x)   do { } while (0)
 283 #endif
 284
 285 #if DEBUG
 286 /* Magic nums for obj red zoning.
 287  * Placed in the first word before and the first word after an obj.
 288  */
 289 #define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
 290 #define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
 291
 292 /* ...and for poisoning */
 293 #define POISON_BYTE     0x5a            /* byte value for poisoning */
 294 #define POISON_END      0xa5            /* end-byte of poisoning */
 295
 296 #endif
 297
 298 /* maximum size of an obj (in 2^order pages) */
 299 #define MAX_OBJ_ORDER   5       /* 32 pages */
 300
 301 /*
 302  * Do not go above this order unless 0 objects fit into the slab.
 303  */
 304 #define BREAK_GFP_ORDER_HI      2
 305 #define BREAK_GFP_ORDER_LO      1
 306 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 307
 308 /*
 309  * Absolute limit for the gfp order
 310  */
 311 #define MAX_GFP_ORDER   5       /* 32 pages */
 312
 313
 314 /* Macros for storing/retrieving the cachep and or slab from the
 315  * global 'mem_map'. These are used to find the slab an obj belongs to.
 316  * With kfree(), these are used to find the cache which an obj belongs to.
 317  */
 318 #define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
 319 #define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
 320 #define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 321 #define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
 322
 323 /* Size description struct for general caches. */
 324 typedef struct cache_sizes {
 325         size_t           cs_size;       /* obj size served by this pair of caches */
 326         kmem_cache_t    *cs_cachep;     /* "size-N" cache, created by kmem_cache_sizes_init() */
 327         kmem_cache_t    *cs_dmacachep;  /* "size-N(DMA)" cache, created with SLAB_CACHE_DMA */
 328 } cache_sizes_t;
 329 /* Table of general cache sizes; both cache pointers are filled in by kmem_cache_sizes_init(). */
 330 static cache_sizes_t cache_sizes[] = {
 331 #if PAGE_SIZE == 4096
 332         {    32,        NULL, NULL},
 333 #endif
 334         {    64,        NULL, NULL},
 335         {   128,        NULL, NULL},
 336         {   256,        NULL, NULL},
 337         {   512,        NULL, NULL},
 338         {  1024,        NULL, NULL},
 339         {  2048,        NULL, NULL},
 340         {  4096,        NULL, NULL},
 341         {  8192,        NULL, NULL},
 342         { 16384,        NULL, NULL},
 343         { 32768,        NULL, NULL},
 344         { 65536,        NULL, NULL},
 345         {131072,        NULL, NULL},
 346         {     0,        NULL, NULL}     /* cs_size 0 terminates the table */
 347 };
 348
 349 /* internal cache of cache description objs.
      * kmem_cache_create() allocates every kmem_cache_t from here; the members
      * not listed below (num, colour, colour_next) are set by kmem_cache_init().
      */
 350 static kmem_cache_t cache_cache = {
 351         slabs:          LIST_HEAD_INIT(cache_cache.slabs),
 352         firstnotfull:   &cache_cache.slabs,     /* empty list: points back at the list head */
 353         objsize:        sizeof(kmem_cache_t),
 354         flags:          SLAB_NO_REAP,
 355         spinlock:       SPIN_LOCK_UNLOCKED,
 356         colour_off:     L1_CACHE_BYTES,
 357         name:           "kmem_cache",
 358 };
 359
 360 /* Guard access to the cache-chain. */
 361 static struct semaphore cache_chain_sem;
 362
 363 /* Place maintainer for reaping. */
 364 static kmem_cache_t *clock_searchp = &cache_cache;
 365
 366 #define cache_chain (cache_cache.next)
 367
 368 #ifdef CONFIG_SMP
 369 /*
 370  * chicken and egg problem: delay the per-cpu array allocation
 371  * until the general caches are up.
 372  */
 373 static int g_cpucache_up;
 374
 375 static void drain_cache (void *__cachep);
 376 static void enable_cpucache (kmem_cache_t *cachep);
 377 static void enable_all_cpucaches (void);
 378 #endif
 379
 380 /* Estimate slab capacity: store in *@num how many objs of @size fit into a
      * slab of 2^@gfporder pages, and in *@left_over the bytes that stay unused.
      * Unless CFLGS_OFF_SLAB is set in @flags, the slab_t plus one kmem_bufctl_t
      * per obj (rounded up with L1_CACHE_ALIGN) is charged against the slab too.
      */
 381 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
 382                  int flags, size_t *left_over, unsigned int *num)
 383 {
 384         int nr_objs = 0;
 385         size_t mgmt_fixed = 0;
 386         size_t mgmt_per_obj = 0;
 387         size_t remaining = PAGE_SIZE<<gfporder;
 388
 389         if (!(flags & CFLGS_OFF_SLAB)) {
 390                 mgmt_per_obj = sizeof(kmem_bufctl_t);
 391                 mgmt_fixed = sizeof(slab_t);
 392         }
 393         /* count up to the first number of objs that no longer fits ... */
 394         while (nr_objs*size + L1_CACHE_ALIGN(mgmt_fixed+nr_objs*mgmt_per_obj) <= remaining)
 395                 nr_objs++;
 396         if (nr_objs > 0)
 397                 nr_objs--;      /* ... then step back to the last one that did */
 398
 399         if (nr_objs > SLAB_LIMIT)
 400                 nr_objs = SLAB_LIMIT;
 401
 402         *num = nr_objs;
 403         remaining -= nr_objs*size;
 404         remaining -= L1_CACHE_ALIGN(mgmt_fixed+nr_objs*mgmt_per_obj);
 405         *left_over = remaining;
 406 }
 407
 408 /* Initialisation - setup the `cache' cache.
      * Initialises cache_chain_sem and the cache chain, then sizes cache_cache
      * for single-page slabs (gfporder 0) with on-slab management (flags 0).
      */
 409 void __init kmem_cache_init(void)
 410 {
 411         size_t left_over;
 412
 413         init_MUTEX(&cache_chain_sem);
 414         INIT_LIST_HEAD(&cache_chain);
 415
 416         kmem_cache_estimate(0, cache_cache.objsize, 0,
 417                         &left_over, &cache_cache.num);
 418         if (!cache_cache.num)
 419                 BUG();          /* not even one kmem_cache_t fits into a page */
 420
 421         cache_cache.colour = left_over/cache_cache.colour_off;  /* colour steps that fit into the unused bytes */
 422         cache_cache.colour_next = 0;
 423 }
 424
 425
 426 /* Initialisation - setup remaining internal and general caches.
 427  * Called after the gfp() functions have been enabled, and before smp_init().
      *
      * For every entry of cache_sizes[] a "size-N" cache and a "size-N(DMA)"
      * cache are created; failure to create either one is fatal (BUG()).
      * While doing so, offslab_limit is raised to the number of bufctls that
      * fit behind a slab_t in the largest general cache that keeps its slab
      * management on-slab.
 428  */
 429 void __init kmem_cache_sizes_init(void)
 430 {
 431         cache_sizes_t *sizes = cache_sizes;
 432         char name[CACHE_NAMELEN];
 433         /*
 434          * Fragmentation resistance on low memory - only use bigger
 435          * page orders on machines with more than 32MB of memory.
 436          */
 437         if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 438                 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 439         do {
 440                 /* For performance, all the general caches are L1 aligned.
 441                  * This should be particularly beneficial on SMP boxes, as it
 442                  * eliminates "false sharing".
 443                  * Note for systems short on memory removing the alignment will
 444                  * allow tighter packing of the smaller caches. */
 445                 sprintf(name,"size-%Zd",sizes->cs_size);
 446                 if (!(sizes->cs_cachep =
 447                         kmem_cache_create(name, sizes->cs_size,
 448                                         0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
 449                         BUG();
 450                 }
 451
 452                 /* Inc off-slab bufctl limit until the ceiling is hit.
                      * An off-slab descriptor is a slab_t followed by one
                      * kmem_bufctl_t per obj, so divide the space behind the
                      * slab_t by the real bufctl size, not by a fixed 2.
                      */
 453                 if (!(OFF_SLAB(sizes->cs_cachep))) {
 454                         offslab_limit = sizes->cs_size-sizeof(slab_t);
 455                         offslab_limit /= sizeof(kmem_bufctl_t);
 456                 }
 457                 sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
 458                 sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
 459                               SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 460                 if (!sizes->cs_dmacachep)
 461                         BUG();
 462                 sizes++;
 463         } while (sizes->cs_size);       /* the zero-size entry ends the table */
 464 }
 465 /* SMP only: set g_cpucache_up, then call enable_all_cpucaches(). Empty on non-SMP builds. */
 466 void __init kmem_cpucache_init(void)
 467 {
 468 #ifdef CONFIG_SMP
 469         g_cpucache_up = 1;      /* from now on kmem_cache_create() calls enable_cpucache() itself */
 470         enable_all_cpucaches();
 471 #endif
 472 }
 473
 474 /* Page-allocator front end: request 2^gfporder pages for a slab of @cachep.
 475  * The cache-lock need not be held.  Returns what __get_free_pages() returns. */
 476 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
 477 {
 478         /*
 479          * The GFP bits forced by the cache are always merged in, so if
 480          * dmaable memory was requested we will get it.  We might get it
 481          * without asking as well, but that would be relatively rare and
 482          * ignorable.
 483          */
 484         unsigned long gfp_mask = flags | cachep->gfpflags;
 485
 486         /*
 487          * Assume that once we have the pages no one else can legally mess
 488          * with the 'struct page's.
 489          * However vm_scan() might try to test the structure to see if
 490          * it is a named-page or buffer-page.  The members it tests are
 491          * of no interest here.....
 492          */
 493         return (void*) __get_free_pages(gfp_mask, cachep->gfporder);
 494 }
 495
 496 /* Page-release front end: clear the slab bit on all 2^gfporder pages starting
      * at @addr, then give them back with free_pages(). */
 497 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 498 {
 499         unsigned long nr_left = (1<<cachep->gfporder);
 500         struct page *pg = mem_map + MAP_NR(addr);
 501
 502         /* The type bit is not cleared by free_pages(), so it is done here.
 503          * The pages are already unlinked from their cache-slab; their
 504          * 'struct page's might still be looked at in vm_scan(), which
 505          * shouldn't be a worry.
 506          */
 507         for (; nr_left; nr_left--) {
 508                 PageClearSlab(pg);
 509                 pg++;
 510         }
 511         free_pages((unsigned long)addr, cachep->gfporder);
 512 }
 513
 514 #if DEBUG
      /* Fill an obj with POISON_BYTE and put POISON_END into its last byte.
       * With SLAB_RED_ZONE the first and last word of the obj are left alone. */
 515 static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
 516 {
 517         int fill_len = cachep->objsize;
 518         if (cachep->flags & SLAB_RED_ZONE) {
 519                 fill_len -= 2*BYTES_PER_WORD;
 520                 addr += BYTES_PER_WORD;
 521         }
 522         memset(addr, POISON_BYTE, fill_len);
 523         ((unsigned char *)addr)[fill_len-1] = POISON_END;
 524 }
 525
      /* Check the poison of an obj: returns 0 when the first POISON_END byte is
       * exactly the last poisoned byte, 1 otherwise. */
 526 static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
 527 {
 528         int scan_len = cachep->objsize;
 529         void *first_end;
 530         if (cachep->flags & SLAB_RED_ZONE) {
 531                 scan_len -= 2*BYTES_PER_WORD;
 532                 addr += BYTES_PER_WORD;
 533         }
 534         first_end = memchr(addr, POISON_END, scan_len);
 535         /* a missing or an early end marker both count as damage */
 536         return first_end != (addr+scan_len-1);
 538 }
 539 #endif
 540
 541 /* Destroy all the objs in a slab, and release the mem back to the system.
 542  * Before calling the slab must have been unlinked from the cache.
 543  * The cache-lock is not held/needed.
      *
      * The per-obj loop only runs when there is a destructor or (DEBUG builds)
      * red-zoning/poisoning to verify; a failed check ends in BUG().
 544  */
 545 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
 546 {
 547         if (cachep->dtor
 548 #if DEBUG
 549                 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
 550 #endif
 551         ) {
 552                 int i;
 553                 for (i = 0; i < cachep->num; i++) {
 554                         void* objp = slabp->s_mem+cachep->objsize*i;    /* i-th obj of the slab */
 555 #if DEBUG
      /* NOTE(review): RED_MAGIC1 is documented as the "active" value, yet it is
       * what this teardown path expects in both red-zone words - confirm whether
       * the comments on the RED_MAGIC defines are swapped. */
 556                         if (cachep->flags & SLAB_RED_ZONE) {
 557                                 if (*((unsigned long*)(objp)) != RED_MAGIC1)
 558                                         BUG();
 559                                 if (*((unsigned long*)(objp + cachep->objsize
 560                                                 -BYTES_PER_WORD)) != RED_MAGIC1)
 561                                         BUG();
 562                                 objp += BYTES_PER_WORD;         /* the dtor gets the address behind the leading red-zone word */
 563                         }
 564 #endif
 565                         if (cachep->dtor)
 566                                 (cachep->dtor)(objp, cachep, 0);
 567 #if DEBUG
 568                         if (cachep->flags & SLAB_RED_ZONE) {
 569                                 objp -= BYTES_PER_WORD;         /* back to the raw obj start; the poison check skips the red zone itself */
 570                         }
 571                         if ((cachep->flags & SLAB_POISON)  &&
 572                                 kmem_check_poison_obj(cachep, objp))
 573                                 BUG();
 574 #endif
 575                 }
 576         }
 577
 578         kmem_freepages(cachep, slabp->s_mem-slabp->colouroff); /* undo the colour offset before freeing */
 579         if (OFF_SLAB(cachep))
 580                 kmem_cache_free(cachep->slabp_cache, slabp);    /* an off-slab slab_t goes back to slabp_cache */
 581 }
 582
 583
 584 /**
 585  * kmem_cache_create - Create a cache.
 586  * @name: A string which is used in /proc/slabinfo to identify this cache.
 587  * @size: The size of objects to be created in this cache.
 588  * @offset: The offset to use within the page.
 589  * @flags: SLAB flags
 590  * @ctor: A constructor for the objects.
 591  * @dtor: A destructor for the objects.
 592  *
 593  * Returns a ptr to the cache on success, NULL on failure.
 594  * Cannot be called from interrupt context, but can be interrupted.
 595  * The @ctor is run when new pages are allocated by the cache
 596  * and the @dtor is run before the pages are handed back.
 597  * The flags are
 598  *
 599  * %SLAB_POISON - Poison the slab with a known test pattern (0x5a bytes,
 600  * terminated by 0xa5) to catch references to uninitialised memory.
 601  *
 602  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 603  * for buffer overruns.
 604  *
 605  * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
 606  * memory pressure.
 607  *
 608  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 609  * cacheline.  This can be beneficial if you're counting cycles as closely
 610  * as davem.
      *
      * %SLAB_CACHE_DMA - Allocate the pages of this cache with GFP_DMA.
      *
      * Invalid arguments (no name, name too long, bad @size or @offset, @dtor
      * without @ctor, flags outside CREATE_MASK, a name that is already on the
      * cache chain, a call from interrupt context) are usage bugs and end in
      * BUG().  NULL is returned when no descriptor could be allocated or when
      * not a single object fits into a slab.
 611  */
 612 kmem_cache_t *
 613 kmem_cache_create (const char *name, size_t size, size_t offset,
 614         unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 615         void (*dtor)(void*, kmem_cache_t *, unsigned long))
 616 {
 617         const char *func_nm = KERN_ERR "kmem_create: ";
 618         size_t left_over, align, slab_size;
 619         kmem_cache_t *cachep = NULL;
 620
 621         /*
 622          * Sanity checks... these are all serious usage bugs.
 623          */
 624         if ((!name) ||
 625                 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
 626                 in_interrupt() ||
 627                 (size < BYTES_PER_WORD) ||
 628                 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 629                 (dtor && !ctor) ||
 630                 (offset < 0 || offset > size))  /* offset is a size_t: the "< 0" half can never be true */
 631                         BUG();
 632
 633 #if DEBUG
 634         if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 635                 /* No constructor, but initial state check requested */
 636                 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 637                 flags &= ~SLAB_DEBUG_INITIAL;
 638         }
 639
 640         if ((flags & SLAB_POISON) && ctor) {
 641                 /* request for poisoning, but we can't do that with a constructor */
 642                 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 643                 flags &= ~SLAB_POISON;
 644         }
 645 #if FORCED_DEBUG
 646         if (size < (PAGE_SIZE>>3))
 647                 /*
 648                  * do not red zone large object, causes severe
 649                  * fragmentation.
 650                  */
 651                 flags |= SLAB_RED_ZONE;
 652         if (!ctor)
 653                 flags |= SLAB_POISON;
 654 #endif
 655 #endif
 656
 657         /*
 658          * Always checks flags, a caller might be expecting debug
 659          * support which isn't available.
 660          */
 661         if (flags & ~CREATE_MASK)
 662                 BUG();
 663
 664         /* Get cache's description obj. */
 665         cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 666         if (!cachep)
 667                 goto opps;
 668         memset(cachep, 0, sizeof(kmem_cache_t));        /* gfporder therefore starts at 0 for the sizing loop below */
 669
 670         /* Check that size is in terms of words.  This is needed to avoid
 671          * unaligned accesses for some archs when redzoning is used, and makes
 672          * sure any on-slab bufctl's are also correctly aligned.
 673          */
 674         if (size & (BYTES_PER_WORD-1)) {
 675                 size += (BYTES_PER_WORD-1);
 676                 size &= ~(BYTES_PER_WORD-1);
 677                 printk("%sForcing size word alignment - %s\n", func_nm, name);
 678         }
 679
 680 #if DEBUG
 681         if (flags & SLAB_RED_ZONE) {
 682                 /*
 683                  * There is no point trying to honour cache alignment
 684                  * when redzoning.
 685                  */
 686                 flags &= ~SLAB_HWCACHE_ALIGN;
 687                 size += 2*BYTES_PER_WORD;       /* words for redzone */
 688         }
 689 #endif
 690         align = BYTES_PER_WORD;
 691         if (flags & SLAB_HWCACHE_ALIGN)
 692                 align = L1_CACHE_BYTES;
 693
 694         /* Determine if the slab management is 'on' or 'off' slab. */
 695         if (size >= (PAGE_SIZE>>3))
 696                 /*
 697                  * Size is large, assume best to place the slab management obj
 698                  * off-slab (should allow better packing of objs).
 699                  */
 700                 flags |= CFLGS_OFF_SLAB;
 701
 702         if (flags & SLAB_HWCACHE_ALIGN) {
 703                 /* Need to adjust size so that objs are cache aligned. */
 704                 /* Small obj size, can get at least two per cache line. */
 705                 /* FIXME: only power of 2 supported, was better */
 706                 while (size < align/2)
 707                         align /= 2;
 708                 size = (size+align-1)&(~(align-1));
 709         }
 710
 711         /* Cal size (in pages) of slabs, and the num of objs per slab.
 712          * This could be made much more intelligent.  For now, try to avoid
 713          * using high page-orders for slabs.  When the gfp() funcs are more
 714          * friendly towards high-order requests, this should be changed.
 715          */
 716         do {
 717                 unsigned int break_flag = 0;    /* set once gfporder was stepped back for offslab_limit */
 718 cal_wastage:
 719                 kmem_cache_estimate(cachep->gfporder, size, flags,
 720                                                 &left_over, &cachep->num);
 721                 if (break_flag)
 722                         break;
 723                 if (cachep->gfporder >= MAX_GFP_ORDER)
 724                         break;
 725                 if (!cachep->num)
 726                         goto next;
 727                 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
 728                         /* Oops, this num of objs will cause problems. */
 729                         cachep->gfporder--;     /* NOTE(review): wraps if gfporder is still 0 here - confirm this cannot happen */
 730                         break_flag++;
 731                         goto cal_wastage;
 732                 }
 733
 734                 /*
 735                  * Large num of objs is good, but v. large slabs are currently
 736                  * bad for the gfp()s.
 737                  */
 738                 if (cachep->gfporder >= slab_break_gfp_order)
 739                         break;
 740
 741                 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 742                         break;  /* Acceptable internal fragmentation. */
 743 next:
 744                 cachep->gfporder++;
 745         } while (1);
 746
 747         if (!cachep->num) {
 748                 printk("kmem_cache_create: couldn't create cache %s.\n", name);
 749                 kmem_cache_free(&cache_cache, cachep);
 750                 cachep = NULL;
 751                 goto opps;
 752         }
 753         slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));   /* slab_t plus one bufctl per obj */
 754
 755         /*
 756          * If the slab has been placed off-slab, and we have enough space then
 757          * move it on-slab. This is at the expense of any extra colouring.
 758          */
 759         if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 760                 flags &= ~CFLGS_OFF_SLAB;
 761                 left_over -= slab_size;
 762         }
 763
 764         /* Offset must be a multiple of the alignment. */
 765         offset += (align-1);
 766         offset &= ~(align-1);
 767         if (!offset)
 768                 offset = L1_CACHE_BYTES;
 769         cachep->colour_off = offset;
 770         cachep->colour = left_over/offset;      /* colour steps that fit into the unused bytes */
 771
 772         /* init remaining fields */
 773         if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
 774                 flags |= CFLGS_OPTIMIZE;
 775
 776         cachep->flags = flags;
 777         cachep->gfpflags = 0;
 778         if (flags & SLAB_CACHE_DMA)
 779                 cachep->gfpflags |= GFP_DMA;
 780         spin_lock_init(&cachep->spinlock);
 781         cachep->objsize = size;
 782         INIT_LIST_HEAD(&cachep->slabs);
 783         cachep->firstnotfull = &cachep->slabs;
 784
 785         if (flags & CFLGS_OFF_SLAB)
 786                 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);    /* source of the off-slab slab_t's */
 787         cachep->ctor = ctor;
 788         cachep->dtor = dtor;
 789         /* Copy name over so we don't have problems with unloaded modules */
 790         strcpy(cachep->name, name);     /* length was checked against CACHE_NAMELEN above */
 791
 792 #ifdef CONFIG_SMP
 793         if (g_cpucache_up)
 794                 enable_cpucache(cachep);
 795 #endif
 796         /* Need the semaphore to access the chain. */
 797         down(&cache_chain_sem);
 798         {
 799                 struct list_head *p;
 800
 801                 list_for_each(p, &cache_chain) {
 802                         kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 803
 804                         /* The name field is constant - no lock needed. */
 805                         if (!strcmp(pc->name, name))
 806                                 BUG();          /* a cache with this name is already chained */
 807                 }
 808         }
 809
 810         /* There is no reason to lock our new cache before we
 811          * link it in - no one knows about it yet...
 812          */
 813         list_add(&cachep->next, &cache_chain);
 814         up(&cache_chain_sem);
 815 opps:
 816         return cachep;
 817 }
 818
 819 /*
 820  * This check if the kmem_cache_t pointer is chained in the cache_cache
 821  * list. -arca
 822  */
 823 static int is_chained_kmem_cache(kmem_cache_t * cachep)
 824 {
 825         struct list_head *p;
 826         int ret = 0;
 827
 828         /* Find the cache in the chain of caches. */
 829         down(&cache_chain_sem);
 830         list_for_each(p, &cache_chain) {
 831                 if (p == &cachep->next) {
 832                         ret = 1;
 833                         break;
 834                 }
 835         }
 836         up(&cache_chain_sem);
 837
 838         return ret;
 839 }
 840
 841 static int __kmem_cache_shrink(kmem_cache_t *cachep)
 842 {
 843         slab_t *slabp;
 844         int ret;
 845
 846 #ifdef CONFIG_SMP
 847         smp_call_function(drain_cache, cachep, 1, 1);
 848         local_irq_disable();
 849         drain_cache(cachep);
 850         local_irq_enable();
 851 #endif
 852         spin_lock_irq(&cachep->spinlock);
 853
 854         /* If the cache is growing, stop shrinking. */
 855         while (!cachep->growing) {
 856                 struct list_head *p;
 857
 858                 p = cachep->slabs.prev;
 859                 if (p == &cachep->slabs)
 860                         break;
 861
 862                 slabp = list_entry(cachep->slabs.prev, slab_t, list);
 863                 if (slabp->inuse)
 864                         break;
 865
 866                 list_del(&slabp->list);
 867                 if (cachep->firstnotfull == &slabp->list)
 868                         cachep->firstnotfull = &cachep->slabs;
 869
 870                 spin_unlock_irq(&cachep->spinlock);
 871                 kmem_slab_destroy(cachep, slabp);
 872                 spin_lock_irq(&cachep->spinlock);
 873         }
 874         ret = !list_empty(&cachep->slabs);
 875         spin_unlock_irq(&cachep->spinlock);
 876         return ret;
 877 }
 878
 879 /**
 880  * kmem_cache_shrink - Shrink a cache.
 881  * @cachep: The cache to shrink.
 882  *
 883  * Releases as many slabs as possible for a cache.
 884  * To help debugging, a zero exit status indicates all slabs were released.
 885  */
 886 int kmem_cache_shrink(kmem_cache_t *cachep)
 887 {
 888         if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
 889                 BUG();
 890
 891         return __kmem_cache_shrink(cachep);
 892 }
 893
 894 /**
 895  * kmem_cache_destroy - delete a cache
 896  * @cachep: the cache to destroy
 897  *
 898  * Remove a kmem_cache_t object from the slab cache.
 899  * Returns 0 on success.
 900  *
 901  * It is expected this function will be called by a module when it is
 902  * unloaded.  This will remove the cache completely, and avoid a duplicate
 903  * cache being allocated each time a module is loaded and unloaded, if the
 904  * module doesn't have persistent in-kernel storage across loads and unloads.
 905  *
 906  * The caller must guarantee that noone will allocate memory from the cache
 907  * during the kmem_cache_destroy().
 908  */
 909 int kmem_cache_destroy (kmem_cache_t * cachep)
 910 {
 911         if (!cachep || in_interrupt() || cachep->growing)
 912                 BUG();
 913
 914         /* Find the cache in the chain of caches. */
 915         down(&cache_chain_sem);
 916         /* the chain is never empty, cache_cache is never destroyed */
 917         if (clock_searchp == cachep)
 918                 clock_searchp = list_entry(cachep->next.next,
 919                                                 kmem_cache_t, next);
 920         list_del(&cachep->next);
 921         up(&cache_chain_sem);
 922
 923         if (__kmem_cache_shrink(cachep)) {
 924                 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
 925                        cachep);
 926                 down(&cache_chain_sem);
 927                 list_add(&cachep->next,&cache_chain);
 928                 up(&cache_chain_sem);
 929                 return 1;
 930         }
 931 #ifdef CONFIG_SMP
 932         {
 933                 int i;
 934                 for (i = 0; i < NR_CPUS; i++)
 935                         kfree(cachep->cpudata[i]);
 936         }
 937 #endif
 938         kmem_cache_free(&cache_cache, cachep);
 939
 940         return 0;
 941 }
 942
 943 /* Get the memory for a slab management obj. */
 944 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
 945                         void *objp, int colour_off, int local_flags)
 946 {
 947         slab_t *slabp;
 948
 949         if (OFF_SLAB(cachep)) {
 950                 /* Slab management obj is off-slab. */
 951                 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
 952                 if (!slabp)
 953                         return NULL;
 954         } else {
 955                 /* FIXME: change to
 956                         slabp = objp
 957                  * if you enable OPTIMIZE
 958                  */
 959                 slabp = objp+colour_off;
 960                 colour_off += L1_CACHE_ALIGN(cachep->num *
 961                                 sizeof(kmem_bufctl_t) + sizeof(slab_t));
 962         }
 963         slabp->inuse = 0;
 964         slabp->colouroff = colour_off;
 965         slabp->s_mem = objp+colour_off;
 966
 967         return slabp;
 968 }
 969
 970 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
 971                         slab_t * slabp, unsigned long ctor_flags)
 972 {
 973         int i;
 974
 975         for (i = 0; i < cachep->num; i++) {
 976                 void* objp = slabp->s_mem+cachep->objsize*i;
 977 #if DEBUG
 978                 if (cachep->flags & SLAB_RED_ZONE) {
 979                         *((unsigned long*)(objp)) = RED_MAGIC1;
 980                         *((unsigned long*)(objp + cachep->objsize -
 981                                         BYTES_PER_WORD)) = RED_MAGIC1;
 982                         objp += BYTES_PER_WORD;
 983                 }
 984 #endif
 985
 986                 /*
 987                  * Constructors are not allowed to allocate memory from
 988                  * the same cache which they are a constructor for.
 989                  * Otherwise, deadlock. They must also be threaded.
 990                  */
 991                 if (cachep->ctor)
 992                         cachep->ctor(objp, cachep, ctor_flags);
 993 #if DEBUG
 994                 if (cachep->flags & SLAB_RED_ZONE)
 995                         objp -= BYTES_PER_WORD;
 996                 if (cachep->flags & SLAB_POISON)
 997                         /* need to poison the objs */
 998                         kmem_poison_obj(cachep, objp);
 999                 if (cachep->flags & SLAB_RED_ZONE) {
1000                         if (*((unsigned long*)(objp)) != RED_MAGIC1)
1001                                 BUG();
1002                         if (*((unsigned long*)(objp + cachep->objsize -
1003                                         BYTES_PER_WORD)) != RED_MAGIC1)
1004                                 BUG();
1005                 }
1006 #endif
1007                 slab_bufctl(slabp)[i] = i+1;
1008         }
1009         slab_bufctl(slabp)[i-1] = BUFCTL_END;
1010         slabp->free = 0;
1011 }
1012
/*
 * Grow (by 1) the number of slabs within a cache.  This is called from
 * __kmem_cache_alloc() when no slab with a free object could be found.
 *
 * @cachep: the cache to grow.
 * @flags:  SLAB_* allocation flags handed down by the allocating caller.
 *
 * Returns 1 when a new slab has been linked into the cache, and 0 when
 * SLAB_NO_GROW was given or one of the allocations for the slab failed.
 */
static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
{
        slab_t  *slabp;
        struct page     *page;
        void            *objp;
        size_t           offset;
        unsigned int     i, local_flags;
        unsigned long    ctor_flags;
        unsigned long    save_flags;

        /* Be lazy and only check for valid flags here,
         * keeping it out of the critical path in kmem_cache_alloc().
         */
        if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
                BUG();
        if (flags & SLAB_NO_GROW)
                return 0;

        /*
         * The test for missing atomic flag is performed here, rather than
         * the more obvious place, simply to reduce the critical path length
         * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
         * will eventually be caught here (where it matters).
         */
        if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
                BUG();

        ctor_flags = SLAB_CTOR_CONSTRUCTOR;
        local_flags = (flags & SLAB_LEVEL_MASK);
        if (local_flags == SLAB_ATOMIC)
                /*
                 * Not allowed to sleep.  Need to tell a constructor about
                 * this - it might need to know...
                 */
                ctor_flags |= SLAB_CTOR_ATOMIC;

        /* About to mess with non-constant members - lock. */
        spin_lock_irqsave(&cachep->spinlock, save_flags);

        /* Get the colour for this slab, and calculate the next value
         * (wrapping back to 0 once cachep->colour is reached).
         */
        offset = cachep->colour_next;
        cachep->colour_next++;
        if (cachep->colour_next >= cachep->colour)
                cachep->colour_next = 0;
        /* Turn the colour index into a byte offset. */
        offset *= cachep->colour_off;
        cachep->dflags |= DFLGS_GROWN;

        cachep->growing++;
        spin_unlock_irqrestore(&cachep->spinlock, save_flags);

        /* A series of memory allocations for a new slab.
         * Neither the cache-chain semaphore, or cache-lock, are
         * held, but the incremented growing counter prevents this
         * cache from being reaped or shrunk.
         * Note: The cache could be selected in for reaping in
         * kmem_cache_reap(), but when the final test is made the
         * growing value will be seen.
         */

        /* Get mem for the objs. */
        if (!(objp = kmem_getpages(cachep, flags)))
                goto failed;

        /* Get slab management. */
        if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
                goto opps1;

        /* Tag each of the (1 << gfporder) pages of the slab with its
         * cache and slab descriptor; kfree() and kmem_cache_free_one()
         * read these tags back to find the owner of an object.
         */
        i = 1 << cachep->gfporder;
        page = mem_map + MAP_NR(objp);
        do {
                SET_PAGE_CACHE(page, cachep);
                SET_PAGE_SLAB(page, slabp);
                PageSetSlab(page);
                page++;
        } while (--i);

        kmem_cache_init_objs(cachep, slabp, ctor_flags);

        spin_lock_irqsave(&cachep->spinlock, save_flags);
        cachep->growing--;

        /* Make slab active: it goes to the tail of the slab list, and
         * becomes the first not-full slab if there was none before.
         */
        list_add_tail(&slabp->list,&cachep->slabs);
        if (cachep->firstnotfull == &cachep->slabs)
                cachep->firstnotfull = &slabp->list;
        STATS_INC_GROWN(cachep);
        cachep->failures = 0;

        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
        return 1;
opps1:
        /* Slab management allocation failed: give the pages back. */
        kmem_freepages(cachep, objp);
failed:
        /* Undo the growing++ done above before reporting failure. */
        spin_lock_irqsave(&cachep->spinlock, save_flags);
        cachep->growing--;
        spin_unlock_irqrestore(&cachep->spinlock, save_flags);
        return 0;
}
1116
/*
 * Perform extra freeing checks (DEBUG builds only):
 * - detect bad pointers: the object must lie inside the slab and sit
 *   exactly on an object boundary;
 * - detect double free: the object must not already be on the slab's
 *   free list.
 * Any violation is a BUG(); otherwise 0 is returned.
 * Called from kmem_cache_free_one().
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
                        slab_t *slabp, void * objp)
{
        unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
        int cur;

        if (objnr >= cachep->num)
                BUG();
        if (objp != slabp->s_mem + objnr*cachep->objsize)
                BUG();

        /* Check slab's freelist to see if this obj is there. */
        cur = slabp->free;
        while (cur != BUFCTL_END) {
                if (cur == objnr)
                        BUG();
                cur = slab_bufctl(slabp)[cur];
        }
        return 0;
}
#endif
1144
1145 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1146 {
1147 #if DEBUG
1148         if (flags & SLAB_DMA) {
1149                 if (!(cachep->gfpflags & GFP_DMA))
1150                         BUG();
1151         } else {
1152                 if (cachep->gfpflags & GFP_DMA)
1153                         BUG();
1154         }
1155 #endif
1156 }
1157
1158 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1159                                                          slab_t *slabp)
1160 {
1161         void *objp;
1162
1163         STATS_INC_ALLOCED(cachep);
1164         STATS_INC_ACTIVE(cachep);
1165         STATS_SET_HIGH(cachep);
1166
1167         /* get obj pointer */
1168         slabp->inuse++;
1169         objp = slabp->s_mem + slabp->free*cachep->objsize;
1170         slabp->free=slab_bufctl(slabp)[slabp->free];
1171
1172         if (slabp->free == BUFCTL_END)
1173                 /* slab now full: move to next slab for next alloc */
1174                 cachep->firstnotfull = slabp->list.next;
1175 #if DEBUG
1176         if (cachep->flags & SLAB_POISON)
1177                 if (kmem_check_poison_obj(cachep, objp))
1178                         BUG();
1179         if (cachep->flags & SLAB_RED_ZONE) {
1180                 /* Set alloc red-zone, and check old one. */
1181                 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1182                                                          RED_MAGIC1)
1183                         BUG();
1184                 if (xchg((unsigned long *)(objp+cachep->objsize -
1185                           BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1186                         BUG();
1187                 objp += BYTES_PER_WORD;
1188         }
1189 #endif
1190         return objp;
1191 }
1192
/*
 * Returns a ptr to an obj in the given cache, taken from the slab that
 * cachep->firstnotfull points at.
 * The caller must guarantee synchronization.
 *
 * This is a statement-expression macro rather than a function so that
 * it can jump straight out of the expression: when firstnotfull points
 * back at the list head (no slab with a free object), control goes to
 * the label "alloc_new_slab", which the function using this macro has
 * to provide.
 */
#define kmem_cache_alloc_one(cachep)                            \
({                                                              \
        slab_t  *slabp;                                 \
                                                                \
        /* Get slab alloc is to come from. */                   \
        {                                                       \
                struct list_head* p = cachep->firstnotfull;     \
                if (p == &cachep->slabs)                        \
                        goto alloc_new_slab;                    \
                slabp = list_entry(p,slab_t, list);     \
        }                                                       \
        kmem_cache_alloc_one_tail(cachep, slabp);               \
})
1211
#ifdef CONFIG_SMP
/*
 * Refill the current CPU's head array with up to cachep->batchcount
 * objects taken from the cache's slabs (under the cache spinlock), then
 * hand out one of them.
 * Returns an object, or NULL if the head array is still empty because
 * no slab had a free object.  @flags is not used here.
 */
void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
{
        int todo = cachep->batchcount;
        cpucache_t *cc = cc_data(cachep);

        spin_lock(&cachep->spinlock);
        for ( ; todo; todo--) {
                /* Get slab alloc is to come from. */
                struct list_head *first = cachep->firstnotfull;
                void *obj;

                if (first == &cachep->slabs)
                        break;          /* nothing left to take */

                obj = kmem_cache_alloc_one_tail(cachep,
                                list_entry(first, slab_t, list));
                cc_entry(cc)[cc->avail++] = obj;
        }
        spin_unlock(&cachep->spinlock);

        if (!cc->avail)
                return NULL;
        return cc_entry(cc)[--cc->avail];
}
#endif
1237
/*
 * Common allocation path behind kmem_cache_alloc() and kmalloc().
 *
 * With local interrupts disabled, an object is taken from the current
 * CPU's head array (SMP, when the cache has one) or directly from the
 * cache's slabs.  If no object is available the cache is grown with
 * kmem_cache_grow() and the allocation is retried.
 *
 * Returns the object, or NULL if the cache could not be grown.
 */
static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
{
        unsigned long save_flags;
        void* objp;

        kmem_cache_alloc_head(cachep, flags);
try_again:
        local_irq_save(save_flags);
#ifdef CONFIG_SMP
        {
                cpucache_t *cc = cc_data(cachep);

                if (cc) {
                        if (cc->avail) {
                                /* Fast path: pop from the per-cpu array. */
                                STATS_INC_ALLOCHIT(cachep);
                                objp = cc_entry(cc)[--cc->avail];
                        } else {
                                /* Array empty: try to refill it from the slabs. */
                                STATS_INC_ALLOCMISS(cachep);
                                objp = kmem_cache_alloc_batch(cachep,flags);
                                if (!objp)
                                        goto alloc_new_slab_nolock;
                        }
                } else {
                        /* No per-cpu array: allocate from the slabs under the
                         * cache lock.  kmem_cache_alloc_one() jumps to
                         * alloc_new_slab (lock still held) when it finds
                         * no slab with a free object.
                         */
                        spin_lock(&cachep->spinlock);
                        objp = kmem_cache_alloc_one(cachep);
                        spin_unlock(&cachep->spinlock);
                }
        }
#else
        /* kmem_cache_alloc_one() may jump to alloc_new_slab. */
        objp = kmem_cache_alloc_one(cachep);
#endif
        local_irq_restore(save_flags);
        return objp;
alloc_new_slab:
#ifdef CONFIG_SMP
        /* Reached from kmem_cache_alloc_one() with the cache lock held. */
        spin_unlock(&cachep->spinlock);
alloc_new_slab_nolock:
#endif
        local_irq_restore(save_flags);
        if (kmem_cache_grow(cachep, flags))
                /* Someone may have stolen our objs.  Doesn't matter, we'll
                 * just come back here again.
                 */
                goto try_again;
        return NULL;
}
1284
1285 /*
1286  * Release an obj back to its cache. If the obj has a constructed
1287  * state, it should be in this state _before_ it is released.
1288  * - caller is responsible for the synchronization
1289  */
1290
/*
 * Pointer sanity checks used by the free paths.
 *
 * CHECK_NR(nr)     - BUG() if the page number nr is not below max_mapnr.
 * CHECK_PAGE(page) - BUG() if the page is not marked as a slab page.
 *
 * Both expand to nothing unless DEBUG is set.  The DEBUG versions print
 * a variable named "objp", which must therefore exist in the scope
 * where the macro is used.
 */
#if DEBUG
# define CHECK_NR(nr)                                           \
        do {                                                    \
                if (nr >= max_mapnr) {                          \
                        printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
                                (unsigned long)objp);           \
                        BUG();                                  \
                } \
        } while (0)
# define CHECK_PAGE(page)                                       \
        do {                                                    \
                if (!PageSlab(page)) {                          \
                        printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
                                (unsigned long)objp);           \
                        BUG();                                  \
                }                                               \
        } while (0)

#else
# define CHECK_NR(nr)   do { } while (0)
# define CHECK_PAGE(nr) do { } while (0)
#endif
1313
1314 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1315 {
1316         slab_t* slabp;
1317
1318         CHECK_NR(MAP_NR(objp));
1319         CHECK_PAGE(mem_map + MAP_NR(objp));
1320         /* reduces memory footprint
1321          *
1322         if (OPTIMIZE(cachep))
1323                 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1324          else
1325          */
1326         slabp = GET_PAGE_SLAB(mem_map + MAP_NR(objp));
1327
1328 #if DEBUG
1329         if (cachep->flags & SLAB_DEBUG_INITIAL)
1330                 /* Need to call the slab's constructor so the
1331                  * caller can perform a verify of its state (debugging).
1332                  * Called without the cache-lock held.
1333                  */
1334                 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1335
1336         if (cachep->flags & SLAB_RED_ZONE) {
1337                 objp -= BYTES_PER_WORD;
1338                 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1339                         /* Either write before start, or a double free. */
1340                         BUG();
1341                 if (xchg((unsigned long *)(objp+cachep->objsize -
1342                                 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1343                         /* Either write past end, or a double free. */
1344                         BUG();
1345         }
1346         if (cachep->flags & SLAB_POISON)
1347                 kmem_poison_obj(cachep, objp);
1348         if (kmem_extra_free_checks(cachep, slabp, objp))
1349                 return;
1350 #endif
1351         {
1352                 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1353
1354                 slab_bufctl(slabp)[objnr] = slabp->free;
1355                 slabp->free = objnr;
1356         }
1357         STATS_DEC_ACTIVE(cachep);
1358
1359         /* fixup slab chain */
1360         if (slabp->inuse-- == cachep->num)
1361                 goto moveslab_partial;
1362         if (!slabp->inuse)
1363                 goto moveslab_free;
1364         return;
1365
1366 moveslab_partial:
1367         /* was full.
1368          * Even if the page is now empty, we can set c_firstnotfull to
1369          * slabp: there are no partial slabs in this case
1370          */
1371         {
1372                 struct list_head *t = cachep->firstnotfull;
1373
1374                 cachep->firstnotfull = &slabp->list;
1375                 if (slabp->list.next == t)
1376                         return;
1377                 list_del(&slabp->list);
1378                 list_add_tail(&slabp->list, t);
1379                 return;
1380         }
1381 moveslab_free:
1382         /*
1383          * was partial, now empty.
1384          * c_firstnotfull might point to slabp
1385          * FIXME: optimize
1386          */
1387         {
1388                 struct list_head *t = cachep->firstnotfull->prev;
1389
1390                 list_del(&slabp->list);
1391                 list_add_tail(&slabp->list, &cachep->slabs);
1392                 if (cachep->firstnotfull == &slabp->list)
1393                         cachep->firstnotfull = t->next;
1394                 return;
1395         }
1396 }
1397
#ifdef CONFIG_SMP
/*
 * Return @len objects from the array @objpp to their slabs.
 * Does no locking of its own.
 */
static inline void __free_block (kmem_cache_t* cachep,
                                                        void** objpp, int len)
{
        int i;

        for (i = 0; i < len; i++)
                kmem_cache_free_one(cachep, objpp[i]);
}

/*
 * Same as __free_block(), but takes the cache spinlock around it.
 */
static void free_block (kmem_cache_t* cachep, void** objpp, int len)
{
        spin_lock(&cachep->spinlock);
        __free_block(cachep, objpp, len);
        spin_unlock(&cachep->spinlock);
}
#endif
1413
1414 /*
1415  * __kmem_cache_free
1416  * called with disabled ints
1417  */
1418 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1419 {
1420 #ifdef CONFIG_SMP
1421         cpucache_t *cc = cc_data(cachep);
1422
1423         CHECK_NR(MAP_NR(objp));
1424         CHECK_PAGE(mem_map + MAP_NR(objp));
1425         if (cc) {
1426                 int batchcount;
1427                 if (cc->avail < cc->limit) {
1428                         STATS_INC_FREEHIT(cachep);
1429                         cc_entry(cc)[cc->avail++] = objp;
1430                         return;
1431                 }
1432                 STATS_INC_FREEMISS(cachep);
1433                 batchcount = cachep->batchcount;
1434                 cc->avail -= batchcount;
1435                 free_block(cachep,
1436                                         &cc_entry(cc)[cc->avail],batchcount);
1437                 cc_entry(cc)[cc->avail++] = objp;
1438                 return;
1439         } else {
1440                 free_block(cachep, &objp, 1);
1441         }
1442 #else
1443         kmem_cache_free_one(cachep, objp);
1444 #endif
1445 }
1446
1447 /**
1448  * kmem_cache_alloc - Allocate an object
1449  * @cachep: The cache to allocate from.
1450  * @flags: See kmalloc().
1451  *
1452  * Allocate an object from this cache.  The flags are only relevant
1453  * if the cache has no available objects.
1454  */
1455 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1456 {
1457         return __kmem_cache_alloc(cachep, flags);
1458 }
1459
1460 /**
1461  * kmalloc - allocate memory
1462  * @size: how many bytes of memory are required.
1463  * @flags: the type of memory to allocate.
1464  *
1465  * kmalloc is the normal method of allocating memory
1466  * in the kernel.  The @flags argument may be one of:
1467  *
1468  * %GFP_BUFFER - XXX
1469  *
1470  * %GFP_ATOMIC - allocation will not sleep.  Use inside interrupt handlers.
1471  *
1472  * %GFP_USER - allocate memory on behalf of user.  May sleep.
1473  *
1474  * %GFP_KERNEL - allocate normal kernel ram.  May sleep.
1475  *
1476  * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1477  * Don't use unless you're in the NFS code.
1478  *
1479  * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
1480  */
1481 void * kmalloc (size_t size, int flags)
1482 {
1483         cache_sizes_t *csizep = cache_sizes;
1484
1485         for (; csizep->cs_size; csizep++) {
1486                 if (size > csizep->cs_size)
1487                         continue;
1488                 return __kmem_cache_alloc(flags & GFP_DMA ?
1489                          csizep->cs_dmacachep : csizep->cs_cachep, flags);
1490         }
1491         BUG(); // too big size
1492         return NULL;
1493 }
1494
1495 /**
1496  * kmem_cache_free - Deallocate an object
1497  * @cachep: The cache the allocation was from.
1498  * @objp: The previously allocated object.
1499  *
1500  * Free an object which was previously allocated from this
1501  * cache.
1502  */
1503 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1504 {
1505         unsigned long flags;
1506 #if DEBUG
1507         CHECK_NR(MAP_NR(objp));
1508         CHECK_PAGE(mem_map + MAP_NR(objp));
1509         if (cachep != GET_PAGE_CACHE(mem_map + MAP_NR(objp)))
1510                 BUG();
1511 #endif
1512
1513         local_irq_save(flags);
1514         __kmem_cache_free(cachep, objp);
1515         local_irq_restore(flags);
1516 }
1517
1518 /**
1519  * kfree - free previously allocated memory
1520  * @objp: pointer returned by kmalloc.
1521  *
1522  * Don't free memory not originally allocated by kmalloc()
1523  * or you will run into trouble.
1524  */
1525 void kfree (const void *objp)
1526 {
1527         kmem_cache_t *c;
1528         unsigned long flags;
1529
1530         if (!objp)
1531                 return;
1532         local_irq_save(flags);
1533         CHECK_NR(MAP_NR(objp));
1534         CHECK_PAGE(mem_map + MAP_NR(objp));
1535         c = GET_PAGE_CACHE(mem_map + MAP_NR(objp));
1536         __kmem_cache_free(c, (void*)objp);
1537         local_irq_restore(flags);
1538 }
1539
1540 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1541 {
1542         cache_sizes_t *csizep = cache_sizes;
1543
1544         /* This function could be moved to the header file, and
1545          * made inline so consumers can quickly determine what
1546          * cache pointer they require.
1547          */
1548         for ( ; csizep->cs_size; csizep++) {
1549                 if (size > csizep->cs_size)
1550                         continue;
1551                 break;
1552         }
1553         return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1554 }
1555
1556 #ifdef CONFIG_SMP
1557 /*
1558  * called with local interrupts disabled
1559  */
1560 static void drain_cache (void* __cachep)
1561 {
1562         kmem_cache_t *cachep = __cachep;
1563         cpucache_t *cc = cc_data(cachep);
1564
1565         if (cc && cc->avail) {
1566                 free_block(cachep, cc_entry(cc), cc->avail);
1567                 cc->avail = 0;
1568         }
1569 }
1570
/*
 * Argument block passed to ccupdate_callback() by kmem_tune_cpucache().
 */
typedef struct ccupdate_struct_s
{
        /* Cache whose per-cpu head arrays are being exchanged. */
        kmem_cache_t* cachep;
        /* On entry the new array for each CPU (may be NULL); the callback
         * replaces each entry with the array that CPU used before.
         */
        cpucache_t* new[NR_CPUS];
} ccupdate_struct_t;
1576
1577 /*
1578  * called with local interrupts disabled
1579  */
1580 static void ccupdate_callback (void* __new)
1581 {
1582         ccupdate_struct_t* new = __new;
1583         cpucache_t *old = cc_data(new->cachep);
1584
1585         cc_data(new->cachep) = new->new[smp_processor_id()];
1586         new->new[smp_processor_id()] = old;
1587 }
1588
1589 /* called with cache_chain_sem acquired.  */
1590 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1591 {
1592         ccupdate_struct_t new;
1593         int i;
1594
1595         /*
1596          * These are admin-provided, so we are more graceful.
1597          */
1598         if (limit < 0)
1599                 return -EINVAL;
1600         if (batchcount < 0)
1601                 return -EINVAL;
1602         if (batchcount > limit)
1603                 return -EINVAL;
1604         if (limit != 0 && !batchcount)
1605                 return -EINVAL;
1606
1607         memset(&new.new,0,sizeof(new.new));
1608         if (limit) {
1609                 for (i = 0; i< smp_num_cpus; i++) {
1610                         cpucache_t* ccnew;
1611
1612
1613                         ccnew = kmalloc(sizeof(void*)*limit+
1614                                         sizeof(cpucache_t), GFP_KERNEL);
1615                         if (!ccnew)
1616                                 goto oom;
1617                         ccnew->limit = limit;
1618                         ccnew->avail = 0;
1619                         new.new[cpu_logical_map(i)] = ccnew;
1620                 }
1621         }
1622         new.cachep = cachep;
1623         spin_lock_irq(&cachep->spinlock);
1624         cachep->batchcount = batchcount;
1625         spin_unlock_irq(&cachep->spinlock);
1626
1627         smp_call_function(ccupdate_callback,&new,1,1);
1628         local_irq_disable();
1629         ccupdate_callback(&new);
1630         local_irq_enable();
1631
1632         for (i = 0; i < smp_num_cpus; i++) {
1633                 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1634                 if (!ccold)
1635                         continue;
1636                 local_irq_disable();
1637                 free_block(cachep, cc_entry(ccold), ccold->avail);
1638                 local_irq_enable();
1639                 kfree(ccold);
1640         }
1641         return 0;
1642 oom:
1643         for (i--; i >= 0; i--)
1644                 kfree(new.new[cpu_logical_map(i)]);
1645         return -ENOMEM;
1646 }
1647
1648 static void enable_cpucache (kmem_cache_t *cachep)
1649 {
1650         int err;
1651         int limit;
1652
1653         /* FIXME: optimize */
1654         if (cachep->objsize > PAGE_SIZE)
1655                 return;
1656         if (cachep->objsize > 1024)
1657                 limit = 60;
1658         else if (cachep->objsize > 256)
1659                 limit = 124;
1660         else
1661                 limit = 252;
1662
1663         err = kmem_tune_cpucache(cachep, limit, limit/2);
1664         if (err)
1665                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1666                                         cachep->name, -err);
1667 }
1668
1669 static void enable_all_cpucaches (void)
1670 {
1671         struct list_head* p;
1672
1673         down(&cache_chain_sem);
1674
1675         p = &cache_cache.next;
1676         do {
1677                 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1678
1679                 enable_cpucache(cachep);
1680                 p = cachep->next.next;
1681         } while (p != &cache_cache.next);
1682
1683         up(&cache_chain_sem);
1684 }
1685 #endif
1686
1687 /**
1688  * kmem_cache_reap - Reclaim memory from caches.
1689  * @gfp_mask: the type of memory required.
1690  *
1691  * Called from try_to_free_page().
1692  */
1693 void kmem_cache_reap (int gfp_mask)
1694 {
1695         slab_t *slabp;
1696         kmem_cache_t *searchp;
1697         kmem_cache_t *best_cachep;
1698         unsigned int best_pages;
1699         unsigned int best_len;
1700         unsigned int scan;
1701
1702         if (gfp_mask & __GFP_WAIT)
1703                 down(&cache_chain_sem);
1704         else
1705                 if (down_trylock(&cache_chain_sem))
1706                         return;
1707
1708         scan = REAP_SCANLEN;
1709         best_len = 0;
1710         best_pages = 0;
1711         best_cachep = NULL;
1712         searchp = clock_searchp;
1713         do {
1714                 unsigned int pages;
1715                 struct list_head* p;
1716                 unsigned int full_free;
1717
1718                 /* It's safe to test this without holding the cache-lock. */
1719                 if (searchp->flags & SLAB_NO_REAP)
1720                         goto next;
1721                 /* FIXME: is this really a good idea? */
1722                 if (gfp_mask & GFP_DMA) {
1723                         if (!(searchp->gfpflags & GFP_DMA))
1724                                 goto next;
1725                 } else {
1726                         if (searchp->gfpflags & GFP_DMA)
1727                                 goto next;
1728                 }
1729                 spin_lock_irq(&searchp->spinlock);
1730                 if (searchp->growing)
1731                         goto next_unlock;
1732                 if (searchp->dflags & DFLGS_GROWN) {
1733                         searchp->dflags &= ~DFLGS_GROWN;
1734                         goto next_unlock;
1735                 }
1736 #ifdef CONFIG_SMP
1737                 {
1738                         cpucache_t *cc = cc_data(searchp);
1739                         if (cc && cc->avail) {
1740                                 __free_block(searchp, cc_entry(cc), cc->avail);
1741                                 cc->avail = 0;
1742                         }
1743                 }
1744 #endif
1745
1746                 full_free = 0;
1747                 p = searchp->slabs.prev;
1748                 while (p != &searchp->slabs) {
1749                         slabp = list_entry(p, slab_t, list);
1750                         if (slabp->inuse)
1751                                 break;
1752                         full_free++;
1753                         p = p->prev;
1754                 }
1755
1756                 /*
1757                  * Try to avoid slabs with constructors and/or
1758                  * more than one page per slab (as it can be difficult
1759                  * to get high orders from gfp()).
1760                  */
1761                 pages = full_free * (1<<searchp->gfporder);
1762                 if (searchp->ctor)
1763                         pages = (pages*4+1)/5;
1764                 if (searchp->gfporder)
1765                         pages = (pages*4+1)/5;
1766                 if (pages > best_pages) {
1767                         best_cachep = searchp;
1768                         best_len = full_free;
1769                         best_pages = pages;
1770                         if (full_free >= REAP_PERFECT) {
1771                                 clock_searchp = list_entry(searchp->next.next,
1772                                                         kmem_cache_t,next);
1773                                 goto perfect;
1774                         }
1775                 }
1776 next_unlock:
1777                 spin_unlock_irq(&searchp->spinlock);
1778 next:
1779                 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1780         } while (--scan && searchp != clock_searchp);
1781
1782         clock_searchp = searchp;
1783
1784         if (!best_cachep)
1785                 /* couldn't find anything to reap */
1786                 goto out;
1787
1788         spin_lock_irq(&best_cachep->spinlock);
1789 perfect:
1790         /* free only 80% of the free slabs */
1791         best_len = (best_len*4 + 1)/5;
1792         for (scan = 0; scan < best_len; scan++) {
1793                 struct list_head *p;
1794
1795                 if (best_cachep->growing)
1796                         break;
1797                 p = best_cachep->slabs.prev;
1798                 if (p == &best_cachep->slabs)
1799                         break;
1800                 slabp = list_entry(p,slab_t,list);
1801                 if (slabp->inuse)
1802                         break;
1803                 list_del(&slabp->list);
1804                 if (best_cachep->firstnotfull == &slabp->list)
1805                         best_cachep->firstnotfull = &best_cachep->slabs;
1806                 STATS_INC_REAPED(best_cachep);
1807
1808                 /* Safe to drop the lock. The slab is no longer linked to the
1809                  * cache.
1810                  */
1811                 spin_unlock_irq(&best_cachep->spinlock);
1812                 kmem_slab_destroy(best_cachep, slabp);
1813                 spin_lock_irq(&best_cachep->spinlock);
1814         }
1815         spin_unlock_irq(&best_cachep->spinlock);
1816 out:
1817         up(&cache_chain_sem);
1818         return;
1819 }
1820
1821 #ifdef CONFIG_PROC_FS
1822 /* /proc/slabinfo
1823  *      cache-name num-active-objs total-objs
1824  *      obj-size num-active-slabs total-slabs
1825  *      num-pages-per-slab
1826  */
/*
 * FIXUP(t) - account for the text produced so far against the window
 * the reader asked for.  Uses the caller's 'len', 'off' and 'count'.
 *
 * While everything generated so far lies at or before 'off', that text
 * is discarded (off -= len, len = 0).  Otherwise, once more than
 * 'count' bytes beyond 'off' have been generated, control jumps to
 * label t.
 */
#define FIXUP(t)				\
	do {					\
		if (len > off) {		\
			if (len-off > count)	\
				goto t;		\
		} else {			\
			off -= len;		\
			len = 0;		\
		}				\
	} while (0)
1837
1838 static int proc_getdata (char*page, char**start, off_t off, int count)
1839 {
1840         struct list_head *p;
1841         int len = 0;
1842
1843         /* Output format version, so at least we can change it without _too_
1844          * many complaints.
1845          */
1846         len += sprintf(page+len, "slabinfo - version: 1.1"
1847 #if STATS
1848                                 " (statistics)"
1849 #endif
1850 #ifdef CONFIG_SMP
1851                                 " (SMP)"
1852 #endif
1853                                 "\n");
1854         FIXUP(got_data);
1855
1856         down(&cache_chain_sem);
1857         p = &cache_cache.next;
1858         do {
1859                 kmem_cache_t    *cachep;
1860                 struct list_head *q;
1861                 slab_t          *slabp;
1862                 unsigned long   active_objs;
1863                 unsigned long   num_objs;
1864                 unsigned long   active_slabs = 0;
1865                 unsigned long   num_slabs;
1866                 cachep = list_entry(p, kmem_cache_t, next);
1867
1868                 spin_lock_irq(&cachep->spinlock);
1869                 active_objs = 0;
1870                 num_slabs = 0;
1871                 list_for_each(q,&cachep->slabs) {
1872                         slabp = list_entry(q, slab_t, list);
1873                         active_objs += slabp->inuse;
1874                         num_objs += cachep->num;
1875                         if (slabp->inuse)
1876                                 active_slabs++;
1877                         else
1878                                 num_slabs++;
1879                 }
1880                 num_slabs+=active_slabs;
1881                 num_objs = num_slabs*cachep->num;
1882
1883                 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1884                         cachep->name, active_objs, num_objs, cachep->objsize,
1885                         active_slabs, num_slabs, (1<<cachep->gfporder));
1886
1887 #if STATS
1888                 {
1889                         unsigned long errors = cachep->errors;
1890                         unsigned long high = cachep->high_mark;
1891                         unsigned long grown = cachep->grown;
1892                         unsigned long reaped = cachep->reaped;
1893                         unsigned long allocs = cachep->num_allocations;
1894
1895                         len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1896                                         high, allocs, grown, reaped, errors);
1897                 }
1898 #endif
1899 #ifdef CONFIG_SMP
1900                 {
1901                         unsigned int batchcount = cachep->batchcount;
1902                         unsigned int limit;
1903
1904                         if (cc_data(cachep))
1905                                 limit = cc_data(cachep)->limit;
1906                          else
1907                                 limit = 0;
1908                         len += sprintf(page+len, " : %4u %4u",
1909                                         limit, batchcount);
1910                 }
1911 #endif
1912 #if STATS && defined(CONFIG_SMP)
1913                 {
1914                         unsigned long allochit = atomic_read(&cachep->allochit);
1915                         unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1916                         unsigned long freehit = atomic_read(&cachep->freehit);
1917                         unsigned long freemiss = atomic_read(&cachep->freemiss);
1918                         len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1919                                         allochit, allocmiss, freehit, freemiss);
1920                 }
1921 #endif
1922                 len += sprintf(page+len,"\n");
1923                 spin_unlock_irq(&cachep->spinlock);
1924                 FIXUP(got_data_up);
1925                 p = cachep->next.next;
1926         } while (p != &cache_cache.next);
1927 got_data_up:
1928         up(&cache_chain_sem);
1929
1930 got_data:
1931         *start = page+off;
1932         return len;
1933 }
1934
/**
 * slabinfo_read_proc - generates /proc/slabinfo
 * @page: scratch area, one page long
 * @start: pointer to the pointer to the output buffer
 * @off: offset within /proc/slabinfo the caller is interested in
 * @count: requested len in bytes
 * @eof: eof marker
 * @data: unused
 *
 * Each line of the buffer holds, in order:
 *   cache-name, num-active-objs, total-objs, object size,
 *   num-active-slabs, total-slabs, num-pages-per-slab
 * plus further values on SMP and with statistics enabled.
 *
 * Returns the number of bytes available at *@start, clamped to the
 * range 0..@count.  *@eof is set when everything left fits in @count.
 */
int slabinfo_read_proc (char *page, char **start, off_t off,
				 int count, int *eof, void *data)
{
	int avail = proc_getdata(page, start, off, count);

	/* Bytes that lie before *start are not part of this read. */
	avail -= (*start - page);

	if (avail <= count)
		*eof = 1;
	else
		avail = count;

	return (avail < 0) ? 0 : avail;
}
1965
1966 #define MAX_SLABINFO_WRITE 128
/**
 * slabinfo_write_proc - SMP tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data len
 * @data: unused
 *
 * The written text has the form "cache-name limit batchcount".  The
 * named cache is looked up in the cache chain and the two values are
 * passed to kmem_tune_cpucache().
 *
 * Returns @count on success.  Returns -EINVAL when @count exceeds
 * MAX_SLABINFO_WRITE, the text contains no space, no cache has the
 * given name, or the kernel is built without CONFIG_SMP; -EFAULT when
 * the user buffer cannot be read; otherwise the error returned by
 * kmem_tune_cpucache().
 */
int slabinfo_write_proc (struct file *file, const char *buffer,
				unsigned long count, void *data)
{
#ifdef CONFIG_SMP
	/* One extra byte so a maximum-length write can still be terminated. */
	char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
	int limit, batchcount, res;
	struct list_head *p;

	if (count > MAX_SLABINFO_WRITE)
		return -EINVAL;
	if (copy_from_user(kbuf, buffer, count))
		return -EFAULT;
	/*
	 * copy_from_user() copies raw bytes only.  Terminate the text so
	 * that strchr(), simple_strtol() and strcmp() below never read
	 * beyond what the user supplied.
	 */
	kbuf[count] = '\0';

	tmp = strchr(kbuf, ' ');
	if (!tmp)
		return -EINVAL;
	/* Split off the cache name; the numbers follow the first space. */
	*tmp = '\0';
	tmp++;
	limit = simple_strtol(tmp, &tmp, 10);
	while (*tmp == ' ')
		tmp++;
	batchcount = simple_strtol(tmp, &tmp, 10);

	/* Find the cache in the chain of caches. */
	down(&cache_chain_sem);
	res = -EINVAL;
	list_for_each(p,&cache_chain) {
		kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);

		if (!strcmp(cachep->name, kbuf)) {
			res = kmem_tune_cpucache(cachep, limit, batchcount);
			break;
		}
	}
	up(&cache_chain_sem);
	if (res >= 0)
		res = count;
	return res;
#else
	return -EINVAL;
#endif
}
2016 #endif