mm/slab.c

   1 /*
   2  * linux/mm/slab.c
   3  * Written by Mark Hemment, 1996/97.
   4  * (markhe@nextd.demon.co.uk)
   5  *
   6  * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   7  *
   8  * Major cleanup, different bufctl logic, per-cpu arrays
   9  *      (c) 2000 Manfred Spraul
  10  *
  11  * An implementation of the Slab Allocator as described in outline in;
  12  *      UNIX Internals: The New Frontiers by Uresh Vahalia
  13  *      Pub: Prentice Hall      ISBN 0-13-101908-2
  14  * or with a little more detail in;
  15  *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  16  *      Jeff Bonwick (Sun Microsystems).
  17  *      Presented at: USENIX Summer 1994 Technical Conference
  18  *
  19  *
  20  * The memory is organized in caches, one cache for each object type.
  21  * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
  22  * Each cache consists out of many slabs (they are small (usually one
  23  * page long) and always contiguous), and each slab contains multiple
  24  * initialized objects.
  25  *
  26  * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
  27  * normal). If you need a special memory type, then must create a new
  28  * cache for that memory type.
  29  *
  30  * In order to reduce fragmentation, the slabs are sorted in 3 groups:
  31  *   full slabs with 0 free objects
  32  *   partial slabs
  33  *   empty slabs with no allocated objects
  34  *
  35  * If partial slabs exist, then new allocations come from these slabs,
  36  * otherwise from empty slabs or new slabs are allocated.
  37  *
  38  * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
  39  * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
  40  *
  41  * On SMP systems, each cache has a short per-cpu head array, most allocs
  42  * and frees go into that array, and if that array overflows, then 1/2
  43  * of the entries in the array are given back into the global cache.
  44  * This reduces the number of spinlock operations.
  45  *
  46  * The c_cpuarray can be changed with a smp_call_function call,
  47  * it may not be read with enabled local interrupts.
  48  *
  49  * SMP synchronization:
  50  *  constructors and destructors are called without any locking.
  51  *  Several members in kmem_cache_t and slab_t never change, they
  52  *      are accessed without any locking.
  53  *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
  54  *      smp_call_function() is used if one cpu must flush the arrays from
  55  *      other cpus.
  56  *  The non-constant members are protected with a per-cache irq spinlock.
  57  *
  58  * Further notes from the original documentation:
  59  *
  60  * 11 April '97.  Started multi-threading - markhe
  61  *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
  62  *      The sem is only needed when accessing/extending the cache-chain, which
  63  *      can never happen inside an interrupt (kmem_cache_create(),
  64  *      kmem_cache_shrink() and kmem_cache_reap()).
  65  *
  66  *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  67  *      maybe be sleeping and therefore not holding the semaphore/lock), the
  68  *      growing field is used.  This also prevents reaping from a cache.
  69  *
  70  *      At present, each engine can be growing a cache.  This should be blocked.
  71  *
  72  */
  73
  74 #include        <linux/config.h>
  75 #include        <linux/slab.h>
  76 #include        <linux/interrupt.h>
  77 #include        <linux/init.h>
  78 #include        <asm/uaccess.h>
  79
  80 /*
  81  * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
  82  *                SLAB_RED_ZONE & SLAB_POISON.
  83  *                0 for faster, smaller code (especially in the critical paths).
  84  *
  85  * STATS        - 1 to collect stats for /proc/slabinfo.
  86  *                0 for faster, smaller code (especially in the critical paths).
  87  *
  88  * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
  89  */
  90
  91 #define DEBUG           0
  92 #define STATS           0
  93 #define FORCED_DEBUG    0
  94
  95 /*
  96  * Parameters for kmem_cache_reap
  97  */
  98 #define REAP_SCANLEN    10
  99 #define REAP_PERFECT    10
 100
 101 /* Shouldn't this be in a header file somewhere? */
 102 #define BYTES_PER_WORD          sizeof(void *)
 103
 104 /* Legal flag mask for kmem_cache_create(). */
 105 #if DEBUG
 106 # define CREATE_MASK    (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
 107                          SLAB_POISON | SLAB_HWCACHE_ALIGN | \
 108                          SLAB_NO_REAP | SLAB_CACHE_DMA)
 109 #else
 110 # define CREATE_MASK    (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
 111 #endif
 112
 113 /*
 114  * kmem_bufctl_t:
 115  *
 116  * Bufctl's are used for linking objs within a slab
 117  * linked offsets.
 118  *
 119  * This implementaion relies on "struct page" for locating the cache &
 120  * slab an object belongs to.
 121  * This allows the bufctl structure to be small (one int), but limits
 122  * the number of objects a slab (not a cache) can contain when off-slab
 123  * bufctls are used. The limit is the size of the largest general cache
 124  * that does not use off-slab slabs.
 125  * For 32bit archs with 4 kB pages, is this 56.
 126  * This is not serious, as it is only for large objects, when it is unwise
 127  * to have too many per slab.
 128  * Note: This limit can be raised by introducing a general cache whose size
 129  * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 130  */
 131
 132 #define BUFCTL_END 0xffffFFFF
 133 #define SLAB_LIMIT 0xffffFFFE
 134 typedef unsigned int kmem_bufctl_t;
 135
 136 /* Max number of objs-per-slab for caches which use off-slab slabs.
 137  * Needed to avoid a possible looping condition in kmem_cache_grow().
 138  */
 139 static unsigned long offslab_limit;
 140
 141 /*
 142  * slab_t
 143  *
 144  * Manages the objs in a slab. Placed either at the beginning of mem allocated
 145  * for a slab, or allocated from an general cache.
 146  * Slabs are chained into one ordered list: fully used, partial, then fully
 147  * free slabs.
 148  */
 149 typedef struct slab_s {
 150         struct list_head        list;
 151         unsigned long           colouroff;
 152         void                    *s_mem;         /* including colour offset */
 153         unsigned int            inuse;          /* num of objs active in slab */
 154         kmem_bufctl_t           free;
 155 } slab_t;
 156
 157 #define slab_bufctl(slabp) \
 158         ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
 159
 160 /*
 161  * cpucache_t
 162  *
 163  * Per cpu structures
 164  * The limit is stored in the per-cpu structure to reduce the data cache
 165  * footprint.
 166  */
 167 typedef struct cpucache_s {
 168         unsigned int avail;
 169         unsigned int limit;
 170 } cpucache_t;
 171
 172 #define cc_entry(cpucache) \
 173         ((void **)(((cpucache_t*)cpucache)+1))
 174 #define cc_data(cachep) \
 175         ((cachep)->cpudata[smp_processor_id()])
 176 /*
 177  * kmem_cache_t
 178  *
 179  * manages a cache.
 180  */
 181
 182 #define CACHE_NAMELEN   20      /* max name length for a slab cache */
 183
 184 struct kmem_cache_s {
 185 /* 1) each alloc & free */
 186         /* full, partial first, then free */
 187         struct list_head        slabs;
 188         struct list_head        *firstnotfull;
 189         unsigned int            objsize;
 190         unsigned int            flags;  /* constant flags */
 191         unsigned int            num;    /* # of objs per slab */
 192         spinlock_t              spinlock;
 193 #ifdef CONFIG_SMP
 194         unsigned int            batchcount;
 195 #endif
 196
 197 /* 2) slab additions /removals */
 198         /* order of pgs per slab (2^n) */
 199         unsigned int            gfporder;
 200
 201         /* force GFP flags, e.g. GFP_DMA */
 202         unsigned int            gfpflags;
 203
 204         size_t                  colour;         /* cache colouring range */
 205         unsigned int            colour_off;     /* colour offset */
 206         unsigned int            colour_next;    /* cache colouring */
 207         kmem_cache_t            *slabp_cache;
 208         unsigned int            growing;
 209         unsigned int            dflags;         /* dynamic flags */
 210
 211         /* constructor func */
 212         void (*ctor)(void *, kmem_cache_t *, unsigned long);
 213
 214         /* de-constructor func */
 215         void (*dtor)(void *, kmem_cache_t *, unsigned long);
 216
 217         unsigned long           failures;
 218
 219 /* 3) cache creation/removal */
 220         char                    name[CACHE_NAMELEN];
 221         struct list_head        next;
 222 #ifdef CONFIG_SMP
 223 /* 4) per-cpu data */
 224         cpucache_t              *cpudata[NR_CPUS];
 225 #endif
 226 #if STATS
 227         unsigned long           num_active;
 228         unsigned long           num_allocations;
 229         unsigned long           high_mark;
 230         unsigned long           grown;
 231         unsigned long           reaped;
 232         unsigned long           errors;
 233 #ifdef CONFIG_SMP
 234         atomic_t                allochit;
 235         atomic_t                allocmiss;
 236         atomic_t                freehit;
 237         atomic_t                freemiss;
 238 #endif
 239 #endif
 240 };
 241
 242 /* internal c_flags */
 243 #define CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
 244 #define CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
 245
 246 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
 247 #define DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */
 248
 249 #define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
 250 #define OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
 251 #define GROWN(x)        ((x)->dlags & DFLGS_GROWN)
 252
 253 #if STATS
 254 #define STATS_INC_ACTIVE(x)     ((x)->num_active++)
 255 #define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 256 #define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 257 #define STATS_INC_GROWN(x)      ((x)->grown++)
 258 #define STATS_INC_REAPED(x)     ((x)->reaped++)
 259 #define STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
 260                                         (x)->high_mark = (x)->num_active; \
 261                                 } while (0)
 262 #define STATS_INC_ERR(x)        ((x)->errors++)
 263 #else
 264 #define STATS_INC_ACTIVE(x)     do { } while (0)
 265 #define STATS_DEC_ACTIVE(x)     do { } while (0)
 266 #define STATS_INC_ALLOCED(x)    do { } while (0)
 267 #define STATS_INC_GROWN(x)      do { } while (0)
 268 #define STATS_INC_REAPED(x)     do { } while (0)
 269 #define STATS_SET_HIGH(x)       do { } while (0)
 270 #define STATS_INC_ERR(x)        do { } while (0)
 271 #endif
 272
 273 #if STATS && defined(CONFIG_SMP)
 274 #define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
 275 #define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
 276 #define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
 277 #define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
 278 #else
 279 #define STATS_INC_ALLOCHIT(x)   do { } while (0)
 280 #define STATS_INC_ALLOCMISS(x)  do { } while (0)
 281 #define STATS_INC_FREEHIT(x)    do { } while (0)
 282 #define STATS_INC_FREEMISS(x)   do { } while (0)
 283 #endif
 284
 285 #if DEBUG
 286 /* Magic nums for obj red zoning.
 287  * Placed in the first word before and the first word after an obj.
 288  */
 289 #define RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
 290 #define RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
 291
 292 /* ...and for poisoning */
 293 #define POISON_BYTE     0x5a            /* byte value for poisoning */
 294 #define POISON_END      0xa5            /* end-byte of poisoning */
 295
 296 #endif
 297
 298 /* maximum size of an obj (in 2^order pages) */
 299 #define MAX_OBJ_ORDER   5       /* 32 pages */
 300
 301 /*
 302  * Do not go above this order unless 0 objects fit into the slab.
 303  */
 304 #define BREAK_GFP_ORDER_HI      2
 305 #define BREAK_GFP_ORDER_LO      1
 306 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 307
 308 /*
 309  * Absolute limit for the gfp order
 310  */
 311 #define MAX_GFP_ORDER   5       /* 32 pages */
 312
 313
 314 /* Macros for storing/retrieving the cachep and or slab from the
 315  * global 'mem_map'. These are used to find the slab an obj belongs to.
 316  * With kfree(), these are used to find the cache which an obj belongs to.
 317  */
 318 #define SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
 319 #define GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
 320 #define SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
 321 #define GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
 322
 323 /* Size description struct for general caches. */
 324 typedef struct cache_sizes {
 325         size_t           cs_size;
 326         kmem_cache_t    *cs_cachep;
 327         kmem_cache_t    *cs_dmacachep;
 328 } cache_sizes_t;
 329
 330 static cache_sizes_t cache_sizes[] = {
 331 #if PAGE_SIZE == 4096
 332         {    32,        NULL, NULL},
 333 #endif
 334         {    64,        NULL, NULL},
 335         {   128,        NULL, NULL},
 336         {   256,        NULL, NULL},
 337         {   512,        NULL, NULL},
 338         {  1024,        NULL, NULL},
 339         {  2048,        NULL, NULL},
 340         {  4096,        NULL, NULL},
 341         {  8192,        NULL, NULL},
 342         { 16384,        NULL, NULL},
 343         { 32768,        NULL, NULL},
 344         { 65536,        NULL, NULL},
 345         {131072,        NULL, NULL},
 346         {     0,        NULL, NULL}
 347 };
 348
 349 /* internal cache of cache description objs */
 350 static kmem_cache_t cache_cache = {
 351         slabs:          LIST_HEAD_INIT(cache_cache.slabs),
 352         firstnotfull:   &cache_cache.slabs,
 353         objsize:        sizeof(kmem_cache_t),
 354         flags:          SLAB_NO_REAP,
 355         spinlock:       SPIN_LOCK_UNLOCKED,
 356         colour_off:     L1_CACHE_BYTES,
 357         name:           "kmem_cache",
 358         next:           LIST_HEAD_INIT(cache_cache.next)
 359 };
 360
 361 /* Guard access to the cache-chain. */
 362 static struct semaphore cache_chain_sem;
 363
 364 /* Place maintainer for reaping. */
 365 static kmem_cache_t *clock_searchp = &cache_cache;
 366
 367 #define cache_chain (cache_cache.next)
 368
 369 #ifdef CONFIG_SMP
 370 /*
 371  * chicken and egg problem: delay the per-cpu array allocation
 372  * until the general caches are up.
 373  */
 374 static int g_cpucache_up;
 375
 376 static void drain_cache (void *__cachep);
 377 static void enable_cpucache (kmem_cache_t *cachep);
 378 static void enable_all_cpucaches (void);
 379 #endif
 380
 381 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 382 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
 383                  int flags, size_t *left_over, unsigned int *num)
 384 {
 385         int i;
 386         size_t wastage = PAGE_SIZE<<gfporder;
 387         size_t extra = 0;
 388         size_t base = 0;
 389
 390         if (!(flags & CFLGS_OFF_SLAB)) {
 391                 base = sizeof(slab_t);
 392                 extra = sizeof(kmem_bufctl_t);
 393         }
 394         i = 0;
 395         while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
 396                 i++;
 397         if (i > 0)
 398                 i--;
 399
 400         if (i > SLAB_LIMIT)
 401                 i = SLAB_LIMIT;
 402
 403         *num = i;
 404         wastage -= i*size;
 405         wastage -= L1_CACHE_ALIGN(base+i*extra);
 406         *left_over = wastage;
 407 }
 408
 409 /* Initialisation - setup the `cache' cache. */
 410 void __init kmem_cache_init(void)
 411 {
 412         size_t left_over;
 413
 414         init_MUTEX(&cache_chain_sem);
 415         list_add(&cache_cache.next,&cache_chain);
 416
 417         kmem_cache_estimate(0, cache_cache.objsize, 0,
 418                         &left_over, &cache_cache.num);
 419         if (!cache_cache.num)
 420                 BUG();
 421
 422         cache_cache.colour = left_over/cache_cache.colour_off;
 423         cache_cache.colour_next = 0;
 424 }
 425
 426
 427 /* Initialisation - setup remaining internal and general caches.
 428  * Called after the gfp() functions have been enabled, and before smp_init().
 429  */
 430 void __init kmem_cache_sizes_init(void)
 431 {
 432         cache_sizes_t *sizes = cache_sizes;
 433         char name[20];
 434         /*
 435          * Fragmentation resistance on low memory - only use bigger
 436          * page orders on machines with more than 32MB of memory.
 437          */
 438         if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 439                 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 440         do {
 441                 /* For performance, all the general caches are L1 aligned.
 442                  * This should be particularly beneficial on SMP boxes, as it
 443                  * eliminates "false sharing".
 444                  * Note for systems short on memory removing the alignment will
 445                  * allow tighter packing of the smaller caches. */
 446                 sprintf(name,"size-%ld", (unsigned long) sizes->cs_size);
 447                 if (!(sizes->cs_cachep =
 448                         kmem_cache_create(name, sizes->cs_size,
 449                                         0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
 450                         BUG();
 451                 }
 452
 453                 /* Inc off-slab bufctl limit until the ceiling is hit. */
 454                 if (!(OFF_SLAB(sizes->cs_cachep))) {
 455                         offslab_limit = sizes->cs_size-sizeof(slab_t);
 456                         offslab_limit /= 2;
 457                 }
 458                 sprintf(name, "size-%ld(DMA)", (unsigned long) sizes->cs_size);
 459                 sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
 460                               SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
 461                 if (!sizes->cs_dmacachep)
 462                         BUG();
 463                 sizes++;
 464         } while (sizes->cs_size);
 465 }
 466
 467 void __init kmem_cpucache_init(void)
 468 {
 469 #ifdef CONFIG_SMP
 470         g_cpucache_up = 1;
 471         enable_all_cpucaches();
 472 #endif
 473 }
 474
 475 /* Interface to system's page allocator. No need to hold the cache-lock.
 476  */
 477 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
 478 {
 479         void    *addr;
 480
 481         /*
 482          * If we requested dmaable memory, we will get it. Even if we
 483          * did not request dmaable memory, we might get it, but that
 484          * would be relatively rare and ignorable.
 485          */
 486         flags |= cachep->gfpflags;
 487         addr = (void*) __get_free_pages(flags, cachep->gfporder);
 488         /* Assume that now we have the pages no one else can legally
 489          * messes with the 'struct page's.
 490          * However vm_scan() might try to test the structure to see if
 491          * it is a named-page or buffer-page.  The members it tests are
 492          * of no interest here.....
 493          */
 494         return addr;
 495 }
 496
 497 /* Interface to system's page release. */
 498 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
 499 {
 500         unsigned long i = (1<<cachep->gfporder);
 501         struct page *page = mem_map + MAP_NR(addr);
 502
 503         /* free_pages() does not clear the type bit - we do that.
 504          * The pages have been unlinked from their cache-slab,
 505          * but their 'struct page's might be accessed in
 506          * vm_scan(). Shouldn't be a worry.
 507          */
 508         while (i--) {
 509                 PageClearSlab(page);
 510                 page++;
 511         }
 512         free_pages((unsigned long)addr, cachep->gfporder);
 513 }
 514
 515 #if DEBUG
 516 static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
 517 {
 518         int size = cachep->objsize;
 519         if (cachep->flags & SLAB_RED_ZONE) {
 520                 addr += BYTES_PER_WORD;
 521                 size -= 2*BYTES_PER_WORD;
 522         }
 523         memset(addr, POISON_BYTE, size);
 524         *(unsigned char *)(addr+size-1) = POISON_END;
 525 }
 526
 527 static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
 528 {
 529         int size = cachep->objsize;
 530         void *end;
 531         if (cachep->flags & SLAB_RED_ZONE) {
 532                 addr += BYTES_PER_WORD;
 533                 size -= 2*BYTES_PER_WORD;
 534         }
 535         end = memchr(addr, POISON_END, size);
 536         if (end != (addr+size-1))
 537                 return 1;
 538         return 0;
 539 }
 540 #endif
 541
 542 /* Destroy all the objs in a slab, and release the mem back to the system.
 543  * Before calling the slab must have been unlinked from the cache.
 544  * The cache-lock is not held/needed.
 545  */
 546 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
 547 {
 548         if (cachep->dtor
 549 #if DEBUG
 550                 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
 551 #endif
 552         ) {
 553                 int i;
 554                 for (i = 0; i < cachep->num; i++) {
 555                         void* objp = slabp->s_mem+cachep->objsize*i;
 556 #if DEBUG
 557                         if (cachep->flags & SLAB_RED_ZONE) {
 558                                 if (*((unsigned long*)(objp)) != RED_MAGIC1)
 559                                         BUG();
 560                                 if (*((unsigned long*)(objp + cachep->objsize
 561                                                 -BYTES_PER_WORD)) != RED_MAGIC1)
 562                                         BUG();
 563                                 objp += BYTES_PER_WORD;
 564                         }
 565 #endif
 566                         if (cachep->dtor)
 567                                 (cachep->dtor)(objp, cachep, 0);
 568 #if DEBUG
 569                         if (cachep->flags & SLAB_RED_ZONE) {
 570                                 objp -= BYTES_PER_WORD;
 571                         }
 572                         if ((cachep->flags & SLAB_POISON)  &&
 573                                 kmem_check_poison_obj(cachep, objp))
 574                                 BUG();
 575 #endif
 576                 }
 577         }
 578
 579         kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
 580         if (OFF_SLAB(cachep))
 581                 kmem_cache_free(cachep->slabp_cache, slabp);
 582 }
 583
 584
 585 /**
 586  * kmem_cache_create - Create a cache.
 587  * @name: A string which is used in /proc/slabinfo to identify this cache.
 588  * @size: The size of objects to be created in this cache.
 589  * @offset: The offset to use within the page.
 590  * @flags: SLAB flags
 591  * @ctor: A constructor for the objects.
 592  * @dtor: A destructor for the objects.
 593  *
 594  * Returns a ptr to the cache on success, NULL on failure.
 595  * Cannot be called within a int, but can be interrupted.
 596  * The @ctor is run when new pages are allocated by the cache
 597  * and the @dtor is run before the pages are handed back.
 598  * The flags are
 599  *
 600  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 601  * to catch references to uninitialised memory.
 602  *
 603  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 604  * for buffer overruns.
 605  *
 606  * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
 607  * memory pressure.
 608  *
 609  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 610  * cacheline.  This can be beneficial if you're counting cycles as closely
 611  * as davem.
 612  */
 613 kmem_cache_t *
 614 kmem_cache_create (const char *name, size_t size, size_t offset,
 615         unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 616         void (*dtor)(void*, kmem_cache_t *, unsigned long))
 617 {
 618         const char *func_nm = KERN_ERR "kmem_create: ";
 619         size_t left_over, align, slab_size;
 620         kmem_cache_t *cachep = NULL;
 621
 622         /*
 623          * Sanity checks... these are all serious usage bugs.
 624          */
 625         if ((!name) ||
 626                 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
 627                 in_interrupt() ||
 628                 (size < BYTES_PER_WORD) ||
 629                 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
 630                 (dtor && !ctor) ||
 631                 (offset < 0 || offset > size))
 632                         BUG();
 633
 634 #if DEBUG
 635         if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 636                 /* No constructor, but inital state check requested */
 637                 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 638                 flags &= ~SLAB_DEBUG_INITIAL;
 639         }
 640
 641         if ((flags & SLAB_POISON) && ctor) {
 642                 /* request for poisoning, but we can't do that with a constructor */
 643                 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 644                 flags &= ~SLAB_POISON;
 645         }
 646 #if FORCED_DEBUG
 647         if (size < (PAGE_SIZE>>3))
 648                 /*
 649                  * do not red zone large object, causes severe
 650                  * fragmentation.
 651                  */
 652                 flags |= SLAB_RED_ZONE;
 653         if (!ctor)
 654                 flags |= SLAB_POISON;
 655 #endif
 656 #endif
 657
 658         /*
 659          * Always checks flags, a caller might be expecting debug
 660          * support which isn't available.
 661          */
 662         if (flags & ~CREATE_MASK)
 663                 BUG();
 664
 665         /* Get cache's description obj. */
 666         cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 667         if (!cachep)
 668                 goto opps;
 669         memset(cachep, 0, sizeof(kmem_cache_t));
 670
 671         /* Check that size is in terms of words.  This is needed to avoid
 672          * unaligned accesses for some archs when redzoning is used, and makes
 673          * sure any on-slab bufctl's are also correctly aligned.
 674          */
 675         if (size & (BYTES_PER_WORD-1)) {
 676                 size += (BYTES_PER_WORD-1);
 677                 size &= ~(BYTES_PER_WORD-1);
 678                 printk("%sForcing size word alignment - %s\n", func_nm, name);
 679         }
 680
 681 #if DEBUG
 682         if (flags & SLAB_RED_ZONE) {
 683                 /*
 684                  * There is no point trying to honour cache alignment
 685                  * when redzoning.
 686                  */
 687                 flags &= ~SLAB_HWCACHE_ALIGN;
 688                 size += 2*BYTES_PER_WORD;       /* words for redzone */
 689         }
 690 #endif
 691         align = BYTES_PER_WORD;
 692         if (flags & SLAB_HWCACHE_ALIGN)
 693                 align = L1_CACHE_BYTES;
 694
 695         /* Determine if the slab management is 'on' or 'off' slab. */
 696         if (size >= (PAGE_SIZE>>3))
 697                 /*
 698                  * Size is large, assume best to place the slab management obj
 699                  * off-slab (should allow better packing of objs).
 700                  */
 701                 flags |= CFLGS_OFF_SLAB;
 702
 703         if (flags & SLAB_HWCACHE_ALIGN) {
 704                 /* Need to adjust size so that objs are cache aligned. */
 705                 /* Small obj size, can get at least two per cache line. */
 706                 /* FIXME: only power of 2 supported, was better */
 707                 while (size < align/2)
 708                         align /= 2;
 709                 size = (size+align-1)&(~(align-1));
 710         }
 711
 712         /* Cal size (in pages) of slabs, and the num of objs per slab.
 713          * This could be made much more intelligent.  For now, try to avoid
 714          * using high page-orders for slabs.  When the gfp() funcs are more
 715          * friendly towards high-order requests, this should be changed.
 716          */
 717         do {
 718                 unsigned int break_flag = 0;
 719 cal_wastage:
 720                 kmem_cache_estimate(cachep->gfporder, size, flags,
 721                                                 &left_over, &cachep->num);
 722                 if (break_flag)
 723                         break;
 724                 if (cachep->gfporder >= MAX_GFP_ORDER)
 725                         break;
 726                 if (!cachep->num)
 727                         goto next;
 728                 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
 729                         /* Oops, this num of objs will cause problems. */
 730                         cachep->gfporder--;
 731                         break_flag++;
 732                         goto cal_wastage;
 733                 }
 734
 735                 /*
 736                  * Large num of objs is good, but v. large slabs are currently
 737                  * bad for the gfp()s.
 738                  */
 739                 if (cachep->gfporder >= slab_break_gfp_order)
 740                         break;
 741
 742                 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
 743                         break;  /* Acceptable internal fragmentation. */
 744 next:
 745                 cachep->gfporder++;
 746         } while (1);
 747
 748         if (!cachep->num) {
 749                 printk("kmem_cache_create: couldn't create cache %s.\n", name);
 750                 kmem_cache_free(&cache_cache, cachep);
 751                 cachep = NULL;
 752                 goto opps;
 753         }
 754         slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
 755
 756         /*
 757          * If the slab has been placed off-slab, and we have enough space then
 758          * move it on-slab. This is at the expense of any extra colouring.
 759          */
 760         if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
 761                 flags &= ~CFLGS_OFF_SLAB;
 762                 left_over -= slab_size;
 763         }
 764
 765         /* Offset must be a multiple of the alignment. */
 766         offset += (align-1);
 767         offset &= ~(align-1);
 768         if (!offset)
 769                 offset = L1_CACHE_BYTES;
 770         cachep->colour_off = offset;
 771         cachep->colour = left_over/offset;
 772
 773         /* init remaining fields */
 774         if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
 775                 flags |= CFLGS_OPTIMIZE;
 776
 777         cachep->flags = flags;
 778         cachep->gfpflags = 0;
 779         if (flags & SLAB_CACHE_DMA)
 780                 cachep->gfpflags |= GFP_DMA;
 781         spin_lock_init(&cachep->spinlock);
 782         cachep->objsize = size;
 783         INIT_LIST_HEAD(&cachep->slabs);
 784         cachep->firstnotfull = &cachep->slabs;
 785
 786         if (flags & CFLGS_OFF_SLAB)
 787                 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
 788         cachep->ctor = ctor;
 789         cachep->dtor = dtor;
 790         /* Copy name over so we don't have problems with unloaded modules */
 791         strcpy(cachep->name, name);
 792
 793 #ifdef CONFIG_SMP
 794         if (g_cpucache_up)
 795                 enable_cpucache(cachep);
 796 #endif
 797         /* Need the semaphore to access the chain. */
 798         down(&cache_chain_sem);
 799         {
 800                 struct list_head *p;
 801
 802                 list_for_each(p, &cache_chain) {
 803                         kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
 804
 805                         /* The name field is constant - no lock needed. */
 806                         if (!strcmp(pc->name, name))
 807                                 BUG();
 808                 }
 809         }
 810
 811         /* There is no reason to lock our new cache before we
 812          * link it in - no one knows about it yet...
 813          */
 814         list_add(&cachep->next, &cache_chain);
 815         up(&cache_chain_sem);
 816 opps:
 817         return cachep;
 818 }
 819
 820 /*
 821  * This check if the kmem_cache_t pointer is chained in the cache_cache
 822  * list. -arca
 823  */
 824 static int is_chained_kmem_cache(kmem_cache_t * cachep)
 825 {
 826         struct list_head *p;
 827         int ret = 0;
 828
 829         /* Find the cache in the chain of caches. */
 830         down(&cache_chain_sem);
 831         list_for_each(p, &cache_chain) {
 832                 if (p == &cachep->next) {
 833                         ret = 1;
 834                         break;
 835                 }
 836         }
 837         up(&cache_chain_sem);
 838
 839         return ret;
 840 }
 841
 842 static int __kmem_cache_shrink(kmem_cache_t *cachep)
 843 {
 844         slab_t *slabp;
 845         int ret;
 846
 847 #ifdef CONFIG_SMP
 848         smp_call_function(drain_cache, cachep, 1, 1);
 849         local_irq_disable();
 850         drain_cache(cachep);
 851         local_irq_enable();
 852 #endif
 853         spin_lock_irq(&cachep->spinlock);
 854
 855         /* If the cache is growing, stop shrinking. */
 856         while (!cachep->growing) {
 857                 struct list_head *p;
 858
 859                 p = cachep->slabs.prev;
 860                 if (p == &cachep->slabs)
 861                         break;
 862
 863                 slabp = list_entry(cachep->slabs.prev, slab_t, list);
 864                 if (slabp->inuse)
 865                         break;
 866
 867                 list_del(&slabp->list);
 868                 if (cachep->firstnotfull == &slabp->list)
 869                         cachep->firstnotfull = &cachep->slabs;
 870
 871                 spin_unlock_irq(&cachep->spinlock);
 872                 kmem_slab_destroy(cachep, slabp);
 873                 spin_lock_irq(&cachep->spinlock);
 874         }
 875         ret = !list_empty(&cachep->slabs);
 876         spin_unlock_irq(&cachep->spinlock);
 877         return ret;
 878 }
 879
 880 /**
 881  * kmem_cache_shrink - Shrink a cache.
 882  * @cachep: The cache to shrink.
 883  *
 884  * Releases as many slabs as possible for a cache.
 885  * To help debugging, a zero exit status indicates all slabs were released.
 886  */
 887 int kmem_cache_shrink(kmem_cache_t *cachep)
 888 {
 889         if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
 890                 BUG();
 891
 892         return __kmem_cache_shrink(cachep);
 893 }
 894
 895 /**
 896  * kmem_cache_destroy - delete a cache
 897  * @cachep: the cache to destroy
 898  *
 899  * Remove a kmem_cache_t object from the slab cache.
 900  * Returns 0 on success.
 901  *
 902  * It is expected this function will be called by a module when it is
 903  * unloaded.  This will remove the cache completely, and avoid a duplicate
 904  * cache being allocated each time a module is loaded and unloaded, if the
 905  * module doesn't have persistent in-kernel storage across loads and unloads.
 906  *
 907  * The caller must guarantee that noone will allocate memory from the cache
 908  * during the kmem_cache_destroy().
 909  */
 910 int kmem_cache_destroy (kmem_cache_t * cachep)
 911 {
 912         if (!cachep || in_interrupt() || cachep->growing)
 913                 BUG();
 914
 915         /* Find the cache in the chain of caches. */
 916         down(&cache_chain_sem);
 917         /* the chain is never empty, cache_cache is never destroyed */
 918         if (clock_searchp == cachep)
 919                 clock_searchp = list_entry(cachep->next.next,
 920                                                 kmem_cache_t, next);
 921         list_del(&cachep->next);
 922         up(&cache_chain_sem);
 923
 924         if (__kmem_cache_shrink(cachep)) {
 925                 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
 926                        cachep);
 927                 down(&cache_chain_sem);
 928                 list_add(&cachep->next,&cache_chain);
 929                 up(&cache_chain_sem);
 930                 return 1;
 931         }
 932 #ifdef CONFIG_SMP
 933         {
 934                 int i;
 935                 for (i = 0; i < NR_CPUS; i++)
 936                         kfree(cachep->cpudata[i]);
 937         }
 938 #endif
 939         kmem_cache_free(&cache_cache, cachep);
 940
 941         return 0;
 942 }
 943
 944 /* Get the memory for a slab management obj. */
 945 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
 946                         void *objp, int colour_off, int local_flags)
 947 {
 948         slab_t *slabp;
 949
 950         if (OFF_SLAB(cachep)) {
 951                 /* Slab management obj is off-slab. */
 952                 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
 953                 if (!slabp)
 954                         return NULL;
 955         } else {
 956                 /* FIXME: change to
 957                         slabp = objp
 958                  * if you enable OPTIMIZE
 959                  */
 960                 slabp = objp+colour_off;
 961                 colour_off += L1_CACHE_ALIGN(cachep->num *
 962                                 sizeof(kmem_bufctl_t) + sizeof(slab_t));
 963         }
 964         slabp->inuse = 0;
 965         slabp->colouroff = colour_off;
 966         slabp->s_mem = objp+colour_off;
 967
 968         return slabp;
 969 }
 970
 971 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
 972                         slab_t * slabp, unsigned long ctor_flags)
 973 {
 974         int i;
 975
 976         for (i = 0; i < cachep->num; i++) {
 977                 void* objp = slabp->s_mem+cachep->objsize*i;
 978 #if DEBUG
 979                 if (cachep->flags & SLAB_RED_ZONE) {
 980                         *((unsigned long*)(objp)) = RED_MAGIC1;
 981                         *((unsigned long*)(objp + cachep->objsize -
 982                                         BYTES_PER_WORD)) = RED_MAGIC1;
 983                         objp += BYTES_PER_WORD;
 984                 }
 985 #endif
 986
 987                 /*
 988                  * Constructors are not allowed to allocate memory from
 989                  * the same cache which they are a constructor for.
 990                  * Otherwise, deadlock. They must also be threaded.
 991                  */
 992                 if (cachep->ctor)
 993                         cachep->ctor(objp, cachep, ctor_flags);
 994 #if DEBUG
 995                 if (cachep->flags & SLAB_RED_ZONE)
 996                         objp -= BYTES_PER_WORD;
 997                 if (cachep->flags & SLAB_POISON)
 998                         /* need to poison the objs */
 999                         kmem_poison_obj(cachep, objp);
1000                 if (cachep->flags & SLAB_RED_ZONE) {
1001                         if (*((unsigned long*)(objp)) != RED_MAGIC1)
1002                                 BUG();
1003                         if (*((unsigned long*)(objp + cachep->objsize -
1004                                         BYTES_PER_WORD)) != RED_MAGIC1)
1005                                 BUG();
1006                 }
1007 #endif
1008                 slab_bufctl(slabp)[i] = i+1;
1009         }
1010         slab_bufctl(slabp)[i-1] = BUFCTL_END;
1011         slabp->free = 0;
1012 }
1013
1014 /*
1015  * Grow (by 1) the number of slabs within a cache.  This is called by
1016  * kmem_cache_alloc() when there are no active objs left in a cache.
1017  */
1018 static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1019 {
1020         slab_t  *slabp;
1021         struct page     *page;
1022         void            *objp;
1023         size_t           offset;
1024         unsigned int     i, local_flags;
1025         unsigned long    ctor_flags;
1026         unsigned long    save_flags;
1027
1028         /* Be lazy and only check for valid flags here,
1029          * keeping it out of the critical path in kmem_cache_alloc().
1030          */
1031         if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1032                 BUG();
1033         if (flags & SLAB_NO_GROW)
1034                 return 0;
1035
1036         /*
1037          * The test for missing atomic flag is performed here, rather than
1038          * the more obvious place, simply to reduce the critical path length
1039          * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1040          * will eventually be caught here (where it matters).
1041          */
1042         if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1043                 BUG();
1044
1045         ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1046         local_flags = (flags & SLAB_LEVEL_MASK);
1047         if (local_flags == SLAB_ATOMIC)
1048                 /*
1049                  * Not allowed to sleep.  Need to tell a constructor about
1050                  * this - it might need to know...
1051                  */
1052                 ctor_flags |= SLAB_CTOR_ATOMIC;
1053
1054         /* About to mess with non-constant members - lock. */
1055         spin_lock_irqsave(&cachep->spinlock, save_flags);
1056
1057         /* Get colour for the slab, and cal the next value. */
1058         offset = cachep->colour_next;
1059         cachep->colour_next++;
1060         if (cachep->colour_next >= cachep->colour)
1061                 cachep->colour_next = 0;
1062         offset *= cachep->colour_off;
1063         cachep->dflags |= DFLGS_GROWN;
1064
1065         cachep->growing++;
1066         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1067
1068         /* A series of memory allocations for a new slab.
1069          * Neither the cache-chain semaphore, or cache-lock, are
1070          * held, but the incrementing c_growing prevents this
1071          * cache from being reaped or shrunk.
1072          * Note: The cache could be selected in for reaping in
1073          * kmem_cache_reap(), but when the final test is made the
1074          * growing value will be seen.
1075          */
1076
1077         /* Get mem for the objs. */
1078         if (!(objp = kmem_getpages(cachep, flags)))
1079                 goto failed;
1080
1081         /* Get slab management. */
1082         if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1083                 goto opps1;
1084
1085         /* Nasty!!!!!! I hope this is OK. */
1086         i = 1 << cachep->gfporder;
1087         page = mem_map + MAP_NR(objp);
1088         do {
1089                 SET_PAGE_CACHE(page, cachep);
1090                 SET_PAGE_SLAB(page, slabp);
1091                 PageSetSlab(page);
1092                 page++;
1093         } while (--i);
1094
1095         kmem_cache_init_objs(cachep, slabp, ctor_flags);
1096
1097         spin_lock_irqsave(&cachep->spinlock, save_flags);
1098         cachep->growing--;
1099
1100         /* Make slab active. */
1101         list_add_tail(&slabp->list,&cachep->slabs);
1102         if (cachep->firstnotfull == &cachep->slabs)
1103                 cachep->firstnotfull = &slabp->list;
1104         STATS_INC_GROWN(cachep);
1105         cachep->failures = 0;
1106
1107         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1108         return 1;
1109 opps1:
1110         kmem_freepages(cachep, objp);
1111 failed:
1112         spin_lock_irqsave(&cachep->spinlock, save_flags);
1113         cachep->growing--;
1114         spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1115         return 0;
1116 }
1117
1118 /*
1119  * Perform extra freeing checks:
1120  * - detect double free
1121  * - detect bad pointers.
1122  * Called with the cache-lock held.
1123  */
1124
1125 #if DEBUG
1126 static int kmem_extra_free_checks (kmem_cache_t * cachep,
1127                         slab_t *slabp, void * objp)
1128 {
1129         int i;
1130         unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1131
1132         if (objnr >= cachep->num)
1133                 BUG();
1134         if (objp != slabp->s_mem + objnr*cachep->objsize)
1135                 BUG();
1136
1137         /* Check slab's freelist to see if this obj is there. */
1138         for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
1139                 if (i == objnr)
1140                         BUG();
1141         }
1142         return 0;
1143 }
1144 #endif
1145
1146 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1147 {
1148 #if DEBUG
1149         if (flags & SLAB_DMA) {
1150                 if (!(cachep->gfpflags & GFP_DMA))
1151                         BUG();
1152         } else {
1153                 if (cachep->gfpflags & GFP_DMA)
1154                         BUG();
1155         }
1156 #endif
1157 }
1158
1159 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1160                                                          slab_t *slabp)
1161 {
1162         void *objp;
1163
1164         STATS_INC_ALLOCED(cachep);
1165         STATS_INC_ACTIVE(cachep);
1166         STATS_SET_HIGH(cachep);
1167
1168         /* get obj pointer */
1169         slabp->inuse++;
1170         objp = slabp->s_mem + slabp->free*cachep->objsize;
1171         slabp->free=slab_bufctl(slabp)[slabp->free];
1172
1173         if (slabp->free == BUFCTL_END)
1174                 /* slab now full: move to next slab for next alloc */
1175                 cachep->firstnotfull = slabp->list.next;
1176 #if DEBUG
1177         if (cachep->flags & SLAB_POISON)
1178                 if (kmem_check_poison_obj(cachep, objp))
1179                         BUG();
1180         if (cachep->flags & SLAB_RED_ZONE) {
1181                 /* Set alloc red-zone, and check old one. */
1182                 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1183                                                          RED_MAGIC1)
1184                         BUG();
1185                 if (xchg((unsigned long *)(objp+cachep->objsize -
1186                           BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1187                         BUG();
1188                 objp += BYTES_PER_WORD;
1189         }
1190 #endif
1191         return objp;
1192 }
1193
1194 /*
1195  * Returns a ptr to an obj in the given cache.
1196  * caller must guarantee synchronization
1197  * #define for the goto optimization 8-)
1198  */
1199 #define kmem_cache_alloc_one(cachep)                            \
1200 ({                                                              \
1201         slab_t  *slabp;                                 \
1202                                                                 \
1203         /* Get slab alloc is to come from. */                   \
1204         {                                                       \
1205                 struct list_head* p = cachep->firstnotfull;     \
1206                 if (p == &cachep->slabs)                        \
1207                         goto alloc_new_slab;                    \
1208                 slabp = list_entry(p,slab_t, list);     \
1209         }                                                       \
1210         kmem_cache_alloc_one_tail(cachep, slabp);               \
1211 })
1212
1213 #ifdef CONFIG_SMP
1214 void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
1215 {
1216         int batchcount = cachep->batchcount;
1217         cpucache_t* cc = cc_data(cachep);
1218
1219         spin_lock(&cachep->spinlock);
1220         while (batchcount--) {
1221                 /* Get slab alloc is to come from. */
1222                 struct list_head *p = cachep->firstnotfull;
1223                 slab_t *slabp;
1224
1225                 if (p == &cachep->slabs)
1226                         break;
1227                 slabp = list_entry(p,slab_t, list);
1228                 cc_entry(cc)[cc->avail++] =
1229                                 kmem_cache_alloc_one_tail(cachep, slabp);
1230         }
1231         spin_unlock(&cachep->spinlock);
1232
1233         if (cc->avail)
1234                 return cc_entry(cc)[--cc->avail];
1235         return NULL;
1236 }
1237 #endif
1238
1239 static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1240 {
1241         unsigned long save_flags;
1242         void* objp;
1243
1244         kmem_cache_alloc_head(cachep, flags);
1245 try_again:
1246         local_irq_save(save_flags);
1247 #ifdef CONFIG_SMP
1248         {
1249                 cpucache_t *cc = cc_data(cachep);
1250
1251                 if (cc) {
1252                         if (cc->avail) {
1253                                 STATS_INC_ALLOCHIT(cachep);
1254                                 objp = cc_entry(cc)[--cc->avail];
1255                         } else {
1256                                 STATS_INC_ALLOCMISS(cachep);
1257                                 objp = kmem_cache_alloc_batch(cachep,flags);
1258                                 if (!objp)
1259                                         goto alloc_new_slab_nolock;
1260                         }
1261                 } else {
1262                         spin_lock(&cachep->spinlock);
1263                         objp = kmem_cache_alloc_one(cachep);
1264                         spin_unlock(&cachep->spinlock);
1265                 }
1266         }
1267 #else
1268         objp = kmem_cache_alloc_one(cachep);
1269 #endif
1270         local_irq_restore(save_flags);
1271         return objp;
1272 alloc_new_slab:
1273 #ifdef CONFIG_SMP
1274         spin_unlock(&cachep->spinlock);
1275 alloc_new_slab_nolock:
1276 #endif
1277         local_irq_restore(save_flags);
1278         if (kmem_cache_grow(cachep, flags))
1279                 /* Someone may have stolen our objs.  Doesn't matter, we'll
1280                  * just come back here again.
1281                  */
1282                 goto try_again;
1283         return NULL;
1284 }
1285
1286 /*
1287  * Release an obj back to its cache. If the obj has a constructed
1288  * state, it should be in this state _before_ it is released.
1289  * - caller is responsible for the synchronization
1290  */
1291
1292 #if DEBUG
1293 # define CHECK_NR(nr)                                           \
1294         do {                                                    \
1295                 if (nr >= max_mapnr) {                          \
1296                         printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
1297                                 (unsigned long)objp);           \
1298                         BUG();                                  \
1299                 } \
1300         } while (0)
1301 # define CHECK_PAGE(page)                                       \
1302         do {                                                    \
1303                 if (!PageSlab(page)) {                          \
1304                         printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
1305                                 (unsigned long)objp);           \
1306                         BUG();                                  \
1307                 }                                               \
1308         } while (0)
1309
1310 #else
1311 # define CHECK_NR(nr)   do { } while (0)
1312 # define CHECK_PAGE(nr) do { } while (0)
1313 #endif
1314
1315 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1316 {
1317         slab_t* slabp;
1318
1319         CHECK_NR(MAP_NR(objp));
1320         CHECK_PAGE(mem_map + MAP_NR(objp));
1321         /* reduces memory footprint
1322          *
1323         if (OPTIMIZE(cachep))
1324                 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1325          else
1326          */
1327         slabp = GET_PAGE_SLAB(mem_map + MAP_NR(objp));
1328
1329 #if DEBUG
1330         if (cachep->flags & SLAB_DEBUG_INITIAL)
1331                 /* Need to call the slab's constructor so the
1332                  * caller can perform a verify of its state (debugging).
1333                  * Called without the cache-lock held.
1334                  */
1335                 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1336
1337         if (cachep->flags & SLAB_RED_ZONE) {
1338                 objp -= BYTES_PER_WORD;
1339                 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1340                         /* Either write before start, or a double free. */
1341                         BUG();
1342                 if (xchg((unsigned long *)(objp+cachep->objsize -
1343                                 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1344                         /* Either write past end, or a double free. */
1345                         BUG();
1346         }
1347         if (cachep->flags & SLAB_POISON)
1348                 kmem_poison_obj(cachep, objp);
1349         if (kmem_extra_free_checks(cachep, slabp, objp))
1350                 return;
1351 #endif
1352         {
1353                 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1354
1355                 slab_bufctl(slabp)[objnr] = slabp->free;
1356                 slabp->free = objnr;
1357         }
1358         STATS_DEC_ACTIVE(cachep);
1359
1360         /* fixup slab chain */
1361         if (slabp->inuse-- == cachep->num)
1362                 goto moveslab_partial;
1363         if (!slabp->inuse)
1364                 goto moveslab_free;
1365         return;
1366
1367 moveslab_partial:
1368         /* was full.
1369          * Even if the page is now empty, we can set c_firstnotfull to
1370          * slabp: there are no partial slabs in this case
1371          */
1372         {
1373                 struct list_head *t = cachep->firstnotfull;
1374
1375                 cachep->firstnotfull = &slabp->list;
1376                 if (slabp->list.next == t)
1377                         return;
1378                 list_del(&slabp->list);
1379                 list_add_tail(&slabp->list, t);
1380                 return;
1381         }
1382 moveslab_free:
1383         /*
1384          * was partial, now empty.
1385          * c_firstnotfull might point to slabp
1386          * FIXME: optimize
1387          */
1388         {
1389                 struct list_head *t = cachep->firstnotfull->prev;
1390
1391                 list_del(&slabp->list);
1392                 list_add_tail(&slabp->list, &cachep->slabs);
1393                 if (cachep->firstnotfull == &slabp->list)
1394                         cachep->firstnotfull = t->next;
1395                 return;
1396         }
1397 }
1398
1399 #ifdef CONFIG_SMP
1400 static inline void __free_block (kmem_cache_t* cachep,
1401                                                         void** objpp, int len)
1402 {
1403         for ( ; len > 0; len--, objpp++)
1404                 kmem_cache_free_one(cachep, *objpp);
1405 }
1406
1407 static void free_block (kmem_cache_t* cachep, void** objpp, int len)
1408 {
1409         spin_lock(&cachep->spinlock);
1410         __free_block(cachep, objpp, len);
1411         spin_unlock(&cachep->spinlock);
1412 }
1413 #endif
1414
1415 /*
1416  * __kmem_cache_free
1417  * called with disabled ints
1418  */
1419 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1420 {
1421 #ifdef CONFIG_SMP
1422         cpucache_t *cc = cc_data(cachep);
1423
1424         CHECK_NR(MAP_NR(objp));
1425         CHECK_PAGE(mem_map + MAP_NR(objp));
1426         if (cc) {
1427                 int batchcount;
1428                 if (cc->avail < cc->limit) {
1429                         STATS_INC_FREEHIT(cachep);
1430                         cc_entry(cc)[cc->avail++] = objp;
1431                         return;
1432                 }
1433                 STATS_INC_FREEMISS(cachep);
1434                 batchcount = cachep->batchcount;
1435                 cc->avail -= batchcount;
1436                 free_block(cachep,
1437                                         &cc_entry(cc)[cc->avail],batchcount);
1438                 cc_entry(cc)[cc->avail++] = objp;
1439                 return;
1440         } else {
1441                 free_block(cachep, &objp, 1);
1442         }
1443 #else
1444         kmem_cache_free_one(cachep, objp);
1445 #endif
1446 }
1447
1448 /**
1449  * kmem_cache_alloc - Allocate an object
1450  * @cachep: The cache to allocate from.
1451  * @flags: See kmalloc().
1452  *
1453  * Allocate an object from this cache.  The flags are only relevant
1454  * if the cache has no available objects.
1455  */
1456 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1457 {
1458         return __kmem_cache_alloc(cachep, flags);
1459 }
1460
1461 /**
1462  * kmalloc - allocate memory
1463  * @size: how many bytes of memory are required.
1464  * @flags: the type of memory to allocate.
1465  *
1466  * kmalloc is the normal method of allocating memory
1467  * in the kernel.  The @flags argument may be one of:
1468  *
1469  * %GFP_BUFFER - XXX
1470  *
1471  * %GFP_ATOMIC - allocation will not sleep.  Use inside interrupt handlers.
1472  *
1473  * %GFP_USER - allocate memory on behalf of user.  May sleep.
1474  *
1475  * %GFP_KERNEL - allocate normal kernel ram.  May sleep.
1476  *
1477  * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1478  * Don't use unless you're in the NFS code.
1479  *
1480  * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
1481  */
1482 void * kmalloc (size_t size, int flags)
1483 {
1484         cache_sizes_t *csizep = cache_sizes;
1485
1486         for (; csizep->cs_size; csizep++) {
1487                 if (size > csizep->cs_size)
1488                         continue;
1489                 return __kmem_cache_alloc(flags & GFP_DMA ?
1490                          csizep->cs_dmacachep : csizep->cs_cachep, flags);
1491         }
1492         BUG(); // too big size
1493         return NULL;
1494 }
1495
1496 /**
1497  * kmem_cache_free - Deallocate an object
1498  * @cachep: The cache the allocation was from.
1499  * @objp: The previously allocated object.
1500  *
1501  * Free an object which was previously allocated from this
1502  * cache.
1503  */
1504 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1505 {
1506         unsigned long flags;
1507 #if DEBUG
1508         CHECK_NR(MAP_NR(objp));
1509         CHECK_PAGE(mem_map + MAP_NR(objp));
1510         if (cachep != GET_PAGE_CACHE(mem_map + MAP_NR(objp)))
1511                 BUG();
1512 #endif
1513
1514         local_irq_save(flags);
1515         __kmem_cache_free(cachep, objp);
1516         local_irq_restore(flags);
1517 }
1518
1519 /**
1520  * kfree - free previously allocated memory
1521  * @objp: pointer returned by kmalloc.
1522  *
1523  * Don't free memory not originally allocated by kmalloc()
1524  * or you will run into trouble.
1525  */
1526 void kfree (const void *objp)
1527 {
1528         kmem_cache_t *c;
1529         unsigned long flags;
1530
1531         if (!objp)
1532                 return;
1533         local_irq_save(flags);
1534         CHECK_NR(MAP_NR(objp));
1535         CHECK_PAGE(mem_map + MAP_NR(objp));
1536         c = GET_PAGE_CACHE(mem_map + MAP_NR(objp));
1537         __kmem_cache_free(c, (void*)objp);
1538         local_irq_restore(flags);
1539 }
1540
1541 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1542 {
1543         cache_sizes_t *csizep = cache_sizes;
1544
1545         /* This function could be moved to the header file, and
1546          * made inline so consumers can quickly determine what
1547          * cache pointer they require.
1548          */
1549         for ( ; csizep->cs_size; csizep++) {
1550                 if (size > csizep->cs_size)
1551                         continue;
1552                 break;
1553         }
1554         return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1555 }
1556
1557 #ifdef CONFIG_SMP
1558 /*
1559  * called with local interrupts disabled
1560  */
1561 static void drain_cache (void* __cachep)
1562 {
1563         kmem_cache_t *cachep = __cachep;
1564         cpucache_t *cc = cc_data(cachep);
1565
1566         if (cc && cc->avail) {
1567                 free_block(cachep, cc_entry(cc), cc->avail);
1568                 cc->avail = 0;
1569         }
1570 }
1571
1572 typedef struct ccupdate_struct_s
1573 {
1574         kmem_cache_t* cachep;
1575         cpucache_t* new[NR_CPUS];
1576 } ccupdate_struct_t;
1577
1578 /*
1579  * called with local interrupts disabled
1580  */
1581 static void ccupdate_callback (void* __new)
1582 {
1583         ccupdate_struct_t* new = __new;
1584         cpucache_t *old = cc_data(new->cachep);
1585
1586         cc_data(new->cachep) = new->new[smp_processor_id()];
1587         new->new[smp_processor_id()] = old;
1588 }
1589
1590 /* called with cache_chain_sem acquired.  */
1591 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1592 {
1593         ccupdate_struct_t new;
1594         int i;
1595
1596         /*
1597          * These are admin-provided, so we are more graceful.
1598          */
1599         if (limit < 0)
1600                 return -EINVAL;
1601         if (batchcount < 0)
1602                 return -EINVAL;
1603         if (batchcount > limit)
1604                 return -EINVAL;
1605         if (limit != 0 && !batchcount)
1606                 return -EINVAL;
1607
1608         memset(&new.new,0,sizeof(new.new));
1609         if (limit) {
1610                 for (i = 0; i< smp_num_cpus; i++) {
1611                         cpucache_t* ccnew;
1612
1613
1614                         ccnew = kmalloc(sizeof(void*)*limit+
1615                                         sizeof(cpucache_t), GFP_KERNEL);
1616                         if (!ccnew)
1617                                 goto oom;
1618                         ccnew->limit = limit;
1619                         ccnew->avail = 0;
1620                         new.new[cpu_logical_map(i)] = ccnew;
1621                 }
1622         }
1623         new.cachep = cachep;
1624         spin_lock_irq(&cachep->spinlock);
1625         cachep->batchcount = batchcount;
1626         spin_unlock_irq(&cachep->spinlock);
1627
1628         smp_call_function(ccupdate_callback,&new,1,1);
1629         local_irq_disable();
1630         ccupdate_callback(&new);
1631         local_irq_enable();
1632
1633         for (i = 0; i < smp_num_cpus; i++) {
1634                 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1635                 if (!ccold)
1636                         continue;
1637                 local_irq_disable();
1638                 free_block(cachep, cc_entry(ccold), ccold->avail);
1639                 local_irq_enable();
1640                 kfree(ccold);
1641         }
1642         return 0;
1643 oom:
1644         for (i--; i >= 0; i--)
1645                 kfree(new.new[cpu_logical_map(i)]);
1646         return -ENOMEM;
1647 }
1648
1649 static void enable_cpucache (kmem_cache_t *cachep)
1650 {
1651         int err;
1652         int limit;
1653
1654         /* FIXME: optimize */
1655         if (cachep->objsize > PAGE_SIZE)
1656                 return;
1657         if (cachep->objsize > 1024)
1658                 limit = 60;
1659         else if (cachep->objsize > 256)
1660                 limit = 124;
1661         else
1662                 limit = 252;
1663
1664         err = kmem_tune_cpucache(cachep, limit, limit/2);
1665         if (err)
1666                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1667                                         cachep->name, -err);
1668 }
1669
1670 static void enable_all_cpucaches (void)
1671 {
1672         struct list_head* p;
1673
1674         down(&cache_chain_sem);
1675
1676         p = &cache_cache.next;
1677         do {
1678                 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1679
1680                 enable_cpucache(cachep);
1681                 p = cachep->next.next;
1682         } while (p != &cache_cache.next);
1683
1684         up(&cache_chain_sem);
1685 }
1686 #endif
1687
1688 /**
1689  * kmem_cache_reap - Reclaim memory from caches.
1690  * @gfp_mask: the type of memory required.
1691  *
1692  * Called from try_to_free_page().
1693  */
1694 void kmem_cache_reap (int gfp_mask)
1695 {
1696         slab_t *slabp;
1697         kmem_cache_t *searchp;
1698         kmem_cache_t *best_cachep;
1699         unsigned int best_pages;
1700         unsigned int best_len;
1701         unsigned int scan;
1702
1703         if (gfp_mask & __GFP_WAIT)
1704                 down(&cache_chain_sem);
1705         else
1706                 if (down_trylock(&cache_chain_sem))
1707                         return;
1708
1709         scan = REAP_SCANLEN;
1710         best_len = 0;
1711         best_pages = 0;
1712         best_cachep = NULL;
1713         searchp = clock_searchp;
1714         do {
1715                 unsigned int pages;
1716                 struct list_head* p;
1717                 unsigned int full_free;
1718
1719                 /* It's safe to test this without holding the cache-lock. */
1720                 if (searchp->flags & SLAB_NO_REAP)
1721                         goto next;
1722                 /* FIXME: is this really a good idea? */
1723                 if (gfp_mask & GFP_DMA) {
1724                         if (!(searchp->gfpflags & GFP_DMA))
1725                                 goto next;
1726                 } else {
1727                         if (searchp->gfpflags & GFP_DMA)
1728                                 goto next;
1729                 }
1730                 spin_lock_irq(&searchp->spinlock);
1731                 if (searchp->growing)
1732                         goto next_unlock;
1733                 if (searchp->dflags & DFLGS_GROWN) {
1734                         searchp->dflags &= ~DFLGS_GROWN;
1735                         goto next_unlock;
1736                 }
1737 #ifdef CONFIG_SMP
1738                 {
1739                         cpucache_t *cc = cc_data(searchp);
1740                         if (cc && cc->avail) {
1741                                 __free_block(searchp, cc_entry(cc), cc->avail);
1742                                 cc->avail = 0;
1743                         }
1744                 }
1745 #endif
1746
1747                 full_free = 0;
1748                 p = searchp->slabs.prev;
1749                 while (p != &searchp->slabs) {
1750                         slabp = list_entry(p, slab_t, list);
1751                         if (slabp->inuse)
1752                                 break;
1753                         full_free++;
1754                         p = p->prev;
1755                 }
1756
1757                 /*
1758                  * Try to avoid slabs with constructors and/or
1759                  * more than one page per slab (as it can be difficult
1760                  * to get high orders from gfp()).
1761                  */
1762                 pages = full_free * (1<<searchp->gfporder);
1763                 if (searchp->ctor)
1764                         pages = (pages*4+1)/5;
1765                 if (searchp->gfporder)
1766                         pages = (pages*4+1)/5;
1767                 if (pages > best_pages) {
1768                         best_cachep = searchp;
1769                         best_len = full_free;
1770                         best_pages = pages;
1771                         if (full_free >= REAP_PERFECT) {
1772                                 clock_searchp = list_entry(searchp->next.next,
1773                                                         kmem_cache_t,next);
1774                                 goto perfect;
1775                         }
1776                 }
1777 next_unlock:
1778                 spin_unlock_irq(&searchp->spinlock);
1779 next:
1780                 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1781         } while (--scan && searchp != clock_searchp);
1782
1783         clock_searchp = searchp;
1784
1785         if (!best_cachep)
1786                 /* couldn't find anything to reap */
1787                 goto out;
1788
1789         spin_lock_irq(&best_cachep->spinlock);
1790 perfect:
1791         /* free only 80% of the free slabs */
1792         best_len = (best_len*4 + 1)/5;
1793         for (scan = 0; scan < best_len; scan++) {
1794                 struct list_head *p;
1795
1796                 if (best_cachep->growing)
1797                         break;
1798                 p = best_cachep->slabs.prev;
1799                 if (p == &best_cachep->slabs)
1800                         break;
1801                 slabp = list_entry(p,slab_t,list);
1802                 if (slabp->inuse)
1803                         break;
1804                 list_del(&slabp->list);
1805                 if (best_cachep->firstnotfull == &slabp->list)
1806                         best_cachep->firstnotfull = &best_cachep->slabs;
1807                 STATS_INC_REAPED(best_cachep);
1808
1809                 /* Safe to drop the lock. The slab is no longer linked to the
1810                  * cache.
1811                  */
1812                 spin_unlock_irq(&best_cachep->spinlock);
1813                 kmem_slab_destroy(best_cachep, slabp);
1814                 spin_lock_irq(&best_cachep->spinlock);
1815         }
1816         spin_unlock_irq(&best_cachep->spinlock);
1817 out:
1818         up(&cache_chain_sem);
1819         return;
1820 }
1821
1822 #ifdef CONFIG_PROC_FS
1823 /* /proc/slabinfo
1824  *      cache-name num-active-objs total-objs
1825  *      obj-size num-active-slabs total-slabs
1826  *      num-pages-per-slab
1827  */
1828 #define FIXUP(t)                                \
1829         do {                                    \
1830                 if (len <= off) {               \
1831                         off -= len;             \
1832                         len = 0;                \
1833                 } else {                        \
1834                         if (len-off > count)    \
1835                                 goto t;         \
1836                 }                               \
1837         } while (0)
1838
1839 static int proc_getdata (char*page, char**start, off_t off, int count)
1840 {
1841         struct list_head *p;
1842         int len = 0;
1843
1844         /* Output format version, so at least we can change it without _too_
1845          * many complaints.
1846          */
1847         len += sprintf(page+len, "slabinfo - version: 1.1"
1848 #if STATS
1849                                 " (statistics)"
1850 #endif
1851 #ifdef CONFIG_SMP
1852                                 " (SMP)"
1853 #endif
1854                                 "\n");
1855         FIXUP(got_data);
1856
1857         down(&cache_chain_sem);
1858         p = &cache_cache.next;
1859         do {
1860                 kmem_cache_t    *cachep;
1861                 struct list_head *q;
1862                 slab_t          *slabp;
1863                 unsigned long   active_objs;
1864                 unsigned long   num_objs;
1865                 unsigned long   active_slabs = 0;
1866                 unsigned long   num_slabs;
1867                 cachep = list_entry(p, kmem_cache_t, next);
1868
1869                 spin_lock_irq(&cachep->spinlock);
1870                 active_objs = 0;
1871                 num_slabs = 0;
1872                 list_for_each(q,&cachep->slabs) {
1873                         slabp = list_entry(q, slab_t, list);
1874                         active_objs += slabp->inuse;
1875                         num_objs += cachep->num;
1876                         if (slabp->inuse)
1877                                 active_slabs++;
1878                         else
1879                                 num_slabs++;
1880                 }
1881                 num_slabs+=active_slabs;
1882                 num_objs = num_slabs*cachep->num;
1883
1884                 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1885                         cachep->name, active_objs, num_objs, cachep->objsize,
1886                         active_slabs, num_slabs, (1<<cachep->gfporder));
1887
1888 #if STATS
1889                 {
1890                         unsigned long errors = cachep->errors;
1891                         unsigned long high = cachep->high_mark;
1892                         unsigned long grown = cachep->grown;
1893                         unsigned long reaped = cachep->reaped;
1894                         unsigned long allocs = cachep->num_allocations;
1895
1896                         len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1897                                         high, allocs, grown, reaped, errors);
1898                 }
1899 #endif
1900 #ifdef CONFIG_SMP
1901                 {
1902                         unsigned int batchcount = cachep->batchcount;
1903                         unsigned int limit;
1904
1905                         if (cc_data(cachep))
1906                                 limit = cc_data(cachep)->limit;
1907                          else
1908                                 limit = 0;
1909                         len += sprintf(page+len, " : %4u %4u",
1910                                         limit, batchcount);
1911                 }
1912 #endif
1913 #if STATS && defined(CONFIG_SMP)
1914                 {
1915                         unsigned long allochit = atomic_read(&cachep->allochit);
1916                         unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1917                         unsigned long freehit = atomic_read(&cachep->freehit);
1918                         unsigned long freemiss = atomic_read(&cachep->freemiss);
1919                         len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1920                                         allochit, allocmiss, freehit, freemiss);
1921                 }
1922 #endif
1923                 len += sprintf(page+len,"\n");
1924                 spin_unlock_irq(&cachep->spinlock);
1925                 FIXUP(got_data_up);
1926                 p = cachep->next.next;
1927         } while (p != &cache_cache.next);
1928 got_data_up:
1929         up(&cache_chain_sem);
1930
1931 got_data:
1932         *start = page+off;
1933         return len;
1934 }
1935
1936 /**
1937  * slabinfo_read_proc - generates /proc/slabinfo
1938  * @page: scratch area, one page long
1939  * @start: pointer to the pointer to the output buffer
1940  * @off: offset within /proc/slabinfo the caller is interested in
1941  * @count: requested len in bytes
1942  * @eof: eof marker
1943  * @data: unused
1944  *
1945  * The contents of the buffer are
1946  * cache-name
1947  * num-active-objs
1948  * total-objs
1949  * object size
1950  * num-active-slabs
1951  * total-slabs
1952  * num-pages-per-slab
1953  * + further values on SMP and with statistics enabled
1954  */
1955 int slabinfo_read_proc (char *page, char **start, off_t off,
1956                                  int count, int *eof, void *data)
1957 {
1958         int len = proc_getdata(page, start, off, count);
1959         len -= (*start-page);
1960         if (len <= count)
1961                 *eof = 1;
1962         if (len>count) len = count;
1963         if (len<0) len = 0;
1964         return len;
1965 }
1966
1967 #define MAX_SLABINFO_WRITE 128
1968 /**
1969  * slabinfo_write_proc - SMP tuning for the slab allocator
1970  * @file:
1971  * @buffer: user buffer
1972  * @count: data len
1973  * @data: unused
1974  */
1975 int slabinfo_write_proc (struct file *file, const char *buffer,
1976                                 unsigned long count, void *data)
1977 {
1978 #ifdef CONFIG_SMP
1979         char kbuf[MAX_SLABINFO_WRITE], *tmp;
1980         int limit, batchcount, res;
1981         struct list_head *p;
1982
1983         if (count > MAX_SLABINFO_WRITE)
1984                 return -EINVAL;
1985         if (copy_from_user(&kbuf, buffer, count))
1986                 return -EFAULT;
1987
1988         tmp = strchr(kbuf, ' ');
1989         if (!tmp)
1990                 return -EINVAL;
1991         *tmp = '\0';
1992         tmp++;
1993         limit = simple_strtol(tmp, &tmp, 10);
1994         while (*tmp == ' ')
1995                 tmp++;
1996         batchcount = simple_strtol(tmp, &tmp, 10);
1997
1998         /* Find the cache in the chain of caches. */
1999         down(&cache_chain_sem);
2000         res = -EINVAL;
2001         list_for_each(p,&cache_chain) {
2002                 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2003
2004                 if (!strcmp(cachep->name, kbuf)) {
2005                         res = kmem_tune_cpucache(cachep, limit, batchcount);
2006                         break;
2007                 }
2008         }
2009         up(&cache_chain_sem);
2010         if (res >= 0)
2011                 res = count;
2012         return res;
2013 #else
2014         return -EINVAL;
2015 #endif
2016 }
2017 #endif