mm/slab.c

   1 /*
   2  * linux/mm/slab.c
   3  * Written by Mark Hemment, 1996/97.
   4  * (markhe@nextd.demon.co.uk)
   5  *
   6  * 11 April '97.  Started multi-threading - markhe
   7  *      The global cache-chain is protected by the semaphore 'cache_chain_sem'.
   8  *      The sem is only needed when accessing/extending the cache-chain, which
   9  *      can never happen inside an interrupt (kmem_cache_create(),
  10  *      kmem_cache_shrink() and kmem_cache_reap()).
  11  *      This is a medium-term exclusion lock.
  12  *
  13  *      Each cache has its own lock; 'c_spinlock'.  This lock is needed only
  14  *      when accessing non-constant members of a cache-struct.
  15  *      Note: 'constant members' are assigned a value in kmem_cache_create() before
  16  *      the cache is linked into the cache-chain.  The values never change, so not
  17  *      even a multi-reader lock is needed for these members.
  18  *      The c_spinlock is only ever held for a few cycles.
  19  *
  20  *      To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
  21  *      may be sleeping and therefore not holding the semaphore/lock), the
  22  *      c_growing field is used.  This also prevents reaping from a cache.
  23  *
  24  *      Note, caches can _never_ be destroyed.  When a sub-system (eg module) has
  25  *      finished with a cache, it can only be shrunk.  This leaves the cache empty,
  26  *      but already enabled for re-use, eg. during a module re-load.
  27  *
  28  *      Notes:
  29  *              o Constructors/deconstructors are called while the cache-lock
  30  *                is _not_ held.  Therefore they _must_ be threaded.
  31  *              o Constructors must not attempt to allocate memory from the
  32  *                same cache that they are a constructor for - infinite loop!
  33  *                (There is no easy way to trap this.)
  34  *              o The per-cache locks must be obtained with local-interrupts disabled.
  35  *              o When compiled with debug support, and an object-verify (upon release)
  36  *                is requested for a cache, the verify-function is called with the cache
  37  *                lock held.  This helps debugging.
  38  *              o The functions called from try_to_free_page() must not attempt
  39  *                to allocate memory from a cache which is being grown.
  40  *                The buffer sub-system might try to allocate memory, via buffer_cachep.
  41  *                As this pri is passed to the SLAB, and then (if necessary) onto the
  42  *                gfp() funcs (which avoid calling try_to_free_page()), no deadlock
  43  *                should happen.
  44  *
  45  *      The positioning of the per-cache lock is tricky.  If the lock is
  46  *      placed on the same h/w cache line as commonly accessed members
  47  *      the number of L1 cache-line faults is reduced.  However, this can
  48  *      lead to the cache-line ping-ponging between processors when the
  49  *      lock is in contention (and the common members are being accessed).
  50  *      Decided to keep it away from common members.
  51  *
  52  *      More fine-graining is possible, with per-slab locks...but this might be
  53  *      taking fine graining too far, but would have the advantage;
  54  *              During most allocs/frees no writes occur to the cache-struct.
  55  *              Therefore a multi-reader/one writer lock could be used (the writer
  56  *              needed when the slab chain is being link/unlinked).
  57  *              As we would not have an exclusion lock for the cache-structure, one
  58  *              would be needed per-slab (for updating s_free ptr, and/or the contents
  59  *              of s_index).
  60  *      The above locking would allow parallel operations to different slabs within
  61  *      the same cache with reduced spinning.
  62  *
  63  *      Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
  64  *      would allow most allocations from the same cache to execute in parallel.
  65  *
  66  *      At present, each engine can be growing a cache.  This should be blocked.
  67  *
  68  *      It is not currently 100% safe to examine the page_struct outside of a kernel
  69  *      or global cli lock.  The risk is v. small, and non-fatal.
  70  *
  71  *      Calls to printk() are not 100% safe (the function is not threaded).  However,
  72  *      printk() is only used under an error condition, and the risk is v. small (not
  73  *      sure if the console write functions 'enjoy' executing multiple contexts in
  74  *      parallel.  I guess they don't...).
  75  *      Note, for most calls to printk() any held cache-lock is dropped.  This is not
  76  *      always done for text size reasons - having *_unlock() everywhere is bloat.
  77  */
  78
  79 /*
  80  * An implementation of the Slab Allocator as described in outline in;
  81  *      UNIX Internals: The New Frontiers by Uresh Vahalia
  82  *      Pub: Prentice Hall      ISBN 0-13-101908-2
  83  * or with a little more detail in;
  84  *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
  85  *      Jeff Bonwick (Sun Microsystems).
  86  *      Presented at: USENIX Summer 1994 Technical Conference
  87  */
  88
  89 /*
  90  * This implementation deviates from Bonwick's paper as it
  91  * does not use a hash-table for large objects, but rather a per slab
  92  * index to hold the bufctls.  This allows the bufctl structure to
  93  * be small (one word), but limits the number of objects a slab (not
  94  * a cache) can contain when off-slab bufctls are used.  The limit is the
  95  * size of the largest general cache that does not use off-slab bufctls,
  96  * divided by the size of a bufctl.  For 32bit archs, this is 256/4 = 64.
  97  * This is not serious, as it is only for large objects, when it is unwise
  98  * to have too many per slab.
  99  * Note: This limit can be raised by introducing a general cache whose size
 100  * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 101  */
 102
 103 #include        <linux/config.h>
 104 #include        <linux/slab.h>
 105 #include        <linux/interrupt.h>
 106 #include        <linux/init.h>
 107
 108 /* If there is a different PAGE_SIZE around, and it works with this allocator,
 109  * then change the following.
 110  */
 111 #if     (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
 112 #error  Your page size is probably not correctly supported - please check
 113 #endif
 114
 115 /* SLAB_MGMT_CHECKS     - 1 to enable extra checks in kmem_cache_create().
 116  *                        0 if you wish to reduce memory usage.
 117  *
 118  * SLAB_DEBUG_SUPPORT   - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
 119  *                        SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
 120  *                        0 for faster, smaller, code (especially in the critical paths).
 121  *
 122  * SLAB_STATS           - 1 to collect stats for /proc/slabinfo.
 123  *                        0 for faster, smaller, code (especially in the critical paths).
 124  *
 125  * SLAB_SELFTEST        - 1 to perform a few tests, mainly for development.
 126  */
 127 #define         SLAB_MGMT_CHECKS        1
 128 #define         SLAB_DEBUG_SUPPORT      0
 129 #define         SLAB_STATS              0
 130 #define         SLAB_SELFTEST           0
 131
 132 /* Shouldn't this be in a header file somewhere? */
 133 #define BYTES_PER_WORD          sizeof(void *)
 134
 135 /* Legal flag mask for kmem_cache_create().
      * The debug flags are only part of the mask when SLAB_DEBUG_SUPPORT is
      * non-zero.  The two "#if 0" variants, which additionally admit
      * SLAB_HIGH_PACK, are compiled out.
      */
 136 #if     SLAB_DEBUG_SUPPORT
 137 #if     0
 138 #define SLAB_C_MASK             (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
 139                                  SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
 140                                  SLAB_HIGH_PACK)
 141 #endif
 142 #define SLAB_C_MASK             (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
 143                                  SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
 144 #else
 145 #if     0
 146 #define SLAB_C_MASK             (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
 147 #endif
 148 #define SLAB_C_MASK             (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
 149 #endif  /* SLAB_DEBUG_SUPPORT */
 150
 151 /* Slab management struct.
 152  * Manages the objs in a slab.  Placed either at the end of mem allocated
 153  * for a slab, or from an internal obj cache (cache_slabp).
 154  * Slabs are chained into a partially ordered list; fully used first, partial
 155  * next, and then fully free slabs.
 156  * The first 4 members are referenced during an alloc/free operation, and
 157  * should always appear on the same cache line.
 158  * Note: The offset between some members _must_ match offsets within
 159  * the kmem_cache_t - see kmem_cache_init() for the checks. */
 160
 161 #define SLAB_OFFSET_BITS        16      /* could make this larger for 64bit archs */
 162
 163 typedef struct kmem_slab_s {
 164         struct kmem_bufctl_s    *s_freep;  /* ptr to first inactive obj in slab */
             /* Non-NULL when the slab has a separate bufctl index; that index is
              * handed back to c_index_cachep in kmem_slab_destroy(). */
 165         struct kmem_bufctl_s    *s_index;
 166         unsigned long            s_magic;  /* SLAB_MAGIC_ALLOC or SLAB_MAGIC_DESTROYED */
 167         unsigned long            s_inuse;  /* num of objs active in slab */
 168
             /* Doubly-linked slab chain.  Do not move these two relative to
              * s_magic/s_inuse: kmem_cache_init() checks their offsets against
              * c_firstp/c_lastp in kmem_cache_t. */
 169         struct kmem_slab_s      *s_nextp;
 170         struct kmem_slab_s      *s_prevp;
 171         void                    *s_mem;    /* addr of first obj in slab */
             /* s_mem - s_offset is the address given to kmem_freepages(). */
 172         unsigned long            s_offset:SLAB_OFFSET_BITS,
             /* NOTE(review): presumably records whether the slab's pages are
              * DMA-able (cf. kmem_getpages()) - the writer is not visible here. */
 173                                  s_dma:1;
 174 } kmem_slab_t;
 175
 176 /* When the slab management is on-slab, this gives the size to use. */
 177 #define slab_align_size         (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
 178
 179 /* Test for end of slab chain.
      * Yields the address of the cache's c_offset member, viewed as a slab.
      * kmem_cache_init() verifies that, seen through this pointer, s_nextp and
      * s_prevp land on c_firstp and c_lastp, and s_magic/s_inuse on
      * c_magic/c_inuse, so the cache-struct itself acts as the chain's end
      * marker and the slab link/unlink helpers need no special cases.
      */
 180 #define kmem_slab_end(x)        ((kmem_slab_t*)&((x)->c_offset))
 181
 182 /* s_magic */
 183 #define SLAB_MAGIC_ALLOC        0xA5C32F2BUL    /* slab is alive */
 184 #define SLAB_MAGIC_DESTROYED    0xB2F23C5AUL    /* slab has been destroyed */
 185
 186 /* Bufctl's are used for linking objs within a slab, identifying what slab an obj
 187  * is in, and the address of the associated obj (for sanity checking with off-slab
 188  * bufctls).  What a bufctl contains depends upon the state of the obj and
 189  * the organisation of the cache.
 190  */
 191 typedef struct kmem_bufctl_s {
             /* One pointer-sized word; which member is meaningful depends on the
              * state of the obj and the organisation of the cache. */
 192         union {
 193                 struct kmem_bufctl_s    *buf_nextp;     /* link to another bufctl in the slab */
 194                 kmem_slab_t             *buf_slabp;     /* slab for obj */
 195                 void *                   buf_objp;      /* addr of the associated obj */
 196         } u;
 197 } kmem_bufctl_t;
 198
 199 /* ...shorthand... */
 200 #define buf_nextp       u.buf_nextp
 201 #define buf_slabp       u.buf_slabp
 202 #define buf_objp        u.buf_objp
 203
 204 #if     SLAB_DEBUG_SUPPORT
 205 /* Magic nums for obj red zoning.
 206  * Placed in the first word before and the first word after an obj.
 207  */
 208 #define SLAB_RED_MAGIC1         0x5A2CF071UL    /* when obj is active */
 209 #define SLAB_RED_MAGIC2         0x170FC2A5UL    /* when obj is inactive */
 210
 211 /* ...and for poisoning */
 212 #define SLAB_POISON_BYTE        0x5a            /* byte value for poisoning */
 213 #define SLAB_POISON_END 0xa5            /* end-byte of poisoning */
 214
 215 #endif  /* SLAB_DEBUG_SUPPORT */
 216
 217 /* Cache struct - manages a cache.
 218  * First four members are commonly referenced during an alloc/free operation.
      * The member order from c_offset through c_lastp must not change without
      * also changing kmem_slab_t: kmem_slab_end() treats &c_offset as a slab,
      * and kmem_cache_init() panics if the offsets no longer line up.
 219  */
 220 struct kmem_cache_s {
 221         kmem_slab_t              *c_freep;      /* first slab w. free objs */
 222         unsigned long             c_flags;      /* constant flags */
             /* Bytes stepped per obj when walking a slab; sizeof(kmem_bufctl_t)
              * is added on top when the slab has no s_index (see the obj walk in
              * kmem_slab_destroy()). */
 223         unsigned long             c_offset;
 224         unsigned long             c_num;        /* # of objs per slab */
 225
 226         unsigned long             c_magic;      /* SLAB_C_MAGIC; lines up with s_magic via kmem_slab_end() */
 227         unsigned long             c_inuse;      /* kept at zero */
 228         kmem_slab_t              *c_firstp;     /* first slab in chain */
 229         kmem_slab_t              *c_lastp;      /* last slab in chain */
 230
 231         spinlock_t                c_spinlock;   /* per-cache lock, for non-constant members */
 232         unsigned long             c_growing;    /* blocks shrinking/reaping while the cache grows */
 233         unsigned long             c_dflags;     /* dynamic flags */
 234         size_t                    c_org_size;   /* byte count used when poisoning/red-zoning an obj */
 235         unsigned long             c_gfporder;   /* order of pgs per slab (2^n) */
 236         void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
 237         void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */
 238         unsigned long             c_align;      /* alignment of objs */
 239         size_t                    c_colour;     /* cache colouring range */
 240         size_t                    c_colour_next;/* cache colouring */
 241         unsigned long             c_failures;
 242         const char               *c_name;       /* name, as printed in error messages */
 243         struct kmem_cache_s      *c_nextp;      /* next cache in the cache-chain */
 244         kmem_cache_t             *c_index_cachep; /* cache that s_index is freed back to */
 245 #if     SLAB_STATS
 246         unsigned long             c_num_active;
 247         unsigned long             c_num_allocations;
 248         unsigned long             c_high_mark;
 249         unsigned long             c_grown;
 250         unsigned long             c_reaped;
 251         atomic_t                  c_errors;
 252 #endif  /* SLAB_STATS */
 253 };
 254
 255 /* internal c_flags */
 256 #define SLAB_CFLGS_OFF_SLAB     0x010000UL      /* slab management in own cache */
 257 #define SLAB_CFLGS_BUFCTL       0x020000UL      /* bufctls in own cache */
 258 #define SLAB_CFLGS_GENERAL      0x080000UL      /* a general cache */
 259
 260 /* c_dflags (dynamic flags).  Need to hold the spinlock to access this member */
 261 #define SLAB_CFLGS_GROWN        0x000002UL      /* don't reap a recently grown */
 262
 263 #define SLAB_OFF_SLAB(x)        ((x) & SLAB_CFLGS_OFF_SLAB)
 264 #define SLAB_BUFCTL(x)          ((x) & SLAB_CFLGS_BUFCTL)
 265 #define SLAB_GROWN(x)           ((x) & SLAB_CFLGS_GROWN)
 266
 267 #if     SLAB_STATS
 268 #define SLAB_STATS_INC_ACTIVE(x)        ((x)->c_num_active++)
 269 #define SLAB_STATS_DEC_ACTIVE(x)        ((x)->c_num_active--)
 270 #define SLAB_STATS_INC_ALLOCED(x)       ((x)->c_num_allocations++)
 271 #define SLAB_STATS_INC_GROWN(x)         ((x)->c_grown++)
 272 #define SLAB_STATS_INC_REAPED(x)        ((x)->c_reaped++)
 273 #define SLAB_STATS_SET_HIGH(x)          do { if ((x)->c_num_active > (x)->c_high_mark) \
 274                                                 (x)->c_high_mark = (x)->c_num_active; \
 275                                         } while (0)
 276 #define SLAB_STATS_INC_ERR(x)           (atomic_inc(&(x)->c_errors))
 277 #else
 278 #define SLAB_STATS_INC_ACTIVE(x)
 279 #define SLAB_STATS_DEC_ACTIVE(x)
 280 #define SLAB_STATS_INC_ALLOCED(x)
 281 #define SLAB_STATS_INC_GROWN(x)
 282 #define SLAB_STATS_INC_REAPED(x)
 283 #define SLAB_STATS_SET_HIGH(x)
 284 #define SLAB_STATS_INC_ERR(x)
 285 #endif  /* SLAB_STATS */
 286
 287 #if     SLAB_SELFTEST
 288 #if     !SLAB_DEBUG_SUPPORT
 289 #error  Debug support needed for self-test
 290 #endif
 291 static void kmem_self_test(void);
 292 #endif  /* SLAB_SELFTEST */
 293
 294 /* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
 295 #define SLAB_C_MAGIC            0x4F17A36DUL
 296
 297 /* maximum size of an obj (in 2^order pages) */
 298 #define SLAB_OBJ_MAX_ORDER      5       /* 32 pages */
 299
 300 /* maximum num of pages for a slab (prevents large requests to the VM layer) */
 301 #define SLAB_MAX_GFP_ORDER      5       /* 32 pages */
 302
 303 /* the 'preferred' minimum num of objs per slab - maybe less for large objs */
 304 #define SLAB_MIN_OBJS_PER_SLAB  4
 305
 306 /* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
 307  * then the page order must be less than this before trying the next order.
 308  */
 309 #define SLAB_BREAK_GFP_ORDER_HI 2
 310 #define SLAB_BREAK_GFP_ORDER_LO 1
 311 static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
 312
 313 /* Macros for storing/retrieving the cachep and or slab from the
 314  * global 'mem_map'.  With off-slab bufctls, these are used to find the
 315  * slab an obj belongs to.  With kmalloc(), and kfree(), these are used
 316  * to find the cache which an obj belongs to.
 317  */
 318 #define SLAB_SET_PAGE_CACHE(pg, x)      ((pg)->next = (struct page *)(x))
 319 #define SLAB_GET_PAGE_CACHE(pg)         ((kmem_cache_t *)(pg)->next)
 320 #define SLAB_SET_PAGE_SLAB(pg, x)       ((pg)->prev = (struct page *)(x))
 321 #define SLAB_GET_PAGE_SLAB(pg)          ((kmem_slab_t *)(pg)->prev)
 322
 323 /* Size description struct for general caches.
      * One entry per general cache; see the cache_sizes[] table.
      */
 324 typedef struct cache_sizes {
 325         size_t           cs_size;       /* obj size served by this cache; 0 ends the table */
 326         kmem_cache_t    *cs_cachep;     /* filled in by kmem_cache_sizes_init() */
 327 } cache_sizes_t;
 328
 329 static cache_sizes_t cache_sizes[] = {
     /* Table of general cache sizes, in ascending order.  The 32-byte entry is
      * only present for 4096-byte pages.  Entries must stay in step with
      * cache_sizes_name[], which kmem_cache_sizes_init() walks in parallel.
      */
 330 #if     PAGE_SIZE == 4096
 331         {  32,          NULL},
 332 #endif
 333         {  64,          NULL},
 334         { 128,          NULL},
 335         { 256,          NULL},
 336         { 512,          NULL},
 337         {1024,          NULL},
 338         {2048,          NULL},
 339         {4096,          NULL},
 340         {8192,          NULL},
 341         {16384,         NULL},
 342         {32768,         NULL},
 343         {65536,         NULL},
 344         {131072,        NULL},
 345         {0,             NULL}   /* terminator: cs_size == 0 stops the init loop */
 346 };
 347
 348 /* Names for the general caches.  Not placed into the sizes struct for
 349  * a good reason; the string ptr is not needed while searching in kmalloc(),
 350  * and would 'get-in-the-way' in the h/w cache.
      * This table has no terminator of its own: kmem_cache_sizes_init() steps
      * through it in lock-step with cache_sizes[], so the two must have the
      * same entries under the same PAGE_SIZE conditions.
 351  */
 352 static char *cache_sizes_name[] = {
 353 #if     PAGE_SIZE == 4096
 354         "size-32",
 355 #endif
 356         "size-64",
 357         "size-128",
 358         "size-256",
 359         "size-512",
 360         "size-1024",
 361         "size-2048",
 362         "size-4096",
 363         "size-8192",
 364         "size-16384",
 365         "size-32768",
 366         "size-65536",
 367         "size-131072"
 368 };
 369
 370 /* internal cache of cache description objs
      * The initialiser is positional, so it must follow the member order of
      * struct kmem_cache_s exactly.  The three slab pointers start out at
      * kmem_slab_end(&cache_cache), i.e. an empty slab chain, and nextp points
      * back at cache_cache itself.  c_offset and c_num are only placeholders
      * here; kmem_cache_init() computes their real values.
      */
 371 static  kmem_cache_t    cache_cache = {
 372 /* freep, flags */              kmem_slab_end(&cache_cache), SLAB_NO_REAP,
 373 /* offset, num */               sizeof(kmem_cache_t),   0,
 374 /* c_magic, c_inuse */          SLAB_C_MAGIC, 0,
 375 /* firstp, lastp */             kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
 376 /* spinlock */                  SPIN_LOCK_UNLOCKED,
 377 /* growing */                   0,
 378 /* dflags */                    0,
 379 /* org_size, gfp */             0, 0,
 380 /* ctor, dtor, align */         NULL, NULL, L1_CACHE_BYTES,
 381 /* colour, colour_next */       0, 0,
 382 /* failures */                  0,
 383 /* name */                      "kmem_cache",
 384 /* nextp */                     &cache_cache,
 385 /* index */                     NULL,
 386 };
 387
 388 /* Guard access to the cache-chain. */
 389 static struct semaphore cache_chain_sem;
 390
 391 /* Place maintainer for reaping. */
 392 static  kmem_cache_t    *clock_searchp = &cache_cache;
 393
 394 /* Internal slab management cache, for when slab management is off-slab. */
 395 static kmem_cache_t     *cache_slabp = NULL;
 396
 397 /* Max number of objs-per-slab for caches which use bufctl's.
 398  * Needed to avoid a possible looping condition in kmem_cache_grow().
      * Set by kmem_cache_sizes_init(): it ends up as
      * cs_size / sizeof(kmem_bufctl_t) of the last general cache created
      * before the first one that has SLAB_CFLGS_BUFCTL set.
 399  */
 400 static unsigned long bufctl_limit = 0;
 401
 402 /* Initialisation - setup the `cache' cache.
      * Checks that kmem_slab_t and kmem_cache_t have the relative member layout
      * that kmem_slab_end() relies on (panics if not), initialises
      * cache_chain_sem, sizes the objs of cache_cache, and picks the gfp break
      * order from the amount of physical memory.
      * Returns 'start' unchanged; 'end' is not used.
      */
 403 long __init kmem_cache_init(long start, long end)
 404 {
 405         size_t size, i;
 406
     /* Local offsetof-style helpers, removed again after the checks. */
 407 #define kmem_slab_offset(x)  ((unsigned long)&((kmem_slab_t *)0)->x)
 408 #define kmem_slab_diff(a,b)  (kmem_slab_offset(a) - kmem_slab_offset(b))
 409 #define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
 410 #define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
 411
 412         /* Sanity checks...
              * The pseudo-slab returned by kmem_slab_end() must see c_firstp as
              * s_nextp, c_lastp as s_prevp, c_magic as s_magic and c_inuse as
              * s_inuse. */
 413         if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
 414             kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
 415             ((kmem_cache_offset(c_lastp) -
 416               ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
 417              kmem_slab_offset(s_prevp)) ||
 418             kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
 419                 /* Offsets to the magic are incorrect, either the structures have
 420                  * been incorrectly changed, or adjustments are needed for your
 421                  * architecture.
 422                  */
 423                 panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
 424                 /* NOTREACHED */
 425         }
 426 #undef  kmem_cache_offset
 427 #undef  kmem_cache_diff
 428 #undef  kmem_slab_offset
 429 #undef  kmem_slab_diff
 430
 431         init_MUTEX(&cache_chain_sem);
 432
             /* Per-obj footprint: the cache-struct (c_offset's initial value) plus
              * one bufctl, rounded up to a multiple of L1_CACHE_BYTES.  c_offset
              * is then stored as that footprint less the bufctl. */
 433         size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
 434         size += (L1_CACHE_BYTES-1);
 435         size &= ~(L1_CACHE_BYTES-1);
 436         cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
 437
             /* Bytes left for objs once the slab management struct is taken out. */
 438         i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
 439         cache_cache.c_num = i / size;   /* num of objs per slab */
 440
 441         /* Cache colouring.
              * Range = unused tail bytes of the slab, in L1_CACHE_BYTES units. */
 442         cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
 443         cache_cache.c_colour_next = cache_cache.c_colour;
 444
 445         /*
 446          * Fragmentation resistance on low memory - only use bigger
 447          * page orders on machines with more than 32MB of memory.
 448          */
 449         if (num_physpages > (32 << 20) >> PAGE_SHIFT)
 450                 slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
 451         return start;
 452 }
 453
 454 /* Initialisation - setup remaining internal and general caches.
 455  * Called after the gfp() functions have been enabled, and before smp_init().
      * Creates "slab_cache" (cache_slabp) and then one general cache per
      * cache_sizes[] entry, recording bufctl_limit along the way.  Any creation
      * failure is fatal: the function panics rather than returning an error.
 456  */
 457 void __init kmem_cache_sizes_init(void)
 458 {
             /* Becomes non-zero at the first general cache with SLAB_CFLGS_BUFCTL. */
 459         unsigned int    found = 0;
 460
 461         cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
 462                                         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 463         if (cache_slabp) {
                     /* names and sizes advance together; only sizes has a terminator. */
 464                 char **names = cache_sizes_name;
 465                 cache_sizes_t *sizes = cache_sizes;
 466                 do {
 467                         /* For performance, all the general caches are L1 aligned.
 468                          * This should be particularly beneficial on SMP boxes, as it
 469                          * eliminates "false sharing".
 470                          * Note for systems short on memory removing the alignment will
 471                          * allow tighter packing of the smaller caches. */
 472                         if (!(sizes->cs_cachep =
 473                               kmem_cache_create(*names++, sizes->cs_size,
 474                                                 0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
 475                                 goto panic_time;
 476                         if (!found) {
 477                                 /* Inc off-slab bufctl limit until the ceiling is hit. */
 478                                 if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
 479                                         found++;
 480                                 else
 481                                         bufctl_limit =
 482                                                 (sizes->cs_size/sizeof(kmem_bufctl_t));
 483                         }
                             /* Flag is set after creation, directly on the new cache. */
 484                         sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
 485                         sizes++;
 486                 } while (sizes->cs_size);
 487 #if     SLAB_SELFTEST
 488                 kmem_self_test();
 489 #endif  /* SLAB_SELFTEST */
 490                 return;
 491         }
     /* Reached when cache_slabp is NULL, or via goto on a failed general cache. */
 492 panic_time:
 493         panic("kmem_cache_sizes_init: Error creating caches");
 494         /* NOTREACHED */
 495 }
 496
 497 /* Interface to system's page allocator.  Dma pts to non-zero if all
 498  * of memory is DMAable. No need to hold the cache-lock.
      * Requests 2^c_gfporder pages with the given flags and returns their
      * address (whatever __get_free_pages() returned, cast to a pointer).
      * *dma is always written: first with (flags & SLAB_DMA), and, if that is
      * zero and pages were obtained, with the result of scanning every page.
 499  */
 500 static inline void *
 501 kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
 502 {
 503         void    *addr;
 504
 505         *dma = flags & SLAB_DMA;
 506         addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
 507         /* Assume that now we have the pages no one else can legally
 508          * mess with the 'struct page's.
 509          * However vm_scan() might try to test the structure to see if
 510          * it is a named-page or buffer-page.  The members it tests are
 511          * of no interest here.....
 512          */
 513         if (!*dma && addr) {
 514                 /* Need to check if can dma. */
 515                 struct page *page = mem_map + MAP_NR(addr);
                     /* *dma doubles as the loop counter.  If the scan runs to the
                      * end, the final post-decrement takes it from 0 to the
                      * largest unsigned value, i.e. non-zero ("all DMAable"); a
                      * non-DMA page forces it to 0 and stops the scan. */
 516                 *dma = 1<<cachep->c_gfporder;
 517                 while ((*dma)--) {
 518                         if (!PageDMA(page)) {
 519                                 *dma = 0;
 520                                 break;
 521                         }
 522                         page++;
 523                 }
 524         }
 525         return addr;
 526 }
 527
 528 /* Interface to system's page release.
      * Clears the slab bit on each of the 2^c_gfporder 'struct page's that
      * start at addr, then gives the pages back with free_pages().
      */
 529 static inline void
 530 kmem_freepages(kmem_cache_t *cachep, void *addr)
 531 {
 532         unsigned long i = (1<<cachep->c_gfporder);      /* pages in the slab */
 533         struct page *page = &mem_map[MAP_NR(addr)];
 534
 535         /* free_pages() does not clear the type bit - we do that.
 536          * The pages have been unlinked from their cache-slab,
 537          * but their 'struct page's might be accessed in
 538          * vm_scan(). Shouldn't be a worry.
 539          */
 540         while (i--) {
 541                 PageClearSlab(page);
 542                 page++;
 543         }
 544         free_pages((unsigned long)addr, cachep->c_gfporder);
 545 }
 546
 547 #if     SLAB_DEBUG_SUPPORT
 548 static inline void
 549 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
 550 {
             /* Fill c_org_size bytes at addr with SLAB_POISON_BYTE, then mark the
              * last of those bytes with SLAB_POISON_END.  The void-pointer
              * arithmetic relies on the compiler treating it as byte-sized. */
 551         memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
 552         *(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISON_END;
 553 }
 554
 555 static inline int
 556 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
 557 {
             /* Returns 0 if the first SLAB_POISON_END byte within c_org_size bytes
              * of addr is the very last byte, 1 otherwise (including when no such
              * byte is found).  The other bytes are not compared against
              * SLAB_POISON_BYTE, so only a stray or missing end-marker is caught. */
 558         void *end;
 559         end = memchr(addr, SLAB_POISON_END, cachep->c_org_size);
 560         if (end != (addr+cachep->c_org_size-1))
 561                 return 1;
 562         return 0;
 563 }
 564 #endif  /* SLAB_DEBUG_SUPPORT */
 565
 566 /* Three slab chain funcs - all called with ints disabled and the appropriate
 567  * cache-lock held.
      * kmem_slab_unlink() removes slabp from its chain by joining its two
      * neighbours.  Either neighbour may be the kmem_slab_end() pseudo-slab, in
      * which case the store updates the cache's c_firstp/c_lastp.  slabp's own
      * link fields are left untouched.
 568  */
 569 static inline void
 570 kmem_slab_unlink(kmem_slab_t *slabp)
 571 {
 572         kmem_slab_t     *prevp = slabp->s_prevp;
 573         kmem_slab_t     *nextp = slabp->s_nextp;
 574         prevp->s_nextp = nextp;
 575         nextp->s_prevp = prevp;
 576 }
 577
 578 static inline void
 579 kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
 580 {
             /* Append slabp as the new last slab of cachep's chain.  When the chain
              * is empty lastp is the kmem_slab_end() pseudo-slab, so the final
              * store through lastp->s_nextp also sets c_firstp. */
 581         kmem_slab_t     *lastp = cachep->c_lastp;
 582         slabp->s_nextp = kmem_slab_end(cachep);
 583         slabp->s_prevp = lastp;
 584         cachep->c_lastp = slabp;
 585         lastp->s_nextp = slabp;
 586 }
 587
 588 static inline void
 589 kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
 590 {
             /* Insert slabp immediately in front of the slab c_freep points at.
              * c_freep itself is not changed here. */
 591         kmem_slab_t     *nextp = cachep->c_freep;
 592         kmem_slab_t     *prevp = nextp->s_prevp;
 593         slabp->s_nextp = nextp;
 594         slabp->s_prevp = prevp;
 595         nextp->s_prevp = slabp;
             /* slabp->s_prevp is prevp at this point. */
 596         slabp->s_prevp->s_nextp = slabp;
 597 }
 598
 599 /* Destroy all the objs in a slab, and release the mem back to the system.
 600  * Before calling the slab must have been unlinked from the cache.
 601  * The cache-lock is not held/needed.
      * Objs are only visited when there is a destructor or, with debug support,
      * when poisoning/red-zoning needs verifying.  Afterwards the bufctl index
      * (if any), the slab's pages and, for off-slab management, the slab struct
      * itself are released, in that order.
 602  */
 603 static void
 604 kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
 605 {
 606         if (cachep->c_dtor
 607 #if     SLAB_DEBUG_SUPPORT
 608                 || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
 609 #endif  /*SLAB_DEBUG_SUPPORT*/
 610         ) {
 611                 /* Doesn't use the bufctl ptrs to find objs. */
 612                 unsigned long num = cachep->c_num;
 613                 void *objp = slabp->s_mem;
 614                 do {
 615 #if     SLAB_DEBUG_SUPPORT
                             /* NOTE(review): both zones are compared with
                              * SLAB_RED_MAGIC1, which its #define comment labels
                              * "when obj is active" - confirm which state MAGIC1
                              * marks. */
 616                         if (cachep->c_flags & SLAB_RED_ZONE) {
 617                                 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
 618                                         printk(KERN_ERR "kmem_slab_destroy: "
 619                                                "Bad front redzone - %s\n",
 620                                                cachep->c_name);
                                     /* Step over the front zone word; undone below. */
 621                                 objp += BYTES_PER_WORD;
 622                                 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
 623                                     SLAB_RED_MAGIC1)
 624                                         printk(KERN_ERR "kmem_slab_destroy: "
 625                                                "Bad rear redzone - %s\n",
 626                                                cachep->c_name);
 627                         }
                             /* Without debug support this test is compiled out and
                              * the call is unconditional; the outer 'if' has already
                              * established that c_dtor is set in that build. */
 628                         if (cachep->c_dtor)
 629 #endif  /*SLAB_DEBUG_SUPPORT*/
 630                                 (cachep->c_dtor)(objp, cachep, 0);
 631 #if     SLAB_DEBUG_SUPPORT
 632                         else if (cachep->c_flags & SLAB_POISON) {
 633                                 if (kmem_check_poison_obj(cachep, objp))
 634                                         printk(KERN_ERR "kmem_slab_destroy: "
 635                                                "Bad poison - %s\n", cachep->c_name);
 636                         }
 637                         if (cachep->c_flags & SLAB_RED_ZONE)
 638                                 objp -= BYTES_PER_WORD;
 639 #endif  /* SLAB_DEBUG_SUPPORT */
                             /* Advance to the next obj; without an s_index each obj
                              * is followed by its own bufctl, which is skipped too. */
 640                         objp += cachep->c_offset;
 641                         if (!slabp->s_index)
 642                                 objp += sizeof(kmem_bufctl_t);
 643                 } while (--num);
 644         }
 645
             /* slabp is written and read here, before the pages are freed. */
 646         slabp->s_magic = SLAB_MAGIC_DESTROYED;
 647         if (slabp->s_index)
 648                 kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
 649         kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
             /* From here on only cachep is consulted, not slabp's contents. */
 650         if (SLAB_OFF_SLAB(cachep->c_flags))
 651                 kmem_cache_free(cache_slabp, slabp);
 652 }
 653
 654 /* Calculate the num objs, wastage, and bytes left over for a given slab size.
      * gfporder  - page order of the slab (2^gfporder pages)
      * size      - bytes occupied by each obj
      * extra     - additional per-obj bytes to count as waste
      * flags     - cache flags; only SLAB_CFLGS_OFF_SLAB is examined
      * left_over - out: bytes remaining after the objs (and any on-slab
      *             management struct) are placed
      * num       - out: objs that fit in the slab
      * Returns left_over + on-slab management bytes + extra * num.
      * 'size' must be non-zero, as it is used as a divisor.
      */
 655 static inline size_t
 656 kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
 657                      unsigned long flags, size_t *left_over, unsigned long *num)
 658 {
 659         size_t wastage = PAGE_SIZE<<gfporder;
 660
             /* From here gfporder is reused as scratch: it holds the bytes taken
              * by on-slab management (0 when management is off-slab), no longer
              * a page order. */
 661         if (SLAB_OFF_SLAB(flags))
 662                 gfporder = 0;
 663         else
 664                 gfporder = slab_align_size;
 665         wastage -= gfporder;
 666         *num = wastage / size;
 667         wastage -= (*num * size);
 668         *left_over = wastage;
 669
 670         return (wastage + gfporder + (extra * *num));
 671 }
 672
 673 /* Create a cache of 'size'-byte objs and link it into the cache-chain.
 674  * Returns a ptr to the cache on success; NULL if a sanity check rejects the request or no
 675  * cache descriptor can be allocated.  Cannot be called within an int, but can be interrupted.
 676  * NOTE: Only the 'name' ptr is stored, so it must be memory that is _not_ going to disappear.
 677  */
 678 kmem_cache_t *
 679 kmem_cache_create(const char *name, size_t size, size_t offset,
 680         unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
 681         void (*dtor)(void*, kmem_cache_t *, unsigned long))
 682 {
 683         const char *func_nm= KERN_ERR "kmem_create: ";  /* prefix for every message below */
 684         kmem_cache_t    *searchp;
 685         kmem_cache_t    *cachep=NULL;
 686         size_t          extra;
 687         size_t          left_over;
 688         size_t          align;
 689
 690         /* Sanity checks... */
 691 #if     SLAB_MGMT_CHECKS
 692         if (!name) {
 693                 printk("%sNULL ptr\n", func_nm);
 694                 goto opps;
 695         }
 696         if (in_interrupt()) {
 697                 printk("%sCalled during int - %s\n", func_nm, name);
 698                 goto opps;
 699         }
 700
 701         if (size < BYTES_PER_WORD) {
 702                 printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
 703                 size = BYTES_PER_WORD;          /* warn, then round up rather than fail */
 704         }
 705
 706         if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
 707                 printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
 708                 goto opps;
 709         }
 710
 711         if (dtor && !ctor) {
 712                 /* Decon, but no con - doesn't make sense */
 713                 printk("%sDecon but no con - %s\n", func_nm, name);
 714                 goto opps;
 715         }
 716
 717         if (offset < 0 || offset > size) {      /* NOTE(review): offset is a size_t, so 'offset < 0' can never be true */
 718                 printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
 719                 offset = 0;                     /* drop the bad offset and carry on */
 720         }
 721
 722 #if     SLAB_DEBUG_SUPPORT
 723         if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
 724                 /* No constructor, but initial state check requested */
 725                 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
 726                 flags &= ~SLAB_DEBUG_INITIAL;
 727         }
 728
 729         if ((flags & SLAB_POISON) && ctor) {
 730                 /* request for poisoning, but we can't do that with a constructor */
 731                 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
 732                 flags &= ~SLAB_POISON;
 733         }
 734 #if     0
 735         if ((flags & SLAB_HIGH_PACK) && ctor) {
 736                 printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
 737                 flags &= ~SLAB_HIGH_PACK;
 738         }
 739         if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
 740                 printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
 741                        func_nm, name);
 742                 flags &= ~SLAB_HIGH_PACK;
 743         }
 744 #endif
 745 #endif  /* SLAB_DEBUG_SUPPORT */
 746 #endif  /* SLAB_MGMT_CHECKS */
 747
 748         /* Always check flags, a caller might be expecting debug
 749          * support which isn't available.
 750          */
 751         if (flags & ~SLAB_C_MASK) {
 752                 printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
 753                 flags &= SLAB_C_MASK;
 754         }
 755
 756         /* Get cache's description obj. */
 757         cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
 758         if (!cachep)
 759                 goto opps;
 760         memset(cachep, 0, sizeof(kmem_cache_t));       /* every member not set below starts as 0 */
 761
 762         /* Check that size is in terms of words.  This is needed to avoid
 763          * unaligned accesses for some archs when redzoning is used, and makes
 764          * sure any on-slab bufctl's are also correctly aligned.
 765          */
 766         if (size & (BYTES_PER_WORD-1)) {
 767                 size += (BYTES_PER_WORD-1);
 768                 size &= ~(BYTES_PER_WORD-1);
 769                 printk("%sForcing size word alignment - %s\n", func_nm, name);
 770         }
 771
 772         cachep->c_org_size = size;      /* word-aligned size, before any redzone/bufctl padding */
 773 #if     SLAB_DEBUG_SUPPORT
 774         if (flags & SLAB_RED_ZONE) {
 775                 /* There is no point trying to honour cache alignment when redzoning. */
 776                 flags &= ~SLAB_HWCACHE_ALIGN;
 777                 size += 2*BYTES_PER_WORD;               /* words for redzone */
 778         }
 779 #endif  /* SLAB_DEBUG_SUPPORT */
 780
 781         align = BYTES_PER_WORD;
 782         if (flags & SLAB_HWCACHE_ALIGN)
 783                 align = L1_CACHE_BYTES;
 784
 785         /* Determine if the slab management and/or bufctls are 'on' or 'off' slab. */
 786         extra = sizeof(kmem_bufctl_t);
 787         if (size < (PAGE_SIZE>>3)) {
 788                 /* Size is small(ish).  Use packing where bufctl size per
 789                  * obj is low, and slab management is on-slab.
 790                  */
 791 #if     0
 792                 if ((flags & SLAB_HIGH_PACK)) {
 793                         /* Special high packing for small objects
 794                          * (mainly for vm_mapping structs, but
 795                          * others can use it).
 796                          */
 797                         if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
 798                             size == L1_CACHE_BYTES) {
 799                                 /* The bufctl is stored with the object. */
 800                                 extra = 0;
 801                         } else
 802                                 flags &= ~SLAB_HIGH_PACK;
 803                 }
 804 #endif
 805         } else {
 806                 /* Size is large, assume best to place the slab management obj
 807                  * off-slab (should allow better packing of objs).
 808                  */
 809                 flags |= SLAB_CFLGS_OFF_SLAB;
 810                 if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
 811                     || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
 812                         /* To avoid waste the bufctls are off-slab... */
 813                         flags |= SLAB_CFLGS_BUFCTL;
 814                         extra = 0;
 815                 } /* else slab management is off-slab, but freelist pointers are on. */
 816         }
 817         size += extra;          /* room for an on-slab bufctl (extra is 0 if bufctls are off-slab) */
 818
 819         if (flags & SLAB_HWCACHE_ALIGN) {
 820                 /* Need to adjust size so that objs are cache aligned. */
 821                 if (size > (L1_CACHE_BYTES/2)) {
 822                         size_t words = size % L1_CACHE_BYTES;
 823                         if (words)
 824                                 size += (L1_CACHE_BYTES-words);        /* round up to a whole number of lines */
 825                 } else {
 826                         /* Small obj size, can get at least two per cache line. */
 827                         int num_per_line = L1_CACHE_BYTES/size;
 828                         left_over = L1_CACHE_BYTES - (num_per_line*size);      /* left_over is only scratch here */
 829                         if (left_over) {
 830                                 /* Need to adjust size so objs cache align. */
 831                                 if (left_over%num_per_line) {
 832                                         /* Odd num of objs per line - fixup. */
 833                                         num_per_line--;
 834                                         left_over += size;
 835                                 }
 836                                 size += (left_over/num_per_line);
 837                         }
 838                 }
 839         } else if (!(size%L1_CACHE_BYTES)) {
 840                 /* Size happens to cache align... */
 841                 flags |= SLAB_HWCACHE_ALIGN;
 842                 align = L1_CACHE_BYTES;
 843         }
 844
 845         /* Cal size (in pages) of slabs, and the num of objs per slab.
 846          * This could be made much more intelligent.  For now, try to avoid
 847          * using high page-orders for slabs.  When the gfp() funcs are more
 848          * friendly towards high-order requests, this should be changed.
 849          */
 850         do {
 851                 size_t wastage;
 852                 unsigned int break_flag = 0;    /* set after backing off one order; the next usable result is final */
 853 cal_wastage:
 854                 wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
 855                                                flags, &left_over, &cachep->c_num);
 856                 if (!cachep->c_num)
 857                         goto next;              /* not even one obj fits - try a higher order */
 858                 if (break_flag)
 859                         break;
 860                 if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
 861                         /* Oops, this num of objs will cause problems - back off one order. */
 862                         cachep->c_gfporder--;   /* NOTE(review): nothing here guards against c_gfporder being 0 - confirm */
 863                         break_flag++;
 864                         goto cal_wastage;
 865                 }
 866                 if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
 867                         break;                  /* do not go past the max order */
 868
 869                 /* Large num of objs is good, but v. large slabs are currently
 870                  * bad for the gfp()s.
 871                  */
 872                 if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
 873                         if (cachep->c_gfporder < slab_break_gfp_order)
 874                                 goto next;
 875                 }
 876
 877                 /* Stop caches with small objs having a large num of pages. */
 878                 if (left_over <= slab_align_size)
 879                         break;
 880                 if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
 881                         break;  /* Acceptable internal fragmentation. */
 882 next:
 883                 cachep->c_gfporder++;
 884         } while (1);
 885
 886         /* If the slab has been placed off-slab, and we have enough space then
 887          * move it on-slab.  This is at the expense of any extra colouring.
 888          */
 889         if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
 890             left_over >= slab_align_size) {
 891                 flags &= ~SLAB_CFLGS_OFF_SLAB;
 892                 left_over -= slab_align_size;
 893         }
 894
 895         /* Offset must be a multiple of the alignment (round it up). */
 896         offset += (align-1);
 897         offset &= ~(align-1);
 898
 899         /* Mess around with the offset alignment. */
 900         if (!left_over) {
 901                 offset = 0;             /* no spare bytes, so no colouring */
 902         } else if (left_over < offset) {
 903                 offset = align;         /* requested offset does not fit - fall back to the alignment */
 904                 if (flags & SLAB_HWCACHE_ALIGN) {
 905                         if (left_over < offset)
 906                                 offset = 0;
 907                 } else {
 908                         /* Offset is BYTES_PER_WORD, and left_over is at
 909                          * least BYTES_PER_WORD.
 910                          */
 911                         if (left_over >= (BYTES_PER_WORD*2)) {
 912                                 offset >>= 1;
 913                                 if (left_over >= (BYTES_PER_WORD*4))
 914                                         offset >>= 1;
 915                         }
 916                 }
 917         } else if (!offset) {
 918                 /* No offset requested, but space enough - give one. */
 919                 offset = left_over/align;       /* temporarily: how many colours 'align' would give */
 920                 if (flags & SLAB_HWCACHE_ALIGN) {
 921                         if (offset >= 8) {
 922                                 /* A large number of colours - use a larger alignment. */
 923                                 align <<= 1;
 924                         }
 925                 } else {
 926                         if (offset >= 10) {
 927                                 align <<= 1;
 928                                 if (offset >= 16)
 929                                         align <<= 1;
 930                         }
 931                 }
 932                 offset = align;
 933         }
 934
 935 #if     0
 936 printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
 937 #endif
 938
 939         if ((cachep->c_align = (unsigned long) offset)) /* with offset 0, c_colour keeps the 0 from the memset */
 940                 cachep->c_colour = (left_over/offset);
 941         cachep->c_colour_next = cachep->c_colour;
 942
 943         /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
 944         if (!SLAB_BUFCTL(flags))
 945                 size -= sizeof(kmem_bufctl_t);
 946         else
 947                 cachep->c_index_cachep =
 948                         kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
 949         cachep->c_offset = (unsigned long) size;
 950         cachep->c_freep = kmem_slab_end(cachep);        /* no slabs yet: all three ptrs are the end marker */
 951         cachep->c_firstp = kmem_slab_end(cachep);
 952         cachep->c_lastp = kmem_slab_end(cachep);
 953         cachep->c_flags = flags;
 954         cachep->c_ctor = ctor;
 955         cachep->c_dtor = dtor;
 956         cachep->c_magic = SLAB_C_MAGIC;
 957         cachep->c_name = name;          /* Simply point to the name. */
 958         spin_lock_init(&cachep->c_spinlock);
 959
 960         /* Need the semaphore to access the chain. */
 961         down(&cache_chain_sem);
 962         searchp = &cache_cache;
 963         do {
 964                 /* The name field is constant - no lock needed. */
 965                 if (!strcmp(searchp->c_name, name)) {
 966                         printk("%sDup name - %s\n", func_nm, name);
 967                         break;          /* only a warning: the new cache is still linked in below */
 968                 }
 969                 searchp = searchp->c_nextp;
 970         } while (searchp != &cache_cache);
 971
 972         /* There is no reason to lock our new cache before we
 973          * link it in - no one knows about it yet...
 974          */
 975         cachep->c_nextp = cache_cache.c_nextp;  /* insert directly after cache_cache */
 976         cache_cache.c_nextp = cachep;
 977         up(&cache_chain_sem);
 978 opps:
 979         return cachep;          /* still NULL if a check above jumped here or the alloc failed */
 980 }
 981
 982 /* Shrink a cache.  Releases as many slabs as possible for a cache.
 983  * It is expected this function will be called by a module when it is
 984  * unloaded.  The cache is _not_ removed, this creates too many problems and
 985  * the cache-structure does not take up much room.  A module should keep its
 986  * cache pointer(s) in unloaded memory, so when reloaded it knows the cache
 987  * is available.  Returns 0 if all slabs were released, 1 if some remain, and
 988  * 2 if the call was rejected (NULL ptr, called during int, cache not in chain).
 989  */
 990 int
 991 kmem_cache_shrink(kmem_cache_t *cachep)
 992 {
 993         kmem_cache_t    *searchp;
 994         kmem_slab_t     *slabp;
 995         int     ret;
 996
 997         if (!cachep) {
 998                 printk(KERN_ERR "kmem_shrink: NULL ptr\n");
 999                 return 2;
1000         }
1001         if (in_interrupt()) {
1002                 printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
1003                 return 2;
1004         }
1005
1006         /* Find the cache in the chain of caches. */
1007         down(&cache_chain_sem);         /* Semaphore is needed. */
1008         searchp = &cache_cache;
1009         for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {  /* tests searchp->c_nextp, so cache_cache itself never matches */
1010                 if (searchp->c_nextp != cachep)
1011                         continue;
1012
1013                 /* Accessing clock_searchp is safe - we hold the mutex. */
1014                 if (cachep == clock_searchp)
1015                         clock_searchp = cachep->c_nextp;
1016                 goto found;
1017         }
1018         up(&cache_chain_sem);
1019         printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
1020         return 2;
1021 found:
1022         /* Release the semaphore before getting the cache-lock.  This could
1023          * mean multiple engines are shrinking the cache, but so what.
1024          */
1025         up(&cache_chain_sem);
1026         spin_lock_irq(&cachep->c_spinlock);
1027
1028         /* If the cache is growing, stop shrinking. */
1029         while (!cachep->c_growing) {
1030                 slabp = cachep->c_lastp;        /* always work from the tail of the slab chain */
1031                 if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
1032                         break;                  /* tail slab has objs in use, or no slabs are left */
1033                 kmem_slab_unlink(slabp);
1034                 spin_unlock_irq(&cachep->c_spinlock);  /* the destroy is done without the cache-lock held */
1035                 kmem_slab_destroy(cachep, slabp);
1036                 spin_lock_irq(&cachep->c_spinlock);    /* retake it before looking at the chain again */
1037         }
1038         ret = 1;
1039         if (cachep->c_lastp == kmem_slab_end(cachep))
1040                 ret--;          /* Cache is empty. */
1041         spin_unlock_irq(&cachep->c_spinlock);
1042         return ret;
1043 }
1044
1045 /* Find (on-slab) or allocate (off-slab) the management obj for a new slab; NULL if that fails. */
1046 static inline kmem_slab_t *
1047 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
1048 {
1049         kmem_slab_t     *mgmtp;
1050
1051         if (!SLAB_OFF_SLAB(cachep->c_flags)) {
1052                 /* On-slab: management goes just past the last obj (and past the
1053                  * on-slab bufctls, if any), placed so that the position is 'coloured'.
1054                  */
1055                 unsigned long addr = (unsigned long) (objp + (cachep->c_num * cachep->c_offset));
1056                 if (!SLAB_BUFCTL(cachep->c_flags))
1057                         addr += (cachep->c_num * sizeof(kmem_bufctl_t));
1058                 mgmtp = (kmem_slab_t *) L1_CACHE_ALIGN(addr);
1059         } else {
1060                 /* Off-slab: the management obj comes from its own cache. */
1061                 mgmtp = kmem_cache_alloc(cache_slabp, local_flags);
1062         }
1063
1064         /* Nothing to initialise if we did not get one. */
1065         if (!mgmtp)
1066                 return NULL;
1067
1068         mgmtp->s_inuse = 0;
1069         mgmtp->s_dma = 0;
1070         mgmtp->s_index = NULL;
1071         return mgmtp;
1072 }
1073
1074 static inline void      /* Initialise all c_num objs of a new slab and build the slab's freelist. */
1075 kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
1076                                 unsigned long ctor_flags)
1077 {
1078         kmem_bufctl_t   **bufpp = &slabp->s_freep;      /* the link to fill in next; starts at the list head */
1079         unsigned long   num = cachep->c_num-1;          /* with 'while (num--)' below this gives c_num passes */
1080
1081         do {
1082 #if     SLAB_DEBUG_SUPPORT
1083                 if (cachep->c_flags & SLAB_RED_ZONE) {
1084                         *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;   /* front red-zone word */
1085                         objp += BYTES_PER_WORD;                         /* step over it for the ctor call */
1086                         *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1; /* rear red-zone word */
1087                 }
1088 #endif  /* SLAB_DEBUG_SUPPORT */
1089
1090                 /* Constructors are not allowed to allocate memory from the same cache
1091                  * which they are a constructor for.  Otherwise, deadlock.
1092                  * They must also be threaded.
1093                  */
1094                 if (cachep->c_ctor)
1095                         cachep->c_ctor(objp, cachep, ctor_flags);
1096 #if     SLAB_DEBUG_SUPPORT
1097                 else if (cachep->c_flags & SLAB_POISON) {
1098                         /* need to poison the objs */
1099                         kmem_poison_obj(cachep, objp);
1100                 }
1101
1102                 if (cachep->c_flags & SLAB_RED_ZONE) {
1103                         if (*((unsigned long*)(objp+cachep->c_org_size)) !=
1104                             SLAB_RED_MAGIC1) {
1105                                 *((unsigned long*)(objp+cachep->c_org_size)) =
1106                                         SLAB_RED_MAGIC1;        /* repair the word, then report it */
1107                                 printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
1108                                        "after constructor - %s\n", cachep->c_name);
1109                         }
1110                         objp -= BYTES_PER_WORD;         /* back to the front red-zone word */
1111                         if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
1112                                 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
1113                                 printk(KERN_ERR "kmem_init_obj: Bad front redzone "
1114                                        "after constructor - %s\n", cachep->c_name);
1115                         }
1116                 }
1117 #endif  /* SLAB_DEBUG_SUPPORT */
1118
1119                 objp += cachep->c_offset;
1120                 if (!slabp->s_index) {
1121                         *bufpp = objp;                  /* no index array: the bufctl sits here, c_offset past the obj start */
1122                         objp += sizeof(kmem_bufctl_t);  /* the next obj follows the bufctl */
1123                 } else
1124                         *bufpp = &slabp->s_index[num];  /* index array: entries are chained from the last one down to [0] */
1125                 bufpp = &(*bufpp)->buf_nextp;
1126         } while (num--);
1127
1128         *bufpp = NULL;          /* terminate the freelist */
1129 }
1130
1131 /* Grow (by 1) the number of slabs within a cache.  This is called by the alloc path
1132  * when a cache has no free objs left.  Returns 1 if a slab was added, else 0 (SLAB_NO_GROW given, or an allocation failed).
1133  */
1134 static int
1135 kmem_cache_grow(kmem_cache_t * cachep, int flags)
1136 {
1137         kmem_slab_t     *slabp;
1138         struct page     *page;
1139         void            *objp;
1140         size_t           offset;
1141         unsigned int     dma, local_flags;
1142         unsigned long    ctor_flags;
1143         unsigned long    save_flags;
1144
1145         /* Be lazy and only check for valid flags here,
1146          * keeping it out of the critical path in kmem_cache_alloc().
1147          */
1148         if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
1149                 printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
1150                        flags, cachep->c_name);
1151                 flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
1152         }
1153
1154         if (flags & SLAB_NO_GROW)
1155                 return 0;
1156
1157         /* The test for missing atomic flag is performed here, rather than
1158          * the more obvious place, simply to reduce the critical path length
1159          * in kmem_cache_alloc().  If a caller is slightly mis-behaving they
1160          * will eventually be caught here (where it matters).
1161          */
1162         if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
1163                 printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
1164                        cachep->c_name);
1165                 flags &= ~SLAB_LEVEL_MASK;
1166                 flags |= SLAB_ATOMIC;
1167         }
1168         ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1169         local_flags = (flags & SLAB_LEVEL_MASK);        /* level bits only; used for the internal allocations below */
1170         if (local_flags == SLAB_ATOMIC) {
1171                 /* Not allowed to sleep.  Need to tell a constructor about
1172                  * this - it might need to know...
1173                  */
1174                 ctor_flags |= SLAB_CTOR_ATOMIC;
1175         }
1176
1177         /* About to mess with non-constant members - lock. */
1178         spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1179
1180         /* Get colour for the slab, and cal the next value. */
1181         if (!(offset = cachep->c_colour_next--))        /* take the current colour; after colour 0 ... */
1182                 cachep->c_colour_next = cachep->c_colour;      /* ... start again from c_colour */
1183         offset *= cachep->c_align;              /* colour number -> byte offset of the first obj */
1184         cachep->c_dflags = SLAB_CFLGS_GROWN;
1185
1186         cachep->c_growing++;
1187         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1188
1189         /* A series of memory allocations for a new slab.
1190          * Neither the cache-chain semaphore, or cache-lock, are
1191          * held, but the incrementing c_growing prevents
1192          * this cache from being reaped or shrunk.
1193          * Note: The cache could be selected for reaping in
1194          * kmem_cache_reap(), but when the final test is made the
1195          * growing value will be seen.
1196          */
1197
1198         /* Get mem for the objs. */
1199         if (!(objp = kmem_getpages(cachep, flags, &dma)))       /* dma is filled in by kmem_getpages() */
1200                 goto failed;
1201
1202         /* Get slab management. */
1203         if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
1204                 goto opps1;
1205         if (dma)
1206                 slabp->s_dma = 1;
1207         if (SLAB_BUFCTL(cachep->c_flags)) {
1208                 slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
1209                 if (!slabp->s_index)
1210                         goto opps2;
1211         }
1212
1213         /* Nasty!!!!!!  I hope this is OK. */
1214         dma = 1 << cachep->c_gfporder;          /* 'dma' is re-used as a countdown of the slab's pages */
1215         page = &mem_map[MAP_NR(objp)];
1216         do {
1217                 SLAB_SET_PAGE_CACHE(page, cachep);
1218                 SLAB_SET_PAGE_SLAB(page, slabp);
1219                 PageSetSlab(page);
1220                 page++;
1221         } while (--dma);
1222
1223         slabp->s_offset = offset;       /* It will fit... */
1224         objp += offset;         /* Address of first object. */
1225         slabp->s_mem = objp;
1226
1227         /* For on-slab bufctls, c_offset is the distance between the start of
1228          * an obj and its related bufctl.  For off-slab bufctls, c_offset is
1229          * the distance between objs in the slab.
1230          */
1231         kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
1232
1233         spin_lock_irq(&cachep->c_spinlock);     /* NOTE(review): irq variant here, but released below with irqrestore(save_flags) */
1234
1235         /* Make slab active. */
1236         slabp->s_magic = SLAB_MAGIC_ALLOC;
1237         kmem_slab_link_end(cachep, slabp);
1238         if (cachep->c_freep == kmem_slab_end(cachep))
1239                 cachep->c_freep = slabp;        /* there was no free slab, so this one becomes it */
1240         SLAB_STATS_INC_GROWN(cachep);
1241         cachep->c_failures = 0;
1242         cachep->c_growing--;
1243
1244         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1245         return 1;
1246 opps2:          /* no bufctl index: free the management obj (only if it was allocated off-slab) */
1247         if (SLAB_OFF_SLAB(cachep->c_flags))
1248                 kmem_cache_free(cache_slabp, slabp);
1249 opps1:          /* no slab management: hand the pages back */
1250         kmem_freepages(cachep, objp);
1251 failed:         /* nothing left to free; just drop the growing count under the lock */
1252         spin_lock_irq(&cachep->c_spinlock);
1253         cachep->c_growing--;
1254         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1255         return 0;
1256 }
1257
1258 static void
1259 kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
1260 {       /* Log an alloc-path error; 'cachep' may be NULL when the cache is not known. */
1261         const char *cache_nm = cachep ? cachep->c_name : "unknown";
1262         if (cachep)
1263                 SLAB_STATS_INC_ERR(cachep);     /* this is atomic */
1264         printk(KERN_ERR "kmem_alloc: %s (name=%s)\n", str, cache_nm);
1265 }
1266
1267 static void
1268 kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
1269 {       /* Log a free-path error for 'objp'; 'cachep' may be NULL when the cache is not known. */
1270         const char *cache_nm = cachep ? cachep->c_name : "unknown";
1271         if (cachep)
1272                 SLAB_STATS_INC_ERR(cachep);
1273         printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n", str, objp, cache_nm);
1274 }
1275
1276 /* Look past the first free slab for one whose objs are suitable for DMA.
1277  * A hit is unlinked, relinked with kmem_slab_link_free(), made c_freep and
1278  * returned; a miss returns kmem_slab_end(cachep).  Since __kmem_cache_alloc()
1279  * tested the first free slab, ints must have stayed off and the cache-lock held! */
1280 static inline kmem_slab_t *
1281 kmem_cache_search_dma(kmem_cache_t * cachep)
1282 {
1283         kmem_slab_t     *candp = cachep->c_freep->s_nextp;
1284
1285         while (candp != kmem_slab_end(cachep) && !(candp->s_dma))
1286                 candp = candp->s_nextp;
1287
1288         if (candp != kmem_slab_end(cachep)) {
1289                 kmem_slab_unlink(candp);
1290                 kmem_slab_link_free(cachep, candp);
1291                 cachep->c_freep = candp;
1292         }
1293         return candp;
1294 }
1295
1296 #if     SLAB_DEBUG_SUPPORT
1297 /* Extra check on free: is this obj's bufctl already on its slab's freelist?
1298  * Only done for caches that use bufctl structures within the slab; those
1299  * which use bufctl's from the internal cache have a reasonable check when
1300  * the address is searched for.  Returns NULL if 'bufp' is on the list that
1301  * starts at 'search_bufp', else 'objp'.  Called with the cache-lock held. */
1302 static void *
1303 kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
1304                        kmem_bufctl_t *bufp, void * objp)
1305 {
1306         kmem_bufctl_t   *curp = search_bufp;
1307
1308         if (!SLAB_BUFCTL(cachep->c_flags)) {
1309                 /* Walk the slab's freelist until it ends or we meet 'bufp'. */
1310                 while (curp && curp != bufp)
1311                         curp = curp->buf_nextp;
1312                 if (curp)
1313                         return NULL;
1314         }
1315         return objp;
1316 }
1317 #endif  /* SLAB_DEBUG_SUPPORT */
1318
1319 /* Relink 'slabp' with kmem_slab_link_end() if the slab after it has objs in use.  Called with cache lock held. */
1320 static inline void
1321 kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1322 {
1323         kmem_slab_t     *followp = slabp->s_nextp;
1324         if (!followp->s_inuse)
1325                 return;         /* Already at correct position. */
1326         if (slabp == cachep->c_freep)
1327                 cachep->c_freep = followp;      /* keep c_freep off the slab being moved */
1328         kmem_slab_unlink(slabp);
1329         kmem_slab_link_end(cachep, slabp);
1330 }
1331
1332 /* Make 'slabp' the slab that c_freep points at, relinking it first if needed.  Called with cache lock held. */
1333 static inline void
1334 kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
1335 {
1336         if (cachep->c_num == slabp->s_nextp->s_inuse) {        /* the following slab has every obj in use */
1337                 kmem_slab_unlink(slabp);                        /* so take this one out of the chain ... */
1338                 kmem_slab_link_free(cachep, slabp);             /* ... and relink it via kmem_slab_link_free() */
1339         }
1340         cachep->c_freep = slabp;
1341 }
1342
1343 /* Returns a ptr to an obj in the given cache, or NULL (NULL cache ptr, corrupt slab magic, or no obj and unable to grow). */
1344 static inline void *
1345 __kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1346 {
1347         kmem_slab_t     *slabp;
1348         kmem_bufctl_t   *bufp;
1349         void            *objp;
1350         unsigned long   save_flags;
1351
1352         /* Sanity check. */
1353         if (!cachep)
1354                 goto nul_ptr;
1355         spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1356 try_again:
1357         /* Get slab alloc is to come from. */
1358         slabp = cachep->c_freep;
1359
1360         /* Magic is a sanity check _and_ says if we need a new slab. */
1361         if (slabp->s_magic != SLAB_MAGIC_ALLOC)
1362                 goto alloc_new_slab;
1363         /* DMA requests are 'rare' - keep out of the critical path. */
1364         if (flags & SLAB_DMA)
1365                 goto search_dma;
1366 try_again_dma:
1367         SLAB_STATS_INC_ALLOCED(cachep);
1368         SLAB_STATS_INC_ACTIVE(cachep);
1369         SLAB_STATS_SET_HIGH(cachep);
1370         slabp->s_inuse++;
1371         bufp = slabp->s_freep;                  /* take the first bufctl off the slab's freelist */
1372         slabp->s_freep = bufp->buf_nextp;
1373         if (slabp->s_freep) {                   /* slab still has free objs; otherwise see below ret_obj's 'if' */
1374 ret_obj:
1375                 if (!slabp->s_index) {          /* no index array: the bufctl sits right after its obj */
1376                         bufp->buf_slabp = slabp;
1377                         objp = ((void*)bufp) - cachep->c_offset;       /* obj starts c_offset bytes before its bufctl */
1378 finished:
1379                         /* The lock is not needed by the red-zone or poison ops, and the
1380                          * obj has been removed from the slab.  Should be safe to drop
1381                          * the lock here.
1382                          */
1383                         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1384 #if     SLAB_DEBUG_SUPPORT
1385                         if (cachep->c_flags & SLAB_RED_ZONE)
1386                                 goto red_zone;
1387 ret_red:
1388                         if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
1389                                 kmem_report_alloc_err("Bad poison", cachep);
1390 #endif  /* SLAB_DEBUG_SUPPORT */
1391                         return objp;
1392                 }
1393                 /* Update index ptr. */
1394                 objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;        /* index in s_index[] -> obj address */
1395                 bufp->buf_objp = objp;
1396                 goto finished;
1397         }
1398         cachep->c_freep = slabp->s_nextp;       /* that was the slab's last free obj: move c_freep on */
1399         goto ret_obj;
1400
1401 #if     SLAB_DEBUG_SUPPORT
1402 red_zone:
1403         /* Set alloc red-zone, and check old one. */
1404         if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1405                 kmem_report_alloc_err("Bad front redzone", cachep);
1406         objp += BYTES_PER_WORD;                 /* the caller's obj starts after the front red-zone word */
1407         if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
1408                 kmem_report_alloc_err("Bad rear redzone", cachep);
1409         goto ret_red;
1410 #endif  /* SLAB_DEBUG_SUPPORT */
1411
1412 search_dma:
1413         if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
1414                 goto try_again_dma;
1415 alloc_new_slab:         /* also reached by falling through from search_dma when no DMA slab was found */
1416         /* Either out of slabs, or magic number corruption. */
1417         if (slabp == kmem_slab_end(cachep)) {
1418                 /* Need a new slab.  Release the lock before calling kmem_cache_grow().
1419                  * This allows objs to be released back into the cache while growing.
1420                  */
1421                 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1422                 if (kmem_cache_grow(cachep, flags)) {
1423                         /* Someone may have stolen our objs.  Doesn't matter, we'll
1424                          * just come back here again.
1425                          */
1426                         spin_lock_irq(&cachep->c_spinlock);     /* NOTE(review): irq variant; later released with irqrestore(save_flags) */
1427                         goto try_again;
1428                 }
1429                 /* Couldn't grow, but some objs may have been freed. */
1430                 spin_lock_irq(&cachep->c_spinlock);
1431                 if (cachep->c_freep != kmem_slab_end(cachep)) {
1432                         if ((flags & SLAB_ATOMIC) == 0)         /* retry is only taken when no SLAB_ATOMIC bit is set in flags */
1433                                 goto try_again;
1434                 }
1435         } else {
1436                 /* Very serious error - maybe panic() here? */
1437                 kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
1438         }
1439         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1440 err_exit:
1441         return NULL;
1442 nul_ptr:
1443         kmem_report_alloc_err("NULL ptr", NULL);        /* lock was never taken on this path */
1444         goto err_exit;
1445 }
1446
1447 /* Release an obj back to its cache.  If the obj has a constructed state,
1448  * it should be in this state _before_ it is released.
1449  */
1450 static inline void
1451 __kmem_cache_free(kmem_cache_t *cachep, const void *objp)
1452 {
1453         kmem_slab_t     *slabp;
1454         kmem_bufctl_t   *bufp;
1455         unsigned long   save_flags;
1456
1457         /* Basic sanity checks. */
1458         if (!cachep || !objp)
1459                 goto null_addr;
1460
1461 #if     SLAB_DEBUG_SUPPORT
1462         /* A verify func is called without the cache-lock held. */
1463         if (cachep->c_flags & SLAB_DEBUG_INITIAL)
1464                 goto init_state_check;
1465 finished_initial:
1466
1467         if (cachep->c_flags & SLAB_RED_ZONE)
1468                 goto red_zone;
1469 return_red:
1470 #endif  /* SLAB_DEBUG_SUPPORT */
1471
1472         spin_lock_irqsave(&cachep->c_spinlock, save_flags);
1473
1474         if (SLAB_BUFCTL(cachep->c_flags))
1475                 goto bufctl;
1476         bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
1477
1478         /* Get slab for the object. */
1479 #if     0
1480         /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
1481          * Is this worth while? XXX
1482          */
1483         if (cachep->c_flags & SLAB_HIGH_PACK)
1484                 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
1485         else
1486 #endif
1487                 slabp = bufp->buf_slabp;
1488
1489 check_magic:
1490         if (slabp->s_magic != SLAB_MAGIC_ALLOC)         /* Sanity check. */
1491                 goto bad_slab;
1492
1493 #if     SLAB_DEBUG_SUPPORT
1494         if (cachep->c_flags & SLAB_DEBUG_FREE)
1495                 goto extra_checks;
1496 passed_extra:
1497 #endif  /* SLAB_DEBUG_SUPPORT */
1498
1499         if (slabp->s_inuse) {           /* Sanity check. */
1500                 SLAB_STATS_DEC_ACTIVE(cachep);
1501                 slabp->s_inuse--;
1502                 bufp->buf_nextp = slabp->s_freep;
1503                 slabp->s_freep = bufp;
1504                 if (bufp->buf_nextp) {
1505                         if (slabp->s_inuse) {
1506                                 /* (hopefully) The most common case. */
1507 finished:
1508 #if     SLAB_DEBUG_SUPPORT
1509                                 if (cachep->c_flags & SLAB_POISON) {
1510                                         if (cachep->c_flags & SLAB_RED_ZONE)
1511                                                 objp += BYTES_PER_WORD;
1512                                         kmem_poison_obj(cachep, objp);
1513                                 }
1514 #endif  /* SLAB_DEBUG_SUPPORT */
1515                                 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1516                                 return;
1517                         }
1518                         kmem_cache_full_free(cachep, slabp);
1519                         goto finished;
1520                 }
1521                 kmem_cache_one_free(cachep, slabp);
1522                 goto finished;
1523         }
1524
1525         /* Don't add to freelist. */
1526         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1527         kmem_report_free_err("free with no active objs", objp, cachep);
1528         return;
1529 bufctl:
1530         /* No 'extra' checks are performed for objs stored this way, finding
1531          * the obj is check enough.
1532          */
1533         slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
1534         bufp =  &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
1535         if (bufp->buf_objp == objp)
1536                 goto check_magic;
1537         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1538         kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
1539         return;
1540 #if     SLAB_DEBUG_SUPPORT
1541 init_state_check:
1542         /* Need to call the slab's constructor so the
1543          * caller can perform a verify of its state (debugging).
1544          */
1545         cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1546         goto finished_initial;
1547 extra_checks:
1548         if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
1549                 spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1550                 kmem_report_free_err("Double free detected during checks", objp, cachep);
1551                 return;
1552         }
1553         goto passed_extra;
1554 red_zone:
1555         /* We do not hold the cache-lock while checking the red-zone.
1556          */
1557         objp -= BYTES_PER_WORD;
1558         if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1559                 /* Either write before start of obj, or a double free. */
1560                 kmem_report_free_err("Bad front redzone", objp, cachep);
1561         }
1562         if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
1563                 /* Either write past end of obj, or a double free. */
1564                 kmem_report_free_err("Bad rear redzone", objp, cachep);
1565         }
1566         goto return_red;
1567 #endif  /* SLAB_DEBUG_SUPPORT */
1568
1569 bad_slab:
1570         /* Slab doesn't contain the correct magic num. */
1571         if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
1572                 /* Magic num says this is a destroyed slab. */
1573                 kmem_report_free_err("free from inactive slab", objp, cachep);
1574         } else
1575                 kmem_report_free_err("Bad obj addr", objp, cachep);
1576         spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
1577
1578 #if 1
1579 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1580         BUG();
1581 #endif
1582
1583         return;
1584 null_addr:
1585         kmem_report_free_err("NULL ptr", objp, cachep);
1586         return;
1587 }
1588
1589 void *
1590 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
1591 {
1592         return __kmem_cache_alloc(cachep, flags);
1593 }
1594
1595 void
1596 kmem_cache_free(kmem_cache_t *cachep, void *objp)
1597 {
1598         __kmem_cache_free(cachep, objp);
1599 }
1600
1601 void *
1602 kmalloc(size_t size, int flags)
1603 {
1604         cache_sizes_t   *csizep = cache_sizes;
1605
1606         for (; csizep->cs_size; csizep++) {
1607                 if (size > csizep->cs_size)
1608                         continue;
1609                 return __kmem_cache_alloc(csizep->cs_cachep, flags);
1610         }
1611         printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
1612         return NULL;
1613 }
1614
1615 void
1616 kfree(const void *objp)
1617 {
1618         struct page *page;
1619         int     nr;
1620
1621         if (!objp)
1622                 goto null_ptr;
1623         nr = MAP_NR(objp);
1624         if (nr >= max_mapnr)
1625                 goto bad_ptr;
1626
1627         /* Assume we own the page structure - hence no locking.
1628          * If someone is misbehaving (for example, calling us with a bad
1629          * address), then access to the page structure can race with the
1630          * kmem_slab_destroy() code.  Need to add a spin_lock to each page
1631          * structure, which would be useful in threading the gfp() functions....
1632          */
1633         page = &mem_map[nr];
1634         if (PageSlab(page)) {
1635                 kmem_cache_t    *cachep;
1636
1637                 /* Here, we again assume the obj address is good.
1638                  * If it isn't, and happens to map onto another
1639                  * general cache page which has no active objs, then
1640                  * we race.
1641                  */
1642                 cachep = SLAB_GET_PAGE_CACHE(page);
1643                 if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
1644                         __kmem_cache_free(cachep, objp);
1645                         return;
1646                 }
1647         }
1648 bad_ptr:
1649         printk(KERN_ERR "kfree: Bad obj %p\n", objp);
1650
1651 #if 1
1652 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
1653 *(int *) 0 = 0;
1654 #endif
1655
1656 null_ptr:
1657         return;
1658 }
1659
1660 void
1661 kfree_s(const void *objp, size_t size)
1662 {
1663         struct page *page;
1664         int     nr;
1665
1666         if (!objp)
1667                 goto null_ptr;
1668         nr = MAP_NR(objp);
1669         if (nr >= max_mapnr)
1670                 goto null_ptr;
1671         /* See comment in kfree() */
1672         page = &mem_map[nr];
1673         if (PageSlab(page)) {
1674                 kmem_cache_t    *cachep;
1675                 /* See comment in kfree() */
1676                 cachep = SLAB_GET_PAGE_CACHE(page);
1677                 if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
1678                         if (size <= cachep->c_org_size) {       /* XXX better check */
1679                                 __kmem_cache_free(cachep, objp);
1680                                 return;
1681                         }
1682                 }
1683         }
1684 null_ptr:
1685         printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
1686         return;
1687 }
1688
1689 kmem_cache_t *
1690 kmem_find_general_cachep(size_t size)
1691 {
1692         cache_sizes_t   *csizep = cache_sizes;
1693
1694         /* This function could be moved to the header file, and
1695          * made inline so consumers can quickly determine what
1696          * cache pointer they require.
1697          */
1698         for (; csizep->cs_size; csizep++) {
1699                 if (size > csizep->cs_size)
1700                         continue;
1701                 break;
1702         }
1703         return csizep->cs_cachep;
1704 }
1705
1706
1707 /* Called from try_to_free_page().
1708  * This function _cannot_ be called within a int, but it
1709  * can be interrupted.
1710  */
1711 void
1712 kmem_cache_reap(int gfp_mask)
1713 {
1714         kmem_slab_t     *slabp;
1715         kmem_cache_t    *searchp;
1716         kmem_cache_t    *best_cachep;
1717         unsigned int     scan;
1718         unsigned int     reap_level;
1719
1720         if (in_interrupt()) {
1721                 printk("kmem_cache_reap() called within int!\n");
1722                 return;
1723         }
1724
1725         /* We really need a test semaphore op so we can avoid sleeping when
1726          * !wait is true.
1727          */
1728         down(&cache_chain_sem);
1729
1730         scan = 10;
1731         reap_level = 0;
1732
1733         best_cachep = NULL;
1734         searchp = clock_searchp;
1735         do {
1736                 unsigned int    full_free;
1737                 unsigned int    dma_flag;
1738
1739                 /* It's safe to test this without holding the cache-lock. */
1740                 if (searchp->c_flags & SLAB_NO_REAP)
1741                         goto next;
1742                 spin_lock_irq(&searchp->c_spinlock);
1743                 if (searchp->c_growing)
1744                         goto next_unlock;
1745                 if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
1746                         searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
1747                         goto next_unlock;
1748                 }
1749                 /* Sanity check for corruption of static values. */
1750                 if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
1751                         spin_unlock_irq(&searchp->c_spinlock);
1752                         printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
1753                         goto next;
1754                 }
1755                 dma_flag = 0;
1756                 full_free = 0;
1757
1758                 /* Count the fully free slabs.  There should not be not many,
1759                  * since we are holding the cache lock.
1760                  */
1761                 slabp = searchp->c_lastp;
1762                 while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
1763                         slabp = slabp->s_prevp;
1764                         full_free++;
1765                         if (slabp->s_dma)
1766                                 dma_flag++;
1767                 }
1768                 spin_unlock_irq(&searchp->c_spinlock);
1769
1770                 if ((gfp_mask & GFP_DMA) && !dma_flag)
1771                         goto next;
1772
1773                 if (full_free) {
1774                         if (full_free >= 10) {
1775                                 best_cachep = searchp;
1776                                 break;
1777                         }
1778
1779                         /* Try to avoid slabs with constructors and/or
1780                          * more than one page per slab (as it can be difficult
1781                          * to get high orders from gfp()).
1782                          */
1783                         if (full_free >= reap_level) {
1784                                 reap_level = full_free;
1785                                 best_cachep = searchp;
1786                         }
1787                 }
1788                 goto next;
1789 next_unlock:
1790                 spin_unlock_irq(&searchp->c_spinlock);
1791 next:
1792                 searchp = searchp->c_nextp;
1793         } while (--scan && searchp != clock_searchp);
1794
1795         clock_searchp = searchp;
1796         up(&cache_chain_sem);
1797
1798         if (!best_cachep) {
1799                 /* couldn't find anything to reap */
1800                 return;
1801         }
1802
1803         spin_lock_irq(&best_cachep->c_spinlock);
1804         while (!best_cachep->c_growing &&
1805                !(slabp = best_cachep->c_lastp)->s_inuse &&
1806                slabp != kmem_slab_end(best_cachep)) {
1807                 if (gfp_mask & GFP_DMA) {
1808                         do {
1809                                 if (slabp->s_dma)
1810                                         goto good_dma;
1811                                 slabp = slabp->s_prevp;
1812                         } while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));
1813
1814                         /* Didn't found a DMA slab (there was a free one -
1815                          * must have been become active).
1816                          */
1817                         goto dma_fail;
1818 good_dma:
1819                 }
1820                 if (slabp == best_cachep->c_freep)
1821                         best_cachep->c_freep = slabp->s_nextp;
1822                 kmem_slab_unlink(slabp);
1823                 SLAB_STATS_INC_REAPED(best_cachep);
1824
1825                 /* Safe to drop the lock.  The slab is no longer linked to the
1826                  * cache.
1827                  */
1828                 spin_unlock_irq(&best_cachep->c_spinlock);
1829                 kmem_slab_destroy(best_cachep, slabp);
1830                 spin_lock_irq(&best_cachep->c_spinlock);
1831         }
1832 dma_fail:
1833         spin_unlock_irq(&best_cachep->c_spinlock);
1834         return;
1835 }
1836
#if     SLAB_SELFTEST
/* A few v. simple tests.
 *
 * Creates a red-zoned, poisoned cache of 16-byte objs and deliberately
 * damages an obj in three ways: writes just outside both ends before
 * freeing it, and writes into it after it has been freed (twice).  The
 * damage is meant to be noticed by the free, the next alloc and the shrink
 * respectively; this function itself checks nothing.
 */
static void
kmem_self_test(void)
{
        kmem_cache_t    *test_cachep;

        printk(KERN_INFO "kmem_test() - start\n");
        test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
        if (test_cachep) {
                char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
                if (objp) {
                        /* Write in front and past end, red-zone test. */
                        *(objp-1) = 1;
                        *(objp+16) = 1;
                        kmem_cache_free(test_cachep, objp);

                        /* Mess up poisoning. */
                        *objp = 10;
                        objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
                        /* The second alloc can fail too; don't free or
                         * write through a NULL pointer if it does.
                         */
                        if (objp) {
                                kmem_cache_free(test_cachep, objp);

                                /* Mess up poisoning (again). */
                                *objp = 10;
                        }
                        kmem_cache_shrink(test_cachep);
                }
        }
        printk(KERN_INFO "kmem_test() - finished\n");
}
#endif  /* SLAB_SELFTEST */
1867
#if     defined(CONFIG_PROC_FS)
/* /proc/slabinfo
 *
 * Writes a version line followed by one line per cache on the cache-chain,
 * starting with cache_cache, and returns the number of characters written.
 *
 * Without SLAB_STATS each line is:
 *   cache-name num-active-objs total-objs
 * With SLAB_STATS each line is:
 *   cache-name num-active-objs total-objs num-active-slabs total-slabs
 *   num-pages high-mark num-allocations grown reaped errors
 *
 * The chain is walked under cache_chain_sem; each cache's slab list is
 * counted under that cache's lock, which is dropped before formatting.
 * NOTE(review): output goes through unbounded sprintf() - the caller has to
 * supply a buffer big enough for every cache.
 */
int
get_slabinfo(char *buf)
{
        kmem_cache_t    *cp;
        kmem_slab_t     *sp;
        unsigned long   objs_inuse;
        unsigned long   irq_state;
        unsigned long   slab_cnt;
        unsigned long   obj_cnt;
        int             pos=0;
#if     SLAB_STATS
        unsigned long   slabs_inuse;
#endif  /* SLAB_STATS */

        /* Interrupt state is captured once and restored at every unlock. */
        __save_flags(irq_state);

        /* Output format version, so at least we can change it without _too_
         * many complaints.
         */
#if     SLAB_STATS
        pos = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
#else
        pos = sprintf(buf, "slabinfo - version: 1.0\n");
#endif  /* SLAB_STATS */

        down(&cache_chain_sem);
        cp = &cache_cache;
        do {
#if     SLAB_STATS
                slabs_inuse = 0;
#endif  /* SLAB_STATS */
                objs_inuse = 0;
                slab_cnt = 0;

                spin_lock_irq(&cp->c_spinlock);

                /* Tally slabs and active objs for this cache. */
                sp = cp->c_firstp;
                while (sp != kmem_slab_end(cp)) {
                        objs_inuse += sp->s_inuse;
                        slab_cnt++;
#if     SLAB_STATS
                        if (sp->s_inuse)
                                slabs_inuse++;
#endif  /* SLAB_STATS */
                        sp = sp->s_nextp;
                }
                obj_cnt = cp->c_num*slab_cnt;

#if     SLAB_STATS
                {
                /* Snapshot the counters while the lock is still held. */
                unsigned long err_cnt;
                unsigned long high_mark = cp->c_high_mark;
                unsigned long grown_cnt = cp->c_grown;
                unsigned long reaped_cnt = cp->c_reaped;
                unsigned long alloc_cnt = cp->c_num_allocations;
                err_cnt = (unsigned long) atomic_read(&cp->c_errors);
                spin_unlock_irqrestore(&cp->c_spinlock, irq_state);
                pos += sprintf(buf+pos, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
                                cp->c_name, objs_inuse, obj_cnt, slabs_inuse, slab_cnt,
                                (1<<cp->c_gfporder)*slab_cnt,
                                high_mark, alloc_cnt, grown_cnt, reaped_cnt, err_cnt);
                }
#else
                spin_unlock_irqrestore(&cp->c_spinlock, irq_state);
                pos += sprintf(buf+pos, "%-17s %6lu %6lu\n", cp->c_name, objs_inuse, obj_cnt);
#endif  /* SLAB_STATS */

                cp = cp->c_nextp;
        } while (cp != &cache_cache);
        up(&cache_chain_sem);

        return pos;
}
#endif  /* CONFIG_PROC_FS */