libbase/jemalloc.c

   1 /*-
   2  * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice(s), this list of conditions and the following disclaimer as
  10  *    the first lines of this file unmodified other than the possible
  11  *    addition of one or more copyright notices.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice(s), this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  *******************************************************************************
  30  *
  31  * This allocator implementation is designed to provide scalable performance
  32  * for multi-threaded programs on multi-processor systems.  The following
  33  * features are included for this purpose:
  34  *
  35  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  36  *     contention and cache sloshing.
  37  *
  38  *   + Cache line sharing between arenas is avoided for internal data
  39  *     structures.
  40  *
  41  *   + Memory is managed in chunks and runs (chunks can be split into runs),
  42  *     rather than as individual pages.  This provides a constant-time
  43  *     mechanism for associating allocations with particular arenas.
  44  *
  45  * Allocation requests are rounded up to the nearest size class, and no record
  46  * of the original request size is maintained.  Allocations are broken into
  47  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  48  * and a 16 byte quantum, the size classes in each category are as follows:
  49  *
  50  *   |=====================================|
  51  *   | Category | Subcategory    |    Size |
  52  *   |=====================================|
  53  *   | Small    | Tiny           |       2 |
  54  *   |          |                |       4 |
  55  *   |          |                |       8 |
  56  *   |          |----------------+---------|
  57  *   |          | Quantum-spaced |      16 |
  58  *   |          |                |      32 |
  59  *   |          |                |      48 |
  60  *   |          |                |     ... |
  61  *   |          |                |     480 |
  62  *   |          |                |     496 |
  63  *   |          |                |     512 |
  64  *   |          |----------------+---------|
  65  *   |          | Sub-page       |    1 kB |
  66  *   |          |                |    2 kB |
  67  *   |=====================================|
  68  *   | Large                     |    4 kB |
  69  *   |                           |    8 kB |
  70  *   |                           |   12 kB |
  71  *   |                           |     ... |
  72  *   |                           | 1012 kB |
  73  *   |                           | 1016 kB |
  74  *   |                           | 1020 kB |
  75  *   |=====================================|
  76  *   | Huge                      |    1 MB |
  77  *   |                           |    2 MB |
  78  *   |                           |    3 MB |
  79  *   |                           |     ... |
  80  *   |=====================================|
  81  *
  82  * A different mechanism is used for each category:
  83  *
  84  *   Small : Each size class is segregated into its own set of runs.  Each run
  85  *           maintains a bitmap of which regions are free/allocated.
  86  *
  87  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
  88  *           in the associated arena chunk header maps.
  89  *
  90  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
  91  *          Metadata are stored in a separate red-black tree.
  92  *
  93  *******************************************************************************
  94  */
  95
  96 /*
  97  * This has been hacked on heavily by Rob Savoye, so it compiles
  98  * within Gnash, using the same configuration settings as
  99  * everything else in Gnash.
 100  */
 101 #ifdef HAVE_CONFIG_H
 102 # include "gnashconfig.h"
 103 #endif
 104
 105 #include "dsodefs.h"
 106
 107 /*
 108  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 109  * defaults the A and J runtime options to off.  These settings are appropriate
 110  * for production systems.
 111  */
 112 #ifndef USE_STATS_MEMORY
 113 # define MALLOC_PRODUCTION 1
 114 #endif
 115
 116 #ifndef MALLOC_PRODUCTION
 117    /*
 118     * MALLOC_DEBUG enables assertions and other sanity checks, and disables
 119     * inline functions.
 120     */
 121 #  define MALLOC_DEBUG 1
 122
 123    /* MALLOC_STATS enables statistics calculation. */
 124 #  define MALLOC_STATS 1
 125
 126    /* Memory filling (junk/zero). */
 127 #  define MALLOC_FILL 1
 128
 129    /* Allocation tracing. */
 130 #  define MALLOC_UTRACE 1
 131
 132    /* Support optional abort() on OOM. */
 133 #  define MALLOC_XMALLOC 1
 134
 135    /* Support SYSV semantics. */
 136 #  define MALLOC_SYSV 1
 137 #endif
 138
 139 /*
 140  * MALLOC_LAZY_FREE enables the use of a per-thread vector of slots that free()
 141  * can atomically stuff object pointers into.  This can reduce arena lock
 142  * contention.
 143  */
 144 /* #define      MALLOC_LAZY_FREE 1 */
 145
 146 /*
 147  * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
 148  * re-balances arena load if exponentially averaged contention exceeds a
 149  * certain threshold.
 150  */
 151 /* #define      MALLOC_BALANCE 1 */
 152
 153 /*
 154  * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
 155  * segment (DSS).  In an ideal world, this functionality would be completely
 156  * unnecessary, but we are burdened by history and the lack of resource limits
 157  * for anonymous mapped memory.
 158  */
 159 #if (!defined(DARWIN) && !defined(WIN32))
 160 # define MALLOC_DSS
 161 #endif
 162
 163 #ifdef LINUX_HOST
 164 # define        _GNU_SOURCE /* For mremap(2). */
 165 # define        issetugid() 0
 166 # if 0 /* Enable in order to test decommit code on Linux. */
 167 #  define MALLOC_DECOMMIT 1
 168 /*
 169  * The decommit code for Unix doesn't bother to make sure deallocated DSS
 170  * chunks are writable.
 171  */
 172 # undef MALLOC_DSS
 173 # endif
 174 #endif
 175
 176 #include <sys/types.h>
 177
 178 #include <errno.h>
 179 #include <limits.h>
 180 #include <stdarg.h>
 181 #include <stdio.h>
 182 #include <stdlib.h>
 183 #include <string.h>
 184
 185 #ifdef WIN32
 186 # include <cruntime.h>
 187 # include <internal.h>
 188 # include <windows.h>
 189 # include <io.h>
 190 # include "jemtree.h"
 191
 192 # pragma warning( disable: 4267 4996 4146 )
 193
 194 # define        bool BOOL
 195 # define        false FALSE
 196 # define        true TRUE
 197 # define        inline __inline
 198 # define        SIZE_T_MAX SIZE_MAX
 199 # define        STDERR_FILENO 2
 200 # define        PATH_MAX MAX_PATH
 201 # define        vsnprintf _vsnprintf
 202 # define        assert(f) /* we can't assert in the CRT */
 203
 204 static unsigned long tlsIndex = 0xffffffff;
 205
 206 # define        __thread
 207 # define        _pthread_self() __threadid()
 208 # define        issetugid() 0
 209
 210 /* use MSVC intrinsics */
 211 # pragma intrinsic(_BitScanForward)
 212 static __forceinline int
 213 ffs(int x)
 214 {
 215         unsigned long i;
 216
 217         if (_BitScanForward(&i, x) != 0)
 218                 return (i + 1);
 219
 220         return (0);
 221 }
 222
 223 typedef unsigned char uint8_t;
 224 typedef unsigned uint32_t;
 225 typedef unsigned long long uint64_t;
 226 typedef unsigned long long uintmax_t;
 227
 228 # define        MALLOC_DECOMMIT
 229 #endif  /* end of WIN32 */
 230
 231 #ifdef HAVE_PTHREADS
 232 # include <pthread.h>
 233 #endif
 234
 235 #ifndef WIN32
 236 # include <sys/cdefs.h>
 237 # ifndef __DECONST
 238 #  define __DECONST(type, var)  ((type)(uintptr_t)(const void *)(var))
 239 # endif
 240 # include <sys/mman.h>
 241 # ifndef MADV_FREE
 242 #  define MADV_FREE     MADV_DONTNEED
 243 # endif
 244 # include <sys/param.h>
 245 # include <sys/time.h>
 246 # include <sys/types.h>
 247 # include <sys/sysctl.h>
 248 # include "jemtree.h"
 249 # include <sys/uio.h>
 250
 251 # include <errno.h>
 252 # include <limits.h>
 253 # ifndef SIZE_T_MAX
 254 #  define SIZE_T_MAX    SIZE_MAX
 255 # endif
 256 # if defined(DARWIN) || defined(LINUX_HOST)
 257 #  define _pthread_self pthread_self
 258 #  define _pthread_mutex_init pthread_mutex_init
 259 #  define _pthread_mutex_trylock pthread_mutex_trylock
 260 #  define _pthread_mutex_lock pthread_mutex_lock
 261 #  define _pthread_mutex_unlock pthread_mutex_unlock
 262 # endif
 263 # include <sched.h>
 264 # include <stdarg.h>
 265 # include <stdbool.h>
 266 # include <stdio.h>
 267 # include <stdint.h>
 268 # include <stdlib.h>
 269 # include <string.h>
 270 # ifndef DARWIN
 271 #  include <strings.h>
 272 # endif
 273 # include <unistd.h>
 274
 275 # ifdef DARWIN
 276 #  include <libkern/OSAtomic.h>
 277 #  include <mach/mach_error.h>
 278 #  include <mach/mach_init.h>
 279 #  include <mach/vm_map.h>
 280 #  include <malloc/malloc.h>
 281 # endif  /* end of DARWIN */
 282 #endif   /* end of WIN32 */
 283
 284
 285 #ifdef DARWIN
 286 DSOEXPORT void *moz_memalign(size_t alignment, size_t size);
 287 DSOEXPORT size_t moz_malloc_usable_size(const void *ptr)
 288 DSOEXPORT void *moz_calloc(size_t num, size_t size);
 289 DSOEXPORT void *moz_realloc(void *ptr, size_t size);
 290 DSOEXPORT void *moz_valloc(size_t size);
 291 DSOEXPORT size_t moz_malloc_usable_size(const void *ptr);
 292 #else
 293 DSOEXPORT void *memalign(size_t alignment, size_t size);
 294 DSOEXPORT size_t malloc_usable_size(const void *ptr);
 295 DSOEXPORT void *calloc(size_t num, size_t size);
 296 DSOEXPORT void *realloc(void *ptr, size_t size);
 297 DSOEXPORT void *valloc(size_t size);
 298 DSOEXPORT size_t malloc_usable_size(const void *ptr);
 299 #endif
 300 void _malloc_postfork(void);
 301 void _malloc_prefork(void);
 302
 303 #ifdef DARWIN
 304 static const bool g_isthreaded = true;
 305 #endif
 306
 307 #define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
 308
 309 #ifdef MALLOC_DEBUG
 310 # ifdef NDEBUG
 311 #  undef NDEBUG
 312 # endif
 313 #else
 314 # ifndef NDEBUG
 315 #  define NDEBUG
 316 # endif
 317 #endif
 318 #ifndef WIN32
 319 # include <assert.h>
 320 #endif
 321
 322 #ifdef MALLOC_DEBUG
 323 /* Disable inlining to make debugging easier. */
 324 # ifdef inline
 325 #  undef inline
 326 # endif
 327
 328 # define inline
 329 #endif  /* end of MALLOC_DEBUG */
 330
 331 /* Size of stack-allocated buffer passed to strerror_r(). */
 332 #define STRERROR_BUF            64
 333
 334 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
 335 #define QUANTUM_2POW_MIN      4
 336 #ifdef MOZ_MEMORY_SIZEOF_PTR_2POW
 337 # define SIZEOF_PTR_2POW                MOZ_MEMORY_SIZEOF_PTR_2POW
 338 #else
 339 # define SIZEOF_PTR_2POW       2
 340 #endif
 341 // __as __isthreaded is already defined on FreeBSD with a different value that does
 342 // the same thing, rename our version to be unique. Althought jemalloc is the default
 343 // allocator on FreeBSD< we still want to use our own version, as it has additional Gnash
 344 // specific tweaks.
 345 #ifndef DARWIN
 346 static const bool g_isthreaded = true;
 347 #else
 348 # define NO_TLS
 349 #endif
 350 #if 0
 351 #ifdef __i386__
 352 #  define QUANTUM_2POW_MIN      4
 353 #  define SIZEOF_PTR_2POW       2
 354 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 355 #endif
 356 #ifdef __ia64__
 357 #  define QUANTUM_2POW_MIN      4
 358 #  define SIZEOF_PTR_2POW       3
 359 #endif
 360 #ifdef __alpha__
 361 #  define QUANTUM_2POW_MIN      4
 362 #  define SIZEOF_PTR_2POW       3
 363 #  define NO_TLS
 364 #endif
 365 #ifdef __sparc64__
 366 #  define QUANTUM_2POW_MIN      4
 367 #  define SIZEOF_PTR_2POW       3
 368 #  define NO_TLS
 369 #endif
 370 #ifdef __amd64__
 371 #  define QUANTUM_2POW_MIN      4
 372 #  define SIZEOF_PTR_2POW       3
 373 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 374 #endif
 375 #ifdef __arm__
 376 #  define QUANTUM_2POW_MIN      3
 377 #  define SIZEOF_PTR_2POW       2
 378 #  define NO_TLS
 379 #endif
 380 #ifdef __powerpc__
 381 #  define QUANTUM_2POW_MIN      4
 382 #  define SIZEOF_PTR_2POW       2
 383 #endif
 384 #endif
 385
 386 #define SIZEOF_PTR              (1U << SIZEOF_PTR_2POW)
 387
 388 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
 389 #ifndef SIZEOF_INT_2POW
 390 # define SIZEOF_INT_2POW        2
 391 #endif
 392
 393 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 394 #if (!defined(PIC) && !defined(NO_TLS))
 395 # define NO_TLS
 396 #endif
 397
 398 #ifdef NO_TLS
 399    /* MALLOC_BALANCE requires TLS. */
 400 #  ifdef MALLOC_BALANCE
 401 #    undef MALLOC_BALANCE
 402 #  endif
 403    /* MALLOC_LAZY_FREE requires TLS. */
 404 #  ifdef MALLOC_LAZY_FREE
 405 #    undef MALLOC_LAZY_FREE
 406 #  endif
 407 #endif
 408
 409 /*
 410  * Size and alignment of memory chunks that are allocated by the OS's virtual
 411  * memory system.
 412  */
 413 #define CHUNK_2POW_DEFAULT      20
 414
 415 /* Maximum number of dirty pages per arena. */
 416 #define DIRTY_MAX_DEFAULT       (1U << 9)
 417
 418 /*
 419  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 420  * so over-estimates are okay (up to a point), but under-estimates will
 421  * negatively affect performance.
 422  */
 423 #define CACHELINE_2POW          6
 424 #define CACHELINE               ((size_t)(1U << CACHELINE_2POW))
 425
 426 /* Smallest size class to support. */
 427 #define TINY_MIN_2POW           1
 428
 429 /*
 430  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 431  * a power of 2.  Above this size, allocations are rounded up to the nearest
 432  * power of 2.
 433  */
 434 #define SMALL_MAX_2POW_DEFAULT  9
 435 #define SMALL_MAX_DEFAULT       (1U << SMALL_MAX_2POW_DEFAULT)
 436
 437 /*
 438  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 439  * as small as possible such that this setting is still honored, without
 440  * violating other constraints.  The goal is to make runs as small as possible
 441  * without exceeding a per run external fragmentation threshold.
 442  *
 443  * We use binary fixed point math for overhead computations, where the binary
 444  * point is implicitly RUN_BFP bits to the left.
 445  *
 446  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 447  * honored for some/all object sizes, since there is one bit of header overhead
 448  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 449  * that are so small that the per-region overhead is greater than:
 450  *
 451  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 452  */
 453 #define RUN_BFP                 12
 454 /*                                    \/   Implicit binary fixed point. */
 455 #define RUN_MAX_OVRHD           0x0000003dU
 456 #define RUN_MAX_OVRHD_RELAX     0x00001800U
 457
 458 /*
 459  * Put a cap on small object run size.  This overrides RUN_MAX_OVRHD.  Note
 460  * that small runs must be small enough that page offsets can fit within the
 461  * CHUNK_MAP_POS_MASK bits.
 462  */
 463 #define RUN_MAX_SMALL_2POW      15
 464 #define RUN_MAX_SMALL           (1U << RUN_MAX_SMALL_2POW)
 465
 466 #ifdef MALLOC_LAZY_FREE
 467    /* Default size of each arena's lazy free cache. */
 468 #  define LAZY_FREE_2POW_DEFAULT 8
 469    /*
 470     * Number of pseudo-random probes to conduct before considering the cache to
 471     * be overly full.  It takes on average n probes to detect fullness of
 472     * (n-1)/n.  However, we are effectively doing multiple non-independent
 473     * trials (each deallocation is a trial), so the actual average threshold
 474     * for clearing the cache is somewhat lower.
 475     */
 476 #  define LAZY_FREE_NPROBES     5
 477 #endif
 478
 479 /*
 480  * Hyper-threaded CPUs may need a special instruction inside spin loops in
 481  * order to yield to another virtual CPU.  If no such instruction is defined
 482  * above, make CPU_SPINWAIT a no-op.
 483  */
 484 #ifndef CPU_SPINWAIT
 485 #  define CPU_SPINWAIT
 486 #endif
 487
 488 /*
 489  * Adaptive spinning must eventually switch to blocking, in order to avoid the
 490  * potential for priority inversion deadlock.  Backing off past a certain point
 491  * can actually waste time.
 492  */
 493 #define SPIN_LIMIT_2POW         11
 494
 495 /*
 496  * Conversion from spinning to blocking is expensive; we use (1U <<
 497  * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
 498  * worst-case spinning.
 499  */
 500 #define BLOCK_COST_2POW         4
 501
 502 #ifdef MALLOC_BALANCE
 503    /*
 504     * We use an exponential moving average to track recent lock contention,
 505     * where the size of the history window is N, and alpha=2/(N+1).
 506     *
 507     * Due to integer math rounding, very small values here can cause
 508     * substantial degradation in accuracy, thus making the moving average decay
 509     * faster than it would with precise calculation.
 510     */
 511 #  define BALANCE_ALPHA_INV_2POW        9
 512
 513    /*
 514     * Threshold value for the exponential moving contention average at which to
 515     * re-assign a thread.
 516     */
 517 #  define BALANCE_THRESHOLD_DEFAULT     (1U << (SPIN_LIMIT_2POW-4))
 518 #endif
 519
 520 /******************************************************************************/
 521
 522 /*
 523  * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 524  * places, because they require malloc()ed memory, which causes bootstrapping
 525  * issues in some cases.
 526  */
 527 #if defined(WIN32)
 528 #define malloc_mutex_t CRITICAL_SECTION
 529 #define malloc_spinlock_t CRITICAL_SECTION
 530 #elif defined(DARWIN)
 531 typedef struct {
 532         OSSpinLock      lock;
 533 } malloc_mutex_t;
 534 typedef struct {
 535         OSSpinLock      lock;
 536 } malloc_spinlock_t;
 537 #elif defined(USE_JEMALLOC)
 538 typedef pthread_mutex_t malloc_mutex_t;
 539 typedef pthread_mutex_t malloc_spinlock_t;
 540 #else
 541 /* XXX these should #ifdef these for freebsd (and linux?) only */
 542 typedef struct {
 543         spinlock_t      lock;
 544 } malloc_mutex_t;
 545 typedef malloc_spinlock_t malloc_mutex_t;
 546 #endif
 547
 548 /* Set to true once the allocator has been initialized. */
 549 static bool malloc_initialized = false;
 550
 551 #ifdef WIN32
 552 /* No init lock for Windows. */
 553 #elif defined(DARWIN)
 554 static malloc_mutex_t init_lock = {OS_SPINLOCK_INIT};
 555 #elif defined(LINUX_HOST)
 556 static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
 557 #elif defined(USE_JEMALLOC)
 558 static malloc_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
 559 #else
 560 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 561 #endif
 562
 563 /******************************************************************************/
 564 /*
 565  * Statistics data structures.
 566  */
 567
 568 #ifdef MALLOC_STATS
 569
 570 /* Borrowed from malloc.h, as this is Linux specific. This has been
 571  * added to jemalloc so the existing memory profiling in Gnash will
 572  * continue to work. Most of these fields aren't used by the Gnash
 573  * memory profiling, but we leave them here for a semblance of
 574  * portability. The only fields Gnash uses are arena, uordblks. and
 575  * fordblks.
 576  *
 577  * This gets more interesting, as jemalloc maintains multiple
 578  * arenas, one for each CPU in a multiprocessor system. We cheat
 579  * by just accumulating the stats for all arenas, since our primary
 580  * purpose is to track memory leaks.
 581  */
 582 struct mallinfo {
 583   int arena;    /* non-mmapped space allocated from system */
 584   int ordblks;  /* number of free chunks UNUSED */
 585   int smblks;   /* number of fastbin blocks UNUSED */
 586   int hblks;    /* number of mmapped regions UNUSED */
 587   int hblkhd;   /* space in mmapped regions UNUSED */
 588   int usmblks;  /* maximum total allocated space UNUSED */
 589   int fsmblks;  /* space available in freed fastbin blocks UNUSED */
 590   int uordblks; /* total allocated space */
 591   int fordblks; /* total free space */
 592   int keepcost; /* top-most, releasable space UNUSED */
 593 };
 594
 595 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 596 struct malloc_bin_stats_s {
 597         /*
 598          * Number of allocation requests that corresponded to the size of this
 599          * bin.
 600          */
 601         uint64_t        nrequests;
 602
 603         /* Total number of runs created for this bin's size class. */
 604         uint64_t        nruns;
 605
 606         /*
 607          * Total number of runs reused by extracting them from the runs tree for
 608          * this bin's size class.
 609          */
 610         uint64_t        reruns;
 611
 612         /* High-water mark for this bin. */
 613         unsigned long   highruns;
 614
 615         /* Current number of runs in this bin. */
 616         unsigned long   curruns;
 617 };
 618
 619 typedef struct arena_stats_s arena_stats_t;
 620 struct arena_stats_s {
 621         /* Number of bytes currently mapped. */
 622         size_t          mapped;
 623
 624         /*
 625          * Total number of purge sweeps, total number of madvise calls made,
 626          * and total pages purged in order to keep dirty unused memory under
 627          * control.
 628          */
 629         uint64_t        npurge;
 630         uint64_t        nmadvise;
 631         uint64_t        purged;
 632 #ifdef MALLOC_DECOMMIT
 633         /*
 634          * Total number of decommit/commit operations, and total number of
 635          * pages decommitted.
 636          */
 637         uint64_t        ndecommit;
 638         uint64_t        ncommit;
 639         uint64_t        decommitted;
 640 #endif
 641
 642         /* Per-size-category statistics. */
 643         size_t          allocated_small;
 644         uint64_t        nmalloc_small;
 645         uint64_t        ndalloc_small;
 646
 647         size_t          allocated_large;
 648         uint64_t        nmalloc_large;
 649         uint64_t        ndalloc_large;
 650
 651 #ifdef MALLOC_BALANCE
 652         /* Number of times this arena reassigned a thread due to contention. */
 653         uint64_t        nbalance;
 654 #endif
 655 };
 656
 657 typedef struct chunk_stats_s chunk_stats_t;
 658 struct chunk_stats_s {
 659         /* Number of chunks that were allocated. */
 660         uint64_t        nchunks;
 661
 662         /* High-water mark for number of chunks allocated. */
 663         unsigned long   highchunks;
 664
 665         /*
 666          * Current number of chunks allocated.  This value isn't maintained for
 667          * any other purpose, so keep track of it in order to be able to set
 668          * highchunks.
 669          */
 670         unsigned long   curchunks;
 671 };
 672
 673 #endif /* #ifdef MALLOC_STATS */
 674
 675 /******************************************************************************/
 676 /*
 677  * Extent data structures.
 678  */
 679
 680 /* Tree of extents. */
 681 typedef struct extent_node_s extent_node_t;
 682 struct extent_node_s {
 683         /* Linkage for the size/address-ordered tree. */
 684         RB_ENTRY(extent_node_s) link_szad;
 685
 686         /* Linkage for the address-ordered tree. */
 687         RB_ENTRY(extent_node_s) link_ad;
 688
 689         /* Pointer to the extent that this tree node is responsible for. */
 690         void    *addr;
 691
 692         /* Total region size. */
 693         size_t  size;
 694 };
 695 typedef struct extent_tree_szad_s extent_tree_szad_t;
 696 RB_HEAD(extent_tree_szad_s, extent_node_s);
 697 typedef struct extent_tree_ad_s extent_tree_ad_t;
 698 RB_HEAD(extent_tree_ad_s, extent_node_s);
 699
 700 /******************************************************************************/
 701 /*
 702  * Arena data structures.
 703  */
 704
 705 typedef struct arena_s arena_t;
 706 typedef struct arena_bin_s arena_bin_t;
 707
 708 /*
 709  * Each map element contains several flags, plus page position for runs that
 710  * service small allocations.
 711  */
 712 typedef uint8_t arena_chunk_map_t;
 713 #define CHUNK_MAP_UNTOUCHED     0x80U
 714 #define CHUNK_MAP_DIRTY         0x40U
 715 #define CHUNK_MAP_LARGE         0x20U
 716 #ifdef MALLOC_DECOMMIT
 717 #define CHUNK_MAP_DECOMMITTED   0x10U
 718 #define CHUNK_MAP_POS_MASK      0x0fU
 719 #else
 720 #define CHUNK_MAP_POS_MASK      0x1fU
 721 #endif
 722
 723 /* Arena chunk header. */
 724 typedef struct arena_chunk_s arena_chunk_t;
 725 struct arena_chunk_s {
 726         /* Arena that owns the chunk. */
 727         arena_t         *arena;
 728
 729         /* Linkage for the arena's chunk tree. */
 730         RB_ENTRY(arena_chunk_s) link;
 731
 732         /*
 733          * Number of pages in use.  This is maintained in order to make
 734          * detection of empty chunks fast.
 735          */
 736         size_t          pages_used;
 737
 738         /* Number of dirty pages. */
 739         size_t          ndirty;
 740
 741         /*
 742          * Tree of extent nodes that are embedded in the arena chunk header
 743          * page(s).  These nodes are used by arena_chunk_node_alloc().
 744          */
 745         extent_tree_ad_t nodes;
 746         extent_node_t   *nodes_past;
 747
 748         /*
 749          * Map of pages within chunk that keeps track of free/large/small.  For
 750          * free runs, only the map entries for the first and last pages are
 751          * kept up to date, so that free runs can be quickly coalesced.
 752          */
 753         arena_chunk_map_t map[1]; /* Dynamically sized. */
 754 };
 755 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
 756 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
 757
 758 typedef struct arena_run_s arena_run_t;
 759 struct arena_run_s {
 760         /* Linkage for run trees. */
 761         RB_ENTRY(arena_run_s) link;
 762
 763 #ifdef MALLOC_DEBUG
 764         uint32_t        magic;
 765 #  define ARENA_RUN_MAGIC 0x384adf93
 766 #endif
 767
 768         /* Bin this run is associated with. */
 769         arena_bin_t     *bin;
 770
 771         /* Index of first element that might have a free region. */
 772         unsigned        regs_minelm;
 773
 774         /* Number of free regions in run. */
 775         unsigned        nfree;
 776
 777         /* Bitmask of in-use regions (0: in use, 1: free). */
 778         unsigned        regs_mask[1]; /* Dynamically sized. */
 779 };
 780 typedef struct arena_run_tree_s arena_run_tree_t;
 781 RB_HEAD(arena_run_tree_s, arena_run_s);
 782
 783 struct arena_bin_s {
 784         /*
 785          * Current run being used to service allocations of this bin's size
 786          * class.
 787          */
 788         arena_run_t     *runcur;
 789
 790         /*
 791          * Tree of non-full runs.  This tree is used when looking for an
 792          * existing run when runcur is no longer usable.  We choose the
 793          * non-full run that is lowest in memory; this policy tends to keep
 794          * objects packed well, and it can also help reduce the number of
 795          * almost-empty chunks.
 796          */
 797         arena_run_tree_t runs;
 798
 799         /* Size of regions in a run for this bin's size class. */
 800         size_t          reg_size;
 801
 802         /* Total size of a run for this bin's size class. */
 803         size_t          run_size;
 804
 805         /* Total number of regions in a run for this bin's size class. */
 806         uint32_t        nregs;
 807
 808         /* Number of elements in a run's regs_mask for this bin's size class. */
 809         uint32_t        regs_mask_nelms;
 810
 811         /* Offset of first region in a run for this bin's size class. */
 812         uint32_t        reg0_offset;
 813
 814 #ifdef MALLOC_STATS
 815         /* Bin statistics. */
 816         malloc_bin_stats_t stats;
 817 #endif
 818 };
 819
 820 struct arena_s {
 821 #ifdef MALLOC_DEBUG
 822         uint32_t                magic;
 823 #  define ARENA_MAGIC 0x947d3d24
 824 #endif
 825
 826         /* All operations on this arena require that lock be locked. */
 827         malloc_spinlock_t       lock;
 828
 829 #ifdef MALLOC_STATS
 830         arena_stats_t           stats;
 831 #endif
 832
 833         /*
 834          * Tree of chunks this arena manages.
 835          */
 836         arena_chunk_tree_t      chunks;
 837
 838         /*
 839          * In order to avoid rapid chunk allocation/deallocation when an arena
 840          * oscillates right on the cusp of needing a new chunk, cache the most
 841          * recently freed chunk.  The spare is left in the arena's chunk tree
 842          * until it is deleted.
 843          *
 844          * There is one spare chunk per arena, rather than one spare total, in
 845          * order to avoid interactions between multiple threads that could make
 846          * a single spare inadequate.
 847          */
 848         arena_chunk_t           *spare;
 849
 850         /*
 851          * Current count of pages within unused runs that are potentially
 852          * dirty, and for which madvise(... MADV_FREE) has not been called.  By
 853          * tracking this, we can institute a limit on how much dirty unused
 854          * memory is mapped for each arena.
 855          */
 856         size_t                  ndirty;
 857
 858         /*
 859          * Trees of this arena's available runs.  Two trees are maintained
 860          * using one set of nodes, since one is needed for first-best-fit run
 861          * allocation, and the other is needed for coalescing.
 862          */
 863         extent_tree_szad_t      runs_avail_szad;
 864         extent_tree_ad_t        runs_avail_ad;
 865
 866         /* Tree of this arena's allocated (in-use) runs. */
 867         extent_tree_ad_t        runs_alloced_ad;
 868
 869 #ifdef MALLOC_BALANCE
 870         /*
 871          * The arena load balancing machinery needs to keep track of how much
 872          * lock contention there is.  This value is exponentially averaged.
 873          */
 874         uint32_t                contention;
 875 #endif
 876
 877 #ifdef MALLOC_LAZY_FREE
 878         /*
 879          * Deallocation of small objects can be lazy, in which case free_cache
 880          * stores pointers to those objects that have not yet been deallocated.
 881          * In order to avoid lock contention, slots are chosen randomly.  Empty
 882          * slots contain NULL.
 883          */
 884         void                    **free_cache;
 885 #endif
 886
 887         /*
 888          * bins is used to store rings of free regions of the following sizes,
 889          * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
 890          *
 891          *   bins[i] | size |
 892          *   --------+------+
 893          *        0  |    2 |
 894          *        1  |    4 |
 895          *        2  |    8 |
 896          *   --------+------+
 897          *        3  |   16 |
 898          *        4  |   32 |
 899          *        5  |   48 |
 900          *        6  |   64 |
 901          *           :      :
 902          *           :      :
 903          *       33  |  496 |
 904          *       34  |  512 |
 905          *   --------+------+
 906          *       35  | 1024 |
 907          *       36  | 2048 |
 908          *   --------+------+
 909          */
 910         arena_bin_t             bins[1]; /* Dynamically sized. */
 911 };
 912
 913 /******************************************************************************/
 914 /*
 915  * Data.
 916  */
 917
 918 /* Number of CPUs. */
 919 static unsigned         ncpus;
 920
 921 /* VM page size. */
 922 static size_t           pagesize;
 923 static size_t           pagesize_mask;
 924 static size_t           pagesize_2pow;
 925
 926 /* Various bin-related settings. */
 927 static size_t           bin_maxclass; /* Max size class for bins. */
 928 static unsigned         ntbins; /* Number of (2^n)-spaced tiny bins. */
 929 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 930 static unsigned         nsbins; /* Number of (2^n)-spaced sub-page bins. */
 931 static size_t           small_min;
 932 static size_t           small_max;
 933
 934 /* Various quantum-related settings. */
 935 static size_t           quantum;
 936 static size_t           quantum_mask; /* (quantum - 1). */
 937
 938 /* Various chunk-related settings. */
 939 static size_t           chunksize;
 940 static size_t           chunksize_mask; /* (chunksize - 1). */
 941 static size_t           chunk_npages;
 942 static size_t           arena_chunk_header_npages;
 943 static size_t           arena_maxclass; /* Max size class for arenas. */
 944
 945 /********/
 946 /*
 947  * Chunks.
 948  */
 949
 950 /* Protects chunk-related data structures. */
 951 static malloc_mutex_t   huge_mtx;
 952
 953 /* Tree of chunks that are stand-alone huge allocations. */
 954 static extent_tree_ad_t huge;
 955
 956 #ifdef MALLOC_DSS
 957 /*
 958  * Protects sbrk() calls.  This avoids malloc races among threads, though it
 959  * does not protect against races with threads that call sbrk() directly.
 960  */
 961 static malloc_mutex_t   dss_mtx;
 962 /* Base address of the DSS. */
 963 static void             *dss_base;
 964 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
 965 static void             *dss_prev;
 966 /* Current upper limit on DSS addresses. */
 967 static void             *dss_max;
 968
 969 /*
 970  * Trees of chunks that were previously allocated (trees differ only in node
 971  * ordering).  These are used when allocating chunks, in an attempt to re-use
 972  * address space.  Depending on function, different tree orderings are needed,
 973  * which is why there are two trees with the same contents.
 974  */
 975 static extent_tree_szad_t dss_chunks_szad;
 976 static extent_tree_ad_t dss_chunks_ad;
 977 #endif
 978
 979 #ifdef MALLOC_STATS
 980 /* Huge allocation statistics. */
 981 static uint64_t         huge_nmalloc;
 982 static uint64_t         huge_ndalloc;
 983 static size_t           huge_allocated;
 984 #endif
 985
 986 /****************************/
 987 /*
 988  * base (internal allocation).
 989  */
 990
 991 /*
 992  * Current pages that are being used for internal memory allocations.  These
 993  * pages are carved up in cacheline-size quanta, so that there is no chance of
 994  * false cache line sharing.
 995  */
 996 static void             *base_pages;
 997 static void             *base_next_addr;
 998 static void             *base_past_addr; /* Addr immediately past base_pages. */
 999 static extent_node_t    *base_nodes;
1000 static malloc_mutex_t   base_mtx;
1001 #ifdef MALLOC_STATS
1002 static size_t           base_mapped;
1003 #endif
1004
1005 /********/
1006 /*
1007  * Arenas.
1008  */
1009
1010 /*
1011  * Arenas that are used to service external requests.  Not all elements of the
1012  * arenas array are necessarily used; arenas are created lazily as needed.
1013  */
1014 static arena_t          **arenas;
1015 static unsigned         narenas;
1016 #ifndef NO_TLS
1017 #  ifdef MALLOC_BALANCE
1018 static unsigned         narenas_2pow;
1019 #  else
1020 static unsigned         next_arena;
1021 #  endif
1022 #endif
1023 static malloc_spinlock_t arenas_lock; /* Protects arenas initialization. */
1024
1025 #ifndef NO_TLS
1026 /*
1027  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1028  * for allocations.
1029  */
1030 #ifdef HAVE_LOCAL_THREAD_STORAGE
1031 static __thread arena_t *arenas_map;
1032 #endif
1033 #endif
1034
1035 #ifdef MALLOC_STATS
1036 /* Chunk statistics. */
1037 static chunk_stats_t    stats_chunks;
1038 #endif
1039
1040 /*******************************/
1041 /*
1042  * Runtime configuration options.
1043  */
1044 const char      *_malloc_options
1045 #ifdef WIN32
1046 = "A10n2F"
1047 #elif (defined(DARWIN))
1048 = "AP10n"
1049 #elif (defined(LINUX_HOST))
1050 = "A10n2F"
1051 #endif
1052 ;
1053
1054 #ifndef MALLOC_PRODUCTION
1055 static bool     opt_abort = true;
1056 #ifdef MALLOC_FILL
1057 static bool     opt_junk = true;
1058 #endif
1059 #else
1060 static bool     opt_abort = false;
1061 #ifdef MALLOC_FILL
1062 static bool     opt_junk = false;
1063 #endif
1064 #endif
1065 #ifdef MALLOC_DSS
1066 static bool     opt_dss = true;
1067 static bool     opt_mmap = true;
1068 #endif
1069 static size_t   opt_dirty_max = DIRTY_MAX_DEFAULT;
1070 #ifdef MALLOC_LAZY_FREE
1071 static int      opt_lazy_free_2pow = LAZY_FREE_2POW_DEFAULT;
1072 #endif
1073 #ifdef MALLOC_BALANCE
1074 static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1075 #endif
1076 /*
1077  * this toggles the printing of statistics when the program exists.
1078  */
1079 static bool     opt_print_stats = false;
1080 static size_t   opt_quantum_2pow = QUANTUM_2POW_MIN;
1081 static size_t   opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
1082 static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1083 #ifdef MALLOC_UTRACE
1084 static bool     opt_utrace = false;
1085 #endif
1086 #ifdef MALLOC_SYSV
1087 static bool     opt_sysv = false;
1088 #endif
1089 #ifdef MALLOC_XMALLOC
1090 static bool     opt_xmalloc = false;
1091 #endif
1092 #ifdef MALLOC_FILL
1093 static bool     opt_zero = false;
1094 #endif
1095 static int      opt_narenas_lshift = 0;
1096
1097 #ifdef MALLOC_UTRACE
1098 typedef struct {
1099         void    *p;
1100         size_t  s;
1101         void    *r;
1102 } malloc_utrace_t;
1103
1104 #define UTRACE(a, b, c)                                                 \
1105         if (opt_utrace) {                                               \
1106                 malloc_utrace_t ut;                                     \
1107                 ut.p = (a);                                             \
1108                 ut.s = (b);                                             \
1109                 ut.r = (c);                                             \
1110                 utrace(&ut, sizeof(ut));                                \
1111         }
1112 #else
1113 #define UTRACE(a, b, c)
1114 #endif
1115
1116 /******************************************************************************/
1117 /*
1118  * Begin function prototypes for non-inline static functions.
1119  */
1120
1121 static bool     malloc_mutex_init(malloc_mutex_t *mutex);
1122 static bool     malloc_spin_init(malloc_spinlock_t *lock);
1123 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
1124                 const char *p4);
1125 #ifdef MALLOC_STATS
1126 #ifdef DARWIN
1127 /* Avoid namespace collision with OS X's malloc APIs. */
1128 #define malloc_printf xmalloc_printf
1129 #endif
1130 static void     malloc_printf(const char *format, ...);
1131 #endif
1132 static char     *umax2s(uintmax_t x, char *s);
1133 #ifdef MALLOC_DSS
1134 static bool     base_pages_alloc_dss(size_t minsize);
1135 #endif
1136 static bool     base_pages_alloc_mmap(size_t minsize);
1137 static bool     base_pages_alloc(size_t minsize);
1138 static void     *base_alloc(size_t size);
1139 static void     *base_calloc(size_t number, size_t size);
1140 static extent_node_t *base_node_alloc(void);
1141 static void     base_node_dealloc(extent_node_t *node);
1142 #ifdef MALLOC_STATS
1143 static void     stats_print(arena_t *arena);
1144 #endif
1145 static void     *pages_map(void *addr, size_t size);
1146 static void     pages_unmap(void *addr, size_t size);
1147 #ifdef MALLOC_DSS
1148 static void     *chunk_alloc_dss(size_t size);
1149 static void     *chunk_recycle_dss(size_t size, bool zero);
1150 #endif
1151 static void     *chunk_alloc_mmap(size_t size);
1152 static void     *chunk_alloc(size_t size, bool zero);
1153 #ifdef MALLOC_DSS
1154 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1155 static bool     chunk_dealloc_dss(void *chunk, size_t size);
1156 #endif
1157 static void     chunk_dealloc_mmap(void *chunk, size_t size);
1158 static void     chunk_dealloc(void *chunk, size_t size);
1159 #ifndef NO_TLS
1160 static arena_t  *choose_arena_hard(void);
1161 #endif
1162 static extent_node_t *arena_chunk_node_alloc(arena_chunk_t *chunk);
1163 static void     arena_chunk_node_dealloc(arena_chunk_t *chunk,
1164     extent_node_t *node);
1165 static void     arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1166     bool small, bool zero);
1167 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1168 static void     arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1169 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool small,
1170     bool zero);
1171 static void     arena_purge(arena_t *arena);
1172 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1173 static void     arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1174     extent_node_t *nodeB, arena_run_t *run, size_t oldsize, size_t newsize);
1175 static void     arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1176     extent_node_t *nodeA, arena_run_t *run, size_t oldsize, size_t newsize,
1177     bool dirty);
1178 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1179 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1180 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1181 #ifdef MALLOC_BALANCE
1182 static void     arena_lock_balance_hard(arena_t *arena);
1183 #endif
1184 static void     *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1185 static void     *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1186     size_t alloc_size);
1187 static size_t   arena_salloc(const void *ptr);
1188 #ifdef MALLOC_LAZY_FREE
1189 static void     arena_dalloc_lazy_hard(arena_t *arena, arena_chunk_t *chunk,
1190     void *ptr, size_t pageind, arena_chunk_map_t *mapelm);
1191 #endif
1192 static void     arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1193     void *ptr);
1194 static void     arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1195     void *ptr, size_t size, size_t oldsize);
1196 static bool     arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1197     void *ptr, size_t size, size_t oldsize);
1198 static bool     arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1199 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1200 static bool     arena_new(arena_t *arena);
1201 static arena_t  *arenas_extend(unsigned ind);
1202 static void     *huge_malloc(size_t size, bool zero);
1203 static void     *huge_palloc(size_t alignment, size_t size);
1204 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1205 static void     huge_dalloc(void *ptr);
1206 static void     malloc_print_stats(void);
1207 #ifndef WIN32
1208 static
1209 #endif
1210 bool            malloc_init_hard(void);
1211
1212 /*
1213  * End function prototypes.
1214  */
1215 /******************************************************************************/
1216 /*
1217  * Begin mutex.  We can't use normal pthread mutexes in all places, because
1218  * they require malloc()ed memory, which causes bootstrapping issues in some
1219  * cases.
1220  */
1221
1222 static bool
1223 malloc_mutex_init(malloc_mutex_t *mutex)
1224 {
1225 #if defined(WIN32)
1226         if (g_isthreaded)
1227                 if (! __crtInitCritSecAndSpinCount(mutex, _CRT_SPINCOUNT))
1228                         return (true);
1229 #elif defined(DARWIN)
1230         mutex->lock = OS_SPINLOCK_INIT;
1231 #elif defined(LINUX_HOST)
1232         pthread_mutexattr_t attr;
1233         if (pthread_mutexattr_init(&attr) != 0)
1234                 return (true);
1235         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
1236         if (pthread_mutex_init(mutex, &attr) != 0) {
1237                 pthread_mutexattr_destroy(&attr);
1238                 return (true);
1239         }
1240         pthread_mutexattr_destroy(&attr);
1241 #elif defined(USE_JEMALLOC)
1242         if (pthread_mutex_init(mutex, NULL) != 0)
1243                 return (true);
1244 #else
1245         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1246
1247         mutex->lock = lock;
1248 #endif
1249         return (false);
1250 }
1251
1252 static inline void
1253 malloc_mutex_lock(malloc_mutex_t *mutex)
1254 {
1255
1256 #if defined(WIN32)
1257         EnterCriticalSection(mutex);
1258 #elif defined(DARWIN)
1259         OSSpinLockLock(&mutex->lock);
1260 #elif defined(USE_JEMALLOC)
1261         pthread_mutex_lock(mutex);
1262 #else
1263         if (g_isthreaded)
1264                 _SPINLOCK(&mutex->lock);
1265 #endif
1266 }
1267
1268 static inline void
1269 malloc_mutex_unlock(malloc_mutex_t *mutex)
1270 {
1271
1272 #if defined(WIN32)
1273         LeaveCriticalSection(mutex);
1274 #elif defined(DARWIN)
1275         OSSpinLockUnlock(&mutex->lock);
1276 #elif defined(USE_JEMALLOC)
1277         pthread_mutex_unlock(mutex);
1278 #else
1279         if (g_isthreaded)
1280                 _SPINUNLOCK(&mutex->lock);
1281 #endif
1282 }
1283
1284 static bool
1285 malloc_spin_init(malloc_spinlock_t *lock)
1286 {
1287 #if defined(WIN32)
1288         if (g_isthreaded)
1289                 if (! __crtInitCritSecAndSpinCount(lock, _CRT_SPINCOUNT))
1290                         return (true);
1291 #elif defined(DARWIN)
1292         lock->lock = OS_SPINLOCK_INIT;
1293 #elif defined(LINUX_HOST)
1294         pthread_mutexattr_t attr;
1295         if (pthread_mutexattr_init(&attr) != 0)
1296                 return (true);
1297         pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
1298         if (pthread_mutex_init(lock, &attr) != 0) {
1299                 pthread_mutexattr_destroy(&attr);
1300                 return (true);
1301         }
1302         pthread_mutexattr_destroy(&attr);
1303 #elif defined(USE_JEMALLOC)
1304         if (pthread_mutex_init(lock, NULL) != 0)
1305                 return (true);
1306 #else
1307         lock->lock = _SPINLOCK_INITIALIZER;
1308 #endif
1309         return (false);
1310 }
1311
1312 static inline void
1313 malloc_spin_lock(malloc_spinlock_t *lock)
1314 {
1315
1316 #if defined(WIN32)
1317         EnterCriticalSection(lock);
1318 #elif defined(DARWIN)
1319         OSSpinLockLock(&lock->lock);
1320 #elif defined(USE_JEMALLOC)
1321         pthread_mutex_lock(lock);
1322 #else
1323         if (g_isthreaded)
1324                 _SPINLOCK(&lock->lock);
1325 #endif
1326 }
1327
1328 static inline void
1329 malloc_spin_unlock(malloc_spinlock_t *lock)
1330 {
1331 #if defined(WIN32)
1332         LeaveCriticalSection(lock);
1333 #elif defined(DARWIN)
1334         OSSpinLockUnlock(&lock->lock);
1335 #elif defined(USE_JEMALLOC)
1336         pthread_mutex_unlock(lock);
1337 #else
1338         if (g_isthreaded)
1339                 _SPINUNLOCK(&lock->lock);
1340 #endif
1341 }
1342
1343 /*
1344  * End mutex.
1345  */
1346 /******************************************************************************/
1347 /*
1348  * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
1349  * after a period of spinning, because unbounded spinning would allow for
1350  * priority inversion.
1351  */
1352
1353 #ifndef DARWIN
1354 #  define       malloc_spin_init        malloc_mutex_init
1355 #  define       malloc_spin_lock        malloc_mutex_lock
1356 #  define       malloc_spin_unlock      malloc_mutex_unlock
1357 #endif
1358
1359 /*
1360  * End spin lock.
1361  */
1362
1363 /******************************************************************************/
1364 /*
1365  * Begin Utility functions/macros.
1366  */
1367
1368 /* Return the chunk address for allocation address a. */
1369 #define CHUNK_ADDR2BASE(a)                                              \
1370         ((void *)((uintptr_t)(a) & ~chunksize_mask))
1371
1372 /* Return the chunk offset of address a. */
1373 #define CHUNK_ADDR2OFFSET(a)                                            \
1374         ((size_t)((uintptr_t)(a) & chunksize_mask))
1375
1376 /* Return the smallest chunk multiple that is >= s. */
1377 #define CHUNK_CEILING(s)                                                \
1378         (((s) + chunksize_mask) & ~chunksize_mask)
1379
1380 /* Return the smallest cacheline multiple that is >= s. */
1381 #define CACHELINE_CEILING(s)                                            \
1382         (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
1383
1384 /* Return the smallest quantum multiple that is >= a. */
1385 #define QUANTUM_CEILING(a)                                              \
1386         (((a) + quantum_mask) & ~quantum_mask)
1387
1388 /* Return the smallest pagesize multiple that is >= s. */
1389 #define PAGE_CEILING(s)                                                 \
1390         (((s) + pagesize_mask) & ~pagesize_mask)
1391
1392 /* Compute the smallest power of 2 that is >= x. */
1393 static inline size_t
1394 pow2_ceil(size_t x)
1395 {
1396
1397         x--;
1398         x |= x >> 1;
1399         x |= x >> 2;
1400         x |= x >> 4;
1401         x |= x >> 8;
1402         x |= x >> 16;
1403 #if (SIZEOF_PTR == 8)
1404         x |= x >> 32;
1405 #endif
1406         x++;
1407         return (x);
1408 }
1409
1410 #if (defined(MALLOC_LAZY_FREE) || defined(MALLOC_BALANCE))
1411 /*
1412  * Use a simple linear congruential pseudo-random number generator:
1413  *
1414  *   prn(y) = (a*x + c) % m
1415  *
1416  * where the following constants ensure maximal period:
1417  *
1418  *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1419  *   c == Odd number (relatively prime to 2^n).
1420  *   m == 2^32
1421  *
1422  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1423  *
1424  * This choice of m has the disadvantage that the quality of the bits is
1425  * proportional to bit position.  For example. the lowest bit has a cycle of 2,
1426  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
1427  * bits.
1428  */
1429 #  define PRN_DEFINE(suffix, var, a, c)                                 \
1430 static inline void                                                      \
1431 sprn_##suffix(uint32_t seed)                                            \
1432 {                                                                       \
1433         var = seed;                                                     \
1434 }                                                                       \
1435                                                                         \
1436 static inline uint32_t                                                  \
1437 prn_##suffix(uint32_t lg_range)                                         \
1438 {                                                                       \
1439         uint32_t ret, x;                                                \
1440                                                                         \
1441         assert(lg_range > 0);                                           \
1442         assert(lg_range <= 32);                                         \
1443                                                                         \
1444         x = (var * (a)) + (c);                                          \
1445         var = x;                                                        \
1446         ret = x >> (32 - lg_range);                                     \
1447                                                                         \
1448         return (ret);                                                   \
1449 }
1450 #  define SPRN(suffix, seed)    sprn_##suffix(seed)
1451 #  define PRN(suffix, lg_range) prn_##suffix(lg_range)
1452 #endif
1453
1454 /*
1455  * Define PRNGs, one for each purpose, in order to avoid auto-correlation
1456  * problems.
1457  */
1458
1459 #ifdef MALLOC_LAZY_FREE
1460 /* Define the per-thread PRNG used for lazy deallocation. */
1461 static __thread uint32_t lazy_free_x;
1462 PRN_DEFINE(lazy_free, lazy_free_x, 12345, 12347)
1463 #endif
1464
1465 #ifdef MALLOC_BALANCE
1466 /* Define the PRNG used for arena assignment. */
1467 static __thread uint32_t balance_x;
1468 PRN_DEFINE(balance, balance_x, 1297, 1301)
1469 #endif
1470
1471 #ifdef MALLOC_UTRACE
1472 static int
1473 utrace(const void *addr, size_t len)
1474 {
1475         malloc_utrace_t *ut = (malloc_utrace_t *)addr;
1476
1477         assert(len == sizeof(malloc_utrace_t));
1478
1479         if (ut->p == NULL && ut->s == 0 && ut->r == NULL)
1480                 malloc_printf("%d x USER malloc_init()\n", getpid());
1481         else if (ut->p == NULL && ut->r != NULL) {
1482                 malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut->r,
1483                     ut->s);
1484         } else if (ut->p != NULL && ut->r != NULL) {
1485                 malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(),
1486                     ut->r, ut->p, ut->s);
1487         } else
1488                 malloc_printf("%d x USER free(%p)\n", getpid(), ut->p);
1489
1490         return (0);
1491 }
1492 #endif
1493
1494 static inline const char *
1495 _getprogname(void)
1496 {
1497
1498         return ("<jemalloc>");
1499 }
1500
1501 static void
1502 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1503 {
1504 #ifndef WIN32
1505 #define _write  write
1506 #endif
1507         size_t ret =_write(STDERR_FILENO, p1, (unsigned int) strlen(p1));
1508         ret = _write(STDERR_FILENO, p2, (unsigned int) strlen(p2));
1509         ret = _write(STDERR_FILENO, p3, (unsigned int) strlen(p3));
1510         ret = _write(STDERR_FILENO, p4, (unsigned int) strlen(p4));
1511 }
1512
1513 #define _malloc_message malloc_message
1514
1515 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1516             const char *p4) = wrtmessage;
1517
1518 #ifdef MALLOC_STATS
1519 /*
1520  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1521  */
1522 static void
1523 malloc_printf(const char *format, ...)
1524 {
1525         char buf[4096];
1526         va_list ap;
1527
1528         va_start(ap, format);
1529         vsnprintf(buf, sizeof(buf), format, ap);
1530         va_end(ap);
1531         _malloc_message(buf, "", "", "");
1532 }
1533 #endif
1534
1535 /*
1536  * We don't want to depend on vsnprintf() for production builds, since that can
1537  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
1538  * integer printing functionality, so that malloc_printf() use can be limited to
1539  * MALLOC_STATS code.
1540  */
1541 #define UMAX2S_BUFSIZE  21
1542 static char *
1543 umax2s(uintmax_t x, char *s)
1544 {
1545         unsigned i;
1546
1547         /* Make sure UMAX2S_BUFSIZE is large enough. */
1548         assert(sizeof(uintmax_t) <= 8);
1549
1550         i = UMAX2S_BUFSIZE - 1;
1551         s[i] = '\0';
1552         do {
1553                 i--;
1554                 s[i] = "0123456789"[x % 10];
1555                 x /= 10;
1556         } while (x > 0);
1557
1558         return (&s[i]);
1559 }
1560
1561 /******************************************************************************/
1562
1563 #ifdef MALLOC_DSS
1564 static bool
1565 base_pages_alloc_dss(size_t minsize)
1566 {
1567
1568         /*
1569          * Do special DSS allocation here, since base allocations don't need to
1570          * be chunk-aligned.
1571          */
1572         malloc_mutex_lock(&dss_mtx);
1573         if (dss_prev != (void *)-1) {
1574                 intptr_t incr;
1575                 size_t csize = CHUNK_CEILING(minsize);
1576
1577                 do {
1578                         /* Get the current end of the DSS. */
1579                         dss_max = sbrk(0);
1580
1581                         /*
1582                          * Calculate how much padding is necessary to
1583                          * chunk-align the end of the DSS.  Don't worry about
1584                          * dss_max not being chunk-aligned though.
1585                          */
1586                         incr = (intptr_t)chunksize
1587                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1588                         assert(incr >= 0);
1589                         if ((size_t)incr < minsize)
1590                                 incr += csize;
1591
1592                         dss_prev = sbrk(incr);
1593                         if (dss_prev == dss_max) {
1594                                 /* Success. */
1595                                 dss_max = (void *)((intptr_t)dss_prev + incr);
1596                                 base_pages = dss_prev;
1597                                 base_next_addr = base_pages;
1598                                 base_past_addr = dss_max;
1599 #ifdef MALLOC_STATS
1600                                 base_mapped += incr;
1601 #endif
1602                                 malloc_mutex_unlock(&dss_mtx);
1603                                 return (false);
1604                         }
1605                 } while (dss_prev != (void *)-1);
1606         }
1607         malloc_mutex_unlock(&dss_mtx);
1608
1609         return (true);
1610 }
1611 #endif
1612
1613 static bool
1614 base_pages_alloc_mmap(size_t minsize)
1615 {
1616         size_t csize;
1617
1618         assert(minsize != 0);
1619         csize = PAGE_CEILING(minsize);
1620         base_pages = pages_map(NULL, csize);
1621         if (base_pages == NULL)
1622                 return (true);
1623         base_next_addr = base_pages;
1624         base_past_addr = (void *)((uintptr_t)base_pages + csize);
1625 #ifdef MALLOC_STATS
1626         base_mapped += csize;
1627 #endif
1628
1629         return (false);
1630 }
1631
1632 static bool
1633 base_pages_alloc(size_t minsize)
1634 {
1635
1636 #ifdef MALLOC_DSS
1637         if (opt_dss) {
1638                 if (base_pages_alloc_dss(minsize) == false)
1639                         return (false);
1640         }
1641
1642         if (opt_mmap && minsize != 0)
1643 #endif
1644         {
1645                 if (base_pages_alloc_mmap(minsize) == false)
1646                         return (false);
1647         }
1648
1649         return (true);
1650 }
1651
1652 static void *
1653 base_alloc(size_t size)
1654 {
1655         void *ret;
1656         size_t csize;
1657
1658         /* Round size up to nearest multiple of the cacheline size. */
1659         csize = CACHELINE_CEILING(size);
1660
1661         malloc_mutex_lock(&base_mtx);
1662         /* Make sure there's enough space for the allocation. */
1663         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1664                 if (base_pages_alloc(csize))
1665                         return (NULL);
1666         }
1667         /* Allocate. */
1668         ret = base_next_addr;
1669         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1670         malloc_mutex_unlock(&base_mtx);
1671
1672         return (ret);
1673 }
1674
1675 static void *
1676 base_calloc(size_t number, size_t size)
1677 {
1678         void *ret;
1679
1680         ret = base_alloc(number * size);
1681         memset(ret, 0, number * size);
1682
1683         return (ret);
1684 }
1685
1686 static extent_node_t *
1687 base_node_alloc(void)
1688 {
1689         extent_node_t *ret;
1690
1691         malloc_mutex_lock(&base_mtx);
1692         if (base_nodes != NULL) {
1693                 ret = base_nodes;
1694                 base_nodes = *(extent_node_t **)ret;
1695                 malloc_mutex_unlock(&base_mtx);
1696         } else {
1697                 malloc_mutex_unlock(&base_mtx);
1698                 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1699         }
1700
1701         return (ret);
1702 }
1703
1704 static void
1705 base_node_dealloc(extent_node_t *node)
1706 {
1707
1708         malloc_mutex_lock(&base_mtx);
1709         *(extent_node_t **)node = base_nodes;
1710         base_nodes = node;
1711         malloc_mutex_unlock(&base_mtx);
1712 }
1713
1714 /******************************************************************************/
1715
1716 #ifdef MALLOC_STATS
1717 static void
1718 stats_print(arena_t *arena)
1719 {
1720         unsigned i, gap_start;
1721
1722 #ifdef WIN32
1723         malloc_printf("dirty: %Iu page%s dirty, %I64u sweep%s,"
1724             " %I64u madvise%s, %I64u page%s purged\n",
1725             arena->ndirty, arena->ndirty == 1 ? "" : "s",
1726             arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1727             arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1728             arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1729 #  ifdef MALLOC_DECOMMIT
1730         malloc_printf("decommit: %I64u decommit%s, %I64u commit%s,"
1731             " %I64u page%s decommitted\n",
1732             arena->stats.ndecommit, (arena->stats.ndecommit == 1) ? "" : "s",
1733             arena->stats.ncommit, (arena->stats.ncommit == 1) ? "" : "s",
1734             arena->stats.decommitted,
1735             (arena->stats.decommitted == 1) ? "" : "s");
1736 #  endif
1737
1738         malloc_printf("            allocated      nmalloc      ndalloc\n");
1739         malloc_printf("small:   %12Iu %12I64u %12I64u\n",
1740             arena->stats.allocated_small, arena->stats.nmalloc_small,
1741             arena->stats.ndalloc_small);
1742         malloc_printf("large:   %12Iu %12I64u %12I64u\n",
1743             arena->stats.allocated_large, arena->stats.nmalloc_large,
1744             arena->stats.ndalloc_large);
1745         malloc_printf("total:   %12Iu %12I64u %12I64u\n",
1746             arena->stats.allocated_small + arena->stats.allocated_large,
1747             arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1748             arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1749         malloc_printf("mapped:  %12Iu\n", arena->stats.mapped);
1750 #else
1751         malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1752             " %llu madvise%s, %llu page%s purged\n",
1753             arena->ndirty, arena->ndirty == 1 ? "" : "s",
1754             arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1755             arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1756             arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1757 #  ifdef MALLOC_DECOMMIT
1758         malloc_printf("decommit: %llu decommit%s, %llu commit%s,"
1759             " %llu page%s decommitted\n",
1760             arena->stats.ndecommit, (arena->stats.ndecommit == 1) ? "" : "s",
1761             arena->stats.ncommit, (arena->stats.ncommit == 1) ? "" : "s",
1762             arena->stats.decommitted,
1763             (arena->stats.decommitted == 1) ? "" : "s");
1764 #  endif
1765
1766         malloc_printf("            allocated      nmalloc      ndalloc\n");
1767         malloc_printf("small:   %12zu %12llu %12llu\n",
1768             arena->stats.allocated_small, arena->stats.nmalloc_small,
1769             arena->stats.ndalloc_small);
1770         malloc_printf("large:   %12zu %12llu %12llu\n",
1771             arena->stats.allocated_large, arena->stats.nmalloc_large,
1772             arena->stats.ndalloc_large);
1773         malloc_printf("total:   %12zu %12llu %12llu\n",
1774             arena->stats.allocated_small + arena->stats.allocated_large,
1775             arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1776             arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1777         malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
1778 #endif
1779         malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1780             "    reruns maxruns curruns\n");
1781         for (i = 0, gap_start = UINT_MAX; i < ntbins + nqbins + nsbins; i++) {
1782                 if (arena->bins[i].stats.nrequests == 0) {
1783                         if (gap_start == UINT_MAX)
1784                                 gap_start = i;
1785                 } else {
1786                         if (gap_start != UINT_MAX) {
1787                                 if (i > gap_start + 1) {
1788                                         /* Gap of more than one size class. */
1789                                         malloc_printf("[%u..%u]\n",
1790                                             gap_start, i - 1);
1791                                 } else {
1792                                         /* Gap of one size class. */
1793                                         malloc_printf("[%u]\n", gap_start);
1794                                 }
1795                                 gap_start = UINT_MAX;
1796                         }
1797                         malloc_printf(
1798 #if defined(WIN32)
1799                             "%13u %1s %4u %4u %3u %9I64u %9I64u"
1800                             " %9I64u %7u %7u\n",
1801 #else
1802                             "%13u %1s %4u %4u %3u %9llu %9llu"
1803                             " %9llu %7lu %7lu\n",
1804 #endif
1805                             i,
1806                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1807                             arena->bins[i].reg_size,
1808                             arena->bins[i].nregs,
1809                             arena->bins[i].run_size >> pagesize_2pow,
1810                             arena->bins[i].stats.nrequests,
1811                             arena->bins[i].stats.nruns,
1812                             arena->bins[i].stats.reruns,
1813                             arena->bins[i].stats.highruns,
1814                             arena->bins[i].stats.curruns);
1815                 }
1816         }
1817         if (gap_start != UINT_MAX) {
1818                 if (i > gap_start + 1) {
1819                         /* Gap of more than one size class. */
1820                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1821                 } else {
1822                         /* Gap of one size class. */
1823                         malloc_printf("[%u]\n", gap_start);
1824                 }
1825         }
1826 }
1827 #endif
1828
1829 /*
1830  * End Utility functions/macros.
1831  */
1832 /******************************************************************************/
1833 /*
1834  * Begin extent tree code.
1835  */
1836
1837 static inline int
1838 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1839 {
1840         int ret;
1841         size_t a_size = a->size;
1842         size_t b_size = b->size;
1843
1844         ret = (a_size > b_size) - (a_size < b_size);
1845         if (ret == 0) {
1846                 uintptr_t a_addr = (uintptr_t)a->addr;
1847                 uintptr_t b_addr = (uintptr_t)b->addr;
1848
1849                 ret = (a_addr > b_addr) - (a_addr < b_addr);
1850         }
1851
1852         return (ret);
1853 }
1854
1855 /* Generate red-black tree code for size/address-ordered extents. */
1856 RB_GENERATE_STATIC(extent_tree_szad_s, extent_node_s, link_szad,
1857     extent_szad_comp)
1858
1859 static inline int
1860 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1861 {
1862         uintptr_t a_addr = (uintptr_t)a->addr;
1863         uintptr_t b_addr = (uintptr_t)b->addr;
1864
1865         return ((a_addr > b_addr) - (a_addr < b_addr));
1866 }
1867
1868 /* Generate red-black tree code for address-ordered extents. */
1869 RB_GENERATE_STATIC(extent_tree_ad_s, extent_node_s, link_ad, extent_ad_comp)
1870
1871
1872 /*
1873  * End extent tree code.
1874  */
1875 /******************************************************************************/
1876 /*
1877  * Begin chunk management functions.
1878  */
1879
1880 #ifdef WIN32
1881 static void *
1882 pages_map(void *addr, size_t size)
1883 {
1884         void *ret;
1885
1886         ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
1887             PAGE_READWRITE);
1888
1889         return (ret);
1890 }
1891
1892 static void
1893 pages_unmap(void *addr, size_t size)
1894 {
1895
1896         if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
1897                 _malloc_message(_getprogname(),
1898                     ": (malloc) Error in VirtualFree()\n", "", "");
1899                 if (opt_abort)
1900                         abort();
1901         }
1902 }
1903 #elif (defined(DARWIN))
1904 static void *
1905 pages_map(void *addr, size_t size)
1906 {
1907         void *ret;
1908         kern_return_t err;
1909         int flags;
1910
1911         if (addr != NULL) {
1912                 ret = addr;
1913                 flags = 0;
1914         } else
1915                 flags = VM_FLAGS_ANYWHERE;
1916
1917         err = vm_allocate((vm_map_t)mach_task_self(), (vm_address_t *)&ret,
1918             (vm_size_t)size, flags);
1919         if (err != KERN_SUCCESS)
1920                 ret = NULL;
1921
1922         assert(ret == NULL || (addr == NULL && ret != addr)
1923             || (addr != NULL && ret == addr));
1924         return (ret);
1925 }
1926
1927 static void
1928 pages_unmap(void *addr, size_t size)
1929 {
1930         kern_return_t err;
1931
1932         err = vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)addr,
1933             (vm_size_t)size);
1934         if (err != KERN_SUCCESS) {
1935                 malloc_message(_getprogname(),
1936                     ": (malloc) Error in vm_deallocate(): ",
1937                     mach_error_string(err), "\n");
1938                 if (opt_abort)
1939                         abort();
1940         }
1941 }
1942
1943 #define VM_COPY_MIN (pagesize << 5)
1944 static inline void
1945 pages_copy(void *dest, const void *src, size_t n)
1946 {
1947
1948         assert((void *)((uintptr_t)dest & ~pagesize_mask) == dest);
1949         assert(n >= VM_COPY_MIN);
1950         assert((void *)((uintptr_t)src & ~pagesize_mask) == src);
1951
1952         vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
1953             (vm_address_t)dest);
1954 }
1955 #else /* DARWIN */
1956 static void *
1957 pages_map(void *addr, size_t size)
1958 {
1959         void *ret;
1960
1961         /*
1962          * We don't use MAP_FIXED here, because it can cause the *replacement*
1963          * of existing mappings, and we only want to create new mappings.
1964          */
1965         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1966             -1, 0);
1967         assert(ret != NULL);
1968
1969         if (ret == MAP_FAILED)
1970                 ret = NULL;
1971         else if (addr != NULL && ret != addr) {
1972                 /*
1973                  * We succeeded in mapping memory, but not in the right place.
1974                  */
1975                 if (munmap(ret, size) == -1) {
1976                         char buf[STRERROR_BUF];
1977
1978                         strerror_r(errno, buf, sizeof(buf));
1979                         _malloc_message(_getprogname(),
1980                             ": (malloc) Error in munmap(): ", buf, "\n");
1981                         if (opt_abort)
1982                                 abort();
1983                 }
1984                 ret = NULL;
1985         }
1986
1987         assert(ret == NULL || (addr == NULL && ret != addr)
1988             || (addr != NULL && ret == addr));
1989         return (ret);
1990 }
1991
1992 static void
1993 pages_unmap(void *addr, size_t size)
1994 {
1995
1996         if (munmap(addr, size) == -1) {
1997                 char buf[STRERROR_BUF];
1998
1999                 strerror_r(errno, buf, sizeof(buf));
2000                 _malloc_message(_getprogname(),
2001                     ": (malloc) Error in munmap(): ", buf, "\n");
2002                 if (opt_abort)
2003                         abort();
2004         }
2005 }
2006 #endif
2007
2008 #ifdef MALLOC_DECOMMIT
2009 static inline void
2010 pages_decommit(void *addr, size_t size)
2011 {
2012
2013 #ifdef WIN32
2014         VirtualFree(addr, size, MEM_DECOMMIT);
2015 #else
2016         if (mmap(addr, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
2017             0) == MAP_FAILED)
2018                 abort();
2019 #endif
2020 }
2021
2022 static inline void
2023 pages_commit(void *addr, size_t size)
2024 {
2025
2026 #  ifdef WIN32
2027         VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
2028 #  else
2029         if (mmap(addr, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE |
2030             MAP_ANON, -1, 0) == MAP_FAILED)
2031                 abort();
2032 #  endif
2033 }
2034 #endif
2035
2036 #ifdef MALLOC_DSS
2037 static void *
2038 chunk_alloc_dss(size_t size)
2039 {
2040
2041         malloc_mutex_lock(&dss_mtx);
2042         if (dss_prev != (void *)-1) {
2043                 intptr_t incr;
2044
2045                 /*
2046                  * The loop is necessary to recover from races with other
2047                  * threads that are using the DSS for something other than
2048                  * malloc.
2049                  */
2050                 do {
2051                         void *ret;
2052
2053                         /* Get the current end of the DSS. */
2054                         dss_max = sbrk(0);
2055
2056                         /*
2057                          * Calculate how much padding is necessary to
2058                          * chunk-align the end of the DSS.
2059                          */
2060                         incr = (intptr_t)size
2061                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
2062                         if (incr == (intptr_t)size)
2063                                 ret = dss_max;
2064                         else {
2065                                 ret = (void *)((intptr_t)dss_max + incr);
2066                                 incr += size;
2067                         }
2068
2069                         dss_prev = sbrk(incr);
2070                         if (dss_prev == dss_max) {
2071                                 /* Success. */
2072                                 dss_max = (void *)((intptr_t)dss_prev + incr);
2073                                 malloc_mutex_unlock(&dss_mtx);
2074                                 return (ret);
2075                         }
2076                 } while (dss_prev != (void *)-1);
2077         }
2078         malloc_mutex_unlock(&dss_mtx);
2079
2080         return (NULL);
2081 }
2082
2083 static void *
2084 chunk_recycle_dss(size_t size, bool zero)
2085 {
2086         extent_node_t *node, key;
2087
2088         key.addr = NULL;
2089         key.size = size;
2090         malloc_mutex_lock(&dss_mtx);
2091         node = RB_NFIND(extent_tree_szad_s, &dss_chunks_szad, &key);
2092         if (node != NULL) {
2093                 void *ret = node->addr;
2094
2095                 /* Remove node from the tree. */
2096                 RB_REMOVE(extent_tree_szad_s, &dss_chunks_szad, node);
2097                 if (node->size == size) {
2098                         RB_REMOVE(extent_tree_ad_s, &dss_chunks_ad, node);
2099                         base_node_dealloc(node);
2100                 } else {
2101                         /*
2102                          * Insert the remainder of node's address range as a
2103                          * smaller chunk.  Its position within dss_chunks_ad
2104                          * does not change.
2105                          */
2106                         assert(node->size > size);
2107                         node->addr = (void *)((uintptr_t)node->addr + size);
2108                         node->size -= size;
2109                         RB_INSERT(extent_tree_szad_s, &dss_chunks_szad, node);
2110                 }
2111                 malloc_mutex_unlock(&dss_mtx);
2112
2113                 if (zero)
2114                         memset(ret, 0, size);
2115                 return (ret);
2116         }
2117         malloc_mutex_unlock(&dss_mtx);
2118
2119         return (NULL);
2120 }
2121 #endif
2122
2123 #ifdef WIN32
2124 static inline void *
2125 chunk_alloc_mmap(size_t size)
2126 {
2127         void *ret;
2128         size_t offset;
2129
2130         /*
2131          * Windows requires that there be a 1:1 mapping between VM
2132          * allocation/deallocation operations.  Therefore, take care here to
2133          * acquire the final result via one mapping operation.  This means
2134          * unmapping any preliminary result that is not correctly aligned.
2135          */
2136
2137         ret = pages_map(NULL, size);
2138         if (ret == NULL)
2139                 return (NULL);
2140
2141         offset = CHUNK_ADDR2OFFSET(ret);
2142         if (offset != 0) {
2143                 /* Deallocate, then try to allocate at (ret + size - offset). */
2144                 pages_unmap(ret, size);
2145                 ret = pages_map((void *)((uintptr_t)ret + size - offset), size);
2146                 while (ret == NULL) {
2147                         /*
2148                          * Over-allocate in order to map a memory region that
2149                          * is definitely large enough.
2150                          */
2151                         ret = pages_map(NULL, size + chunksize);
2152                         if (ret == NULL)
2153                                 return (NULL);
2154                         /*
2155                          * Deallocate, then allocate the correct size, within
2156                          * the over-sized mapping.
2157                          */
2158                         offset = CHUNK_ADDR2OFFSET(ret);
2159                         pages_unmap(ret, size + chunksize);
2160                         if (offset == 0)
2161                                 ret = pages_map(ret, size);
2162                         else {
2163                                 ret = pages_map((void *)((uintptr_t)ret +
2164                                     chunksize - offset), size);
2165                         }
2166                         /*
2167                          * Failure here indicates a race with another thread, so
2168                          * try again.
2169                          */
2170                 }
2171         }
2172
2173         return (ret);
2174 }
2175 #else
2176 static inline void *
2177 chunk_alloc_mmap(size_t size)
2178 {
2179         void *ret;
2180         size_t offset;
2181
2182         /*
2183          * Ideally, there would be a way to specify alignment to mmap() (like
2184          * NetBSD has), but in the absence of such a feature, we have to work
2185          * hard to efficiently create aligned mappings.  The reliable, but
2186          * expensive method is to create a mapping that is over-sized, then
2187          * trim the excess.  However, that always results in at least one call
2188          * to pages_unmap().
2189          *
2190          * A more optimistic approach is to try mapping precisely the right
2191          * amount, then try to append another mapping if alignment is off.  In
2192          * practice, this works out well as long as the application is not
2193          * interleaving mappings via direct mmap() calls.  If we do run into a
2194          * situation where there is an interleaved mapping and we are unable to
2195          * extend an unaligned mapping, our best option is to momentarily
2196          * revert to the reliable-but-expensive method.  This will tend to
2197          * leave a gap in the memory map that is too small to cause later
2198          * problems for the optimistic method.
2199          */
2200
2201         ret = pages_map(NULL, size);
2202         if (ret == NULL)
2203                 return (NULL);
2204
2205         offset = CHUNK_ADDR2OFFSET(ret);
2206         if (offset != 0) {
2207                 /* Try to extend chunk boundary. */
2208                 if (pages_map((void *)((uintptr_t)ret + size),
2209                     chunksize - offset) == NULL) {
2210                         /*
2211                          * Extension failed.  Clean up, then revert to the
2212                          * reliable-but-expensive method.
2213                          */
2214                         pages_unmap(ret, size);
2215
2216                         /* Beware size_t wrap-around. */
2217                         if (size + chunksize <= size)
2218                                 return NULL;
2219
2220                         ret = pages_map(NULL, size + chunksize);
2221                         if (ret == NULL)
2222                                 return (NULL);
2223
2224                         /* Clean up unneeded leading/trailing space. */
2225                         offset = CHUNK_ADDR2OFFSET(ret);
2226                         if (offset != 0) {
2227                                 /* Leading space. */
2228                                 pages_unmap(ret, chunksize - offset);
2229
2230                                 ret = (void *)((uintptr_t)ret +
2231                                     (chunksize - offset));
2232
2233                                 /* Trailing space. */
2234                                 pages_unmap((void *)((uintptr_t)ret + size),
2235                                     offset);
2236                         } else {
2237                                 /* Trailing space only. */
2238                                 pages_unmap((void *)((uintptr_t)ret + size),
2239                                     chunksize);
2240                         }
2241                 } else {
2242                         /* Clean up unneeded leading space. */
2243                         pages_unmap(ret, chunksize - offset);
2244                         ret = (void *)((uintptr_t)ret + (chunksize - offset));
2245                 }
2246         }
2247
2248         return (ret);
2249 }
2250 #endif
2251
2252 static void *
2253 chunk_alloc(size_t size, bool zero)
2254 {
2255         void *ret;
2256
2257         assert(size != 0);
2258         assert((size & chunksize_mask) == 0);
2259
2260 #ifdef MALLOC_DSS
2261         if (opt_dss) {
2262                 ret = chunk_recycle_dss(size, zero);
2263                 if (ret != NULL) {
2264                         goto RETURN;
2265                 }
2266
2267                 ret = chunk_alloc_dss(size);
2268                 if (ret != NULL)
2269                         goto RETURN;
2270         }
2271
2272         if (opt_mmap)
2273 #endif
2274         {
2275                 ret = chunk_alloc_mmap(size);
2276                 if (ret != NULL)
2277                         goto RETURN;
2278         }
2279
2280         /* All strategies for allocation failed. */
2281         ret = NULL;
2282 RETURN:
2283 #ifdef MALLOC_STATS
2284         if (ret != NULL) {
2285                 stats_chunks.nchunks += (size / chunksize);
2286                 stats_chunks.curchunks += (size / chunksize);
2287         }
2288         if (stats_chunks.curchunks > stats_chunks.highchunks)
2289                 stats_chunks.highchunks = stats_chunks.curchunks;
2290 #endif
2291
2292         assert(CHUNK_ADDR2BASE(ret) == ret);
2293         return (ret);
2294 }
2295
2296 #ifdef MALLOC_DSS
2297 static extent_node_t *
2298 chunk_dealloc_dss_record(void *chunk, size_t size)
2299 {
2300         extent_node_t *node, *prev, key;
2301
2302         key.addr = (void *)((uintptr_t)chunk + size);
2303         node = RB_NFIND(extent_tree_ad_s, &dss_chunks_ad, &key);
2304         /* Try to coalesce forward. */
2305         if (node != NULL && node->addr == key.addr) {
2306                 /*
2307                  * Coalesce chunk with the following address range.  This does
2308                  * not change the position within dss_chunks_ad, so only
2309                  * remove/insert from/into dss_chunks_szad.
2310                  */
2311                 RB_REMOVE(extent_tree_szad_s, &dss_chunks_szad, node);
2312                 node->addr = chunk;
2313                 node->size += size;
2314                 RB_INSERT(extent_tree_szad_s, &dss_chunks_szad, node);
2315         } else {
2316                 /*
2317                  * Coalescing forward failed, so insert a new node.  Drop
2318                  * dss_mtx during node allocation, since it is possible that a
2319                  * new base chunk will be allocated.
2320                  */
2321                 malloc_mutex_unlock(&dss_mtx);
2322                 node = base_node_alloc();
2323                 malloc_mutex_lock(&dss_mtx);
2324                 if (node == NULL)
2325                         return (NULL);
2326                 node->addr = chunk;
2327                 node->size = size;
2328                 RB_INSERT(extent_tree_ad_s, &dss_chunks_ad, node);
2329                 RB_INSERT(extent_tree_szad_s, &dss_chunks_szad, node);
2330         }
2331
2332         /* Try to coalesce backward. */
2333         prev = RB_PREV(extent_tree_ad_s, &dss_chunks_ad, node);
2334         if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2335             chunk) {
2336                 /*
2337                  * Coalesce chunk with the previous address range.  This does
2338                  * not change the position within dss_chunks_ad, so only
2339                  * remove/insert node from/into dss_chunks_szad.
2340                  */
2341                 RB_REMOVE(extent_tree_szad_s, &dss_chunks_szad, prev);
2342                 RB_REMOVE(extent_tree_ad_s, &dss_chunks_ad, prev);
2343
2344                 RB_REMOVE(extent_tree_szad_s, &dss_chunks_szad, node);
2345                 node->addr = prev->addr;
2346                 node->size += prev->size;
2347                 RB_INSERT(extent_tree_szad_s, &dss_chunks_szad, node);
2348
2349                 base_node_dealloc(prev);
2350         }
2351
2352         return (node);
2353 }
2354
2355 static bool
2356 chunk_dealloc_dss(void *chunk, size_t size)
2357 {
2358
2359         malloc_mutex_lock(&dss_mtx);
2360         if ((uintptr_t)chunk >= (uintptr_t)dss_base
2361             && (uintptr_t)chunk < (uintptr_t)dss_max) {
2362                 extent_node_t *node;
2363
2364                 /* Try to coalesce with other unused chunks. */
2365                 node = chunk_dealloc_dss_record(chunk, size);
2366                 if (node != NULL) {
2367                         chunk = node->addr;
2368                         size = node->size;
2369                 }
2370
2371                 /* Get the current end of the DSS. */
2372                 dss_max = sbrk(0);
2373
2374                 /*
2375                  * Try to shrink the DSS if this chunk is at the end of the
2376                  * DSS.  The sbrk() call here is subject to a race condition
2377                  * with threads that use brk(2) or sbrk(2) directly, but the
2378                  * alternative would be to leak memory for the sake of poorly
2379                  * designed multi-threaded programs.
2380                  */
2381                 if ((void *)((uintptr_t)chunk + size) == dss_max
2382                     && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2383                         /* Success. */
2384                         dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2385
2386                         if (node != NULL) {
2387                                 RB_REMOVE(extent_tree_szad_s, &dss_chunks_szad,
2388                                     node);
2389                                 RB_REMOVE(extent_tree_ad_s, &dss_chunks_ad,
2390                                     node);
2391                                 base_node_dealloc(node);
2392                         }
2393                         malloc_mutex_unlock(&dss_mtx);
2394                 } else {
2395                         malloc_mutex_unlock(&dss_mtx);
2396 #ifdef WIN32
2397                         VirtualAlloc(chunk, size, MEM_RESET, PAGE_READWRITE);
2398 #elif (defined(DARWIN))
2399                         mmap(chunk, size, PROT_READ | PROT_WRITE, MAP_PRIVATE
2400                             | MAP_ANON | MAP_FIXED, -1, 0);
2401 #else
2402                         madvise(chunk, size, MADV_FREE);
2403 #endif
2404                 }
2405
2406                 return (false);
2407         }
2408         malloc_mutex_unlock(&dss_mtx);
2409
2410         return (true);
2411 }
2412 #endif
2413
2414 static void
2415 chunk_dealloc_mmap(void *chunk, size_t size)
2416 {
2417
2418         pages_unmap(chunk, size);
2419 }
2420
2421 static void
2422 chunk_dealloc(void *chunk, size_t size)
2423 {
2424
2425         assert(chunk != NULL);
2426         assert(CHUNK_ADDR2BASE(chunk) == chunk);
2427         assert(size != 0);
2428         assert((size & chunksize_mask) == 0);
2429
2430 #ifdef MALLOC_STATS
2431         stats_chunks.curchunks -= (size / chunksize);
2432 #endif
2433
2434 #ifdef MALLOC_DSS
2435         if (opt_dss) {
2436                 if (chunk_dealloc_dss(chunk, size) == false)
2437                         return;
2438         }
2439
2440         if (opt_mmap)
2441 #endif
2442                 chunk_dealloc_mmap(chunk, size);
2443 }
2444
2445 /*
2446  * End chunk management functions.
2447  */
2448 /******************************************************************************/
2449 /*
2450  * Begin arena.
2451  */
2452
2453 /*
2454  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2455  * code if necessary).
2456  */
2457 static inline arena_t *
2458 choose_arena(void)
2459 {
2460         arena_t *ret;
2461
2462         /*
2463          * We can only use TLS if this is a PIC library, since for the static
2464          * library version, libc's malloc is used by TLS allocation, which
2465          * introduces a bootstrapping issue.
2466          */
2467 #ifndef NO_TLS
2468         if (g_isthreaded == false) {
2469             /* Avoid the overhead of TLS for single-threaded operation. */
2470             return (arenas[0]);
2471         }
2472
2473 #  ifdef WIN32
2474         ret = TlsGetValue(tlsIndex);
2475 #  else
2476         ret = arenas_map;
2477 #  endif
2478
2479         if (ret == NULL) {
2480                 ret = choose_arena_hard();
2481                 assert(ret != NULL);
2482         }
2483 #else
2484         if (g_isthreaded && narenas > 1) {
2485                 unsigned long ind;
2486
2487                 /*
2488                  * Hash _pthread_self() to one of the arenas.  There is a prime
2489                  * number of arenas, so this has a reasonable chance of
2490                  * working.  Even so, the hashing can be easily thwarted by
2491                  * inconvenient _pthread_self() values.  Without specific
2492                  * knowledge of how _pthread_self() calculates values, we can't
2493                  * easily do much better than this.
2494                  */
2495                 ind = (unsigned long) _pthread_self() % narenas;
2496
2497                 /*
2498                  * Optimistially assume that arenas[ind] has been initialized.
2499                  * At worst, we find out that some other thread has already
2500                  * done so, after acquiring the lock in preparation.  Note that
2501                  * this lazy locking also has the effect of lazily forcing
2502                  * cache coherency; without the lock acquisition, there's no
2503                  * guarantee that modification of arenas[ind] by another thread
2504                  * would be seen on this CPU for an arbitrary amount of time.
2505                  *
2506                  * In general, this approach to modifying a synchronized value
2507                  * isn't a good idea, but in this case we only ever modify the
2508                  * value once, so things work out well.
2509                  */
2510                 ret = arenas[ind];
2511                 if (ret == NULL) {
2512                         /*
2513                          * Avoid races with another thread that may have already
2514                          * initialized arenas[ind].
2515                          */
2516                         malloc_spin_lock(&arenas_lock);
2517                         if (arenas[ind] == NULL)
2518                                 ret = arenas_extend((unsigned)ind);
2519                         else
2520                                 ret = arenas[ind];
2521                         malloc_spin_unlock(&arenas_lock);
2522                 }
2523         } else
2524                 ret = arenas[0];
2525 #endif
2526
2527         assert(ret != NULL);
2528         return (ret);
2529 }
2530
2531 #ifndef NO_TLS
2532 /*
2533  * Choose an arena based on a per-thread value (slow-path code only, called
2534  * only by choose_arena()).
2535  */
2536 static arena_t *
2537 choose_arena_hard(void)
2538 {
2539         arena_t *ret;
2540
2541         assert(g_isthreaded);
2542
2543 #ifdef MALLOC_LAZY_FREE
2544         /*
2545          * Seed the PRNG used for lazy deallocation.  Since seeding only occurs
2546          * on the first allocation by a thread, it is possible for a thread to
2547          * deallocate before seeding.  This is not a critical issue though,
2548          * since it is extremely unusual for an application to to use threads
2549          * that deallocate but *never* allocate, and because even if seeding
2550          * never occurs for multiple threads, they will tend to drift apart
2551          * unless some aspect of the application forces deallocation
2552          * synchronization.
2553          */
2554         SPRN(lazy_free, (uint32_t)(uintptr_t)(_pthread_self()));
2555 #endif
2556
2557 #ifdef MALLOC_BALANCE
2558         /*
2559          * Seed the PRNG used for arena load balancing.  We can get away with
2560          * using the same seed here as for the lazy_free PRNG without
2561          * introducing autocorrelation because the PRNG parameters are
2562          * distinct.
2563          */
2564         SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
2565 #endif
2566
2567         if (narenas > 1) {
2568 #ifdef MALLOC_BALANCE
2569                 unsigned ind;
2570
2571                 ind = PRN(balance, narenas_2pow);
2572                 if ((ret = arenas[ind]) == NULL) {
2573                         malloc_spin_lock(&arenas_lock);
2574                         if ((ret = arenas[ind]) == NULL)
2575                                 ret = arenas_extend(ind);
2576                         malloc_spin_unlock(&arenas_lock);
2577                 }
2578 #else
2579                 malloc_spin_lock(&arenas_lock);
2580                 if ((ret = arenas[next_arena]) == NULL)
2581                         ret = arenas_extend(next_arena);
2582                 next_arena = (next_arena + 1) % narenas;
2583                 malloc_spin_unlock(&arenas_lock);
2584 #endif
2585         } else
2586                 ret = arenas[0];
2587
2588 #ifdef WIN32
2589         TlsSetValue(tlsIndex, ret);
2590 #else
2591         arenas_map = ret;
2592 #endif
2593
2594         return (ret);
2595 }
2596 #endif
2597
2598 static inline int
2599 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2600 {
2601         uintptr_t a_chunk = (uintptr_t)a;
2602         uintptr_t b_chunk = (uintptr_t)b;
2603
2604         assert(a != NULL);
2605         assert(b != NULL);
2606
2607         return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2608 }
2609
2610 /* Generate red-black tree code for arena chunks. */
2611 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp)
2612
2613 static inline int
2614 arena_run_comp(arena_run_t *a, arena_run_t *b)
2615 {
2616         uintptr_t a_run = (uintptr_t)a;
2617         uintptr_t b_run = (uintptr_t)b;
2618
2619         assert(a != NULL);
2620         assert(b != NULL);
2621
2622         return ((a_run > b_run) - (a_run < b_run));
2623 }
2624
2625 /* Generate red-black tree code for arena runs. */
2626 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp)
2627
2628 static extent_node_t *
2629 arena_chunk_node_alloc(arena_chunk_t *chunk)
2630 {
2631         extent_node_t *ret;
2632
2633         ret = RB_MIN(extent_tree_ad_s, &chunk->nodes);
2634         if (ret != NULL)
2635                 RB_REMOVE(extent_tree_ad_s, &chunk->nodes, ret);
2636         else {
2637                 ret = chunk->nodes_past;
2638                 chunk->nodes_past = (extent_node_t *)
2639                     ((uintptr_t)chunk->nodes_past + sizeof(extent_node_t));
2640                 assert((uintptr_t)ret + sizeof(extent_node_t) <=
2641                     (uintptr_t)chunk + (arena_chunk_header_npages <<
2642                     pagesize_2pow));
2643         }
2644
2645         return (ret);
2646 }
2647
2648 static void
2649 arena_chunk_node_dealloc(arena_chunk_t *chunk, extent_node_t *node)
2650 {
2651
2652         node->addr = (void *)node;
2653         RB_INSERT(extent_tree_ad_s, &chunk->nodes, node);
2654 }
2655
2656 static inline void *
2657 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2658 {
2659         void *ret;
2660         unsigned i, mask, bit, regind;
2661
2662         assert(run->magic == ARENA_RUN_MAGIC);
2663         assert(run->regs_minelm < bin->regs_mask_nelms);
2664
2665         /*
2666          * Move the first check outside the loop, so that run->regs_minelm can
2667          * be updated unconditionally, without the possibility of updating it
2668          * multiple times.
2669          */
2670         i = run->regs_minelm;
2671         mask = run->regs_mask[i];
2672         if (mask != 0) {
2673                 /* Usable allocation found. */
2674                 bit = ffs((int)mask) - 1;
2675
2676                 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2677                 assert(regind < bin->nregs);
2678                 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2679                     + (bin->reg_size * regind));
2680
2681                 /* Clear bit. */
2682                 mask ^= (1U << bit);
2683                 run->regs_mask[i] = mask;
2684
2685                 return (ret);
2686         }
2687
2688         for (i++; i < bin->regs_mask_nelms; i++) {
2689                 mask = run->regs_mask[i];
2690                 if (mask != 0) {
2691                         /* Usable allocation found. */
2692                         bit = ffs((int)mask) - 1;
2693
2694                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2695                         assert(regind < bin->nregs);
2696                         ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2697                             + (bin->reg_size * regind));
2698
2699                         /* Clear bit. */
2700                         mask ^= (1U << bit);
2701                         run->regs_mask[i] = mask;
2702
2703                         /*
2704                          * Make a note that nothing before this element
2705                          * contains a free region.
2706                          */
2707                         run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2708
2709                         return (ret);
2710                 }
2711         }
2712         /* Not reached. */
2713         assert(0);
2714         return (NULL);
2715 }
2716
2717 static inline void
2718 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2719 {
2720         /*
2721          * To divide by a number D that is not a power of two we multiply
2722          * by (2^21 / D) and then right shift by 21 positions.
2723          *
2724          *   X / D
2725          *
2726          * becomes
2727          *
2728          *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
2729          */
2730 #define SIZE_INV_SHIFT 21
2731 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
2732         static const unsigned size_invs[] = {
2733             SIZE_INV(3),
2734             SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2735             SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2736             SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2737             SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2738             SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2739             SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2740             SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2741 #if (QUANTUM_2POW_MIN < 4)
2742             ,
2743             SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
2744             SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
2745             SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
2746             SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
2747             SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
2748             SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
2749             SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
2750             SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
2751 #endif
2752         };
2753         unsigned diff, regind, elm, bit;
2754
2755         assert(run->magic == ARENA_RUN_MAGIC);
2756         assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
2757             >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
2758
2759         /*
2760          * Avoid doing division with a variable divisor if possible.  Using
2761          * actual division here can reduce allocator throughput by over 20%!
2762          */
2763         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2764         if ((size & (size - 1)) == 0) {
2765                 /*
2766                  * log2_table allows fast division of a power of two in the
2767                  * [1..128] range.
2768                  *
2769                  * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2770                  */
2771                 static const unsigned char log2_table[] = {
2772                     0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2773                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2774                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2775                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2776                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2777                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2778                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2779                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2780                 };
2781
2782                 if (size <= 128)
2783                         regind = (diff >> log2_table[size - 1]);
2784                 else if (size <= 32768)
2785                         regind = diff >> (8 + log2_table[(size >> 8) - 1]);
2786                 else {
2787                         /*
2788                          * The run size is too large for us to use the lookup
2789                          * table.  Use real division.
2790                          */
2791                         regind = diff / size;
2792                 }
2793         } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
2794             << QUANTUM_2POW_MIN) + 2) {
2795                 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
2796                 regind >>= SIZE_INV_SHIFT;
2797         } else {
2798                 /*
2799                  * size_invs isn't large enough to handle this size class, so
2800                  * calculate regind using actual division.  This only happens
2801                  * if the user increases small_max via the 'S' runtime
2802                  * configuration option.
2803                  */
2804                 regind = diff / size;
2805         };
2806         assert(diff == regind * size);
2807         assert(regind < bin->nregs);
2808
2809         elm = regind >> (SIZEOF_INT_2POW + 3);
2810         if (elm < run->regs_minelm)
2811                 run->regs_minelm = elm;
2812         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2813         assert((run->regs_mask[elm] & (1U << bit)) == 0);
2814         run->regs_mask[elm] |= (1U << bit);
2815 #undef SIZE_INV
2816 #undef SIZE_INV_SHIFT
2817 }
2818
2819 static void
2820 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool small,
2821     bool zero)
2822 {
2823         arena_chunk_t *chunk;
2824         size_t run_ind, total_pages, need_pages, rem_pages, i;
2825         extent_node_t *nodeA, *nodeB, key;
2826
2827         /* Insert a node into runs_alloced_ad for the first part of the run. */
2828         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2829         nodeA = arena_chunk_node_alloc(chunk);
2830         nodeA->addr = run;
2831         nodeA->size = size;
2832         RB_INSERT(extent_tree_ad_s, &arena->runs_alloced_ad, nodeA);
2833
2834         key.addr = run;
2835         nodeB = RB_FIND(extent_tree_ad_s, &arena->runs_avail_ad, &key);
2836         assert(nodeB != NULL);
2837
2838         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2839             >> pagesize_2pow);
2840         total_pages = nodeB->size >> pagesize_2pow;
2841         need_pages = (size >> pagesize_2pow);
2842         assert(need_pages > 0);
2843         assert(need_pages <= total_pages);
2844         assert(need_pages <= CHUNK_MAP_POS_MASK || small == false);
2845         rem_pages = total_pages - need_pages;
2846
2847         for (i = 0; i < need_pages; i++) {
2848 #ifdef MALLOC_DECOMMIT
2849                 /*
2850                  * Commit decommitted pages if necessary.  If a decommitted
2851                  * page is encountered, commit all needed adjacent decommitted
2852                  * pages in one operation, in order to reduce system call
2853                  * overhead.
2854                  */
2855                 if (chunk->map[run_ind + i] & CHUNK_MAP_DECOMMITTED) {
2856                         size_t j;
2857
2858                         /*
2859                          * Advance i+j to just past the index of the last page
2860                          * to commit.  Clear CHUNK_MAP_DECOMMITTED along the
2861                          * way.
2862                          */
2863                         for (j = 0; i + j < need_pages && (chunk->map[run_ind +
2864                             i + j] & CHUNK_MAP_DECOMMITTED); j++) {
2865                                 chunk->map[run_ind + i + j] ^=
2866                                     CHUNK_MAP_DECOMMITTED;
2867                         }
2868
2869                         pages_commit((void *)((uintptr_t)chunk + ((run_ind + i)
2870                             << pagesize_2pow)), (j << pagesize_2pow));
2871 #  ifdef MALLOC_STATS
2872                         arena->stats.ncommit++;
2873 #  endif
2874                 }
2875 #endif
2876
2877                 /* Zero if necessary. */
2878                 if (zero) {
2879                         if ((chunk->map[run_ind + i] & CHUNK_MAP_UNTOUCHED)
2880                             == 0) {
2881                                 memset((void *)((uintptr_t)chunk + ((run_ind
2882                                     + i) << pagesize_2pow)), 0, pagesize);
2883                                 /* CHUNK_MAP_UNTOUCHED is cleared below. */
2884                         }
2885                 }
2886
2887                 /* Update dirty page accounting. */
2888                 if (chunk->map[run_ind + i] & CHUNK_MAP_DIRTY) {
2889                         chunk->ndirty--;
2890                         arena->ndirty--;
2891                 }
2892
2893                 /* Initialize the chunk map. */
2894                 if (small)
2895                         chunk->map[run_ind + i] = (uint8_t)i;
2896                 else
2897                         chunk->map[run_ind + i] = CHUNK_MAP_LARGE;
2898         }
2899
2900         /* Keep track of trailing unused pages for later use. */
2901         RB_REMOVE(extent_tree_szad_s, &arena->runs_avail_szad, nodeB);
2902         if (rem_pages > 0) {
2903                 /*
2904                  * Update nodeB in runs_avail_*.  Its position within
2905                  * runs_avail_ad does not change.
2906                  */
2907                 nodeB->addr = (void *)((uintptr_t)nodeB->addr + size);
2908                 nodeB->size -= size;
2909                 RB_INSERT(extent_tree_szad_s, &arena->runs_avail_szad, nodeB);
2910         } else {
2911                 /* Remove nodeB from runs_avail_*. */
2912                 RB_REMOVE(extent_tree_ad_s, &arena->runs_avail_ad, nodeB);
2913                 arena_chunk_node_dealloc(chunk, nodeB);
2914         }
2915
2916         chunk->pages_used += need_pages;
2917 }
2918
2919 static arena_chunk_t *
2920 arena_chunk_alloc(arena_t *arena)
2921 {
2922         arena_chunk_t *chunk;
2923         extent_node_t *node;
2924
2925         if (arena->spare != NULL) {
2926                 chunk = arena->spare;
2927                 arena->spare = NULL;
2928         } else {
2929                 chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
2930                 if (chunk == NULL)
2931                         return (NULL);
2932 #ifdef MALLOC_STATS
2933                 arena->stats.mapped += chunksize;
2934 #endif
2935
2936                 chunk->arena = arena;
2937
2938                 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
2939
2940                 /*
2941                  * Claim that no pages are in use, since the header is merely
2942                  * overhead.
2943                  */
2944                 chunk->pages_used = 0;
2945                 chunk->ndirty = 0;
2946
2947                 /*
2948                  * Initialize the map to contain one maximal free untouched
2949                  * run.
2950                  */
2951                 memset(chunk->map, (CHUNK_MAP_LARGE | CHUNK_MAP_POS_MASK),
2952                     arena_chunk_header_npages);
2953                 memset(&chunk->map[arena_chunk_header_npages],
2954                     (CHUNK_MAP_UNTOUCHED
2955 #ifdef MALLOC_DECOMMIT
2956                     | CHUNK_MAP_DECOMMITTED
2957 #endif
2958                     ), (chunk_npages -
2959                     arena_chunk_header_npages));
2960
2961                 /* Initialize the tree of unused extent nodes. */
2962                 RB_INIT(&chunk->nodes);
2963                 chunk->nodes_past = (extent_node_t *)QUANTUM_CEILING(
2964                     (uintptr_t)&chunk->map[chunk_npages]);
2965
2966 #ifdef MALLOC_DECOMMIT
2967                 /*
2968                  * Start out decommitted, in order to force a closer
2969                  * correspondence between dirty pages and committed untouched
2970                  * pages.
2971                  */
2972                 pages_decommit((void *)((uintptr_t)chunk +
2973                     (arena_chunk_header_npages << pagesize_2pow)),
2974                     ((chunk_npages - arena_chunk_header_npages) <<
2975                     pagesize_2pow));
2976 #  ifdef MALLOC_STATS
2977                 arena->stats.ndecommit++;
2978                 arena->stats.decommitted += (chunk_npages -
2979                     arena_chunk_header_npages);
2980 #  endif
2981 #endif
2982         }
2983
2984         /* Insert the run into the runs_avail_* red-black trees. */
2985         node = arena_chunk_node_alloc(chunk);
2986         node->addr = (void *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2987             pagesize_2pow));
2988         node->size = chunksize - (arena_chunk_header_npages << pagesize_2pow);
2989         RB_INSERT(extent_tree_szad_s, &arena->runs_avail_szad, node);
2990         RB_INSERT(extent_tree_ad_s, &arena->runs_avail_ad, node);
2991
2992         return (chunk);
2993 }
2994
2995 static void
2996 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2997 {
2998         extent_node_t *node, key;
2999
3000         if (arena->spare != NULL) {
3001                 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks,
3002                     arena->spare);
3003                 arena->ndirty -= arena->spare->ndirty;
3004                 chunk_dealloc((void *)arena->spare, chunksize);
3005 #ifdef MALLOC_STATS
3006                 arena->stats.mapped -= chunksize;
3007 #endif
3008         }
3009
3010         /*
3011          * Remove run from the runs trees, regardless of whether this chunk
3012          * will be cached, so that the arena does not use it.  Dirty page
3013          * flushing only uses the chunks tree, so leaving this chunk in that
3014          * tree is sufficient for that purpose.
3015          */
3016         key.addr = (void *)((uintptr_t)chunk + (arena_chunk_header_npages <<
3017             pagesize_2pow));
3018         node = RB_FIND(extent_tree_ad_s, &arena->runs_avail_ad, &key);
3019         assert(node != NULL);
3020         RB_REMOVE(extent_tree_szad_s, &arena->runs_avail_szad, node);
3021         RB_REMOVE(extent_tree_ad_s, &arena->runs_avail_ad, node);
3022         arena_chunk_node_dealloc(chunk, node);
3023
3024         arena->spare = chunk;
3025 }
3026
3027 static arena_run_t *
3028 arena_run_alloc(arena_t *arena, size_t size, bool small, bool zero)
3029 {
3030         arena_chunk_t *chunk;
3031         arena_run_t *run;
3032         extent_node_t *node, key;
3033
3034         assert(size <= (chunksize - (arena_chunk_header_npages <<
3035             pagesize_2pow)));
3036         assert((size & pagesize_mask) == 0);
3037
3038         /* Search the arena's chunks for the lowest best fit. */
3039         key.addr = NULL;
3040         key.size = size;
3041         node = RB_NFIND(extent_tree_szad_s, &arena->runs_avail_szad, &key);
3042         if (node != NULL) {
3043                 run = (arena_run_t *)node->addr;
3044                 arena_run_split(arena, run, size, small, zero);
3045                 return (run);
3046         }
3047
3048         /*
3049          * No usable runs.  Create a new chunk from which to allocate the run.
3050          */
3051         chunk = arena_chunk_alloc(arena);
3052         if (chunk == NULL)
3053                 return (NULL);
3054         run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
3055             pagesize_2pow));
3056         /* Update page map. */
3057         arena_run_split(arena, run, size, small, zero);
3058         return (run);
3059 }
3060
3061 static void
3062 arena_purge(arena_t *arena)
3063 {
3064         arena_chunk_t *chunk;
3065 #ifdef MALLOC_DEBUG
3066         size_t ndirty;
3067
3068         ndirty = 0;
3069         RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
3070                 ndirty += chunk->ndirty;
3071         }
3072         assert(ndirty == arena->ndirty);
3073 #endif
3074         assert(arena->ndirty > opt_dirty_max);
3075
3076 #ifdef MALLOC_STATS
3077         arena->stats.npurge++;
3078 #endif
3079
3080         /*
3081          * Iterate downward through chunks until enough dirty memory has been
3082          * purged.
3083          */
3084         RB_FOREACH_REVERSE(chunk, arena_chunk_tree_s, &arena->chunks) {
3085                 if (chunk->ndirty > 0) {
3086                         size_t i;
3087
3088                         for (i = chunk_npages - 1; i >=
3089                             arena_chunk_header_npages; i--) {
3090                                 if (chunk->map[i] & CHUNK_MAP_DIRTY) {
3091                                         size_t npages;
3092
3093                                         chunk->map[i] = (CHUNK_MAP_LARGE |
3094 #ifdef MALLOC_DECOMMIT
3095                                             CHUNK_MAP_DECOMMITTED |
3096 #endif
3097                                             CHUNK_MAP_POS_MASK);
3098                                         chunk->ndirty--;
3099                                         arena->ndirty--;
3100                                         /* Find adjacent dirty run(s). */
3101                                         for (npages = 1; i >
3102                                             arena_chunk_header_npages &&
3103                                             (chunk->map[i - 1] &
3104                                             CHUNK_MAP_DIRTY); npages++) {
3105                                                 i--;
3106                                                 chunk->map[i] = (CHUNK_MAP_LARGE
3107 #ifdef MALLOC_DECOMMIT
3108                                                     | CHUNK_MAP_DECOMMITTED
3109 #endif
3110                                                     | CHUNK_MAP_POS_MASK);
3111                                                 chunk->ndirty--;
3112                                                 arena->ndirty--;
3113                                         }
3114
3115 #ifdef MALLOC_DECOMMIT
3116                                         pages_decommit((void *)((uintptr_t)
3117                                             chunk + (i << pagesize_2pow)),
3118                                             (npages << pagesize_2pow));
3119 #  ifdef MALLOC_STATS
3120                                         arena->stats.ndecommit++;
3121                                         arena->stats.decommitted += npages;
3122 #  endif
3123 #else
3124                                         madvise((void *)((uintptr_t)chunk + (i
3125                                             << pagesize_2pow)), pagesize *
3126                                             npages, MADV_FREE);
3127 #endif
3128 #ifdef MALLOC_STATS
3129                                         arena->stats.nmadvise++;
3130                                         arena->stats.purged += npages;
3131 #endif
3132                                 }
3133                         }
3134                 }
3135         }
3136 }
3137
3138 static void
3139 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
3140 {
3141         arena_chunk_t *chunk;
3142         extent_node_t *nodeA, *nodeB, *nodeC, key;
3143         size_t size, run_ind, run_pages;
3144
3145         /* Remove run from runs_alloced_ad. */
3146         key.addr = run;
3147         nodeB = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad, &key);
3148         assert(nodeB != NULL);
3149         RB_REMOVE(extent_tree_ad_s, &arena->runs_alloced_ad, nodeB);
3150         size = nodeB->size;
3151
3152         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
3153         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
3154             >> pagesize_2pow);
3155         assert(run_ind >= arena_chunk_header_npages);
3156         assert(run_ind < (chunksize >> pagesize_2pow));
3157         run_pages = (size >> pagesize_2pow);
3158
3159         /* Subtract pages from count of pages used in chunk. */
3160         chunk->pages_used -= run_pages;
3161
3162         if (dirty) {
3163                 size_t i;
3164
3165                 for (i = 0; i < run_pages; i++) {
3166                         assert((chunk->map[run_ind + i] & CHUNK_MAP_DIRTY) ==
3167                             0);
3168                         chunk->map[run_ind + i] |= CHUNK_MAP_DIRTY;
3169                         chunk->ndirty++;
3170                         arena->ndirty++;
3171                 }
3172         }
3173 #ifdef MALLOC_DEBUG
3174         /* Set map elements to a bogus value in order to aid error detection. */
3175         {
3176                 size_t i;
3177
3178                 for (i = 0; i < run_pages; i++) {
3179                         chunk->map[run_ind + i] |= (CHUNK_MAP_LARGE |
3180                             CHUNK_MAP_POS_MASK);
3181                 }
3182         }
3183 #endif
3184
3185         /* Try to coalesce forward. */
3186         key.addr = (void *)((uintptr_t)run + size);
3187         nodeC = RB_NFIND(extent_tree_ad_s, &arena->runs_avail_ad, &key);
3188         if (nodeC != NULL && nodeC->addr == key.addr) {
3189                 /*
3190                  * Coalesce forward.  This does not change the position within
3191                  * runs_avail_ad, so only remove/insert from/into
3192                  * runs_avail_szad.
3193                  */
3194                 RB_REMOVE(extent_tree_szad_s, &arena->runs_avail_szad, nodeC);
3195                 nodeC->addr = (void *)run;
3196                 nodeC->size += size;
3197                 RB_INSERT(extent_tree_szad_s, &arena->runs_avail_szad, nodeC);
3198                 arena_chunk_node_dealloc(chunk, nodeB);
3199                 nodeB = nodeC;
3200         } else {
3201                 /*
3202                  * Coalescing forward failed, so insert nodeB into runs_avail_*.
3203                  */
3204                 RB_INSERT(extent_tree_szad_s, &arena->runs_avail_szad, nodeB);
3205                 RB_INSERT(extent_tree_ad_s, &arena->runs_avail_ad, nodeB);
3206         }
3207
3208         /* Try to coalesce backward. */
3209         nodeA = RB_PREV(extent_tree_ad_s, &arena->runs_avail_ad, nodeB);
3210         if (nodeA != NULL && (void *)((uintptr_t)nodeA->addr + nodeA->size) ==
3211             (void *)run) {
3212                 /*
3213                  * Coalesce with previous run.  This does not change nodeB's
3214                  * position within runs_avail_ad, so only remove/insert
3215                  * from/into runs_avail_szad.
3216                  */
3217                 RB_REMOVE(extent_tree_szad_s, &arena->runs_avail_szad, nodeA);
3218                 RB_REMOVE(extent_tree_ad_s, &arena->runs_avail_ad, nodeA);
3219
3220                 RB_REMOVE(extent_tree_szad_s, &arena->runs_avail_szad, nodeB);
3221                 nodeB->addr = nodeA->addr;
3222                 nodeB->size += nodeA->size;
3223                 RB_INSERT(extent_tree_szad_s, &arena->runs_avail_szad, nodeB);
3224
3225                 arena_chunk_node_dealloc(chunk, nodeA);
3226         }
3227
3228         /* Deallocate chunk if it is now completely unused. */
3229         if (chunk->pages_used == 0)
3230                 arena_chunk_dealloc(arena, chunk);
3231
3232         /* Enforce opt_dirty_max. */
3233         if (arena->ndirty > opt_dirty_max)
3234                 arena_purge(arena);
3235 }
3236
3237 static void
3238 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, extent_node_t *nodeB,
3239     arena_run_t *run, size_t oldsize, size_t newsize)
3240 {
3241         extent_node_t *nodeA;
3242
3243         assert(nodeB->addr == run);
3244         assert(nodeB->size == oldsize);
3245         assert(oldsize > newsize);
3246
3247         /*
3248          * Update the run's node in runs_alloced_ad.  Its position does not
3249          * change.
3250          */
3251         nodeB->addr = (void *)((uintptr_t)run + (oldsize - newsize));
3252         nodeB->size = newsize;
3253
3254         /*
3255          * Insert a node into runs_alloced_ad so that arena_run_dalloc() can
3256          * treat the leading run as separately allocated.
3257          */
3258         nodeA = arena_chunk_node_alloc(chunk);
3259         nodeA->addr = (void *)run;
3260         nodeA->size = oldsize - newsize;
3261         RB_INSERT(extent_tree_ad_s, &arena->runs_alloced_ad, nodeA);
3262
3263         arena_run_dalloc(arena, (arena_run_t *)run, false);
3264 }
3265
3266 static void
3267 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, extent_node_t *nodeA,
3268     arena_run_t *run, size_t oldsize, size_t newsize, bool dirty)
3269 {
3270         extent_node_t *nodeB;
3271
3272         assert(nodeA->addr == run);
3273         assert(nodeA->size == oldsize);
3274         assert(oldsize > newsize);
3275
3276         /*
3277          * Update the run's node in runs_alloced_ad.  Its position does not
3278          * change.
3279          */
3280         nodeA->size = newsize;
3281
3282         /*
3283          * Insert a node into runs_alloced_ad so that arena_run_dalloc() can
3284          * treat the trailing run as separately allocated.
3285          */
3286         nodeB = arena_chunk_node_alloc(chunk);
3287         nodeB->addr = (void *)((uintptr_t)run + newsize);
3288         nodeB->size = oldsize - newsize;
3289         RB_INSERT(extent_tree_ad_s, &arena->runs_alloced_ad, nodeB);
3290
3291         arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
3292             dirty);
3293 }
3294
3295 static arena_run_t *
3296 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
3297 {
3298         arena_run_t *run;
3299         unsigned i, remainder;
3300
3301         /* Look for a usable run. */
3302         if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
3303                 /* run is guaranteed to have available space. */
3304                 RB_REMOVE(arena_run_tree_s, &bin->runs, run);
3305 #ifdef MALLOC_STATS
3306                 bin->stats.reruns++;
3307 #endif
3308                 return (run);
3309         }
3310         /* No existing runs have any space available. */
3311
3312         /* Allocate a new run. */
3313         run = arena_run_alloc(arena, bin->run_size, true, false);
3314         if (run == NULL)
3315                 return (NULL);
3316
3317         /* Initialize run internals. */
3318         run->bin = bin;
3319
3320         for (i = 0; i < bin->regs_mask_nelms; i++)
3321                 run->regs_mask[i] = UINT_MAX;
3322         remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
3323         if (remainder != 0) {
3324                 /* The last element has spare bits that need to be unset. */
3325                 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3))
3326                     - remainder));
3327         }
3328
3329         run->regs_minelm = 0;
3330
3331         run->nfree = bin->nregs;
3332 #ifdef MALLOC_DEBUG
3333         run->magic = ARENA_RUN_MAGIC;
3334 #endif
3335
3336 #ifdef MALLOC_STATS
3337         bin->stats.nruns++;
3338         bin->stats.curruns++;
3339         if (bin->stats.curruns > bin->stats.highruns)
3340                 bin->stats.highruns = bin->stats.curruns;
3341 #endif
3342         return (run);
3343 }
3344
3345 /* bin->runcur must have space available before this function is called. */
3346 static inline void *
3347 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3348 {
3349         void *ret;
3350         arena = 0;              /* this is just to quiet a compiler warning */
3351
3352         assert(run->magic == ARENA_RUN_MAGIC);
3353         assert(run->nfree > 0);
3354
3355         ret = arena_run_reg_alloc(run, bin);
3356         assert(ret != NULL);
3357         run->nfree--;
3358
3359         return (ret);
3360 }
3361
3362 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3363 static void *
3364 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3365 {
3366
3367         bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3368         if (bin->runcur == NULL)
3369                 return (NULL);
3370         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3371         assert(bin->runcur->nfree > 0);
3372
3373         return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3374 }
3375
3376 /*
3377  * Calculate bin->run_size such that it meets the following constraints:
3378  *
3379  *   *) bin->run_size >= min_run_size
3380  *   *) bin->run_size <= arena_maxclass
3381  *   *) bin->run_size <= RUN_MAX_SMALL
3382  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3383  *
3384  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3385  * also calculated here, since these settings are all interdependent.
3386  */
3387 static size_t
3388 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3389 {
3390         size_t try_run_size, good_run_size;
3391         unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3392         unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3393
3394         assert(min_run_size >= pagesize);
3395         assert(min_run_size <= arena_maxclass);
3396         assert(min_run_size <= RUN_MAX_SMALL);
3397
3398         /*
3399          * Calculate known-valid settings before entering the run_size
3400          * expansion loop, so that the first part of the loop always copies
3401          * valid settings.
3402          *
3403          * The do..while loop iteratively reduces the number of regions until
3404          * the run header and the regions no longer overlap.  A closed formula
3405          * would be quite messy, since there is an interdependency between the
3406          * header's mask length and the number of regions.
3407          */
3408         try_run_size = min_run_size;
3409         try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3410             + 1; /* Counter-act try_nregs-- in loop. */
3411         do {
3412                 try_nregs--;
3413                 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3414                     ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
3415                 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3416         } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3417             > try_reg0_offset);
3418
3419         /* run_size expansion loop. */
3420         do {
3421                 /*
3422                  * Copy valid settings before trying more aggressive settings.
3423                  */
3424                 good_run_size = try_run_size;
3425                 good_nregs = try_nregs;
3426                 good_mask_nelms = try_mask_nelms;
3427                 good_reg0_offset = try_reg0_offset;
3428
3429                 /* Try more aggressive settings. */
3430                 try_run_size += pagesize;
3431                 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3432                     bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3433                 do {
3434                         try_nregs--;
3435                         try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3436                             ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
3437                             1 : 0);
3438                         try_reg0_offset = try_run_size - (try_nregs *
3439                             bin->reg_size);
3440                 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3441                     (try_mask_nelms - 1)) > try_reg0_offset);
3442         } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3443             && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3444             && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
3445
3446         assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3447             <= good_reg0_offset);
3448         assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
3449
3450         /* Copy final settings. */
3451         bin->run_size = good_run_size;
3452         bin->nregs = good_nregs;
3453         bin->regs_mask_nelms = good_mask_nelms;
3454         bin->reg0_offset = good_reg0_offset;
3455
3456         return (good_run_size);
3457 }
3458
3459 #ifdef MALLOC_BALANCE
3460 static inline void
3461 arena_lock_balance(arena_t *arena)
3462 {
3463         unsigned contention;
3464
3465         contention = malloc_spin_lock(&arena->lock);
3466         if (narenas > 1) {
3467                 /*
3468                  * Calculate the exponentially averaged contention for this
3469                  * arena.  Due to integer math always rounding down, this value
3470                  * decays somewhat faster then normal.
3471                  */
3472                 arena->contention = (((uint64_t)arena->contention
3473                     * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
3474                     + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
3475                 if (arena->contention >= opt_balance_threshold)
3476                         arena_lock_balance_hard(arena);
3477         }
3478 }
3479
3480 static void
3481 arena_lock_balance_hard(arena_t *arena)
3482 {
3483         uint32_t ind;
3484
3485         arena->contention = 0;
3486 #ifdef MALLOC_STATS
3487         arena->stats.nbalance++;
3488 #endif
3489         ind = PRN(balance, narenas_2pow);
3490         if (arenas[ind] != NULL) {
3491 #ifdef WIN32
3492                 TlsSetValue(tlsIndex, arenas[ind]);
3493 #else
3494                 arenas_map = arenas[ind];
3495 #endif
3496         } else {
3497                 malloc_spin_lock(&arenas_lock);
3498                 if (arenas[ind] != NULL) {
3499 #ifdef WIN32
3500                         TlsSetValue(tlsIndex, arenas[ind]);
3501 #else
3502                         arenas_map = arenas[ind];
3503 #endif
3504                 } else {
3505 #ifdef WIN32
3506                         TlsSetValue(tlsIndex, arenas_extend(ind));
3507 #else
3508                         arenas_map = arenas_extend(ind);
3509 #endif
3510                 }
3511                 malloc_spin_unlock(&arenas_lock);
3512         }
3513 }
3514 #endif
3515
3516 static inline void *
3517 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3518 {
3519         void *ret;
3520         arena_bin_t *bin;
3521         arena_run_t *run;
3522
3523         if (size < small_min) {
3524                 /* Tiny. */
3525                 size = pow2_ceil(size);
3526                 bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
3527                     1)))];
3528 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
3529                 /*
3530                  * Bin calculation is always correct, but we may need
3531                  * to fix size for the purposes of assertions and/or
3532                  * stats accuracy.
3533                  */
3534                 if (size < (1U << TINY_MIN_2POW))
3535                         size = (1U << TINY_MIN_2POW);
3536 #endif
3537         } else if (size <= small_max) {
3538                 /* Quantum-spaced. */
3539                 size = QUANTUM_CEILING(size);
3540                 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
3541                     - 1];
3542         } else {
3543                 /* Sub-page. */
3544                 size = pow2_ceil(size);
3545                 bin = &arena->bins[ntbins + nqbins
3546                     + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
3547         }
3548         assert(size == bin->reg_size);
3549
3550 #ifdef MALLOC_BALANCE
3551         arena_lock_balance(arena);
3552 #else
3553         malloc_spin_lock(&arena->lock);
3554 #endif
3555         if ((run = bin->runcur) != NULL && run->nfree > 0)
3556                 ret = arena_bin_malloc_easy(arena, bin, run);
3557         else
3558                 ret = arena_bin_malloc_hard(arena, bin);
3559
3560         if (ret == NULL) {
3561                 malloc_spin_unlock(&arena->lock);
3562                 return (NULL);
3563         }
3564
3565 #ifdef MALLOC_STATS
3566         bin->stats.nrequests++;
3567         arena->stats.nmalloc_small++;
3568         arena->stats.allocated_small += size;
3569 #endif
3570         malloc_spin_unlock(&arena->lock);
3571
3572         if (zero == false) {
3573 #ifdef MALLOC_FILL
3574                 if (opt_junk)
3575                         memset(ret, 0xa5, size);
3576                 else if (opt_zero)
3577                         memset(ret, 0, size);
3578 #endif
3579         } else
3580                 memset(ret, 0, size);
3581
3582         return (ret);
3583 }
3584
3585 static void *
3586 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3587 {
3588         void *ret;
3589
3590         /* Large allocation. */
3591         size = PAGE_CEILING(size);
3592 #ifdef MALLOC_BALANCE
3593         arena_lock_balance(arena);
3594 #else
3595         malloc_spin_lock(&arena->lock);
3596 #endif
3597         ret = (void *)arena_run_alloc(arena, size, false, zero);
3598         if (ret == NULL) {
3599                 malloc_spin_unlock(&arena->lock);
3600                 return (NULL);
3601         }
3602 #ifdef MALLOC_STATS
3603         arena->stats.nmalloc_large++;
3604         arena->stats.allocated_large += size;
3605 #endif
3606         malloc_spin_unlock(&arena->lock);
3607
3608         if (zero == false) {
3609 #ifdef MALLOC_FILL
3610                 if (opt_junk)
3611                         memset(ret, 0xa5, size);
3612                 else if (opt_zero)
3613                         memset(ret, 0, size);
3614 #endif
3615         }
3616
3617         return (ret);
3618 }
3619
3620 static inline void *
3621 arena_malloc(arena_t *arena, size_t size, bool zero)
3622 {
3623
3624         assert(arena != NULL);
3625         assert(arena->magic == ARENA_MAGIC);
3626         assert(size != 0);
3627         assert(QUANTUM_CEILING(size) <= arena_maxclass);
3628
3629 /* #ifdef USE_STATS_MEMORY */
3630 /*     arena->mi.uordblks += size; */
3631 /* #endif */
3632         if (size <= bin_maxclass) {
3633                 return (arena_malloc_small(arena, size, zero));
3634         } else
3635                 return (arena_malloc_large(arena, size, zero));
3636 }
3637
3638 static inline void *
3639 imalloc(size_t size)
3640 {
3641
3642         assert(size != 0);
3643         if (size <= arena_maxclass)
3644                 return (arena_malloc(choose_arena(), size, false));
3645         else
3646                 return (huge_malloc(size, false));
3647 }
3648
3649 static inline void *
3650 icalloc(size_t size)
3651 {
3652         if (size <= arena_maxclass)
3653                 return (arena_malloc(choose_arena(), size, true));
3654         else
3655                 return (huge_malloc(size, true));
3656 }
3657
3658 /* Only handles large allocations that require more than page alignment. */
3659 static void *
3660 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3661 {
3662         void *ret;
3663         size_t offset;
3664         arena_chunk_t *chunk;
3665         extent_node_t *node, key;
3666
3667         assert((size & pagesize_mask) == 0);
3668         assert((alignment & pagesize_mask) == 0);
3669
3670 #ifdef MALLOC_BALANCE
3671         arena_lock_balance(arena);
3672 #else
3673         malloc_spin_lock(&arena->lock);
3674 #endif
3675         ret = (void *)arena_run_alloc(arena, alloc_size, false, false);
3676         if (ret == NULL) {
3677                 malloc_spin_unlock(&arena->lock);
3678                 return (NULL);
3679         }
3680
3681         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3682
3683         offset = (uintptr_t)ret & (alignment - 1);
3684         assert((offset & pagesize_mask) == 0);
3685         assert(offset < alloc_size);
3686         if (offset == 0) {
3687                 /*
3688                  * Update the run's node in runs_alloced_ad.  Its position
3689                  * does not change.
3690                  */
3691                 key.addr = ret;
3692                 node = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad, &key);
3693                 assert(node != NULL);
3694
3695                 arena_run_trim_tail(arena, chunk, node, ret, alloc_size, size,
3696                     false);
3697         } else {
3698                 size_t leadsize, trailsize;
3699
3700                 /*
3701                  * Update the run's node in runs_alloced_ad.  Its position
3702                  * does not change.
3703                  */
3704                 key.addr = ret;
3705                 node = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad, &key);
3706                 assert(node != NULL);
3707
3708                 leadsize = alignment - offset;
3709                 if (leadsize > 0) {
3710                         arena_run_trim_head(arena, chunk, node, ret, alloc_size,
3711                             alloc_size - leadsize);
3712                         ret = (void *)((uintptr_t)ret + leadsize);
3713                 }
3714
3715                 trailsize = alloc_size - leadsize - size;
3716                 if (trailsize != 0) {
3717                         /* Trim trailing space. */
3718                         assert(trailsize < alloc_size);
3719                         arena_run_trim_tail(arena, chunk, node, ret, size +
3720                             trailsize, size, false);
3721                 }
3722         }
3723
3724 #ifdef MALLOC_STATS
3725         arena->stats.nmalloc_large++;
3726         arena->stats.allocated_large += size;
3727 #endif
3728         malloc_spin_unlock(&arena->lock);
3729
3730 #ifdef MALLOC_FILL
3731         if (opt_junk)
3732                 memset(ret, 0xa5, size);
3733         else if (opt_zero)
3734                 memset(ret, 0, size);
3735 #endif
3736         return (ret);
3737 }
3738
3739 static inline void *
3740 ipalloc(size_t alignment, size_t size)
3741 {
3742         void *ret;
3743         size_t ceil_size;
3744
3745         /*
3746          * Round size up to the nearest multiple of alignment.
3747          *
3748          * This done, we can take advantage of the fact that for each small
3749          * size class, every object is aligned at the smallest power of two
3750          * that is non-zero in the base two representation of the size.  For
3751          * example:
3752          *
3753          *   Size |   Base 2 | Minimum alignment
3754          *   -----+----------+------------------
3755          *     96 |  1100000 |  32
3756          *    144 | 10100000 |  32
3757          *    192 | 11000000 |  64
3758          *
3759          * Depending on runtime settings, it is possible that arena_malloc()
3760          * will further round up to a power of two, but that never causes
3761          * correctness issues.
3762          */
3763         ceil_size = (size + (alignment - 1)) & (-alignment);
3764         /*
3765          * (ceil_size < size) protects against the combination of maximal
3766          * alignment and size greater than maximal alignment.
3767          */
3768         if (ceil_size < size) {
3769                 /* size_t overflow. */
3770                 return (NULL);
3771         }
3772
3773         if (ceil_size <= pagesize || (alignment <= pagesize
3774             && ceil_size <= arena_maxclass))
3775                 ret = arena_malloc(choose_arena(), ceil_size, false);
3776         else {
3777                 size_t run_size;
3778
3779                 /*
3780                  * We can't achieve sub-page alignment, so round up alignment
3781                  * permanently; it makes later calculations simpler.
3782                  */
3783                 alignment = PAGE_CEILING(alignment);
3784                 ceil_size = PAGE_CEILING(size);
3785                 /*
3786                  * (ceil_size < size) protects against very large sizes within
3787                  * pagesize of SIZE_T_MAX.
3788                  *
3789                  * (ceil_size + alignment < ceil_size) protects against the
3790                  * combination of maximal alignment and ceil_size large enough
3791                  * to cause overflow.  This is similar to the first overflow
3792                  * check above, but it needs to be repeated due to the new
3793                  * ceil_size value, which may now be *equal* to maximal
3794                  * alignment, whereas before we only detected overflow if the
3795                  * original size was *greater* than maximal alignment.
3796                  */
3797                 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3798                         /* size_t overflow. */
3799                         return (NULL);
3800                 }
3801
3802                 /*
3803                  * Calculate the size of the over-size run that arena_palloc()
3804                  * would need to allocate in order to guarantee the alignment.
3805                  */
3806                 if (ceil_size >= alignment)
3807                         run_size = ceil_size + alignment - pagesize;
3808                 else {
3809                         /*
3810                          * It is possible that (alignment << 1) will cause
3811                          * overflow, but it doesn't matter because we also
3812                          * subtract pagesize, which in the case of overflow
3813                          * leaves us with a very large run_size.  That causes
3814                          * the first conditional below to fail, which means
3815                          * that the bogus run_size value never gets used for
3816                          * anything important.
3817                          */
3818                         run_size = (alignment << 1) - pagesize;
3819                 }
3820
3821                 if (run_size <= arena_maxclass) {
3822                         ret = arena_palloc(choose_arena(), alignment, ceil_size,
3823                             run_size);
3824                 } else if (alignment <= chunksize)
3825                         ret = huge_malloc(ceil_size, false);
3826                 else
3827                         ret = huge_palloc(alignment, ceil_size);
3828         }
3829
3830         assert(((uintptr_t)ret & (alignment - 1)) == 0);
3831         return (ret);
3832 }
3833
3834 /* Return the size of the allocation pointed to by ptr. */
3835 static size_t
3836 arena_salloc(const void *ptr)
3837 {
3838         size_t ret;
3839         arena_chunk_t *chunk;
3840         arena_chunk_map_t mapelm;
3841         size_t pageind;
3842
3843         assert(ptr != NULL);
3844         assert(CHUNK_ADDR2BASE(ptr) != ptr);
3845
3846         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3847         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
3848         mapelm = chunk->map[pageind];
3849         if ((mapelm & CHUNK_MAP_LARGE) == 0) {
3850                 arena_run_t *run;
3851
3852                 /* Small allocation size is in the run header. */
3853                 pageind -= (mapelm & CHUNK_MAP_POS_MASK);
3854                 run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
3855                     pagesize_2pow));
3856                 assert(run->magic == ARENA_RUN_MAGIC);
3857                 ret = run->bin->reg_size;
3858         } else {
3859                 arena_t *arena = chunk->arena;
3860                 extent_node_t *node, key;
3861
3862                 /* Large allocation size is in the extent tree. */
3863                 assert((mapelm & CHUNK_MAP_POS_MASK) == 0);
3864                 arena = chunk->arena;
3865                 malloc_spin_lock(&arena->lock);
3866                 key.addr = (void *)ptr;
3867                 node = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad, &key);
3868                 assert(node != NULL);
3869                 ret = node->size;
3870                 malloc_spin_unlock(&arena->lock);
3871         }
3872
3873         return (ret);
3874 }
3875
3876 static inline size_t
3877 isalloc(const void *ptr)
3878 {
3879         size_t ret;
3880         arena_chunk_t *chunk;
3881
3882         assert(ptr != NULL);
3883
3884         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3885         if (chunk != ptr) {
3886                 /* Region. */
3887                 assert(chunk->arena->magic == ARENA_MAGIC);
3888
3889                 ret = arena_salloc(ptr);
3890         } else {
3891                 extent_node_t *node, key;
3892
3893                 /* Chunk (huge allocation). */
3894
3895                 malloc_mutex_lock(&huge_mtx);
3896
3897                 /* Extract from tree of huge allocations. */
3898                 key.addr = __DECONST(void *, ptr);
3899                 node = RB_FIND(extent_tree_ad_s, &huge, &key);
3900                 assert(node != NULL);
3901
3902                 ret = node->size;
3903
3904                 malloc_mutex_unlock(&huge_mtx);
3905         }
3906
3907         return (ret);
3908 }
3909
3910 static inline void
3911 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3912     size_t pageind, arena_chunk_map_t mapelm)
3913 {
3914         arena_run_t *run;
3915         arena_bin_t *bin;
3916         size_t size;
3917
3918         pageind -= (mapelm & CHUNK_MAP_POS_MASK);
3919
3920         run = (arena_run_t *)((uintptr_t)chunk + (pageind << pagesize_2pow));
3921         assert(run->magic == ARENA_RUN_MAGIC);
3922         bin = run->bin;
3923         size = bin->reg_size;
3924
3925 /* #ifdef USE_STATS_MEMORY */
3926 /*         arena->mi.fordblks += size; */
3927 /* #endif */
3928 #ifdef MALLOC_FILL
3929         if (opt_junk)
3930                 memset(ptr, 0x5a, size);
3931 #endif
3932
3933         arena_run_reg_dalloc(run, bin, ptr, size);
3934         run->nfree++;
3935
3936         if (run->nfree == bin->nregs) {
3937                 /* Deallocate run. */
3938                 if (run == bin->runcur)
3939                         bin->runcur = NULL;
3940                 else if (bin->nregs != 1) {
3941                         /*
3942                          * This block's conditional is necessary because if the
3943                          * run only contains one region, then it never gets
3944                          * inserted into the non-full runs tree.
3945                          */
3946                         RB_REMOVE(arena_run_tree_s, &bin->runs, run);
3947                 }
3948 #ifdef MALLOC_DEBUG
3949                 run->magic = 0;
3950 #endif
3951                 arena_run_dalloc(arena, run, true);
3952 #ifdef MALLOC_STATS
3953                 bin->stats.curruns--;
3954 #endif
3955         } else if (run->nfree == 1 && run != bin->runcur) {
3956                 /*
3957                  * Make sure that bin->runcur always refers to the lowest
3958                  * non-full run, if one exists.
3959                  */
3960                 if (bin->runcur == NULL)
3961                         bin->runcur = run;
3962                 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3963                         /* Switch runcur. */
3964                         if (bin->runcur->nfree > 0) {
3965                                 /* Insert runcur. */
3966                                 RB_INSERT(arena_run_tree_s, &bin->runs,
3967                                     bin->runcur);
3968                         }
3969                         bin->runcur = run;
3970                 } else
3971                         RB_INSERT(arena_run_tree_s, &bin->runs, run);
3972         }
3973 #ifdef MALLOC_STATS
3974         arena->stats.allocated_small -= size;
3975         arena->stats.ndalloc_small++;
3976 #endif
3977 }
3978
3979 #ifdef MALLOC_LAZY_FREE
3980 static inline void
3981 arena_dalloc_lazy(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3982     size_t pageind, arena_chunk_map_t *mapelm)
3983 {
3984         void **free_cache = arena->free_cache;
3985         unsigned i, slot;
3986
3987         if (g_isthreaded == false || opt_lazy_free_2pow < 0) {
3988                 malloc_spin_lock(&arena->lock);
3989                 arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
3990                 malloc_spin_unlock(&arena->lock);
3991                 return;
3992         }
3993
3994         for (i = 0; i < LAZY_FREE_NPROBES; i++) {
3995                 slot = PRN(lazy_free, opt_lazy_free_2pow);
3996                 if (atomic_cmpset_ptr((uintptr_t *)&free_cache[slot],
3997                     (uintptr_t)NULL, (uintptr_t)ptr)) {
3998                         return;
3999                 }
4000         }
4001
4002         arena_dalloc_lazy_hard(arena, chunk, ptr, pageind, mapelm);
4003 }
4004
4005 static void
4006 arena_dalloc_lazy_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4007     size_t pageind, arena_chunk_map_t *mapelm)
4008 {
4009         void **free_cache = arena->free_cache;
4010         unsigned i, slot;
4011
4012         malloc_spin_lock(&arena->lock);
4013         arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
4014
4015         /*
4016          * Check whether another thread already cleared the cache.  It is
4017          * possible that another thread cleared the cache *and* this slot was
4018          * already refilled, which could result in a mostly fruitless cache
4019          * sweep, but such a sequence of events causes no correctness issues.
4020          */
4021         if ((ptr = (void *)atomic_readandclear_ptr(
4022             (uintptr_t *)&free_cache[slot]))
4023             != NULL) {
4024                 unsigned lazy_free_mask;
4025
4026                 /*
4027                  * Clear the cache, since we failed to find a slot.  It is
4028                  * possible that other threads will continue to insert objects
4029                  * into the cache while this one sweeps, but that is okay,
4030                  * since on average the cache is still swept with the same
4031                  * frequency.
4032                  */
4033
4034                 /* Handle pointer at current slot. */
4035                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4036                 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >>
4037                     pagesize_2pow);
4038                 mapelm = &chunk->map[pageind];
4039                 arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
4040
4041                 /* Sweep remainder of slots. */
4042                 lazy_free_mask = (1U << opt_lazy_free_2pow) - 1;
4043                 for (i = (slot + 1) & lazy_free_mask;
4044                      i != slot;
4045                      i = (i + 1) & lazy_free_mask) {
4046                         ptr = (void *)atomic_readandclear_ptr(
4047                             (uintptr_t *)&free_cache[i]);
4048                         if (ptr != NULL) {
4049                                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4050                                 pageind = (((uintptr_t)ptr - (uintptr_t)chunk)
4051                                     >> pagesize_2pow);
4052                                 mapelm = &chunk->map[pageind];
4053                                 arena_dalloc_small(arena, chunk, ptr, pageind,
4054                                     *mapelm);
4055                         }
4056                 }
4057         }
4058
4059         malloc_spin_unlock(&arena->lock);
4060 }
4061 #endif
4062
4063 static void
4064 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4065 {
4066         /* Large allocation. */
4067         malloc_spin_lock(&arena->lock);
4068
4069         chunk = 0;              /* this is just to quiet a compiler warning */
4070 #ifdef MALLOC_FILL
4071 #ifndef MALLOC_STATS
4072         if (opt_junk)
4073 #endif
4074 #endif
4075         {
4076                 extent_node_t *node, key;
4077                 size_t size;
4078
4079                 key.addr = ptr;
4080                 node = RB_FIND(extent_tree_ad_s,
4081                     &arena->runs_alloced_ad, &key);
4082                 assert(node != NULL);
4083                 size = node->size;
4084 #ifdef MALLOC_FILL
4085 #ifdef MALLOC_STATS
4086                 if (opt_junk)
4087 #endif
4088                         memset(ptr, 0x5a, size);
4089 #endif
4090 /* #ifdef USE_STATS_MEMORY */
4091 /*         arena->mi.fordblks += size; */
4092 /* #endif */
4093 #ifdef MALLOC_STATS
4094                 arena->stats.allocated_large -= size;
4095 #endif
4096         }
4097 #ifdef MALLOC_STATS
4098         arena->stats.ndalloc_large++;
4099 #endif
4100
4101         arena_run_dalloc(arena, (arena_run_t *)ptr, true);
4102         malloc_spin_unlock(&arena->lock);
4103 }
4104
4105 static inline void
4106 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4107 {
4108         size_t pageind;
4109         arena_chunk_map_t *mapelm;
4110
4111         assert(arena != NULL);
4112         assert(arena->magic == ARENA_MAGIC);
4113         assert(chunk->arena == arena);
4114         assert(ptr != NULL);
4115         assert(CHUNK_ADDR2BASE(ptr) != ptr);
4116
4117         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
4118         mapelm = &chunk->map[pageind];
4119         if ((*mapelm & CHUNK_MAP_LARGE) == 0) {
4120                 /* Small allocation. */
4121 #ifdef MALLOC_LAZY_FREE
4122                 arena_dalloc_lazy(arena, chunk, ptr, pageind, mapelm);
4123 #else
4124                 malloc_spin_lock(&arena->lock);
4125                 arena_dalloc_small(arena, chunk, ptr, pageind, *mapelm);
4126                 malloc_spin_unlock(&arena->lock);
4127 #endif
4128         } else {
4129                 assert((*mapelm & CHUNK_MAP_POS_MASK) == 0);
4130                 arena_dalloc_large(arena, chunk, ptr);
4131         }
4132 }
4133
4134 static inline void
4135 idalloc(void *ptr)
4136 {
4137         arena_chunk_t *chunk;
4138
4139         assert(ptr != NULL);
4140
4141         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4142         if (chunk != ptr)
4143                 arena_dalloc(chunk->arena, chunk, ptr);
4144         else
4145                 huge_dalloc(ptr);
4146 }
4147
4148 static void
4149 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4150     size_t size, size_t oldsize)
4151 {
4152         extent_node_t *node, key;
4153
4154         assert(size < oldsize);
4155
4156         /*
4157          * Shrink the run, and make trailing pages available for other
4158          * allocations.
4159          */
4160         key.addr = (void *)((uintptr_t)ptr);
4161 #ifdef MALLOC_BALANCE
4162         arena_lock_balance(arena);
4163 #else
4164         malloc_spin_lock(&arena->lock);
4165 #endif
4166         node = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad, &key);
4167         assert(node != NULL);
4168         arena_run_trim_tail(arena, chunk, node, (arena_run_t *)ptr, oldsize,
4169             size, true);
4170 #ifdef MALLOC_STATS
4171         arena->stats.allocated_large -= oldsize - size;
4172 #endif
4173         malloc_spin_unlock(&arena->lock);
4174 }
4175
4176 static bool
4177 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4178     size_t size, size_t oldsize)
4179 {
4180         extent_node_t *nodeC, key;
4181
4182         /* Try to extend the run. */
4183         assert(size > oldsize);
4184         key.addr = (void *)((uintptr_t)ptr + oldsize);
4185 #ifdef MALLOC_BALANCE
4186         arena_lock_balance(arena);
4187 #else
4188         malloc_spin_lock(&arena->lock);
4189 #endif
4190         nodeC = RB_FIND(extent_tree_ad_s, &arena->runs_avail_ad, &key);
4191         if (nodeC != NULL && oldsize + nodeC->size >= size) {
4192                 extent_node_t *nodeA, *nodeB;
4193
4194                 /*
4195                  * The next run is available and sufficiently large.  Split the
4196                  * following run, then merge the first part with the existing
4197                  * allocation.  This results in a bit more tree manipulation
4198                  * than absolutely necessary, but it substantially simplifies
4199                  * the code.
4200                  */
4201                 arena_run_split(arena, (arena_run_t *)nodeC->addr, size -
4202                     oldsize, false, false);
4203
4204                 key.addr = ptr;
4205                 nodeA = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad,
4206                     &key);
4207                 assert(nodeA != NULL);
4208
4209                 key.addr = (void *)((uintptr_t)ptr + oldsize);
4210                 nodeB = RB_FIND(extent_tree_ad_s, &arena->runs_alloced_ad,
4211                     &key);
4212                 assert(nodeB != NULL);
4213
4214                 nodeA->size += nodeB->size;
4215
4216                 RB_REMOVE(extent_tree_ad_s, &arena->runs_alloced_ad, nodeB);
4217                 arena_chunk_node_dealloc(chunk, nodeB);
4218
4219 #ifdef MALLOC_STATS
4220                 arena->stats.allocated_large += size - oldsize;
4221 #endif
4222                 malloc_spin_unlock(&arena->lock);
4223                 return (false);
4224         }
4225         malloc_spin_unlock(&arena->lock);
4226
4227         return (true);
4228 }
4229
4230 /*
4231  * Try to resize a large allocation, in order to avoid copying.  This will
4232  * always fail if growing an object, and the following run is already in use.
4233  */
4234 static bool
4235 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
4236 {
4237         size_t psize;
4238
4239         psize = PAGE_CEILING(size);
4240         if (psize == oldsize) {
4241                 /* Same size class. */
4242 #ifdef MALLOC_FILL
4243                 if (opt_junk && size < oldsize) {
4244                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
4245                             size);
4246                 }
4247 #endif
4248                 return (false);
4249         } else {
4250                 arena_chunk_t *chunk;
4251                 arena_t *arena;
4252
4253                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4254                 arena = chunk->arena;
4255                 assert(arena->magic == ARENA_MAGIC);
4256
4257                 if (psize < oldsize) {
4258 #ifdef MALLOC_FILL
4259                         /* Fill before shrinking in order avoid a race. */
4260                         if (opt_junk) {
4261                                 memset((void *)((uintptr_t)ptr + size), 0x5a,
4262                                     oldsize - size);
4263                         }
4264 #endif
4265                         arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4266                             oldsize);
4267                         return (false);
4268                 } else {
4269                         bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4270                             psize, oldsize);
4271 #ifdef MALLOC_FILL
4272                         if (ret == false && opt_zero) {
4273                                 memset((void *)((uintptr_t)ptr + oldsize), 0,
4274                                     size - oldsize);
4275                         }
4276 #endif
4277                         return (ret);
4278                 }
4279         }
4280 }
4281
4282 static void *
4283 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4284 {
4285         void *ret;
4286         size_t copysize;
4287
4288         /* Try to avoid moving the allocation. */
4289         if (size < small_min) {
4290                 if (oldsize < small_min &&
4291                     ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
4292                     == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
4293                         goto IN_PLACE; /* Same size class. */
4294         } else if (size <= small_max) {
4295                 if (oldsize >= small_min && oldsize <= small_max &&
4296                     (QUANTUM_CEILING(size) >> opt_quantum_2pow)
4297                     == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
4298                         goto IN_PLACE; /* Same size class. */
4299         } else if (size <= bin_maxclass) {
4300                 if (oldsize > small_max && oldsize <= bin_maxclass &&
4301                     pow2_ceil(size) == pow2_ceil(oldsize))
4302                         goto IN_PLACE; /* Same size class. */
4303         } else if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
4304                 assert(size > bin_maxclass);
4305                 if (arena_ralloc_large(ptr, size, oldsize) == false)
4306                         return (ptr);
4307         }
4308
4309         /*
4310          * If we get here, then size and oldsize are different enough that we
4311          * need to move the object.  In that case, fall back to allocating new
4312          * space and copying.
4313          */
4314         ret = arena_malloc(choose_arena(), size, false);
4315         if (ret == NULL)
4316                 return (NULL);
4317
4318         /* Junk/zero-filling were already done by arena_malloc(). */
4319         copysize = (size < oldsize) ? size : oldsize;
4320 #ifdef VM_COPY_MIN
4321         if (copysize >= VM_COPY_MIN)
4322                 pages_copy(ret, ptr, copysize);
4323         else
4324 #endif
4325                 memcpy(ret, ptr, copysize);
4326         idalloc(ptr);
4327         return (ret);
4328 IN_PLACE:
4329 #ifdef MALLOC_FILL
4330         if (opt_junk && size < oldsize)
4331                 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4332         else if (opt_zero && size > oldsize)
4333                 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4334 #endif
4335         return (ptr);
4336 }
4337
4338 static inline void *
4339 iralloc(void *ptr, size_t size)
4340 {
4341         size_t oldsize;
4342
4343         assert(ptr != NULL);
4344         assert(size != 0);
4345
4346         oldsize = isalloc(ptr);
4347
4348         if (size <= arena_maxclass)
4349                 return (arena_ralloc(ptr, size, oldsize));
4350         else
4351                 return (huge_ralloc(ptr, size, oldsize));
4352 }
4353
4354 static bool
4355 arena_new(arena_t *arena)
4356 {
4357         unsigned i;
4358         arena_bin_t *bin;
4359         size_t pow2_size, prev_run_size;
4360
4361         if (malloc_spin_init(&arena->lock))
4362                 return (true);
4363
4364 #ifdef MALLOC_STATS
4365         memset(&arena->stats, 0, sizeof(arena_stats_t));
4366 #endif
4367
4368         /* Initialize chunks. */
4369         RB_INIT(&arena->chunks);
4370         arena->spare = NULL;
4371
4372         arena->ndirty = 0;
4373
4374         RB_INIT(&arena->runs_avail_szad);
4375         RB_INIT(&arena->runs_avail_ad);
4376         RB_INIT(&arena->runs_alloced_ad);
4377
4378 #ifdef MALLOC_BALANCE
4379         arena->contention = 0;
4380 #endif
4381 #ifdef MALLOC_LAZY_FREE
4382         if (opt_lazy_free_2pow >= 0) {
4383                 arena->free_cache = (void **) base_calloc(1, sizeof(void *)
4384                     * (1U << opt_lazy_free_2pow));
4385                 if (arena->free_cache == NULL)
4386                         return (true);
4387         } else
4388                 arena->free_cache = NULL;
4389 #endif
4390
4391         /* Initialize bins. */
4392         prev_run_size = pagesize;
4393
4394         /* (2^n)-spaced tiny bins. */
4395         for (i = 0; i < ntbins; i++) {
4396                 bin = &arena->bins[i];
4397                 bin->runcur = NULL;
4398                 RB_INIT(&bin->runs);
4399
4400                 bin->reg_size = (1U << (TINY_MIN_2POW + i));
4401
4402                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4403
4404 #ifdef MALLOC_STATS
4405                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4406 #endif
4407         }
4408
4409         /* Quantum-spaced bins. */
4410         for (; i < ntbins + nqbins; i++) {
4411                 bin = &arena->bins[i];
4412                 bin->runcur = NULL;
4413                 RB_INIT(&bin->runs);
4414
4415                 bin->reg_size = quantum * (i - ntbins + 1);
4416
4417                 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
4418                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4419
4420 #ifdef MALLOC_STATS
4421                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4422 #endif
4423         }
4424
4425         /* (2^n)-spaced sub-page bins. */
4426         for (; i < ntbins + nqbins + nsbins; i++) {
4427                 bin = &arena->bins[i];
4428                 bin->runcur = NULL;
4429                 RB_INIT(&bin->runs);
4430
4431                 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
4432
4433                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4434
4435 #ifdef MALLOC_STATS
4436                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4437 #endif
4438         }
4439
4440 #ifdef MALLOC_DEBUG
4441         arena->magic = ARENA_MAGIC;
4442 #endif
4443
4444         return (false);
4445 }
4446
4447 /* Create a new arena and insert it into the arenas array at index ind. */
4448 static arena_t *
4449 arenas_extend(unsigned ind)
4450 {
4451         arena_t *ret;
4452
4453         /* Allocate enough space for trailing bins. */
4454         ret = (arena_t *)base_alloc(sizeof(arena_t)
4455             + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
4456         if (ret != NULL && arena_new(ret) == false) {
4457                 arenas[ind] = ret;
4458                 return (ret);
4459         }
4460         /* Only reached if there is an OOM error. */
4461
4462         /*
4463          * OOM here is quite inconvenient to propagate, since dealing with it
4464          * would require a check for failure in the fast path.  Instead, punt
4465          * by using arenas[0].  In practice, this is an extremely unlikely
4466          * failure.
4467          */
4468         _malloc_message(_getprogname(),
4469             ": (malloc) Error initializing arena\n", "", "");
4470         if (opt_abort)
4471                 abort();
4472
4473         return (arenas[0]);
4474 }
4475
4476 /*
4477  * End arena.
4478  */
4479 /******************************************************************************/
4480 /*
4481  * Begin general internal functions.
4482  */
4483
4484 static void *
4485 huge_malloc(size_t size, bool zero)
4486 {
4487         void *ret;
4488         size_t csize;
4489         extent_node_t *node;
4490
4491         /* Allocate one or more contiguous chunks for this request. */
4492
4493         csize = CHUNK_CEILING(size);
4494         if (csize == 0) {
4495                 /* size is large enough to cause size_t wrap-around. */
4496                 return (NULL);
4497         }
4498
4499         /* Allocate an extent node with which to track the chunk. */
4500         node = base_node_alloc();
4501         if (node == NULL)
4502                 return (NULL);
4503
4504         ret = chunk_alloc(csize, zero);
4505         if (ret == NULL) {
4506                 base_node_dealloc(node);
4507                 return (NULL);
4508         }
4509
4510         /* Insert node into huge. */
4511         node->addr = ret;
4512         node->size = csize;
4513
4514         malloc_mutex_lock(&huge_mtx);
4515         RB_INSERT(extent_tree_ad_s, &huge, node);
4516 #ifdef MALLOC_STATS
4517         huge_nmalloc++;
4518         huge_allocated += csize;
4519 #endif
4520         malloc_mutex_unlock(&huge_mtx);
4521
4522 #ifdef MALLOC_FILL
4523         if (zero == false) {
4524                 if (opt_junk)
4525                         memset(ret, 0xa5, csize);
4526                 else if (opt_zero)
4527                         memset(ret, 0, csize);
4528         }
4529 #endif
4530
4531         return (ret);
4532 }
4533
4534 /* Only handles large allocations that require more than chunk alignment. */
4535 static void *
4536 huge_palloc(size_t alignment, size_t size)
4537 {
4538         void *ret;
4539         size_t alloc_size, chunk_size, offset;
4540         extent_node_t *node;
4541
4542         /*
4543          * This allocation requires alignment that is even larger than chunk
4544          * alignment.  This means that huge_malloc() isn't good enough.
4545          *
4546          * Allocate almost twice as many chunks as are demanded by the size or
4547          * alignment, in order to assure the alignment can be achieved, then
4548          * unmap leading and trailing chunks.
4549          */
4550         assert(alignment >= chunksize);
4551
4552         chunk_size = CHUNK_CEILING(size);
4553
4554         if (size >= alignment)
4555                 alloc_size = chunk_size + alignment - chunksize;
4556         else
4557                 alloc_size = (alignment << 1) - chunksize;
4558
4559         /* Allocate an extent node with which to track the chunk. */
4560         node = base_node_alloc();
4561         if (node == NULL)
4562                 return (NULL);
4563
4564         ret = chunk_alloc(alloc_size, false);
4565         if (ret == NULL) {
4566                 base_node_dealloc(node);
4567                 return (NULL);
4568         }
4569
4570         offset = (uintptr_t)ret & (alignment - 1);
4571         assert((offset & chunksize_mask) == 0);
4572         assert(offset < alloc_size);
4573         if (offset == 0) {
4574                 /* Trim trailing space. */
4575                 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4576                     - chunk_size);
4577         } else {
4578                 size_t trailsize;
4579
4580                 /* Trim leading space. */
4581                 chunk_dealloc(ret, alignment - offset);
4582
4583                 ret = (void *)((uintptr_t)ret + (alignment - offset));
4584
4585                 trailsize = alloc_size - (alignment - offset) - chunk_size;
4586                 if (trailsize != 0) {
4587                     /* Trim trailing space. */
4588                     assert(trailsize < alloc_size);
4589                     chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
4590                         trailsize);
4591                 }
4592         }
4593
4594         /* Insert node into huge. */
4595         node->addr = ret;
4596         node->size = chunk_size;
4597
4598         malloc_mutex_lock(&huge_mtx);
4599         RB_INSERT(extent_tree_ad_s, &huge, node);
4600 #ifdef MALLOC_STATS
4601         huge_nmalloc++;
4602         huge_allocated += chunk_size;
4603 #endif
4604         malloc_mutex_unlock(&huge_mtx);
4605
4606 #ifdef MALLOC_FILL
4607         if (opt_junk)
4608                 memset(ret, 0xa5, chunk_size);
4609         else if (opt_zero)
4610                 memset(ret, 0, chunk_size);
4611 #endif
4612
4613         return (ret);
4614 }
4615
4616 static void *
4617 huge_ralloc(void *ptr, size_t size, size_t oldsize)
4618 {
4619         void *ret;
4620         size_t copysize;
4621
4622         /* Avoid moving the allocation if the size class would not change. */
4623         if (oldsize > arena_maxclass &&
4624             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
4625 #ifdef MALLOC_FILL
4626                 if (opt_junk && size < oldsize) {
4627                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
4628                             - size);
4629                 } else if (opt_zero && size > oldsize) {
4630                         memset((void *)((uintptr_t)ptr + oldsize), 0, size
4631                             - oldsize);
4632                 }
4633 #endif
4634                 return (ptr);
4635         }
4636
4637         /*
4638          * If we get here, then size and oldsize are different enough that we
4639          * need to use a different size class.  In that case, fall back to
4640          * allocating new space and copying.
4641          */
4642         ret = huge_malloc(size, false);
4643         if (ret == NULL)
4644                 return (NULL);
4645
4646         copysize = (size < oldsize) ? size : oldsize;
4647 #ifdef VM_COPY_MIN
4648         if (copysize >= VM_COPY_MIN)
4649                 pages_copy(ret, ptr, copysize);
4650         else
4651 #endif
4652                 memcpy(ret, ptr, copysize);
4653         idalloc(ptr);
4654         return (ret);
4655 }
4656
4657 static void
4658 huge_dalloc(void *ptr)
4659 {
4660         extent_node_t *node, key;
4661
4662         malloc_mutex_lock(&huge_mtx);
4663
4664         /* Extract from tree of huge allocations. */
4665         key.addr = ptr;
4666         node = RB_FIND(extent_tree_ad_s, &huge, &key);
4667         assert(node != NULL);
4668         assert(node->addr == ptr);
4669         RB_REMOVE(extent_tree_ad_s, &huge, node);
4670
4671 #ifdef MALLOC_STATS
4672         huge_ndalloc++;
4673         huge_allocated -= node->size;
4674 #endif
4675
4676         malloc_mutex_unlock(&huge_mtx);
4677
4678         /* Unmap chunk. */
4679 #ifdef MALLOC_DSS
4680 #ifdef MALLOC_FILL
4681         if (opt_dss && opt_junk)
4682                 memset(node->addr, 0x5a, node->size);
4683 #endif
4684 #endif
4685         chunk_dealloc(node->addr, node->size);
4686
4687         base_node_dealloc(node);
4688 }
4689
4690 #ifdef BSD
4691 static inline unsigned
4692 malloc_ncpus(void)
4693 {
4694         unsigned ret;
4695         int mib[2];
4696         size_t len;
4697
4698         mib[0] = CTL_HW;
4699         mib[1] = HW_NCPU;
4700         len = sizeof(ret);
4701         if (sysctl(mib, 2, &ret, &len, (void *) 0, 0) == -1) {
4702                 /* Error. */
4703                 return (1);
4704         }
4705
4706         return (ret);
4707 }
4708 #elif (defined(LINUX_HOST))
4709 #include <fcntl.h>
4710
4711 static inline unsigned
4712 malloc_ncpus(void)
4713 {
4714         unsigned ret;
4715         int fd, nread, column;
4716         char buf[1];
4717         static const char matchstr[] = "processor\t:";
4718
4719         /*
4720          * sysconf(3) would be the preferred method for determining the number
4721          * of CPUs, but it uses malloc internally, which causes untennable
4722          * recursion during malloc initialization.
4723          */
4724         fd = open("/proc/cpuinfo", O_RDONLY);
4725         if (fd == -1)
4726                 return (1); /* Error. */
4727         /*
4728          * Count the number of occurrences of matchstr at the beginnings of
4729          * lines.  This treats hyperthreaded CPUs as multiple processors.
4730          */
4731         column = 0;
4732         ret = 0;
4733         while (true) {
4734                 nread = read(fd, &buf, sizeof(buf));
4735                 if (nread <= 0)
4736                         break; /* EOF or error. */
4737
4738                 if (buf[0] == '\n')
4739                         column = 0;
4740                 else if (column != -1) {
4741                         if (buf[0] == matchstr[column]) {
4742                                 column++;
4743                                 if (column == sizeof(matchstr) - 1) {
4744                                         column = -1;
4745                                         ret++;
4746                                 }
4747                         } else
4748                                 column = -1;
4749                 }
4750         }
4751         if (ret == 0)
4752                 ret = 1; /* Something went wrong in the parser. */
4753         close(fd);
4754
4755         return (ret);
4756 }
4757 #elif (defined(DARWIN))
4758 #include <mach/mach_init.h>
4759 #include <mach/mach_host.h>
4760
4761 static inline unsigned
4762 malloc_ncpus(void)
4763 {
4764         kern_return_t error;
4765         natural_t n;
4766         processor_info_array_t pinfo;
4767         mach_msg_type_number_t pinfocnt;
4768
4769         error = host_processor_info(mach_host_self(), PROCESSOR_BASIC_INFO,
4770                                     &n, &pinfo, &pinfocnt);
4771         if (error != KERN_SUCCESS)
4772                 return (1); /* Error. */
4773         else
4774                 return (n);
4775 }
4776 #elif (defined(HAVE_KSTAT))
4777 #include <kstat.h>
4778
4779 static inline unsigned
4780 malloc_ncpus(void)
4781 {
4782         unsigned ret;
4783         kstat_ctl_t *ctl;
4784         kstat_t *kstat;
4785         kstat_named_t *named;
4786         unsigned i;
4787
4788         if ((ctl = kstat_open()) == NULL)
4789                 return (1); /* Error. */
4790
4791         if ((kstat = kstat_lookup(ctl, "unix", -1, "system_misc")) == NULL)
4792                 return (1); /* Error. */
4793
4794         if (kstat_read(ctl, kstat, NULL) == -1)
4795                 return (1); /* Error. */
4796
4797         named = KSTAT_NAMED_PTR(kstat);
4798
4799         for (i = 0; i < kstat->ks_ndata; i++) {
4800                 if (strcmp(named[i].name, "ncpus") == 0) {
4801                         /* Figure out which one of these to actually use. */
4802                         switch(named[i].data_type) {
4803                         case KSTAT_DATA_INT32:
4804                                 ret = named[i].value.i32;
4805                                 break;
4806                         case KSTAT_DATA_UINT32:
4807                                 ret = named[i].value.ui32;
4808                                 break;
4809                         case KSTAT_DATA_INT64:
4810                                 ret = named[i].value.i64;
4811                                 break;
4812                         case KSTAT_DATA_UINT64:
4813                                 ret = named[i].value.ui64;
4814                                 break;
4815                         default:
4816                                 return (1); /* Error. */
4817                         }
4818                 }
4819         }
4820
4821         kstat_close(ctl); /* Don't bother checking for an error. */
4822
4823         return (ret);
4824 }
4825 #else
4826 static inline unsigned
4827 malloc_ncpus(void)
4828 {
4829
4830         /*
4831          * We lack a way to determine the number of CPUs on this platform, so
4832          * assume 1 CPU.
4833          */
4834         return (1);
4835 }
4836 #endif
4837
4838 static void
4839 malloc_print_stats(void)
4840 {
4841
4842         if (opt_print_stats) {
4843                 char s[UMAX2S_BUFSIZE];
4844                 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4845                     "");
4846                 _malloc_message("Assertions ",
4847 #ifdef NDEBUG
4848                     "disabled",
4849 #else
4850                     "enabled",
4851 #endif
4852                     "\n", "");
4853                 _malloc_message("Boolean MALLOC_OPTIONS: ",
4854                     opt_abort ? "A" : "a", "", "");
4855 #ifdef MALLOC_DSS
4856                 _malloc_message(opt_dss ? "D" : "d", "", "", "");
4857 #endif
4858 #ifdef MALLOC_FILL
4859                 _malloc_message(opt_junk ? "J" : "j", "", "", "");
4860 #endif
4861 #ifdef MALLOC_DSS
4862                 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
4863 #endif
4864                 _malloc_message("P", "", "", "");
4865 #ifdef MALLOC_UTRACE
4866                 _malloc_message(opt_utrace ? "U" : "u", "", "", "");
4867 #endif
4868 #ifdef MALLOC_SYSV
4869                 _malloc_message(opt_sysv ? "V" : "v", "", "", "");
4870 #endif
4871 #ifdef MALLOC_XMALLOC
4872                 _malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
4873 #endif
4874 #ifdef MALLOC_FILL
4875                 _malloc_message(opt_zero ? "Z" : "z", "", "", "");
4876 #endif
4877                 _malloc_message("\n", "", "", "");
4878
4879                 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
4880                 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
4881 #ifdef MALLOC_LAZY_FREE
4882                 if (opt_lazy_free_2pow >= 0) {
4883                         _malloc_message("Lazy free slots: ",
4884                             umax2s(1U << opt_lazy_free_2pow, s), "\n", "");
4885                 } else
4886                         _malloc_message("Lazy free slots: 0\n", "", "", "");
4887 #endif
4888 #ifdef MALLOC_BALANCE
4889                 _malloc_message("Arena balance threshold: ",
4890                     umax2s(opt_balance_threshold, s), "\n", "");
4891 #endif
4892                 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
4893                     "\n", "");
4894                 _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
4895                 _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
4896                     "");
4897                 _malloc_message("Max dirty pages per arena: ",
4898                     umax2s(opt_dirty_max, s), "\n", "");
4899
4900                 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
4901                 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
4902
4903 #ifdef MALLOC_STATS
4904                 {
4905                         size_t allocated, mapped;
4906 #ifdef MALLOC_BALANCE
4907                         uint64_t nbalance = 0;
4908 #endif
4909                         unsigned i;
4910                         arena_t *arena;
4911
4912                         /* Calculate and print allocated/mapped stats. */
4913
4914                         /* arenas. */
4915                         for (i = 0, allocated = 0; i < narenas; i++) {
4916                                 if (arenas[i] != NULL) {
4917                                         malloc_spin_lock(&arenas[i]->lock);
4918                                         allocated +=
4919                                             arenas[i]->stats.allocated_small;
4920                                         allocated +=
4921                                             arenas[i]->stats.allocated_large;
4922 #ifdef MALLOC_BALANCE
4923                                         nbalance += arenas[i]->stats.nbalance;
4924 #endif
4925                                         malloc_spin_unlock(&arenas[i]->lock);
4926                                 }
4927                         }
4928
4929                         /* huge/base. */
4930                         malloc_mutex_lock(&huge_mtx);
4931                         allocated += huge_allocated;
4932                         mapped = stats_chunks.curchunks * chunksize;
4933                         malloc_mutex_unlock(&huge_mtx);
4934
4935                         malloc_mutex_lock(&base_mtx);
4936                         mapped += base_mapped;
4937                         malloc_mutex_unlock(&base_mtx);
4938
4939 #ifdef WIN32
4940                         malloc_printf("Allocated: %lu, mapped: %lu\n",
4941                             allocated, mapped);
4942 #else
4943                         malloc_printf("Allocated: %zu, mapped: %zu\n",
4944                             allocated, mapped);
4945 #endif
4946
4947 #ifdef MALLOC_BALANCE
4948                         malloc_printf("Arena balance reassignments: %llu\n",
4949                             nbalance);
4950 #endif
4951
4952                         /* Print chunk stats. */
4953                         {
4954                                 chunk_stats_t chunks_stats;
4955
4956                                 malloc_mutex_lock(&huge_mtx);
4957                                 chunks_stats = stats_chunks;
4958                                 malloc_mutex_unlock(&huge_mtx);
4959
4960                                 malloc_printf("chunks: nchunks   "
4961                                     "highchunks    curchunks\n");
4962                                 malloc_printf("  %13llu%13lu%13lu\n",
4963                                     chunks_stats.nchunks,
4964                                     chunks_stats.highchunks,
4965                                     chunks_stats.curchunks);
4966                         }
4967
4968                         /* Print chunk stats. */
4969                         malloc_printf(
4970                             "huge: nmalloc      ndalloc    allocated\n");
4971 #ifdef WIN32
4972                         malloc_printf(" %12llu %12llu %12lu\n",
4973                             huge_nmalloc, huge_ndalloc, huge_allocated);
4974 #else
4975                         malloc_printf(" %12llu %12llu %12zu\n",
4976                             huge_nmalloc, huge_ndalloc, huge_allocated);
4977 #endif
4978                         /* Print stats for each arena. */
4979                         for (i = 0; i < narenas; i++) {
4980                                 arena = arenas[i];
4981                                 if (arena != NULL) {
4982                                         malloc_printf(
4983                                             "\narenas[%u]:\n", i);
4984                                         malloc_spin_lock(&arena->lock);
4985                                         stats_print(arena);
4986                                         malloc_spin_unlock(&arena->lock);
4987                                 }
4988                         }
4989                 }
4990 #endif /* #ifdef MALLOC_STATS */
4991                 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4992         }
4993 }
4994
4995 /*
4996  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4997  * implementation has to take pains to avoid infinite recursion during
4998  * initialization.
4999  */
5000 #if (defined(WIN32) || defined(DARWIN))
5001 #define malloc_init() false
5002 #else
5003 static inline bool
5004 malloc_init(void)
5005 {
5006         if (malloc_initialized == false)
5007                 return (malloc_init_hard());
5008
5009         return (false);
5010 }
5011 #endif
5012
5013 #ifndef WIN32
5014 static
5015 #endif
5016 bool
5017 malloc_init_hard(void)
5018 {
5019         unsigned i;
5020         char buf[PATH_MAX + 1];
5021         const char *opts;
5022         long result;
5023 #ifndef WIN32
5024         int linklen;
5025 #endif
5026
5027 #ifndef WIN32
5028         malloc_mutex_lock(&init_lock);
5029 #endif
5030
5031         if (malloc_initialized) {
5032                 /*
5033                  * Another thread initialized the allocator before this one
5034                  * acquired init_lock.
5035                  */
5036 #ifndef WIN32
5037                 malloc_mutex_unlock(&init_lock);
5038 #endif
5039                 return (false);
5040         }
5041
5042 #ifdef WIN32
5043         /* get a thread local storage index */
5044         tlsIndex = TlsAlloc();
5045 #endif
5046
5047         /* Get page size and number of CPUs */
5048 #ifdef WIN32
5049         {
5050                 SYSTEM_INFO info;
5051
5052                 GetSystemInfo(&info);
5053                 result = info.dwPageSize;
5054
5055                 pagesize = (unsigned) result;
5056
5057                 ncpus = info.dwNumberOfProcessors;
5058         }
5059 #else
5060         ncpus = malloc_ncpus();
5061
5062         result = sysconf(_SC_PAGESIZE);
5063         assert(result != -1);
5064
5065         pagesize = (unsigned) result;
5066 #endif
5067
5068         /*
5069          * We assume that pagesize is a power of 2 when calculating
5070          * pagesize_mask and pagesize_2pow.
5071          */
5072         assert(((result - 1) & result) == 0);
5073         pagesize_mask = result - 1;
5074         pagesize_2pow = ffs((int)result) - 1;
5075
5076 #ifdef MALLOC_LAZY_FREE
5077                 if (ncpus == 1)
5078                         opt_lazy_free_2pow = -1;
5079 #endif
5080
5081         for (i = 0; i < 3; i++) {
5082                 unsigned j;
5083
5084                 /* Get runtime configuration. */
5085                 switch (i) {
5086                 case 0:
5087 #ifndef WIN32
5088                         if ((linklen = readlink("/etc/malloc.conf", buf,
5089                                                 sizeof(buf) - 1)) != -1) {
5090                                 /*
5091                                  * Use the contents of the "/etc/malloc.conf"
5092                                  * symbolic link's name.
5093                                  */
5094                                 buf[linklen] = '\0';
5095                                 opts = buf;
5096                         } else
5097 #endif
5098                         {
5099                                 /* No configuration specified. */
5100                                 buf[0] = '\0';
5101                                 opts = buf;
5102                         }
5103                         break;
5104                 case 1:
5105 /*                      if (issetugid() == 0 && (opts = */
5106 /*                          getenv("MALLOC_OPTIONS")) != NULL) { */
5107 /*                              /\* */
5108 /*                               * Do nothing; opts is already initialized to */
5109 /*                               * the value of the MALLOC_OPTIONS environment */
5110 /*                               * variable. */
5111 /*                               *\/ */
5112 /*                      } else { */
5113                                 /* No configuration specified. */
5114                                 buf[0] = '\0';
5115                                 opts = buf;
5116 /*                      } */
5117                         break;
5118                 case 2:
5119                         if (_malloc_options != NULL) {
5120                                 /*
5121                                  * Use options that were compiled into the
5122                                  * program.
5123                                  */
5124                                 opts = _malloc_options;
5125                         } else {
5126                                 /* No configuration specified. */
5127                                 buf[0] = '\0';
5128                                 opts = buf;
5129                         }
5130                         break;
5131                 default:
5132                         /* NOTREACHED */
5133                         buf[0] = '\0';
5134                         opts = buf;
5135                         assert(false);
5136                 }
5137
5138                 for (j = 0; opts[j] != '\0'; j++) {
5139                         unsigned k, nreps;
5140                         bool nseen;
5141
5142                         /* Parse repetition count, if any. */
5143                         for (nreps = 0, nseen = false;; j++, nseen = true) {
5144                                 switch (opts[j]) {
5145                                         case '0': case '1': case '2': case '3':
5146                                         case '4': case '5': case '6': case '7':
5147                                         case '8': case '9':
5148                                                 nreps *= 10;
5149                                                 nreps += opts[j] - '0';
5150                                                 break;
5151                                         default:
5152                                                 goto MALLOC_OUT;
5153                                 }
5154                         }
5155 MALLOC_OUT:
5156                         if (nseen == false)
5157                                 nreps = 1;
5158
5159                         for (k = 0; k < nreps; k++) {
5160                                 switch (opts[j]) {
5161                                 case 'a':
5162                                         opt_abort = false;
5163                                         break;
5164                                 case 'A':
5165                                         opt_abort = true;
5166                                         break;
5167                                 case 'b':
5168 #ifdef MALLOC_BALANCE
5169                                         opt_balance_threshold >>= 1;
5170 #endif
5171                                         break;
5172                                 case 'B':
5173 #ifdef MALLOC_BALANCE
5174                                         if (opt_balance_threshold == 0)
5175                                                 opt_balance_threshold = 1;
5176                                         else if ((opt_balance_threshold << 1)
5177                                             > opt_balance_threshold)
5178                                                 opt_balance_threshold <<= 1;
5179 #endif
5180                                         break;
5181                                 case 'd':
5182 #ifdef MALLOC_DSS
5183                                         opt_dss = false;
5184 #endif
5185                                         break;
5186                                 case 'D':
5187 #ifdef MALLOC_DSS
5188                                         opt_dss = true;
5189 #endif
5190                                         break;
5191                                 case 'f':
5192                                         opt_dirty_max >>= 1;
5193                                         break;
5194                                 case 'F':
5195                                         if (opt_dirty_max == 0)
5196                                                 opt_dirty_max = 1;
5197                                         else if ((opt_dirty_max << 1) != 0)
5198                                                 opt_dirty_max <<= 1;
5199                                         break;
5200 #ifdef MALLOC_FILL
5201                                 case 'j':
5202                                         opt_junk = false;
5203                                         break;
5204                                 case 'J':
5205                                         opt_junk = true;
5206                                         break;
5207 #endif
5208                                 case 'k':
5209                                         /*
5210                                          * Chunks always require at least one
5211                                          * header page, so chunks can never be
5212                                          * smaller than two pages.
5213                                          */
5214                                         if (opt_chunk_2pow > pagesize_2pow + 1)
5215                                                 opt_chunk_2pow--;
5216                                         break;
5217                                 case 'K':
5218                                         if (opt_chunk_2pow + 1 <
5219                                             (sizeof(size_t) << 3))
5220                                                 opt_chunk_2pow++;
5221                                         break;
5222                                 case 'l':
5223 #ifdef MALLOC_LAZY_FREE
5224                                         if (opt_lazy_free_2pow >= 0)
5225                                                 opt_lazy_free_2pow--;
5226 #endif
5227                                         break;
5228                                 case 'L':
5229 #ifdef MALLOC_LAZY_FREE
5230                                         if (ncpus > 1)
5231                                                 opt_lazy_free_2pow++;
5232 #endif
5233                                         break;
5234                                 case 'm':
5235 #ifdef MALLOC_DSS
5236                                         opt_mmap = false;
5237 #endif
5238                                         break;
5239                                 case 'M':
5240 #ifdef MALLOC_DSS
5241                                         opt_mmap = true;
5242 #endif
5243                                         break;
5244                                 case 'n':
5245                                         opt_narenas_lshift--;
5246                                         break;
5247                                 case 'N':
5248                                         opt_narenas_lshift++;
5249                                         break;
5250                                 case 'p':
5251                                         opt_print_stats = false;
5252                                         break;
5253                                 case 'P':
5254                                         opt_print_stats = true;
5255                                         break;
5256                                 case 'q':
5257                                         if (opt_quantum_2pow > QUANTUM_2POW_MIN)
5258                                                 opt_quantum_2pow--;
5259                                         break;
5260                                 case 'Q':
5261                                         if (opt_quantum_2pow < pagesize_2pow -
5262                                             1)
5263                                                 opt_quantum_2pow++;
5264                                         break;
5265                                 case 's':
5266                                         if (opt_small_max_2pow >
5267                                             QUANTUM_2POW_MIN)
5268                                                 opt_small_max_2pow--;
5269                                         break;
5270                                 case 'S':
5271                                         if (opt_small_max_2pow < pagesize_2pow
5272                                             - 1)
5273                                                 opt_small_max_2pow++;
5274                                         break;
5275 #ifdef MALLOC_UTRACE
5276                                 case 'u':
5277                                         opt_utrace = false;
5278                                         break;
5279                                 case 'U':
5280                                         opt_utrace = true;
5281                                         break;
5282 #endif
5283 #ifdef MALLOC_SYSV
5284                                 case 'v':
5285                                         opt_sysv = false;
5286                                         break;
5287                                 case 'V':
5288                                         opt_sysv = true;
5289                                         break;
5290 #endif
5291 #ifdef MALLOC_XMALLOC
5292                                 case 'x':
5293                                         opt_xmalloc = false;
5294                                         break;
5295                                 case 'X':
5296                                         opt_xmalloc = true;
5297                                         break;
5298 #endif
5299 #ifdef MALLOC_FILL
5300                                 case 'z':
5301                                         opt_zero = false;
5302                                         break;
5303                                 case 'Z':
5304                                         opt_zero = true;
5305                                         break;
5306 #endif
5307                                 default: {
5308                                         char cbuf[2];
5309
5310                                         cbuf[0] = opts[j];
5311                                         cbuf[1] = '\0';
5312                                         _malloc_message(_getprogname(),
5313                                             ": (malloc) Unsupported character "
5314                                             "in malloc options: '", cbuf,
5315                                             "'\n");
5316                                 }
5317                                 }
5318                         }
5319                 }
5320         }
5321
5322 #ifdef MALLOC_DSS
5323         /* Make sure that there is some method for acquiring memory. */
5324         if (opt_dss == false && opt_mmap == false)
5325                 opt_mmap = true;
5326 #endif
5327
5328         /* Take care to call atexit() only once. */
5329         if (opt_print_stats) {
5330 #ifndef WIN32
5331                 /* Print statistics at exit. */
5332                 atexit(malloc_print_stats);
5333 #endif
5334         }
5335
5336         /* Set variables according to the value of opt_small_max_2pow. */
5337         if (opt_small_max_2pow < opt_quantum_2pow)
5338                 opt_small_max_2pow = opt_quantum_2pow;
5339         small_max = (1U << opt_small_max_2pow);
5340
5341         /* Set bin-related variables. */
5342         bin_maxclass = (pagesize >> 1);
5343         assert(opt_quantum_2pow >= TINY_MIN_2POW);
5344         ntbins = opt_quantum_2pow - TINY_MIN_2POW;
5345         assert(ntbins <= opt_quantum_2pow);
5346         nqbins = (small_max >> opt_quantum_2pow);
5347         nsbins = pagesize_2pow - opt_small_max_2pow - 1;
5348
5349         /* Set variables according to the value of opt_quantum_2pow. */
5350         quantum = (1U << opt_quantum_2pow);
5351         quantum_mask = quantum - 1;
5352         if (ntbins > 0)
5353                 small_min = (quantum >> 1) + 1;
5354         else
5355                 small_min = 1;
5356         assert(small_min <= quantum);
5357
5358         /* Set variables according to the value of opt_chunk_2pow. */
5359         chunksize = (1LU << opt_chunk_2pow);
5360         chunksize_mask = chunksize - 1;
5361         chunk_npages = (chunksize >> pagesize_2pow);
5362         {
5363                 size_t header_size;
5364
5365                 /*
5366                  * Compute the header size such that it is large
5367                  * enough to contain the page map and enough nodes for the
5368                  * worst case: one node per non-header page plus one extra for
5369                  * situations where we briefly have one more node allocated
5370                  * than we will need.
5371                  */
5372                 header_size = sizeof(arena_chunk_t) +
5373                     (sizeof(arena_chunk_map_t) * (chunk_npages - 1)) +
5374                     (sizeof(extent_node_t) * chunk_npages);
5375                 arena_chunk_header_npages = (header_size >> pagesize_2pow) +
5376                     ((header_size & pagesize_mask) != 0);
5377         }
5378         arena_maxclass = chunksize - (arena_chunk_header_npages <<
5379             pagesize_2pow);
5380 #ifdef MALLOC_LAZY_FREE
5381         /*
5382          * Make sure that allocating the free_cache does not exceed the limits
5383          * of what base_alloc() can handle.
5384          */
5385         while ((sizeof(void *) << opt_lazy_free_2pow) > chunksize)
5386                 opt_lazy_free_2pow--;
5387 #endif
5388
5389         UTRACE(0, 0, 0);
5390
5391 #ifdef MALLOC_STATS
5392         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5393 #endif
5394
5395         /* Various sanity checks that regard configuration. */
5396         assert(quantum >= sizeof(void *));
5397         assert(quantum <= pagesize);
5398         assert(chunksize >= pagesize);
5399         assert(quantum * 4 <= chunksize);
5400
5401         /* Initialize chunks data. */
5402         malloc_mutex_init(&huge_mtx);
5403         RB_INIT(&huge);
5404 #ifdef MALLOC_DSS
5405         malloc_mutex_init(&dss_mtx);
5406         dss_base = sbrk(0);
5407         dss_prev = dss_base;
5408         dss_max = dss_base;
5409         RB_INIT(&dss_chunks_szad);
5410         RB_INIT(&dss_chunks_ad);
5411 #endif
5412 #ifdef MALLOC_STATS
5413         huge_nmalloc = 0;
5414         huge_ndalloc = 0;
5415         huge_allocated = 0;
5416 #endif
5417
5418         /* Initialize base allocation data structures. */
5419 #ifdef MALLOC_STATS
5420         base_mapped = 0;
5421 #endif
5422 #ifdef MALLOC_DSS
5423         /*
5424          * Allocate a base chunk here, since it doesn't actually have to be
5425          * chunk-aligned.  Doing this before allocating any other chunks allows
5426          * the use of space that would otherwise be wasted.
5427          */
5428         if (opt_dss)
5429                 base_pages_alloc(0);
5430 #endif
5431         base_nodes = NULL;
5432         malloc_mutex_init(&base_mtx);
5433
5434         if (ncpus > 1) {
5435                 /*
5436                  * For SMP systems, create four times as many arenas as there
5437                  * are CPUs by default.
5438                  */
5439                 opt_narenas_lshift += 2;
5440         }
5441
5442         /* Determine how many arenas to use. */
5443         narenas = ncpus;
5444         if (opt_narenas_lshift > 0) {
5445                 if ((narenas << opt_narenas_lshift) > narenas)
5446                         narenas <<= opt_narenas_lshift;
5447                 /*
5448                  * Make sure not to exceed the limits of what base_alloc() can
5449                  * handle.
5450                  */
5451                 if (narenas * sizeof(arena_t *) > chunksize)
5452                         narenas = chunksize / sizeof(arena_t *);
5453         } else if (opt_narenas_lshift < 0) {
5454                 if ((narenas >> -opt_narenas_lshift) < narenas)
5455                         narenas >>= -opt_narenas_lshift;
5456                 /* Make sure there is at least one arena. */
5457                 if (narenas == 0)
5458                         narenas = 1;
5459         }
5460 #ifdef MALLOC_BALANCE
5461         assert(narenas != 0);
5462         for (narenas_2pow = 0;
5463              (narenas >> (narenas_2pow + 1)) != 0;
5464              narenas_2pow++);
5465 #endif
5466
5467 #ifdef NO_TLS
5468         if (narenas > 1) {
5469                 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5470                     23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5471                     89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5472                     151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5473                     223, 227, 229, 233, 239, 241, 251, 257, 263};
5474                 unsigned nprimes, parenas;
5475
5476                 /*
5477                  * Pick a prime number of hash arenas that is more than narenas
5478                  * so that direct hashing of pthread_self() pointers tends to
5479                  * spread allocations evenly among the arenas.
5480                  */
5481                 assert((narenas & 1) == 0); /* narenas must be even. */
5482                 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
5483                 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5484                 for (i = 1; i < nprimes; i++) {
5485                         if (primes[i] > narenas) {
5486                                 parenas = primes[i];
5487                                 break;
5488                         }
5489                 }
5490                 narenas = parenas;
5491         }
5492 #endif
5493
5494 #ifndef NO_TLS
5495 #  ifndef MALLOC_BALANCE
5496         next_arena = 0;
5497 #  endif
5498 #endif
5499
5500         /* Allocate and initialize arenas. */
5501         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5502         if (arenas == NULL) {
5503 #ifndef WIN32
5504                 malloc_mutex_unlock(&init_lock);
5505 #endif
5506                 return (true);
5507         }
5508         /*
5509          * Zero the array.  In practice, this should always be pre-zeroed,
5510          * since it was just mmap()ed, but let's be sure.
5511          */
5512         memset(arenas, 0, sizeof(arena_t *) * narenas);
5513
5514         /*
5515          * Initialize one arena here.  The rest are lazily created in
5516          * choose_arena_hard().
5517          */
5518         arenas_extend(0);
5519         if (arenas[0] == NULL) {
5520 #ifndef WIN32
5521                 malloc_mutex_unlock(&init_lock);
5522 #endif
5523                 return (true);
5524         }
5525 #ifndef NO_TLS
5526         /*
5527          * Assign the initial arena to the initial thread, in order to avoid
5528          * spurious creation of an extra arena if the application switches to
5529          * threaded mode.
5530          */
5531 #ifdef WIN32
5532         TlsSetValue(tlsIndex, arenas[0]);
5533 #else
5534         arenas_map = arenas[0];
5535 #endif
5536 #endif
5537
5538         /*
5539          * Seed here for the initial thread, since choose_arena_hard() is only
5540          * called for other threads.  The seed values don't really matter.
5541          */
5542 #ifdef MALLOC_LAZY_FREE
5543         SPRN(lazy_free, 42);
5544 #endif
5545 #ifdef MALLOC_BALANCE
5546         SPRN(balance, 42);
5547 #endif
5548
5549         malloc_spin_init(&arenas_lock);
5550
5551         malloc_initialized = true;
5552 #ifndef WIN32
5553         malloc_mutex_unlock(&init_lock);
5554 #endif
5555         return (false);
5556 }
5557
5558 /* XXX Why not just expose malloc_print_stats()? */
5559 #ifdef WIN32
5560 void
5561 malloc_shutdown()
5562 {
5563
5564         malloc_print_stats();
5565 }
5566 #endif
5567
5568 /*
5569  * End general internal functions.
5570  */
5571 /******************************************************************************/
5572 /*
5573  * Begin malloc(3)-compatible functions.
5574  */
5575
5576 DSOEXPORT
5577 #ifdef DARWIN
5578 inline void *
5579 moz_malloc(size_t size)
5580 #else
5581 void *
5582 malloc(size_t size)
5583 #endif
5584 {
5585         void *ret;
5586
5587         if (malloc_init()) {
5588                 ret = NULL;
5589                 goto RETURN;
5590         }
5591
5592         if (size == 0) {
5593 #ifdef MALLOC_SYSV
5594                 if (opt_sysv == false)
5595 #endif
5596                         size = 1;
5597 #ifdef MALLOC_SYSV
5598                 else {
5599                         ret = NULL;
5600                         goto RETURN;
5601                 }
5602 #endif
5603         }
5604
5605         ret = imalloc(size);
5606
5607 RETURN:
5608         if (ret == NULL) {
5609 #ifdef MALLOC_XMALLOC
5610                 if (opt_xmalloc) {
5611                         _malloc_message(_getprogname(),
5612                             ": (malloc) Error in malloc(): out of memory\n", "",
5613                             "");
5614                         abort();
5615                 }
5616 #endif
5617                 errno = ENOMEM;
5618         }
5619
5620         UTRACE(0, size, ret);
5621         return (ret);
5622 }
5623
5624 DSOEXPORT
5625 #ifdef DARWIN
5626 inline int
5627 moz_posix_memalign(void **memptr, size_t alignment, size_t size)
5628 #else
5629 int
5630 posix_memalign(void **memptr, size_t alignment, size_t size)
5631 #endif
5632 {
5633         int ret;
5634         void *result;
5635
5636         if (malloc_init())
5637                 result = NULL;
5638         else {
5639                 /* Make sure that alignment is a large enough power of 2. */
5640                 if (((alignment - 1) & alignment) != 0
5641                     || alignment < sizeof(void *)) {
5642 #ifdef MALLOC_XMALLOC
5643                         if (opt_xmalloc) {
5644                                 _malloc_message(_getprogname(),
5645                                     ": (malloc) Error in posix_memalign(): "
5646                                     "invalid alignment\n", "", "");
5647                                 abort();
5648                         }
5649 #endif
5650                         result = NULL;
5651                         ret = EINVAL;
5652                         goto RETURN;
5653                 }
5654
5655                 result = ipalloc(alignment, size);
5656         }
5657
5658         if (result == NULL) {
5659 #ifdef MALLOC_XMALLOC
5660                 if (opt_xmalloc) {
5661                         _malloc_message(_getprogname(),
5662                         ": (malloc) Error in posix_memalign(): out of memory\n",
5663                         "", "");
5664                         abort();
5665                 }
5666 #endif
5667                 ret = ENOMEM;
5668                 goto RETURN;
5669         }
5670
5671         *memptr = result;
5672         ret = 0;
5673
5674 RETURN:
5675         UTRACE(0, size, result);
5676         return (ret);
5677 }
5678
5679 DSOEXPORT
5680 #ifdef DARWIN
5681 inline void *
5682 moz_memalign(size_t alignment, size_t size)
5683 #else
5684 void *
5685 memalign(size_t alignment, size_t size)
5686 #endif
5687 {
5688         void *ret;
5689
5690 #ifdef DARWIN
5691         if (moz_posix_memalign(&ret, alignment, size) != 0)
5692 #else
5693         if (posix_memalign(&ret, alignment, size) != 0)
5694 #endif
5695                 return (NULL);
5696
5697         return ret;
5698 }
5699
5700 DSOEXPORT
5701 #ifdef DARWIN
5702 inline void *
5703 moz_valloc(size_t size)
5704 #else
5705 void *
5706 valloc(size_t size)
5707 #endif
5708 {
5709 #ifdef DARWIN
5710         return (moz_memalign(pagesize, size));
5711 #else
5712         return (memalign(pagesize, size));
5713 #endif
5714 }
5715
5716 DSOEXPORT
5717 #ifdef DARWIN
5718 inline void *
5719 moz_calloc(size_t num, size_t size)
5720 #else
5721 void *
5722 calloc(size_t num, size_t size)
5723 #endif
5724 {
5725         void *ret;
5726         size_t num_size;
5727
5728         if (malloc_init()) {
5729                 num_size = 0;
5730                 ret = NULL;
5731                 goto RETURN;
5732         }
5733
5734         num_size = num * size;
5735         if (num_size == 0) {
5736 #ifdef MALLOC_SYSV
5737                 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
5738 #endif
5739                         num_size = 1;
5740 #ifdef MALLOC_SYSV
5741                 else {
5742                         ret = NULL;
5743                         goto RETURN;
5744                 }
5745 #endif
5746         /*
5747          * Try to avoid division here.  We know that it isn't possible to
5748          * overflow during multiplication if neither operand uses any of the
5749          * most significant half of the bits in a size_t.
5750          */
5751         } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
5752             && (num_size / size != num)) {
5753                 /* size_t overflow. */
5754                 ret = NULL;
5755                 goto RETURN;
5756         }
5757
5758         ret = icalloc(num_size);
5759
5760 RETURN:
5761         if (ret == NULL) {
5762 #ifdef MALLOC_XMALLOC
5763                 if (opt_xmalloc) {
5764                         _malloc_message(_getprogname(),
5765                             ": (malloc) Error in calloc(): out of memory\n", "",
5766                             "");
5767                         abort();
5768                 }
5769 #endif
5770                 errno = ENOMEM;
5771         }
5772
5773         UTRACE(0, num_size, ret);
5774         return (ret);
5775 }
5776
5777 DSOEXPORT
5778 #ifdef DARWIN
5779 inline void *
5780 moz_realloc(void *ptr, size_t size)
5781 #else
5782 void *
5783 realloc(void *ptr, size_t size)
5784 #endif
5785 {
5786         void *ret;
5787
5788         if (size == 0) {
5789 #ifdef MALLOC_SYSV
5790                 if (opt_sysv == false)
5791 #endif
5792                         size = 1;
5793 #ifdef MALLOC_SYSV
5794                 else {
5795                         if (ptr != NULL)
5796                                 idalloc(ptr);
5797                         ret = NULL;
5798                         goto RETURN;
5799                 }
5800 #endif
5801         }
5802
5803         if (ptr != NULL) {
5804                 assert(malloc_initialized);
5805
5806                 ret = iralloc(ptr, size);
5807
5808                 if (ret == NULL) {
5809 #ifdef MALLOC_XMALLOC
5810                         if (opt_xmalloc) {
5811                                 _malloc_message(_getprogname(),
5812                                     ": (malloc) Error in realloc(): out of "
5813                                     "memory\n", "", "");
5814                                 abort();
5815                         }
5816 #endif
5817                         errno = ENOMEM;
5818                 }
5819         } else {
5820                 if (malloc_init())
5821                         ret = NULL;
5822                 else
5823                         ret = imalloc(size);
5824
5825                 if (ret == NULL) {
5826 #ifdef MALLOC_XMALLOC
5827                         if (opt_xmalloc) {
5828                                 _malloc_message(_getprogname(),
5829                                     ": (malloc) Error in realloc(): out of "
5830                                     "memory\n", "", "");
5831                                 abort();
5832                         }
5833 #endif
5834                         errno = ENOMEM;
5835                 }
5836         }
5837
5838 #ifdef MALLOC_SYSV
5839 RETURN:
5840 #endif
5841         UTRACE(ptr, size, ret);
5842         return (ret);
5843 }
5844
5845 DSOEXPORT
5846 #ifdef DARWIN
5847 inline void
5848 moz_free(void *ptr)
5849 #else
5850 void
5851 free(void *ptr)
5852 #endif
5853 {
5854
5855         UTRACE(ptr, 0, 0);
5856         if (ptr != NULL) {
5857                 assert(malloc_initialized);
5858
5859                 idalloc(ptr);
5860         }
5861 }
5862
5863 /*
5864  * This is a work in progress, which doesn't even get used.
5865  */
5866 #ifdef USE_STATS_MEMORY
5867 /* Utility to update mallinfo for malloc_stats() and mallinfo() */
5868
5869 static void
5870 malloc_update_mallinfo(arena_t *arena, struct mallinfo *mi)
5871 {
5872   int i, navail;
5873   size_t avail;
5874   avail = chunksize * narenas;
5875
5876 #if 0
5877   malloc_printf("allocated:   %u %u %u\n",
5878                 arena->stats.allocated_small + arena->stats.allocated_large,
5879                 arena->stats.nmalloc_small + arena->stats.nmalloc_large,
5880                 arena->stats.ndalloc_small + arena->stats.ndalloc_large);
5881
5882   malloc_printf("available:   %u\n", avail);
5883 #endif
5884
5885 /*   stats_print(arena); */
5886   malloc_spin_lock(&arena->lock);
5887
5888   /* clear unused fields */
5889   mi->arena = mi->ordblks = mi->smblks = mi->usmblks = mi->fsmblks = 0;
5890   mi->uordblks = arena->stats.allocated_small + arena->stats.allocated_large;
5891   mi->fordblks = arena->stats.mapped - mi->uordblks;
5892
5893   mi->hblks = mi->hblkhd = 0;
5894   mi->keepcost = arena->stats.mapped;
5895   malloc_spin_unlock(&arena->lock);
5896 }
5897
5898 #ifndef DARWIN
5899 DSOEXPORT
5900 struct mallinfo
5901 mallinfo()
5902 {
5903     struct mallinfo mi;
5904
5905     /* Calculate and print allocated/mapped stats. */
5906     malloc_update_mallinfo(choose_arena(), &mi);
5907     return mi;
5908 }
5909 # endif
5910 #endif
5911
5912 /*
5913  * End malloc(3)-compatible functions.
5914  */
5915 /******************************************************************************/
5916 /*
5917  * Begin non-standard functions.
5918  */
5919
5920 DSOEXPORT
5921 #ifdef DARWIN
5922 inline size_t
5923 moz_malloc_usable_size(const void *ptr)
5924 #else
5925 size_t
5926 malloc_usable_size(const void *ptr)
5927 #endif
5928 {
5929
5930         assert(ptr != NULL);
5931
5932         return (isalloc(ptr));
5933 }
5934
5935 #ifdef WIN32
5936 void*
5937 _recalloc(void *ptr, size_t count, size_t size)
5938 {
5939         size_t oldsize = (ptr != NULL) ? isalloc(ptr) : 0;
5940         size_t newsize = count * size;
5941
5942         /*
5943          * In order for all trailing bytes to be zeroed, the caller needs to
5944          * use calloc(), followed by recalloc().  However, the current calloc()
5945          * implementation only zeros the bytes requested, so if recalloc() is
5946          * to work 100% correctly, calloc() will need to change to zero
5947          * trailing bytes.
5948          */
5949
5950         ptr = realloc(ptr, newsize);
5951         if (ptr != NULL && oldsize < newsize) {
5952                 memset((void *)((uintptr_t)ptr + oldsize), 0, newsize -
5953                     oldsize);
5954         }
5955
5956         return ptr;
5957 }
5958
5959 /*
5960  * This impl of _expand doesn't ever actually expand or shrink blocks: it
5961  * simply replies that you may continue using a shrunk block.
5962  */
5963 void*
5964 _expand(void *ptr, size_t newsize)
5965 {
5966         if (isalloc(ptr) >= newsize)
5967                 return ptr;
5968
5969         return NULL;
5970 }
5971
5972 size_t
5973 _msize(const void *ptr)
5974 {
5975
5976         return malloc_usable_size(ptr);
5977 }
5978 #endif
5979
5980
5981 /*
5982  * End non-standard functions.
5983  */
5984 /******************************************************************************/
5985 /*
5986  * Begin library-private functions, used by threading libraries for protection
5987  * of malloc during fork().  These functions are only called if the program is
5988  * running in threaded mode, so there is no need to check whether the program
5989  * is threaded here.
5990  */
5991
5992 void
5993 _malloc_prefork(void)
5994 {
5995         unsigned i;
5996
5997         /* Acquire all mutexes in a safe order. */
5998
5999         malloc_spin_lock(&arenas_lock);
6000         for (i = 0; i < narenas; i++) {
6001                 if (arenas[i] != NULL)
6002                         malloc_spin_lock(&arenas[i]->lock);
6003         }
6004         malloc_spin_unlock(&arenas_lock);
6005
6006         malloc_mutex_lock(&base_mtx);
6007
6008         malloc_mutex_lock(&huge_mtx);
6009
6010 #ifdef MALLOC_DSS
6011         malloc_mutex_lock(&dss_mtx);
6012 #endif
6013 }
6014
6015 void
6016 _malloc_postfork(void)
6017 {
6018         unsigned i;
6019
6020         /* Release all mutexes, now that fork() has completed. */
6021
6022 #ifdef MALLOC_DSS
6023         malloc_mutex_unlock(&dss_mtx);
6024 #endif
6025
6026         malloc_mutex_unlock(&huge_mtx);
6027
6028         malloc_mutex_unlock(&base_mtx);
6029
6030         malloc_spin_lock(&arenas_lock);
6031         for (i = 0; i < narenas; i++) {
6032                 if (arenas[i] != NULL)
6033                         malloc_spin_unlock(&arenas[i]->lock);
6034         }
6035         malloc_spin_unlock(&arenas_lock);
6036 }
6037
6038 /*
6039  * End library-private functions.
6040  */
6041 /******************************************************************************/
6042
6043
6044 #ifdef DARWIN
6045 static malloc_zone_t zone;
6046 static struct malloc_introspection_t zone_introspect;
6047
6048 static size_t
6049 zone_size(malloc_zone_t *zone, void *ptr)
6050 {
6051         size_t ret = 0;
6052         arena_chunk_t *chunk;
6053
6054         /*
6055          * There appear to be places within Darwin (such as setenv(3)) that
6056          * cause calls to this function with pointers that *no* zone owns.  If
6057          * we knew that all pointers were owned by *some* zone, we could split
6058          * our zone into two parts, and use one as the default allocator and
6059          * the other as the default deallocator/reallocator.  Since that will
6060          * not work in practice, we must check all pointers to assure that they
6061          * reside within a mapped chunk before determining size.
6062          */
6063
6064         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
6065         if (chunk != ptr) {
6066                 arena_t *arena;
6067                 unsigned i;
6068                 arena_t *arenas_snapshot[narenas];
6069
6070                 /*
6071                  * Make a copy of the arenas vector while holding arenas_lock in
6072                  * order to assure that all elements are up to date in this
6073                  * processor's cache.  Do this outside the following loop in
6074                  * order to reduce lock acquisitions.
6075                  */
6076                 malloc_spin_lock(&arenas_lock);
6077                 memcpy(&arenas_snapshot, arenas, sizeof(arena_t *) * narenas);
6078                 malloc_spin_unlock(&arenas_lock);
6079
6080                 /* Region. */
6081                 for (i = 0; i < narenas; i++) {
6082                         arena = arenas_snapshot[i];
6083
6084                         if (arena != NULL) {
6085                                 bool own;
6086
6087                                 /* Make sure ptr is within a chunk. */
6088                                 malloc_spin_lock(&arena->lock);
6089                                 if (RB_FIND(arena_chunk_tree_s, &arena->chunks,
6090                                     chunk) == chunk)
6091                                         own = true;
6092                                 else
6093                                         own = false;
6094                                 malloc_spin_unlock(&arena->lock);
6095
6096                                 if (own) {
6097                                         ret = arena_salloc(ptr);
6098                                         goto RETURN;
6099                                 }
6100                         }
6101                 }
6102         } else {
6103                 extent_node_t *node;
6104                 extent_node_t key;
6105
6106                 /* Chunk. */
6107                 key.addr = (void *)chunk;
6108                 malloc_mutex_lock(&huge_mtx);
6109                 node = RB_FIND(extent_tree_ad_s, &huge, &key);
6110                 if (node != NULL)
6111                         ret = node->size;
6112                 else
6113                         ret = 0;
6114                 malloc_mutex_unlock(&huge_mtx);
6115         }
6116
6117 RETURN:
6118         return (ret);
6119 }
6120
6121 static void *
6122 zone_malloc(malloc_zone_t *zone, size_t size)
6123 {
6124
6125         return (moz_malloc(size));
6126 }
6127
6128 static void *
6129 zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
6130 {
6131
6132         return (moz_calloc(num, size));
6133 }
6134
6135 static void *
6136 zone_valloc(malloc_zone_t *zone, size_t size)
6137 {
6138         void *ret = NULL; /* Assignment avoids useless compiler warning. */
6139
6140         moz_posix_memalign(&ret, pagesize, size);
6141
6142         return (ret);
6143 }
6144
6145 static void
6146 zone_free(malloc_zone_t *zone, void *ptr)
6147 {
6148
6149         moz_free(ptr);
6150 }
6151
6152 static void *
6153 zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
6154 {
6155
6156         return (moz_realloc(ptr, size));
6157 }
6158
6159 static void *
6160 zone_destroy(malloc_zone_t *zone)
6161 {
6162
6163         /* This function should never be called. */
6164         assert(false);
6165         return (NULL);
6166 }
6167
6168 static size_t
6169 zone_good_size(malloc_zone_t *zone, size_t size)
6170 {
6171         size_t ret;
6172         void *p;
6173
6174         /*
6175          * Actually create an object of the appropriate size, then find out
6176          * how large it could have been without moving up to the next size
6177          * class.
6178          */
6179         p = moz_malloc(size);
6180         if (p != NULL) {
6181                 ret = isalloc(p);
6182                 moz_free(p);
6183         } else
6184                 ret = size;
6185
6186         return (ret);
6187 }
6188
6189 static void
6190 zone_force_lock(malloc_zone_t *zone)
6191 {
6192
6193         _malloc_prefork();
6194 }
6195
6196 static void
6197 zone_force_unlock(malloc_zone_t *zone)
6198 {
6199
6200         _malloc_postfork();
6201 }
6202
6203 static malloc_zone_t *
6204 create_zone(void)
6205 {
6206
6207         assert(malloc_initialized);
6208
6209         zone.size = (void *)zone_size;
6210         zone.malloc = (void *)zone_malloc;
6211         zone.calloc = (void *)zone_calloc;
6212         zone.valloc = (void *)zone_valloc;
6213         zone.free = (void *)zone_free;
6214         zone.realloc = (void *)zone_realloc;
6215         zone.destroy = (void *)zone_destroy;
6216         zone.zone_name = "jemalloc_zone";
6217         zone.batch_malloc = NULL;
6218         zone.batch_free = NULL;
6219         zone.introspect = &zone_introspect;
6220
6221         zone_introspect.enumerator = NULL;
6222         zone_introspect.good_size = (void *)zone_good_size;
6223         zone_introspect.check = NULL;
6224         zone_introspect.print = NULL;
6225         zone_introspect.log = NULL;
6226         zone_introspect.force_lock = (void *)zone_force_lock;
6227         zone_introspect.force_unlock = (void *)zone_force_unlock;
6228         zone_introspect.statistics = NULL;
6229
6230         return (&zone);
6231 }
6232
6233 __attribute__((constructor))
6234 void
6235 jemalloc_darwin_init(void)
6236 {
6237         extern unsigned malloc_num_zones;
6238         extern malloc_zone_t **malloc_zones;
6239
6240         if (malloc_init_hard())
6241                 abort();
6242
6243         /*
6244          * The following code is *not* thread-safe, so it's critical that
6245          * initialization be manually triggered.
6246          */
6247
6248         /* Register the custom zones. */
6249         malloc_zone_register(create_zone());
6250         assert(malloc_zones[malloc_num_zones - 1] == &zone);
6251
6252         /*
6253          * Shift malloc_zones around so that zone is first, which makes it the
6254          * default zone.
6255          */
6256         assert(malloc_num_zones > 1);
6257         memmove(&malloc_zones[1], &malloc_zones[0],
6258                 sizeof(malloc_zone_t *) * (malloc_num_zones - 1));
6259         malloc_zones[0] = &zone;
6260 }
6261 #endif