2 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Cache line sharing between arenas is avoided for internal data
41 * + Memory is managed in chunks and runs (chunks can be split into runs),
42 * rather than as individual pages. This provides a constant-time
43 * mechanism for associating allocations with particular arenas.
45 * Allocation requests are rounded up to the nearest size class, and no record
46 * of the original request size is maintained. Allocations are broken into
47 * categories according to size class. Assuming runtime defaults, 4 kB pages
48 * and a 16 byte quantum, the size classes in each category are as follows:
50 * |=====================================|
51 * | Category | Subcategory | Size |
52 * |=====================================|
53 * | Small | Tiny | 2 |
56 * | |----------------+---------|
57 * | | Quantum-spaced | 16 |
64 * | |----------------+---------|
65 * | | Sub-page | 1 kB |
67 * |=====================================|
75 * |=====================================|
80 * |=====================================|
82 * A different mechanism is used for each category:
84 * Small : Each size class is segregated into its own set of runs. Each run
85 * maintains a bitmap of which regions are free/allocated.
87 * Large : Each allocation is backed by a dedicated run. Metadata are stored
88 * in the associated arena chunk header maps.
90 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
91 * Metadata are stored in a separate red-black tree.
93 *******************************************************************************
97 * This has been hacked on heavily by Rob Savoye, so it compiles
98 * within Gnash, using the same configuration settings as
99 * everything else in Gnash.
102 # include "gnashconfig.h"
108 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
109 * defaults the A and J runtime options to off. These settings are appropriate
110 * for production systems.
112 #ifndef USE_STATS_MEMORY
113 # define MALLOC_PRODUCTION 1
116 #ifndef MALLOC_PRODUCTION
118 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
121 # define MALLOC_DEBUG 1
123 /* MALLOC_STATS enables statistics calculation. */
124 # define MALLOC_STATS 1
126 /* Memory filling (junk/zero). */
127 # define MALLOC_FILL 1
129 /* Allocation tracing. */
130 # define MALLOC_UTRACE 1
132 /* Support optional abort() on OOM. */
133 # define MALLOC_XMALLOC 1
135 /* Support SYSV semantics. */
136 # define MALLOC_SYSV 1
140 * MALLOC_LAZY_FREE enables the use of a per-thread vector of slots that free()
141 * can atomically stuff object pointers into. This can reduce arena lock
144 /* #define MALLOC_LAZY_FREE 1 */
147 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
148 * re-balances arena load if exponentially averaged contention exceeds a
151 /* #define MALLOC_BALANCE 1 */
154 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
155 * segment (DSS). In an ideal world, this functionality would be completely
156 * unnecessary, but we are burdened by history and the lack of resource limits
157 * for anonymous mapped memory.
159 #if (!defined(DARWIN) && !defined(WIN32))
164 # define _GNU_SOURCE /* For mremap(2). */
165 # define issetugid() 0
166 # if 0 /* Enable in order to test decommit code on Linux. */
167 # define MALLOC_DECOMMIT 1
169 * The decommit code for Unix doesn't bother to make sure deallocated DSS
170 * chunks are writable.
176 #include <sys/types.h>
186 # include <cruntime.h>
187 # include <internal.h>
188 # include <windows.h>
190 # include "jemtree.h"
192 # pragma warning( disable: 4267 4996 4146 )
197 # define inline __inline
198 # define SIZE_T_MAX SIZE_MAX
199 # define STDERR_FILENO 2
200 # define PATH_MAX MAX_PATH
201 # define vsnprintf _vsnprintf
202 # define assert(f) /* we can't assert in the CRT */
204 static unsigned long tlsIndex
= 0xffffffff;
207 # define _pthread_self() __threadid()
208 # define issetugid() 0
210 /* use MSVC intrinsics */
211 # pragma intrinsic(_BitScanForward)
212 static __forceinline
int
217 if (_BitScanForward(&i
, x
) != 0)
223 typedef unsigned char uint8_t;
224 typedef unsigned uint32_t;
225 typedef unsigned long long uint64_t;
226 typedef unsigned long long uintmax_t;
228 # define MALLOC_DECOMMIT
229 #endif /* end of WIN32 */
232 # include <pthread.h>
236 # include <sys/cdefs.h>
238 # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
240 # include <sys/mman.h>
242 # define MADV_FREE MADV_DONTNEED
244 # include <sys/param.h>
245 # include <sys/time.h>
246 # include <sys/types.h>
247 # include <sys/sysctl.h>
248 # include "jemtree.h"
249 # include <sys/uio.h>
254 # define SIZE_T_MAX SIZE_MAX
256 # if defined(DARWIN) || defined(LINUX_HOST)
257 # define _pthread_self pthread_self
258 # define _pthread_mutex_init pthread_mutex_init
259 # define _pthread_mutex_trylock pthread_mutex_trylock
260 # define _pthread_mutex_lock pthread_mutex_lock
261 # define _pthread_mutex_unlock pthread_mutex_unlock
265 # include <stdbool.h>
271 # include <strings.h>
276 # include <libkern/OSAtomic.h>
277 # include <mach/mach_error.h>
278 # include <mach/mach_init.h>
279 # include <mach/vm_map.h>
280 # include <malloc/malloc.h>
281 # endif /* end of DARWIN */
282 #endif /* end of WIN32 */
286 DSOEXPORT
void *moz_memalign(size_t alignment
, size_t size
);
287 DSOEXPORT
size_t moz_malloc_usable_size(const void *ptr
)
288 DSOEXPORT
void *moz_calloc(size_t num
, size_t size
);
289 DSOEXPORT
void *moz_realloc(void *ptr
, size_t size
);
290 DSOEXPORT
void *moz_valloc(size_t size
);
291 DSOEXPORT
size_t moz_malloc_usable_size(const void *ptr
);
293 DSOEXPORT
void *memalign(size_t alignment
, size_t size
);
294 DSOEXPORT
size_t malloc_usable_size(const void *ptr
);
295 DSOEXPORT
void *calloc(size_t num
, size_t size
);
296 DSOEXPORT
void *realloc(void *ptr
, size_t size
);
297 DSOEXPORT
void *valloc(size_t size
);
298 DSOEXPORT
size_t malloc_usable_size(const void *ptr
);
300 void _malloc_postfork(void);
301 void _malloc_prefork(void);
304 static const bool g_isthreaded
= true;
307 #define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
323 /* Disable inlining to make debugging easier. */
329 #endif /* end of MALLOC_DEBUG */
331 /* Size of stack-allocated buffer passed to strerror_r(). */
332 #define STRERROR_BUF 64
334 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
335 #define QUANTUM_2POW_MIN 4
336 #ifdef MOZ_MEMORY_SIZEOF_PTR_2POW
337 # define SIZEOF_PTR_2POW MOZ_MEMORY_SIZEOF_PTR_2POW
339 # define SIZEOF_PTR_2POW 2
// As __isthreaded is already defined on FreeBSD with a different value that does
// the same thing, rename our version to be unique. Although jemalloc is the default
// allocator on FreeBSD, we still want to use our own version, as it has additional Gnash
346 static const bool g_isthreaded
= true;
352 # define QUANTUM_2POW_MIN 4
353 # define SIZEOF_PTR_2POW 2
354 # define CPU_SPINWAIT __asm__ volatile("pause")
357 # define QUANTUM_2POW_MIN 4
358 # define SIZEOF_PTR_2POW 3
361 # define QUANTUM_2POW_MIN 4
362 # define SIZEOF_PTR_2POW 3
366 # define QUANTUM_2POW_MIN 4
367 # define SIZEOF_PTR_2POW 3
371 # define QUANTUM_2POW_MIN 4
372 # define SIZEOF_PTR_2POW 3
373 # define CPU_SPINWAIT __asm__ volatile("pause")
376 # define QUANTUM_2POW_MIN 3
377 # define SIZEOF_PTR_2POW 2
381 # define QUANTUM_2POW_MIN 4
382 # define SIZEOF_PTR_2POW 2
386 #define SIZEOF_PTR (1U << SIZEOF_PTR_2POW)
388 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
389 #ifndef SIZEOF_INT_2POW
390 # define SIZEOF_INT_2POW 2
393 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
394 #if (!defined(PIC) && !defined(NO_TLS))
399 /* MALLOC_BALANCE requires TLS. */
400 # ifdef MALLOC_BALANCE
401 # undef MALLOC_BALANCE
403 /* MALLOC_LAZY_FREE requires TLS. */
404 # ifdef MALLOC_LAZY_FREE
405 # undef MALLOC_LAZY_FREE
410 * Size and alignment of memory chunks that are allocated by the OS's virtual
#define CHUNK_2POW_DEFAULT 20 /* 2^20 bytes = 1 MiB per chunk. */
/* Maximum number of dirty pages per arena. */
#define DIRTY_MAX_DEFAULT (1U << 9) /* 512 pages. */
419 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
420 * so over-estimates are okay (up to a point), but under-estimates will
421 * negatively affect performance.
#define CACHELINE_2POW 6
#define CACHELINE ((size_t)(1U << CACHELINE_2POW)) /* 64 bytes. */
/* Smallest size class to support. */
#define TINY_MIN_2POW 1 /* 2^1 = 2 bytes (the "Tiny" class in the table above). */
430 * Maximum size class that is a multiple of the quantum, but not (necessarily)
431 * a power of 2. Above this size, allocations are rounded up to the nearest
#define SMALL_MAX_2POW_DEFAULT 9
#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT) /* 512 bytes. */
438 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
439 * as small as possible such that this setting is still honored, without
440 * violating other constraints. The goal is to make runs as small as possible
441 * without exceeding a per run external fragmentation threshold.
443 * We use binary fixed point math for overhead computations, where the binary
444 * point is implicitly RUN_BFP bits to the left.
446 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
447 * honored for some/all object sizes, since there is one bit of header overhead
448 * per object (plus a constant). This constraint is relaxed (ignored) for runs
449 * that are so small that the per-region overhead is greater than:
451 * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
454 /* \/ Implicit binary fixed point. */
455 #define RUN_MAX_OVRHD 0x0000003dU
456 #define RUN_MAX_OVRHD_RELAX 0x00001800U
459 * Put a cap on small object run size. This overrides RUN_MAX_OVRHD. Note
460 * that small runs must be small enough that page offsets can fit within the
461 * CHUNK_MAP_POS_MASK bits.
#define RUN_MAX_SMALL_2POW 15
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW) /* 32 KiB. */
466 #ifdef MALLOC_LAZY_FREE
467 /* Default size of each arena's lazy free cache. */
468 # define LAZY_FREE_2POW_DEFAULT 8
470 * Number of pseudo-random probes to conduct before considering the cache to
471 * be overly full. It takes on average n probes to detect fullness of
472 * (n-1)/n. However, we are effectively doing multiple non-independent
473 * trials (each deallocation is a trial), so the actual average threshold
474 * for clearing the cache is somewhat lower.
476 # define LAZY_FREE_NPROBES 5
480 * Hyper-threaded CPUs may need a special instruction inside spin loops in
481 * order to yield to another virtual CPU. If no such instruction is defined
482 * above, make CPU_SPINWAIT a no-op.
485 # define CPU_SPINWAIT
489 * Adaptive spinning must eventually switch to blocking, in order to avoid the
490 * potential for priority inversion deadlock. Backing off past a certain point
491 * can actually waste time.
493 #define SPIN_LIMIT_2POW 11
496 * Conversion from spinning to blocking is expensive; we use (1U <<
497 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
498 * worst-case spinning.
500 #define BLOCK_COST_2POW 4
502 #ifdef MALLOC_BALANCE
504 * We use an exponential moving average to track recent lock contention,
505 * where the size of the history window is N, and alpha=2/(N+1).
507 * Due to integer math rounding, very small values here can cause
508 * substantial degradation in accuracy, thus making the moving average decay
509 * faster than it would with precise calculation.
511 # define BALANCE_ALPHA_INV_2POW 9
514 * Threshold value for the exponential moving contention average at which to
515 * re-assign a thread.
517 # define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
520 /******************************************************************************/
523 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
524 * places, because they require malloc()ed memory, which causes bootstrapping
525 * issues in some cases.
528 #define malloc_mutex_t CRITICAL_SECTION
529 #define malloc_spinlock_t CRITICAL_SECTION
530 #elif defined(DARWIN)
537 #elif defined(USE_JEMALLOC)
538 typedef pthread_mutex_t malloc_mutex_t
;
539 typedef pthread_mutex_t malloc_spinlock_t
;
541 /* XXX these should #ifdef these for freebsd (and linux?) only */
545 typedef malloc_spinlock_t malloc_mutex_t
;
548 /* Set to true once the allocator has been initialized. */
549 static bool malloc_initialized
= false;
552 /* No init lock for Windows. */
553 #elif defined(DARWIN)
554 static malloc_mutex_t init_lock
= {OS_SPINLOCK_INIT
};
555 #elif defined(LINUX_HOST)
556 static malloc_mutex_t init_lock
= PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
;
557 #elif defined(USE_JEMALLOC)
558 static malloc_mutex_t init_lock
= PTHREAD_MUTEX_INITIALIZER
;
560 static malloc_mutex_t init_lock
= {_SPINLOCK_INITIALIZER
};
563 /******************************************************************************/
565 * Statistics data structures.
570 /* Borrowed from malloc.h, as this is Linux specific. This has been
571 * added to jemalloc so the existing memory profiling in Gnash will
572 * continue to work. Most of these fields aren't used by the Gnash
573 * memory profiling, but we leave them here for a semblance of
 * portability. The only fields Gnash uses are arena, uordblks, and
577 * This gets more interesting, as jemalloc maintains multiple
578 * arenas, one for each CPU in a multiprocessor system. We cheat
579 * by just accumulating the stats for all arenas, since our primary
580 * purpose is to track memory leaks.
583 int arena
; /* non-mmapped space allocated from system */
584 int ordblks
; /* number of free chunks UNUSED */
585 int smblks
; /* number of fastbin blocks UNUSED */
586 int hblks
; /* number of mmapped regions UNUSED */
587 int hblkhd
; /* space in mmapped regions UNUSED */
588 int usmblks
; /* maximum total allocated space UNUSED */
589 int fsmblks
; /* space available in freed fastbin blocks UNUSED */
590 int uordblks
; /* total allocated space */
591 int fordblks
; /* total free space */
592 int keepcost
; /* top-most, releasable space UNUSED */
595 typedef struct malloc_bin_stats_s malloc_bin_stats_t
;
596 struct malloc_bin_stats_s
{
598 * Number of allocation requests that corresponded to the size of this
603 /* Total number of runs created for this bin's size class. */
607 * Total number of runs reused by extracting them from the runs tree for
608 * this bin's size class.
612 /* High-water mark for this bin. */
613 unsigned long highruns
;
615 /* Current number of runs in this bin. */
616 unsigned long curruns
;
619 typedef struct arena_stats_s arena_stats_t
;
620 struct arena_stats_s
{
621 /* Number of bytes currently mapped. */
625 * Total number of purge sweeps, total number of madvise calls made,
626 * and total pages purged in order to keep dirty unused memory under
632 #ifdef MALLOC_DECOMMIT
634 * Total number of decommit/commit operations, and total number of
639 uint64_t decommitted
;
642 /* Per-size-category statistics. */
643 size_t allocated_small
;
644 uint64_t nmalloc_small
;
645 uint64_t ndalloc_small
;
647 size_t allocated_large
;
648 uint64_t nmalloc_large
;
649 uint64_t ndalloc_large
;
651 #ifdef MALLOC_BALANCE
652 /* Number of times this arena reassigned a thread due to contention. */
657 typedef struct chunk_stats_s chunk_stats_t
;
658 struct chunk_stats_s
{
659 /* Number of chunks that were allocated. */
662 /* High-water mark for number of chunks allocated. */
663 unsigned long highchunks
;
666 * Current number of chunks allocated. This value isn't maintained for
667 * any other purpose, so keep track of it in order to be able to set
670 unsigned long curchunks
;
673 #endif /* #ifdef MALLOC_STATS */
675 /******************************************************************************/
677 * Extent data structures.
680 /* Tree of extents. */
681 typedef struct extent_node_s extent_node_t
;
682 struct extent_node_s
{
683 /* Linkage for the size/address-ordered tree. */
684 RB_ENTRY(extent_node_s
) link_szad
;
686 /* Linkage for the address-ordered tree. */
687 RB_ENTRY(extent_node_s
) link_ad
;
689 /* Pointer to the extent that this tree node is responsible for. */
692 /* Total region size. */
695 typedef struct extent_tree_szad_s extent_tree_szad_t
;
696 RB_HEAD(extent_tree_szad_s
, extent_node_s
);
697 typedef struct extent_tree_ad_s extent_tree_ad_t
;
698 RB_HEAD(extent_tree_ad_s
, extent_node_s
);
700 /******************************************************************************/
702 * Arena data structures.
705 typedef struct arena_s arena_t
;
706 typedef struct arena_bin_s arena_bin_t
;
709 * Each map element contains several flags, plus page position for runs that
710 * service small allocations.
712 typedef uint8_t arena_chunk_map_t
;
713 #define CHUNK_MAP_UNTOUCHED 0x80U
714 #define CHUNK_MAP_DIRTY 0x40U
715 #define CHUNK_MAP_LARGE 0x20U
716 #ifdef MALLOC_DECOMMIT
717 #define CHUNK_MAP_DECOMMITTED 0x10U
718 #define CHUNK_MAP_POS_MASK 0x0fU
720 #define CHUNK_MAP_POS_MASK 0x1fU
723 /* Arena chunk header. */
724 typedef struct arena_chunk_s arena_chunk_t
;
725 struct arena_chunk_s
{
726 /* Arena that owns the chunk. */
729 /* Linkage for the arena's chunk tree. */
730 RB_ENTRY(arena_chunk_s
) link
;
733 * Number of pages in use. This is maintained in order to make
734 * detection of empty chunks fast.
738 /* Number of dirty pages. */
742 * Tree of extent nodes that are embedded in the arena chunk header
743 * page(s). These nodes are used by arena_chunk_node_alloc().
745 extent_tree_ad_t nodes
;
746 extent_node_t
*nodes_past
;
749 * Map of pages within chunk that keeps track of free/large/small. For
750 * free runs, only the map entries for the first and last pages are
751 * kept up to date, so that free runs can be quickly coalesced.
753 arena_chunk_map_t map
[1]; /* Dynamically sized. */
755 typedef struct arena_chunk_tree_s arena_chunk_tree_t
;
756 RB_HEAD(arena_chunk_tree_s
, arena_chunk_s
);
758 typedef struct arena_run_s arena_run_t
;
760 /* Linkage for run trees. */
761 RB_ENTRY(arena_run_s
) link
;
765 # define ARENA_RUN_MAGIC 0x384adf93
768 /* Bin this run is associated with. */
771 /* Index of first element that might have a free region. */
772 unsigned regs_minelm
;
774 /* Number of free regions in run. */
777 /* Bitmask of in-use regions (0: in use, 1: free). */
778 unsigned regs_mask
[1]; /* Dynamically sized. */
780 typedef struct arena_run_tree_s arena_run_tree_t
;
781 RB_HEAD(arena_run_tree_s
, arena_run_s
);
785 * Current run being used to service allocations of this bin's size
791 * Tree of non-full runs. This tree is used when looking for an
792 * existing run when runcur is no longer usable. We choose the
793 * non-full run that is lowest in memory; this policy tends to keep
794 * objects packed well, and it can also help reduce the number of
795 * almost-empty chunks.
797 arena_run_tree_t runs
;
799 /* Size of regions in a run for this bin's size class. */
802 /* Total size of a run for this bin's size class. */
805 /* Total number of regions in a run for this bin's size class. */
808 /* Number of elements in a run's regs_mask for this bin's size class. */
809 uint32_t regs_mask_nelms
;
811 /* Offset of first region in a run for this bin's size class. */
812 uint32_t reg0_offset
;
815 /* Bin statistics. */
816 malloc_bin_stats_t stats
;
823 # define ARENA_MAGIC 0x947d3d24
826 /* All operations on this arena require that lock be locked. */
827 malloc_spinlock_t lock
;
834 * Tree of chunks this arena manages.
836 arena_chunk_tree_t chunks
;
839 * In order to avoid rapid chunk allocation/deallocation when an arena
840 * oscillates right on the cusp of needing a new chunk, cache the most
841 * recently freed chunk. The spare is left in the arena's chunk tree
842 * until it is deleted.
844 * There is one spare chunk per arena, rather than one spare total, in
845 * order to avoid interactions between multiple threads that could make
846 * a single spare inadequate.
848 arena_chunk_t
*spare
;
851 * Current count of pages within unused runs that are potentially
852 * dirty, and for which madvise(... MADV_FREE) has not been called. By
853 * tracking this, we can institute a limit on how much dirty unused
854 * memory is mapped for each arena.
859 * Trees of this arena's available runs. Two trees are maintained
860 * using one set of nodes, since one is needed for first-best-fit run
861 * allocation, and the other is needed for coalescing.
863 extent_tree_szad_t runs_avail_szad
;
864 extent_tree_ad_t runs_avail_ad
;
866 /* Tree of this arena's allocated (in-use) runs. */
867 extent_tree_ad_t runs_alloced_ad
;
869 #ifdef MALLOC_BALANCE
871 * The arena load balancing machinery needs to keep track of how much
872 * lock contention there is. This value is exponentially averaged.
877 #ifdef MALLOC_LAZY_FREE
879 * Deallocation of small objects can be lazy, in which case free_cache
880 * stores pointers to those objects that have not yet been deallocated.
881 * In order to avoid lock contention, slots are chosen randomly. Empty
882 * slots contain NULL.
888 * bins is used to store rings of free regions of the following sizes,
889 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
910 arena_bin_t bins
[1]; /* Dynamically sized. */
913 /******************************************************************************/
918 /* Number of CPUs. */
919 static unsigned ncpus
;
922 static size_t pagesize
;
923 static size_t pagesize_mask
;
924 static size_t pagesize_2pow
;
926 /* Various bin-related settings. */
927 static size_t bin_maxclass
; /* Max size class for bins. */
928 static unsigned ntbins
; /* Number of (2^n)-spaced tiny bins. */
929 static unsigned nqbins
; /* Number of quantum-spaced bins. */
930 static unsigned nsbins
; /* Number of (2^n)-spaced sub-page bins. */
931 static size_t small_min
;
932 static size_t small_max
;
934 /* Various quantum-related settings. */
935 static size_t quantum
;
936 static size_t quantum_mask
; /* (quantum - 1). */
938 /* Various chunk-related settings. */
939 static size_t chunksize
;
940 static size_t chunksize_mask
; /* (chunksize - 1). */
941 static size_t chunk_npages
;
942 static size_t arena_chunk_header_npages
;
943 static size_t arena_maxclass
; /* Max size class for arenas. */
950 /* Protects chunk-related data structures. */
951 static malloc_mutex_t huge_mtx
;
953 /* Tree of chunks that are stand-alone huge allocations. */
954 static extent_tree_ad_t huge
;
958 * Protects sbrk() calls. This avoids malloc races among threads, though it
959 * does not protect against races with threads that call sbrk() directly.
961 static malloc_mutex_t dss_mtx
;
962 /* Base address of the DSS. */
963 static void *dss_base
;
964 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
965 static void *dss_prev
;
966 /* Current upper limit on DSS addresses. */
967 static void *dss_max
;
970 * Trees of chunks that were previously allocated (trees differ only in node
971 * ordering). These are used when allocating chunks, in an attempt to re-use
972 * address space. Depending on function, different tree orderings are needed,
973 * which is why there are two trees with the same contents.
975 static extent_tree_szad_t dss_chunks_szad
;
976 static extent_tree_ad_t dss_chunks_ad
;
980 /* Huge allocation statistics. */
981 static uint64_t huge_nmalloc
;
982 static uint64_t huge_ndalloc
;
983 static size_t huge_allocated
;
986 /****************************/
988 * base (internal allocation).
992 * Current pages that are being used for internal memory allocations. These
993 * pages are carved up in cacheline-size quanta, so that there is no chance of
994 * false cache line sharing.
996 static void *base_pages
;
997 static void *base_next_addr
;
998 static void *base_past_addr
; /* Addr immediately past base_pages. */
999 static extent_node_t
*base_nodes
;
1000 static malloc_mutex_t base_mtx
;
1002 static size_t base_mapped
;
1011 * Arenas that are used to service external requests. Not all elements of the
1012 * arenas array are necessarily used; arenas are created lazily as needed.
1014 static arena_t
**arenas
;
1015 static unsigned narenas
;
1017 # ifdef MALLOC_BALANCE
1018 static unsigned narenas_2pow
;
1020 static unsigned next_arena
;
1023 static malloc_spinlock_t arenas_lock
; /* Protects arenas initialization. */
1027 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1030 #ifdef HAVE_LOCAL_THREAD_STORAGE
1031 static __thread arena_t
*arenas_map
;
1036 /* Chunk statistics. */
1037 static chunk_stats_t stats_chunks
;
1040 /*******************************/
1042 * Runtime configuration options.
1044 const char *_malloc_options
1047 #elif (defined(DARWIN))
1049 #elif (defined(LINUX_HOST))
1054 #ifndef MALLOC_PRODUCTION
1055 static bool opt_abort
= true;
1057 static bool opt_junk
= true;
1060 static bool opt_abort
= false;
1062 static bool opt_junk
= false;
1066 static bool opt_dss
= true;
1067 static bool opt_mmap
= true;
1069 static size_t opt_dirty_max
= DIRTY_MAX_DEFAULT
;
1070 #ifdef MALLOC_LAZY_FREE
1071 static int opt_lazy_free_2pow
= LAZY_FREE_2POW_DEFAULT
;
1073 #ifdef MALLOC_BALANCE
1074 static uint64_t opt_balance_threshold
= BALANCE_THRESHOLD_DEFAULT
;
 * This toggles the printing of statistics when the program exits.
1079 static bool opt_print_stats
= false;
1080 static size_t opt_quantum_2pow
= QUANTUM_2POW_MIN
;
1081 static size_t opt_small_max_2pow
= SMALL_MAX_2POW_DEFAULT
;
1082 static size_t opt_chunk_2pow
= CHUNK_2POW_DEFAULT
;
1083 #ifdef MALLOC_UTRACE
1084 static bool opt_utrace
= false;
1087 static bool opt_sysv
= false;
1089 #ifdef MALLOC_XMALLOC
1090 static bool opt_xmalloc
= false;
1093 static bool opt_zero
= false;
1095 static int opt_narenas_lshift
= 0;
1097 #ifdef MALLOC_UTRACE
1104 #define UTRACE(a, b, c) \
1106 malloc_utrace_t ut; \
1110 utrace(&ut, sizeof(ut)); \
1113 #define UTRACE(a, b, c)
1116 /******************************************************************************/
1118 * Begin function prototypes for non-inline static functions.
1121 static bool malloc_mutex_init(malloc_mutex_t
*mutex
);
1122 static bool malloc_spin_init(malloc_spinlock_t
*lock
);
1123 static void wrtmessage(const char *p1
, const char *p2
, const char *p3
,
1127 /* Avoid namespace collision with OS X's malloc APIs. */
1128 #define malloc_printf xmalloc_printf
1130 static void malloc_printf(const char *format
, ...);
1132 static char *umax2s(uintmax_t x
, char *s
);
1134 static bool base_pages_alloc_dss(size_t minsize
);
1136 static bool base_pages_alloc_mmap(size_t minsize
);
1137 static bool base_pages_alloc(size_t minsize
);
1138 static void *base_alloc(size_t size
);
1139 static void *base_calloc(size_t number
, size_t size
);
1140 static extent_node_t
*base_node_alloc(void);
1141 static void base_node_dealloc(extent_node_t
*node
);
1143 static void stats_print(arena_t
*arena
);
1145 static void *pages_map(void *addr
, size_t size
);
1146 static void pages_unmap(void *addr
, size_t size
);
1148 static void *chunk_alloc_dss(size_t size
);
1149 static void *chunk_recycle_dss(size_t size
, bool zero
);
1151 static void *chunk_alloc_mmap(size_t size
);
1152 static void *chunk_alloc(size_t size
, bool zero
);
1154 static extent_node_t
*chunk_dealloc_dss_record(void *chunk
, size_t size
);
1155 static bool chunk_dealloc_dss(void *chunk
, size_t size
);
1157 static void chunk_dealloc_mmap(void *chunk
, size_t size
);
1158 static void chunk_dealloc(void *chunk
, size_t size
);
1160 static arena_t
*choose_arena_hard(void);
1162 static extent_node_t
*arena_chunk_node_alloc(arena_chunk_t
*chunk
);
1163 static void arena_chunk_node_dealloc(arena_chunk_t
*chunk
,
1164 extent_node_t
*node
);
1165 static void arena_run_split(arena_t
*arena
, arena_run_t
*run
, size_t size
,
1166 bool small
, bool zero
);
1167 static arena_chunk_t
*arena_chunk_alloc(arena_t
*arena
);
1168 static void arena_chunk_dealloc(arena_t
*arena
, arena_chunk_t
*chunk
);
1169 static arena_run_t
*arena_run_alloc(arena_t
*arena
, size_t size
, bool small
,
1171 static void arena_purge(arena_t
*arena
);
1172 static void arena_run_dalloc(arena_t
*arena
, arena_run_t
*run
, bool dirty
);
1173 static void arena_run_trim_head(arena_t
*arena
, arena_chunk_t
*chunk
,
1174 extent_node_t
*nodeB
, arena_run_t
*run
, size_t oldsize
, size_t newsize
);
1175 static void arena_run_trim_tail(arena_t
*arena
, arena_chunk_t
*chunk
,
1176 extent_node_t
*nodeA
, arena_run_t
*run
, size_t oldsize
, size_t newsize
,
1178 static arena_run_t
*arena_bin_nonfull_run_get(arena_t
*arena
, arena_bin_t
*bin
);
1179 static void *arena_bin_malloc_hard(arena_t
*arena
, arena_bin_t
*bin
);
1180 static size_t arena_bin_run_size_calc(arena_bin_t
*bin
, size_t min_run_size
);
1181 #ifdef MALLOC_BALANCE
1182 static void arena_lock_balance_hard(arena_t
*arena
);
1184 static void *arena_malloc_large(arena_t
*arena
, size_t size
, bool zero
);
1185 static void *arena_palloc(arena_t
*arena
, size_t alignment
, size_t size
,
1187 static size_t arena_salloc(const void *ptr
);
1188 #ifdef MALLOC_LAZY_FREE
1189 static void arena_dalloc_lazy_hard(arena_t
*arena
, arena_chunk_t
*chunk
,
1190 void *ptr
, size_t pageind
, arena_chunk_map_t
*mapelm
);
1192 static void arena_dalloc_large(arena_t
*arena
, arena_chunk_t
*chunk
,
1194 static void arena_ralloc_large_shrink(arena_t
*arena
, arena_chunk_t
*chunk
,
1195 void *ptr
, size_t size
, size_t oldsize
);
1196 static bool arena_ralloc_large_grow(arena_t
*arena
, arena_chunk_t
*chunk
,
1197 void *ptr
, size_t size
, size_t oldsize
);
1198 static bool arena_ralloc_large(void *ptr
, size_t size
, size_t oldsize
);
1199 static void *arena_ralloc(void *ptr
, size_t size
, size_t oldsize
);
1200 static bool arena_new(arena_t
*arena
);
1201 static arena_t
*arenas_extend(unsigned ind
);
1202 static void *huge_malloc(size_t size
, bool zero
);
1203 static void *huge_palloc(size_t alignment
, size_t size
);
1204 static void *huge_ralloc(void *ptr
, size_t size
, size_t oldsize
);
1205 static void huge_dalloc(void *ptr
);
1206 static void malloc_print_stats(void);
1210 bool malloc_init_hard(void);
1213 * End function prototypes.
1215 /******************************************************************************/
1217 * Begin mutex. We can't use normal pthread mutexes in all places, because
1218 * they require malloc()ed memory, which causes bootstrapping issues in some
1223 malloc_mutex_init(malloc_mutex_t
*mutex
)
1227 if (! __crtInitCritSecAndSpinCount(mutex
, _CRT_SPINCOUNT
))
1229 #elif defined(DARWIN)
1230 mutex
->lock
= OS_SPINLOCK_INIT
;
1231 #elif defined(LINUX_HOST)
1232 pthread_mutexattr_t attr
;
1233 if (pthread_mutexattr_init(&attr
) != 0)
1235 pthread_mutexattr_settype(&attr
, PTHREAD_MUTEX_ADAPTIVE_NP
);
1236 if (pthread_mutex_init(mutex
, &attr
) != 0) {
1237 pthread_mutexattr_destroy(&attr
);
1240 pthread_mutexattr_destroy(&attr
);
1241 #elif defined(USE_JEMALLOC)
1242 if (pthread_mutex_init(mutex
, NULL
) != 0)
1245 static const spinlock_t lock
= _SPINLOCK_INITIALIZER
;
1253 malloc_mutex_lock(malloc_mutex_t
*mutex
)
1257 EnterCriticalSection(mutex
);
1258 #elif defined(DARWIN)
1259 OSSpinLockLock(&mutex
->lock
);
1260 #elif defined(USE_JEMALLOC)
1261 pthread_mutex_lock(mutex
);
1264 _SPINLOCK(&mutex
->lock
);
1269 malloc_mutex_unlock(malloc_mutex_t
*mutex
)
1273 LeaveCriticalSection(mutex
);
1274 #elif defined(DARWIN)
1275 OSSpinLockUnlock(&mutex
->lock
);
1276 #elif defined(USE_JEMALLOC)
1277 pthread_mutex_unlock(mutex
);
1280 _SPINUNLOCK(&mutex
->lock
);
1285 malloc_spin_init(malloc_spinlock_t
*lock
)
1289 if (! __crtInitCritSecAndSpinCount(lock
, _CRT_SPINCOUNT
))
1291 #elif defined(DARWIN)
1292 lock
->lock
= OS_SPINLOCK_INIT
;
1293 #elif defined(LINUX_HOST)
1294 pthread_mutexattr_t attr
;
1295 if (pthread_mutexattr_init(&attr
) != 0)
1297 pthread_mutexattr_settype(&attr
, PTHREAD_MUTEX_ADAPTIVE_NP
);
1298 if (pthread_mutex_init(lock
, &attr
) != 0) {
1299 pthread_mutexattr_destroy(&attr
);
1302 pthread_mutexattr_destroy(&attr
);
1303 #elif defined(USE_JEMALLOC)
1304 if (pthread_mutex_init(lock
, NULL
) != 0)
1307 lock
->lock
= _SPINLOCK_INITIALIZER
;
1313 malloc_spin_lock(malloc_spinlock_t
*lock
)
1317 EnterCriticalSection(lock
);
1318 #elif defined(DARWIN)
1319 OSSpinLockLock(&lock
->lock
);
1320 #elif defined(USE_JEMALLOC)
1321 pthread_mutex_lock(lock
);
1324 _SPINLOCK(&lock
->lock
);
1329 malloc_spin_unlock(malloc_spinlock_t
*lock
)
1332 LeaveCriticalSection(lock
);
1333 #elif defined(DARWIN)
1334 OSSpinLockUnlock(&lock
->lock
);
1335 #elif defined(USE_JEMALLOC)
1336 pthread_mutex_unlock(lock
);
1339 _SPINUNLOCK(&lock
->lock
);
1346 /******************************************************************************/
1348 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1349 * after a period of spinning, because unbounded spinning would allow for
1350 * priority inversion.
1354 # define malloc_spin_init malloc_mutex_init
1355 # define malloc_spin_lock malloc_mutex_lock
1356 # define malloc_spin_unlock malloc_mutex_unlock
1363 /******************************************************************************/
1365 * Begin Utility functions/macros.
1368 /* Return the chunk address for allocation address a. */
1369 #define CHUNK_ADDR2BASE(a) \
1370 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1372 /* Return the chunk offset of address a. */
1373 #define CHUNK_ADDR2OFFSET(a) \
1374 ((size_t)((uintptr_t)(a) & chunksize_mask))
1376 /* Return the smallest chunk multiple that is >= s. */
1377 #define CHUNK_CEILING(s) \
1378 (((s) + chunksize_mask) & ~chunksize_mask)
1380 /* Return the smallest cacheline multiple that is >= s. */
1381 #define CACHELINE_CEILING(s) \
1382 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
1384 /* Return the smallest quantum multiple that is >= a. */
1385 #define QUANTUM_CEILING(a) \
1386 (((a) + quantum_mask) & ~quantum_mask)
1388 /* Return the smallest pagesize multiple that is >= s. */
1389 #define PAGE_CEILING(s) \
1390 (((s) + pagesize_mask) & ~pagesize_mask)
1392 /* Compute the smallest power of 2 that is >= x. */
1393 static inline size_t
1403 #if (SIZEOF_PTR == 8)
1410 #if (defined(MALLOC_LAZY_FREE) || defined(MALLOC_BALANCE))
1412 * Use a simple linear congruential pseudo-random number generator:
1414 * prn(y) = (a*x + c) % m
1416 * where the following constants ensure maximal period:
1418 * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1419 * c == Odd number (relatively prime to 2^n).
1422 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1424 * This choice of m has the disadvantage that the quality of the bits is
1425 * proportional to bit position. For example. the lowest bit has a cycle of 2,
1426 * the next has a cycle of 4, etc. For this reason, we prefer to use the upper
1429 # define PRN_DEFINE(suffix, var, a, c) \
1430 static inline void \
1431 sprn_##suffix(uint32_t seed) \
1436 static inline uint32_t \
1437 prn_##suffix(uint32_t lg_range) \
1441 assert(lg_range > 0); \
1442 assert(lg_range <= 32); \
1444 x = (var * (a)) + (c); \
1446 ret = x >> (32 - lg_range); \
1450 # define SPRN(suffix, seed) sprn_##suffix(seed)
1451 # define PRN(suffix, lg_range) prn_##suffix(lg_range)
1455 * Define PRNGs, one for each purpose, in order to avoid auto-correlation
1459 #ifdef MALLOC_LAZY_FREE
1460 /* Define the per-thread PRNG used for lazy deallocation. */
1461 static __thread
uint32_t lazy_free_x
;
1462 PRN_DEFINE(lazy_free
, lazy_free_x
, 12345, 12347)
1465 #ifdef MALLOC_BALANCE
1466 /* Define the PRNG used for arena assignment. */
1467 static __thread
uint32_t balance_x
;
1468 PRN_DEFINE(balance
, balance_x
, 1297, 1301)
1471 #ifdef MALLOC_UTRACE
1473 utrace(const void *addr
, size_t len
)
1475 malloc_utrace_t
*ut
= (malloc_utrace_t
*)addr
;
1477 assert(len
== sizeof(malloc_utrace_t
));
1479 if (ut
->p
== NULL
&& ut
->s
== 0 && ut
->r
== NULL
)
1480 malloc_printf("%d x USER malloc_init()\n", getpid());
1481 else if (ut
->p
== NULL
&& ut
->r
!= NULL
) {
1482 malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut
->r
,
1484 } else if (ut
->p
!= NULL
&& ut
->r
!= NULL
) {
1485 malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(),
1486 ut
->r
, ut
->p
, ut
->s
);
1488 malloc_printf("%d x USER free(%p)\n", getpid(), ut
->p
);
1494 static inline const char *
1498 return ("<jemalloc>");
1502 wrtmessage(const char *p1
, const char *p2
, const char *p3
, const char *p4
)
1505 #define _write write
1507 size_t ret
=_write(STDERR_FILENO
, p1
, (unsigned int) strlen(p1
));
1508 ret
= _write(STDERR_FILENO
, p2
, (unsigned int) strlen(p2
));
1509 ret
= _write(STDERR_FILENO
, p3
, (unsigned int) strlen(p3
));
1510 ret
= _write(STDERR_FILENO
, p4
, (unsigned int) strlen(p4
));
1513 #define _malloc_message malloc_message
1515 void (*_malloc_message
)(const char *p1
, const char *p2
, const char *p3
,
1516 const char *p4
) = wrtmessage
;
1520 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1523 malloc_printf(const char *format
, ...)
1528 va_start(ap
, format
);
1529 vsnprintf(buf
, sizeof(buf
), format
, ap
);
1531 _malloc_message(buf
, "", "", "");
1536 * We don't want to depend on vsnprintf() for production builds, since that can
1537 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1538 * integer printing functionality, so that malloc_printf() use can be limited to
1539 * MALLOC_STATS code.
1541 #define UMAX2S_BUFSIZE 21
1543 umax2s(uintmax_t x
, char *s
)
1547 /* Make sure UMAX2S_BUFSIZE is large enough. */
1548 assert(sizeof(uintmax_t) <= 8);
1550 i
= UMAX2S_BUFSIZE
- 1;
1554 s
[i
] = "0123456789"[x
% 10];
1561 /******************************************************************************/
1565 base_pages_alloc_dss(size_t minsize
)
1569 * Do special DSS allocation here, since base allocations don't need to
1572 malloc_mutex_lock(&dss_mtx
);
1573 if (dss_prev
!= (void *)-1) {
1575 size_t csize
= CHUNK_CEILING(minsize
);
1578 /* Get the current end of the DSS. */
1582 * Calculate how much padding is necessary to
1583 * chunk-align the end of the DSS. Don't worry about
1584 * dss_max not being chunk-aligned though.
1586 incr
= (intptr_t)chunksize
1587 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max
);
1589 if ((size_t)incr
< minsize
)
1592 dss_prev
= sbrk(incr
);
1593 if (dss_prev
== dss_max
) {
1595 dss_max
= (void *)((intptr_t)dss_prev
+ incr
);
1596 base_pages
= dss_prev
;
1597 base_next_addr
= base_pages
;
1598 base_past_addr
= dss_max
;
1600 base_mapped
+= incr
;
1602 malloc_mutex_unlock(&dss_mtx
);
1605 } while (dss_prev
!= (void *)-1);
1607 malloc_mutex_unlock(&dss_mtx
);
1614 base_pages_alloc_mmap(size_t minsize
)
1618 assert(minsize
!= 0);
1619 csize
= PAGE_CEILING(minsize
);
1620 base_pages
= pages_map(NULL
, csize
);
1621 if (base_pages
== NULL
)
1623 base_next_addr
= base_pages
;
1624 base_past_addr
= (void *)((uintptr_t)base_pages
+ csize
);
1626 base_mapped
+= csize
;
1633 base_pages_alloc(size_t minsize
)
1638 if (base_pages_alloc_dss(minsize
) == false)
1642 if (opt_mmap
&& minsize
!= 0)
1645 if (base_pages_alloc_mmap(minsize
) == false)
1653 base_alloc(size_t size
)
1658 /* Round size up to nearest multiple of the cacheline size. */
1659 csize
= CACHELINE_CEILING(size
);
1661 malloc_mutex_lock(&base_mtx
);
1662 /* Make sure there's enough space for the allocation. */
1663 if ((uintptr_t)base_next_addr
+ csize
> (uintptr_t)base_past_addr
) {
1664 if (base_pages_alloc(csize
))
1668 ret
= base_next_addr
;
1669 base_next_addr
= (void *)((uintptr_t)base_next_addr
+ csize
);
1670 malloc_mutex_unlock(&base_mtx
);
1676 base_calloc(size_t number
, size_t size
)
1680 ret
= base_alloc(number
* size
);
1681 memset(ret
, 0, number
* size
);
1686 static extent_node_t
*
1687 base_node_alloc(void)
1691 malloc_mutex_lock(&base_mtx
);
1692 if (base_nodes
!= NULL
) {
1694 base_nodes
= *(extent_node_t
**)ret
;
1695 malloc_mutex_unlock(&base_mtx
);
1697 malloc_mutex_unlock(&base_mtx
);
1698 ret
= (extent_node_t
*)base_alloc(sizeof(extent_node_t
));
1705 base_node_dealloc(extent_node_t
*node
)
1708 malloc_mutex_lock(&base_mtx
);
1709 *(extent_node_t
**)node
= base_nodes
;
1711 malloc_mutex_unlock(&base_mtx
);
1714 /******************************************************************************/
1718 stats_print(arena_t
*arena
)
1720 unsigned i
, gap_start
;
1723 malloc_printf("dirty: %Iu page%s dirty, %I64u sweep%s,"
1724 " %I64u madvise%s, %I64u page%s purged\n",
1725 arena
->ndirty
, arena
->ndirty
== 1 ? "" : "s",
1726 arena
->stats
.npurge
, arena
->stats
.npurge
== 1 ? "" : "s",
1727 arena
->stats
.nmadvise
, arena
->stats
.nmadvise
== 1 ? "" : "s",
1728 arena
->stats
.purged
, arena
->stats
.purged
== 1 ? "" : "s");
1729 # ifdef MALLOC_DECOMMIT
1730 malloc_printf("decommit: %I64u decommit%s, %I64u commit%s,"
1731 " %I64u page%s decommitted\n",
1732 arena
->stats
.ndecommit
, (arena
->stats
.ndecommit
== 1) ? "" : "s",
1733 arena
->stats
.ncommit
, (arena
->stats
.ncommit
== 1) ? "" : "s",
1734 arena
->stats
.decommitted
,
1735 (arena
->stats
.decommitted
== 1) ? "" : "s");
1738 malloc_printf(" allocated nmalloc ndalloc\n");
1739 malloc_printf("small: %12Iu %12I64u %12I64u\n",
1740 arena
->stats
.allocated_small
, arena
->stats
.nmalloc_small
,
1741 arena
->stats
.ndalloc_small
);
1742 malloc_printf("large: %12Iu %12I64u %12I64u\n",
1743 arena
->stats
.allocated_large
, arena
->stats
.nmalloc_large
,
1744 arena
->stats
.ndalloc_large
);
1745 malloc_printf("total: %12Iu %12I64u %12I64u\n",
1746 arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
,
1747 arena
->stats
.nmalloc_small
+ arena
->stats
.nmalloc_large
,
1748 arena
->stats
.ndalloc_small
+ arena
->stats
.ndalloc_large
);
1749 malloc_printf("mapped: %12Iu\n", arena
->stats
.mapped
);
1751 malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1752 " %llu madvise%s, %llu page%s purged\n",
1753 arena
->ndirty
, arena
->ndirty
== 1 ? "" : "s",
1754 arena
->stats
.npurge
, arena
->stats
.npurge
== 1 ? "" : "s",
1755 arena
->stats
.nmadvise
, arena
->stats
.nmadvise
== 1 ? "" : "s",
1756 arena
->stats
.purged
, arena
->stats
.purged
== 1 ? "" : "s");
1757 # ifdef MALLOC_DECOMMIT
1758 malloc_printf("decommit: %llu decommit%s, %llu commit%s,"
1759 " %llu page%s decommitted\n",
1760 arena
->stats
.ndecommit
, (arena
->stats
.ndecommit
== 1) ? "" : "s",
1761 arena
->stats
.ncommit
, (arena
->stats
.ncommit
== 1) ? "" : "s",
1762 arena
->stats
.decommitted
,
1763 (arena
->stats
.decommitted
== 1) ? "" : "s");
1766 malloc_printf(" allocated nmalloc ndalloc\n");
1767 malloc_printf("small: %12zu %12llu %12llu\n",
1768 arena
->stats
.allocated_small
, arena
->stats
.nmalloc_small
,
1769 arena
->stats
.ndalloc_small
);
1770 malloc_printf("large: %12zu %12llu %12llu\n",
1771 arena
->stats
.allocated_large
, arena
->stats
.nmalloc_large
,
1772 arena
->stats
.ndalloc_large
);
1773 malloc_printf("total: %12zu %12llu %12llu\n",
1774 arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
,
1775 arena
->stats
.nmalloc_small
+ arena
->stats
.nmalloc_large
,
1776 arena
->stats
.ndalloc_small
+ arena
->stats
.ndalloc_large
);
1777 malloc_printf("mapped: %12zu\n", arena
->stats
.mapped
);
1779 malloc_printf("bins: bin size regs pgs requests newruns"
1780 " reruns maxruns curruns\n");
1781 for (i
= 0, gap_start
= UINT_MAX
; i
< ntbins
+ nqbins
+ nsbins
; i
++) {
1782 if (arena
->bins
[i
].stats
.nrequests
== 0) {
1783 if (gap_start
== UINT_MAX
)
1786 if (gap_start
!= UINT_MAX
) {
1787 if (i
> gap_start
+ 1) {
1788 /* Gap of more than one size class. */
1789 malloc_printf("[%u..%u]\n",
1792 /* Gap of one size class. */
1793 malloc_printf("[%u]\n", gap_start
);
1795 gap_start
= UINT_MAX
;
1799 "%13u %1s %4u %4u %3u %9I64u %9I64u"
1800 " %9I64u %7u %7u\n",
1802 "%13u %1s %4u %4u %3u %9llu %9llu"
1803 " %9llu %7lu %7lu\n",
1806 i
< ntbins
? "T" : i
< ntbins
+ nqbins
? "Q" : "S",
1807 arena
->bins
[i
].reg_size
,
1808 arena
->bins
[i
].nregs
,
1809 arena
->bins
[i
].run_size
>> pagesize_2pow
,
1810 arena
->bins
[i
].stats
.nrequests
,
1811 arena
->bins
[i
].stats
.nruns
,
1812 arena
->bins
[i
].stats
.reruns
,
1813 arena
->bins
[i
].stats
.highruns
,
1814 arena
->bins
[i
].stats
.curruns
);
1817 if (gap_start
!= UINT_MAX
) {
1818 if (i
> gap_start
+ 1) {
1819 /* Gap of more than one size class. */
1820 malloc_printf("[%u..%u]\n", gap_start
, i
- 1);
1822 /* Gap of one size class. */
1823 malloc_printf("[%u]\n", gap_start
);
1830 * End Utility functions/macros.
1832 /******************************************************************************/
1834 * Begin extent tree code.
1838 extent_szad_comp(extent_node_t
*a
, extent_node_t
*b
)
1841 size_t a_size
= a
->size
;
1842 size_t b_size
= b
->size
;
1844 ret
= (a_size
> b_size
) - (a_size
< b_size
);
1846 uintptr_t a_addr
= (uintptr_t)a
->addr
;
1847 uintptr_t b_addr
= (uintptr_t)b
->addr
;
1849 ret
= (a_addr
> b_addr
) - (a_addr
< b_addr
);
1855 /* Generate red-black tree code for size/address-ordered extents. */
1856 RB_GENERATE_STATIC(extent_tree_szad_s
, extent_node_s
, link_szad
,
1860 extent_ad_comp(extent_node_t
*a
, extent_node_t
*b
)
1862 uintptr_t a_addr
= (uintptr_t)a
->addr
;
1863 uintptr_t b_addr
= (uintptr_t)b
->addr
;
1865 return ((a_addr
> b_addr
) - (a_addr
< b_addr
));
1868 /* Generate red-black tree code for address-ordered extents. */
1869 RB_GENERATE_STATIC(extent_tree_ad_s
, extent_node_s
, link_ad
, extent_ad_comp
)
1873 * End extent tree code.
1875 /******************************************************************************/
1877 * Begin chunk management functions.
1882 pages_map(void *addr
, size_t size
)
1886 ret
= VirtualAlloc(addr
, size
, MEM_COMMIT
| MEM_RESERVE
,
1893 pages_unmap(void *addr
, size_t size
)
1896 if (VirtualFree(addr
, 0, MEM_RELEASE
) == 0) {
1897 _malloc_message(_getprogname(),
1898 ": (malloc) Error in VirtualFree()\n", "", "");
1903 #elif (defined(DARWIN))
1905 pages_map(void *addr
, size_t size
)
1915 flags
= VM_FLAGS_ANYWHERE
;
1917 err
= vm_allocate((vm_map_t
)mach_task_self(), (vm_address_t
*)&ret
,
1918 (vm_size_t
)size
, flags
);
1919 if (err
!= KERN_SUCCESS
)
1922 assert(ret
== NULL
|| (addr
== NULL
&& ret
!= addr
)
1923 || (addr
!= NULL
&& ret
== addr
));
1928 pages_unmap(void *addr
, size_t size
)
1932 err
= vm_deallocate((vm_map_t
)mach_task_self(), (vm_address_t
)addr
,
1934 if (err
!= KERN_SUCCESS
) {
1935 malloc_message(_getprogname(),
1936 ": (malloc) Error in vm_deallocate(): ",
1937 mach_error_string(err
), "\n");
1943 #define VM_COPY_MIN (pagesize << 5)
1945 pages_copy(void *dest
, const void *src
, size_t n
)
1948 assert((void *)((uintptr_t)dest
& ~pagesize_mask
) == dest
);
1949 assert(n
>= VM_COPY_MIN
);
1950 assert((void *)((uintptr_t)src
& ~pagesize_mask
) == src
);
1952 vm_copy(mach_task_self(), (vm_address_t
)src
, (vm_size_t
)n
,
1953 (vm_address_t
)dest
);
1957 pages_map(void *addr
, size_t size
)
1962 * We don't use MAP_FIXED here, because it can cause the *replacement*
1963 * of existing mappings, and we only want to create new mappings.
1965 ret
= mmap(addr
, size
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANON
,
1967 assert(ret
!= NULL
);
1969 if (ret
== MAP_FAILED
)
1971 else if (addr
!= NULL
&& ret
!= addr
) {
1973 * We succeeded in mapping memory, but not in the right place.
1975 if (munmap(ret
, size
) == -1) {
1976 char buf
[STRERROR_BUF
];
1978 strerror_r(errno
, buf
, sizeof(buf
));
1979 _malloc_message(_getprogname(),
1980 ": (malloc) Error in munmap(): ", buf
, "\n");
1987 assert(ret
== NULL
|| (addr
== NULL
&& ret
!= addr
)
1988 || (addr
!= NULL
&& ret
== addr
));
1993 pages_unmap(void *addr
, size_t size
)
1996 if (munmap(addr
, size
) == -1) {
1997 char buf
[STRERROR_BUF
];
1999 strerror_r(errno
, buf
, sizeof(buf
));
2000 _malloc_message(_getprogname(),
2001 ": (malloc) Error in munmap(): ", buf
, "\n");
2008 #ifdef MALLOC_DECOMMIT
2010 pages_decommit(void *addr
, size_t size
)
2014 VirtualFree(addr
, size
, MEM_DECOMMIT
);
2016 if (mmap(addr
, size
, PROT_NONE
, MAP_FIXED
| MAP_PRIVATE
| MAP_ANON
, -1,
2023 pages_commit(void *addr
, size_t size
)
2027 VirtualAlloc(addr
, size
, MEM_COMMIT
, PAGE_READWRITE
);
2029 if (mmap(addr
, size
, PROT_READ
| PROT_WRITE
, MAP_FIXED
| MAP_PRIVATE
|
2030 MAP_ANON
, -1, 0) == MAP_FAILED
)
2038 chunk_alloc_dss(size_t size
)
2041 malloc_mutex_lock(&dss_mtx
);
2042 if (dss_prev
!= (void *)-1) {
2046 * The loop is necessary to recover from races with other
2047 * threads that are using the DSS for something other than
2053 /* Get the current end of the DSS. */
2057 * Calculate how much padding is necessary to
2058 * chunk-align the end of the DSS.
2060 incr
= (intptr_t)size
2061 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max
);
2062 if (incr
== (intptr_t)size
)
2065 ret
= (void *)((intptr_t)dss_max
+ incr
);
2069 dss_prev
= sbrk(incr
);
2070 if (dss_prev
== dss_max
) {
2072 dss_max
= (void *)((intptr_t)dss_prev
+ incr
);
2073 malloc_mutex_unlock(&dss_mtx
);
2076 } while (dss_prev
!= (void *)-1);
2078 malloc_mutex_unlock(&dss_mtx
);
2084 chunk_recycle_dss(size_t size
, bool zero
)
2086 extent_node_t
*node
, key
;
2090 malloc_mutex_lock(&dss_mtx
);
2091 node
= RB_NFIND(extent_tree_szad_s
, &dss_chunks_szad
, &key
);
2093 void *ret
= node
->addr
;
2095 /* Remove node from the tree. */
2096 RB_REMOVE(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2097 if (node
->size
== size
) {
2098 RB_REMOVE(extent_tree_ad_s
, &dss_chunks_ad
, node
);
2099 base_node_dealloc(node
);
2102 * Insert the remainder of node's address range as a
2103 * smaller chunk. Its position within dss_chunks_ad
2106 assert(node
->size
> size
);
2107 node
->addr
= (void *)((uintptr_t)node
->addr
+ size
);
2109 RB_INSERT(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2111 malloc_mutex_unlock(&dss_mtx
);
2114 memset(ret
, 0, size
);
2117 malloc_mutex_unlock(&dss_mtx
);
2124 static inline void *
2125 chunk_alloc_mmap(size_t size
)
2131 * Windows requires that there be a 1:1 mapping between VM
2132 * allocation/deallocation operations. Therefore, take care here to
2133 * acquire the final result via one mapping operation. This means
2134 * unmapping any preliminary result that is not correctly aligned.
2137 ret
= pages_map(NULL
, size
);
2141 offset
= CHUNK_ADDR2OFFSET(ret
);
2143 /* Deallocate, then try to allocate at (ret + size - offset). */
2144 pages_unmap(ret
, size
);
2145 ret
= pages_map((void *)((uintptr_t)ret
+ size
- offset
), size
);
2146 while (ret
== NULL
) {
2148 * Over-allocate in order to map a memory region that
2149 * is definitely large enough.
2151 ret
= pages_map(NULL
, size
+ chunksize
);
2155 * Deallocate, then allocate the correct size, within
2156 * the over-sized mapping.
2158 offset
= CHUNK_ADDR2OFFSET(ret
);
2159 pages_unmap(ret
, size
+ chunksize
);
2161 ret
= pages_map(ret
, size
);
2163 ret
= pages_map((void *)((uintptr_t)ret
+
2164 chunksize
- offset
), size
);
2167 * Failure here indicates a race with another thread, so
2176 static inline void *
2177 chunk_alloc_mmap(size_t size
)
2183 * Ideally, there would be a way to specify alignment to mmap() (like
2184 * NetBSD has), but in the absence of such a feature, we have to work
2185 * hard to efficiently create aligned mappings. The reliable, but
2186 * expensive method is to create a mapping that is over-sized, then
2187 * trim the excess. However, that always results in at least one call
2190 * A more optimistic approach is to try mapping precisely the right
2191 * amount, then try to append another mapping if alignment is off. In
2192 * practice, this works out well as long as the application is not
2193 * interleaving mappings via direct mmap() calls. If we do run into a
2194 * situation where there is an interleaved mapping and we are unable to
2195 * extend an unaligned mapping, our best option is to momentarily
2196 * revert to the reliable-but-expensive method. This will tend to
2197 * leave a gap in the memory map that is too small to cause later
2198 * problems for the optimistic method.
2201 ret
= pages_map(NULL
, size
);
2205 offset
= CHUNK_ADDR2OFFSET(ret
);
2207 /* Try to extend chunk boundary. */
2208 if (pages_map((void *)((uintptr_t)ret
+ size
),
2209 chunksize
- offset
) == NULL
) {
2211 * Extension failed. Clean up, then revert to the
2212 * reliable-but-expensive method.
2214 pages_unmap(ret
, size
);
2216 /* Beware size_t wrap-around. */
2217 if (size
+ chunksize
<= size
)
2220 ret
= pages_map(NULL
, size
+ chunksize
);
2224 /* Clean up unneeded leading/trailing space. */
2225 offset
= CHUNK_ADDR2OFFSET(ret
);
2227 /* Leading space. */
2228 pages_unmap(ret
, chunksize
- offset
);
2230 ret
= (void *)((uintptr_t)ret
+
2231 (chunksize
- offset
));
2233 /* Trailing space. */
2234 pages_unmap((void *)((uintptr_t)ret
+ size
),
2237 /* Trailing space only. */
2238 pages_unmap((void *)((uintptr_t)ret
+ size
),
2242 /* Clean up unneeded leading space. */
2243 pages_unmap(ret
, chunksize
- offset
);
2244 ret
= (void *)((uintptr_t)ret
+ (chunksize
- offset
));
2253 chunk_alloc(size_t size
, bool zero
)
2258 assert((size
& chunksize_mask
) == 0);
2262 ret
= chunk_recycle_dss(size
, zero
);
2267 ret
= chunk_alloc_dss(size
);
2275 ret
= chunk_alloc_mmap(size
);
2280 /* All strategies for allocation failed. */
2285 stats_chunks
.nchunks
+= (size
/ chunksize
);
2286 stats_chunks
.curchunks
+= (size
/ chunksize
);
2288 if (stats_chunks
.curchunks
> stats_chunks
.highchunks
)
2289 stats_chunks
.highchunks
= stats_chunks
.curchunks
;
2292 assert(CHUNK_ADDR2BASE(ret
) == ret
);
2297 static extent_node_t
*
2298 chunk_dealloc_dss_record(void *chunk
, size_t size
)
2300 extent_node_t
*node
, *prev
, key
;
2302 key
.addr
= (void *)((uintptr_t)chunk
+ size
);
2303 node
= RB_NFIND(extent_tree_ad_s
, &dss_chunks_ad
, &key
);
2304 /* Try to coalesce forward. */
2305 if (node
!= NULL
&& node
->addr
== key
.addr
) {
2307 * Coalesce chunk with the following address range. This does
2308 * not change the position within dss_chunks_ad, so only
2309 * remove/insert from/into dss_chunks_szad.
2311 RB_REMOVE(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2314 RB_INSERT(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2317 * Coalescing forward failed, so insert a new node. Drop
2318 * dss_mtx during node allocation, since it is possible that a
2319 * new base chunk will be allocated.
2321 malloc_mutex_unlock(&dss_mtx
);
2322 node
= base_node_alloc();
2323 malloc_mutex_lock(&dss_mtx
);
2328 RB_INSERT(extent_tree_ad_s
, &dss_chunks_ad
, node
);
2329 RB_INSERT(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2332 /* Try to coalesce backward. */
2333 prev
= RB_PREV(extent_tree_ad_s
, &dss_chunks_ad
, node
);
2334 if (prev
!= NULL
&& (void *)((uintptr_t)prev
->addr
+ prev
->size
) ==
2337 * Coalesce chunk with the previous address range. This does
2338 * not change the position within dss_chunks_ad, so only
2339 * remove/insert node from/into dss_chunks_szad.
2341 RB_REMOVE(extent_tree_szad_s
, &dss_chunks_szad
, prev
);
2342 RB_REMOVE(extent_tree_ad_s
, &dss_chunks_ad
, prev
);
2344 RB_REMOVE(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2345 node
->addr
= prev
->addr
;
2346 node
->size
+= prev
->size
;
2347 RB_INSERT(extent_tree_szad_s
, &dss_chunks_szad
, node
);
2349 base_node_dealloc(prev
);
2356 chunk_dealloc_dss(void *chunk
, size_t size
)
2359 malloc_mutex_lock(&dss_mtx
);
2360 if ((uintptr_t)chunk
>= (uintptr_t)dss_base
2361 && (uintptr_t)chunk
< (uintptr_t)dss_max
) {
2362 extent_node_t
*node
;
2364 /* Try to coalesce with other unused chunks. */
2365 node
= chunk_dealloc_dss_record(chunk
, size
);
2371 /* Get the current end of the DSS. */
2375 * Try to shrink the DSS if this chunk is at the end of the
2376 * DSS. The sbrk() call here is subject to a race condition
2377 * with threads that use brk(2) or sbrk(2) directly, but the
2378 * alternative would be to leak memory for the sake of poorly
2379 * designed multi-threaded programs.
2381 if ((void *)((uintptr_t)chunk
+ size
) == dss_max
2382 && (dss_prev
= sbrk(-(intptr_t)size
)) == dss_max
) {
2384 dss_max
= (void *)((intptr_t)dss_prev
- (intptr_t)size
);
2387 RB_REMOVE(extent_tree_szad_s
, &dss_chunks_szad
,
2389 RB_REMOVE(extent_tree_ad_s
, &dss_chunks_ad
,
2391 base_node_dealloc(node
);
2393 malloc_mutex_unlock(&dss_mtx
);
2395 malloc_mutex_unlock(&dss_mtx
);
2397 VirtualAlloc(chunk
, size
, MEM_RESET
, PAGE_READWRITE
);
2398 #elif (defined(DARWIN))
2399 mmap(chunk
, size
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
2400 | MAP_ANON
| MAP_FIXED
, -1, 0);
2402 madvise(chunk
, size
, MADV_FREE
);
2408 malloc_mutex_unlock(&dss_mtx
);
2415 chunk_dealloc_mmap(void *chunk
, size_t size
)
2418 pages_unmap(chunk
, size
);
2422 chunk_dealloc(void *chunk
, size_t size
)
2425 assert(chunk
!= NULL
);
2426 assert(CHUNK_ADDR2BASE(chunk
) == chunk
);
2428 assert((size
& chunksize_mask
) == 0);
2431 stats_chunks
.curchunks
-= (size
/ chunksize
);
2436 if (chunk_dealloc_dss(chunk
, size
) == false)
2442 chunk_dealloc_mmap(chunk
, size
);
2446 * End chunk management functions.
2448 /******************************************************************************/
2454 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2455 * code if necessary).
2457 static inline arena_t
*
2463 * We can only use TLS if this is a PIC library, since for the static
2464 * library version, libc's malloc is used by TLS allocation, which
2465 * introduces a bootstrapping issue.
2468 if (g_isthreaded
== false) {
2469 /* Avoid the overhead of TLS for single-threaded operation. */
2474 ret
= TlsGetValue(tlsIndex
);
2480 ret
= choose_arena_hard();
2481 assert(ret
!= NULL
);
2484 if (g_isthreaded
&& narenas
> 1) {
2488 * Hash _pthread_self() to one of the arenas. There is a prime
2489 * number of arenas, so this has a reasonable chance of
2490 * working. Even so, the hashing can be easily thwarted by
2491 * inconvenient _pthread_self() values. Without specific
2492 * knowledge of how _pthread_self() calculates values, we can't
2493 * easily do much better than this.
2495 ind
= (unsigned long) _pthread_self() % narenas
;
2498 * Optimistially assume that arenas[ind] has been initialized.
2499 * At worst, we find out that some other thread has already
2500 * done so, after acquiring the lock in preparation. Note that
2501 * this lazy locking also has the effect of lazily forcing
2502 * cache coherency; without the lock acquisition, there's no
2503 * guarantee that modification of arenas[ind] by another thread
2504 * would be seen on this CPU for an arbitrary amount of time.
2506 * In general, this approach to modifying a synchronized value
2507 * isn't a good idea, but in this case we only ever modify the
2508 * value once, so things work out well.
2513 * Avoid races with another thread that may have already
2514 * initialized arenas[ind].
2516 malloc_spin_lock(&arenas_lock
);
2517 if (arenas
[ind
] == NULL
)
2518 ret
= arenas_extend((unsigned)ind
);
2521 malloc_spin_unlock(&arenas_lock
);
2527 assert(ret
!= NULL
);
2533 * Choose an arena based on a per-thread value (slow-path code only, called
2534 * only by choose_arena()).
2537 choose_arena_hard(void)
2541 assert(g_isthreaded
);
2543 #ifdef MALLOC_LAZY_FREE
2545 * Seed the PRNG used for lazy deallocation. Since seeding only occurs
2546 * on the first allocation by a thread, it is possible for a thread to
2547 * deallocate before seeding. This is not a critical issue though,
2548 * since it is extremely unusual for an application to to use threads
2549 * that deallocate but *never* allocate, and because even if seeding
2550 * never occurs for multiple threads, they will tend to drift apart
2551 * unless some aspect of the application forces deallocation
2554 SPRN(lazy_free
, (uint32_t)(uintptr_t)(_pthread_self()));
2557 #ifdef MALLOC_BALANCE
2559 * Seed the PRNG used for arena load balancing. We can get away with
2560 * using the same seed here as for the lazy_free PRNG without
2561 * introducing autocorrelation because the PRNG parameters are
2564 SPRN(balance
, (uint32_t)(uintptr_t)(_pthread_self()));
2568 #ifdef MALLOC_BALANCE
2571 ind
= PRN(balance
, narenas_2pow
);
2572 if ((ret
= arenas
[ind
]) == NULL
) {
2573 malloc_spin_lock(&arenas_lock
);
2574 if ((ret
= arenas
[ind
]) == NULL
)
2575 ret
= arenas_extend(ind
);
2576 malloc_spin_unlock(&arenas_lock
);
2579 malloc_spin_lock(&arenas_lock
);
2580 if ((ret
= arenas
[next_arena
]) == NULL
)
2581 ret
= arenas_extend(next_arena
);
2582 next_arena
= (next_arena
+ 1) % narenas
;
2583 malloc_spin_unlock(&arenas_lock
);
2589 TlsSetValue(tlsIndex
, ret
);
2599 arena_chunk_comp(arena_chunk_t
*a
, arena_chunk_t
*b
)
2601 uintptr_t a_chunk
= (uintptr_t)a
;
2602 uintptr_t b_chunk
= (uintptr_t)b
;
2607 return ((a_chunk
> b_chunk
) - (a_chunk
< b_chunk
));
2610 /* Generate red-black tree code for arena chunks. */
2611 RB_GENERATE_STATIC(arena_chunk_tree_s
, arena_chunk_s
, link
, arena_chunk_comp
)
2614 arena_run_comp(arena_run_t
*a
, arena_run_t
*b
)
2616 uintptr_t a_run
= (uintptr_t)a
;
2617 uintptr_t b_run
= (uintptr_t)b
;
2622 return ((a_run
> b_run
) - (a_run
< b_run
));
2625 /* Generate red-black tree code for arena runs. */
2626 RB_GENERATE_STATIC(arena_run_tree_s
, arena_run_s
, link
, arena_run_comp
)
2628 static extent_node_t
*
2629 arena_chunk_node_alloc(arena_chunk_t
*chunk
)
2633 ret
= RB_MIN(extent_tree_ad_s
, &chunk
->nodes
);
2635 RB_REMOVE(extent_tree_ad_s
, &chunk
->nodes
, ret
);
2637 ret
= chunk
->nodes_past
;
2638 chunk
->nodes_past
= (extent_node_t
*)
2639 ((uintptr_t)chunk
->nodes_past
+ sizeof(extent_node_t
));
2640 assert((uintptr_t)ret
+ sizeof(extent_node_t
) <=
2641 (uintptr_t)chunk
+ (arena_chunk_header_npages
<<
2649 arena_chunk_node_dealloc(arena_chunk_t
*chunk
, extent_node_t
*node
)
2652 node
->addr
= (void *)node
;
2653 RB_INSERT(extent_tree_ad_s
, &chunk
->nodes
, node
);
2656 static inline void *
2657 arena_run_reg_alloc(arena_run_t
*run
, arena_bin_t
*bin
)
2660 unsigned i
, mask
, bit
, regind
;
2662 assert(run
->magic
== ARENA_RUN_MAGIC
);
2663 assert(run
->regs_minelm
< bin
->regs_mask_nelms
);
2666 * Move the first check outside the loop, so that run->regs_minelm can
2667 * be updated unconditionally, without the possibility of updating it
2670 i
= run
->regs_minelm
;
2671 mask
= run
->regs_mask
[i
];
2673 /* Usable allocation found. */
2674 bit
= ffs((int)mask
) - 1;
2676 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
2677 assert(regind
< bin
->nregs
);
2678 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
2679 + (bin
->reg_size
* regind
));
2682 mask
^= (1U << bit
);
2683 run
->regs_mask
[i
] = mask
;
2688 for (i
++; i
< bin
->regs_mask_nelms
; i
++) {
2689 mask
= run
->regs_mask
[i
];
2691 /* Usable allocation found. */
2692 bit
= ffs((int)mask
) - 1;
2694 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
2695 assert(regind
< bin
->nregs
);
2696 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
2697 + (bin
->reg_size
* regind
));
2700 mask
^= (1U << bit
);
2701 run
->regs_mask
[i
] = mask
;
2704 * Make a note that nothing before this element
2705 * contains a free region.
2707 run
->regs_minelm
= i
; /* Low payoff: + (mask == 0); */
2718 arena_run_reg_dalloc(arena_run_t
*run
, arena_bin_t
*bin
, void *ptr
, size_t size
)
2721 * To divide by a number D that is not a power of two we multiply
2722 * by (2^21 / D) and then right shift by 21 positions.
2728 * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
2730 #define SIZE_INV_SHIFT 21
2731 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
2732 static const unsigned size_invs
[] = {
2734 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2735 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2736 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2737 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2738 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2739 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2740 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2741 #if (QUANTUM_2POW_MIN < 4)
2743 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
2744 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
2745 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
2746 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
2747 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
2748 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
2749 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
2750 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
2753 unsigned diff
, regind
, elm
, bit
;
2755 assert(run
->magic
== ARENA_RUN_MAGIC
);
2756 assert(((sizeof(size_invs
)) / sizeof(unsigned)) + 3
2757 >= (SMALL_MAX_DEFAULT
>> QUANTUM_2POW_MIN
));
2760 * Avoid doing division with a variable divisor if possible. Using
2761 * actual division here can reduce allocator throughput by over 20%!
2763 diff
= (unsigned)((uintptr_t)ptr
- (uintptr_t)run
- bin
->reg0_offset
);
2764 if ((size
& (size
- 1)) == 0) {
2766 * log2_table allows fast division of a power of two in the
2769 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2771 static const unsigned char log2_table
[] = {
2772 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2775 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2776 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2783 regind
= (diff
>> log2_table
[size
- 1]);
2784 else if (size
<= 32768)
2785 regind
= diff
>> (8 + log2_table
[(size
>> 8) - 1]);
2788 * The run size is too large for us to use the lookup
2789 * table. Use real division.
2791 regind
= diff
/ size
;
2793 } else if (size
<= ((sizeof(size_invs
) / sizeof(unsigned))
2794 << QUANTUM_2POW_MIN
) + 2) {
2795 regind
= size_invs
[(size
>> QUANTUM_2POW_MIN
) - 3] * diff
;
2796 regind
>>= SIZE_INV_SHIFT
;
2799 * size_invs isn't large enough to handle this size class, so
2800 * calculate regind using actual division. This only happens
2801 * if the user increases small_max via the 'S' runtime
2802 * configuration option.
2804 regind
= diff
/ size
;
2806 assert(diff
== regind
* size
);
2807 assert(regind
< bin
->nregs
);
2809 elm
= regind
>> (SIZEOF_INT_2POW
+ 3);
2810 if (elm
< run
->regs_minelm
)
2811 run
->regs_minelm
= elm
;
2812 bit
= regind
- (elm
<< (SIZEOF_INT_2POW
+ 3));
2813 assert((run
->regs_mask
[elm
] & (1U << bit
)) == 0);
2814 run
->regs_mask
[elm
] |= (1U << bit
);
2816 #undef SIZE_INV_SHIFT
2820 arena_run_split(arena_t
*arena
, arena_run_t
*run
, size_t size
, bool small
,
2823 arena_chunk_t
*chunk
;
2824 size_t run_ind
, total_pages
, need_pages
, rem_pages
, i
;
2825 extent_node_t
*nodeA
, *nodeB
, key
;
2827 /* Insert a node into runs_alloced_ad for the first part of the run. */
2828 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
2829 nodeA
= arena_chunk_node_alloc(chunk
);
2832 RB_INSERT(extent_tree_ad_s
, &arena
->runs_alloced_ad
, nodeA
);
2835 nodeB
= RB_FIND(extent_tree_ad_s
, &arena
->runs_avail_ad
, &key
);
2836 assert(nodeB
!= NULL
);
2838 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
2840 total_pages
= nodeB
->size
>> pagesize_2pow
;
2841 need_pages
= (size
>> pagesize_2pow
);
2842 assert(need_pages
> 0);
2843 assert(need_pages
<= total_pages
);
2844 assert(need_pages
<= CHUNK_MAP_POS_MASK
|| small
== false);
2845 rem_pages
= total_pages
- need_pages
;
2847 for (i
= 0; i
< need_pages
; i
++) {
2848 #ifdef MALLOC_DECOMMIT
2850 * Commit decommitted pages if necessary. If a decommitted
2851 * page is encountered, commit all needed adjacent decommitted
2852 * pages in one operation, in order to reduce system call
2855 if (chunk
->map
[run_ind
+ i
] & CHUNK_MAP_DECOMMITTED
) {
2859 * Advance i+j to just past the index of the last page
2860 * to commit. Clear CHUNK_MAP_DECOMMITTED along the
2863 for (j
= 0; i
+ j
< need_pages
&& (chunk
->map
[run_ind
+
2864 i
+ j
] & CHUNK_MAP_DECOMMITTED
); j
++) {
2865 chunk
->map
[run_ind
+ i
+ j
] ^=
2866 CHUNK_MAP_DECOMMITTED
;
2869 pages_commit((void *)((uintptr_t)chunk
+ ((run_ind
+ i
)
2870 << pagesize_2pow
)), (j
<< pagesize_2pow
));
2871 # ifdef MALLOC_STATS
2872 arena
->stats
.ncommit
++;
2877 /* Zero if necessary. */
2879 if ((chunk
->map
[run_ind
+ i
] & CHUNK_MAP_UNTOUCHED
)
2881 memset((void *)((uintptr_t)chunk
+ ((run_ind
2882 + i
) << pagesize_2pow
)), 0, pagesize
);
2883 /* CHUNK_MAP_UNTOUCHED is cleared below. */
2887 /* Update dirty page accounting. */
2888 if (chunk
->map
[run_ind
+ i
] & CHUNK_MAP_DIRTY
) {
2893 /* Initialize the chunk map. */
2895 chunk
->map
[run_ind
+ i
] = (uint8_t)i
;
2897 chunk
->map
[run_ind
+ i
] = CHUNK_MAP_LARGE
;
2900 /* Keep track of trailing unused pages for later use. */
2901 RB_REMOVE(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeB
);
2902 if (rem_pages
> 0) {
2904 * Update nodeB in runs_avail_*. Its position within
2905 * runs_avail_ad does not change.
2907 nodeB
->addr
= (void *)((uintptr_t)nodeB
->addr
+ size
);
2908 nodeB
->size
-= size
;
2909 RB_INSERT(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeB
);
2911 /* Remove nodeB from runs_avail_*. */
2912 RB_REMOVE(extent_tree_ad_s
, &arena
->runs_avail_ad
, nodeB
);
2913 arena_chunk_node_dealloc(chunk
, nodeB
);
2916 chunk
->pages_used
+= need_pages
;
2919 static arena_chunk_t
*
2920 arena_chunk_alloc(arena_t
*arena
)
2922 arena_chunk_t
*chunk
;
2923 extent_node_t
*node
;
2925 if (arena
->spare
!= NULL
) {
2926 chunk
= arena
->spare
;
2927 arena
->spare
= NULL
;
2929 chunk
= (arena_chunk_t
*)chunk_alloc(chunksize
, true);
2933 arena
->stats
.mapped
+= chunksize
;
2936 chunk
->arena
= arena
;
2938 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
2941 * Claim that no pages are in use, since the header is merely
2944 chunk
->pages_used
= 0;
2948 * Initialize the map to contain one maximal free untouched
2951 memset(chunk
->map
, (CHUNK_MAP_LARGE
| CHUNK_MAP_POS_MASK
),
2952 arena_chunk_header_npages
);
2953 memset(&chunk
->map
[arena_chunk_header_npages
],
2954 (CHUNK_MAP_UNTOUCHED
2955 #ifdef MALLOC_DECOMMIT
2956 | CHUNK_MAP_DECOMMITTED
2959 arena_chunk_header_npages
));
2961 /* Initialize the tree of unused extent nodes. */
2962 RB_INIT(&chunk
->nodes
);
2963 chunk
->nodes_past
= (extent_node_t
*)QUANTUM_CEILING(
2964 (uintptr_t)&chunk
->map
[chunk_npages
]);
2966 #ifdef MALLOC_DECOMMIT
2968 * Start out decommitted, in order to force a closer
2969 * correspondence between dirty pages and committed untouched
2972 pages_decommit((void *)((uintptr_t)chunk
+
2973 (arena_chunk_header_npages
<< pagesize_2pow
)),
2974 ((chunk_npages
- arena_chunk_header_npages
) <<
2976 # ifdef MALLOC_STATS
2977 arena
->stats
.ndecommit
++;
2978 arena
->stats
.decommitted
+= (chunk_npages
-
2979 arena_chunk_header_npages
);
2984 /* Insert the run into the runs_avail_* red-black trees. */
2985 node
= arena_chunk_node_alloc(chunk
);
2986 node
->addr
= (void *)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
2988 node
->size
= chunksize
- (arena_chunk_header_npages
<< pagesize_2pow
);
2989 RB_INSERT(extent_tree_szad_s
, &arena
->runs_avail_szad
, node
);
2990 RB_INSERT(extent_tree_ad_s
, &arena
->runs_avail_ad
, node
);
2996 arena_chunk_dealloc(arena_t
*arena
, arena_chunk_t
*chunk
)
2998 extent_node_t
*node
, key
;
3000 if (arena
->spare
!= NULL
) {
3001 RB_REMOVE(arena_chunk_tree_s
, &chunk
->arena
->chunks
,
3003 arena
->ndirty
-= arena
->spare
->ndirty
;
3004 chunk_dealloc((void *)arena
->spare
, chunksize
);
3006 arena
->stats
.mapped
-= chunksize
;
3011 * Remove run from the runs trees, regardless of whether this chunk
3012 * will be cached, so that the arena does not use it. Dirty page
3013 * flushing only uses the chunks tree, so leaving this chunk in that
3014 * tree is sufficient for that purpose.
3016 key
.addr
= (void *)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
3018 node
= RB_FIND(extent_tree_ad_s
, &arena
->runs_avail_ad
, &key
);
3019 assert(node
!= NULL
);
3020 RB_REMOVE(extent_tree_szad_s
, &arena
->runs_avail_szad
, node
);
3021 RB_REMOVE(extent_tree_ad_s
, &arena
->runs_avail_ad
, node
);
3022 arena_chunk_node_dealloc(chunk
, node
);
3024 arena
->spare
= chunk
;
3027 static arena_run_t
*
3028 arena_run_alloc(arena_t
*arena
, size_t size
, bool small
, bool zero
)
3030 arena_chunk_t
*chunk
;
3032 extent_node_t
*node
, key
;
3034 assert(size
<= (chunksize
- (arena_chunk_header_npages
<<
3036 assert((size
& pagesize_mask
) == 0);
3038 /* Search the arena's chunks for the lowest best fit. */
3041 node
= RB_NFIND(extent_tree_szad_s
, &arena
->runs_avail_szad
, &key
);
3043 run
= (arena_run_t
*)node
->addr
;
3044 arena_run_split(arena
, run
, size
, small
, zero
);
3049 * No usable runs. Create a new chunk from which to allocate the run.
3051 chunk
= arena_chunk_alloc(arena
);
3054 run
= (arena_run_t
*)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
3056 /* Update page map. */
3057 arena_run_split(arena
, run
, size
, small
, zero
);
3062 arena_purge(arena_t
*arena
)
3064 arena_chunk_t
*chunk
;
3069 RB_FOREACH(chunk
, arena_chunk_tree_s
, &arena
->chunks
) {
3070 ndirty
+= chunk
->ndirty
;
3072 assert(ndirty
== arena
->ndirty
);
3074 assert(arena
->ndirty
> opt_dirty_max
);
3077 arena
->stats
.npurge
++;
3081 * Iterate downward through chunks until enough dirty memory has been
3084 RB_FOREACH_REVERSE(chunk
, arena_chunk_tree_s
, &arena
->chunks
) {
3085 if (chunk
->ndirty
> 0) {
3088 for (i
= chunk_npages
- 1; i
>=
3089 arena_chunk_header_npages
; i
--) {
3090 if (chunk
->map
[i
] & CHUNK_MAP_DIRTY
) {
3093 chunk
->map
[i
] = (CHUNK_MAP_LARGE
|
3094 #ifdef MALLOC_DECOMMIT
3095 CHUNK_MAP_DECOMMITTED
|
3097 CHUNK_MAP_POS_MASK
);
3100 /* Find adjacent dirty run(s). */
3101 for (npages
= 1; i
>
3102 arena_chunk_header_npages
&&
3103 (chunk
->map
[i
- 1] &
3104 CHUNK_MAP_DIRTY
); npages
++) {
3106 chunk
->map
[i
] = (CHUNK_MAP_LARGE
3107 #ifdef MALLOC_DECOMMIT
3108 | CHUNK_MAP_DECOMMITTED
3110 | CHUNK_MAP_POS_MASK
);
3115 #ifdef MALLOC_DECOMMIT
3116 pages_decommit((void *)((uintptr_t)
3117 chunk
+ (i
<< pagesize_2pow
)),
3118 (npages
<< pagesize_2pow
));
3119 # ifdef MALLOC_STATS
3120 arena
->stats
.ndecommit
++;
3121 arena
->stats
.decommitted
+= npages
;
3124 madvise((void *)((uintptr_t)chunk
+ (i
3125 << pagesize_2pow
)), pagesize
*
3129 arena
->stats
.nmadvise
++;
3130 arena
->stats
.purged
+= npages
;
3139 arena_run_dalloc(arena_t
*arena
, arena_run_t
*run
, bool dirty
)
3141 arena_chunk_t
*chunk
;
3142 extent_node_t
*nodeA
, *nodeB
, *nodeC
, key
;
3143 size_t size
, run_ind
, run_pages
;
3145 /* Remove run from runs_alloced_ad. */
3147 nodeB
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
, &key
);
3148 assert(nodeB
!= NULL
);
3149 RB_REMOVE(extent_tree_ad_s
, &arena
->runs_alloced_ad
, nodeB
);
3152 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
3153 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
3155 assert(run_ind
>= arena_chunk_header_npages
);
3156 assert(run_ind
< (chunksize
>> pagesize_2pow
));
3157 run_pages
= (size
>> pagesize_2pow
);
3159 /* Subtract pages from count of pages used in chunk. */
3160 chunk
->pages_used
-= run_pages
;
3165 for (i
= 0; i
< run_pages
; i
++) {
3166 assert((chunk
->map
[run_ind
+ i
] & CHUNK_MAP_DIRTY
) ==
3168 chunk
->map
[run_ind
+ i
] |= CHUNK_MAP_DIRTY
;
3174 /* Set map elements to a bogus value in order to aid error detection. */
3178 for (i
= 0; i
< run_pages
; i
++) {
3179 chunk
->map
[run_ind
+ i
] |= (CHUNK_MAP_LARGE
|
3180 CHUNK_MAP_POS_MASK
);
3185 /* Try to coalesce forward. */
3186 key
.addr
= (void *)((uintptr_t)run
+ size
);
3187 nodeC
= RB_NFIND(extent_tree_ad_s
, &arena
->runs_avail_ad
, &key
);
3188 if (nodeC
!= NULL
&& nodeC
->addr
== key
.addr
) {
3190 * Coalesce forward. This does not change the position within
3191 * runs_avail_ad, so only remove/insert from/into
3194 RB_REMOVE(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeC
);
3195 nodeC
->addr
= (void *)run
;
3196 nodeC
->size
+= size
;
3197 RB_INSERT(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeC
);
3198 arena_chunk_node_dealloc(chunk
, nodeB
);
3202 * Coalescing forward failed, so insert nodeB into runs_avail_*.
3204 RB_INSERT(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeB
);
3205 RB_INSERT(extent_tree_ad_s
, &arena
->runs_avail_ad
, nodeB
);
3208 /* Try to coalesce backward. */
3209 nodeA
= RB_PREV(extent_tree_ad_s
, &arena
->runs_avail_ad
, nodeB
);
3210 if (nodeA
!= NULL
&& (void *)((uintptr_t)nodeA
->addr
+ nodeA
->size
) ==
3213 * Coalesce with previous run. This does not change nodeB's
3214 * position within runs_avail_ad, so only remove/insert
3215 * from/into runs_avail_szad.
3217 RB_REMOVE(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeA
);
3218 RB_REMOVE(extent_tree_ad_s
, &arena
->runs_avail_ad
, nodeA
);
3220 RB_REMOVE(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeB
);
3221 nodeB
->addr
= nodeA
->addr
;
3222 nodeB
->size
+= nodeA
->size
;
3223 RB_INSERT(extent_tree_szad_s
, &arena
->runs_avail_szad
, nodeB
);
3225 arena_chunk_node_dealloc(chunk
, nodeA
);
3228 /* Deallocate chunk if it is now completely unused. */
3229 if (chunk
->pages_used
== 0)
3230 arena_chunk_dealloc(arena
, chunk
);
3232 /* Enforce opt_dirty_max. */
3233 if (arena
->ndirty
> opt_dirty_max
)
3238 arena_run_trim_head(arena_t
*arena
, arena_chunk_t
*chunk
, extent_node_t
*nodeB
,
3239 arena_run_t
*run
, size_t oldsize
, size_t newsize
)
3241 extent_node_t
*nodeA
;
3243 assert(nodeB
->addr
== run
);
3244 assert(nodeB
->size
== oldsize
);
3245 assert(oldsize
> newsize
);
3248 * Update the run's node in runs_alloced_ad. Its position does not
3251 nodeB
->addr
= (void *)((uintptr_t)run
+ (oldsize
- newsize
));
3252 nodeB
->size
= newsize
;
3255 * Insert a node into runs_alloced_ad so that arena_run_dalloc() can
3256 * treat the leading run as separately allocated.
3258 nodeA
= arena_chunk_node_alloc(chunk
);
3259 nodeA
->addr
= (void *)run
;
3260 nodeA
->size
= oldsize
- newsize
;
3261 RB_INSERT(extent_tree_ad_s
, &arena
->runs_alloced_ad
, nodeA
);
3263 arena_run_dalloc(arena
, (arena_run_t
*)run
, false);
3267 arena_run_trim_tail(arena_t
*arena
, arena_chunk_t
*chunk
, extent_node_t
*nodeA
,
3268 arena_run_t
*run
, size_t oldsize
, size_t newsize
, bool dirty
)
3270 extent_node_t
*nodeB
;
3272 assert(nodeA
->addr
== run
);
3273 assert(nodeA
->size
== oldsize
);
3274 assert(oldsize
> newsize
);
3277 * Update the run's node in runs_alloced_ad. Its position does not
3280 nodeA
->size
= newsize
;
3283 * Insert a node into runs_alloced_ad so that arena_run_dalloc() can
3284 * treat the trailing run as separately allocated.
3286 nodeB
= arena_chunk_node_alloc(chunk
);
3287 nodeB
->addr
= (void *)((uintptr_t)run
+ newsize
);
3288 nodeB
->size
= oldsize
- newsize
;
3289 RB_INSERT(extent_tree_ad_s
, &arena
->runs_alloced_ad
, nodeB
);
3291 arena_run_dalloc(arena
, (arena_run_t
*)((uintptr_t)run
+ newsize
),
3295 static arena_run_t
*
3296 arena_bin_nonfull_run_get(arena_t
*arena
, arena_bin_t
*bin
)
3299 unsigned i
, remainder
;
3301 /* Look for a usable run. */
3302 if ((run
= RB_MIN(arena_run_tree_s
, &bin
->runs
)) != NULL
) {
3303 /* run is guaranteed to have available space. */
3304 RB_REMOVE(arena_run_tree_s
, &bin
->runs
, run
);
3306 bin
->stats
.reruns
++;
3310 /* No existing runs have any space available. */
3312 /* Allocate a new run. */
3313 run
= arena_run_alloc(arena
, bin
->run_size
, true, false);
3317 /* Initialize run internals. */
3320 for (i
= 0; i
< bin
->regs_mask_nelms
; i
++)
3321 run
->regs_mask
[i
] = UINT_MAX
;
3322 remainder
= bin
->nregs
& ((1U << (SIZEOF_INT_2POW
+ 3)) - 1);
3323 if (remainder
!= 0) {
3324 /* The last element has spare bits that need to be unset. */
3325 run
->regs_mask
[i
] = (UINT_MAX
>> ((1U << (SIZEOF_INT_2POW
+ 3))
3329 run
->regs_minelm
= 0;
3331 run
->nfree
= bin
->nregs
;
3333 run
->magic
= ARENA_RUN_MAGIC
;
3338 bin
->stats
.curruns
++;
3339 if (bin
->stats
.curruns
> bin
->stats
.highruns
)
3340 bin
->stats
.highruns
= bin
->stats
.curruns
;
3345 /* bin->runcur must have space available before this function is called. */
3346 static inline void *
3347 arena_bin_malloc_easy(arena_t
*arena
, arena_bin_t
*bin
, arena_run_t
*run
)
3350 arena
= 0; /* this is just to quiet a compiler warning */
3352 assert(run
->magic
== ARENA_RUN_MAGIC
);
3353 assert(run
->nfree
> 0);
3355 ret
= arena_run_reg_alloc(run
, bin
);
3356 assert(ret
!= NULL
);
3362 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3364 arena_bin_malloc_hard(arena_t
*arena
, arena_bin_t
*bin
)
3367 bin
->runcur
= arena_bin_nonfull_run_get(arena
, bin
);
3368 if (bin
->runcur
== NULL
)
3370 assert(bin
->runcur
->magic
== ARENA_RUN_MAGIC
);
3371 assert(bin
->runcur
->nfree
> 0);
3373 return (arena_bin_malloc_easy(arena
, bin
, bin
->runcur
));
3377 * Calculate bin->run_size such that it meets the following constraints:
3379 * *) bin->run_size >= min_run_size
3380 * *) bin->run_size <= arena_maxclass
3381 * *) bin->run_size <= RUN_MAX_SMALL
3382 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3384 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3385 * also calculated here, since these settings are all interdependent.
3388 arena_bin_run_size_calc(arena_bin_t
*bin
, size_t min_run_size
)
3390 size_t try_run_size
, good_run_size
;
3391 unsigned good_nregs
, good_mask_nelms
, good_reg0_offset
;
3392 unsigned try_nregs
, try_mask_nelms
, try_reg0_offset
;
3394 assert(min_run_size
>= pagesize
);
3395 assert(min_run_size
<= arena_maxclass
);
3396 assert(min_run_size
<= RUN_MAX_SMALL
);
3399 * Calculate known-valid settings before entering the run_size
3400 * expansion loop, so that the first part of the loop always copies
3403 * The do..while loop iteratively reduces the number of regions until
3404 * the run header and the regions no longer overlap. A closed formula
3405 * would be quite messy, since there is an interdependency between the
3406 * header's mask length and the number of regions.
3408 try_run_size
= min_run_size
;
3409 try_nregs
= ((try_run_size
- sizeof(arena_run_t
)) / bin
->reg_size
)
3410 + 1; /* Counter-act try_nregs-- in loop. */
3413 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
3414 ((try_nregs
& ((1U << (SIZEOF_INT_2POW
+ 3)) - 1)) ? 1 : 0);
3415 try_reg0_offset
= try_run_size
- (try_nregs
* bin
->reg_size
);
3416 } while (sizeof(arena_run_t
) + (sizeof(unsigned) * (try_mask_nelms
- 1))
3419 /* run_size expansion loop. */
3422 * Copy valid settings before trying more aggressive settings.
3424 good_run_size
= try_run_size
;
3425 good_nregs
= try_nregs
;
3426 good_mask_nelms
= try_mask_nelms
;
3427 good_reg0_offset
= try_reg0_offset
;
3429 /* Try more aggressive settings. */
3430 try_run_size
+= pagesize
;
3431 try_nregs
= ((try_run_size
- sizeof(arena_run_t
)) /
3432 bin
->reg_size
) + 1; /* Counter-act try_nregs-- in loop. */
3435 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
3436 ((try_nregs
& ((1U << (SIZEOF_INT_2POW
+ 3)) - 1)) ?
3438 try_reg0_offset
= try_run_size
- (try_nregs
*
3440 } while (sizeof(arena_run_t
) + (sizeof(unsigned) *
3441 (try_mask_nelms
- 1)) > try_reg0_offset
);
3442 } while (try_run_size
<= arena_maxclass
&& try_run_size
<= RUN_MAX_SMALL
3443 && RUN_MAX_OVRHD
* (bin
->reg_size
<< 3) > RUN_MAX_OVRHD_RELAX
3444 && (try_reg0_offset
<< RUN_BFP
) > RUN_MAX_OVRHD
* try_run_size
);
3446 assert(sizeof(arena_run_t
) + (sizeof(unsigned) * (good_mask_nelms
- 1))
3447 <= good_reg0_offset
);
3448 assert((good_mask_nelms
<< (SIZEOF_INT_2POW
+ 3)) >= good_nregs
);
3450 /* Copy final settings. */
3451 bin
->run_size
= good_run_size
;
3452 bin
->nregs
= good_nregs
;
3453 bin
->regs_mask_nelms
= good_mask_nelms
;
3454 bin
->reg0_offset
= good_reg0_offset
;
3456 return (good_run_size
);
3459 #ifdef MALLOC_BALANCE
3461 arena_lock_balance(arena_t
*arena
)
3463 unsigned contention
;
3465 contention
= malloc_spin_lock(&arena
->lock
);
3468 * Calculate the exponentially averaged contention for this
3469 * arena. Due to integer math always rounding down, this value
3470 * decays somewhat faster then normal.
3472 arena
->contention
= (((uint64_t)arena
->contention
3473 * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW
)-1))
3474 + (uint64_t)contention
) >> BALANCE_ALPHA_INV_2POW
;
3475 if (arena
->contention
>= opt_balance_threshold
)
3476 arena_lock_balance_hard(arena
);
3481 arena_lock_balance_hard(arena_t
*arena
)
3485 arena
->contention
= 0;
3487 arena
->stats
.nbalance
++;
3489 ind
= PRN(balance
, narenas_2pow
);
3490 if (arenas
[ind
] != NULL
) {
3492 TlsSetValue(tlsIndex
, arenas
[ind
]);
3494 arenas_map
= arenas
[ind
];
3497 malloc_spin_lock(&arenas_lock
);
3498 if (arenas
[ind
] != NULL
) {
3500 TlsSetValue(tlsIndex
, arenas
[ind
]);
3502 arenas_map
= arenas
[ind
];
3506 TlsSetValue(tlsIndex
, arenas_extend(ind
));
3508 arenas_map
= arenas_extend(ind
);
3511 malloc_spin_unlock(&arenas_lock
);
3516 static inline void *
3517 arena_malloc_small(arena_t
*arena
, size_t size
, bool zero
)
3523 if (size
< small_min
) {
3525 size
= pow2_ceil(size
);
3526 bin
= &arena
->bins
[ffs((int)(size
>> (TINY_MIN_2POW
+
3528 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
3530 * Bin calculation is always correct, but we may need
3531 * to fix size for the purposes of assertions and/or
3534 if (size
< (1U << TINY_MIN_2POW
))
3535 size
= (1U << TINY_MIN_2POW
);
3537 } else if (size
<= small_max
) {
3538 /* Quantum-spaced. */
3539 size
= QUANTUM_CEILING(size
);
3540 bin
= &arena
->bins
[ntbins
+ (size
>> opt_quantum_2pow
)
3544 size
= pow2_ceil(size
);
3545 bin
= &arena
->bins
[ntbins
+ nqbins
3546 + (ffs((int)(size
>> opt_small_max_2pow
)) - 2)];
3548 assert(size
== bin
->reg_size
);
3550 #ifdef MALLOC_BALANCE
3551 arena_lock_balance(arena
);
3553 malloc_spin_lock(&arena
->lock
);
3555 if ((run
= bin
->runcur
) != NULL
&& run
->nfree
> 0)
3556 ret
= arena_bin_malloc_easy(arena
, bin
, run
);
3558 ret
= arena_bin_malloc_hard(arena
, bin
);
3561 malloc_spin_unlock(&arena
->lock
);
3566 bin
->stats
.nrequests
++;
3567 arena
->stats
.nmalloc_small
++;
3568 arena
->stats
.allocated_small
+= size
;
3570 malloc_spin_unlock(&arena
->lock
);
3572 if (zero
== false) {
3575 memset(ret
, 0xa5, size
);
3577 memset(ret
, 0, size
);
3580 memset(ret
, 0, size
);
3586 arena_malloc_large(arena_t
*arena
, size_t size
, bool zero
)
3590 /* Large allocation. */
3591 size
= PAGE_CEILING(size
);
3592 #ifdef MALLOC_BALANCE
3593 arena_lock_balance(arena
);
3595 malloc_spin_lock(&arena
->lock
);
3597 ret
= (void *)arena_run_alloc(arena
, size
, false, zero
);
3599 malloc_spin_unlock(&arena
->lock
);
3603 arena
->stats
.nmalloc_large
++;
3604 arena
->stats
.allocated_large
+= size
;
3606 malloc_spin_unlock(&arena
->lock
);
3608 if (zero
== false) {
3611 memset(ret
, 0xa5, size
);
3613 memset(ret
, 0, size
);
3620 static inline void *
3621 arena_malloc(arena_t
*arena
, size_t size
, bool zero
)
3624 assert(arena
!= NULL
);
3625 assert(arena
->magic
== ARENA_MAGIC
);
3627 assert(QUANTUM_CEILING(size
) <= arena_maxclass
);
3629 /* #ifdef USE_STATS_MEMORY */
3630 /* arena->mi.uordblks += size; */
3632 if (size
<= bin_maxclass
) {
3633 return (arena_malloc_small(arena
, size
, zero
));
3635 return (arena_malloc_large(arena
, size
, zero
));
3638 static inline void *
3639 imalloc(size_t size
)
3643 if (size
<= arena_maxclass
)
3644 return (arena_malloc(choose_arena(), size
, false));
3646 return (huge_malloc(size
, false));
3649 static inline void *
3650 icalloc(size_t size
)
3652 if (size
<= arena_maxclass
)
3653 return (arena_malloc(choose_arena(), size
, true));
3655 return (huge_malloc(size
, true));
3658 /* Only handles large allocations that require more than page alignment. */
3660 arena_palloc(arena_t
*arena
, size_t alignment
, size_t size
, size_t alloc_size
)
3664 arena_chunk_t
*chunk
;
3665 extent_node_t
*node
, key
;
3667 assert((size
& pagesize_mask
) == 0);
3668 assert((alignment
& pagesize_mask
) == 0);
3670 #ifdef MALLOC_BALANCE
3671 arena_lock_balance(arena
);
3673 malloc_spin_lock(&arena
->lock
);
3675 ret
= (void *)arena_run_alloc(arena
, alloc_size
, false, false);
3677 malloc_spin_unlock(&arena
->lock
);
3681 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ret
);
3683 offset
= (uintptr_t)ret
& (alignment
- 1);
3684 assert((offset
& pagesize_mask
) == 0);
3685 assert(offset
< alloc_size
);
3688 * Update the run's node in runs_alloced_ad. Its position
3692 node
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
, &key
);
3693 assert(node
!= NULL
);
3695 arena_run_trim_tail(arena
, chunk
, node
, ret
, alloc_size
, size
,
3698 size_t leadsize
, trailsize
;
3701 * Update the run's node in runs_alloced_ad. Its position
3705 node
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
, &key
);
3706 assert(node
!= NULL
);
3708 leadsize
= alignment
- offset
;
3710 arena_run_trim_head(arena
, chunk
, node
, ret
, alloc_size
,
3711 alloc_size
- leadsize
);
3712 ret
= (void *)((uintptr_t)ret
+ leadsize
);
3715 trailsize
= alloc_size
- leadsize
- size
;
3716 if (trailsize
!= 0) {
3717 /* Trim trailing space. */
3718 assert(trailsize
< alloc_size
);
3719 arena_run_trim_tail(arena
, chunk
, node
, ret
, size
+
3720 trailsize
, size
, false);
3725 arena
->stats
.nmalloc_large
++;
3726 arena
->stats
.allocated_large
+= size
;
3728 malloc_spin_unlock(&arena
->lock
);
3732 memset(ret
, 0xa5, size
);
3734 memset(ret
, 0, size
);
3739 static inline void *
3740 ipalloc(size_t alignment
, size_t size
)
3746 * Round size up to the nearest multiple of alignment.
3748 * This done, we can take advantage of the fact that for each small
3749 * size class, every object is aligned at the smallest power of two
3750 * that is non-zero in the base two representation of the size. For
3753 * Size | Base 2 | Minimum alignment
3754 * -----+----------+------------------
3756 * 144 | 10100000 | 32
3757 * 192 | 11000000 | 64
3759 * Depending on runtime settings, it is possible that arena_malloc()
3760 * will further round up to a power of two, but that never causes
3761 * correctness issues.
3763 ceil_size
= (size
+ (alignment
- 1)) & (-alignment
);
3765 * (ceil_size < size) protects against the combination of maximal
3766 * alignment and size greater than maximal alignment.
3768 if (ceil_size
< size
) {
3769 /* size_t overflow. */
3773 if (ceil_size
<= pagesize
|| (alignment
<= pagesize
3774 && ceil_size
<= arena_maxclass
))
3775 ret
= arena_malloc(choose_arena(), ceil_size
, false);
3780 * We can't achieve sub-page alignment, so round up alignment
3781 * permanently; it makes later calculations simpler.
3783 alignment
= PAGE_CEILING(alignment
);
3784 ceil_size
= PAGE_CEILING(size
);
3786 * (ceil_size < size) protects against very large sizes within
3787 * pagesize of SIZE_T_MAX.
3789 * (ceil_size + alignment < ceil_size) protects against the
3790 * combination of maximal alignment and ceil_size large enough
3791 * to cause overflow. This is similar to the first overflow
3792 * check above, but it needs to be repeated due to the new
3793 * ceil_size value, which may now be *equal* to maximal
3794 * alignment, whereas before we only detected overflow if the
3795 * original size was *greater* than maximal alignment.
3797 if (ceil_size
< size
|| ceil_size
+ alignment
< ceil_size
) {
3798 /* size_t overflow. */
3803 * Calculate the size of the over-size run that arena_palloc()
3804 * would need to allocate in order to guarantee the alignment.
3806 if (ceil_size
>= alignment
)
3807 run_size
= ceil_size
+ alignment
- pagesize
;
3810 * It is possible that (alignment << 1) will cause
3811 * overflow, but it doesn't matter because we also
3812 * subtract pagesize, which in the case of overflow
3813 * leaves us with a very large run_size. That causes
3814 * the first conditional below to fail, which means
3815 * that the bogus run_size value never gets used for
3816 * anything important.
3818 run_size
= (alignment
<< 1) - pagesize
;
3821 if (run_size
<= arena_maxclass
) {
3822 ret
= arena_palloc(choose_arena(), alignment
, ceil_size
,
3824 } else if (alignment
<= chunksize
)
3825 ret
= huge_malloc(ceil_size
, false);
3827 ret
= huge_palloc(alignment
, ceil_size
);
3830 assert(((uintptr_t)ret
& (alignment
- 1)) == 0);
3834 /* Return the size of the allocation pointed to by ptr. */
3836 arena_salloc(const void *ptr
)
3839 arena_chunk_t
*chunk
;
3840 arena_chunk_map_t mapelm
;
3843 assert(ptr
!= NULL
);
3844 assert(CHUNK_ADDR2BASE(ptr
) != ptr
);
3846 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
3847 pageind
= (((uintptr_t)ptr
- (uintptr_t)chunk
) >> pagesize_2pow
);
3848 mapelm
= chunk
->map
[pageind
];
3849 if ((mapelm
& CHUNK_MAP_LARGE
) == 0) {
3852 /* Small allocation size is in the run header. */
3853 pageind
-= (mapelm
& CHUNK_MAP_POS_MASK
);
3854 run
= (arena_run_t
*)((uintptr_t)chunk
+ (pageind
<<
3856 assert(run
->magic
== ARENA_RUN_MAGIC
);
3857 ret
= run
->bin
->reg_size
;
3859 arena_t
*arena
= chunk
->arena
;
3860 extent_node_t
*node
, key
;
3862 /* Large allocation size is in the extent tree. */
3863 assert((mapelm
& CHUNK_MAP_POS_MASK
) == 0);
3864 arena
= chunk
->arena
;
3865 malloc_spin_lock(&arena
->lock
);
3866 key
.addr
= (void *)ptr
;
3867 node
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
, &key
);
3868 assert(node
!= NULL
);
3870 malloc_spin_unlock(&arena
->lock
);
3876 static inline size_t
3877 isalloc(const void *ptr
)
3880 arena_chunk_t
*chunk
;
3882 assert(ptr
!= NULL
);
3884 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
3887 assert(chunk
->arena
->magic
== ARENA_MAGIC
);
3889 ret
= arena_salloc(ptr
);
3891 extent_node_t
*node
, key
;
3893 /* Chunk (huge allocation). */
3895 malloc_mutex_lock(&huge_mtx
);
3897 /* Extract from tree of huge allocations. */
3898 key
.addr
= __DECONST(void *, ptr
);
3899 node
= RB_FIND(extent_tree_ad_s
, &huge
, &key
);
3900 assert(node
!= NULL
);
3904 malloc_mutex_unlock(&huge_mtx
);
3911 arena_dalloc_small(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
,
3912 size_t pageind
, arena_chunk_map_t mapelm
)
3918 pageind
-= (mapelm
& CHUNK_MAP_POS_MASK
);
3920 run
= (arena_run_t
*)((uintptr_t)chunk
+ (pageind
<< pagesize_2pow
));
3921 assert(run
->magic
== ARENA_RUN_MAGIC
);
3923 size
= bin
->reg_size
;
3925 /* #ifdef USE_STATS_MEMORY */
3926 /* arena->mi.fordblks += size; */
3930 memset(ptr
, 0x5a, size
);
3933 arena_run_reg_dalloc(run
, bin
, ptr
, size
);
3936 if (run
->nfree
== bin
->nregs
) {
3937 /* Deallocate run. */
3938 if (run
== bin
->runcur
)
3940 else if (bin
->nregs
!= 1) {
3942 * This block's conditional is necessary because if the
3943 * run only contains one region, then it never gets
3944 * inserted into the non-full runs tree.
3946 RB_REMOVE(arena_run_tree_s
, &bin
->runs
, run
);
3951 arena_run_dalloc(arena
, run
, true);
3953 bin
->stats
.curruns
--;
3955 } else if (run
->nfree
== 1 && run
!= bin
->runcur
) {
3957 * Make sure that bin->runcur always refers to the lowest
3958 * non-full run, if one exists.
3960 if (bin
->runcur
== NULL
)
3962 else if ((uintptr_t)run
< (uintptr_t)bin
->runcur
) {
3963 /* Switch runcur. */
3964 if (bin
->runcur
->nfree
> 0) {
3965 /* Insert runcur. */
3966 RB_INSERT(arena_run_tree_s
, &bin
->runs
,
3971 RB_INSERT(arena_run_tree_s
, &bin
->runs
, run
);
3974 arena
->stats
.allocated_small
-= size
;
3975 arena
->stats
.ndalloc_small
++;
3979 #ifdef MALLOC_LAZY_FREE
3981 arena_dalloc_lazy(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
,
3982 size_t pageind
, arena_chunk_map_t
*mapelm
)
3984 void **free_cache
= arena
->free_cache
;
3987 if (g_isthreaded
== false || opt_lazy_free_2pow
< 0) {
3988 malloc_spin_lock(&arena
->lock
);
3989 arena_dalloc_small(arena
, chunk
, ptr
, pageind
, *mapelm
);
3990 malloc_spin_unlock(&arena
->lock
);
3994 for (i
= 0; i
< LAZY_FREE_NPROBES
; i
++) {
3995 slot
= PRN(lazy_free
, opt_lazy_free_2pow
);
3996 if (atomic_cmpset_ptr((uintptr_t *)&free_cache
[slot
],
3997 (uintptr_t)NULL
, (uintptr_t)ptr
)) {
4002 arena_dalloc_lazy_hard(arena
, chunk
, ptr
, pageind
, mapelm
);
4006 arena_dalloc_lazy_hard(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
,
4007 size_t pageind
, arena_chunk_map_t
*mapelm
)
4009 void **free_cache
= arena
->free_cache
;
4012 malloc_spin_lock(&arena
->lock
);
4013 arena_dalloc_small(arena
, chunk
, ptr
, pageind
, *mapelm
);
4016 * Check whether another thread already cleared the cache. It is
4017 * possible that another thread cleared the cache *and* this slot was
4018 * already refilled, which could result in a mostly fruitless cache
4019 * sweep, but such a sequence of events causes no correctness issues.
4021 if ((ptr
= (void *)atomic_readandclear_ptr(
4022 (uintptr_t *)&free_cache
[slot
]))
4024 unsigned lazy_free_mask
;
4027 * Clear the cache, since we failed to find a slot. It is
4028 * possible that other threads will continue to insert objects
4029 * into the cache while this one sweeps, but that is okay,
4030 * since on average the cache is still swept with the same
4034 /* Handle pointer at current slot. */
4035 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
4036 pageind
= (((uintptr_t)ptr
- (uintptr_t)chunk
) >>
4038 mapelm
= &chunk
->map
[pageind
];
4039 arena_dalloc_small(arena
, chunk
, ptr
, pageind
, *mapelm
);
4041 /* Sweep remainder of slots. */
4042 lazy_free_mask
= (1U << opt_lazy_free_2pow
) - 1;
4043 for (i
= (slot
+ 1) & lazy_free_mask
;
4045 i
= (i
+ 1) & lazy_free_mask
) {
4046 ptr
= (void *)atomic_readandclear_ptr(
4047 (uintptr_t *)&free_cache
[i
]);
4049 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
4050 pageind
= (((uintptr_t)ptr
- (uintptr_t)chunk
)
4052 mapelm
= &chunk
->map
[pageind
];
4053 arena_dalloc_small(arena
, chunk
, ptr
, pageind
,
4059 malloc_spin_unlock(&arena
->lock
);
4064 arena_dalloc_large(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
)
4066 /* Large allocation. */
4067 malloc_spin_lock(&arena
->lock
);
4069 chunk
= 0; /* this is just to quiet a compiler warning */
4071 #ifndef MALLOC_STATS
4076 extent_node_t
*node
, key
;
4080 node
= RB_FIND(extent_tree_ad_s
,
4081 &arena
->runs_alloced_ad
, &key
);
4082 assert(node
!= NULL
);
4088 memset(ptr
, 0x5a, size
);
4090 /* #ifdef USE_STATS_MEMORY */
4091 /* arena->mi.fordblks += size; */
4094 arena
->stats
.allocated_large
-= size
;
4098 arena
->stats
.ndalloc_large
++;
4101 arena_run_dalloc(arena
, (arena_run_t
*)ptr
, true);
4102 malloc_spin_unlock(&arena
->lock
);
4106 arena_dalloc(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
)
4109 arena_chunk_map_t
*mapelm
;
4111 assert(arena
!= NULL
);
4112 assert(arena
->magic
== ARENA_MAGIC
);
4113 assert(chunk
->arena
== arena
);
4114 assert(ptr
!= NULL
);
4115 assert(CHUNK_ADDR2BASE(ptr
) != ptr
);
4117 pageind
= (((uintptr_t)ptr
- (uintptr_t)chunk
) >> pagesize_2pow
);
4118 mapelm
= &chunk
->map
[pageind
];
4119 if ((*mapelm
& CHUNK_MAP_LARGE
) == 0) {
4120 /* Small allocation. */
4121 #ifdef MALLOC_LAZY_FREE
4122 arena_dalloc_lazy(arena
, chunk
, ptr
, pageind
, mapelm
);
4124 malloc_spin_lock(&arena
->lock
);
4125 arena_dalloc_small(arena
, chunk
, ptr
, pageind
, *mapelm
);
4126 malloc_spin_unlock(&arena
->lock
);
4129 assert((*mapelm
& CHUNK_MAP_POS_MASK
) == 0);
4130 arena_dalloc_large(arena
, chunk
, ptr
);
4137 arena_chunk_t
*chunk
;
4139 assert(ptr
!= NULL
);
4141 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
4143 arena_dalloc(chunk
->arena
, chunk
, ptr
);
4149 arena_ralloc_large_shrink(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
,
4150 size_t size
, size_t oldsize
)
4152 extent_node_t
*node
, key
;
4154 assert(size
< oldsize
);
4157 * Shrink the run, and make trailing pages available for other
4160 key
.addr
= (void *)((uintptr_t)ptr
);
4161 #ifdef MALLOC_BALANCE
4162 arena_lock_balance(arena
);
4164 malloc_spin_lock(&arena
->lock
);
4166 node
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
, &key
);
4167 assert(node
!= NULL
);
4168 arena_run_trim_tail(arena
, chunk
, node
, (arena_run_t
*)ptr
, oldsize
,
4171 arena
->stats
.allocated_large
-= oldsize
- size
;
4173 malloc_spin_unlock(&arena
->lock
);
4177 arena_ralloc_large_grow(arena_t
*arena
, arena_chunk_t
*chunk
, void *ptr
,
4178 size_t size
, size_t oldsize
)
4180 extent_node_t
*nodeC
, key
;
4182 /* Try to extend the run. */
4183 assert(size
> oldsize
);
4184 key
.addr
= (void *)((uintptr_t)ptr
+ oldsize
);
4185 #ifdef MALLOC_BALANCE
4186 arena_lock_balance(arena
);
4188 malloc_spin_lock(&arena
->lock
);
4190 nodeC
= RB_FIND(extent_tree_ad_s
, &arena
->runs_avail_ad
, &key
);
4191 if (nodeC
!= NULL
&& oldsize
+ nodeC
->size
>= size
) {
4192 extent_node_t
*nodeA
, *nodeB
;
4195 * The next run is available and sufficiently large. Split the
4196 * following run, then merge the first part with the existing
4197 * allocation. This results in a bit more tree manipulation
4198 * than absolutely necessary, but it substantially simplifies
4201 arena_run_split(arena
, (arena_run_t
*)nodeC
->addr
, size
-
4202 oldsize
, false, false);
4205 nodeA
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
,
4207 assert(nodeA
!= NULL
);
4209 key
.addr
= (void *)((uintptr_t)ptr
+ oldsize
);
4210 nodeB
= RB_FIND(extent_tree_ad_s
, &arena
->runs_alloced_ad
,
4212 assert(nodeB
!= NULL
);
4214 nodeA
->size
+= nodeB
->size
;
4216 RB_REMOVE(extent_tree_ad_s
, &arena
->runs_alloced_ad
, nodeB
);
4217 arena_chunk_node_dealloc(chunk
, nodeB
);
4220 arena
->stats
.allocated_large
+= size
- oldsize
;
4222 malloc_spin_unlock(&arena
->lock
);
4225 malloc_spin_unlock(&arena
->lock
);
4231 * Try to resize a large allocation, in order to avoid copying. This will
4232 * always fail if growing an object, and the following run is already in use.
4235 arena_ralloc_large(void *ptr
, size_t size
, size_t oldsize
)
4239 psize
= PAGE_CEILING(size
);
4240 if (psize
== oldsize
) {
4241 /* Same size class. */
4243 if (opt_junk
&& size
< oldsize
) {
4244 memset((void *)((uintptr_t)ptr
+ size
), 0x5a, oldsize
-
4250 arena_chunk_t
*chunk
;
4253 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
4254 arena
= chunk
->arena
;
4255 assert(arena
->magic
== ARENA_MAGIC
);
4257 if (psize
< oldsize
) {
4259 /* Fill before shrinking in order avoid a race. */
4261 memset((void *)((uintptr_t)ptr
+ size
), 0x5a,
4265 arena_ralloc_large_shrink(arena
, chunk
, ptr
, psize
,
4269 bool ret
= arena_ralloc_large_grow(arena
, chunk
, ptr
,
4272 if (ret
== false && opt_zero
) {
4273 memset((void *)((uintptr_t)ptr
+ oldsize
), 0,
4283 arena_ralloc(void *ptr
, size_t size
, size_t oldsize
)
4288 /* Try to avoid moving the allocation. */
4289 if (size
< small_min
) {
4290 if (oldsize
< small_min
&&
4291 ffs((int)(pow2_ceil(size
) >> (TINY_MIN_2POW
+ 1)))
4292 == ffs((int)(pow2_ceil(oldsize
) >> (TINY_MIN_2POW
+ 1))))
4293 goto IN_PLACE
; /* Same size class. */
4294 } else if (size
<= small_max
) {
4295 if (oldsize
>= small_min
&& oldsize
<= small_max
&&
4296 (QUANTUM_CEILING(size
) >> opt_quantum_2pow
)
4297 == (QUANTUM_CEILING(oldsize
) >> opt_quantum_2pow
))
4298 goto IN_PLACE
; /* Same size class. */
4299 } else if (size
<= bin_maxclass
) {
4300 if (oldsize
> small_max
&& oldsize
<= bin_maxclass
&&
4301 pow2_ceil(size
) == pow2_ceil(oldsize
))
4302 goto IN_PLACE
; /* Same size class. */
4303 } else if (oldsize
> bin_maxclass
&& oldsize
<= arena_maxclass
) {
4304 assert(size
> bin_maxclass
);
4305 if (arena_ralloc_large(ptr
, size
, oldsize
) == false)
4310 * If we get here, then size and oldsize are different enough that we
4311 * need to move the object. In that case, fall back to allocating new
4312 * space and copying.
4314 ret
= arena_malloc(choose_arena(), size
, false);
4318 /* Junk/zero-filling were already done by arena_malloc(). */
4319 copysize
= (size
< oldsize
) ? size
: oldsize
;
4321 if (copysize
>= VM_COPY_MIN
)
4322 pages_copy(ret
, ptr
, copysize
);
4325 memcpy(ret
, ptr
, copysize
);
4330 if (opt_junk
&& size
< oldsize
)
4331 memset((void *)((uintptr_t)ptr
+ size
), 0x5a, oldsize
- size
);
4332 else if (opt_zero
&& size
> oldsize
)
4333 memset((void *)((uintptr_t)ptr
+ oldsize
), 0, size
- oldsize
);
4338 static inline void *
4339 iralloc(void *ptr
, size_t size
)
4343 assert(ptr
!= NULL
);
4346 oldsize
= isalloc(ptr
);
4348 if (size
<= arena_maxclass
)
4349 return (arena_ralloc(ptr
, size
, oldsize
));
4351 return (huge_ralloc(ptr
, size
, oldsize
));
4355 arena_new(arena_t
*arena
)
4359 size_t pow2_size
, prev_run_size
;
4361 if (malloc_spin_init(&arena
->lock
))
4365 memset(&arena
->stats
, 0, sizeof(arena_stats_t
));
4368 /* Initialize chunks. */
4369 RB_INIT(&arena
->chunks
);
4370 arena
->spare
= NULL
;
4374 RB_INIT(&arena
->runs_avail_szad
);
4375 RB_INIT(&arena
->runs_avail_ad
);
4376 RB_INIT(&arena
->runs_alloced_ad
);
4378 #ifdef MALLOC_BALANCE
4379 arena
->contention
= 0;
4381 #ifdef MALLOC_LAZY_FREE
4382 if (opt_lazy_free_2pow
>= 0) {
4383 arena
->free_cache
= (void **) base_calloc(1, sizeof(void *)
4384 * (1U << opt_lazy_free_2pow
));
4385 if (arena
->free_cache
== NULL
)
4388 arena
->free_cache
= NULL
;
4391 /* Initialize bins. */
4392 prev_run_size
= pagesize
;
4394 /* (2^n)-spaced tiny bins. */
4395 for (i
= 0; i
< ntbins
; i
++) {
4396 bin
= &arena
->bins
[i
];
4398 RB_INIT(&bin
->runs
);
4400 bin
->reg_size
= (1U << (TINY_MIN_2POW
+ i
));
4402 prev_run_size
= arena_bin_run_size_calc(bin
, prev_run_size
);
4405 memset(&bin
->stats
, 0, sizeof(malloc_bin_stats_t
));
4409 /* Quantum-spaced bins. */
4410 for (; i
< ntbins
+ nqbins
; i
++) {
4411 bin
= &arena
->bins
[i
];
4413 RB_INIT(&bin
->runs
);
4415 bin
->reg_size
= quantum
* (i
- ntbins
+ 1);
4417 pow2_size
= pow2_ceil(quantum
* (i
- ntbins
+ 1));
4418 prev_run_size
= arena_bin_run_size_calc(bin
, prev_run_size
);
4421 memset(&bin
->stats
, 0, sizeof(malloc_bin_stats_t
));
4425 /* (2^n)-spaced sub-page bins. */
4426 for (; i
< ntbins
+ nqbins
+ nsbins
; i
++) {
4427 bin
= &arena
->bins
[i
];
4429 RB_INIT(&bin
->runs
);
4431 bin
->reg_size
= (small_max
<< (i
- (ntbins
+ nqbins
) + 1));
4433 prev_run_size
= arena_bin_run_size_calc(bin
, prev_run_size
);
4436 memset(&bin
->stats
, 0, sizeof(malloc_bin_stats_t
));
4441 arena
->magic
= ARENA_MAGIC
;
4447 /* Create a new arena and insert it into the arenas array at index ind. */
4449 arenas_extend(unsigned ind
)
4453 /* Allocate enough space for trailing bins. */
4454 ret
= (arena_t
*)base_alloc(sizeof(arena_t
)
4455 + (sizeof(arena_bin_t
) * (ntbins
+ nqbins
+ nsbins
- 1)));
4456 if (ret
!= NULL
&& arena_new(ret
) == false) {
4460 /* Only reached if there is an OOM error. */
4463 * OOM here is quite inconvenient to propagate, since dealing with it
4464 * would require a check for failure in the fast path. Instead, punt
4465 * by using arenas[0]. In practice, this is an extremely unlikely
4468 _malloc_message(_getprogname(),
4469 ": (malloc) Error initializing arena\n", "", "");
4479 /******************************************************************************/
4481 * Begin general internal functions.
4485 huge_malloc(size_t size
, bool zero
)
4489 extent_node_t
*node
;
4491 /* Allocate one or more contiguous chunks for this request. */
4493 csize
= CHUNK_CEILING(size
);
4495 /* size is large enough to cause size_t wrap-around. */
4499 /* Allocate an extent node with which to track the chunk. */
4500 node
= base_node_alloc();
4504 ret
= chunk_alloc(csize
, zero
);
4506 base_node_dealloc(node
);
4510 /* Insert node into huge. */
4514 malloc_mutex_lock(&huge_mtx
);
4515 RB_INSERT(extent_tree_ad_s
, &huge
, node
);
4518 huge_allocated
+= csize
;
4520 malloc_mutex_unlock(&huge_mtx
);
4523 if (zero
== false) {
4525 memset(ret
, 0xa5, csize
);
4527 memset(ret
, 0, csize
);
4534 /* Only handles large allocations that require more than chunk alignment. */
4536 huge_palloc(size_t alignment
, size_t size
)
4539 size_t alloc_size
, chunk_size
, offset
;
4540 extent_node_t
*node
;
4543 * This allocation requires alignment that is even larger than chunk
4544 * alignment. This means that huge_malloc() isn't good enough.
4546 * Allocate almost twice as many chunks as are demanded by the size or
4547 * alignment, in order to assure the alignment can be achieved, then
4548 * unmap leading and trailing chunks.
4550 assert(alignment
>= chunksize
);
4552 chunk_size
= CHUNK_CEILING(size
);
4554 if (size
>= alignment
)
4555 alloc_size
= chunk_size
+ alignment
- chunksize
;
4557 alloc_size
= (alignment
<< 1) - chunksize
;
4559 /* Allocate an extent node with which to track the chunk. */
4560 node
= base_node_alloc();
4564 ret
= chunk_alloc(alloc_size
, false);
4566 base_node_dealloc(node
);
4570 offset
= (uintptr_t)ret
& (alignment
- 1);
4571 assert((offset
& chunksize_mask
) == 0);
4572 assert(offset
< alloc_size
);
4574 /* Trim trailing space. */
4575 chunk_dealloc((void *)((uintptr_t)ret
+ chunk_size
), alloc_size
4580 /* Trim leading space. */
4581 chunk_dealloc(ret
, alignment
- offset
);
4583 ret
= (void *)((uintptr_t)ret
+ (alignment
- offset
));
4585 trailsize
= alloc_size
- (alignment
- offset
) - chunk_size
;
4586 if (trailsize
!= 0) {
4587 /* Trim trailing space. */
4588 assert(trailsize
< alloc_size
);
4589 chunk_dealloc((void *)((uintptr_t)ret
+ chunk_size
),
4594 /* Insert node into huge. */
4596 node
->size
= chunk_size
;
4598 malloc_mutex_lock(&huge_mtx
);
4599 RB_INSERT(extent_tree_ad_s
, &huge
, node
);
4602 huge_allocated
+= chunk_size
;
4604 malloc_mutex_unlock(&huge_mtx
);
4608 memset(ret
, 0xa5, chunk_size
);
4610 memset(ret
, 0, chunk_size
);
4617 huge_ralloc(void *ptr
, size_t size
, size_t oldsize
)
4622 /* Avoid moving the allocation if the size class would not change. */
4623 if (oldsize
> arena_maxclass
&&
4624 CHUNK_CEILING(size
) == CHUNK_CEILING(oldsize
)) {
4626 if (opt_junk
&& size
< oldsize
) {
4627 memset((void *)((uintptr_t)ptr
+ size
), 0x5a, oldsize
4629 } else if (opt_zero
&& size
> oldsize
) {
4630 memset((void *)((uintptr_t)ptr
+ oldsize
), 0, size
4638 * If we get here, then size and oldsize are different enough that we
4639 * need to use a different size class. In that case, fall back to
4640 * allocating new space and copying.
4642 ret
= huge_malloc(size
, false);
4646 copysize
= (size
< oldsize
) ? size
: oldsize
;
4648 if (copysize
>= VM_COPY_MIN
)
4649 pages_copy(ret
, ptr
, copysize
);
4652 memcpy(ret
, ptr
, copysize
);
4658 huge_dalloc(void *ptr
)
4660 extent_node_t
*node
, key
;
4662 malloc_mutex_lock(&huge_mtx
);
4664 /* Extract from tree of huge allocations. */
4666 node
= RB_FIND(extent_tree_ad_s
, &huge
, &key
);
4667 assert(node
!= NULL
);
4668 assert(node
->addr
== ptr
);
4669 RB_REMOVE(extent_tree_ad_s
, &huge
, node
);
4673 huge_allocated
-= node
->size
;
4676 malloc_mutex_unlock(&huge_mtx
);
4681 if (opt_dss
&& opt_junk
)
4682 memset(node
->addr
, 0x5a, node
->size
);
4685 chunk_dealloc(node
->addr
, node
->size
);
4687 base_node_dealloc(node
);
4691 static inline unsigned
4701 if (sysctl(mib
, 2, &ret
, &len
, (void *) 0, 0) == -1) {
4708 #elif (defined(LINUX_HOST))
4711 static inline unsigned
4715 int fd
, nread
, column
;
4717 static const char matchstr
[] = "processor\t:";
4720 * sysconf(3) would be the preferred method for determining the number
4721 * of CPUs, but it uses malloc internally, which causes untennable
4722 * recursion during malloc initialization.
4724 fd
= open("/proc/cpuinfo", O_RDONLY
);
4726 return (1); /* Error. */
4728 * Count the number of occurrences of matchstr at the beginnings of
4729 * lines. This treats hyperthreaded CPUs as multiple processors.
4734 nread
= read(fd
, &buf
, sizeof(buf
));
4736 break; /* EOF or error. */
4740 else if (column
!= -1) {
4741 if (buf
[0] == matchstr
[column
]) {
4743 if (column
== sizeof(matchstr
) - 1) {
4752 ret
= 1; /* Something went wrong in the parser. */
4757 #elif (defined(DARWIN))
4758 #include <mach/mach_init.h>
4759 #include <mach/mach_host.h>
4761 static inline unsigned
4764 kern_return_t error
;
4766 processor_info_array_t pinfo
;
4767 mach_msg_type_number_t pinfocnt
;
4769 error
= host_processor_info(mach_host_self(), PROCESSOR_BASIC_INFO
,
4770 &n
, &pinfo
, &pinfocnt
);
4771 if (error
!= KERN_SUCCESS
)
4772 return (1); /* Error. */
4776 #elif (defined(HAVE_KSTAT))
4779 static inline unsigned
4785 kstat_named_t
*named
;
4788 if ((ctl
= kstat_open()) == NULL
)
4789 return (1); /* Error. */
4791 if ((kstat
= kstat_lookup(ctl
, "unix", -1, "system_misc")) == NULL
)
4792 return (1); /* Error. */
4794 if (kstat_read(ctl
, kstat
, NULL
) == -1)
4795 return (1); /* Error. */
4797 named
= KSTAT_NAMED_PTR(kstat
);
4799 for (i
= 0; i
< kstat
->ks_ndata
; i
++) {
4800 if (strcmp(named
[i
].name
, "ncpus") == 0) {
4801 /* Figure out which one of these to actually use. */
4802 switch(named
[i
].data_type
) {
4803 case KSTAT_DATA_INT32
:
4804 ret
= named
[i
].value
.i32
;
4806 case KSTAT_DATA_UINT32
:
4807 ret
= named
[i
].value
.ui32
;
4809 case KSTAT_DATA_INT64
:
4810 ret
= named
[i
].value
.i64
;
4812 case KSTAT_DATA_UINT64
:
4813 ret
= named
[i
].value
.ui64
;
4816 return (1); /* Error. */
4821 kstat_close(ctl
); /* Don't bother checking for an error. */
4826 static inline unsigned
4831 * We lack a way to determine the number of CPUs on this platform, so
4839 malloc_print_stats(void)
4842 if (opt_print_stats
) {
4843 char s
[UMAX2S_BUFSIZE
];
4844 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4846 _malloc_message("Assertions ",
4853 _malloc_message("Boolean MALLOC_OPTIONS: ",
4854 opt_abort
? "A" : "a", "", "");
4856 _malloc_message(opt_dss
? "D" : "d", "", "", "");
4859 _malloc_message(opt_junk
? "J" : "j", "", "", "");
4862 _malloc_message(opt_mmap
? "M" : "m", "", "", "");
4864 _malloc_message("P", "", "", "");
4865 #ifdef MALLOC_UTRACE
4866 _malloc_message(opt_utrace
? "U" : "u", "", "", "");
4869 _malloc_message(opt_sysv
? "V" : "v", "", "", "");
4871 #ifdef MALLOC_XMALLOC
4872 _malloc_message(opt_xmalloc
? "X" : "x", "", "", "");
4875 _malloc_message(opt_zero
? "Z" : "z", "", "", "");
4877 _malloc_message("\n", "", "", "");
4879 _malloc_message("CPUs: ", umax2s(ncpus
, s
), "\n", "");
4880 _malloc_message("Max arenas: ", umax2s(narenas
, s
), "\n", "");
4881 #ifdef MALLOC_LAZY_FREE
4882 if (opt_lazy_free_2pow
>= 0) {
4883 _malloc_message("Lazy free slots: ",
4884 umax2s(1U << opt_lazy_free_2pow
, s
), "\n", "");
4886 _malloc_message("Lazy free slots: 0\n", "", "", "");
4888 #ifdef MALLOC_BALANCE
4889 _malloc_message("Arena balance threshold: ",
4890 umax2s(opt_balance_threshold
, s
), "\n", "");
4892 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s
),
4894 _malloc_message("Quantum size: ", umax2s(quantum
, s
), "\n", "");
4895 _malloc_message("Max small size: ", umax2s(small_max
, s
), "\n",
4897 _malloc_message("Max dirty pages per arena: ",
4898 umax2s(opt_dirty_max
, s
), "\n", "");
4900 _malloc_message("Chunk size: ", umax2s(chunksize
, s
), "", "");
4901 _malloc_message(" (2^", umax2s(opt_chunk_2pow
, s
), ")\n", "");
4905 size_t allocated
, mapped
;
4906 #ifdef MALLOC_BALANCE
4907 uint64_t nbalance
= 0;
4912 /* Calculate and print allocated/mapped stats. */
4915 for (i
= 0, allocated
= 0; i
< narenas
; i
++) {
4916 if (arenas
[i
] != NULL
) {
4917 malloc_spin_lock(&arenas
[i
]->lock
);
4919 arenas
[i
]->stats
.allocated_small
;
4921 arenas
[i
]->stats
.allocated_large
;
4922 #ifdef MALLOC_BALANCE
4923 nbalance
+= arenas
[i
]->stats
.nbalance
;
4925 malloc_spin_unlock(&arenas
[i
]->lock
);
4930 malloc_mutex_lock(&huge_mtx
);
4931 allocated
+= huge_allocated
;
4932 mapped
= stats_chunks
.curchunks
* chunksize
;
4933 malloc_mutex_unlock(&huge_mtx
);
4935 malloc_mutex_lock(&base_mtx
);
4936 mapped
+= base_mapped
;
4937 malloc_mutex_unlock(&base_mtx
);
4940 malloc_printf("Allocated: %lu, mapped: %lu\n",
4943 malloc_printf("Allocated: %zu, mapped: %zu\n",
4947 #ifdef MALLOC_BALANCE
4948 malloc_printf("Arena balance reassignments: %llu\n",
4952 /* Print chunk stats. */
4954 chunk_stats_t chunks_stats
;
4956 malloc_mutex_lock(&huge_mtx
);
4957 chunks_stats
= stats_chunks
;
4958 malloc_mutex_unlock(&huge_mtx
);
4960 malloc_printf("chunks: nchunks "
4961 "highchunks curchunks\n");
4962 malloc_printf(" %13llu%13lu%13lu\n",
4963 chunks_stats
.nchunks
,
4964 chunks_stats
.highchunks
,
4965 chunks_stats
.curchunks
);
4968 /* Print chunk stats. */
4970 "huge: nmalloc ndalloc allocated\n");
4972 malloc_printf(" %12llu %12llu %12lu\n",
4973 huge_nmalloc
, huge_ndalloc
, huge_allocated
);
4975 malloc_printf(" %12llu %12llu %12zu\n",
4976 huge_nmalloc
, huge_ndalloc
, huge_allocated
);
4978 /* Print stats for each arena. */
4979 for (i
= 0; i
< narenas
; i
++) {
4981 if (arena
!= NULL
) {
4983 "\narenas[%u]:\n", i
);
4984 malloc_spin_lock(&arena
->lock
);
4986 malloc_spin_unlock(&arena
->lock
);
4990 #endif /* #ifdef MALLOC_STATS */
4991 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4996 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4997 * implementation has to take pains to avoid infinite recursion during
5000 #if (defined(WIN32) || defined(DARWIN))
5001 #define malloc_init() false
5006 if (malloc_initialized
== false)
5007 return (malloc_init_hard());
5017 malloc_init_hard(void)
5020 char buf
[PATH_MAX
+ 1];
5028 malloc_mutex_lock(&init_lock
);
5031 if (malloc_initialized
) {
5033 * Another thread initialized the allocator before this one
5034 * acquired init_lock.
5037 malloc_mutex_unlock(&init_lock
);
5043 /* get a thread local storage index */
5044 tlsIndex
= TlsAlloc();
5047 /* Get page size and number of CPUs */
5052 GetSystemInfo(&info
);
5053 result
= info
.dwPageSize
;
5055 pagesize
= (unsigned) result
;
5057 ncpus
= info
.dwNumberOfProcessors
;
5060 ncpus
= malloc_ncpus();
5062 result
= sysconf(_SC_PAGESIZE
);
5063 assert(result
!= -1);
5065 pagesize
= (unsigned) result
;
5069 * We assume that pagesize is a power of 2 when calculating
5070 * pagesize_mask and pagesize_2pow.
5072 assert(((result
- 1) & result
) == 0);
5073 pagesize_mask
= result
- 1;
5074 pagesize_2pow
= ffs((int)result
) - 1;
5076 #ifdef MALLOC_LAZY_FREE
5078 opt_lazy_free_2pow
= -1;
5081 for (i
= 0; i
< 3; i
++) {
5084 /* Get runtime configuration. */
5088 if ((linklen
= readlink("/etc/malloc.conf", buf
,
5089 sizeof(buf
) - 1)) != -1) {
5091 * Use the contents of the "/etc/malloc.conf"
5092 * symbolic link's name.
5094 buf
[linklen
] = '\0';
5099 /* No configuration specified. */
5105 /* if (issetugid() == 0 && (opts = */
5106 /* getenv("MALLOC_OPTIONS")) != NULL) { */
5108 /* * Do nothing; opts is already initialized to */
5109 /* * the value of the MALLOC_OPTIONS environment */
5113 /* No configuration specified. */
5119 if (_malloc_options
!= NULL
) {
5121 * Use options that were compiled into the
5124 opts
= _malloc_options
;
5126 /* No configuration specified. */
5138 for (j
= 0; opts
[j
] != '\0'; j
++) {
5142 /* Parse repetition count, if any. */
5143 for (nreps
= 0, nseen
= false;; j
++, nseen
= true) {
5145 case '0': case '1': case '2': case '3':
5146 case '4': case '5': case '6': case '7':
5149 nreps
+= opts
[j
] - '0';
5159 for (k
= 0; k
< nreps
; k
++) {
5168 #ifdef MALLOC_BALANCE
5169 opt_balance_threshold
>>= 1;
5173 #ifdef MALLOC_BALANCE
5174 if (opt_balance_threshold
== 0)
5175 opt_balance_threshold
= 1;
5176 else if ((opt_balance_threshold
<< 1)
5177 > opt_balance_threshold
)
5178 opt_balance_threshold
<<= 1;
5192 opt_dirty_max
>>= 1;
5195 if (opt_dirty_max
== 0)
5197 else if ((opt_dirty_max
<< 1) != 0)
5198 opt_dirty_max
<<= 1;
5210 * Chunks always require at least one
5211 * header page, so chunks can never be
5212 * smaller than two pages.
5214 if (opt_chunk_2pow
> pagesize_2pow
+ 1)
5218 if (opt_chunk_2pow
+ 1 <
5219 (sizeof(size_t) << 3))
5223 #ifdef MALLOC_LAZY_FREE
5224 if (opt_lazy_free_2pow
>= 0)
5225 opt_lazy_free_2pow
--;
5229 #ifdef MALLOC_LAZY_FREE
5231 opt_lazy_free_2pow
++;
5245 opt_narenas_lshift
--;
5248 opt_narenas_lshift
++;
5251 opt_print_stats
= false;
5254 opt_print_stats
= true;
5257 if (opt_quantum_2pow
> QUANTUM_2POW_MIN
)
5261 if (opt_quantum_2pow
< pagesize_2pow
-
5266 if (opt_small_max_2pow
>
5268 opt_small_max_2pow
--;
5271 if (opt_small_max_2pow
< pagesize_2pow
5273 opt_small_max_2pow
++;
5275 #ifdef MALLOC_UTRACE
5291 #ifdef MALLOC_XMALLOC
5293 opt_xmalloc
= false;
5312 _malloc_message(_getprogname(),
5313 ": (malloc) Unsupported character "
5314 "in malloc options: '", cbuf
,
5323 /* Make sure that there is some method for acquiring memory. */
5324 if (opt_dss
== false && opt_mmap
== false)
5328 /* Take care to call atexit() only once. */
5329 if (opt_print_stats
) {
5331 /* Print statistics at exit. */
5332 atexit(malloc_print_stats
);
5336 /* Set variables according to the value of opt_small_max_2pow. */
5337 if (opt_small_max_2pow
< opt_quantum_2pow
)
5338 opt_small_max_2pow
= opt_quantum_2pow
;
5339 small_max
= (1U << opt_small_max_2pow
);
5341 /* Set bin-related variables. */
5342 bin_maxclass
= (pagesize
>> 1);
5343 assert(opt_quantum_2pow
>= TINY_MIN_2POW
);
5344 ntbins
= opt_quantum_2pow
- TINY_MIN_2POW
;
5345 assert(ntbins
<= opt_quantum_2pow
);
5346 nqbins
= (small_max
>> opt_quantum_2pow
);
5347 nsbins
= pagesize_2pow
- opt_small_max_2pow
- 1;
5349 /* Set variables according to the value of opt_quantum_2pow. */
5350 quantum
= (1U << opt_quantum_2pow
);
5351 quantum_mask
= quantum
- 1;
5353 small_min
= (quantum
>> 1) + 1;
5356 assert(small_min
<= quantum
);
5358 /* Set variables according to the value of opt_chunk_2pow. */
5359 chunksize
= (1LU << opt_chunk_2pow
);
5360 chunksize_mask
= chunksize
- 1;
5361 chunk_npages
= (chunksize
>> pagesize_2pow
);
5366 * Compute the header size such that it is large
5367 * enough to contain the page map and enough nodes for the
5368 * worst case: one node per non-header page plus one extra for
5369 * situations where we briefly have one more node allocated
5370 * than we will need.
5372 header_size
= sizeof(arena_chunk_t
) +
5373 (sizeof(arena_chunk_map_t
) * (chunk_npages
- 1)) +
5374 (sizeof(extent_node_t
) * chunk_npages
);
5375 arena_chunk_header_npages
= (header_size
>> pagesize_2pow
) +
5376 ((header_size
& pagesize_mask
) != 0);
5378 arena_maxclass
= chunksize
- (arena_chunk_header_npages
<<
5380 #ifdef MALLOC_LAZY_FREE
5382 * Make sure that allocating the free_cache does not exceed the limits
5383 * of what base_alloc() can handle.
5385 while ((sizeof(void *) << opt_lazy_free_2pow
) > chunksize
)
5386 opt_lazy_free_2pow
--;
5392 memset(&stats_chunks
, 0, sizeof(chunk_stats_t
));
5395 /* Various sanity checks that regard configuration. */
5396 assert(quantum
>= sizeof(void *));
5397 assert(quantum
<= pagesize
);
5398 assert(chunksize
>= pagesize
);
5399 assert(quantum
* 4 <= chunksize
);
5401 /* Initialize chunks data. */
5402 malloc_mutex_init(&huge_mtx
);
5405 malloc_mutex_init(&dss_mtx
);
5407 dss_prev
= dss_base
;
5409 RB_INIT(&dss_chunks_szad
);
5410 RB_INIT(&dss_chunks_ad
);
5418 /* Initialize base allocation data structures. */
5424 * Allocate a base chunk here, since it doesn't actually have to be
5425 * chunk-aligned. Doing this before allocating any other chunks allows
5426 * the use of space that would otherwise be wasted.
5429 base_pages_alloc(0);
5432 malloc_mutex_init(&base_mtx
);
5436 * For SMP systems, create four times as many arenas as there
5437 * are CPUs by default.
5439 opt_narenas_lshift
+= 2;
5442 /* Determine how many arenas to use. */
5444 if (opt_narenas_lshift
> 0) {
5445 if ((narenas
<< opt_narenas_lshift
) > narenas
)
5446 narenas
<<= opt_narenas_lshift
;
5448 * Make sure not to exceed the limits of what base_alloc() can
5451 if (narenas
* sizeof(arena_t
*) > chunksize
)
5452 narenas
= chunksize
/ sizeof(arena_t
*);
5453 } else if (opt_narenas_lshift
< 0) {
5454 if ((narenas
>> -opt_narenas_lshift
) < narenas
)
5455 narenas
>>= -opt_narenas_lshift
;
5456 /* Make sure there is at least one arena. */
5460 #ifdef MALLOC_BALANCE
5461 assert(narenas
!= 0);
5462 for (narenas_2pow
= 0;
5463 (narenas
>> (narenas_2pow
+ 1)) != 0;
5469 static const unsigned primes
[] = {1, 3, 5, 7, 11, 13, 17, 19,
5470 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5471 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5472 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5473 223, 227, 229, 233, 239, 241, 251, 257, 263};
5474 unsigned nprimes
, parenas
;
5477 * Pick a prime number of hash arenas that is more than narenas
5478 * so that direct hashing of pthread_self() pointers tends to
5479 * spread allocations evenly among the arenas.
5481 assert((narenas
& 1) == 0); /* narenas must be even. */
5482 nprimes
= (sizeof(primes
) >> SIZEOF_INT_2POW
);
5483 parenas
= primes
[nprimes
- 1]; /* In case not enough primes. */
5484 for (i
= 1; i
< nprimes
; i
++) {
5485 if (primes
[i
] > narenas
) {
5486 parenas
= primes
[i
];
5495 # ifndef MALLOC_BALANCE
5500 /* Allocate and initialize arenas. */
5501 arenas
= (arena_t
**)base_alloc(sizeof(arena_t
*) * narenas
);
5502 if (arenas
== NULL
) {
5504 malloc_mutex_unlock(&init_lock
);
5509 * Zero the array. In practice, this should always be pre-zeroed,
5510 * since it was just mmap()ed, but let's be sure.
5512 memset(arenas
, 0, sizeof(arena_t
*) * narenas
);
5515 * Initialize one arena here. The rest are lazily created in
5516 * choose_arena_hard().
5519 if (arenas
[0] == NULL
) {
5521 malloc_mutex_unlock(&init_lock
);
5527 * Assign the initial arena to the initial thread, in order to avoid
5528 * spurious creation of an extra arena if the application switches to
5532 TlsSetValue(tlsIndex
, arenas
[0]);
5534 arenas_map
= arenas
[0];
5539 * Seed here for the initial thread, since choose_arena_hard() is only
5540 * called for other threads. The seed values don't really matter.
5542 #ifdef MALLOC_LAZY_FREE
5543 SPRN(lazy_free
, 42);
5545 #ifdef MALLOC_BALANCE
5549 malloc_spin_init(&arenas_lock
);
5551 malloc_initialized
= true;
5553 malloc_mutex_unlock(&init_lock
);
5558 /* XXX Why not just expose malloc_print_stats()? */
5564 malloc_print_stats();
5569 * End general internal functions.
5571 /******************************************************************************/
5573 * Begin malloc(3)-compatible functions.
5579 moz_malloc(size_t size
)
5587 if (malloc_init()) {
5594 if (opt_sysv
== false)
5605 ret
= imalloc(size
);
5609 #ifdef MALLOC_XMALLOC
5611 _malloc_message(_getprogname(),
5612 ": (malloc) Error in malloc(): out of memory\n", "",
5620 UTRACE(0, size
, ret
);
/*
 * posix_memalign(3)-compatible entry point: stores an allocation of `size`
 * bytes aligned to `alignment` in *memptr.  Returns 0 on success, EINVAL for
 * a bad alignment, ENOMEM on allocation failure (per POSIX, errno is not
 * required to be set).
 *
 * NOTE(review): reconstructed from mangled text; both the Darwin (moz_*) and
 * non-Darwin signatures are visible in the fragments.
 */
int
#ifdef MOZ_MEMORY_DARWIN
moz_posix_memalign(void **memptr, size_t alignment, size_t size)
#else
posix_memalign(void **memptr, size_t alignment, size_t size)
#endif
{
	void *result;

	/* Make sure that alignment is a large enough power of 2. */
	if (((alignment - 1) & alignment) != 0
	    || alignment < sizeof(void *)) {
#ifdef MALLOC_XMALLOC
		if (opt_xmalloc) {
			_malloc_message(_getprogname(),
			    ": (malloc) Error in posix_memalign(): "
			    "invalid alignment\n", "", "");
			abort();
		}
#endif
		return (EINVAL);
	}

	result = ipalloc(alignment, size);

	if (result == NULL) {
#ifdef MALLOC_XMALLOC
		if (opt_xmalloc) {
			_malloc_message(_getprogname(),
			    ": (malloc) Error in posix_memalign(): out of memory\n",
			    "", "");
			abort();
		}
#endif
		return (ENOMEM);
	}

	UTRACE(0, size, result);
	*memptr = result;
	return (0);
}
/*
 * memalign(3)-compatible entry point, implemented on top of posix_memalign.
 * Returns NULL on failure.
 *
 * NOTE(review): reconstructed; fragments show both Darwin and non-Darwin
 * signatures and the corresponding posix_memalign calls.
 */
void *
#ifdef MOZ_MEMORY_DARWIN
moz_memalign(size_t alignment, size_t size)
#else
memalign(size_t alignment, size_t size)
#endif
{
	void *ret;

#ifdef MOZ_MEMORY_DARWIN
	if (moz_posix_memalign(&ret, alignment, size) != 0)
#else
	if (posix_memalign(&ret, alignment, size) != 0)
#endif
		return (NULL);

	return (ret);
}
5703 moz_valloc(size_t size
)
5710 return (moz_memalign(pagesize
, size
));
5712 return (memalign(pagesize
, size
));
5719 moz_calloc(size_t num
, size_t size
)
5722 calloc(size_t num
, size_t size
)
5728 if (malloc_init()) {
5734 num_size
= num
* size
;
5735 if (num_size
== 0) {
5737 if ((opt_sysv
== false) && ((num
== 0) || (size
== 0)))
5747 * Try to avoid division here. We know that it isn't possible to
5748 * overflow during multiplication if neither operand uses any of the
5749 * most significant half of the bits in a size_t.
5751 } else if (((num
| size
) & (SIZE_T_MAX
<< (sizeof(size_t) << 2)))
5752 && (num_size
/ size
!= num
)) {
5753 /* size_t overflow. */
5758 ret
= icalloc(num_size
);
5762 #ifdef MALLOC_XMALLOC
5764 _malloc_message(_getprogname(),
5765 ": (malloc) Error in calloc(): out of memory\n", "",
5773 UTRACE(0, num_size
, ret
);
5780 moz_realloc(void *ptr
, size_t size
)
5783 realloc(void *ptr
, size_t size
)
5790 if (opt_sysv
== false)
5804 assert(malloc_initialized
);
5806 ret
= iralloc(ptr
, size
);
5809 #ifdef MALLOC_XMALLOC
5811 _malloc_message(_getprogname(),
5812 ": (malloc) Error in realloc(): out of "
5813 "memory\n", "", "");
5823 ret
= imalloc(size
);
5826 #ifdef MALLOC_XMALLOC
5828 _malloc_message(_getprogname(),
5829 ": (malloc) Error in realloc(): out of "
5830 "memory\n", "", "");
5841 UTRACE(ptr
, size
, ret
);
5857 assert(malloc_initialized
);
5864 * This is a work in progress, which doesn't even get used.
5866 #ifdef USE_STATS_MEMORY
5867 /* Utility to update mallinfo for malloc_stats() and mallinfo() */
5870 malloc_update_mallinfo(arena_t
*arena
, struct mallinfo
*mi
)
5874 avail
= chunksize
* narenas
;
5877 malloc_printf("allocated: %u %u %u\n",
5878 arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
,
5879 arena
->stats
.nmalloc_small
+ arena
->stats
.nmalloc_large
,
5880 arena
->stats
.ndalloc_small
+ arena
->stats
.ndalloc_large
);
5882 malloc_printf("available: %u\n", avail
);
5885 /* stats_print(arena); */
5886 malloc_spin_lock(&arena
->lock
);
5888 /* clear unused fields */
5889 mi
->arena
= mi
->ordblks
= mi
->smblks
= mi
->usmblks
= mi
->fsmblks
= 0;
5890 mi
->uordblks
= arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
;
5891 mi
->fordblks
= arena
->stats
.mapped
- mi
->uordblks
;
5893 mi
->hblks
= mi
->hblkhd
= 0;
5894 mi
->keepcost
= arena
->stats
.mapped
;
5895 malloc_spin_unlock(&arena
->lock
);
5905 /* Calculate and print allocated/mapped stats. */
5906 malloc_update_mallinfo(choose_arena(), &mi
);
5913 * End malloc(3)-compatible functions.
5915 /******************************************************************************/
5917 * Begin non-standard functions.
/*
 * malloc_usable_size(3)-compatible entry point: reports the number of usable
 * bytes in the allocation `ptr` points to (may exceed the requested size).
 * `ptr` must be a live allocation from this allocator.
 *
 * NOTE(review): reconstructed; both Darwin and non-Darwin signatures are
 * visible in the fragments.
 */
size_t
#ifdef MOZ_MEMORY_DARWIN
moz_malloc_usable_size(const void *ptr)
#else
malloc_usable_size(const void *ptr)
#endif
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}
5937 _recalloc(void *ptr
, size_t count
, size_t size
)
5939 size_t oldsize
= (ptr
!= NULL
) ? isalloc(ptr
) : 0;
5940 size_t newsize
= count
* size
;
5943 * In order for all trailing bytes to be zeroed, the caller needs to
5944 * use calloc(), followed by recalloc(). However, the current calloc()
5945 * implementation only zeros the bytes requested, so if recalloc() is
5946 * to work 100% correctly, calloc() will need to change to zero
5950 ptr
= realloc(ptr
, newsize
);
5951 if (ptr
!= NULL
&& oldsize
< newsize
) {
5952 memset((void *)((uintptr_t)ptr
+ oldsize
), 0, newsize
-
/*
 * This impl of _expand doesn't ever actually expand or shrink blocks: it
 * simply replies that you may continue using a shrunk block.
 */
void *
_expand(void *ptr, size_t newsize)
{
	/* The existing allocation already covers newsize bytes. */
	if (isalloc(ptr) >= newsize)
		return ptr;

	return NULL;
}
/*
 * _msize (Windows CRT extension): usable size of an allocation; thin wrapper
 * over malloc_usable_size().
 */
size_t
_msize(const void *ptr)
{

	return malloc_usable_size(ptr);
}
5982 * End non-standard functions.
5984 /******************************************************************************/
5986 * Begin library-private functions, used by threading libraries for protection
5987 * of malloc during fork(). These functions are only called if the program is
5988 * running in threaded mode, so there is no need to check whether the program
5993 _malloc_prefork(void)
5997 /* Acquire all mutexes in a safe order. */
5999 malloc_spin_lock(&arenas_lock
);
6000 for (i
= 0; i
< narenas
; i
++) {
6001 if (arenas
[i
] != NULL
)
6002 malloc_spin_lock(&arenas
[i
]->lock
);
6004 malloc_spin_unlock(&arenas_lock
);
6006 malloc_mutex_lock(&base_mtx
);
6008 malloc_mutex_lock(&huge_mtx
);
6011 malloc_mutex_lock(&dss_mtx
);
6016 _malloc_postfork(void)
6020 /* Release all mutexes, now that fork() has completed. */
6023 malloc_mutex_unlock(&dss_mtx
);
6026 malloc_mutex_unlock(&huge_mtx
);
6028 malloc_mutex_unlock(&base_mtx
);
6030 malloc_spin_lock(&arenas_lock
);
6031 for (i
= 0; i
< narenas
; i
++) {
6032 if (arenas
[i
] != NULL
)
6033 malloc_spin_unlock(&arenas
[i
]->lock
);
6035 malloc_spin_unlock(&arenas_lock
);
6039 * End library-private functions.
6041 /******************************************************************************/
6045 static malloc_zone_t zone
;
6046 static struct malloc_introspection_t zone_introspect
;
6049 zone_size(malloc_zone_t
*zone
, void *ptr
)
6052 arena_chunk_t
*chunk
;
6055 * There appear to be places within Darwin (such as setenv(3)) that
6056 * cause calls to this function with pointers that *no* zone owns. If
6057 * we knew that all pointers were owned by *some* zone, we could split
6058 * our zone into two parts, and use one as the default allocator and
6059 * the other as the default deallocator/reallocator. Since that will
6060 * not work in practice, we must check all pointers to assure that they
6061 * reside within a mapped chunk before determining size.
6064 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(ptr
);
6068 arena_t
*arenas_snapshot
[narenas
];
6071 * Make a copy of the arenas vector while holding arenas_lock in
6072 * order to assure that all elements are up to date in this
6073 * processor's cache. Do this outside the following loop in
6074 * order to reduce lock acquisitions.
6076 malloc_spin_lock(&arenas_lock
);
6077 memcpy(&arenas_snapshot
, arenas
, sizeof(arena_t
*) * narenas
);
6078 malloc_spin_unlock(&arenas_lock
);
6081 for (i
= 0; i
< narenas
; i
++) {
6082 arena
= arenas_snapshot
[i
];
6084 if (arena
!= NULL
) {
6087 /* Make sure ptr is within a chunk. */
6088 malloc_spin_lock(&arena
->lock
);
6089 if (RB_FIND(arena_chunk_tree_s
, &arena
->chunks
,
6094 malloc_spin_unlock(&arena
->lock
);
6097 ret
= arena_salloc(ptr
);
6103 extent_node_t
*node
;
6107 key
.addr
= (void *)chunk
;
6108 malloc_mutex_lock(&huge_mtx
);
6109 node
= RB_FIND(extent_tree_ad_s
, &huge
, &key
);
6114 malloc_mutex_unlock(&huge_mtx
);
6122 zone_malloc(malloc_zone_t
*zone
, size_t size
)
6125 return (moz_malloc(size
));
6129 zone_calloc(malloc_zone_t
*zone
, size_t num
, size_t size
)
6132 return (moz_calloc(num
, size
));
6136 zone_valloc(malloc_zone_t
*zone
, size_t size
)
6138 void *ret
= NULL
; /* Assignment avoids useless compiler warning. */
6140 moz_posix_memalign(&ret
, pagesize
, size
);
6146 zone_free(malloc_zone_t
*zone
, void *ptr
)
6153 zone_realloc(malloc_zone_t
*zone
, void *ptr
, size_t size
)
6156 return (moz_realloc(ptr
, size
));
6160 zone_destroy(malloc_zone_t
*zone
)
6163 /* This function should never be called. */
6169 zone_good_size(malloc_zone_t
*zone
, size_t size
)
6175 * Actually create an object of the appropriate size, then find out
6176 * how large it could have been without moving up to the next size
6179 p
= moz_malloc(size
);
6190 zone_force_lock(malloc_zone_t
*zone
)
6197 zone_force_unlock(malloc_zone_t
*zone
)
6203 static malloc_zone_t
*
6207 assert(malloc_initialized
);
6209 zone
.size
= (void *)zone_size
;
6210 zone
.malloc
= (void *)zone_malloc
;
6211 zone
.calloc
= (void *)zone_calloc
;
6212 zone
.valloc
= (void *)zone_valloc
;
6213 zone
.free
= (void *)zone_free
;
6214 zone
.realloc
= (void *)zone_realloc
;
6215 zone
.destroy
= (void *)zone_destroy
;
6216 zone
.zone_name
= "jemalloc_zone";
6217 zone
.batch_malloc
= NULL
;
6218 zone
.batch_free
= NULL
;
6219 zone
.introspect
= &zone_introspect
;
6221 zone_introspect
.enumerator
= NULL
;
6222 zone_introspect
.good_size
= (void *)zone_good_size
;
6223 zone_introspect
.check
= NULL
;
6224 zone_introspect
.print
= NULL
;
6225 zone_introspect
.log
= NULL
;
6226 zone_introspect
.force_lock
= (void *)zone_force_lock
;
6227 zone_introspect
.force_unlock
= (void *)zone_force_unlock
;
6228 zone_introspect
.statistics
= NULL
;
6233 __attribute__((constructor
))
6235 jemalloc_darwin_init(void)
6237 extern unsigned malloc_num_zones
;
6238 extern malloc_zone_t
**malloc_zones
;
6240 if (malloc_init_hard())
6244 * The following code is *not* thread-safe, so it's critical that
6245 * initialization be manually triggered.
6248 /* Register the custom zones. */
6249 malloc_zone_register(create_zone());
6250 assert(malloc_zones
[malloc_num_zones
- 1] == &zone
);
6253 * Shift malloc_zones around so that zone is first, which makes it the
6256 assert(malloc_num_zones
> 1);
6257 memmove(&malloc_zones
[1], &malloc_zones
[0],
6258 sizeof(malloc_zone_t
*) * (malloc_num_zones
- 1));
6259 malloc_zones
[0] = &zone
;