memory/build/mozjemalloc.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 // Portions of this file were originally under the following license:
   8 //
   9 // Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
  10 // All rights reserved.
  11 // Copyright (C) 2007-2017 Mozilla Foundation.
  12 //
  13 // Redistribution and use in source and binary forms, with or without
  14 // modification, are permitted provided that the following conditions
  15 // are met:
  16 // 1. Redistributions of source code must retain the above copyright
  17 //    notice(s), this list of conditions and the following disclaimer as
  18 //    the first lines of this file unmodified other than the possible
  19 //    addition of one or more copyright notices.
  20 // 2. Redistributions in binary form must reproduce the above copyright
  21 //    notice(s), this list of conditions and the following disclaimer in
  22 //    the documentation and/or other materials provided with the
  23 //    distribution.
  24 //
  25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28 // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  29 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  32 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  33 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  34 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  35 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 //
  37 // *****************************************************************************
  38 //
  39 // This allocator implementation is designed to provide scalable performance
  40 // for multi-threaded programs on multi-processor systems.  The following
  41 // features are included for this purpose:
  42 //
  43 //   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  44 //     contention and cache sloshing.
  45 //
  46 //   + Cache line sharing between arenas is avoided for internal data
  47 //     structures.
  48 //
  49 //   + Memory is managed in chunks and runs (chunks can be split into runs),
  50 //     rather than as individual pages.  This provides a constant-time
  51 //     mechanism for associating allocations with particular arenas.
  52 //
  53 // Allocation requests are rounded up to the nearest size class, and no record
  54 // of the original request size is maintained.  Allocations are broken into
  55 // categories according to size class.  Assuming runtime defaults, the size
  56 // classes in each category are as follows (for x86, x86_64 and Apple Silicon):
  57 //
  58 //   |=========================================================|
  59 //   | Category | Subcategory    |     x86 |  x86_64 | Mac ARM |
  60 //   |---------------------------+---------+---------+---------|
  61 //   | Word size                 |  32 bit |  64 bit |  64 bit |
  62 //   | Page size                 |    4 kB |    4 kB |   16 kB |
  63 //   |=========================================================|
  64 //   | Small    | Tiny           |    4/-w |       - |       - |
  65 //   |          |                |       8 |    8/-w |       8 |
  66 //   |          |----------------+---------|---------|---------|
  67 //   |          | Quantum-spaced |      16 |      16 |      16 |
  68 //   |          |                |      32 |      32 |      32 |
  69 //   |          |                |      48 |      48 |      48 |
  70 //   |          |                |     ... |     ... |     ... |
  71 //   |          |                |     480 |     480 |     480 |
  72 //   |          |                |     496 |     496 |     496 |
  73 //   |          |----------------+---------|---------|---------|
  74 //   |          | Quantum-wide-  |     512 |     512 |     512 |
  75 //   |          | spaced         |     768 |     768 |     768 |
  76 //   |          |                |     ... |     ... |     ... |
  77 //   |          |                |    3584 |    3584 |    3584 |
  78 //   |          |                |    3840 |    3840 |    3840 |
  79 //   |          |----------------+---------|---------|---------|
  80 //   |          | Sub-page       |       - |       - |    4096 |
  81 //   |          |                |       - |       - |    8 kB |
  82 //   |=========================================================|
  83 //   | Large                     |    4 kB |    4 kB |       - |
  84 //   |                           |    8 kB |    8 kB |       - |
  85 //   |                           |   12 kB |   12 kB |       - |
  86 //   |                           |   16 kB |   16 kB |   16 kB |
  87 //   |                           |     ... |     ... |       - |
  88 //   |                           |   32 kB |   32 kB |   32 kB |
  89 //   |                           |     ... |     ... |     ... |
  90 //   |                           | 1008 kB | 1008 kB | 1008 kB |
  91 //   |                           | 1012 kB | 1012 kB |       - |
  92 //   |                           | 1016 kB | 1016 kB |       - |
  93 //   |                           | 1020 kB | 1020 kB |       - |
  94 //   |=========================================================|
  95 //   | Huge                      |    1 MB |    1 MB |    1 MB |
  96 //   |                           |    2 MB |    2 MB |    2 MB |
  97 //   |                           |    3 MB |    3 MB |    3 MB |
  98 //   |                           |     ... |     ... |     ... |
  99 //   |=========================================================|
 100 //
 101 // Legend:
 102 //   n:    Size class exists for this platform.
 103 //   n/-w: This size class doesn't exist on Windows (see kMinTinyClass).
 104 //   -:    This size class doesn't exist for this platform.
 105 //   ...:  Size classes follow a pattern here.
 106 //
 107 // NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an
 108 // allocation on Linux or Mac.  So on 32-bit *nix, the smallest bucket size is
 109 // 4 bytes, and on 64-bit, the smallest bucket size is 8 bytes.
 110 //
 111 // A different mechanism is used for each category:
 112 //
 113 //   Small : Each size class is segregated into its own set of runs.  Each run
 114 //           maintains a bitmap of which regions are free/allocated.
 115 //
 116 //   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 117 //           in the associated arena chunk header maps.
 118 //
 119 //   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 120 //          Metadata are stored in a separate red-black tree.
 121 //
 122 // *****************************************************************************
 123
 124 #include "mozmemory_wrap.h"
 125 #include "mozjemalloc.h"
 126 #include "mozjemalloc_types.h"
 127
 128 #include <cstring>
 129 #include <cerrno>
 130 #include <optional>
 131 #include <type_traits>
 132 #ifdef XP_WIN
 133 #  include <io.h>
 134 #  include <windows.h>
 135 #else
 136 #  include <sys/mman.h>
 137 #  include <unistd.h>
 138 #endif
 139 #ifdef XP_DARWIN
 140 #  include <libkern/OSAtomic.h>
 141 #  include <mach/mach_init.h>
 142 #  include <mach/vm_map.h>
 143 #endif
 144
 145 #include "mozilla/Atomics.h"
 146 #include "mozilla/Alignment.h"
 147 #include "mozilla/ArrayUtils.h"
 148 #include "mozilla/Assertions.h"
 149 #include "mozilla/CheckedInt.h"
 150 #include "mozilla/DoublyLinkedList.h"
 151 #include "mozilla/HelperMacros.h"
 152 #include "mozilla/Likely.h"
 153 #include "mozilla/MathAlgorithms.h"
 154 #include "mozilla/RandomNum.h"
 155 // Note: MozTaggedAnonymousMmap() could call an LD_PRELOADed mmap
 156 // instead of the one defined here; use only MozTagAnonymousMemory().
 157 #include "mozilla/TaggedAnonymousMemory.h"
 158 #include "mozilla/ThreadLocal.h"
 159 #include "mozilla/UniquePtr.h"
 160 #include "mozilla/Unused.h"
 161 #include "mozilla/XorShift128PlusRNG.h"
 162 #include "mozilla/fallible.h"
 163 #include "rb.h"
 164 #include "Mutex.h"
 165 #include "PHC.h"
 166 #include "Utils.h"
 167
 168 #if defined(XP_WIN)
 169 #  include "mozmemory_utils.h"
 170 #endif
 171
 172 // For GetGeckoProcessType(), when it's used.
 173 #if defined(XP_WIN) && !defined(JS_STANDALONE)
 174 #  include "mozilla/ProcessType.h"
 175 #endif
 176
 177 using namespace mozilla;
 178
 179 // On Linux, we use madvise(MADV_DONTNEED) to release memory back to the
 180 // operating system.  If we release 1MB of live pages with MADV_DONTNEED, our
 181 // RSS will decrease by 1MB (almost) immediately.
 182 //
 183 // On Mac, we use madvise(MADV_FREE).  Unlike MADV_DONTNEED on Linux, MADV_FREE
 184 // on Mac doesn't cause the OS to release the specified pages immediately; the
 185 // OS keeps them in our process until the machine comes under memory pressure.
 186 //
 187 // It's therefore difficult to measure the process's RSS on Mac, since, in the
 188 // absence of memory pressure, the contribution from the heap to RSS will not
 189 // decrease due to our madvise calls.
 190 //
 191 // We therefore define MALLOC_DOUBLE_PURGE on Mac.  This causes jemalloc to
 192 // track which pages have been MADV_FREE'd.  You can then call
 193 // jemalloc_purge_freed_pages(), which will force the OS to release those
 194 // MADV_FREE'd pages, making the process's RSS reflect its true memory usage.
 195 //
 196 // The jemalloc_purge_freed_pages definition in memory/build/mozmemory.h needs
 197 // to be adjusted if MALLOC_DOUBLE_PURGE is ever enabled on Linux.
 198
 199 #ifdef XP_DARWIN
 200 #  define MALLOC_DOUBLE_PURGE
 201 #endif
 202
 203 #ifdef XP_WIN
 204 #  define MALLOC_DECOMMIT
 205 #endif
 206
  207 // Define MALLOC_RUNTIME_CONFIG depending on MOZ_DEBUG.  Overriding this as a
  208 // build option allows us to build mozjemalloc/firefox without runtime
  209 // asserts but with runtime configuration, which makes some testing easier.
 210
 211 #ifdef MOZ_DEBUG
 212 #  define MALLOC_RUNTIME_CONFIG
 213 #endif
 214
 215 // When MALLOC_STATIC_PAGESIZE is defined, the page size is fixed at
 216 // compile-time for better performance, as opposed to determined at
 217 // runtime. Some platforms can have different page sizes at runtime
 218 // depending on kernel configuration, so they are opted out by default.
 219 // Debug builds are opted out too, for test coverage.
 220 #ifndef MALLOC_RUNTIME_CONFIG
 221 #  if !defined(__ia64__) && !defined(__sparc__) && !defined(__mips__) &&       \
 222       !defined(__aarch64__) && !defined(__powerpc__) && !defined(XP_MACOSX) && \
 223       !defined(__loongarch__)
 224 #    define MALLOC_STATIC_PAGESIZE 1
 225 #  endif
 226 #endif
 227
 228 #ifdef XP_WIN
 229 #  define STDERR_FILENO 2
 230
 231 // Implement getenv without using malloc.
 232 static char mozillaMallocOptionsBuf[64];
 233
 234 #  define getenv xgetenv
 235 static char* getenv(const char* name) {
 236   if (GetEnvironmentVariableA(name, mozillaMallocOptionsBuf,
 237                               sizeof(mozillaMallocOptionsBuf)) > 0) {
 238     return mozillaMallocOptionsBuf;
 239   }
 240
 241   return nullptr;
 242 }
 243 #endif
 244
 245 #ifndef XP_WIN
 246 // Newer Linux systems support MADV_FREE, but we're not supporting
 247 // that properly. bug #1406304.
 248 #  if defined(XP_LINUX) && defined(MADV_FREE)
 249 #    undef MADV_FREE
 250 #  endif
 251 #  ifndef MADV_FREE
 252 #    define MADV_FREE MADV_DONTNEED
 253 #  endif
 254 #endif
 255
  256 // Some tools, such as /dev/dsp wrappers, are LD_PRELOAD libraries that
  257 // happen to override mmap() and call dlsym() from their overridden
  258 // mmap(). The problem is that dlsym() calls malloc(), and this ends
  259 // up in a deadlock in jemalloc.
  260 // On these systems, we prefer to directly use the system call.
  261 // We do that for Linux systems and kfreebsd with GNU userland.
 262 // Note sanity checks are not done (alignment of offset, ...) because
 263 // the uses of mmap are pretty limited, in jemalloc.
 264 //
  265 // On Alpha, glibc has a bug that prevents syscall() from working for system
  266 // calls with 6 arguments.
 267 #if (defined(XP_LINUX) && !defined(__alpha__)) || \
 268     (defined(__FreeBSD_kernel__) && defined(__GLIBC__))
 269 #  include <sys/syscall.h>
 270 #  if defined(SYS_mmap) || defined(SYS_mmap2)
  271 static inline void* _mmap(void* addr, size_t length, int prot, int flags,
  272                           int fd, off_t offset) {
        // Issues the mmap system call directly through syscall() instead of
        // going through the mmap() symbol, and returns the raw syscall result
        // cast to a pointer.  Which syscall number and argument form is used
        // depends on the platform macros tested below.
  273 // S390 only passes one argument to the mmap system call, which is a
  274 // pointer to a structure containing the arguments.
  275 #    ifdef __s390__
  276   struct {
  277     void* addr;
  278     size_t length;
  279     long prot;
  280     long flags;
  281     long fd;
  282     off_t offset;
  283   } args = {addr, length, prot, flags, fd, offset};
  284   return (void*)syscall(SYS_mmap, &args);
  285 #    else
  286 #      if defined(ANDROID) && defined(__aarch64__) && defined(SYS_mmap2)
  287   // Android NDK defines SYS_mmap2 for AArch64 despite it not supporting mmap2.
  288 #        undef SYS_mmap2
  289 #      endif
  290 #      ifdef SYS_mmap2
        // NOTE(review): the shift by 12 presumably converts the byte offset into
        // the 4096-byte units that mmap2 expects -- confirm against mmap2(2).
  291   return (void*)syscall(SYS_mmap2, addr, length, prot, flags, fd, offset >> 12);
  292 #      else
  293   return (void*)syscall(SYS_mmap, addr, length, prot, flags, fd, offset);
  294 #      endif
  295 #    endif
  296 }
 297 #    define mmap _mmap
 298 #    define munmap(a, l) syscall(SYS_munmap, a, l)
 299 #  endif
 300 #endif
 301
 302 // ***************************************************************************
 303 // Structures for chunk headers for chunks used for non-huge allocations.
 304
 305 struct arena_t;
 306
  307 // Each element of the chunk map corresponds to one page within the chunk.
  308 struct arena_chunk_map_t {
  309   // Linkage for run trees.  There are two disjoint uses:
  310   //
  311   // 1) arena_t's tree of available runs.
  312   // 2) arena_run_t conceptually uses this linkage for in-use non-full
  313   //    runs, rather than directly embedding linkage.
  314   RedBlackTreeNode<arena_chunk_map_t> link;
  315
  316   // Run address (or size) and various flags are stored together.  The bit
  317   // layout looks like (assuming 32-bit system):
  318   //
  319   //   ???????? ???????? ????---- -mckdzla
  320   //
  321   // ? : Unallocated: Run address for first/last pages, unset for internal
  322   //                  pages.
  323   //     Small: Run address.
  324   //     Large: Run size for first page, unset for trailing pages.
        //     NOTE(review): the "Unallocated" examples below show a run *size*
        //     (s) in these bits, while the line above says run address --
        //     confirm which one is actually stored.
  325   // - : Unused.
  326   // m : MADV_FREE/MADV_DONTNEED'ed?
  327   // c : decommitted?
  328   // k : key?
  329   // d : dirty?
  330   // z : zeroed?
  331   // l : large?
  332   // a : allocated?
  333   //
  334   // Following are example bit patterns for the three types of runs.
  335   //
  336   // r : run address
  337   // s : run size
  338   // x : don't care
  339   // - : 0
  340   // [cdzla] : bit set
  341   //
  342   //   Unallocated:
  343   //     ssssssss ssssssss ssss---- --c-----
  344   //     xxxxxxxx xxxxxxxx xxxx---- ----d---
  345   //     ssssssss ssssssss ssss---- -----z--
  346   //
  347   //   Small:
  348   //     rrrrrrrr rrrrrrrr rrrr---- -------a
  349   //     rrrrrrrr rrrrrrrr rrrr---- -------a
  350   //     rrrrrrrr rrrrrrrr rrrr---- -------a
  351   //
  352   //   Large:
  353   //     ssssssss ssssssss ssss---- ------la
  354   //     -------- -------- -------- ------la
  355   //     -------- -------- -------- ------la
  356   size_t bits;
  357
  358 // Note that CHUNK_MAP_DECOMMITTED's meaning varies depending on whether
  359 // MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are defined.
  360 //
  361 // If MALLOC_DECOMMIT is defined, a page which is CHUNK_MAP_DECOMMITTED must be
  362 // re-committed with pages_commit() before it may be touched.  If
  363 // MALLOC_DECOMMIT is defined, MALLOC_DOUBLE_PURGE must not be defined.
  364 //
  365 // If neither MALLOC_DECOMMIT nor MALLOC_DOUBLE_PURGE is defined, pages which
  366 // are madvised (with either MADV_DONTNEED or MADV_FREE) are marked with
  367 // CHUNK_MAP_MADVISED.
  368 //
  369 // Otherwise, if MALLOC_DECOMMIT is not defined and MALLOC_DOUBLE_PURGE is
  370 // defined, then a page which is madvised is marked as CHUNK_MAP_MADVISED.
  371 // When it's finally freed with jemalloc_purge_freed_pages, the page is marked
  372 // as CHUNK_MAP_DECOMMITTED.
      //
      // The flag values below occupy the low bits of `bits` and follow the
      // `mckdzla` layout shown above: m = 0x40, c = 0x20, k = 0x10, d = 0x08,
      // z = 0x04, l = 0x02, a = 0x01.
  373 #define CHUNK_MAP_MADVISED ((size_t)0x40U)
  374 #define CHUNK_MAP_DECOMMITTED ((size_t)0x20U)
  375 #define CHUNK_MAP_MADVISED_OR_DECOMMITTED \
  376   (CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED)
  377 #define CHUNK_MAP_KEY ((size_t)0x10U)
  378 #define CHUNK_MAP_DIRTY ((size_t)0x08U)
  379 #define CHUNK_MAP_ZEROED ((size_t)0x04U)
  380 #define CHUNK_MAP_LARGE ((size_t)0x02U)
  381 #define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
  382 };
 383
  384 // Arena chunk header.  It sits at the start of a chunk used for non-huge
      // allocations and ends with the per-page map.
  385 struct arena_chunk_t {
  386   // Arena that owns the chunk.
  387   arena_t* arena;
  388
  389   // Linkage for the arena's tree of dirty chunks.
  390   RedBlackTreeNode<arena_chunk_t> link_dirty;
  391
  392 #ifdef MALLOC_DOUBLE_PURGE
  393   // If we're double-purging, we maintain a linked list of chunks which
  394   // have pages which have been madvise(MADV_FREE)'d but not explicitly
  395   // purged.
  396   //
  397   // We're currently lazy and don't remove a chunk from this list when
  398   // all its madvised pages are recommitted.
  399   DoublyLinkedListElement<arena_chunk_t> chunks_madvised_elem;
  400 #endif
  401
  402   // Number of dirty pages.
  403   size_t ndirty;
  404
  405   // Map of pages within chunk that keeps track of free/large/small.
        // Only the first entry is declared here; the header size computed for
        // gChunkHeaderNumPages accounts for gChunkNumPages entries in total.
  406   arena_chunk_map_t map[1];  // Dynamically sized.
  407 };
 408
 409 // ***************************************************************************
 410 // Constants defining allocator size classes and behavior.
 411
 412 // Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 413 // so over-estimates are okay (up to a point), but under-estimates will
 414 // negatively affect performance.
 415 static const size_t kCacheLineSize = 64;
 416
 417 // Our size classes are inclusive ranges of memory sizes.  By describing the
 418 // minimums and how memory is allocated in each range the maximums can be
 419 // calculated.
 420
 421 // Smallest size class to support.  On Windows the smallest allocation size
 422 // must be 8 bytes on 32-bit, 16 bytes on 64-bit.  On Linux and Mac, even
 423 // malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003).
 424 #ifdef XP_WIN
 425 static const size_t kMinTinyClass = sizeof(void*) * 2;
 426 #else
 427 static const size_t kMinTinyClass = sizeof(void*);
 428 #endif
 429
 430 // Maximum tiny size class.
 431 static const size_t kMaxTinyClass = 8;
 432
 433 // Smallest quantum-spaced size classes. It could actually also be labelled a
 434 // tiny allocation, and is spaced as such from the largest tiny size class.
 435 // Tiny classes being powers of 2, this is twice as large as the largest of
 436 // them.
 437 static const size_t kMinQuantumClass = kMaxTinyClass * 2;
 438 static const size_t kMinQuantumWideClass = 512;
 439 static const size_t kMinSubPageClass = 4_KiB;
 440
 441 // Amount (quantum) separating quantum-spaced size classes.
 442 static const size_t kQuantum = 16;
 443 static const size_t kQuantumMask = kQuantum - 1;
 444 static const size_t kQuantumWide = 256;
 445 static const size_t kQuantumWideMask = kQuantumWide - 1;
 446
 447 static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum;
 448 static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide;
 449
 450 // We can optimise some divisions to shifts if these are powers of two.
 451 static_assert(mozilla::IsPowerOfTwo(kQuantum),
 452               "kQuantum is not a power of two");
 453 static_assert(mozilla::IsPowerOfTwo(kQuantumWide),
 454               "kQuantumWide is not a power of two");
 455
 456 static_assert(kMaxQuantumClass % kQuantum == 0,
 457               "kMaxQuantumClass is not a multiple of kQuantum");
 458 static_assert(kMaxQuantumWideClass % kQuantumWide == 0,
 459               "kMaxQuantumWideClass is not a multiple of kQuantumWide");
 460 static_assert(kQuantum < kQuantumWide,
 461               "kQuantum must be smaller than kQuantumWide");
 462 static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass),
 463               "kMinSubPageClass is not a power of two");
 464
 465 // Number of (2^n)-spaced tiny classes.
 466 static const size_t kNumTinyClasses =
 467     LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1;
 468
  469 // Number of quantum-spaced classes.  We add kQuantum(Wide) before subtracting
  470 // to avoid underflow when a class is empty (Max<Min).
 471 static const size_t kNumQuantumClasses =
 472     (kMaxQuantumClass + kQuantum - kMinQuantumClass) / kQuantum;
 473 static const size_t kNumQuantumWideClasses =
 474     (kMaxQuantumWideClass + kQuantumWide - kMinQuantumWideClass) / kQuantumWide;
 475
 476 // Size and alignment of memory chunks that are allocated by the OS's virtual
 477 // memory system.
 478 static const size_t kChunkSize = 1_MiB;
 479 static const size_t kChunkSizeMask = kChunkSize - 1;
 480
 481 #ifdef MALLOC_STATIC_PAGESIZE
 482 // VM page size. It must divide the runtime CPU page size or the code
 483 // will abort.
 484 // Platform specific page size conditions copied from js/public/HeapAPI.h
 485 #  if defined(__powerpc64__)
 486 static const size_t gPageSize = 64_KiB;
 487 #  elif defined(__loongarch64)
 488 static const size_t gPageSize = 16_KiB;
 489 #  else
 490 static const size_t gPageSize = 4_KiB;
 491 #  endif
 492 static const size_t gRealPageSize = gPageSize;
 493
 494 #else
 495 // When MALLOC_OPTIONS contains one or several `P`s, the page size used
 496 // across the allocator is multiplied by 2 for each `P`, but we also keep
 497 // the real page size for code paths that need it. gPageSize is thus a
 498 // power of two greater or equal to gRealPageSize.
 499 static size_t gRealPageSize;
 500 static size_t gPageSize;
 501 #endif
 502
  503 #ifdef MALLOC_STATIC_PAGESIZE
      // Static page size: DECLARE_GLOBAL expands to nothing and
      // DEFINE_GLOBAL(type) expands to `static const type`, so each global is
      // defined as a constant where it is written.  GLOBAL_ASSERT maps to
      // static_assert, with or without an explicit message.
  504 #  define DECLARE_GLOBAL(type, name)
  505 #  define DEFINE_GLOBALS
  506 #  define END_GLOBALS
  507 #  define DEFINE_GLOBAL(type) static const type
  508 #  define GLOBAL_LOG2 LOG2
  509 #  define GLOBAL_ASSERT_HELPER1(x) static_assert(x, #x)
  510 #  define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
  511 #  define GLOBAL_ASSERT(...)                                               \
  512     MACRO_CALL(                                                            \
  513         MOZ_PASTE_PREFIX_AND_ARG_COUNT(GLOBAL_ASSERT_HELPER, __VA_ARGS__), \
  514         (__VA_ARGS__))
  515 #  define GLOBAL_CONSTEXPR constexpr
  516 #else
      // Runtime page size: DECLARE_GLOBAL declares a mutable static, and the
      // DEFINE_GLOBALS / END_GLOBALS pair wraps the definitions in the body of
      // DefineGlobals().  DEFINE_GLOBAL(type) expands to nothing, so each
      // definition becomes a plain assignment, checked with MOZ_RELEASE_ASSERT.
  517 #  define DECLARE_GLOBAL(type, name) static type name;
  518 #  define DEFINE_GLOBALS static void DefineGlobals() {
  519 #  define END_GLOBALS }
  520 #  define DEFINE_GLOBAL(type)
  521 #  define GLOBAL_LOG2 FloorLog2
  522 #  define GLOBAL_ASSERT MOZ_RELEASE_ASSERT
  523 #  define GLOBAL_CONSTEXPR
  524 #endif
 525
 526 DECLARE_GLOBAL(size_t, gMaxSubPageClass)
 527 DECLARE_GLOBAL(uint8_t, gNumSubPageClasses)
 528 DECLARE_GLOBAL(uint8_t, gPageSize2Pow)
 529 DECLARE_GLOBAL(size_t, gPageSizeMask)
 530 DECLARE_GLOBAL(size_t, gChunkNumPages)
 531 DECLARE_GLOBAL(size_t, gChunkHeaderNumPages)
 532 DECLARE_GLOBAL(size_t, gMaxLargeClass)
 533
  534 DEFINE_GLOBALS
      // Everything up to END_GLOBALS derives the page-size-dependent globals
      // from gPageSize.  The definitions are order-dependent: each one only uses
      // globals defined above it.
  535
  536 // Largest sub-page size class, or zero if there are none
  537 DEFINE_GLOBAL(size_t)
  538 gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0;
  539
  540 // Max size class for bins.
  541 #define gMaxBinClass \
  542   (gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass)
  543
  544 // Number of sub-page bins.
  545 DEFINE_GLOBAL(uint8_t)
  546 gNumSubPageClasses = []() GLOBAL_CONSTEXPR -> uint8_t {
  547   if GLOBAL_CONSTEXPR (gMaxSubPageClass != 0) {
          // Counts the powers of two from kMinSubPageClass up to
          // gMaxSubPageClass, both included.
  548     return FloorLog2(gMaxSubPageClass) - LOG2(kMinSubPageClass) + 1;
  549   }
  550   return 0;
  551 }();
  552
  553 DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize);
  554 DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1;
  555
  556 // Number of pages in a chunk.
  557 DEFINE_GLOBAL(size_t) gChunkNumPages = kChunkSize >> gPageSize2Pow;
  558
  559 // Number of pages necessary for a chunk header plus a guard page.
      // arena_chunk_t already embeds one map entry (map[1]), hence the
      // gChunkNumPages - 1 additional entries.  The byte size is rounded up to
      // whole pages, and the leading `1 +` adds the guard page.
  560 DEFINE_GLOBAL(size_t)
  561 gChunkHeaderNumPages =
  562     1 + (((sizeof(arena_chunk_t) +
  563            sizeof(arena_chunk_map_t) * (gChunkNumPages - 1) + gPageSizeMask) &
  564           ~gPageSizeMask) >>
  565          gPageSize2Pow);
  566
  567 // One chunk, minus the header, minus a guard page
      // NOTE(review): gChunkHeaderNumPages already counts one guard page, so the
      // extra gPageSize subtracted here is presumably a second guard page --
      // confirm.
  568 DEFINE_GLOBAL(size_t)
  569 gMaxLargeClass =
  570     kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow);
  571
  572 // Various sanity checks that regard configuration.
  573 GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize,
  574               "Page size is not a power of two");
  575 GLOBAL_ASSERT(kQuantum >= sizeof(void*));
  576 GLOBAL_ASSERT(kQuantum <= kQuantumWide);
  577 GLOBAL_ASSERT(!kNumQuantumWideClasses ||
  578               kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass));
  579
  580 GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass);
  581
  582 GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0);
  583 GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass);
  584 GLOBAL_ASSERT(kChunkSize >= gPageSize);
  585 GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize);
  586
  587 END_GLOBALS
 588
 589 // Recycle at most 128 MiB of chunks. This means we retain at most
 590 // 6.25% of the process address space on a 32-bit OS for later use.
 591 static const size_t gRecycleLimit = 128_MiB;
 592
 593 // The current amount of recycled bytes, updated atomically.
 594 static Atomic<size_t, ReleaseAcquire> gRecycledSize;
 595
 596 // Maximum number of dirty pages per arena.
 597 #define DIRTY_MAX_DEFAULT (1U << 8)
 598
 599 static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
 600
 601 // Return the smallest chunk multiple that is >= s.
 602 #define CHUNK_CEILING(s) (((s) + kChunkSizeMask) & ~kChunkSizeMask)
 603
 604 // Return the smallest cacheline multiple that is >= s.
 605 #define CACHELINE_CEILING(s) \
 606   (((s) + (kCacheLineSize - 1)) & ~(kCacheLineSize - 1))
 607
 608 // Return the smallest quantum multiple that is >= a.
 609 #define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask))
 610 #define QUANTUM_WIDE_CEILING(a) \
 611   (((a) + (kQuantumWideMask)) & ~(kQuantumWideMask))
 612
  613 // Return the smallest sub-page size class (a power of two) that is >= a.
 614 #define SUBPAGE_CEILING(a) (RoundUpPow2(a))
 615
 616 // Return the smallest pagesize multiple that is >= s.
 617 #define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask)
 618
 619 // Number of all the small-allocated classes
 620 #define NUM_SMALL_CLASSES                                          \
 621   (kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \
 622    gNumSubPageClasses)
 623
 624 // ***************************************************************************
 625 // MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
 626 #if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE)
 627 #  error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
 628 #endif
 629
 630 static void* base_alloc(size_t aSize);
 631
 632 // Set to true once the allocator has been initialized.
 633 #if defined(_MSC_VER) && !defined(__clang__)
 634 // MSVC may create a static initializer for an Atomic<bool>, which may actually
 635 // run after `malloc_init` has been called once, which triggers multiple
 636 // initializations.
 637 // We work around the problem by not using an Atomic<bool> at all. There is a
 638 // theoretical problem with using `malloc_initialized` non-atomically, but
 639 // practically, this is only true if `malloc_init` is never called before
 640 // threads are created.
 641 static bool malloc_initialized;
 642 #else
 643 static Atomic<bool, MemoryOrdering::ReleaseAcquire> malloc_initialized;
 644 #endif
 645
 646 static StaticMutex gInitLock MOZ_UNANNOTATED = {STATIC_MUTEX_INIT};
 647
 648 // ***************************************************************************
 649 // Statistics data structures.
 650
  651 struct arena_stats_t {
  652   // Number of bytes currently mapped.
  653   size_t mapped;
  654
  655   // Current number of committed pages.
  656   size_t committed;
  657
  658   // Per-size-category statistics.
        // NOTE(review): presumably byte totals for small and large allocations
        // respectively -- confirm the unit where these counters are updated.
  659   size_t allocated_small;
  660
  661   size_t allocated_large;
  662 };
 663
 664 // ***************************************************************************
 665 // Extent data structures.
 666
  667 enum ChunkType {
        // Classification of a chunk.  extent_node_t::mChunkType stores one of
        // these values for chunk recycling.
  668   UNKNOWN_CHUNK,
  669   ZEROED_CHUNK,    // chunk only contains zeroes.
  670   ARENA_CHUNK,     // used to back arena runs created by arena_t::AllocRun.
  671   HUGE_CHUNK,      // used to back huge allocations (e.g. arena_t::MallocHuge).
  672   RECYCLED_CHUNK,  // chunk has been stored for future use by chunk_recycle.
  673 };
 674
  675 // Node of the extent trees.  Describes one extent (address + size).
  676 struct extent_node_t {
        // The two members of this union share storage.  Per their comments, a
        // node uses the size-tree linkage when it is kept for chunk recycling
        // and the arena id when it describes a huge allocation.
  677   union {
  678     // Linkage for the size/address-ordered tree for chunk recycling.
  679     RedBlackTreeNode<extent_node_t> mLinkBySize;
  680     // Arena id for huge allocations. It's meant to match mArena->mId,
  681     // which only holds true when the arena hasn't been disposed of.
  682     arena_id_t mArenaId;
  683   };
  684
  685   // Linkage for the address-ordered tree.
  686   RedBlackTreeNode<extent_node_t> mLinkByAddr;
  687
  688   // Pointer to the extent that this tree node is responsible for.
  689   void* mAddr;
  690
  691   // Total region size.
  692   size_t mSize;
  693
        // As above, only one member is meaningful at a time: the chunk type for
        // recycled chunks, the arena pointer for huge allocations.
  694   union {
  695     // What type of chunk is there; used for chunk recycling.
  696     ChunkType mChunkType;
  697
  698     // A pointer to the associated arena, for huge allocations.
  699     arena_t* mArena;
  700   };
  701 };
 702
 703 struct ExtentTreeSzTrait {
 704   static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
 705     return aThis->mLinkBySize;
 706   }
 707
 708   static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
 709     Order ret = CompareInt(aNode->mSize, aOther->mSize);
 710     return (ret != Order::eEqual) ? ret
 711                                   : CompareAddr(aNode->mAddr, aOther->mAddr);
 712   }
 713 };
 714
  715 struct ExtentTreeTrait {
        // Tree trait ordering extent nodes by their start address (mAddr), using
        // the mLinkByAddr linkage.
  716   static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
  717     return aThis->mLinkByAddr;
  718   }
  719
  720   static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
  721     return CompareAddr(aNode->mAddr, aOther->mAddr);
  722   }
  723 };
 724
 725 struct ExtentTreeBoundsTrait : public ExtentTreeTrait {
 726   static inline Order Compare(extent_node_t* aKey, extent_node_t* aNode) {
 727     uintptr_t key_addr = reinterpret_cast<uintptr_t>(aKey->mAddr);
 728     uintptr_t node_addr = reinterpret_cast<uintptr_t>(aNode->mAddr);
 729     size_t node_size = aNode->mSize;
 730
 731     // Is aKey within aNode?
 732     if (node_addr <= key_addr && key_addr < node_addr + node_size) {
 733       return Order::eEqual;
 734     }
 735
 736     return CompareAddr(aKey->mAddr, aNode->mAddr);
 737   }
 738 };
 739
 740 // Describe size classes to which allocations are rounded up to.
 741 // TODO: add large and huge types when the arena allocation code
 742 // changes in a way that allows it to be beneficial.
 743 class SizeClass {
 744  public:
 745   enum ClassType {
 746     Tiny,
 747     Quantum,
 748     QuantumWide,
 749     SubPage,
 750     Large,
 751   };
 752
 753   explicit inline SizeClass(size_t aSize) {
 754     if (aSize <= kMaxTinyClass) {
 755       mType = Tiny;
 756       mSize = std::max(RoundUpPow2(aSize), kMinTinyClass);
 757     } else if (aSize <= kMaxQuantumClass) {
 758       mType = Quantum;
 759       mSize = QUANTUM_CEILING(aSize);
 760     } else if (aSize <= kMaxQuantumWideClass) {
 761       mType = QuantumWide;
 762       mSize = QUANTUM_WIDE_CEILING(aSize);
 763     } else if (aSize <= gMaxSubPageClass) {
 764       mType = SubPage;
 765       mSize = SUBPAGE_CEILING(aSize);
 766     } else if (aSize <= gMaxLargeClass) {
 767       mType = Large;
 768       mSize = PAGE_CEILING(aSize);
 769     } else {
 770       MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Invalid size");
 771     }
 772   }
 773
 774   SizeClass& operator=(const SizeClass& aOther) = default;
 775
 776   bool operator==(const SizeClass& aOther) { return aOther.mSize == mSize; }
 777
 778   size_t Size() { return mSize; }
 779
 780   ClassType Type() { return mType; }
 781
 782   SizeClass Next() { return SizeClass(mSize + 1); }
 783
 784  private:
 785   ClassType mType;
 786   size_t mSize;
 787 };
 788
 789 // Fast division
 790 //
 791 // During deallocation we want to divide by the size class.  This class
 792 // provides a routine and sets up a constant as follows.
 793 //
 794 // To divide by a number D that is not a power of two we multiply by (2^17 /
 795 // D) and then right shift by 17 positions.
 796 //
 797 //   X / D
 798 //
 799 // becomes
 800 //
 801 //   (X * m) >> p
 802 //
 803 // Where m is calculated during the FastDivisor constructor similarly to:
 804 //
 805 //   m = 2^p / D
 806 //
 807 template <typename T>
 808 class FastDivisor {
 809  private:
 810   // The shift amount (p) is chosen to minimise the size of m while
 811   // working for divisors up to 65536 in steps of 16.  I arrived at 17
 812   // experimentally.  I wanted a low number to minimise the range of m
 813   // so it can fit in a uint16_t, 16 didn't work but 17 worked perfectly.
 814   //
 815   // We'd need to increase this if we allocated memory on smaller boundaries
 816   // than 16.
 817   static const unsigned p = 17;
 818
 819   // We can fit the inverted divisor in 16 bits, but we template it here for
 820   // convenience.
 821   T m;
 822
 823  public:
 824   // Needed so mBins can be constructed.
 825   FastDivisor() : m(0) {}
 826
 827   FastDivisor(unsigned div, unsigned max) {
 828     MOZ_ASSERT(div <= max);
 829
 830     // divide_inv_shift is large enough.
 831     MOZ_ASSERT((1U << p) >= div);
 832
 833     // The calculation here for m is formula 26 from Section
 834     // 10-9 "Unsigned Division by Divisors >= 1" in
 835     // Henry S. Warren, Jr.'s Hacker's Delight, 2nd Ed.
 836     unsigned m_ = ((1U << p) + div - 1 - (((1U << p) - 1) % div)) / div;
 837
 838     // Make sure that max * m does not overflow.
 839     MOZ_DIAGNOSTIC_ASSERT(max < UINT_MAX / m_);
 840
 841     MOZ_ASSERT(m_ <= std::numeric_limits<T>::max());
 842     m = static_cast<T>(m_);
 843
 844     // Initialisation made m non-zero.
 845     MOZ_ASSERT(m);
 846
 847     // Test that all the divisions in the range we expected would work.
 848 #ifdef MOZ_DEBUG
 849     for (unsigned num = 0; num < max; num += div) {
 850       MOZ_ASSERT(num / div == divide(num));
 851     }
 852 #endif
 853   }
 854
 855   // Note that this always occurs in uint32_t regardless of m's type.  If m is
 856   // a uint16_t it will be zero-extended before the multiplication.  We also use
 857   // uint32_t rather than something that could possibly be larger because it is
 858   // most-likely the cheapest multiplication.
 859   inline uint32_t divide(uint32_t num) const {
 860     // Check that m was initialised.
 861     MOZ_ASSERT(m);
 862     return (num * m) >> p;
 863   }
 864 };
 865
 866 template <typename T>
 867 unsigned inline operator/(unsigned num, FastDivisor<T> divisor) {
 868   return divisor.divide(num);
 869 }
 870
// ***************************************************************************
// Radix tree data structures.
//
// The number of bits passed to the template is the number of significant bits
// in an address to do a radix lookup with.
//
// An address is looked up by splitting it in kBitsPerLevel bit chunks, except
// the most significant bits, where the bit chunk is kBitsAtLevel1 which can be
// different if Bits is not a multiple of kBitsPerLevel.
//
// With e.g. sizeof(void*)=4, Bits=16 and kBitsPerLevel=8, an address is split
// like the following:
// 0x12345678 -> mRoot[0x12][0x34]
template <size_t Bits>
class AddressRadixTree {
// Size in bytes of each radix tree node.  LOG2() is applied to it below, so
// it is expected to be a power of 2.
// This impacts tree depth.
#ifdef HAVE_64BIT_BUILD
  static const size_t kNodeSize = kCacheLineSize;
#else
  static const size_t kNodeSize = 16_KiB;
#endif
  // log2 of the number of pointer-sized slots that fit in one node, i.e. the
  // number of address bits consumed per tree level.
  static const size_t kBitsPerLevel = LOG2(kNodeSize) - LOG2(sizeof(void*));
  // Bits consumed by the first level: the remainder of Bits / kBitsPerLevel,
  // or a full kBitsPerLevel when Bits divides evenly.
  static const size_t kBitsAtLevel1 =
      (Bits % kBitsPerLevel) ? Bits % kBitsPerLevel : kBitsPerLevel;
  // Number of levels: Bits / kBitsPerLevel, rounded up.
  static const size_t kHeight = (Bits + kBitsPerLevel - 1) / kBitsPerLevel;
  // The first level plus the (kHeight - 1) full levels must cover exactly
  // Bits bits.
  static_assert(kBitsAtLevel1 + (kHeight - 1) * kBitsPerLevel == Bits,
                "AddressRadixTree parameters don't work out");

  Mutex mLock MOZ_UNANNOTATED;
  // First-level node of the tree.
  void** mRoot;

 public:
  // Defined elsewhere in the file.
  // NOTE(review): presumably sets up mLock and mRoot and reports success.
  bool Init();

  // Looks up the value stored for aAddr (defined elsewhere in the file).
  inline void* Get(void* aAddr);

  // Returns whether the value was properly set.
  inline bool Set(void* aAddr, void* aValue);

  // Clears the entry for aAddr by storing nullptr through Set(); the result
  // is whatever Set() returns.
  inline bool Unset(void* aAddr) { return Set(aAddr, nullptr); }

 private:
  // Defined elsewhere in the file.
  // NOTE(review): presumably returns the slot for aAddr, creating missing
  // intermediate nodes only when aCreate is true - confirm against the
  // definition.
  inline void** GetSlot(void* aAddr, bool aCreate = false);
};
 916
 917 // ***************************************************************************
 918 // Arena data structures.
 919
// Forward declaration: arena_run_t stores a pointer to its bin and is defined
// before arena_bin_t itself.
struct arena_bin_t;
 921
 922 struct ArenaChunkMapLink {
 923   static RedBlackTreeNode<arena_chunk_map_t>& GetTreeNode(
 924       arena_chunk_map_t* aThis) {
 925     return aThis->link;
 926   }
 927 };
 928
 929 struct ArenaRunTreeTrait : public ArenaChunkMapLink {
 930   static inline Order Compare(arena_chunk_map_t* aNode,
 931                               arena_chunk_map_t* aOther) {
 932     MOZ_ASSERT(aNode);
 933     MOZ_ASSERT(aOther);
 934     return CompareAddr(aNode, aOther);
 935   }
 936 };
 937
 938 struct ArenaAvailTreeTrait : public ArenaChunkMapLink {
 939   static inline Order Compare(arena_chunk_map_t* aNode,
 940                               arena_chunk_map_t* aOther) {
 941     size_t size1 = aNode->bits & ~gPageSizeMask;
 942     size_t size2 = aOther->bits & ~gPageSizeMask;
 943     Order ret = CompareInt(size1, size2);
 944     return (ret != Order::eEqual)
 945                ? ret
 946                : CompareAddr((aNode->bits & CHUNK_MAP_KEY) ? nullptr : aNode,
 947                              aOther);
 948   }
 949 };
 950
 951 struct ArenaDirtyChunkTrait {
 952   static RedBlackTreeNode<arena_chunk_t>& GetTreeNode(arena_chunk_t* aThis) {
 953     return aThis->link_dirty;
 954   }
 955
 956   static inline Order Compare(arena_chunk_t* aNode, arena_chunk_t* aOther) {
 957     MOZ_ASSERT(aNode);
 958     MOZ_ASSERT(aOther);
 959     return CompareAddr(aNode, aOther);
 960   }
 961 };
 962
 963 #ifdef MALLOC_DOUBLE_PURGE
 964 namespace mozilla {
 965
 966 template <>
 967 struct GetDoublyLinkedListElement<arena_chunk_t> {
 968   static DoublyLinkedListElement<arena_chunk_t>& Get(arena_chunk_t* aThis) {
 969     return aThis->chunks_madvised_elem;
 970   }
 971 };
 972 }  // namespace mozilla
 973 #endif
 974
// Header of a run.  The member order depends on whether
// MOZ_DIAGNOSTIC_ASSERT_ENABLED is defined; see the comment below.
struct arena_run_t {
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
  // Magic number, only present in builds with diagnostic assertions.
  uint32_t mMagic;
#  define ARENA_RUN_MAGIC 0x384adf93

  // On 64-bit platforms, placing the arena_bin_t pointer directly after
  // the mMagic field would leave padding between the two, making the run
  // header larger than necessary.
  // When MOZ_DIAGNOSTIC_ASSERT_ENABLED is not set, starting the header
  // with mNumFree followed by the arena_bin_t pointer would yield the
  // same padding. We do want the mMagic field to appear first, so
  // depending on whether MOZ_DIAGNOSTIC_ASSERT_ENABLED is set or not,
  // mNumFree is declared either here (next to mMagic) or after
  // mRegionsMinElement to avoid that padding.

  // Number of free regions in run.
  unsigned mNumFree;
#endif

  // Bin this run is associated with.
  arena_bin_t* mBin;

  // Index of first element that might have a free region.
  unsigned mRegionsMinElement;

#if !defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
  // Number of free regions in run.
  unsigned mNumFree;
#endif

  // Bitmask of in-use regions (0: in use, 1: free).
  // Declared with one element; the number of elements actually used for a
  // given size class is recorded in arena_bin_t::mRunNumRegionsMask.
  unsigned mRegionsMask[1];  // Dynamically sized.
};
1007
// Per-size-class bookkeeping of an arena: the run currently used for
// allocations, the tree of other non-full runs, and the run geometry for the
// size class.  The member order is deliberate: the size of this struct is
// checked by the static_asserts that follow its definition.
struct arena_bin_t {
  // Current run being used to service allocations of this bin's size
  // class.
  arena_run_t* mCurrentRun;

  // Tree of non-full runs.  This tree is used when looking for an
  // existing run when mCurrentRun is no longer usable.  We choose the
  // non-full run that is lowest in memory; this policy tends to keep
  // objects packed well, and it can also help reduce the number of
  // almost-empty chunks.
  RedBlackTree<arena_chunk_map_t, ArenaRunTreeTrait> mNonFullRuns;

  // Bin's size class.
  size_t mSizeClass;

  // Total number of regions in a run for this bin's size class.
  uint32_t mRunNumRegions;

  // Number of elements in a run's mRegionsMask for this bin's size class.
  uint32_t mRunNumRegionsMask;

  // Offset of first region in a run for this bin's size class.
  uint32_t mRunFirstRegionOffset;

  // Current number of runs in this bin, full or otherwise.
  uint32_t mNumRuns;

  // A constant for fast division by size class.  This value is 16 bits wide so
  // it is placed after the wider members above.
  FastDivisor<uint16_t> mSizeDivisor;

  // Total number of pages in a run for this bin's size class.
  uint8_t mRunSizePages;

  // Amount of overhead runs are allowed to have.
  // NOTE(review): presumably the relaxed value is a fallback used when the
  // strict one cannot be met - confirm in Init().
  static constexpr double kRunOverhead = 1.6_percent;
  static constexpr double kRunRelaxedOverhead = 2.4_percent;

  // Initialize a bin for the given size class.
  // The generated run sizes, for a page size of 4 KiB, are:
  //   size|run       size|run       size|run       size|run
  //  class|size     class|size     class|size     class|size
  //     4   4 KiB      8   4 KiB     16   4 KiB     32   4 KiB
  //    48   4 KiB     64   4 KiB     80   4 KiB     96   4 KiB
  //   112   4 KiB    128   8 KiB    144   4 KiB    160   8 KiB
  //   176   4 KiB    192   4 KiB    208   8 KiB    224   4 KiB
  //   240   8 KiB    256  16 KiB    272   8 KiB    288   4 KiB
  //   304  12 KiB    320  12 KiB    336   4 KiB    352   8 KiB
  //   368   4 KiB    384   8 KiB    400  20 KiB    416  16 KiB
  //   432  12 KiB    448   4 KiB    464  16 KiB    480   8 KiB
  //   496  20 KiB    512  32 KiB    768  16 KiB   1024  64 KiB
  //  1280  24 KiB   1536  32 KiB   1792  16 KiB   2048 128 KiB
  //  2304  16 KiB   2560  48 KiB   2816  36 KiB   3072  64 KiB
  //  3328  36 KiB   3584  32 KiB   3840  64 KiB
  inline void Init(SizeClass aSizeClass);
};
1064
1065 // We try to keep the above structure aligned with common cache lines sizes,
1066 // often that's 64 bytes on x86 and ARM, we don't make assumptions for other
1067 // architectures.
1068 #if defined(__x86_64__) || defined(__aarch64__)
1069 // On 64bit platforms this structure is often 48 bytes
1070 // long, which means every other array element will be properly aligned.
1071 static_assert(sizeof(arena_bin_t) == 48);
1072 #elif defined(__x86__) || defined(__arm__)
1073 static_assert(sizeof(arena_bin_t) == 32);
1074 #endif
1075
// An arena: the per-arena lock, statistics, chunk and run bookkeeping, and
// the array of bins for small size classes, together with the allocation,
// reallocation and deallocation entry points that operate on them.
struct arena_t {
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
  // Magic number, only present in builds with diagnostic assertions.
  uint32_t mMagic;
#  define ARENA_MAGIC 0x947d3d24
#endif

  // Linkage for the tree of arenas by id.
  RedBlackTreeNode<arena_t> mLink;

  // Arena id, that we keep away from the beginning of the struct so that
  // free list pointers in TypedBaseAlloc<arena_t> don't overflow in it,
  // and it keeps the value it had after the destructor.
  arena_id_t mId;

  // All operations on this arena require that lock be locked.  The MaybeMutex
  // class will elide locking if the arena is accessed from a single thread
  // only.
  MaybeMutex mLock MOZ_UNANNOTATED;

  // Statistics of this arena.
  arena_stats_t mStats;

 private:
  // Tree of dirty-page-containing chunks this arena manages.
  RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty;

#ifdef MALLOC_DOUBLE_PURGE
  // Head of a linked list of MADV_FREE'd-page-containing chunks this
  // arena manages.
  DoublyLinkedList<arena_chunk_t> mChunksMAdvised;
#endif

  // In order to avoid rapid chunk allocation/deallocation when an arena
  // oscillates right on the cusp of needing a new chunk, cache the most
  // recently freed chunk.  The spare is left in the arena's chunk trees
  // until it is deleted.
  //
  // There is one spare chunk per arena, rather than one spare total, in
  // order to avoid interactions between multiple threads that could make
  // a single spare inadequate.
  arena_chunk_t* mSpare;

  // A per-arena opt-in to randomize the offset of small allocations
  bool mRandomizeSmallAllocations;

  // Whether this is a private arena. Multiple public arenas are just a
  // performance optimization and not a safety feature.
  //
  // Since, for example, we don't want thread-local arenas to grow too much, we
  // use the default arena for bigger allocations. We use this member to allow
  // realloc() to switch out of our arena if needed (which is not allowed for
  // private arenas for security).
  bool mIsPrivate;

  // A pseudorandom number generator. Initially null, it gets initialized
  // on first use to avoid recursive malloc initialization (e.g. on OSX
  // arc4random allocates memory).
  mozilla::non_crypto::XorShift128PlusRNG* mPRNG;

 public:
  // Current count of pages within unused runs that are potentially
  // dirty, and for which madvise(... MADV_FREE) has not been called.  By
  // tracking this, we can institute a limit on how much dirty unused
  // memory is mapped for each arena.
  size_t mNumDirty;

  // Maximum value allowed for mNumDirty.
  size_t mMaxDirty;

  // NOTE(review): presumably per-arena adjustments that EffectiveMaxDirty()
  // applies on top of mMaxDirty - confirm against its definition.
  int32_t mMaxDirtyIncreaseOverride;
  int32_t mMaxDirtyDecreaseOverride;

 private:
  // Size/address-ordered tree of this arena's available runs.  This tree
  // is used for first-best-fit run allocation.
  RedBlackTree<arena_chunk_map_t, ArenaAvailTreeTrait> mRunsAvail;

 public:
  // mBins is used to store rings of free regions of the following sizes,
  // assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
  //
  //  | mBins[i] | size |
  //  +----------+------+
  //  |       0  |    2 |
  //  |       1  |    4 |
  //  |       2  |    8 |
  //  +----------+------+
  //  |       3  |   16 |
  //  |       4  |   32 |
  //  |       5  |   48 |
  //  |       6  |   64 |
  //  |          :      :
  //  |          :      :
  //  |      33  |  496 |
  //  |      34  |  512 |
  //  +----------+------+
  //  |      35  |  768 |
  //  |      36  | 1024 |
  //  |          :      :
  //  |          :      :
  //  |      46  | 3584 |
  //  |      47  | 3840 |
  //  +----------+------+
  //
  // This must remain the last data member: it is declared with one element
  // but is dynamically sized.
  arena_bin_t mBins[1];  // Dynamically sized.

  explicit arena_t(arena_params_t* aParams, bool aIsPrivate);
  ~arena_t();

 private:
  void InitChunk(arena_chunk_t* aChunk, bool aZeroed);

  // This may return a chunk that should be destroyed with chunk_dealloc outside
  // of the arena lock.  It is not the same chunk as was passed in (since that
  // chunk now becomes mSpare).
  [[nodiscard]] arena_chunk_t* DeallocChunk(arena_chunk_t* aChunk);

  arena_run_t* AllocRun(size_t aSize, bool aLarge, bool aZero);

  // NOTE(review): the returned chunk is presumably one the caller has to
  // release, as for DeallocChunk() - confirm against the definition.
  arena_chunk_t* DallocRun(arena_run_t* aRun, bool aDirty);

  [[nodiscard]] bool SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
                              bool aZero);

  void TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
                   size_t aNewSize);

  void TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
                   size_t aNewSize, bool dirty);

  arena_run_t* GetNonFullBinRun(arena_bin_t* aBin);

  inline uint8_t FindFreeBitInMask(uint32_t aMask, uint32_t& aRng);

  inline void* ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin);

  inline void* MallocSmall(size_t aSize, bool aZero);

  void* MallocLarge(size_t aSize, bool aZero);

  void* MallocHuge(size_t aSize, bool aZero);

  void* PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize);

  // Note the parameter order: size first, then alignment (the reverse of
  // PallocLarge() and Palloc()).
  void* PallocHuge(size_t aSize, size_t aAlignment, bool aZero);

  void RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
                         size_t aOldSize);

  bool RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
                       size_t aOldSize);

  void* RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize);

  void* RallocHuge(void* aPtr, size_t aSize, size_t aOldSize);

 public:
  inline void* Malloc(size_t aSize, bool aZero);

  void* Palloc(size_t aAlignment, size_t aSize);

  // This may return a chunk that should be destroyed with chunk_dealloc outside
  // of the arena lock.  It is not the same chunk as was passed in (since that
  // chunk now becomes mSpare).
  [[nodiscard]] inline arena_chunk_t* DallocSmall(arena_chunk_t* aChunk,
                                                  void* aPtr,
                                                  arena_chunk_map_t* aMapElm);

  // NOTE(review): the returned chunk presumably follows the same contract as
  // DallocSmall() - confirm against the definition.
  [[nodiscard]] arena_chunk_t* DallocLarge(arena_chunk_t* aChunk, void* aPtr);

  void* Ralloc(void* aPtr, size_t aSize, size_t aOldSize);

  size_t EffectiveMaxDirty();

  // Passing one means purging all.
  void Purge(size_t aMaxDirty);

  void HardPurge();

  // True when locking is disabled on mLock.
  bool IsMainThreadOnly() const { return !mLock.LockIsEnabled(); }

  // Plain `new` is disabled; arenas can only be created through the fallible
  // overload below.
  void* operator new(size_t aCount) = delete;

  void* operator new(size_t aCount, const fallible_t&) noexcept;

  void operator delete(void*);
};
1261
1262 struct ArenaTreeTrait {
1263   static RedBlackTreeNode<arena_t>& GetTreeNode(arena_t* aThis) {
1264     return aThis->mLink;
1265   }
1266
1267   static inline Order Compare(arena_t* aNode, arena_t* aOther) {
1268     MOZ_ASSERT(aNode);
1269     MOZ_ASSERT(aOther);
1270     return CompareInt(aNode->mId, aOther->mId);
1271   }
1272 };
1273
1274 // Bookkeeping for all the arenas used by the allocator.
1275 // Arenas are separated in two categories:
1276 // - "private" arenas, used through the moz_arena_* API
1277 // - all the other arenas: the default arena, and thread-local arenas,
1278 //   used by the standard API.
1279 class ArenaCollection {
1280  public:
1281   bool Init() {
1282     mArenas.Init();
1283     mPrivateArenas.Init();
1284     mMainThreadArenas.Init();
1285     arena_params_t params;
1286     // The main arena allows more dirty pages than the default for other arenas.
1287     params.mMaxDirty = opt_dirty_max;
1288     mDefaultArena =
1289         mLock.Init() ? CreateArena(/* aIsPrivate = */ false, &params) : nullptr;
1290     return bool(mDefaultArena);
1291   }
1292
1293   inline arena_t* GetById(arena_id_t aArenaId, bool aIsPrivate);
1294
1295   arena_t* CreateArena(bool aIsPrivate, arena_params_t* aParams);
1296
1297   void DisposeArena(arena_t* aArena) {
1298     MutexAutoLock lock(mLock);
1299     Tree& tree =
1300         aArena->IsMainThreadOnly() ? mMainThreadArenas : mPrivateArenas;
1301
1302     MOZ_RELEASE_ASSERT(tree.Search(aArena), "Arena not in tree");
1303     tree.Remove(aArena);
1304     delete aArena;
1305   }
1306
1307   void SetDefaultMaxDirtyPageModifier(int32_t aModifier) {
1308     mDefaultMaxDirtyPageModifier = aModifier;
1309   }
1310   int32_t DefaultMaxDirtyPageModifier() { return mDefaultMaxDirtyPageModifier; }
1311
1312   using Tree = RedBlackTree<arena_t, ArenaTreeTrait>;
1313
1314   struct Iterator : Tree::Iterator {
1315     explicit Iterator(Tree* aTree, Tree* aSecondTree,
1316                       Tree* aThirdTree = nullptr)
1317         : Tree::Iterator(aTree),
1318           mSecondTree(aSecondTree),
1319           mThirdTree(aThirdTree) {}
1320
1321     Item<Iterator> begin() {
1322       return Item<Iterator>(this, *Tree::Iterator::begin());
1323     }
1324
1325     Item<Iterator> end() { return Item<Iterator>(this, nullptr); }
1326
1327     arena_t* Next() {
1328       arena_t* result = Tree::Iterator::Next();
1329       if (!result && mSecondTree) {
1330         new (this) Iterator(mSecondTree, mThirdTree);
1331         result = *Tree::Iterator::begin();
1332       }
1333       return result;
1334     }
1335
1336    private:
1337     Tree* mSecondTree;
1338     Tree* mThirdTree;
1339   };
1340
1341   Iterator iter() {
1342     if (IsOnMainThreadWeak()) {
1343       return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas);
1344     }
1345     return Iterator(&mArenas, &mPrivateArenas);
1346   }
1347
1348   inline arena_t* GetDefault() { return mDefaultArena; }
1349
1350   Mutex mLock MOZ_UNANNOTATED;
1351
1352   // We're running on the main thread which is set by a call to SetMainThread().
1353   bool IsOnMainThread() const {
1354     return mMainThreadId.isSome() && mMainThreadId.value() == GetThreadId();
1355   }
1356
1357   // We're running on the main thread or SetMainThread() has never been called.
1358   bool IsOnMainThreadWeak() const {
1359     return mMainThreadId.isNothing() || IsOnMainThread();
1360   }
1361
1362   // After a fork set the new thread ID in the child.
1363   void PostForkFixMainThread() {
1364     if (mMainThreadId.isSome()) {
1365       // Only if the main thread has been defined.
1366       mMainThreadId = Some(GetThreadId());
1367     }
1368   }
1369
1370   void SetMainThread() {
1371     MutexAutoLock lock(mLock);
1372     MOZ_ASSERT(mMainThreadId.isNothing());
1373     mMainThreadId = Some(GetThreadId());
1374   }
1375
1376  private:
1377   const static arena_id_t MAIN_THREAD_ARENA_BIT = 0x1;
1378
1379   inline arena_t* GetByIdInternal(Tree& aTree, arena_id_t aArenaId);
1380
1381   arena_id_t MakeRandArenaId(bool aIsMainThreadOnly) const;
1382   static bool ArenaIdIsMainThreadOnly(arena_id_t aArenaId) {
1383     return aArenaId & MAIN_THREAD_ARENA_BIT;
1384   }
1385
1386   arena_t* mDefaultArena;
1387   arena_id_t mLastPublicArenaId;
1388
1389   // Accessing mArenas and mPrivateArenas can only be done while holding mLock.
1390   // Since mMainThreadArenas can only be used from the main thread, it can be
1391   // accessed without a lock which is why it is a seperate tree.
1392   Tree mArenas;
1393   Tree mPrivateArenas;
1394   Tree mMainThreadArenas;
1395   Atomic<int32_t, MemoryOrdering::Relaxed> mDefaultMaxDirtyPageModifier;
1396   Maybe<ThreadId> mMainThreadId;
1397 };
1398
// The single, file-local collection of all arenas.
static ArenaCollection gArenas;
1400
// ******
// Chunks.

// Radix tree keyed on the address bits above the chunk offset: the pointer
// width in bits minus LOG2(kChunkSize).
static AddressRadixTree<(sizeof(void*) << 3) - LOG2(kChunkSize)> gChunkRTree;

// Protects chunk-related data structures.
static Mutex chunks_mtx;

// Trees of chunks that were previously allocated (trees differ only in node
// ordering).  These are used when allocating chunks, in an attempt to re-use
// address space.  Depending on function, different tree orderings are needed,
// which is why there are two trees with the same contents: one ordered by
// size then address, the other by address only.
static RedBlackTree<extent_node_t, ExtentTreeSzTrait> gChunksBySize
    MOZ_GUARDED_BY(chunks_mtx);
static RedBlackTree<extent_node_t, ExtentTreeTrait> gChunksByAddress
    MOZ_GUARDED_BY(chunks_mtx);
1416
// Protects huge allocation-related data structures.
static Mutex huge_mtx;

// Tree of chunks that are stand-alone huge allocations, ordered by address.
static RedBlackTree<extent_node_t, ExtentTreeTrait> huge
    MOZ_GUARDED_BY(huge_mtx);

// Huge allocation statistics.
// NOTE(review): presumably bytes handed out to callers vs. bytes mapped for
// huge allocations - confirm where they are updated.
static size_t huge_allocated MOZ_GUARDED_BY(huge_mtx);
static size_t huge_mapped MOZ_GUARDED_BY(huge_mtx);
1427
// **************************
// base (internal allocation).

// Guards all the base_* variables below.
static Mutex base_mtx;

// Current pages that are being used for internal memory allocations.  These
// pages are carved up in cacheline-size quanta, so that there is no chance of
// false cache line sharing.
static void* base_pages MOZ_GUARDED_BY(base_mtx);
// NOTE(review): presumably the address the next internal allocation is
// carved from - confirm in the base allocator.
static void* base_next_addr MOZ_GUARDED_BY(base_mtx);
// NOTE(review): presumably the start of the part of base_pages that is not
// committed yet - confirm in the base allocator.
static void* base_next_decommitted MOZ_GUARDED_BY(base_mtx);
// Address immediately past base_pages.
static void* base_past_addr MOZ_GUARDED_BY(base_mtx);
// Statistics for the internal allocator.
// NOTE(review): presumably mapped vs. committed byte counts - confirm.
static size_t base_mapped MOZ_GUARDED_BY(base_mtx);
static size_t base_committed MOZ_GUARDED_BY(base_mtx);
1443
// ******
// Arenas.

// The arena associated with the current thread (per
// jemalloc_thread_local_arena).
//
// On OSX, __thread/thread_local circles back calling malloc to allocate
// storage on first access on each thread, which leads to an infinite loop,
// but pthread-based TLS somehow doesn't have this problem.  Hence the
// key-based ThreadLocal storage is used on Darwin and MOZ_THREAD_LOCAL
// everywhere else.
#if !defined(XP_DARWIN)
static MOZ_THREAD_LOCAL(arena_t*) thread_arena;
#else
static detail::ThreadLocal<arena_t*, detail::ThreadLocalKeyStorage>
    thread_arena;
#endif
1458
1459 // *****************************
1460 // Runtime configuration options.
1461
// Storage class for the options below: plain `static` (modifiable) when
// MALLOC_RUNTIME_CONFIG is defined, `static const` otherwise.
#ifdef MALLOC_RUNTIME_CONFIG
#  define MALLOC_RUNTIME_VAR static
#else
#  define MALLOC_RUNTIME_VAR static const
#endif
1467
// How much of a buffer MaybePoison() overwrites with the poison pattern.
enum PoisonType {
  // Nothing is poisoned.
  NONE,
  // At most opt_poison_size bytes from the start of the buffer are poisoned.
  SOME,
  // The whole buffer is poisoned.
  ALL,
};
1473
// When set, ApplyZeroOrJunk() fills memory with the junk pattern.
MALLOC_RUNTIME_VAR bool opt_junk = false;
// When set (and opt_junk is not), ApplyZeroOrJunk() fills memory with zeroes.
MALLOC_RUNTIME_VAR bool opt_zero = false;

// Poisoning policy used by MaybePoison(): everything on early-beta-or-earlier
// builds, only the start of each buffer otherwise.
#ifdef EARLY_BETA_OR_EARLIER
MALLOC_RUNTIME_VAR PoisonType opt_poison = ALL;
#else
MALLOC_RUNTIME_VAR PoisonType opt_poison = SOME;
#endif

// Upper bound on the number of bytes poisoned when opt_poison is SOME: four
// cache lines.
MALLOC_RUNTIME_VAR size_t opt_poison_size = kCacheLineSize * 4;

// NOTE(review): presumably the default for the per-arena
// mRandomizeSmallAllocations setting - confirm where it is read.
static bool opt_randomize_small = true;
1486
1487 // ***************************************************************************
1488 // Begin forward declarations.
1489
// Chunk-level allocation primitives, defined further down in the file.
static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase,
                         bool* aZeroed = nullptr);
static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType);
static void chunk_ensure_zero(void* aPtr, size_t aSize, bool aZeroed);
static void huge_dalloc(void* aPtr, arena_t* aArena);
// Slow path of malloc_init(): called when the allocator is not initialized
// yet; returns whether initialization succeeded.
static bool malloc_init_hard();

// Fork hooks.  They do not exist on Windows; on Darwin they get C linkage,
// elsewhere they are file-local.
#ifndef XP_WIN
#  ifdef XP_DARWIN
#    define FORK_HOOK extern "C"
#  else
#    define FORK_HOOK static
#  endif
FORK_HOOK void _malloc_prefork(void);
FORK_HOOK void _malloc_postfork_parent(void);
FORK_HOOK void _malloc_postfork_child(void);
#endif
1507
1508 // End forward declarations.
1509 // ***************************************************************************
1510
1511 // FreeBSD's pthreads implementation calls malloc(3), so the malloc
1512 // implementation has to take pains to avoid infinite recursion during
1513 // initialization.
1514 // Returns whether the allocator was successfully initialized.
1515 static inline bool malloc_init() {
1516   if (!malloc_initialized) {
1517     return malloc_init_hard();
1518   }
1519   return true;
1520 }
1521
// Writes the NUL-terminated string p to stderr with a single unbuffered
// write.  Failures are ignored.
static void _malloc_message(const char* p) {
#if !defined(XP_WIN)
#  define _write write
#endif
  const unsigned int length = (unsigned int)strlen(p);
  // The result is only inspected so that it counts as used: some versions of
  // the glibc headers annotate write() with warn_unused_result and gcc would
  // warn otherwise.
  const auto written = _write(STDERR_FILENO, p, length);
  if (written < 0) {
    return;
  }
}
1532
// Writes several strings to stderr, one after the other and in argument
// order, by handing each of them to the single-string overload.
template <typename... Args>
static void _malloc_message(const char* p, Args... args) {
  _malloc_message(p);
  (_malloc_message(args), ...);
}
1538
#ifdef ANDROID
// Android's pthread.h does not declare pthread_atfork() until SDK 21, so it
// is declared here.  Per POSIX, the three callbacks are the prepare, parent
// and child fork handlers, in that order.
extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void),
                                         void (*)(void));
#endif
1544
1545 // ***************************************************************************
1546 // Begin Utility functions/macros.
1547
1548 // Return the chunk address for allocation address a.
1549 static inline arena_chunk_t* GetChunkForPtr(const void* aPtr) {
1550   return (arena_chunk_t*)(uintptr_t(aPtr) & ~kChunkSizeMask);
1551 }
1552
1553 // Return the chunk offset of address a.
1554 static inline size_t GetChunkOffsetForPtr(const void* aPtr) {
1555   return (size_t)(uintptr_t(aPtr) & kChunkSizeMask);
1556 }
1557
// Returns the fixed name the allocator reports as the program name.
static inline const char* _getprogname(void) {
  const char* const programName = "<jemalloc>";
  return programName;
}
1559
1560 static inline void MaybePoison(void* aPtr, size_t aSize) {
1561   size_t size;
1562   switch (opt_poison) {
1563     case NONE:
1564       return;
1565     case SOME:
1566       size = std::min(aSize, opt_poison_size);
1567       break;
1568     case ALL:
1569       size = aSize;
1570       break;
1571   }
1572   MOZ_ASSERT(size != 0 && size <= aSize);
1573   memset(aPtr, kAllocPoison, size);
1574 }
1575
1576 // Fill the given range of memory with zeroes or junk depending on opt_junk and
1577 // opt_zero.
1578 static inline void ApplyZeroOrJunk(void* aPtr, size_t aSize) {
1579   if (opt_junk) {
1580     memset(aPtr, kAllocJunk, aSize);
1581   } else if (opt_zero) {
1582     memset(aPtr, 0, aSize);
1583   }
1584 }
1585
1586 // On Windows, delay crashing on OOM.
1587 #ifdef XP_WIN
1588
1589 // Implementation of VirtualAlloc wrapper (bug 1716727).
1590 namespace MozAllocRetries {
1591
// Maximum retry count on OOM.
constexpr size_t kMaxAttempts = 10;
// Minimum delay time between retries. (The actual delay time may be larger. See
// Microsoft's documentation for ::Sleep() for details.)
constexpr size_t kDelayMs = 50;

using StallSpecs = ::mozilla::StallSpecs;

// The longest stall this wrapper will perform: kMaxAttempts retries spaced at
// least kDelayMs apart.  GetStallSpecs() returns this value or a reduced
// variant of it.
static constexpr StallSpecs maxStall = {.maxAttempts = kMaxAttempts,
                                        .delayMs = kDelayMs};
1602
1603 static inline StallSpecs GetStallSpecs() {
1604 #  if defined(JS_STANDALONE)
1605   // GetGeckoProcessType() isn't available in this configuration. (SpiderMonkey
1606   // on Windows mostly skips this in favor of directly calling ::VirtualAlloc(),
1607   // though, so it's probably not going to matter whether we stall here or not.)
1608   return maxStall;
1609 #  else
1610   switch (GetGeckoProcessType()) {
1611     // For the main process, stall for the maximum permissible time period. (The
1612     // main process is the most important one to keep alive.)
1613     case GeckoProcessType::GeckoProcessType_Default:
1614       return maxStall;
1615
1616     // For all other process types, stall for at most half as long.
1617     default:
1618       return {.maxAttempts = maxStall.maxAttempts / 2,
1619               .delayMs = maxStall.delayMs};
1620   }
1621 #  endif
1622 }
1623
1624 // Drop-in wrapper around VirtualAlloc. When out of memory, may attempt to stall
1625 // and retry rather than returning immediately, in hopes that the page file is
1626 // about to be expanded by Windows.
1627 //
1628 // Ref: https://docs.microsoft.com/en-us/troubleshoot/windows-client/performance/slow-page-file-growth-memory-allocation-errors
1629 [[nodiscard]] void* MozVirtualAlloc(LPVOID lpAddress, SIZE_T dwSize,
1630                                     DWORD flAllocationType, DWORD flProtect) {
1631   DWORD const lastError = ::GetLastError();
1632
1633   constexpr auto IsOOMError = [] {
1634     switch (::GetLastError()) {
1635       // This is the usual error result from VirtualAlloc for OOM.
1636       case ERROR_COMMITMENT_LIMIT:
1637       // Although rare, this has also been observed in low-memory situations.
1638       // (Presumably this means Windows can't allocate enough kernel-side space
1639       // for its own internal representation of the process's virtual address
1640       // space.)
1641       case ERROR_NOT_ENOUGH_MEMORY:
1642         return true;
1643     }
1644     return false;
1645   };
1646
1647   {
1648     void* ptr = ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
1649     if (MOZ_LIKELY(ptr)) return ptr;
1650
1651     // We can't do anything for errors other than OOM...
1652     if (!IsOOMError()) return nullptr;
1653     // ... or if this wasn't a request to commit memory in the first place.
1654     // (This function has no strategy for resolving MEM_RESERVE failures.)
1655     if (!(flAllocationType & MEM_COMMIT)) return nullptr;
1656   }
1657
1658   // Retry as many times as desired (possibly zero).
1659   const StallSpecs stallSpecs = GetStallSpecs();
1660
1661   const auto ret =
1662       stallSpecs.StallAndRetry(&::Sleep, [&]() -> std::optional<void*> {
1663         void* ptr =
1664             ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
1665
1666         if (ptr) {
1667           // The OOM status has been handled, and should not be reported to
1668           // telemetry.
1669           if (IsOOMError()) {
1670             ::SetLastError(lastError);
1671           }
1672           return ptr;
1673         }
1674
1675         // Failure for some reason other than OOM.
1676         if (!IsOOMError()) {
1677           return nullptr;
1678         }
1679
1680         return std::nullopt;
1681       });
1682
1683   return ret.value_or(nullptr);
1684 }
1685 }  // namespace MozAllocRetries
1686
1687 using MozAllocRetries::MozVirtualAlloc;
1688
1689 namespace mozilla {
1690 MOZ_JEMALLOC_API StallSpecs GetAllocatorStallSpecs() {
1691   return ::MozAllocRetries::GetStallSpecs();
1692 }
1693 }  // namespace mozilla
1694
1695 #endif  // XP_WIN
1696
1697 // ***************************************************************************
1698
1699 static inline void pages_decommit(void* aAddr, size_t aSize) {
1700 #ifdef XP_WIN
1701   // The region starting at addr may have been allocated in multiple calls
1702   // to VirtualAlloc and recycled, so decommitting the entire region in one
1703   // go may not be valid. However, since we allocate at least a chunk at a
1704   // time, we may touch any region in chunksized increments.
1705   size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1706   while (aSize > 0) {
1707     // This will cause Access Violation on read and write and thus act as a
1708     // guard page or region as well.
1709     if (!VirtualFree(aAddr, pages_size, MEM_DECOMMIT)) {
1710       MOZ_CRASH();
1711     }
1712     aAddr = (void*)((uintptr_t)aAddr + pages_size);
1713     aSize -= pages_size;
1714     pages_size = std::min(aSize, kChunkSize);
1715   }
1716 #else
1717   if (mmap(aAddr, aSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
1718            0) == MAP_FAILED) {
1719     // We'd like to report the OOM for our tooling, but we can't allocate
1720     // memory at this point, so avoid the use of printf.
1721     const char out_of_mappings[] =
1722         "[unhandlable oom] Failed to mmap, likely no more mappings "
1723         "available " __FILE__ " : " MOZ_STRINGIFY(__LINE__);
1724     if (errno == ENOMEM) {
1725 #  ifndef ANDROID
1726       fputs(out_of_mappings, stderr);
1727       fflush(stderr);
1728 #  endif
1729       MOZ_CRASH_ANNOTATE(out_of_mappings);
1730     }
1731     MOZ_REALLY_CRASH(__LINE__);
1732   }
1733   MozTagAnonymousMemory(aAddr, aSize, "jemalloc-decommitted");
1734 #endif
1735 }
1736
1737 // Commit pages. Returns whether pages were committed.
1738 [[nodiscard]] static inline bool pages_commit(void* aAddr, size_t aSize) {
1739 #ifdef XP_WIN
1740   // The region starting at addr may have been allocated in multiple calls
1741   // to VirtualAlloc and recycled, so committing the entire region in one
1742   // go may not be valid. However, since we allocate at least a chunk at a
1743   // time, we may touch any region in chunksized increments.
1744   size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1745   while (aSize > 0) {
1746     if (!MozVirtualAlloc(aAddr, pages_size, MEM_COMMIT, PAGE_READWRITE)) {
1747       return false;
1748     }
1749     aAddr = (void*)((uintptr_t)aAddr + pages_size);
1750     aSize -= pages_size;
1751     pages_size = std::min(aSize, kChunkSize);
1752   }
1753 #else
1754   if (mmap(aAddr, aSize, PROT_READ | PROT_WRITE,
1755            MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) {
1756     return false;
1757   }
1758   MozTagAnonymousMemory(aAddr, aSize, "jemalloc");
1759 #endif
1760   return true;
1761 }
1762
1763 static bool base_pages_alloc(size_t minsize) MOZ_REQUIRES(base_mtx) {
1764   size_t csize;
1765   size_t pminsize;
1766
1767   MOZ_ASSERT(minsize != 0);
1768   csize = CHUNK_CEILING(minsize);
1769   base_pages = chunk_alloc(csize, kChunkSize, true);
1770   if (!base_pages) {
1771     return true;
1772   }
1773   base_next_addr = base_pages;
1774   base_past_addr = (void*)((uintptr_t)base_pages + csize);
1775   // Leave enough pages for minsize committed, since otherwise they would
1776   // have to be immediately recommitted.
1777   pminsize = PAGE_CEILING(minsize);
1778   base_next_decommitted = (void*)((uintptr_t)base_pages + pminsize);
1779   if (pminsize < csize) {
1780     pages_decommit(base_next_decommitted, csize - pminsize);
1781   }
1782   base_mapped += csize;
1783   base_committed += pminsize;
1784
1785   return false;
1786 }
1787
1788 static void* base_alloc(size_t aSize) {
1789   void* ret;
1790   size_t csize;
1791
1792   // Round size up to nearest multiple of the cacheline size.
1793   csize = CACHELINE_CEILING(aSize);
1794
1795   MutexAutoLock lock(base_mtx);
1796   // Make sure there's enough space for the allocation.
1797   if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1798     if (base_pages_alloc(csize)) {
1799       return nullptr;
1800     }
1801   }
1802   // Allocate.
1803   ret = base_next_addr;
1804   base_next_addr = (void*)((uintptr_t)base_next_addr + csize);
1805   // Make sure enough pages are committed for the new allocation.
1806   if ((uintptr_t)base_next_addr > (uintptr_t)base_next_decommitted) {
1807     void* pbase_next_addr = (void*)(PAGE_CEILING((uintptr_t)base_next_addr));
1808
1809     if (!pages_commit(
1810             base_next_decommitted,
1811             (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted)) {
1812       return nullptr;
1813     }
1814
1815     base_committed +=
1816         (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted;
1817     base_next_decommitted = pbase_next_addr;
1818   }
1819
1820   return ret;
1821 }
1822
1823 static void* base_calloc(size_t aNumber, size_t aSize) {
1824   void* ret = base_alloc(aNumber * aSize);
1825   if (ret) {
1826     memset(ret, 0, aNumber * aSize);
1827   }
1828   return ret;
1829 }
1830
1831 // A specialization of the base allocator with a free list.
1832 template <typename T>
1833 struct TypedBaseAlloc {
1834   static T* sFirstFree;
1835
1836   static size_t size_of() { return sizeof(T); }
1837
1838   static T* alloc() {
1839     T* ret;
1840
1841     base_mtx.Lock();
1842     if (sFirstFree) {
1843       ret = sFirstFree;
1844       sFirstFree = *(T**)ret;
1845       base_mtx.Unlock();
1846     } else {
1847       base_mtx.Unlock();
1848       ret = (T*)base_alloc(size_of());
1849     }
1850
1851     return ret;
1852   }
1853
1854   static void dealloc(T* aNode) {
1855     MutexAutoLock lock(base_mtx);
1856     *(T**)aNode = sFirstFree;
1857     sFirstFree = aNode;
1858   }
1859 };
1860
1861 using ExtentAlloc = TypedBaseAlloc<extent_node_t>;
1862
1863 template <>
1864 extent_node_t* ExtentAlloc::sFirstFree = nullptr;
1865
1866 template <>
1867 arena_t* TypedBaseAlloc<arena_t>::sFirstFree = nullptr;
1868
1869 template <>
1870 size_t TypedBaseAlloc<arena_t>::size_of() {
1871   // Allocate enough space for trailing bins.
1872   return sizeof(arena_t) + (sizeof(arena_bin_t) * (NUM_SMALL_CLASSES - 1));
1873 }
1874
1875 template <typename T>
1876 struct BaseAllocFreePolicy {
1877   void operator()(T* aPtr) { TypedBaseAlloc<T>::dealloc(aPtr); }
1878 };
1879
1880 using UniqueBaseNode =
1881     UniquePtr<extent_node_t, BaseAllocFreePolicy<extent_node_t>>;
1882
1883 // End Utility functions/macros.
1884 // ***************************************************************************
1885 // Begin chunk management functions.
1886
1887 #ifdef XP_WIN
1888
1889 static void* pages_map(void* aAddr, size_t aSize) {
1890   void* ret = nullptr;
1891   ret = MozVirtualAlloc(aAddr, aSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
1892   return ret;
1893 }
1894
1895 static void pages_unmap(void* aAddr, size_t aSize) {
1896   if (VirtualFree(aAddr, 0, MEM_RELEASE) == 0) {
1897     _malloc_message(_getprogname(), ": (malloc) Error in VirtualFree()\n");
1898   }
1899 }
1900 #else
1901
1902 static void pages_unmap(void* aAddr, size_t aSize) {
1903   if (munmap(aAddr, aSize) == -1) {
1904     char buf[64];
1905
1906     if (strerror_r(errno, buf, sizeof(buf)) == 0) {
1907       _malloc_message(_getprogname(), ": (malloc) Error in munmap(): ", buf,
1908                       "\n");
1909     }
1910   }
1911 }
1912
1913 static void* pages_map(void* aAddr, size_t aSize) {
1914   void* ret;
1915 #  if defined(__ia64__) || \
1916       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1917   // The JS engine assumes that all allocated pointers have their high 17 bits
1918   // clear, which ia64's mmap doesn't support directly. However, we can emulate
1919   // it by passing mmap an "addr" parameter with those bits clear. The mmap will
1920   // return that address, or the nearest available memory above that address,
1921   // providing a near-guarantee that those bits are clear. If they are not, we
1922   // return nullptr below to indicate out-of-memory.
1923   //
1924   // The addr is chosen as 0x0000070000000000, which still allows about 120TB of
1925   // virtual address space.
1926   //
1927   // See Bug 589735 for more information.
1928   bool check_placement = true;
1929   if (!aAddr) {
1930     aAddr = (void*)0x0000070000000000;
1931     check_placement = false;
1932   }
1933 #  endif
1934
1935 #  if defined(__sparc__) && defined(__arch64__) && defined(__linux__)
1936   const uintptr_t start = 0x0000070000000000ULL;
1937   const uintptr_t end = 0x0000800000000000ULL;
1938
1939   // Copied from js/src/gc/Memory.cpp and adapted for this source
1940   uintptr_t hint;
1941   void* region = MAP_FAILED;
1942   for (hint = start; region == MAP_FAILED && hint + aSize <= end;
1943        hint += kChunkSize) {
1944     region = mmap((void*)hint, aSize, PROT_READ | PROT_WRITE,
1945                   MAP_PRIVATE | MAP_ANON, -1, 0);
1946     if (region != MAP_FAILED) {
1947       if (((size_t)region + (aSize - 1)) & 0xffff800000000000) {
1948         if (munmap(region, aSize)) {
1949           MOZ_ASSERT(errno == ENOMEM);
1950         }
1951         region = MAP_FAILED;
1952       }
1953     }
1954   }
1955   ret = region;
1956 #  else
1957   // We don't use MAP_FIXED here, because it can cause the *replacement*
1958   // of existing mappings, and we only want to create new mappings.
1959   ret =
1960       mmap(aAddr, aSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
1961   MOZ_ASSERT(ret);
1962 #  endif
1963   if (ret == MAP_FAILED) {
1964     ret = nullptr;
1965   }
1966 #  if defined(__ia64__) || \
1967       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1968   // If the allocated memory doesn't have its upper 17 bits clear, consider it
1969   // as out of memory.
1970   else if ((long long)ret & 0xffff800000000000) {
1971     munmap(ret, aSize);
1972     ret = nullptr;
1973   }
1974   // If the caller requested a specific memory location, verify that's what mmap
1975   // returned.
1976   else if (check_placement && ret != aAddr) {
1977 #  else
1978   else if (aAddr && ret != aAddr) {
1979 #  endif
1980     // We succeeded in mapping memory, but not in the right place.
1981     pages_unmap(ret, aSize);
1982     ret = nullptr;
1983   }
1984   if (ret) {
1985     MozTagAnonymousMemory(ret, aSize, "jemalloc");
1986   }
1987
1988 #  if defined(__ia64__) || \
1989       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1990   MOZ_ASSERT(!ret || (!check_placement && ret) ||
1991              (check_placement && ret == aAddr));
1992 #  else
1993   MOZ_ASSERT(!ret || (!aAddr && ret != aAddr) || (aAddr && ret == aAddr));
1994 #  endif
1995   return ret;
1996 }
1997 #endif
1998
1999 #ifdef XP_DARWIN
2000 #  define VM_COPY_MIN kChunkSize
2001 static inline void pages_copy(void* dest, const void* src, size_t n) {
2002   MOZ_ASSERT((void*)((uintptr_t)dest & ~gPageSizeMask) == dest);
2003   MOZ_ASSERT(n >= VM_COPY_MIN);
2004   MOZ_ASSERT((void*)((uintptr_t)src & ~gPageSizeMask) == src);
2005
2006   kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
2007                             (vm_address_t)dest);
2008   if (r != KERN_SUCCESS) {
2009     MOZ_CRASH("vm_copy() failed");
2010   }
2011 }
2012
2013 #endif
2014
2015 template <size_t Bits>
2016 bool AddressRadixTree<Bits>::Init() {
2017   mLock.Init();
2018   mRoot = (void**)base_calloc(1 << kBitsAtLevel1, sizeof(void*));
2019   return mRoot;
2020 }
2021
2022 template <size_t Bits>
2023 void** AddressRadixTree<Bits>::GetSlot(void* aKey, bool aCreate) {
2024   uintptr_t key = reinterpret_cast<uintptr_t>(aKey);
2025   uintptr_t subkey;
2026   unsigned i, lshift, height, bits;
2027   void** node;
2028   void** child;
2029
2030   for (i = lshift = 0, height = kHeight, node = mRoot; i < height - 1;
2031        i++, lshift += bits, node = child) {
2032     bits = i ? kBitsPerLevel : kBitsAtLevel1;
2033     subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
2034     child = (void**)node[subkey];
2035     if (!child && aCreate) {
2036       child = (void**)base_calloc(1 << kBitsPerLevel, sizeof(void*));
2037       if (child) {
2038         node[subkey] = child;
2039       }
2040     }
2041     if (!child) {
2042       return nullptr;
2043     }
2044   }
2045
2046   // node is a leaf, so it contains values rather than node
2047   // pointers.
2048   bits = i ? kBitsPerLevel : kBitsAtLevel1;
2049   subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
2050   return &node[subkey];
2051 }
2052
2053 template <size_t Bits>
2054 void* AddressRadixTree<Bits>::Get(void* aKey) {
2055   void* ret = nullptr;
2056
2057   void** slot = GetSlot(aKey);
2058
2059   if (slot) {
2060     ret = *slot;
2061   }
2062 #ifdef MOZ_DEBUG
2063   MutexAutoLock lock(mLock);
2064
2065   // Suppose that it were possible for a jemalloc-allocated chunk to be
2066   // munmap()ped, followed by a different allocator in another thread re-using
2067   // overlapping virtual memory, all without invalidating the cached rtree
2068   // value.  The result would be a false positive (the rtree would claim that
2069   // jemalloc owns memory that it had actually discarded).  I don't think this
2070   // scenario is possible, but the following assertion is a prudent sanity
2071   // check.
2072   if (!slot) {
2073     // In case a slot has been created in the meantime.
2074     slot = GetSlot(aKey);
2075   }
2076   if (slot) {
2077     // The MutexAutoLock above should act as a memory barrier, forcing
2078     // the compiler to emit a new read instruction for *slot.
2079     MOZ_ASSERT(ret == *slot);
2080   } else {
2081     MOZ_ASSERT(ret == nullptr);
2082   }
2083 #endif
2084   return ret;
2085 }
2086
2087 template <size_t Bits>
2088 bool AddressRadixTree<Bits>::Set(void* aKey, void* aValue) {
2089   MutexAutoLock lock(mLock);
2090   void** slot = GetSlot(aKey, /* aCreate = */ true);
2091   if (slot) {
2092     *slot = aValue;
2093   }
2094   return slot;
2095 }
2096
2097 // pages_trim, chunk_alloc_mmap_slow and chunk_alloc_mmap were cherry-picked
2098 // from upstream jemalloc 3.4.1 to fix Mozilla bug 956501.
2099
// Distance from `a` down to the nearest address at or below it that is a
// multiple of `alignment`. `alignment` must be a power of two.
#define ALIGNMENT_ADDR2OFFSET(a, alignment) \
  ((size_t)(((uintptr_t)(a)) & ((alignment) - 1)))

// `s` rounded up to the nearest multiple of `alignment` (the smallest multiple
// that is >= s). `alignment` must be a power of two.
#define ALIGNMENT_CEILING(s, alignment) \
  (((s) + ((alignment) - 1)) & ~((alignment) - 1))
2107
2108 static void* pages_trim(void* addr, size_t alloc_size, size_t leadsize,
2109                         size_t size) {
2110   void* ret = (void*)((uintptr_t)addr + leadsize);
2111
2112   MOZ_ASSERT(alloc_size >= leadsize + size);
2113 #ifdef XP_WIN
2114   {
2115     void* new_addr;
2116
2117     pages_unmap(addr, alloc_size);
2118     new_addr = pages_map(ret, size);
2119     if (new_addr == ret) {
2120       return ret;
2121     }
2122     if (new_addr) {
2123       pages_unmap(new_addr, size);
2124     }
2125     return nullptr;
2126   }
2127 #else
2128   {
2129     size_t trailsize = alloc_size - leadsize - size;
2130
2131     if (leadsize != 0) {
2132       pages_unmap(addr, leadsize);
2133     }
2134     if (trailsize != 0) {
2135       pages_unmap((void*)((uintptr_t)ret + size), trailsize);
2136     }
2137     return ret;
2138   }
2139 #endif
2140 }
2141
2142 static void* chunk_alloc_mmap_slow(size_t size, size_t alignment) {
2143   void *ret, *pages;
2144   size_t alloc_size, leadsize;
2145
2146   alloc_size = size + alignment - gRealPageSize;
2147   // Beware size_t wrap-around.
2148   if (alloc_size < size) {
2149     return nullptr;
2150   }
2151   do {
2152     pages = pages_map(nullptr, alloc_size);
2153     if (!pages) {
2154       return nullptr;
2155     }
2156     leadsize =
2157         ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages;
2158     ret = pages_trim(pages, alloc_size, leadsize, size);
2159   } while (!ret);
2160
2161   MOZ_ASSERT(ret);
2162   return ret;
2163 }
2164
2165 static void* chunk_alloc_mmap(size_t size, size_t alignment) {
2166   void* ret;
2167   size_t offset;
2168
2169   // Ideally, there would be a way to specify alignment to mmap() (like
2170   // NetBSD has), but in the absence of such a feature, we have to work
2171   // hard to efficiently create aligned mappings. The reliable, but
2172   // slow method is to create a mapping that is over-sized, then trim the
2173   // excess. However, that always results in one or two calls to
2174   // pages_unmap().
2175   //
2176   // Optimistically try mapping precisely the right amount before falling
2177   // back to the slow method, with the expectation that the optimistic
2178   // approach works most of the time.
2179   ret = pages_map(nullptr, size);
2180   if (!ret) {
2181     return nullptr;
2182   }
2183   offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
2184   if (offset != 0) {
2185     pages_unmap(ret, size);
2186     return chunk_alloc_mmap_slow(size, alignment);
2187   }
2188
2189   MOZ_ASSERT(ret);
2190   return ret;
2191 }
2192
2193 // Purge and release the pages in the chunk of length `length` at `addr` to
2194 // the OS.
2195 // Returns whether the pages are guaranteed to be full of zeroes when the
2196 // function returns.
2197 // The force_zero argument explicitly requests that the memory is guaranteed
2198 // to be full of zeroes when the function returns.
2199 static bool pages_purge(void* addr, size_t length, bool force_zero) {
2200   pages_decommit(addr, length);
2201   return true;
2202 }
2203
2204 static void* chunk_recycle(size_t aSize, size_t aAlignment, bool* aZeroed) {
2205   extent_node_t key;
2206
2207   size_t alloc_size = aSize + aAlignment - kChunkSize;
2208   // Beware size_t wrap-around.
2209   if (alloc_size < aSize) {
2210     return nullptr;
2211   }
2212   key.mAddr = nullptr;
2213   key.mSize = alloc_size;
2214   chunks_mtx.Lock();
2215   extent_node_t* node = gChunksBySize.SearchOrNext(&key);
2216   if (!node) {
2217     chunks_mtx.Unlock();
2218     return nullptr;
2219   }
2220   size_t leadsize = ALIGNMENT_CEILING((uintptr_t)node->mAddr, aAlignment) -
2221                     (uintptr_t)node->mAddr;
2222   MOZ_ASSERT(node->mSize >= leadsize + aSize);
2223   size_t trailsize = node->mSize - leadsize - aSize;
2224   void* ret = (void*)((uintptr_t)node->mAddr + leadsize);
2225   ChunkType chunk_type = node->mChunkType;
2226   if (aZeroed) {
2227     *aZeroed = (chunk_type == ZEROED_CHUNK);
2228   }
2229   // Remove node from the tree.
2230   gChunksBySize.Remove(node);
2231   gChunksByAddress.Remove(node);
2232   if (leadsize != 0) {
2233     // Insert the leading space as a smaller chunk.
2234     node->mSize = leadsize;
2235     gChunksBySize.Insert(node);
2236     gChunksByAddress.Insert(node);
2237     node = nullptr;
2238   }
2239   if (trailsize != 0) {
2240     // Insert the trailing space as a smaller chunk.
2241     if (!node) {
2242       // An additional node is required, but
2243       // TypedBaseAlloc::alloc() can cause a new base chunk to be
2244       // allocated.  Drop chunks_mtx in order to avoid
2245       // deadlock, and if node allocation fails, deallocate
2246       // the result before returning an error.
2247       chunks_mtx.Unlock();
2248       node = ExtentAlloc::alloc();
2249       if (!node) {
2250         chunk_dealloc(ret, aSize, chunk_type);
2251         return nullptr;
2252       }
2253       chunks_mtx.Lock();
2254     }
2255     node->mAddr = (void*)((uintptr_t)(ret) + aSize);
2256     node->mSize = trailsize;
2257     node->mChunkType = chunk_type;
2258     gChunksBySize.Insert(node);
2259     gChunksByAddress.Insert(node);
2260     node = nullptr;
2261   }
2262
2263   gRecycledSize -= aSize;
2264
2265   chunks_mtx.Unlock();
2266
2267   if (node) {
2268     ExtentAlloc::dealloc(node);
2269   }
2270   if (!pages_commit(ret, aSize)) {
2271     return nullptr;
2272   }
2273   // pages_commit is guaranteed to zero the chunk.
2274   if (aZeroed) {
2275     *aZeroed = true;
2276   }
2277
2278   return ret;
2279 }
2280
#ifndef XP_WIN
// Outside of Windows, chunks of any size may be recycled.
#  define CAN_RECYCLE(size) true
#else
// On Windows, calls to VirtualAlloc and VirtualFree must be matched, making it
// awkward to recycle allocations of varying sizes. Therefore we only allow
// recycling when the size equals the chunksize, unless deallocation is entirely
// disabled.
#  define CAN_RECYCLE(size) ((size) == kChunkSize)
#endif
2290
2291 // Allocates `size` bytes of system memory aligned for `alignment`.
2292 // `base` indicates whether the memory will be used for the base allocator
2293 // (e.g. base_alloc).
2294 // `zeroed` is an outvalue that returns whether the allocated memory is
2295 // guaranteed to be full of zeroes. It can be omitted when the caller doesn't
2296 // care about the result.
2297 static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase,
2298                          bool* aZeroed) {
2299   void* ret = nullptr;
2300
2301   MOZ_ASSERT(aSize != 0);
2302   MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
2303   MOZ_ASSERT(aAlignment != 0);
2304   MOZ_ASSERT((aAlignment & kChunkSizeMask) == 0);
2305
2306   // Base allocations can't be fulfilled by recycling because of
2307   // possible deadlock or infinite recursion.
2308   if (CAN_RECYCLE(aSize) && !aBase) {
2309     ret = chunk_recycle(aSize, aAlignment, aZeroed);
2310   }
2311   if (!ret) {
2312     ret = chunk_alloc_mmap(aSize, aAlignment);
2313     if (aZeroed) {
2314       *aZeroed = true;
2315     }
2316   }
2317   if (ret && !aBase) {
2318     if (!gChunkRTree.Set(ret, ret)) {
2319       chunk_dealloc(ret, aSize, UNKNOWN_CHUNK);
2320       return nullptr;
2321     }
2322   }
2323
2324   MOZ_ASSERT(GetChunkOffsetForPtr(ret) == 0);
2325   return ret;
2326 }
2327
// Make sure the aSize bytes at aPtr are zero. If the caller does not know the
// memory to be zeroed (aZeroed is false) it is cleared here; otherwise nothing
// is written, and debug builds verify the caller's claim word by word.
static void chunk_ensure_zero(void* aPtr, size_t aSize, bool aZeroed) {
  if (!aZeroed) {
    memset(aPtr, 0, aSize);
    return;
  }
#ifdef MOZ_DEBUG
  const size_t* const words = (size_t*)(uintptr_t)aPtr;
  const size_t wordCount = aSize / sizeof(size_t);

  for (size_t idx = 0; idx < wordCount; idx++) {
    MOZ_ASSERT(words[idx] == 0);
  }
#endif
}
2343
2344 static void chunk_record(void* aChunk, size_t aSize, ChunkType aType) {
2345   extent_node_t key;
2346
2347   if (aType != ZEROED_CHUNK) {
2348     if (pages_purge(aChunk, aSize, aType == HUGE_CHUNK)) {
2349       aType = ZEROED_CHUNK;
2350     }
2351   }
2352
2353   // Allocate a node before acquiring chunks_mtx even though it might not
2354   // be needed, because TypedBaseAlloc::alloc() may cause a new base chunk to
2355   // be allocated, which could cause deadlock if chunks_mtx were already
2356   // held.
2357   UniqueBaseNode xnode(ExtentAlloc::alloc());
2358   // Use xprev to implement conditional deferred deallocation of prev.
2359   UniqueBaseNode xprev;
2360
2361   // RAII deallocates xnode and xprev defined above after unlocking
2362   // in order to avoid potential dead-locks
2363   MutexAutoLock lock(chunks_mtx);
2364   key.mAddr = (void*)((uintptr_t)aChunk + aSize);
2365   extent_node_t* node = gChunksByAddress.SearchOrNext(&key);
2366   // Try to coalesce forward.
2367   if (node && node->mAddr == key.mAddr) {
2368     // Coalesce chunk with the following address range.  This does
2369     // not change the position within gChunksByAddress, so only
2370     // remove/insert from/into gChunksBySize.
2371     gChunksBySize.Remove(node);
2372     node->mAddr = aChunk;
2373     node->mSize += aSize;
2374     if (node->mChunkType != aType) {
2375       node->mChunkType = RECYCLED_CHUNK;
2376     }
2377     gChunksBySize.Insert(node);
2378   } else {
2379     // Coalescing forward failed, so insert a new node.
2380     if (!xnode) {
2381       // TypedBaseAlloc::alloc() failed, which is an exceedingly
2382       // unlikely failure.  Leak chunk; its pages have
2383       // already been purged, so this is only a virtual
2384       // memory leak.
2385       return;
2386     }
2387     node = xnode.release();
2388     node->mAddr = aChunk;
2389     node->mSize = aSize;
2390     node->mChunkType = aType;
2391     gChunksByAddress.Insert(node);
2392     gChunksBySize.Insert(node);
2393   }
2394
2395   // Try to coalesce backward.
2396   extent_node_t* prev = gChunksByAddress.Prev(node);
2397   if (prev && (void*)((uintptr_t)prev->mAddr + prev->mSize) == aChunk) {
2398     // Coalesce chunk with the previous address range.  This does
2399     // not change the position within gChunksByAddress, so only
2400     // remove/insert node from/into gChunksBySize.
2401     gChunksBySize.Remove(prev);
2402     gChunksByAddress.Remove(prev);
2403
2404     gChunksBySize.Remove(node);
2405     node->mAddr = prev->mAddr;
2406     node->mSize += prev->mSize;
2407     if (node->mChunkType != prev->mChunkType) {
2408       node->mChunkType = RECYCLED_CHUNK;
2409     }
2410     gChunksBySize.Insert(node);
2411
2412     xprev.reset(prev);
2413   }
2414
2415   gRecycledSize += aSize;
2416 }
2417
2418 static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType) {
2419   MOZ_ASSERT(aChunk);
2420   MOZ_ASSERT(GetChunkOffsetForPtr(aChunk) == 0);
2421   MOZ_ASSERT(aSize != 0);
2422   MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
2423
2424   gChunkRTree.Unset(aChunk);
2425
2426   if (CAN_RECYCLE(aSize)) {
2427     size_t recycled_so_far = gRecycledSize;
2428     // In case some race condition put us above the limit.
2429     if (recycled_so_far < gRecycleLimit) {
2430       size_t recycle_remaining = gRecycleLimit - recycled_so_far;
2431       size_t to_recycle;
2432       if (aSize > recycle_remaining) {
2433         to_recycle = recycle_remaining;
2434         // Drop pages that would overflow the recycle limit
2435         pages_trim(aChunk, aSize, 0, to_recycle);
2436       } else {
2437         to_recycle = aSize;
2438       }
2439       chunk_record(aChunk, to_recycle, aType);
2440       return;
2441     }
2442   }
2443
2444   pages_unmap(aChunk, aSize);
2445 }
2446
2447 #undef CAN_RECYCLE
2448
2449 // End chunk management functions.
2450 // ***************************************************************************
2451 // Begin arena.
2452
2453 static inline arena_t* thread_local_arena(bool enabled) {
2454   arena_t* arena;
2455
2456   if (enabled) {
2457     // The arena will essentially be leaked if this function is
2458     // called with `false`, but it doesn't matter at the moment.
2459     // because in practice nothing actually calls this function
2460     // with `false`, except maybe at shutdown.
2461     arena =
2462         gArenas.CreateArena(/* aIsPrivate = */ false, /* aParams = */ nullptr);
2463   } else {
2464     arena = gArenas.GetDefault();
2465   }
2466   thread_arena.set(arena);
2467   return arena;
2468 }
2469
2470 inline void MozJemalloc::jemalloc_thread_local_arena(bool aEnabled) {
2471   if (malloc_init()) {
2472     thread_local_arena(aEnabled);
2473   }
2474 }
2475
2476 // Choose an arena based on a per-thread value.
2477 static inline arena_t* choose_arena(size_t size) {
2478   arena_t* ret = nullptr;
2479
2480   // We can only use TLS if this is a PIC library, since for the static
2481   // library version, libc's malloc is used by TLS allocation, which
2482   // introduces a bootstrapping issue.
2483
2484   if (size > kMaxQuantumClass) {
2485     // Force the default arena for larger allocations.
2486     ret = gArenas.GetDefault();
2487   } else {
2488     // Check TLS to see if our thread has requested a pinned arena.
2489     ret = thread_arena.get();
2490     // If ret is non-null, it must not be in the first page.
2491     MOZ_DIAGNOSTIC_ASSERT_IF(ret, (size_t)ret >= gPageSize);
2492     if (!ret) {
2493       // Nothing in TLS. Pin this thread to the default arena.
2494       ret = thread_local_arena(false);
2495     }
2496   }
2497
2498   MOZ_DIAGNOSTIC_ASSERT(ret);
2499   return ret;
2500 }
2501
2502 inline uint8_t arena_t::FindFreeBitInMask(uint32_t aMask, uint32_t& aRng) {
2503   if (mPRNG != nullptr) {
2504     if (aRng == UINT_MAX) {
2505       aRng = mPRNG->next() % 32;
2506     }
2507     uint8_t bitIndex;
2508     // RotateRight asserts when provided bad input.
2509     aMask = aRng ? RotateRight(aMask, aRng)
2510                  : aMask;  // Rotate the mask a random number of slots
2511     bitIndex = CountTrailingZeroes32(aMask);
2512     return (bitIndex + aRng) % 32;
2513   }
2514   return CountTrailingZeroes32(aMask);
2515 }
2516
2517 inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) {
2518   void* ret;
2519   unsigned i, mask, bit, regind;
2520   uint32_t rndPos = UINT_MAX;
2521
2522   MOZ_DIAGNOSTIC_ASSERT(aRun->mMagic == ARENA_RUN_MAGIC);
2523   MOZ_ASSERT(aRun->mRegionsMinElement < aBin->mRunNumRegionsMask);
2524
2525   // Move the first check outside the loop, so that aRun->mRegionsMinElement can
2526   // be updated unconditionally, without the possibility of updating it
2527   // multiple times.
2528   i = aRun->mRegionsMinElement;
2529   mask = aRun->mRegionsMask[i];
2530   if (mask != 0) {
2531     bit = FindFreeBitInMask(mask, rndPos);
2532
2533     regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2534     MOZ_ASSERT(regind < aBin->mRunNumRegions);
2535     ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2536                   (aBin->mSizeClass * regind));
2537
2538     // Clear bit.
2539     mask ^= (1U << bit);
2540     aRun->mRegionsMask[i] = mask;
2541
2542     return ret;
2543   }
2544
2545   for (i++; i < aBin->mRunNumRegionsMask; i++) {
2546     mask = aRun->mRegionsMask[i];
2547     if (mask != 0) {
2548       bit = FindFreeBitInMask(mask, rndPos);
2549
2550       regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2551       MOZ_ASSERT(regind < aBin->mRunNumRegions);
2552       ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2553                     (aBin->mSizeClass * regind));
2554
2555       // Clear bit.
2556       mask ^= (1U << bit);
2557       aRun->mRegionsMask[i] = mask;
2558
2559       // Make a note that nothing before this element
2560       // contains a free region.
2561       aRun->mRegionsMinElement = i;  // Low payoff: + (mask == 0);
2562
2563       return ret;
2564     }
2565   }
2566   // Not reached.
2567   MOZ_DIAGNOSTIC_ASSERT(0);
2568   return nullptr;
2569 }
2570
2571 static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
2572                                         void* ptr, size_t size) {
2573   uint32_t diff, regind;
2574   unsigned elm, bit;
2575
2576   MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
2577
2578   // Avoid doing division with a variable divisor if possible.  Using
2579   // actual division here can reduce allocator throughput by over 20%!
2580   diff =
2581       (uint32_t)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset);
2582
2583   MOZ_ASSERT(diff <=
2584              (static_cast<unsigned>(bin->mRunSizePages) << gPageSize2Pow));
2585   regind = diff / bin->mSizeDivisor;
2586
2587   MOZ_DIAGNOSTIC_ASSERT(diff == regind * size);
2588   MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions);
2589
2590   elm = regind >> (LOG2(sizeof(int)) + 3);
2591   if (elm < run->mRegionsMinElement) {
2592     run->mRegionsMinElement = elm;
2593   }
2594   bit = regind - (elm << (LOG2(sizeof(int)) + 3));
2595   MOZ_RELEASE_ASSERT((run->mRegionsMask[elm] & (1U << bit)) == 0,
2596                      "Double-free?");
2597   run->mRegionsMask[elm] |= (1U << bit);
2598 }
2599
// Carves aSize bytes off the front of the available run that starts at aRun
// and records the result in the chunk map.
//
// aRun:   start of a run currently in mRunsAvail; the run's size is read from
//         its first chunk map entry.
// aSize:  number of bytes to take, converted to pages by shifting.
// aLarge: when true the pages are tagged CHUNK_MAP_LARGE and aSize is stored
//         in the first map entry; otherwise every map entry stores aRun's
//         address.
// aZero:  when true, pages not flagged CHUNK_MAP_ZEROED are memset to zero.
//
// Returns true on success.  Returns false only when pages_commit() fails
// (MALLOC_DECOMMIT builds).  In that case the run has not been removed from
// mRunsAvail, the pages of the failing group are flagged
// CHUNK_MAP_DECOMMITTED again, and groups committed earlier in the same call
// keep their updated flags and their contribution to mStats.committed.
bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
                       bool aZero) {
  arena_chunk_t* chunk;
  size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;

  chunk = GetChunkForPtr(aRun);
  // Remembered so that the chunk can be dropped from mChunksDirty at the end
  // if this split consumes its last dirty page.
  old_ndirty = chunk->ndirty;
  // Index of the run's first page within the chunk.
  run_ind = (unsigned)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
  // The bits above the page-offset bits of the first map entry hold the run
  // size in bytes.
  total_pages = (chunk->map[run_ind].bits & ~gPageSizeMask) >> gPageSize2Pow;
  need_pages = (aSize >> gPageSize2Pow);
  MOZ_ASSERT(need_pages > 0);
  MOZ_ASSERT(need_pages <= total_pages);
  rem_pages = total_pages - need_pages;

  for (i = 0; i < need_pages; i++) {
    // Commit decommitted pages if necessary.  If a decommitted
    // page is encountered, commit all needed adjacent decommitted
    // pages in one operation, in order to reduce system call
    // overhead.
    if (chunk->map[run_ind + i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) {
      size_t j;

      // Advance i+j to just past the index of the last page
      // to commit.  Clear CHUNK_MAP_DECOMMITTED and
      // CHUNK_MAP_MADVISED along the way.
      // Note that i itself is not advanced by j: the following outer
      // iterations simply find the flags already cleared and skip the pages.
      for (j = 0; i + j < need_pages && (chunk->map[run_ind + i + j].bits &
                                         CHUNK_MAP_MADVISED_OR_DECOMMITTED);
           j++) {
        // DECOMMITTED and MADVISED are mutually exclusive.
        MOZ_ASSERT(!(chunk->map[run_ind + i + j].bits & CHUNK_MAP_DECOMMITTED &&
                     chunk->map[run_ind + i + j].bits & CHUNK_MAP_MADVISED));

        chunk->map[run_ind + i + j].bits &= ~CHUNK_MAP_MADVISED_OR_DECOMMITTED;
      }

#ifdef MALLOC_DECOMMIT
      bool committed = pages_commit(
          (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)),
          j << gPageSize2Pow);
      // pages_commit zeroes pages, so mark them as such if it succeeded.
      // That's checked further below to avoid manually zeroing the pages.
      // On failure the pages are flagged as decommitted again instead.
      for (size_t k = 0; k < j; k++) {
        chunk->map[run_ind + i + k].bits |=
            committed ? CHUNK_MAP_ZEROED : CHUNK_MAP_DECOMMITTED;
      }
      if (!committed) {
        // The run is still in mRunsAvail at this point.
        return false;
      }
#endif

      // mStats.committed is counted in pages.
      mStats.committed += j;
    }
  }

  // From here on the split cannot fail; take the run out of the tree of
  // available runs before its map entries are rewritten.
  mRunsAvail.Remove(&chunk->map[run_ind]);

  // Keep track of trailing unused pages for later use.
  if (rem_pages > 0) {
    // The remainder's size is recorded in its first and last map entries,
    // preserving each entry's existing flag bits.
    chunk->map[run_ind + need_pages].bits =
        (rem_pages << gPageSize2Pow) |
        (chunk->map[run_ind + need_pages].bits & gPageSizeMask);
    chunk->map[run_ind + total_pages - 1].bits =
        (rem_pages << gPageSize2Pow) |
        (chunk->map[run_ind + total_pages - 1].bits & gPageSizeMask);
    mRunsAvail.Insert(&chunk->map[run_ind + need_pages]);
  }

  for (i = 0; i < need_pages; i++) {
    // Zero if necessary.
    if (aZero) {
      if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED) == 0) {
        memset((void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), 0,
               gPageSize);
        // CHUNK_MAP_ZEROED is cleared below.
      }
    }

    // Update dirty page accounting.
    if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
      chunk->ndirty--;
      mNumDirty--;
      // CHUNK_MAP_DIRTY is cleared below.
    }

    // Initialize the chunk map.
    // Both assignments overwrite the whole entry, which drops every flag
    // that was set before (ZEROED, DIRTY, ...).
    if (aLarge) {
      chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
    } else {
      chunk->map[run_ind + i].bits = size_t(aRun) | CHUNK_MAP_ALLOCATED;
    }
  }

  // Set the run size only in the first element for large runs.  This is
  // primarily a debugging aid, since the lack of size info for trailing
  // pages only matters if the application tries to operate on an
  // interior pointer.
  if (aLarge) {
    chunk->map[run_ind].bits |= aSize;
  }

  // The chunk had dirty pages on entry and has none left now.
  if (chunk->ndirty == 0 && old_ndirty > 0) {
    mChunksDirty.Remove(chunk);
  }
  return true;
}
2705
2706 void arena_t::InitChunk(arena_chunk_t* aChunk, bool aZeroed) {
2707   size_t i;
2708   // WARNING: The following relies on !aZeroed meaning "used to be an arena
2709   // chunk".
2710   // When the chunk we're initializating as an arena chunk is zeroed, we
2711   // mark all runs are decommitted and zeroed.
2712   // When it is not, which we can assume means it's a recycled arena chunk,
2713   // all it can contain is an arena chunk header (which we're overwriting),
2714   // and zeroed or poisoned memory (because a recycled arena chunk will
2715   // have been emptied before being recycled). In that case, we can get
2716   // away with reusing the chunk as-is, marking all runs as madvised.
2717
2718   size_t flags =
2719       aZeroed ? CHUNK_MAP_DECOMMITTED | CHUNK_MAP_ZEROED : CHUNK_MAP_MADVISED;
2720
2721   mStats.mapped += kChunkSize;
2722
2723   aChunk->arena = this;
2724
2725   // Claim that no pages are in use, since the header is merely overhead.
2726   aChunk->ndirty = 0;
2727
2728   // Initialize the map to contain one maximal free untouched run.
2729   arena_run_t* run = (arena_run_t*)(uintptr_t(aChunk) +
2730                                     (gChunkHeaderNumPages << gPageSize2Pow));
2731
2732   // Clear the bits for the real header pages.
2733   for (i = 0; i < gChunkHeaderNumPages - 1; i++) {
2734     aChunk->map[i].bits = 0;
2735   }
2736   // Mark the leading guard page (last header page) as decommitted.
2737   aChunk->map[i++].bits = CHUNK_MAP_DECOMMITTED;
2738
2739   // Mark the area usable for runs as available, note size at start and end
2740   aChunk->map[i++].bits = gMaxLargeClass | flags;
2741   for (; i < gChunkNumPages - 2; i++) {
2742     aChunk->map[i].bits = flags;
2743   }
2744   aChunk->map[gChunkNumPages - 2].bits = gMaxLargeClass | flags;
2745
2746   // Mark the trailing guard page as decommitted.
2747   aChunk->map[gChunkNumPages - 1].bits = CHUNK_MAP_DECOMMITTED;
2748
2749 #ifdef MALLOC_DECOMMIT
2750   // Start out decommitted, in order to force a closer correspondence
2751   // between dirty pages and committed untouched pages. This includes
2752   // leading and trailing guard pages.
2753   pages_decommit((void*)(uintptr_t(run) - gPageSize),
2754                  gMaxLargeClass + 2 * gPageSize);
2755 #else
2756   // Decommit the last header page (=leading page) as a guard.
2757   pages_decommit((void*)(uintptr_t(run) - gPageSize), gPageSize);
2758   // Decommit the last page as a guard.
2759   pages_decommit((void*)(uintptr_t(aChunk) + kChunkSize - gPageSize),
2760                  gPageSize);
2761 #endif
2762
2763   mStats.committed += gChunkHeaderNumPages;
2764
2765   // Insert the run into the tree of available runs.
2766   mRunsAvail.Insert(&aChunk->map[gChunkHeaderNumPages]);
2767
2768 #ifdef MALLOC_DOUBLE_PURGE
2769   new (&aChunk->chunks_madvised_elem) DoublyLinkedListElement<arena_chunk_t>();
2770 #endif
2771 }
2772
2773 arena_chunk_t* arena_t::DeallocChunk(arena_chunk_t* aChunk) {
2774   if (mSpare) {
2775     if (mSpare->ndirty > 0) {
2776       aChunk->arena->mChunksDirty.Remove(mSpare);
2777       mNumDirty -= mSpare->ndirty;
2778       mStats.committed -= mSpare->ndirty;
2779     }
2780
2781 #ifdef MALLOC_DOUBLE_PURGE
2782     if (mChunksMAdvised.ElementProbablyInList(mSpare)) {
2783       mChunksMAdvised.remove(mSpare);
2784     }
2785 #endif
2786
2787     mStats.mapped -= kChunkSize;
2788     mStats.committed -= gChunkHeaderNumPages;
2789   }
2790
2791   // Remove run from the tree of available runs, so that the arena does not use
2792   // it. Dirty page flushing only uses the tree of dirty chunks, so leaving this
2793   // chunk in the chunks_* trees is sufficient for that purpose.
2794   mRunsAvail.Remove(&aChunk->map[gChunkHeaderNumPages]);
2795
2796   arena_chunk_t* chunk_dealloc = mSpare;
2797   mSpare = aChunk;
2798   return chunk_dealloc;
2799 }
2800
2801 arena_run_t* arena_t::AllocRun(size_t aSize, bool aLarge, bool aZero) {
2802   arena_run_t* run;
2803   arena_chunk_map_t* mapelm;
2804   arena_chunk_map_t key;
2805
2806   MOZ_ASSERT(aSize <= gMaxLargeClass);
2807   MOZ_ASSERT((aSize & gPageSizeMask) == 0);
2808
2809   // Search the arena's chunks for the lowest best fit.
2810   key.bits = aSize | CHUNK_MAP_KEY;
2811   mapelm = mRunsAvail.SearchOrNext(&key);
2812   if (mapelm) {
2813     arena_chunk_t* chunk = GetChunkForPtr(mapelm);
2814     size_t pageind =
2815         (uintptr_t(mapelm) - uintptr_t(chunk->map)) / sizeof(arena_chunk_map_t);
2816
2817     run = (arena_run_t*)(uintptr_t(chunk) + (pageind << gPageSize2Pow));
2818   } else if (mSpare) {
2819     // Use the spare.
2820     arena_chunk_t* chunk = mSpare;
2821     mSpare = nullptr;
2822     run = (arena_run_t*)(uintptr_t(chunk) +
2823                          (gChunkHeaderNumPages << gPageSize2Pow));
2824     // Insert the run into the tree of available runs.
2825     mRunsAvail.Insert(&chunk->map[gChunkHeaderNumPages]);
2826   } else {
2827     // No usable runs.  Create a new chunk from which to allocate
2828     // the run.
2829     bool zeroed;
2830     arena_chunk_t* chunk =
2831         (arena_chunk_t*)chunk_alloc(kChunkSize, kChunkSize, false, &zeroed);
2832     if (!chunk) {
2833       return nullptr;
2834     }
2835
2836     InitChunk(chunk, zeroed);
2837     run = (arena_run_t*)(uintptr_t(chunk) +
2838                          (gChunkHeaderNumPages << gPageSize2Pow));
2839   }
2840   // Update page map.
2841   return SplitRun(run, aSize, aLarge, aZero) ? run : nullptr;
2842 }
2843
2844 size_t arena_t::EffectiveMaxDirty() {
2845   int32_t modifier = gArenas.DefaultMaxDirtyPageModifier();
2846   if (modifier) {
2847     int32_t arenaOverride =
2848         modifier > 0 ? mMaxDirtyIncreaseOverride : mMaxDirtyDecreaseOverride;
2849     if (arenaOverride) {
2850       modifier = arenaOverride;
2851     }
2852   }
2853
2854   return modifier >= 0 ? mMaxDirty << modifier : mMaxDirty >> -modifier;
2855 }
2856
// Releases dirty pages until mNumDirty is no larger than aMaxDirty / 2.
//
// Chunks are taken from mChunksDirty.Last() and each one is scanned from its
// highest usable page downwards.  Every group of adjacent dirty pages is
// released with one pages_decommit() call (MALLOC_DECOMMIT builds) or one
// madvise/posix_madvise(MADV_FREE) call (other builds), and the pages' map
// entries switch from CHUNK_MAP_DIRTY to CHUNK_MAP_DECOMMITTED or
// CHUNK_MAP_MADVISED respectively.
//
// aMaxDirty: dirty-page limit; the diagnostic assertion below expects either
//            mNumDirty > aMaxDirty or the special value 1, for which the
//            loop threshold (aMaxDirty >> 1) is 0, i.e. everything is purged.
void arena_t::Purge(size_t aMaxDirty) {
  arena_chunk_t* chunk;
  size_t i, npages;

#ifdef MOZ_DEBUG
  // Cross-check the arena-wide dirty count against the per-chunk counts.
  size_t ndirty = 0;
  for (auto chunk : mChunksDirty.iter()) {
    ndirty += chunk->ndirty;
  }
  MOZ_ASSERT(ndirty == mNumDirty);
#endif
  MOZ_DIAGNOSTIC_ASSERT(aMaxDirty == 1 || (mNumDirty > aMaxDirty));

  // Iterate downward through chunks until enough dirty memory has been
  // purged.  Terminate as soon as possible in order to minimize the
  // number of system calls, even if a chunk has only been partially
  // purged.
  while (mNumDirty > (aMaxDirty >> 1)) {
#ifdef MALLOC_DOUBLE_PURGE
    // Set when at least one range of this chunk was madvised in this pass.
    bool madvised = false;
#endif
    chunk = mChunksDirty.Last();
    MOZ_DIAGNOSTIC_ASSERT(chunk);
    // Last page is DECOMMITTED as a guard page.
    MOZ_ASSERT((chunk->map[gChunkNumPages - 1].bits & CHUNK_MAP_DECOMMITTED) !=
               0);
    // Start at the last usable page (the one just before the guard page).
    for (i = gChunkNumPages - 2; chunk->ndirty > 0; i--) {
      MOZ_DIAGNOSTIC_ASSERT(i >= gChunkHeaderNumPages);

      if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
#ifdef MALLOC_DECOMMIT
        const size_t free_operation = CHUNK_MAP_DECOMMITTED;
#else
        const size_t free_operation = CHUNK_MAP_MADVISED;
#endif
        MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) ==
                   0);
        // DIRTY is set and free_operation is clear (asserted above), so the
        // XOR clears the former and sets the latter.
        chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
        // Find adjacent dirty run(s).
        // i moves down with each additional page, so afterwards it is the
        // index of the lowest page of the range and npages its length.
        for (npages = 1; i > gChunkHeaderNumPages &&
                         (chunk->map[i - 1].bits & CHUNK_MAP_DIRTY);
             npages++) {
          i--;
          MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) ==
                     0);
          chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
        }
        chunk->ndirty -= npages;
        mNumDirty -= npages;

#ifdef MALLOC_DECOMMIT
        pages_decommit((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
                       (npages << gPageSize2Pow));
#endif
        // Counted as no longer committed in both build variants.
        mStats.committed -= npages;

#ifndef MALLOC_DECOMMIT
#  ifdef XP_SOLARIS
        posix_madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
                      (npages << gPageSize2Pow), MADV_FREE);
#  else
        madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
                (npages << gPageSize2Pow), MADV_FREE);
#  endif
#  ifdef MALLOC_DOUBLE_PURGE
        madvised = true;
#  endif
#endif
        // Stop mid-chunk once the target has been reached; the chunk may be
        // left partially purged.
        if (mNumDirty <= (aMaxDirty >> 1)) {
          break;
        }
      }
    }

    if (chunk->ndirty == 0) {
      mChunksDirty.Remove(chunk);
    }
#ifdef MALLOC_DOUBLE_PURGE
    if (madvised) {
      // The chunk might already be in the list, but this
      // makes sure it's at the front.
      if (mChunksMAdvised.ElementProbablyInList(chunk)) {
        mChunksMAdvised.remove(chunk);
      }
      mChunksMAdvised.pushFront(chunk);
    }
#endif
  }
}
2946
// Returns the run that starts at aRun to the arena's set of available runs.
//
// aRun:   first page of an allocated run (large or small).
// aDirty: when true every page of the run is flagged CHUNK_MAP_DIRTY and
//         added to the dirty counters; when false only the LARGE and
//         ALLOCATED flags are cleared and all other flags are kept.
//
// The freed run is merged with an unallocated successor and/or predecessor,
// inserted into mRunsAvail, and, if the chunk's whole usable area has become
// one free run, the chunk is handed to DeallocChunk().  Finally Purge() is
// run if mNumDirty exceeds EffectiveMaxDirty().
//
// Returns the value returned by DeallocChunk() (the previous spare chunk,
// possibly nullptr), or nullptr if the chunk is still in use.
arena_chunk_t* arena_t::DallocRun(arena_run_t* aRun, bool aDirty) {
  arena_chunk_t* chunk;
  size_t size, run_ind, run_pages;

  chunk = GetChunkForPtr(aRun);
  run_ind = (size_t)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
  // The run must lie between the header pages and the trailing guard page.
  MOZ_DIAGNOSTIC_ASSERT(run_ind >= gChunkHeaderNumPages);
  MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1);
  if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) {
    // Large run: the size is stored in the first map entry.
    size = chunk->map[run_ind].bits & ~gPageSizeMask;
    run_pages = (size >> gPageSize2Pow);
  } else {
    // Small run: the size comes from the bin the run belongs to.
    run_pages = aRun->mBin->mRunSizePages;
    size = run_pages << gPageSize2Pow;
  }

  // Mark pages as unallocated in the chunk map.
  if (aDirty) {
    size_t i;

    for (i = 0; i < run_pages; i++) {
      MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) ==
                            0);
      // Overwrites the whole entry: DIRTY becomes the only flag set.
      chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
    }

    // First dirty pages in this chunk: start tracking it as dirty.
    if (chunk->ndirty == 0) {
      mChunksDirty.Insert(chunk);
    }
    chunk->ndirty += run_pages;
    mNumDirty += run_pages;
  } else {
    size_t i;

    for (i = 0; i < run_pages; i++) {
      chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
    }
  }
  // Record the run size in the first and last map entries, keeping the flag
  // bits of each entry.
  chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & gPageSizeMask);
  chunk->map[run_ind + run_pages - 1].bits =
      size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);

  // Try to coalesce forward.
  // The bound keeps the trailing guard page out of consideration.
  if (run_ind + run_pages < gChunkNumPages - 1 &&
      (chunk->map[run_ind + run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
    size_t nrun_size = chunk->map[run_ind + run_pages].bits & ~gPageSizeMask;

    // Remove successor from tree of available runs; the coalesced run is
    // inserted later.
    mRunsAvail.Remove(&chunk->map[run_ind + run_pages]);

    size += nrun_size;
    run_pages = size >> gPageSize2Pow;

    // The last entry of the merged run is the successor's last entry, which
    // must still carry the successor's size.
    MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + run_pages - 1].bits &
                           ~gPageSizeMask) == nrun_size);
    chunk->map[run_ind].bits =
        size | (chunk->map[run_ind].bits & gPageSizeMask);
    chunk->map[run_ind + run_pages - 1].bits =
        size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
  }

  // Try to coalesce backward.
  // The bound keeps the header pages out of consideration.
  if (run_ind > gChunkHeaderNumPages &&
      (chunk->map[run_ind - 1].bits & CHUNK_MAP_ALLOCATED) == 0) {
    // The predecessor's size is read from its last map entry.
    size_t prun_size = chunk->map[run_ind - 1].bits & ~gPageSizeMask;

    // Move run_ind back to the predecessor's first page.
    run_ind -= prun_size >> gPageSize2Pow;

    // Remove predecessor from tree of available runs; the coalesced run is
    // inserted later.
    mRunsAvail.Remove(&chunk->map[run_ind]);

    size += prun_size;
    run_pages = size >> gPageSize2Pow;

    MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind].bits & ~gPageSizeMask) ==
                          prun_size);
    chunk->map[run_ind].bits =
        size | (chunk->map[run_ind].bits & gPageSizeMask);
    chunk->map[run_ind + run_pages - 1].bits =
        size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
  }

  // Insert into tree of available runs, now that coalescing is complete.
  mRunsAvail.Insert(&chunk->map[run_ind]);

  // Deallocate chunk if it is now completely unused.
  // That is the case when the first usable page is unallocated and heads a
  // run of gMaxLargeClass bytes, the size InitChunk() gives the maximal run.
  arena_chunk_t* chunk_dealloc = nullptr;
  if ((chunk->map[gChunkHeaderNumPages].bits &
       (~gPageSizeMask | CHUNK_MAP_ALLOCATED)) == gMaxLargeClass) {
    chunk_dealloc = DeallocChunk(chunk);
  }

  size_t maxDirty = EffectiveMaxDirty();
  if (mNumDirty > maxDirty) {
    Purge(maxDirty);
  }

  return chunk_dealloc;
}
3048
3049 void arena_t::TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun,
3050                           size_t aOldSize, size_t aNewSize) {
3051   size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
3052   size_t head_npages = (aOldSize - aNewSize) >> gPageSize2Pow;
3053
3054   MOZ_ASSERT(aOldSize > aNewSize);
3055
3056   // Update the chunk map so that arena_t::RunDalloc() can treat the
3057   // leading run as separately allocated.
3058   aChunk->map[pageind].bits =
3059       (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3060   aChunk->map[pageind + head_npages].bits =
3061       aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3062
3063 #ifdef MOZ_DEBUG
3064   arena_chunk_t* no_chunk =
3065 #endif
3066       DallocRun(aRun, false);
3067   // This will never release a chunk as there's still at least one allocated
3068   // run.
3069   MOZ_ASSERT(!no_chunk);
3070 }
3071
3072 void arena_t::TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun,
3073                           size_t aOldSize, size_t aNewSize, bool aDirty) {
3074   size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
3075   size_t npages = aNewSize >> gPageSize2Pow;
3076
3077   MOZ_ASSERT(aOldSize > aNewSize);
3078
3079   // Update the chunk map so that arena_t::RunDalloc() can treat the
3080   // trailing run as separately allocated.
3081   aChunk->map[pageind].bits = aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3082   aChunk->map[pageind + npages].bits =
3083       (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3084
3085 #ifdef MOZ_DEBUG
3086   arena_chunk_t* no_chunk =
3087 #endif
3088       DallocRun((arena_run_t*)(uintptr_t(aRun) + aNewSize), aDirty);
3089
3090   // This will never release a chunk as there's still at least one allocated
3091   // run.
3092   MOZ_ASSERT(!no_chunk);
3093 }
3094
3095 arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
3096   arena_chunk_map_t* mapelm;
3097   arena_run_t* run;
3098   unsigned i, remainder;
3099
3100   // Look for a usable run.
3101   mapelm = aBin->mNonFullRuns.First();
3102   if (mapelm) {
3103     // run is guaranteed to have available space.
3104     aBin->mNonFullRuns.Remove(mapelm);
3105     run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
3106     return run;
3107   }
3108   // No existing runs have any space available.
3109
3110   // Allocate a new run.
3111   run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow,
3112                  false, false);
3113   if (!run) {
3114     return nullptr;
3115   }
3116   // Don't initialize if a race in arena_t::RunAlloc() allowed an existing
3117   // run to become usable.
3118   if (run == aBin->mCurrentRun) {
3119     return run;
3120   }
3121
3122   // Initialize run internals.
3123   run->mBin = aBin;
3124
3125   for (i = 0; i < aBin->mRunNumRegionsMask - 1; i++) {
3126     run->mRegionsMask[i] = UINT_MAX;
3127   }
3128   remainder = aBin->mRunNumRegions & ((1U << (LOG2(sizeof(int)) + 3)) - 1);
3129   if (remainder == 0) {
3130     run->mRegionsMask[i] = UINT_MAX;
3131   } else {
3132     // The last element has spare bits that need to be unset.
3133     run->mRegionsMask[i] =
3134         (UINT_MAX >> ((1U << (LOG2(sizeof(int)) + 3)) - remainder));
3135   }
3136
3137   run->mRegionsMinElement = 0;
3138
3139   run->mNumFree = aBin->mRunNumRegions;
3140 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
3141   run->mMagic = ARENA_RUN_MAGIC;
3142 #endif
3143
3144   aBin->mNumRuns++;
3145   return run;
3146 }
3147
// Initializes this bin for the given size class.
//
// Resets the bin's run bookkeeping and computes the layout of the runs that
// will serve this size class: run size in pages, number of regions per run,
// number of mRegionsMask elements in the run header, and the offset of the
// first region.  The run size starts at one page and grows a page at a time
// until the header overhead is acceptable or one of the give-up conditions
// in the loop below is met.
void arena_bin_t::Init(SizeClass aSizeClass) {
  size_t try_run_size;
  unsigned try_nregs, try_mask_nelms, try_reg0_offset;
  // Size of the run header, excluding mRegionsMask.
  static const size_t kFixedHeaderSize = offsetof(arena_run_t, mRegionsMask);

  MOZ_ASSERT(aSizeClass.Size() <= gMaxBinClass);

  try_run_size = gPageSize;

  mCurrentRun = nullptr;
  mNonFullRuns.Init();
  mSizeClass = aSizeClass.Size();
  mNumRuns = 0;

  // Run size expansion loop.
  while (true) {
    // Upper bound on the number of regions, ignoring the mask's own size.
    try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) +
                1;  // Counter-act try_nregs-- in loop.

    // The do..while loop iteratively reduces the number of regions until
    // the run header and the regions no longer overlap.  A closed formula
    // would be quite messy, since there is an interdependency between the
    // header's mask length and the number of regions.
    do {
      try_nregs--;
      // Number of mask elements needed for try_nregs bits, rounded up.
      try_mask_nelms =
          (try_nregs >> (LOG2(sizeof(int)) + 3)) +
          ((try_nregs & ((1U << (LOG2(sizeof(int)) + 3)) - 1)) ? 1 : 0);
      // Regions are packed against the end of the run; whatever is left in
      // front of them is available for the header.
      try_reg0_offset = try_run_size - (try_nregs * mSizeClass);
    } while (kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) >
             try_reg0_offset);

    // Try to keep the run overhead below kRunOverhead.
    if (Fraction(try_reg0_offset, try_run_size) <= kRunOverhead) {
      break;
    }

    // If the overhead is larger than the size class, it means the size class
    // is small and doesn't align very well with the header. It's desirable to
    // have smaller run sizes for them, so relax the overhead requirement.
    if (try_reg0_offset > mSizeClass) {
      if (Fraction(try_reg0_offset, try_run_size) <= kRunRelaxedOverhead) {
        break;
      }
    }

    // The run header includes one bit per region of the given size. For sizes
    // small enough, the number of regions is large enough that growing the run
    // size barely moves the needle for the overhead because of all those bits.
    // For example, for a size of 8 bytes, adding 4KiB to the run size adds
    // close to 512 bits to the header, which is 64 bytes.
    // With such overhead, there is no way to get to the wanted overhead above,
    // so we give up if the required size for mRegionsMask more than doubles the
    // size of the run header.
    if (try_mask_nelms * sizeof(unsigned) >= kFixedHeaderSize) {
      break;
    }

    // If next iteration is going to be larger than the largest possible large
    // size class, then we didn't find a setup where the overhead is small
    // enough, and we can't do better than the current settings, so just use
    // that.
    if (try_run_size + gPageSize > gMaxLargeClass) {
      break;
    }

    // Try more aggressive settings.
    try_run_size += gPageSize;
  }

  // The header (fixed part plus mask) must fit in front of the first region,
  // and the mask must have at least one bit per region.
  MOZ_ASSERT(kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) <=
             try_reg0_offset);
  MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);

  // Copy final settings.
  // The run size in pages is stored in 8 bits.
  MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX);
  mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow);
  mRunNumRegions = try_nregs;
  mRunNumRegionsMask = try_mask_nelms;
  mRunFirstRegionOffset = try_reg0_offset;
  // Used by arena_run_reg_dalloc() to turn a byte offset into a region index
  // without a hardware division.  NOTE(review): the second argument is
  // presumably the largest dividend the divisor must handle -- confirm
  // against FastDivisor.
  mSizeDivisor = FastDivisor<uint16_t>(aSizeClass.Size(), try_run_size);
}
3231
// Allocates a small region (one that is served from a bin) of at least aSize
// bytes.
//
// aSize: requested size; rounded up to its size class before use.
// aZero: when true the returned region is zero-filled, otherwise
//        ApplyZeroOrJunk() is applied to it.
//
// Returns nullptr when no run with free space can be obtained for the bin.
void* arena_t::MallocSmall(size_t aSize, bool aZero) {
  void* ret;
  arena_bin_t* bin;
  arena_run_t* run;
  SizeClass sizeClass(aSize);
  aSize = sizeClass.Size();

  // Map the size class to its bin.  mBins holds the tiny bins first, then
  // the quantum, quantum-wide and sub-page bins, in that order.
  switch (sizeClass.Type()) {
    case SizeClass::Tiny:
      bin = &mBins[FloorLog2(aSize / kMinTinyClass)];
      break;
    case SizeClass::Quantum:
      // Although we divide 2 things by kQuantum, the compiler will
      // reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a
      // single constant.
      bin = &mBins[kNumTinyClasses + (aSize / kQuantum) -
                   (kMinQuantumClass / kQuantum)];
      break;
    case SizeClass::QuantumWide:
      bin =
          &mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) -
                 (kMinQuantumWideClass / kQuantumWide)];
      break;
    case SizeClass::SubPage:
      bin =
          &mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses +
                 (FloorLog2(aSize) - LOG2(kMinSubPageClass))];
      break;
    default:
      MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type");
  }
  MOZ_DIAGNOSTIC_ASSERT(aSize == bin->mSizeClass);

  {
    // Before we lock, we determine if we need to randomize the allocation
    // because if we do, we need to create the PRNG which might require
    // allocating memory (arc4random on OSX for example) and we need to
    // avoid the deadlock
    // NOTE(review): this check and the initialization below run before mLock
    // is taken -- confirm that two threads cannot both get here for the same
    // arena.
    if (MOZ_UNLIKELY(mRandomizeSmallAllocations && mPRNG == nullptr)) {
      // This is frustrating. Because the code backing RandomUint64 (arc4random
      // for example) may allocate memory, and because
      // mRandomizeSmallAllocations is true and we haven't yet initialized
      // mPRNG, we would re-enter this same case and cause a deadlock inside
      // e.g. arc4random.  So we temporarily disable mRandomizeSmallAllocations
      // to skip this case and then re-enable it
      mRandomizeSmallAllocations = false;
      mozilla::Maybe<uint64_t> prngState1 = mozilla::RandomUint64();
      mozilla::Maybe<uint64_t> prngState2 = mozilla::RandomUint64();
      // NOTE(review): the result of base_alloc() is used without a null
      // check -- confirm whether it can fail here.
      void* backing =
          base_alloc(sizeof(mozilla::non_crypto::XorShift128PlusRNG));
      // A missing random value falls back to a seed of 0.
      mPRNG = new (backing) mozilla::non_crypto::XorShift128PlusRNG(
          prngState1.valueOr(0), prngState2.valueOr(0));
      mRandomizeSmallAllocations = true;
    }
    MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG);

    MaybeMutexAutoLock lock(mLock);
    // Use the bin's current run, replacing it when there is none or when it
    // has no free region left.
    run = bin->mCurrentRun;
    if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) {
      run = bin->mCurrentRun = GetNonFullBinRun(bin);
    }
    if (MOZ_UNLIKELY(!run)) {
      return nullptr;
    }
    MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
    MOZ_DIAGNOSTIC_ASSERT(run->mNumFree > 0);
    ret = ArenaRunRegAlloc(run, bin);
    MOZ_DIAGNOSTIC_ASSERT(ret);
    // Note that the free count is decremented before the null check below.
    run->mNumFree--;
    if (!ret) {
      return nullptr;
    }

    mStats.allocated_small += aSize;
  }

  // The region is filled after the lock has been released.
  if (!aZero) {
    ApplyZeroOrJunk(ret, aSize);
  } else {
    memset(ret, 0, aSize);
  }

  return ret;
}
3316
3317 void* arena_t::MallocLarge(size_t aSize, bool aZero) {
3318   void* ret;
3319
3320   // Large allocation.
3321   aSize = PAGE_CEILING(aSize);
3322
3323   {
3324     MaybeMutexAutoLock lock(mLock);
3325     ret = AllocRun(aSize, true, aZero);
3326     if (!ret) {
3327       return nullptr;
3328     }
3329     mStats.allocated_large += aSize;
3330   }
3331
3332   if (!aZero) {
3333     ApplyZeroOrJunk(ret, aSize);
3334   }
3335
3336   return ret;
3337 }
3338
3339 void* arena_t::Malloc(size_t aSize, bool aZero) {
3340   MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
3341   MOZ_ASSERT(aSize != 0);
3342
3343   if (aSize <= gMaxBinClass) {
3344     return MallocSmall(aSize, aZero);
3345   }
3346   if (aSize <= gMaxLargeClass) {
3347     return MallocLarge(aSize, aZero);
3348   }
3349   return MallocHuge(aSize, aZero);
3350 }
3351
3352 // Only handles large allocations that require more than page alignment.
3353 void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
3354   void* ret;
3355   size_t offset;
3356   arena_chunk_t* chunk;
3357
3358   MOZ_ASSERT((aSize & gPageSizeMask) == 0);
3359   MOZ_ASSERT((aAlignment & gPageSizeMask) == 0);
3360
3361   {
3362     MaybeMutexAutoLock lock(mLock);
3363     ret = AllocRun(aAllocSize, true, false);
3364     if (!ret) {
3365       return nullptr;
3366     }
3367
3368     chunk = GetChunkForPtr(ret);
3369
3370     offset = uintptr_t(ret) & (aAlignment - 1);
3371     MOZ_ASSERT((offset & gPageSizeMask) == 0);
3372     MOZ_ASSERT(offset < aAllocSize);
3373     if (offset == 0) {
3374       TrimRunTail(chunk, (arena_run_t*)ret, aAllocSize, aSize, false);
3375     } else {
3376       size_t leadsize, trailsize;
3377
3378       leadsize = aAlignment - offset;
3379       if (leadsize > 0) {
3380         TrimRunHead(chunk, (arena_run_t*)ret, aAllocSize,
3381                     aAllocSize - leadsize);
3382         ret = (void*)(uintptr_t(ret) + leadsize);
3383       }
3384
3385       trailsize = aAllocSize - leadsize - aSize;
3386       if (trailsize != 0) {
3387         // Trim trailing space.
3388         MOZ_ASSERT(trailsize < aAllocSize);
3389         TrimRunTail(chunk, (arena_run_t*)ret, aSize + trailsize, aSize, false);
3390       }
3391     }
3392
3393     mStats.allocated_large += aSize;
3394   }
3395
3396   ApplyZeroOrJunk(ret, aSize);
3397   return ret;
3398 }
3399
// Allocates aSize bytes aligned to aAlignment. Depending on the rounded size
// and the alignment this either goes through the regular Malloc() path, an
// over-sized large run that is trimmed to fit (PallocLarge()), or one of the
// huge allocators. Returns nullptr when a size computation overflows size_t;
// otherwise returns whatever the selected allocator returns.
void* arena_t::Palloc(size_t aAlignment, size_t aSize) {
  void* ret;
  size_t ceil_size;

  // Round size up to the nearest multiple of alignment.
  //
  // This done, we can take advantage of the fact that for each small
  // size class, every object is aligned at the smallest power of two
  // that is non-zero in the base two representation of the size.  For
  // example:
  //
  //   Size |   Base 2 | Minimum alignment
  //   -----+----------+------------------
  //     96 |  1100000 |  32
  //    160 | 10100000 |  32
  //    192 | 11000000 |  64
  //
  // Depending on runtime settings, it is possible that Malloc()
  // will further round up to a power of two, but that never causes
  // correctness issues.
  ceil_size = ALIGNMENT_CEILING(aSize, aAlignment);

  // (ceil_size < aSize) protects against the combination of maximal
  // alignment and size greater than maximal alignment.
  if (ceil_size < aSize) {
    // size_t overflow.
    return nullptr;
  }

  // Requests that fit in a page, or that need at most page alignment and fit
  // in a large size class, are served by the regular allocation path.
  if (ceil_size <= gPageSize ||
      (aAlignment <= gPageSize && ceil_size <= gMaxLargeClass)) {
    ret = Malloc(ceil_size, false);
  } else {
    size_t run_size;

    // We can't achieve sub-page alignment, so round up alignment
    // permanently; it makes later calculations simpler.
    aAlignment = PAGE_CEILING(aAlignment);
    ceil_size = PAGE_CEILING(aSize);

    // (ceil_size < aSize) protects against very large sizes within
    // pagesize of SIZE_T_MAX.
    //
    // (ceil_size + aAlignment < ceil_size) protects against the
    // combination of maximal alignment and ceil_size large enough
    // to cause overflow.  This is similar to the first overflow
    // check above, but it needs to be repeated due to the new
    // ceil_size value, which may now be *equal* to maximal
    // alignment, whereas before we only detected overflow if the
    // original size was *greater* than maximal alignment.
    if (ceil_size < aSize || ceil_size + aAlignment < ceil_size) {
      // size_t overflow.
      return nullptr;
    }

    // Calculate the size of the over-size run that PallocLarge()
    // would need to allocate in order to guarantee the alignment.
    if (ceil_size >= aAlignment) {
      run_size = ceil_size + aAlignment - gPageSize;
    } else {
      // It is possible that (aAlignment << 1) will cause
      // overflow, but it doesn't matter because we also
      // subtract pagesize, which in the case of overflow
      // leaves us with a very large run_size.  That causes
      // the first conditional below to fail, which means
      // that the bogus run_size value never gets used for
      // anything important.
      run_size = (aAlignment << 1) - gPageSize;
    }

    // Prefer a trimmed large run; otherwise fall back to a huge allocation,
    // using the aligned variant only when the alignment exceeds kChunkSize.
    if (run_size <= gMaxLargeClass) {
      ret = PallocLarge(aAlignment, ceil_size, run_size);
    } else if (aAlignment <= kChunkSize) {
      ret = MallocHuge(ceil_size, false);
    } else {
      ret = PallocHuge(ceil_size, aAlignment, false);
    }
  }

  // Holds for a null result as well, since 0 is aligned to everything.
  MOZ_ASSERT((uintptr_t(ret) & (aAlignment - 1)) == 0);
  return ret;
}
3482
3483 class AllocInfo {
3484  public:
3485   template <bool Validate = false>
3486   static inline AllocInfo Get(const void* aPtr) {
3487     // If the allocator is not initialized, the pointer can't belong to it.
3488     if (Validate && !malloc_initialized) {
3489       return AllocInfo();
3490     }
3491
3492     auto chunk = GetChunkForPtr(aPtr);
3493     if (Validate) {
3494       if (!chunk || !gChunkRTree.Get(chunk)) {
3495         return AllocInfo();
3496       }
3497     }
3498
3499     if (chunk != aPtr) {
3500       MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
3501       size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
3502       return GetInChunk(aPtr, chunk, pageind);
3503     }
3504
3505     extent_node_t key;
3506
3507     // Huge allocation
3508     key.mAddr = chunk;
3509     MutexAutoLock lock(huge_mtx);
3510     extent_node_t* node = huge.Search(&key);
3511     if (Validate && !node) {
3512       return AllocInfo();
3513     }
3514     return AllocInfo(node->mSize, node);
3515   }
3516
3517   // Get the allocation information for a pointer we know is within a chunk
3518   // (Small or large, not huge).
3519   static inline AllocInfo GetInChunk(const void* aPtr, arena_chunk_t* aChunk,
3520                                      size_t pageind) {
3521     size_t mapbits = aChunk->map[pageind].bits;
3522     MOZ_DIAGNOSTIC_ASSERT((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3523
3524     size_t size;
3525     if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3526       arena_run_t* run = (arena_run_t*)(mapbits & ~gPageSizeMask);
3527       MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3528       size = run->mBin->mSizeClass;
3529     } else {
3530       size = mapbits & ~gPageSizeMask;
3531       MOZ_DIAGNOSTIC_ASSERT(size != 0);
3532     }
3533
3534     return AllocInfo(size, aChunk);
3535   }
3536
3537   // Validate ptr before assuming that it points to an allocation.  Currently,
3538   // the following validation is performed:
3539   //
3540   // + Check that ptr is not nullptr.
3541   //
3542   // + Check that ptr lies within a mapped chunk.
3543   static inline AllocInfo GetValidated(const void* aPtr) {
3544     return Get<true>(aPtr);
3545   }
3546
3547   AllocInfo() : mSize(0), mChunk(nullptr) {}
3548
3549   explicit AllocInfo(size_t aSize, arena_chunk_t* aChunk)
3550       : mSize(aSize), mChunk(aChunk) {
3551     MOZ_ASSERT(mSize <= gMaxLargeClass);
3552   }
3553
3554   explicit AllocInfo(size_t aSize, extent_node_t* aNode)
3555       : mSize(aSize), mNode(aNode) {
3556     MOZ_ASSERT(mSize > gMaxLargeClass);
3557   }
3558
3559   size_t Size() { return mSize; }
3560
3561   arena_t* Arena() {
3562     if (mSize <= gMaxLargeClass) {
3563       return mChunk->arena;
3564     }
3565     // Best effort detection that we're not trying to access an already
3566     // disposed arena. In the case of a disposed arena, the memory location
3567     // pointed by mNode->mArena is either free (but still a valid memory
3568     // region, per TypedBaseAlloc<arena_t>), in which case its id was reset,
3569     // or has been reallocated for a new region, and its id is very likely
3570     // different (per randomness). In both cases, the id is unlikely to
3571     // match what it was for the disposed arena.
3572     MOZ_RELEASE_ASSERT(mNode->mArenaId == mNode->mArena->mId);
3573     return mNode->mArena;
3574   }
3575
3576   bool IsValid() const { return !!mSize; }
3577
3578  private:
3579   size_t mSize;
3580   union {
3581     // Pointer to the chunk associated with the allocation for small
3582     // and large allocations.
3583     arena_chunk_t* mChunk;
3584
3585     // Pointer to the extent node for huge allocations.
3586     extent_node_t* mNode;
3587   };
3588 };
3589
3590 inline void MozJemalloc::jemalloc_ptr_info(const void* aPtr,
3591                                            jemalloc_ptr_info_t* aInfo) {
3592   arena_chunk_t* chunk = GetChunkForPtr(aPtr);
3593
3594   // Is the pointer null, or within one chunk's size of null?
3595   // Alternatively, if the allocator is not initialized yet, the pointer
3596   // can't be known.
3597   if (!chunk || !malloc_initialized) {
3598     *aInfo = {TagUnknown, nullptr, 0, 0};
3599     return;
3600   }
3601
3602   // Look for huge allocations before looking for |chunk| in gChunkRTree.
3603   // This is necessary because |chunk| won't be in gChunkRTree if it's
3604   // the second or subsequent chunk in a huge allocation.
3605   extent_node_t* node;
3606   extent_node_t key;
3607   {
3608     MutexAutoLock lock(huge_mtx);
3609     key.mAddr = const_cast<void*>(aPtr);
3610     node =
3611         reinterpret_cast<RedBlackTree<extent_node_t, ExtentTreeBoundsTrait>*>(
3612             &huge)
3613             ->Search(&key);
3614     if (node) {
3615       *aInfo = {TagLiveAlloc, node->mAddr, node->mSize, node->mArena->mId};
3616       return;
3617     }
3618   }
3619
3620   // It's not a huge allocation. Check if we have a known chunk.
3621   if (!gChunkRTree.Get(chunk)) {
3622     *aInfo = {TagUnknown, nullptr, 0, 0};
3623     return;
3624   }
3625
3626   MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
3627
3628   // Get the page number within the chunk.
3629   size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
3630   if (pageind < gChunkHeaderNumPages) {
3631     // Within the chunk header.
3632     *aInfo = {TagUnknown, nullptr, 0, 0};
3633     return;
3634   }
3635
3636   size_t mapbits = chunk->map[pageind].bits;
3637
3638   if (!(mapbits & CHUNK_MAP_ALLOCATED)) {
3639     void* pageaddr = (void*)(uintptr_t(aPtr) & ~gPageSizeMask);
3640     *aInfo = {TagFreedPage, pageaddr, gPageSize, chunk->arena->mId};
3641     return;
3642   }
3643
3644   if (mapbits & CHUNK_MAP_LARGE) {
3645     // It's a large allocation. Only the first page of a large
3646     // allocation contains its size, so if the address is not in
3647     // the first page, scan back to find the allocation size.
3648     size_t size;
3649     while (true) {
3650       size = mapbits & ~gPageSizeMask;
3651       if (size != 0) {
3652         break;
3653       }
3654
3655       // The following two return paths shouldn't occur in
3656       // practice unless there is heap corruption.
3657       pageind--;
3658       MOZ_DIAGNOSTIC_ASSERT(pageind >= gChunkHeaderNumPages);
3659       if (pageind < gChunkHeaderNumPages) {
3660         *aInfo = {TagUnknown, nullptr, 0, 0};
3661         return;
3662       }
3663
3664       mapbits = chunk->map[pageind].bits;
3665       MOZ_DIAGNOSTIC_ASSERT(mapbits & CHUNK_MAP_LARGE);
3666       if (!(mapbits & CHUNK_MAP_LARGE)) {
3667         *aInfo = {TagUnknown, nullptr, 0, 0};
3668         return;
3669       }
3670     }
3671
3672     void* addr = ((char*)chunk) + (pageind << gPageSize2Pow);
3673     *aInfo = {TagLiveAlloc, addr, size, chunk->arena->mId};
3674     return;
3675   }
3676
3677   // It must be a small allocation.
3678   auto run = (arena_run_t*)(mapbits & ~gPageSizeMask);
3679   MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3680
3681   // The allocation size is stored in the run metadata.
3682   size_t size = run->mBin->mSizeClass;
3683
3684   // Address of the first possible pointer in the run after its headers.
3685   uintptr_t reg0_addr = (uintptr_t)run + run->mBin->mRunFirstRegionOffset;
3686   if (aPtr < (void*)reg0_addr) {
3687     // In the run header.
3688     *aInfo = {TagUnknown, nullptr, 0, 0};
3689     return;
3690   }
3691
3692   // Position in the run.
3693   unsigned regind = ((uintptr_t)aPtr - reg0_addr) / size;
3694
3695   // Pointer to the allocation's base address.
3696   void* addr = (void*)(reg0_addr + regind * size);
3697
3698   // Check if the allocation has been freed.
3699   unsigned elm = regind >> (LOG2(sizeof(int)) + 3);
3700   unsigned bit = regind - (elm << (LOG2(sizeof(int)) + 3));
3701   PtrInfoTag tag =
3702       ((run->mRegionsMask[elm] & (1U << bit))) ? TagFreedAlloc : TagLiveAlloc;
3703
3704   *aInfo = {tag, addr, size, chunk->arena->mId};
3705 }
3706
3707 namespace Debug {
3708 // Helper for debuggers. We don't want it to be inlined and optimized out.
3709 MOZ_NEVER_INLINE jemalloc_ptr_info_t* jemalloc_ptr_info(const void* aPtr) {
3710   static jemalloc_ptr_info_t info;
3711   MozJemalloc::jemalloc_ptr_info(aPtr, &info);
3712   return &info;
3713 }
3714 }  // namespace Debug
3715
// Frees the small region at aPtr. The owning run is read from aMapElm, the
// page-map entry of the page containing aPtr, and aChunk is the chunk that
// contains the run.
//
// Returns the result of DallocRun() when this free emptied the run (the run
// itself is then released), and nullptr otherwise. arena_dalloc() calls this
// with the arena lock held and passes a non-null result to chunk_dealloc()
// after dropping the lock.
arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
                                    arena_chunk_map_t* aMapElm) {
  arena_run_t* run;
  arena_bin_t* bin;
  size_t size;

  // For small allocations the map bits above the page mask hold the address
  // of the run.
  run = (arena_run_t*)(aMapElm->bits & ~gPageSizeMask);
  MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
  bin = run->mBin;
  size = bin->mSizeClass;
  MOZ_DIAGNOSTIC_ASSERT(uintptr_t(aPtr) >=
                        uintptr_t(run) + bin->mRunFirstRegionOffset);

  arena_run_reg_dalloc(run, bin, aPtr, size);
  run->mNumFree++;
  arena_chunk_t* dealloc_chunk = nullptr;

  if (run->mNumFree == bin->mRunNumRegions) {
    // Deallocate run.
    if (run == bin->mCurrentRun) {
      bin->mCurrentRun = nullptr;
    } else if (bin->mRunNumRegions != 1) {
      size_t run_pageind =
          (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
      arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];

      // This block's conditional is necessary because if the
      // run only contains one region, then it never gets
      // inserted into the non-full runs tree.
      MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == run_mapelm);
      bin->mNonFullRuns.Remove(run_mapelm);
    }
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
    // Clear the magic so the diagnostic asserts catch later use of this run.
    run->mMagic = 0;
#endif
    dealloc_chunk = DallocRun(run, true);
    bin->mNumRuns--;
  } else if (run->mNumFree == 1 && run != bin->mCurrentRun) {
    // The run had no free region before this call (mNumFree just went from
    // 0 to 1), so it has to be made reachable for allocation again.
    //
    // Make sure that bin->mCurrentRun always refers to the lowest
    // non-full run, if one exists.
    if (!bin->mCurrentRun) {
      bin->mCurrentRun = run;
    } else if (uintptr_t(run) < uintptr_t(bin->mCurrentRun)) {
      // Switch mCurrentRun.
      if (bin->mCurrentRun->mNumFree > 0) {
        // The previous current run still has free regions, so keep it
        // available through the non-full runs tree.
        arena_chunk_t* runcur_chunk = GetChunkForPtr(bin->mCurrentRun);
        size_t runcur_pageind =
            (uintptr_t(bin->mCurrentRun) - uintptr_t(runcur_chunk)) >>
            gPageSize2Pow;
        arena_chunk_map_t* runcur_mapelm = &runcur_chunk->map[runcur_pageind];

        // Insert runcur.
        MOZ_DIAGNOSTIC_ASSERT(!bin->mNonFullRuns.Search(runcur_mapelm));
        bin->mNonFullRuns.Insert(runcur_mapelm);
      }
      bin->mCurrentRun = run;
    } else {
      // The run lies above the current run: track it in the non-full runs
      // tree instead.
      size_t run_pageind =
          (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
      arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];

      MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == nullptr);
      bin->mNonFullRuns.Insert(run_mapelm);
    }
  }
  mStats.allocated_small -= size;

  return dealloc_chunk;
}
3785
3786 arena_chunk_t* arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) {
3787   MOZ_DIAGNOSTIC_ASSERT((uintptr_t(aPtr) & gPageSizeMask) == 0);
3788   size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
3789   size_t size = aChunk->map[pageind].bits & ~gPageSizeMask;
3790
3791   mStats.allocated_large -= size;
3792
3793   return DallocRun((arena_run_t*)aPtr, true);
3794 }
3795
3796 static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) {
3797   MOZ_ASSERT(aPtr);
3798   MOZ_ASSERT(aOffset != 0);
3799   MOZ_ASSERT(GetChunkOffsetForPtr(aPtr) == aOffset);
3800
3801   auto chunk = (arena_chunk_t*)((uintptr_t)aPtr - aOffset);
3802   auto arena = chunk->arena;
3803   MOZ_ASSERT(arena);
3804   MOZ_DIAGNOSTIC_ASSERT(arena->mMagic == ARENA_MAGIC);
3805   MOZ_RELEASE_ASSERT(!aArena || arena == aArena);
3806
3807   size_t pageind = aOffset >> gPageSize2Pow;
3808   if (opt_poison) {
3809     AllocInfo info = AllocInfo::GetInChunk(aPtr, chunk, pageind);
3810     MOZ_ASSERT(info.IsValid());
3811     MaybePoison(aPtr, info.Size());
3812   }
3813
3814   arena_chunk_t* chunk_dealloc_delay = nullptr;
3815
3816   {
3817     MaybeMutexAutoLock lock(arena->mLock);
3818     arena_chunk_map_t* mapelm = &chunk->map[pageind];
3819     MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_DECOMMITTED) == 0,
3820                        "Freeing in decommitted page.");
3821     MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0,
3822                        "Double-free?");
3823     if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3824       // Small allocation.
3825       chunk_dealloc_delay = arena->DallocSmall(chunk, aPtr, mapelm);
3826     } else {
3827       // Large allocation.
3828       chunk_dealloc_delay = arena->DallocLarge(chunk, aPtr);
3829     }
3830   }
3831
3832   if (chunk_dealloc_delay) {
3833     chunk_dealloc((void*)chunk_dealloc_delay, kChunkSize, ARENA_CHUNK);
3834   }
3835 }
3836
3837 static inline void idalloc(void* ptr, arena_t* aArena) {
3838   size_t offset;
3839
3840   MOZ_ASSERT(ptr);
3841
3842   offset = GetChunkOffsetForPtr(ptr);
3843   if (offset != 0) {
3844     arena_dalloc(ptr, offset, aArena);
3845   } else {
3846     huge_dalloc(ptr, aArena);
3847   }
3848 }
3849
3850 void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
3851                                 size_t aOldSize) {
3852   MOZ_ASSERT(aSize < aOldSize);
3853
3854   // Shrink the run, and make trailing pages available for other
3855   // allocations.
3856   MaybeMutexAutoLock lock(mLock);
3857   TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
3858   mStats.allocated_large -= aOldSize - aSize;
3859 }
3860
3861 // Returns whether reallocation was successful.
3862 bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
3863                               size_t aOldSize) {
3864   size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
3865   size_t npages = aOldSize >> gPageSize2Pow;
3866
3867   MaybeMutexAutoLock lock(mLock);
3868   MOZ_DIAGNOSTIC_ASSERT(aOldSize ==
3869                         (aChunk->map[pageind].bits & ~gPageSizeMask));
3870
3871   // Try to extend the run.
3872   MOZ_ASSERT(aSize > aOldSize);
3873   if (pageind + npages < gChunkNumPages - 1 &&
3874       (aChunk->map[pageind + npages].bits & CHUNK_MAP_ALLOCATED) == 0 &&
3875       (aChunk->map[pageind + npages].bits & ~gPageSizeMask) >=
3876           aSize - aOldSize) {
3877     // The next run is available and sufficiently large.  Split the
3878     // following run, then merge the first part with the existing
3879     // allocation.
3880     if (!SplitRun((arena_run_t*)(uintptr_t(aChunk) +
3881                                  ((pageind + npages) << gPageSize2Pow)),
3882                   aSize - aOldSize, true, false)) {
3883       return false;
3884     }
3885
3886     aChunk->map[pageind].bits = aSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3887     aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3888
3889     mStats.allocated_large += aSize - aOldSize;
3890     return true;
3891   }
3892
3893   return false;
3894 }
3895
3896 void* arena_t::RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize) {
3897   void* ret;
3898   size_t copysize;
3899   SizeClass sizeClass(aSize);
3900
3901   // Try to avoid moving the allocation.
3902   if (aOldSize <= gMaxLargeClass && sizeClass.Size() == aOldSize) {
3903     if (aSize < aOldSize) {
3904       MaybePoison((void*)(uintptr_t(aPtr) + aSize), aOldSize - aSize);
3905     }
3906     return aPtr;
3907   }
3908   if (sizeClass.Type() == SizeClass::Large && aOldSize > gMaxBinClass &&
3909       aOldSize <= gMaxLargeClass) {
3910     arena_chunk_t* chunk = GetChunkForPtr(aPtr);
3911     if (sizeClass.Size() < aOldSize) {
3912       // Fill before shrinking in order to avoid a race.
3913       MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize);
3914       RallocShrinkLarge(chunk, aPtr, sizeClass.Size(), aOldSize);
3915       return aPtr;
3916     }
3917     if (RallocGrowLarge(chunk, aPtr, sizeClass.Size(), aOldSize)) {
3918       ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
3919       return aPtr;
3920     }
3921   }
3922
3923   // If we get here, then aSize and aOldSize are different enough that we
3924   // need to move the object.  In that case, fall back to allocating new
3925   // space and copying. Allow non-private arenas to switch arenas.
3926   ret = (mIsPrivate ? this : choose_arena(aSize))->Malloc(aSize, false);
3927   if (!ret) {
3928     return nullptr;
3929   }
3930
3931   // Junk/zero-filling were already done by arena_t::Malloc().
3932   copysize = (aSize < aOldSize) ? aSize : aOldSize;
3933 #ifdef VM_COPY_MIN
3934   if (copysize >= VM_COPY_MIN) {
3935     pages_copy(ret, aPtr, copysize);
3936   } else
3937 #endif
3938   {
3939     memcpy(ret, aPtr, copysize);
3940   }
3941   idalloc(aPtr, this);
3942   return ret;
3943 }
3944
3945 void* arena_t::Ralloc(void* aPtr, size_t aSize, size_t aOldSize) {
3946   MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
3947   MOZ_ASSERT(aPtr);
3948   MOZ_ASSERT(aSize != 0);
3949
3950   return (aSize <= gMaxLargeClass) ? RallocSmallOrLarge(aPtr, aSize, aOldSize)
3951                                    : RallocHuge(aPtr, aSize, aOldSize);
3952 }
3953
3954 void* arena_t::operator new(size_t aCount, const fallible_t&) noexcept {
3955   MOZ_ASSERT(aCount == sizeof(arena_t));
3956   return TypedBaseAlloc<arena_t>::alloc();
3957 }
3958
3959 void arena_t::operator delete(void* aPtr) {
3960   TypedBaseAlloc<arena_t>::dealloc((arena_t*)aPtr);
3961 }
3962
// Constructs an empty arena.
//
// aParams may be null; when given, its flags can override the global
// small-allocation randomization setting and can request a main-thread-only
// arena (which then avoids locking), and its fields supply the dirty-page
// limits. aIsPrivate is recorded in mIsPrivate. The arena id is left at 0
// here.
arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
  unsigned i;

  memset(&mLink, 0, sizeof(mLink));
  memset(&mStats, 0, sizeof(arena_stats_t));
  mId = 0;

  // Initialize chunks.
  mChunksDirty.Init();
#ifdef MALLOC_DOUBLE_PURGE
  new (&mChunksMAdvised) DoublyLinkedList<arena_chunk_t>();
#endif
  mSpare = nullptr;

  // Start from the global option; aParams may override it below.
  mRandomizeSmallAllocations = opt_randomize_small;
  MaybeMutex::DoLock doLock = MaybeMutex::MUST_LOCK;
  if (aParams) {
    uint32_t randFlags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
    switch (randFlags) {
      case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED:
        mRandomizeSmallAllocations = true;
        break;
      case ARENA_FLAG_RANDOMIZE_SMALL_DISABLED:
        mRandomizeSmallAllocations = false;
        break;
      case ARENA_FLAG_RANDOMIZE_SMALL_DEFAULT:
      default:
        break;
    }

    uint32_t threadFlags = aParams->mFlags & ARENA_FLAG_THREAD_MASK;
    if (threadFlags == ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) {
      // At the moment we require that any ARENA_FLAG_THREAD_MAIN_THREAD_ONLY
      // arenas are created and therefore always accessed by the main thread.
      // This is for two reasons:
      //  * it allows jemalloc_stats to read their statistics (we also require
      //    that jemalloc_stats is only used on the main thread).
      //  * Only main-thread or threadsafe arenas can be guaranteed to be in a
      //    consistent state after a fork() from the main thread.  If fork()
      //    occurs off-thread then the new child process cannot use these arenas
      //    (new children should usually exec() or exit() since other data may
      //    also be inconsistent).
      MOZ_ASSERT(gArenas.IsOnMainThread());
      MOZ_ASSERT(aIsPrivate);
      doLock = MaybeMutex::AVOID_LOCK_UNSAFE;
    }

    mMaxDirtyIncreaseOverride = aParams->mMaxDirtyIncreaseOverride;
    mMaxDirtyDecreaseOverride = aParams->mMaxDirtyDecreaseOverride;
  } else {
    mMaxDirtyIncreaseOverride = 0;
    mMaxDirtyDecreaseOverride = 0;
  }

  MOZ_RELEASE_ASSERT(mLock.Init(doLock));

  // No PRNG is created here; mPRNG starts out null.
  mPRNG = nullptr;

  mIsPrivate = aIsPrivate;

  mNumDirty = 0;
  // The default maximum amount of dirty pages allowed on arenas is a fraction
  // of opt_dirty_max.
  mMaxDirty = (aParams && aParams->mMaxDirty) ? aParams->mMaxDirty
                                              : (opt_dirty_max / 8);

  mRunsAvail.Init();

  // Initialize bins.
  // Walk the size classes in order, starting from the class for size 1, and
  // initialize one bin per class up to and including gMaxBinClass.
  SizeClass sizeClass(1);

  for (i = 0;; i++) {
    arena_bin_t& bin = mBins[i];
    bin.Init(sizeClass);

    // SizeClass doesn't want sizes larger than gMaxBinClass for now.
    if (sizeClass.Size() == gMaxBinClass) {
      break;
    }
    sizeClass = sizeClass.Next();
  }
  MOZ_ASSERT(i == NUM_SMALL_CLASSES - 1);

#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
  mMagic = ARENA_MAGIC;
#endif
}
4050
4051 arena_t::~arena_t() {
4052   size_t i;
4053   MaybeMutexAutoLock lock(mLock);
4054   MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(),
4055                      "Arena is still registered");
4056   MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large,
4057                      "Arena is not empty");
4058   if (mSpare) {
4059     chunk_dealloc(mSpare, kChunkSize, ARENA_CHUNK);
4060   }
4061   for (i = 0; i < NUM_SMALL_CLASSES; i++) {
4062     MOZ_RELEASE_ASSERT(!mBins[i].mNonFullRuns.First(), "Bin is not empty");
4063   }
4064 #ifdef MOZ_DEBUG
4065   {
4066     MutexAutoLock lock(huge_mtx);
4067     // This is an expensive check, so we only do it on debug builds.
4068     for (auto node : huge.iter()) {
4069       MOZ_RELEASE_ASSERT(node->mArenaId != mId, "Arena has huge allocations");
4070     }
4071   }
4072 #endif
4073   mId = 0;
4074 }
4075
4076 arena_t* ArenaCollection::CreateArena(bool aIsPrivate,
4077                                       arena_params_t* aParams) {
4078   arena_t* ret = new (fallible) arena_t(aParams, aIsPrivate);
4079   if (!ret) {
4080     // Only reached if there is an OOM error.
4081
4082     // OOM here is quite inconvenient to propagate, since dealing with it
4083     // would require a check for failure in the fast path.  Instead, punt
4084     // by using the first arena.
4085     // In practice, this is an extremely unlikely failure.
4086     _malloc_message(_getprogname(), ": (malloc) Error initializing arena\n");
4087
4088     return mDefaultArena;
4089   }
4090
4091   MutexAutoLock lock(mLock);
4092
4093   // For public arenas, it's fine to just use incrementing arena id
4094   if (!aIsPrivate) {
4095     ret->mId = mLastPublicArenaId++;
4096     mArenas.Insert(ret);
4097     return ret;
4098   }
4099
4100   // For private arenas, generate a cryptographically-secure random id for the
4101   // new arena. If an attacker manages to get control of the process, this
4102   // should make it more difficult for them to "guess" the ID of a memory
4103   // arena, stopping them from getting data they may want
4104   Tree& tree = (ret->IsMainThreadOnly()) ? mMainThreadArenas : mPrivateArenas;
4105   arena_id_t arena_id;
4106   do {
4107     arena_id = MakeRandArenaId(ret->IsMainThreadOnly());
4108     // Keep looping until we ensure that the random number we just generated
4109     // isn't already in use by another active arena
4110   } while (GetByIdInternal(tree, arena_id));
4111
4112   ret->mId = arena_id;
4113   tree.Insert(ret);
4114   return ret;
4115 }
4116
4117 arena_id_t ArenaCollection::MakeRandArenaId(bool aIsMainThreadOnly) const {
4118   uint64_t rand;
4119   do {
4120     mozilla::Maybe<uint64_t> maybeRandomId = mozilla::RandomUint64();
4121     MOZ_RELEASE_ASSERT(maybeRandomId.isSome());
4122
4123     rand = maybeRandomId.value();
4124
4125     // Set or clear the least significant bit depending on if this is a
4126     // main-thread-only arena.  We use this in GetById.
4127     if (aIsMainThreadOnly) {
4128       rand = rand | MAIN_THREAD_ARENA_BIT;
4129     } else {
4130       rand = rand & ~MAIN_THREAD_ARENA_BIT;
4131     }
4132
4133     // Avoid 0 as an arena Id. We use 0 for disposed arenas.
4134   } while (rand == 0);
4135
4136   return arena_id_t(rand);
4137 }
4138
4139 // End arena.
4140 // ***************************************************************************
4141 // Begin general internal functions.
4142
4143 void* arena_t::MallocHuge(size_t aSize, bool aZero) {
4144   return PallocHuge(aSize, kChunkSize, aZero);
4145 }
4146
4147 void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) {
4148   void* ret;
4149   size_t csize;
4150   size_t psize;
4151   extent_node_t* node;
4152   bool zeroed;
4153
4154   // We're going to configure guard pages in the region between the
4155   // page-aligned size and the chunk-aligned size, so if those are the same
4156   // then we need to force that region into existence.
4157   csize = CHUNK_CEILING(aSize + gPageSize);
4158   if (csize < aSize) {
4159     // size is large enough to cause size_t wrap-around.
4160     return nullptr;
4161   }
4162
4163   // Allocate an extent node with which to track the chunk.
4164   node = ExtentAlloc::alloc();
4165   if (!node) {
4166     return nullptr;
4167   }
4168
4169   // Allocate one or more contiguous chunks for this request.
4170   ret = chunk_alloc(csize, aAlignment, false, &zeroed);
4171   if (!ret) {
4172     ExtentAlloc::dealloc(node);
4173     return nullptr;
4174   }
4175   psize = PAGE_CEILING(aSize);
4176   if (aZero) {
4177     // We will decommit anything past psize so there is no need to zero
4178     // further.
4179     chunk_ensure_zero(ret, psize, zeroed);
4180   }
4181
4182   // Insert node into huge.
4183   node->mAddr = ret;
4184   node->mSize = psize;
4185   node->mArena = this;
4186   node->mArenaId = mId;
4187
4188   {
4189     MutexAutoLock lock(huge_mtx);
4190     huge.Insert(node);
4191
4192     // Although we allocated space for csize bytes, we indicate that we've
4193     // allocated only psize bytes.
4194     //
4195     // If DECOMMIT is defined, this is a reasonable thing to do, since
4196     // we'll explicitly decommit the bytes in excess of psize.
4197     //
4198     // If DECOMMIT is not defined, then we're relying on the OS to be lazy
4199     // about how it allocates physical pages to mappings.  If we never
4200     // touch the pages in excess of psize, the OS won't allocate a physical
4201     // page, and we won't use more than psize bytes of physical memory.
4202     //
4203     // A correct program will only touch memory in excess of how much it
4204     // requested if it first calls malloc_usable_size and finds out how
4205     // much space it has to play with.  But because we set node->mSize =
4206     // psize above, malloc_usable_size will return psize, not csize, and
4207     // the program will (hopefully) never touch bytes in excess of psize.
4208     // Thus those bytes won't take up space in physical memory, and we can
4209     // reasonably claim we never "allocated" them in the first place.
4210     huge_allocated += psize;
4211     huge_mapped += csize;
4212   }
4213
4214   pages_decommit((void*)((uintptr_t)ret + psize), csize - psize);
4215
4216   if (!aZero) {
4217     ApplyZeroOrJunk(ret, psize);
4218   }
4219
4220   return ret;
4221 }
4222
4223 void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
4224   void* ret;
4225   size_t copysize;
4226
4227   // Avoid moving the allocation if the size class would not change.
4228   if (aOldSize > gMaxLargeClass &&
4229       CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) {
4230     size_t psize = PAGE_CEILING(aSize);
4231     if (aSize < aOldSize) {
4232       MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize);
4233     }
4234     if (psize < aOldSize) {
4235       extent_node_t key;
4236
4237       pages_decommit((void*)((uintptr_t)aPtr + psize), aOldSize - psize);
4238
4239       // Update recorded size.
4240       MutexAutoLock lock(huge_mtx);
4241       key.mAddr = const_cast<void*>(aPtr);
4242       extent_node_t* node = huge.Search(&key);
4243       MOZ_ASSERT(node);
4244       MOZ_ASSERT(node->mSize == aOldSize);
4245       MOZ_RELEASE_ASSERT(node->mArena == this);
4246       huge_allocated -= aOldSize - psize;
4247       // No need to change huge_mapped, because we didn't (un)map anything.
4248       node->mSize = psize;
4249     } else if (psize > aOldSize) {
4250       if (!pages_commit((void*)((uintptr_t)aPtr + aOldSize),
4251                         psize - aOldSize)) {
4252         return nullptr;
4253       }
4254
4255       // We need to update the recorded size if the size increased,
4256       // so malloc_usable_size doesn't return a value smaller than
4257       // what was requested via realloc().
4258       extent_node_t key;
4259       MutexAutoLock lock(huge_mtx);
4260       key.mAddr = const_cast<void*>(aPtr);
4261       extent_node_t* node = huge.Search(&key);
4262       MOZ_ASSERT(node);
4263       MOZ_ASSERT(node->mSize == aOldSize);
4264       MOZ_RELEASE_ASSERT(node->mArena == this);
4265       huge_allocated += psize - aOldSize;
4266       // No need to change huge_mapped, because we didn't
4267       // (un)map anything.
4268       node->mSize = psize;
4269     }
4270
4271     if (aSize > aOldSize) {
4272       ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
4273     }
4274     return aPtr;
4275   }
4276
4277   // If we get here, then aSize and aOldSize are different enough that we
4278   // need to use a different size class.  In that case, fall back to allocating
4279   // new space and copying. Allow non-private arenas to switch arenas.
4280   ret = (mIsPrivate ? this : choose_arena(aSize))->MallocHuge(aSize, false);
4281   if (!ret) {
4282     return nullptr;
4283   }
4284
4285   copysize = (aSize < aOldSize) ? aSize : aOldSize;
4286 #ifdef VM_COPY_MIN
4287   if (copysize >= VM_COPY_MIN) {
4288     pages_copy(ret, aPtr, copysize);
4289   } else
4290 #endif
4291   {
4292     memcpy(ret, aPtr, copysize);
4293   }
4294   idalloc(aPtr, this);
4295   return ret;
4296 }
4297
4298 static void huge_dalloc(void* aPtr, arena_t* aArena) {
4299   extent_node_t* node;
4300   size_t mapped = 0;
4301   {
4302     extent_node_t key;
4303     MutexAutoLock lock(huge_mtx);
4304
4305     // Extract from tree of huge allocations.
4306     key.mAddr = aPtr;
4307     node = huge.Search(&key);
4308     MOZ_RELEASE_ASSERT(node, "Double-free?");
4309     MOZ_ASSERT(node->mAddr == aPtr);
4310     MOZ_RELEASE_ASSERT(!aArena || node->mArena == aArena);
4311     // See AllocInfo::Arena.
4312     MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId);
4313     huge.Remove(node);
4314
4315     mapped = CHUNK_CEILING(node->mSize + gPageSize);
4316     huge_allocated -= node->mSize;
4317     huge_mapped -= mapped;
4318   }
4319
4320   // Unmap chunk.
4321   chunk_dealloc(node->mAddr, mapped, HUGE_CHUNK);
4322
4323   ExtentAlloc::dealloc(node);
4324 }
4325
4326 size_t GetKernelPageSize() {
4327   static size_t kernel_page_size = ([]() {
4328 #ifdef XP_WIN
4329     SYSTEM_INFO info;
4330     GetSystemInfo(&info);
4331     return info.dwPageSize;
4332 #else
4333     long result = sysconf(_SC_PAGESIZE);
4334     MOZ_ASSERT(result != -1);
4335     return result;
4336 #endif
4337   })();
4338   return kernel_page_size;
4339 }
4340
4341 // Returns whether the allocator was successfully initialized.
4342 static bool malloc_init_hard() {
4343   unsigned i;
4344   const char* opts;
4345
4346   AutoLock<StaticMutex> lock(gInitLock);
4347
4348   if (malloc_initialized) {
4349     // Another thread initialized the allocator before this one
4350     // acquired gInitLock.
4351     return true;
4352   }
4353
4354   if (!thread_arena.init()) {
4355     return true;
4356   }
4357
4358   // Get page size and number of CPUs
4359   const size_t result = GetKernelPageSize();
4360   // We assume that the page size is a power of 2.
4361   MOZ_ASSERT(((result - 1) & result) == 0);
4362 #ifdef MALLOC_STATIC_PAGESIZE
4363   if (gPageSize % result) {
4364     _malloc_message(
4365         _getprogname(),
4366         "Compile-time page size does not divide the runtime one.\n");
4367     MOZ_CRASH();
4368   }
4369 #else
4370   gRealPageSize = gPageSize = result;
4371 #endif
4372
4373   // Get runtime configuration.
4374   if ((opts = getenv("MALLOC_OPTIONS"))) {
4375     for (i = 0; opts[i] != '\0'; i++) {
4376       // All options are single letters, some take a *prefix* numeric argument.
4377
4378       // Parse the argument.
4379       unsigned prefix_arg = 0;
4380       while (opts[i] >= '0' && opts[i] <= '9') {
4381         prefix_arg *= 10;
4382         prefix_arg += opts[i] - '0';
4383         i++;
4384       }
4385
4386       switch (opts[i]) {
4387         case 'f':
4388           opt_dirty_max >>= prefix_arg ? prefix_arg : 1;
4389           break;
4390         case 'F':
4391           prefix_arg = prefix_arg ? prefix_arg : 1;
4392           if (opt_dirty_max == 0) {
4393             opt_dirty_max = 1;
4394             prefix_arg--;
4395           }
4396           opt_dirty_max <<= prefix_arg;
4397           if (opt_dirty_max == 0) {
4398             // If the shift above overflowed all the bits then clamp the result
4399             // instead.  If we started with DIRTY_MAX_DEFAULT then this will
4400             // always be a power of two so choose the maximum power of two that
4401             // fits in a size_t.
4402             opt_dirty_max = size_t(1) << (sizeof(size_t) * CHAR_BIT - 1);
4403           }
4404           break;
4405 #ifdef MALLOC_RUNTIME_CONFIG
4406         case 'j':
4407           opt_junk = false;
4408           break;
4409         case 'J':
4410           opt_junk = true;
4411           break;
4412         case 'q':
4413           // The argument selects how much poisoning to do.
4414           opt_poison = NONE;
4415           break;
4416         case 'Q':
4417           if (opts[i + 1] == 'Q') {
4418             // Maximum poisoning.
4419             i++;
4420             opt_poison = ALL;
4421           } else {
4422             opt_poison = SOME;
4423             opt_poison_size = kCacheLineSize * prefix_arg;
4424           }
4425           break;
4426         case 'z':
4427           opt_zero = false;
4428           break;
4429         case 'Z':
4430           opt_zero = true;
4431           break;
4432 #  ifndef MALLOC_STATIC_PAGESIZE
4433         case 'P':
4434           MOZ_ASSERT(gPageSize >= 4_KiB);
4435           MOZ_ASSERT(gPageSize <= 64_KiB);
4436           prefix_arg = prefix_arg ? prefix_arg : 1;
4437           gPageSize <<= prefix_arg;
4438           // We know that if the shift causes gPageSize to be zero then it's
4439           // because it shifted all the bits off.  We didn't start with zero.
4440           // Therefore if gPageSize is out of bounds we set it to 64KiB.
4441           if (gPageSize < 4_KiB || gPageSize > 64_KiB) {
4442             gPageSize = 64_KiB;
4443           }
4444           break;
4445 #  endif
4446 #endif
4447         case 'r':
4448           opt_randomize_small = false;
4449           break;
4450         case 'R':
4451           opt_randomize_small = true;
4452           break;
4453         default: {
4454           char cbuf[2];
4455
4456           cbuf[0] = opts[i];
4457           cbuf[1] = '\0';
4458           _malloc_message(_getprogname(),
4459                           ": (malloc) Unsupported character "
4460                           "in malloc options: '",
4461                           cbuf, "'\n");
4462         }
4463       }
4464     }
4465   }
4466
4467 #ifndef MALLOC_STATIC_PAGESIZE
4468   DefineGlobals();
4469 #endif
4470   gRecycledSize = 0;
4471
4472   // Initialize chunks data.
4473   chunks_mtx.Init();
4474   MOZ_PUSH_IGNORE_THREAD_SAFETY
4475   gChunksBySize.Init();
4476   gChunksByAddress.Init();
4477   MOZ_POP_THREAD_SAFETY
4478
4479   // Initialize huge allocation data.
4480   huge_mtx.Init();
4481   MOZ_PUSH_IGNORE_THREAD_SAFETY
4482   huge.Init();
4483   huge_allocated = 0;
4484   huge_mapped = 0;
4485   MOZ_POP_THREAD_SAFETY
4486
4487   // Initialize base allocation data structures.
4488   base_mtx.Init();
4489   MOZ_PUSH_IGNORE_THREAD_SAFETY
4490   base_mapped = 0;
4491   base_committed = 0;
4492   MOZ_POP_THREAD_SAFETY
4493
4494   // Initialize arenas collection here.
4495   if (!gArenas.Init()) {
4496     return false;
4497   }
4498
4499   // Assign the default arena to the initial thread.
4500   thread_arena.set(gArenas.GetDefault());
4501
4502   if (!gChunkRTree.Init()) {
4503     return false;
4504   }
4505
4506   malloc_initialized = true;
4507
4508   // Dummy call so that the function is not removed by dead-code elimination
4509   Debug::jemalloc_ptr_info(nullptr);
4510
4511 #if !defined(XP_WIN) && !defined(XP_DARWIN)
4512   // Prevent potential deadlock on malloc locks after fork.
4513   pthread_atfork(_malloc_prefork, _malloc_postfork_parent,
4514                  _malloc_postfork_child);
4515 #endif
4516
4517   return true;
4518 }
4519
4520 // End general internal functions.
4521 // ***************************************************************************
4522 // Begin malloc(3)-compatible functions.
4523
// The BaseAllocator class is a helper class that implements the base allocator
// functions (malloc, calloc, realloc, free, memalign) for a given arena,
// or an appropriately chosen arena (per choose_arena()) when none is given.
struct BaseAllocator {
// Turn every MALLOC_DECL entry produced by the include below into an inline
// member-function declaration with the same name, return type and arguments.
#define MALLOC_DECL(name, return_type, ...) \
  inline return_type name(__VA_ARGS__);

// Select the MALLOC_FUNCS_MALLOC_BASE group before including the declaration
// list (presumably malloc_decls.h only emits the selected functions).
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
#include "malloc_decls.h"

  // aArena is the arena to operate on; the member functions fall back to
  // choose_arena() for allocations when it is null.
  explicit BaseAllocator(arena_t* aArena) : mArena(aArena) {}

 private:
  // Arena given at construction time; may be null.
  arena_t* mArena;
};
4539
// Define MozJemalloc::<name> for each function in the MALLOC_FUNCS_MALLOC_BASE
// group: every definition builds a BaseAllocator with a null arena and
// forwards its arguments to the member function of the same name.
#define MALLOC_DECL(name, return_type, ...)                               \
  return_type MozJemalloc::name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
    BaseAllocator allocator(nullptr);                                     \
    return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__));              \
  }
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
#include "malloc_decls.h"
4547
4548 inline void* BaseAllocator::malloc(size_t aSize) {
4549   void* ret;
4550   arena_t* arena;
4551
4552   if (!malloc_init()) {
4553     ret = nullptr;
4554     goto RETURN;
4555   }
4556
4557   if (aSize == 0) {
4558     aSize = 1;
4559   }
4560   // If mArena is non-null, it must not be in the first page.
4561   MOZ_DIAGNOSTIC_ASSERT_IF(mArena, (size_t)mArena >= gPageSize);
4562   arena = mArena ? mArena : choose_arena(aSize);
4563   ret = arena->Malloc(aSize, /* aZero = */ false);
4564
4565 RETURN:
4566   if (!ret) {
4567     errno = ENOMEM;
4568   }
4569
4570   return ret;
4571 }
4572
4573 inline void* BaseAllocator::memalign(size_t aAlignment, size_t aSize) {
4574   MOZ_ASSERT(((aAlignment - 1) & aAlignment) == 0);
4575
4576   if (!malloc_init()) {
4577     return nullptr;
4578   }
4579
4580   if (aSize == 0) {
4581     aSize = 1;
4582   }
4583
4584   aAlignment = aAlignment < sizeof(void*) ? sizeof(void*) : aAlignment;
4585   arena_t* arena = mArena ? mArena : choose_arena(aSize);
4586   return arena->Palloc(aAlignment, aSize);
4587 }
4588
4589 inline void* BaseAllocator::calloc(size_t aNum, size_t aSize) {
4590   void* ret;
4591
4592   if (malloc_init()) {
4593     CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aSize;
4594     if (checkedSize.isValid()) {
4595       size_t allocSize = checkedSize.value();
4596       if (allocSize == 0) {
4597         allocSize = 1;
4598       }
4599       arena_t* arena = mArena ? mArena : choose_arena(allocSize);
4600       ret = arena->Malloc(allocSize, /* aZero = */ true);
4601     } else {
4602       ret = nullptr;
4603     }
4604   } else {
4605     ret = nullptr;
4606   }
4607
4608   if (!ret) {
4609     errno = ENOMEM;
4610   }
4611
4612   return ret;
4613 }
4614
4615 inline void* BaseAllocator::realloc(void* aPtr, size_t aSize) {
4616   void* ret;
4617
4618   if (aSize == 0) {
4619     aSize = 1;
4620   }
4621
4622   if (aPtr) {
4623     MOZ_RELEASE_ASSERT(malloc_initialized);
4624
4625     auto info = AllocInfo::Get(aPtr);
4626     auto arena = info.Arena();
4627     MOZ_RELEASE_ASSERT(!mArena || arena == mArena);
4628     ret = arena->Ralloc(aPtr, aSize, info.Size());
4629   } else {
4630     if (!malloc_init()) {
4631       ret = nullptr;
4632     } else {
4633       arena_t* arena = mArena ? mArena : choose_arena(aSize);
4634       ret = arena->Malloc(aSize, /* aZero = */ false);
4635     }
4636   }
4637
4638   if (!ret) {
4639     errno = ENOMEM;
4640   }
4641   return ret;
4642 }
4643
4644 inline void BaseAllocator::free(void* aPtr) {
4645   size_t offset;
4646
4647   // A version of idalloc that checks for nullptr pointer.
4648   offset = GetChunkOffsetForPtr(aPtr);
4649   if (offset != 0) {
4650     MOZ_RELEASE_ASSERT(malloc_initialized);
4651     arena_dalloc(aPtr, offset, mArena);
4652   } else if (aPtr) {
4653     MOZ_RELEASE_ASSERT(malloc_initialized);
4654     huge_dalloc(aPtr, mArena);
4655   }
4656 }
4657
// posix_memalign entry point: delegates to the AlignedAllocator helper
// instantiated with memalign, forwarding all arguments unchanged.
inline int MozJemalloc::posix_memalign(void** aMemPtr, size_t aAlignment,
                                       size_t aSize) {
  return AlignedAllocator<memalign>::posix_memalign(aMemPtr, aAlignment, aSize);
}
4662
// aligned_alloc entry point: delegates to the AlignedAllocator helper
// instantiated with memalign.
inline void* MozJemalloc::aligned_alloc(size_t aAlignment, size_t aSize) {
  return AlignedAllocator<memalign>::aligned_alloc(aAlignment, aSize);
}
4666
// valloc entry point: delegates to the AlignedAllocator helper instantiated
// with memalign.
inline void* MozJemalloc::valloc(size_t aSize) {
  return AlignedAllocator<memalign>::valloc(aSize);
}
4670
4671 // End malloc(3)-compatible functions.
4672 // ***************************************************************************
4673 // Begin non-standard functions.
4674
4675 // This was added by Mozilla for use by SQLite.
4676 size_t MozJemalloc::malloc_good_size(size_t aSize) {
4677   if (aSize <= gMaxLargeClass) {
4678     // Small or large
4679     aSize = SizeClass(aSize).Size();
4680   } else {
4681     // Huge.  We use PAGE_CEILING to get psize, instead of using
4682     // CHUNK_CEILING to get csize.  This ensures that this
4683     // malloc_usable_size(malloc(n)) always matches
4684     // malloc_good_size(n).
4685     aSize = PAGE_CEILING(aSize);
4686   }
4687   return aSize;
4688 }
4689
// Returns the size recorded for the allocation at aPtr, as reported by
// AllocInfo::GetValidated().
size_t MozJemalloc::malloc_usable_size(usable_ptr_t aPtr) {
  return AllocInfo::GetValidated(aPtr).Size();
}
4693
// Fill *aStats with the allocator's configuration and current memory usage
// and, when aBinStats is non-null, fill one jemalloc_bin_stats_t per small
// size class (NUM_SMALL_CLASSES entries).
//
// Does nothing when aStats is null.  When the allocator cannot be
// initialized, *aStats is zeroed and aBinStats is left untouched.
void MozJemalloc::jemalloc_stats_internal(jemalloc_stats_t* aStats,
                                          jemalloc_bin_stats_t* aBinStats) {
  size_t non_arena_mapped, chunk_header_size;

  if (!aStats) {
    return;
  }
  if (!malloc_init()) {
    memset(aStats, 0, sizeof(*aStats));
    return;
  }
  if (aBinStats) {
    // The per-bin counters below are accumulated across arenas with +=, so
    // they must start from zero.
    memset(aBinStats, 0, sizeof(jemalloc_bin_stats_t) * NUM_SMALL_CLASSES);
  }

  // Gather runtime settings.
  aStats->opt_junk = opt_junk;
  aStats->opt_zero = opt_zero;
  aStats->quantum = kQuantum;
  aStats->quantum_max = kMaxQuantumClass;
  aStats->quantum_wide = kQuantumWide;
  aStats->quantum_wide_max = kMaxQuantumWideClass;
  aStats->subpage_max = gMaxSubPageClass;
  aStats->large_max = gMaxLargeClass;
  aStats->chunksize = kChunkSize;
  aStats->page_size = gPageSize;
  aStats->dirty_max = opt_dirty_max;

  // Gather current memory usage statistics.
  aStats->narenas = 0;
  aStats->mapped = 0;
  aStats->allocated = 0;
  aStats->waste = 0;
  aStats->page_cache = 0;
  aStats->bookkeeping = 0;
  aStats->bin_unused = 0;

  // Mapped memory that does not belong to any arena (huge and base
  // allocations); added to aStats->mapped near the end.
  non_arena_mapped = 0;

  // Get huge mapped/allocated.
  {
    MutexAutoLock lock(huge_mtx);
    non_arena_mapped += huge_mapped;
    aStats->allocated += huge_allocated;
    MOZ_ASSERT(huge_mapped >= huge_allocated);
  }

  // Get base mapped/allocated.
  {
    MutexAutoLock lock(base_mtx);
    non_arena_mapped += base_mapped;
    aStats->bookkeeping += base_committed;
    MOZ_ASSERT(base_mapped >= base_committed);
  }

  // Held until the arena iteration below has finished.
  gArenas.mLock.Lock();

  // Stats can only read complete information if it is run on the main thread.
  MOZ_ASSERT(gArenas.IsOnMainThreadWeak());

  // Iterate over arenas.
  for (auto arena : gArenas.iter()) {
    // Cannot safely read stats for this arena and therefore stats would be
    // incomplete.
    MOZ_ASSERT(arena->mLock.SafeOnThisThread());

    size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j,
        arena_unused, arena_headers;

    // Bytes taken by run headers, and bytes of free regions inside runs,
    // summed over all bins of this arena.
    arena_headers = 0;
    arena_unused = 0;

    {
      MaybeMutexAutoLock lock(arena->mLock);

      arena_mapped = arena->mStats.mapped;

      // "committed" counts dirty and allocated memory.
      // mStats.committed is a page count; convert it to bytes.
      arena_committed = arena->mStats.committed << gPageSize2Pow;

      arena_allocated =
          arena->mStats.allocated_small + arena->mStats.allocated_large;

      // mNumDirty is a page count; convert it to bytes.
      arena_dirty = arena->mNumDirty << gPageSize2Pow;

      for (j = 0; j < NUM_SMALL_CLASSES; j++) {
        arena_bin_t* bin = &arena->mBins[j];
        size_t bin_unused = 0;
        size_t num_non_full_runs = 0;

        // Free regions in every run on the bin's non-full list.
        for (auto mapelm : bin->mNonFullRuns.iter()) {
          // Masking off the low (sub-page) bits of the map element yields
          // the address of the run.
          arena_run_t* run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
          bin_unused += run->mNumFree * bin->mSizeClass;
          num_non_full_runs++;
        }

        // The current run is counted separately from mNonFullRuns.
        if (bin->mCurrentRun) {
          bin_unused += bin->mCurrentRun->mNumFree * bin->mSizeClass;
          num_non_full_runs++;
        }

        arena_unused += bin_unused;
        arena_headers += bin->mNumRuns * bin->mRunFirstRegionOffset;
        if (aBinStats) {
          aBinStats[j].size = bin->mSizeClass;
          aBinStats[j].num_non_full_runs += num_non_full_runs;
          aBinStats[j].num_runs += bin->mNumRuns;
          aBinStats[j].bytes_unused += bin_unused;
          size_t bytes_per_run = static_cast<size_t>(bin->mRunSizePages)
                                 << gPageSize2Pow;
          // Usable bytes exclude the header in front of the first region.
          aBinStats[j].bytes_total +=
              bin->mNumRuns * (bytes_per_run - bin->mRunFirstRegionOffset);
          aBinStats[j].bytes_per_run = bytes_per_run;
        }
      }
    }

    MOZ_ASSERT(arena_mapped >= arena_committed);
    MOZ_ASSERT(arena_committed >= arena_allocated + arena_dirty);

    aStats->mapped += arena_mapped;
    aStats->allocated += arena_allocated;
    aStats->page_cache += arena_dirty;
    // "waste" is committed memory that is neither dirty nor
    // allocated.  If you change this definition please update
    // memory/replace/logalloc/replay/Replay.cpp's jemalloc_stats calculation of
    // committed.
    aStats->waste += arena_committed - arena_allocated - arena_dirty -
                     arena_unused - arena_headers;
    aStats->bin_unused += arena_unused;
    aStats->bookkeeping += arena_headers;
    aStats->narenas++;
  }
  gArenas.mLock.Unlock();

  // Account for arena chunk headers in bookkeeping rather than waste.
  // At this point aStats->mapped holds arena-mapped memory only, so dividing
  // by the chunk size gives the number of arena chunks.
  chunk_header_size =
      ((aStats->mapped / aStats->chunksize) * gChunkHeaderNumPages)
      << gPageSize2Pow;

  aStats->mapped += non_arena_mapped;
  aStats->bookkeeping += chunk_header_size;
  aStats->waste -= chunk_header_size;

  MOZ_ASSERT(aStats->mapped >= aStats->allocated + aStats->waste +
                                   aStats->page_cache + aStats->bookkeeping);
}
4841
// Returns the number of small size classes, which is the number of
// jemalloc_bin_stats_t entries jemalloc_stats_internal() writes.
inline size_t MozJemalloc::jemalloc_stats_num_bins() {
  return NUM_SMALL_CLASSES;
}
4845
// Forwards to gArenas.SetMainThread().  The allocator must already be
// initialized (checked by assertion only).
inline void MozJemalloc::jemalloc_set_main_thread() {
  MOZ_ASSERT(malloc_initialized);
  gArenas.SetMainThread();
}
4850
4851 #ifdef MALLOC_DOUBLE_PURGE
4852
4853 // Explicitly remove all of this chunk's MADV_FREE'd pages from memory.
4854 static void hard_purge_chunk(arena_chunk_t* aChunk) {
4855   // See similar logic in arena_t::Purge().
4856   for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages; i++) {
4857     // Find all adjacent pages with CHUNK_MAP_MADVISED set.
4858     size_t npages;
4859     for (npages = 0; aChunk->map[i + npages].bits & CHUNK_MAP_MADVISED &&
4860                      i + npages < gChunkNumPages;
4861          npages++) {
4862       // Turn off the chunk's MADV_FREED bit and turn on its
4863       // DECOMMITTED bit.
4864       MOZ_DIAGNOSTIC_ASSERT(
4865           !(aChunk->map[i + npages].bits & CHUNK_MAP_DECOMMITTED));
4866       aChunk->map[i + npages].bits ^= CHUNK_MAP_MADVISED_OR_DECOMMITTED;
4867     }
4868
4869     // We could use mincore to find out which pages are actually
4870     // present, but it's not clear that's better.
4871     if (npages > 0) {
4872       pages_decommit(((char*)aChunk) + (i << gPageSize2Pow),
4873                      npages << gPageSize2Pow);
4874       Unused << pages_commit(((char*)aChunk) + (i << gPageSize2Pow),
4875                              npages << gPageSize2Pow);
4876     }
4877     i += npages;
4878   }
4879 }
4880
4881 // Explicitly remove all of this arena's MADV_FREE'd pages from memory.
4882 void arena_t::HardPurge() {
4883   MaybeMutexAutoLock lock(mLock);
4884
4885   while (!mChunksMAdvised.isEmpty()) {
4886     arena_chunk_t* chunk = mChunksMAdvised.popFront();
4887     hard_purge_chunk(chunk);
4888   }
4889 }
4890
4891 inline void MozJemalloc::jemalloc_purge_freed_pages() {
4892   if (malloc_initialized) {
4893     MutexAutoLock lock(gArenas.mLock);
4894     MOZ_ASSERT(gArenas.IsOnMainThreadWeak());
4895     for (auto arena : gArenas.iter()) {
4896       arena->HardPurge();
4897     }
4898   }
4899 }
4900
4901 #else  // !defined MALLOC_DOUBLE_PURGE
4902
// Variant used when MALLOC_DOUBLE_PURGE is not defined: there is nothing to
// purge, so this is a no-op.
inline void MozJemalloc::jemalloc_purge_freed_pages() {
  // Do nothing.
}
4906
4907 #endif  // defined MALLOC_DOUBLE_PURGE
4908
4909 inline void MozJemalloc::jemalloc_free_dirty_pages(void) {
4910   if (malloc_initialized) {
4911     MutexAutoLock lock(gArenas.mLock);
4912     MOZ_ASSERT(gArenas.IsOnMainThreadWeak());
4913     for (auto arena : gArenas.iter()) {
4914       MaybeMutexAutoLock arena_lock(arena->mLock);
4915       arena->Purge(1);
4916     }
4917   }
4918 }
4919
// Look up the arena with id aArenaId in aTree and return whatever
// aTree.Search() returns for it.  Takes no lock itself; any locking is left
// to the caller.
inline arena_t* ArenaCollection::GetByIdInternal(Tree& aTree,
                                                 arena_id_t aArenaId) {
  // Use AlignedStorage2 to avoid running the arena_t constructor, while
  // we only need it as a placeholder for mId.
  mozilla::AlignedStorage2<arena_t> key;
  // Only mId is set on the search key; the rest of the storage stays
  // uninitialized.
  key.addr()->mId = aArenaId;
  return aTree.Search(key.addr());
}
4928
4929 inline arena_t* ArenaCollection::GetById(arena_id_t aArenaId, bool aIsPrivate) {
4930   if (!malloc_initialized) {
4931     return nullptr;
4932   }
4933
4934   Tree* tree = nullptr;
4935   if (aIsPrivate) {
4936     if (ArenaIdIsMainThreadOnly(aArenaId)) {
4937       // Main thread only arena.  Do the lookup here without taking the lock.
4938       arena_t* result = GetByIdInternal(mMainThreadArenas, aArenaId);
4939       MOZ_RELEASE_ASSERT(result);
4940       return result;
4941     }
4942     tree = &mPrivateArenas;
4943   } else {
4944     tree = &mArenas;
4945   }
4946
4947   MutexAutoLock lock(mLock);
4948   arena_t* result = GetByIdInternal(*tree, aArenaId);
4949   MOZ_RELEASE_ASSERT(result);
4950   return result;
4951 }
4952
4953 inline arena_id_t MozJemalloc::moz_create_arena_with_params(
4954     arena_params_t* aParams) {
4955   if (malloc_init()) {
4956     arena_t* arena = gArenas.CreateArena(/* IsPrivate = */ true, aParams);
4957     return arena->mId;
4958   }
4959   return 0;
4960 }
4961
// Dispose of the private arena with id aArenaId.  The arena must exist: the
// lookup result is release-asserted before it is handed to DisposeArena().
inline void MozJemalloc::moz_dispose_arena(arena_id_t aArenaId) {
  arena_t* arena = gArenas.GetById(aArenaId, /* IsPrivate = */ true);
  MOZ_RELEASE_ASSERT(arena);
  gArenas.DisposeArena(arena);
}
4967
// Forwards aModifier to gArenas.SetDefaultMaxDirtyPageModifier().
inline void MozJemalloc::moz_set_max_dirty_page_modifier(int32_t aModifier) {
  gArenas.SetDefaultMaxDirtyPageModifier(aModifier);
}
4971
// Define MozJemalloc::moz_arena_<name> for each function in the
// MALLOC_FUNCS_MALLOC_BASE group.  Each definition takes an arena id as its
// first argument, looks up the matching private arena with gArenas.GetById()
// and forwards the remaining arguments to a BaseAllocator bound to it.
#define MALLOC_DECL(name, return_type, ...)                          \
  return_type MozJemalloc::moz_arena_##name(                         \
      arena_id_t aArenaId, ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
    BaseAllocator allocator(                                         \
        gArenas.GetById(aArenaId, /* IsPrivate = */ true));          \
    return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__));         \
  }
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
#include "malloc_decls.h"
4981
4982 // End non-standard functions.
4983 // ***************************************************************************
4984 #ifndef XP_WIN
4985 // Begin library-private functions, used by threading libraries for protection
4986 // of malloc during fork().  These functions are only called if the program is
4987 // running in threaded mode, so there is no need to check whether the program
4988 // is threaded here.
4989 //
4990 // Note that the only way to keep the main-thread-only arenas in a consistent
// state for the child is if fork is called from the main thread only.  Or the
// child must not use them, e.g. it should call exec().  We attempt to prevent
// the child from accessing these arenas by refusing to re-initialise them.
// Thread that called fork(); recorded by _malloc_prefork() and passed to each
// arena lock's Reinit() in _malloc_postfork_child().
static pthread_t gForkingThread;

// Fork hook run before fork(): takes every allocator mutex so that none of
// them is held by another thread at the moment the process is duplicated.
// _malloc_postfork_parent() releases them in the reverse order.
FORK_HOOK
void _malloc_prefork(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
  // Acquire all mutexes in a safe order.
  gArenas.mLock.Lock();
  gForkingThread = pthread_self();

  for (auto arena : gArenas.iter()) {
    // Only arenas whose lock is enabled are locked here.
    if (arena->mLock.LockIsEnabled()) {
      arena->mLock.Lock();
    }
  }

  base_mtx.Lock();

  huge_mtx.Lock();
}
5012
// Fork hook run in the parent after fork(): unlocks the mutexes taken by
// _malloc_prefork(), in the reverse order of acquisition.
FORK_HOOK
void _malloc_postfork_parent(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
  // Release all mutexes, now that fork() has completed.
  huge_mtx.Unlock();

  base_mtx.Unlock();

  for (auto arena : gArenas.iter()) {
    // Mirrors the check in _malloc_prefork(): only enabled locks were taken.
    if (arena->mLock.LockIsEnabled()) {
      arena->mLock.Unlock();
    }
  }

  gArenas.mLock.Unlock();
}
5028
// Fork hook run in the child after fork(): instead of unlocking, the mutexes
// are initialized afresh.
FORK_HOOK
void _malloc_postfork_child(void) {
  // Reinitialize all mutexes, now that fork() has completed.
  huge_mtx.Init();

  base_mtx.Init();

  for (auto arena : gArenas.iter()) {
    // Arena locks are reinitialized with the identity of the forking thread.
    arena->mLock.Reinit(gForkingThread);
  }

  gArenas.PostForkFixMainThread();
  gArenas.mLock.Init();
}
5043 #endif  // XP_WIN
5044
5045 // End library-private functions.
5046 // ***************************************************************************
5047 #ifdef MOZ_REPLACE_MALLOC
// Windows doesn't come with weak imports as they are possible with
// LD_PRELOAD or DYLD_INSERT_LIBRARIES on Linux/OSX. On this platform,
// the replacement functions are defined as variable pointers to the
// function resolved with GetProcAddress() instead of weak definitions
// of functions. On Android, the same needs to happen as well, because
// the Android linker doesn't handle weak linking with non LD_PRELOADed
// libraries, but LD_PRELOADing is not very convenient on Android, with
// the zygote.
//
// Darwin and other GCC-compatible targets: mark the replace functions with
// the platform's weak attribute.
// Windows and Android: request dynamic resolution of replace_init, and rename
// replace_init to replace_init_decl while replace_malloc.h is included
// (presumably so the header's declaration does not clash with the function
// pointer of the same name).
#  ifdef XP_DARWIN
#    define MOZ_REPLACE_WEAK __attribute__((weak_import))
#  elif defined(XP_WIN) || defined(ANDROID)
#    define MOZ_DYNAMIC_REPLACE_INIT
#    define replace_init replace_init_decl
#  elif defined(__GNUC__)
#    define MOZ_REPLACE_WEAK __attribute__((weak))
#  endif
5064
5065 #  include "replace_malloc.h"
5066
// Each MALLOC_DECL entry produced by malloc_decls.h expands to the matching
// CanonicalMalloc function followed by a comma, so including the header inside
// a braced initializer fills a malloc_table_t.
// NOTE(review): the macro is defined once but the header is included three
// times below, so malloc_decls.h presumably leaves MALLOC_DECL defined when
// MALLOC_FUNCS is not set - confirm.
#  define MALLOC_DECL(name, return_type, ...) CanonicalMalloc::name,

// The default malloc table, i.e. plain allocations. It never changes. It's
// used by init(), and not used after that.
static const malloc_table_t gDefaultMallocTable = {
#  include "malloc_decls.h"
};

// The malloc table installed by init(). It never changes from that point
// onward. It will be the same as gDefaultMallocTable if no replace-malloc tool
// is enabled at startup.
static malloc_table_t gOriginalMallocTable = {
#  include "malloc_decls.h"
};

// The malloc table installed by jemalloc_replace_dynamic(). (Read the
// comments above that function for more details.)
static malloc_table_t gDynamicMallocTable = {
#  include "malloc_decls.h"
};

// This briefly points to gDefaultMallocTable at startup. After that, it points
// to either gOriginalMallocTable or gDynamicMallocTable. It's atomic to avoid
// races when switching between tables.
static Atomic<malloc_table_t const*, mozilla::MemoryOrdering::Relaxed>
    gMallocTablePtr;
5093
// With dynamic resolution, replace_init is a function pointer that starts out
// null.  The temporary rename to replace_init_decl is undone here, and the
// pointer's type is taken from the replace_init_decl declaration.
#  ifdef MOZ_DYNAMIC_REPLACE_INIT
#    undef replace_init
typedef decltype(replace_init_decl) replace_init_impl_t;
static replace_init_impl_t* replace_init = nullptr;
#  endif
5099
#  ifdef XP_WIN
typedef HMODULE replace_malloc_handle_t;

// Load the library named by the MOZ_REPLACE_MALLOC_LIB environment variable.
// Returns the module handle, or nullptr if the variable is unset or empty,
// if its value does not fit in the local buffer, or if loading fails.
static replace_malloc_handle_t replace_malloc_handle() {
  wchar_t replace_malloc_lib[1024];
  const DWORD length =
      GetEnvironmentVariableW(L"MOZ_REPLACE_MALLOC_LIB", replace_malloc_lib,
                              ArrayLength(replace_malloc_lib));
  // On success the return value is the number of characters stored, without
  // the terminator.  A value that is not smaller than the buffer size means
  // the buffer was too small and its contents are undefined, so it must not
  // be passed to LoadLibraryW.
  if (length > 0 && length < ArrayLength(replace_malloc_lib)) {
    return LoadLibraryW(replace_malloc_lib);
  }
  return nullptr;
}

// Resolve the "replace_init" export of a loaded replace-malloc library.
#    define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
      (replace_init_impl_t*)GetProcAddress(handle, "replace_init")

#  elif defined(ANDROID)
#    include <dlfcn.h>

typedef void* replace_malloc_handle_t;

// Load the library named by the MOZ_REPLACE_MALLOC_LIB environment variable.
// Returns the dlopen() handle, or nullptr if the variable is unset or empty
// or if loading fails.
static replace_malloc_handle_t replace_malloc_handle() {
  const char* replace_malloc_lib = getenv("MOZ_REPLACE_MALLOC_LIB");
  if (replace_malloc_lib && *replace_malloc_lib) {
    return dlopen(replace_malloc_lib, RTLD_LAZY);
  }
  return nullptr;
}

// Resolve the "replace_init" symbol of a loaded replace-malloc library.
#    define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
      (replace_init_impl_t*)dlsym(handle, "replace_init")

#  endif
5132
// Defined later in this file; init() calls it when the malloc table was
// changed from the default.
static void replace_malloc_init_funcs(malloc_table_t*);

#  ifdef MOZ_REPLACE_MALLOC_STATIC
// Initializers of the statically linked replace-malloc libraries; init()
// calls them with the table to modify and the bridge pointer to fill in.
extern "C" void logalloc_init(malloc_table_t*, ReplaceMallocBridge**);

extern "C" void dmd_init(malloc_table_t*, ReplaceMallocBridge**);
#  endif

// PHC's initializer, with the same signature as the ones above.
void phc_init(malloc_table_t*, ReplaceMallocBridge**);
5142
5143 bool Equals(const malloc_table_t& aTable1, const malloc_table_t& aTable2) {
5144   return memcmp(&aTable1, &aTable2, sizeof(malloc_table_t)) == 0;
5145 }
5146
// Below is the malloc implementation overriding jemalloc and calling the
// replacement functions if they exist.
static ReplaceMallocBridge* gReplaceMallocBridge = nullptr;

// Builds gOriginalMallocTable and points gMallocTablePtr at it.
//
// Starting from a copy of gDefaultMallocTable, each available initializer
// (replace_init, then logalloc_init and dmd_init in static builds) is offered
// the copy; the static ones are only tried while the copy is still identical
// to the default table. If the copy ended up modified,
// replace_malloc_init_funcs() fills in the entries that can be derived from
// others.
//
// Called from the ReplaceMalloc entry points and get_bridge() when they find
// gMallocTablePtr null.
// NOTE(review): nothing in this function guards against two threads running
// it concurrently -- presumably the first call happens before other threads
// exist; confirm.
static void init() {
  // Scratch copy that the initializers are allowed to edit.
  malloc_table_t tempTable = gDefaultMallocTable;

#  ifdef MOZ_DYNAMIC_REPLACE_INIT
  // Resolve replace_init at runtime from the library named by
  // MOZ_REPLACE_MALLOC_LIB, if any. The handle is not released here.
  replace_malloc_handle_t handle = replace_malloc_handle();
  if (handle) {
    replace_init = REPLACE_MALLOC_GET_INIT_FUNC(handle);
  }
#  endif

  // Set this *before* calling replace_init, otherwise if replace_init calls
  // malloc() we'll get an infinite loop.
  gMallocTablePtr = &gDefaultMallocTable;

  // Pass in the default allocator table so replace functions can copy and use
  // it for their allocations. The replace_init() function should modify the
  // table if it wants to be active, otherwise leave it unmodified.
  if (replace_init) {
    replace_init(&tempTable, &gReplaceMallocBridge);
  }
#  ifdef MOZ_REPLACE_MALLOC_STATIC
  // Statically linked replacements are only tried while no earlier
  // initializer has modified the table.
  if (Equals(tempTable, gDefaultMallocTable)) {
    logalloc_init(&tempTable, &gReplaceMallocBridge);
  }
#    ifdef MOZ_DMD
  if (Equals(tempTable, gDefaultMallocTable)) {
    dmd_init(&tempTable, &gReplaceMallocBridge);
  }
#    endif
#  endif
  // Only a modified table needs its derived entries filled in.
  if (!Equals(tempTable, gDefaultMallocTable)) {
    replace_malloc_init_funcs(&tempTable);
  }
  // Publish the final table; from here on calls dispatch through it.
  gOriginalMallocTable = tempTable;
  gMallocTablePtr = &gOriginalMallocTable;

#  ifdef MOZ_PHC
  // For now PHC still uses the bridge, so if no other allocator registered a
  // bridge then register PHC's now.
  if (!gReplaceMallocBridge) {
    gReplaceMallocBridge = GetPHCBridge();
  }
#  endif
}
5194
5195 // WARNING WARNING WARNING: this function should be used with extreme care. It
5196 // is not as general-purpose as it looks. It is currently used by
5197 // tools/profiler/core/memory_hooks.cpp for counting allocations and probably
5198 // should not be used for any other purpose.
5199 //
5200 // This function allows the original malloc table to be temporarily replaced by
5201 // a different malloc table. Or, if the argument is nullptr, it switches back to
5202 // the original malloc table.
5203 //
5204 // Limitations:
5205 //
5206 // - It is not threadsafe. If multiple threads pass it the same
5207 //   `replace_init_func` at the same time, there will be data races writing to
5208 //   the malloc_table_t within that function.
5209 //
5210 // - Only one replacement can be installed. No nesting is allowed.
5211 //
5212 // - The new malloc table must be able to free allocations made by the original
5213 //   malloc table, and upon removal the original malloc table must be able to
5214 //   free allocations made by the new malloc table. This means the new malloc
5215 //   table can only do simple things like recording extra information, while
5216 //   delegating actual allocation/free operations to the original malloc table.
5217 //
5218 MOZ_JEMALLOC_API void jemalloc_replace_dynamic(
5219     jemalloc_init_func replace_init_func) {
5220   if (replace_init_func) {
5221     malloc_table_t tempTable = gOriginalMallocTable;
5222     (*replace_init_func)(&tempTable, &gReplaceMallocBridge);
5223     if (!Equals(tempTable, gOriginalMallocTable)) {
5224       replace_malloc_init_funcs(&tempTable);
5225
5226       // Temporarily switch back to the original malloc table. In the
5227       // (supported) non-nested case, this is a no-op. But just in case this is
5228       // a (unsupported) nested call, it makes the overwriting of
5229       // gDynamicMallocTable less racy, because ongoing calls to malloc() and
5230       // friends won't go through gDynamicMallocTable.
5231       gMallocTablePtr = &gOriginalMallocTable;
5232
5233       gDynamicMallocTable = tempTable;
5234       gMallocTablePtr = &gDynamicMallocTable;
5235       // We assume that dynamic replaces don't occur close enough for a
5236       // thread to still have old copies of the table pointer when the 2nd
5237       // replace occurs.
5238     }
5239   } else {
5240     // Switch back to the original malloc table.
5241     gMallocTablePtr = &gOriginalMallocTable;
5242   }
5243 }
5244
// Defines the ReplaceMalloc::<name> entry points. Each one calls init() first
// if gMallocTablePtr is still null, then forwards its arguments to the
// function of the same name in the table gMallocTablePtr currently points to.
// NOTE(review): presumably malloc_decls.h expands MALLOC_DECL once per
// allocator function and #undefs it afterwards -- confirm in that header.
#  define MALLOC_DECL(name, return_type, ...)                           \
    inline return_type ReplaceMalloc::name(                             \
        ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) {                       \
      if (MOZ_UNLIKELY(!gMallocTablePtr)) {                             \
        init();                                                         \
      }                                                                 \
      return (*gMallocTablePtr).name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
    }
#  include "malloc_decls.h"
5254
5255 MOZ_JEMALLOC_API struct ReplaceMallocBridge* get_bridge(void) {
5256   if (MOZ_UNLIKELY(!gMallocTablePtr)) {
5257     init();
5258   }
5259   return gReplaceMallocBridge;
5260 }
5261
5262 // posix_memalign, aligned_alloc, memalign and valloc all implement some kind
5263 // of aligned memory allocation. For convenience, a replace-malloc library can
5264 // skip defining replace_posix_memalign, replace_aligned_alloc and
5265 // replace_valloc, and default implementations will be automatically derived
5266 // from replace_memalign.
5267 static void replace_malloc_init_funcs(malloc_table_t* table) {
5268   if (table->posix_memalign == CanonicalMalloc::posix_memalign &&
5269       table->memalign != CanonicalMalloc::memalign) {
5270     table->posix_memalign =
5271         AlignedAllocator<ReplaceMalloc::memalign>::posix_memalign;
5272   }
5273   if (table->aligned_alloc == CanonicalMalloc::aligned_alloc &&
5274       table->memalign != CanonicalMalloc::memalign) {
5275     table->aligned_alloc =
5276         AlignedAllocator<ReplaceMalloc::memalign>::aligned_alloc;
5277   }
5278   if (table->valloc == CanonicalMalloc::valloc &&
5279       table->memalign != CanonicalMalloc::memalign) {
5280     table->valloc = AlignedAllocator<ReplaceMalloc::memalign>::valloc;
5281   }
5282   if (table->moz_create_arena_with_params ==
5283           CanonicalMalloc::moz_create_arena_with_params &&
5284       table->malloc != CanonicalMalloc::malloc) {
5285 #  define MALLOC_DECL(name, ...) \
5286     table->name = DummyArenaAllocator<ReplaceMalloc>::name;
5287 #  define MALLOC_FUNCS MALLOC_FUNCS_ARENA_BASE
5288 #  include "malloc_decls.h"
5289   }
5290   if (table->moz_arena_malloc == CanonicalMalloc::moz_arena_malloc &&
5291       table->malloc != CanonicalMalloc::malloc) {
5292 #  define MALLOC_DECL(name, ...) \
5293     table->name = DummyArenaAllocator<ReplaceMalloc>::name;
5294 #  define MALLOC_FUNCS MALLOC_FUNCS_ARENA_ALLOC
5295 #  include "malloc_decls.h"
5296   }
5297 }
5298
5299 #endif  // MOZ_REPLACE_MALLOC
// ***************************************************************************
// Definition of all the _impl functions
// GENERIC_MALLOC_DECL2_MINGW is only used for the MinGW build, and aliases
// the malloc funcs (e.g. malloc) to the je_ versions. It does not generate
// aliases for the other functions (jemalloc and arena functions).
//
// We do need aliases for the other mozglue.def-redirected functions though;
// these are done at the bottom of mozmemory_wrap.cpp.
//
// The macro expands to a declaration of `name` carrying an `alias` attribute
// whose target is the stringified `name_impl`.
#define GENERIC_MALLOC_DECL2_MINGW(name, name_impl, return_type, ...) \
  return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__))            \
      __attribute__((alias(MOZ_STRINGIFY(name_impl))));
5311
// Defines a function called `name_impl` with the given return type and typed
// arguments, followed by `attributes` (e.g. a noexcept specifier, or nothing).
// Its body forwards all arguments to DefaultMalloc::name and returns the
// result.
#define GENERIC_MALLOC_DECL2(attributes, name, name_impl, return_type, ...)  \
  return_type name_impl(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) attributes { \
    return DefaultMalloc::name(ARGS_HELPER(ARGS, ##__VA_ARGS__));            \
  }
5316
// GENERIC_MALLOC_DECL(attributes, name, return_type, args...) defines the
// forwarding function under the name `<name>_impl`.
#ifndef __MINGW32__
#  define GENERIC_MALLOC_DECL(attributes, name, return_type, ...)    \
    GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
                         ##__VA_ARGS__)
#else
// MinGW: same definition, plus a declaration of `name` as an alias of
// `<name>_impl` via GENERIC_MALLOC_DECL2_MINGW.
#  define GENERIC_MALLOC_DECL(attributes, name, return_type, ...)    \
    GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
                         ##__VA_ARGS__)                              \
    GENERIC_MALLOC_DECL2_MINGW(name, name##_impl, return_type, ##__VA_ARGS__)
#endif
5327
// Instantiate GENERIC_MALLOC_DECL for the MALLOC_FUNCS_MALLOC group, prefixed
// with MOZ_MEMORY_API. NOTHROW_MALLOC_DECL passes noexcept(true) as the
// `attributes` argument; MALLOC_DECL passes an empty one.
// NOTE(review): presumably malloc_decls.h chooses between the two macros per
// function and #undefs them afterwards -- confirm in that header.
#define NOTHROW_MALLOC_DECL(...) \
  MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (noexcept(true), __VA_ARGS__))
#define MALLOC_DECL(...) \
  MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
#include "malloc_decls.h"
5334
// Redefine GENERIC_MALLOC_DECL so that the generated function is called
// `name` itself (no `_impl` suffix), then instantiate it, prefixed with
// MOZ_JEMALLOC_API and with empty `attributes`, for the MALLOC_FUNCS_JEMALLOC
// and MALLOC_FUNCS_ARENA groups.
#undef GENERIC_MALLOC_DECL
#define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \
  GENERIC_MALLOC_DECL2(attributes, name, name, return_type, ##__VA_ARGS__)

#define MALLOC_DECL(...) \
  MOZ_JEMALLOC_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
#define MALLOC_FUNCS (MALLOC_FUNCS_JEMALLOC | MALLOC_FUNCS_ARENA)
#include "malloc_decls.h"
5343 // ***************************************************************************
5344
5345 #ifdef HAVE_DLOPEN
5346 #  include <dlfcn.h>
5347 #endif
5348
5349 #if defined(__GLIBC__) && !defined(__UCLIBC__)
5350 // glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
5351 // to inconsistently reference libc's malloc(3)-compatible functions
5352 // (bug 493541).
5353 //
5354 // These definitions interpose hooks in glibc.  The functions are actually
5355 // passed an extra argument for the caller return address, which will be
5356 // ignored.
5357
extern "C" {
// Exported definitions of the four glibc hook variables, each initialized to
// the matching *_impl function. The declared pointer types omit the extra
// caller-address argument glibc passes.
MOZ_EXPORT void (*__free_hook)(void*) = free_impl;
MOZ_EXPORT void* (*__malloc_hook)(size_t) = malloc_impl;
MOZ_EXPORT void* (*__realloc_hook)(void*, size_t) = realloc_impl;
MOZ_EXPORT void* (*__memalign_hook)(size_t, size_t) = memalign_impl;
}
5364
5365 #elif defined(RTLD_DEEPBIND)
5366 // XXX On systems that support RTLD_GROUP or DF_1_GROUP, do their
5367 // implementations permit similar inconsistencies?  Should STV_SINGLETON
5368 // visibility be used for interposition where available?
5369 #  error \
5370       "Interposing malloc is unsafe on this system without libc malloc hooks."
5371 #endif
5372
5373 #ifdef XP_WIN
5374 MOZ_EXPORT void* _recalloc(void* aPtr, size_t aCount, size_t aSize) {
5375   size_t oldsize = aPtr ? AllocInfo::Get(aPtr).Size() : 0;
5376   CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aCount) * aSize;
5377
5378   if (!checkedSize.isValid()) {
5379     return nullptr;
5380   }
5381
5382   size_t newsize = checkedSize.value();
5383
5384   // In order for all trailing bytes to be zeroed, the caller needs to
5385   // use calloc(), followed by recalloc().  However, the current calloc()
5386   // implementation only zeros the bytes requested, so if recalloc() is
5387   // to work 100% correctly, calloc() will need to change to zero
5388   // trailing bytes.
5389   aPtr = DefaultMalloc::realloc(aPtr, newsize);
5390   if (aPtr && oldsize < newsize) {
5391     memset((void*)((uintptr_t)aPtr + oldsize), 0, newsize - oldsize);
5392   }
5393
5394   return aPtr;
5395 }
5396
5397 // This impl of _expand doesn't ever actually expand or shrink blocks: it
5398 // simply replies that you may continue using a shrunk block.
5399 MOZ_EXPORT void* _expand(void* aPtr, size_t newsize) {
5400   if (AllocInfo::Get(aPtr).Size() >= newsize) {
5401     return aPtr;
5402   }
5403
5404   return nullptr;
5405 }
5406
5407 MOZ_EXPORT size_t _msize(void* aPtr) {
5408   return DefaultMalloc::malloc_usable_size(aPtr);
5409 }
5410 #endif