hphp/util/alloc.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/util/alloc.h"

#include <atomic>
#include <mutex>

#include <stdlib.h>
#include <errno.h>
#include <unistd.h>

#ifdef __APPLE__
#include <sys/sysctl.h>
#endif

#include <folly/portability/SysMman.h>
#include <folly/portability/SysResource.h>

#include "hphp/util/address-range.h"
#include "hphp/util/bump-mapper.h"
#include "hphp/util/extent-hooks.h"
#include "hphp/util/hugetlb.h"
#include "hphp/util/kernel-version.h"
#include "hphp/util/managed-arena.h"
#include "hphp/util/numa.h"
#include "hphp/util/slab-manager.h"

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

void flush_thread_caches() {
#ifdef USE_JEMALLOC
  mallctlCall<true>("thread.tcache.flush");
#if USE_JEMALLOC_EXTENT_HOOKS
  arenas_thread_flush();
#endif
#endif
}

__thread int32_t s_numaNode;

__thread uintptr_t s_stackLimit;
__thread size_t s_stackSize;
const size_t s_pageSize = sysconf(_SC_PAGESIZE);

__thread MemBlock s_tlSpace;
__thread MemBlock s_hugeRange;

__thread TLStaticArena* tl_static_arena;
bool s_enable_static_arena = false;
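
// Returns an address one page below the current frame pointer.
// flush_thread_stack() uses it as a conservative upper bound for the part of
// the native stack that may be discarded, leaving a one-page margin for the
// frames of madvise() and its callees.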
static NEVER_INLINE uintptr_t get_stack_top() {
  using ActRec = char;
  DECLARE_FRAME_POINTER(fp);
  return uintptr_t(fp) - s_pageSize;
}

void init_stack_limits(pthread_attr_t* attr) {
  size_t stacksize, guardsize;
  void *stackaddr;
  struct rlimit rlim;

#ifndef __APPLE__
  if (pthread_attr_getstack(attr, &stackaddr, &stacksize) != 0) {
    always_assert(false);
  }
#else
  // We must use the following (undocumented) APIs because pthread_attr_getstack
  // returns incorrect values on OSX.
  pthread_t self = pthread_self();
  stackaddr = pthread_get_stackaddr_np(self);
  stacksize = pthread_get_stacksize_np(self);

  // On OSX 10.9, we are lied to about the main thread's stack size.  Set it to
  // the minimum stack size, which is set earlier by execute_program_impl.
  if (pthread_main_np() == 1) {
    if (s_stackSize < kStackSizeMinimum) {
      char osRelease[256];
      size_t osReleaseSize = sizeof(osRelease);
      if (sysctlbyname("kern.osrelease", osRelease, &osReleaseSize,
                       nullptr, 0) == 0) {
        if (atoi(osRelease) >= 13) {
          stacksize = kStackSizeMinimum;
        }
      }
    }
  }

  // stackaddr is not base, but top of the stack. Yes, really.
  stackaddr = ((char*) stackaddr) - stacksize;
#endif

  // Get the guard page's size, because the stack address returned
  // above starts at the guard page, so the thread's stack limit is
  // stackaddr + guardsize.
  if (pthread_attr_getguardsize(attr, &guardsize) != 0) {
    guardsize = 0;
  }

  assert(stackaddr != nullptr);
  assert(stacksize >= PTHREAD_STACK_MIN);
  s_stackLimit = uintptr_t(stackaddr) + guardsize;
  s_stackSize = stacksize - guardsize;

  // The main thread's native stack may be larger than desired if
  // set_stack_size() failed.  Make sure that even if the native stack is
  // extremely large (in which case anonymous mmap() could map some of the
  // "stack space"), we can differentiate between the part of the native stack
  // that could conceivably be used in practice and all anonymous mmap() memory.
  if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur == RLIM_INFINITY &&
      s_stackSize > kStackSizeMinimum) {
    s_stackLimit += s_stackSize - kStackSizeMinimum;
    s_stackSize = kStackSizeMinimum;
  }
}
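
// Release the unused portion of the current thread's stack back to the OS:
// madvise(MADV_DONTNEED) everything between the stack limit and one page
// below the currently executing frame (capped at the base of the thread's
// huge-page region, if one exists).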
void flush_thread_stack() {
  uintptr_t top = get_stack_top() & ~(s_pageSize - 1);
  auto const hugeBase = reinterpret_cast<uintptr_t>(s_hugeRange.ptr);
  if (top > hugeBase) top = hugeBase;
  if (top <= s_stackLimit) return;
  size_t len = top - s_stackLimit;
  if (madvise((void*)s_stackLimit, len, MADV_DONTNEED) != 0 &&
      errno != EAGAIN) {
    fprintf(stderr, "%s failed to madvise with error %d\n", __func__, errno);
  }
}

ssize_t purgeable_bytes() {
#ifdef USE_JEMALLOC
  return s_pageSize * mallctl_all_pdirty();
#else
  return 0;
#endif
}

#if !defined USE_JEMALLOC || !defined HAVE_NUMA
void set_numa_binding(int node) {}
void* mallocx_on_node(size_t size, int node, size_t align) {
  void* ret = nullptr;
  posix_memalign(&ret, align, size);
  return ret;
}
#endif

#ifdef USE_JEMALLOC
unsigned low_arena = 0;
unsigned lower_arena = 0;
unsigned low_cold_arena = 0;
unsigned high_arena = 0;
unsigned high_cold_arena = 0;
__thread unsigned local_arena = 0;

int low_arena_flags = 0;
int lower_arena_flags = 0;
int low_cold_arena_flags = 0;
int high_cold_arena_flags = 0;
__thread int high_arena_flags = 0;
__thread int local_arena_flags = 0;

#if USE_JEMALLOC_EXTENT_HOOKS
// Keep track of the size of recently freed memory that might be in the high1g
// arena when it is disabled, so that we know when to reenable it.
std::atomic_uint g_highArenaRecentlyFreed;

alloc::BumpFileMapper* cold_file_mapper = nullptr;

// Customized hooks to use 1g pages for jemalloc metadata.
static extent_hooks_t huge_page_metadata_hooks;
static extent_alloc_t* orig_alloc = nullptr;

static bool enableArenaMetadata1GPage = false;
static bool enableNumaArenaMetadata1GPage = false;
// jemalloc metadata is allocated through the internal base allocator, which
// expands memory with an increasingly larger sequence.  The default reserved
// space (216MB) is a sum of the sequence, from 2MB to 40MB.
static size_t a0MetadataReservedSize = 0;
static std::atomic<bool> jemallocMetadataCanUseHuge(false);
static void* a0ReservedBase = nullptr;
static std::atomic<size_t> a0ReservedLeft(0);

// Explicit per-thread tcache for arenas that need it.
// In jemalloc/include/jemalloc/jemalloc_macros.h.in, we have
// #define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1)
__thread int high_arena_tcache = -1;
__thread int local_arena_tcache = -1;
#endif

static unsigned base_arena;

#ifdef HAVE_NUMA
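
// Bind the current thread to the given NUMA node: direct its jemalloc
// allocations to the per-node arena and, when NUMA support is active,
// restrict its CPU affinity and memory policy to that node.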
void set_numa_binding(int node) {
  if (node < 0) return;                 // thread not created from JobQueue
  s_numaNode = node;
  unsigned arena = base_arena + node;
  mallctlWrite("thread.arena", arena);

  if (use_numa) {
    numa_sched_setaffinity(0, node_to_cpu_mask[node]);
    numa_set_interleave_mask(numa_no_nodes_ptr);
    bitmask* nodes = numa_allocate_nodemask();
    numa_bitmask_setbit(nodes, node);
    numa_set_membind(nodes);
    numa_bitmask_free(nodes);
  }
}

void* mallocx_on_node(size_t size, int node, size_t align) {
  assert((align & (align - 1)) == 0);
  int flags = MALLOCX_ALIGN(align);
  if (node < 0) return mallocx(size, flags);
  int arena = base_arena + node;
  flags |= MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE;
  return mallocx(size, flags);
}

#endif // HAVE_NUMA

#if USE_JEMALLOC_EXTENT_HOOKS
using namespace alloc;
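
// Build the chain of mappers used to back an address range: 1G huge pages
// first (if requested), then 2M huge pages, then normal 4K pages.  Mappers
// later in the chain act as fallbacks once the earlier ones exhaust their
// page budget.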
static NEVER_INLINE
RangeMapper* getMapperChain(RangeState& range, unsigned n1GPages,
                            bool use2MPages, unsigned n2MPages,
                            bool useNormalPages,
                            int numaMask, short nextNode) {
  RangeMapper* head = nullptr;
  RangeMapper** ptail = &head;
  if (n1GPages) {
    RangeMapper::append(ptail,
                        new Bump1GMapper(range, n1GPages, numaMask, nextNode));
  }
  if (use2MPages) {
    RangeMapper::append(ptail, new Bump2MMapper(range, n2MPages, numaMask));
  }
  if (useNormalPages) {
    RangeMapper::append(ptail, new BumpNormalMapper(range, 0, numaMask));
  }
  assertx(head);
  return head;
}

// Find the first 2M mapper for the range, and grant it some 2M page budget.
// Return the actual number of pages granted.  The actual number can differ
// from the input, because some part of the range may have already been
// mapped in.
unsigned allocate2MPagesToRange(AddrRangeClass c, unsigned pages) {
  auto& range = getRange(c);
  auto mapper = range.getLowMapper();
  if (!mapper) return 0;
  // Search for the first 2M mapper.
  do {
    if (auto mapper2m = dynamic_cast<Bump2MMapper*>(mapper)) {
      const unsigned maxPages = (range.capacity() - range.mapped()) / size2m;
      auto const assigned = std::min(pages, maxPages);
      mapper2m->setMaxPages(assigned);
      return assigned;
    }
    mapper = mapper->next();
  } while (mapper);
  return 0;
}
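
// Create the arenas that serve the low part of the address space (addresses
// that fit in 32 bits).  Hot allocations bump upward from the bottom of the
// VeryLow and Low ranges; the "cold" low arena bumps downward from the top of
// those ranges.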
void setup_low_arena(PageSpec s) {
  auto const lowArenaStart = lowArenaMinAddr();
  assert(reinterpret_cast<uintptr_t>(sbrk(0)) <= lowArenaStart);
  always_assert_flog(lowArenaStart <= (2ull << 30),
                     "low arena min addr ({}) must be <= 2GB",
                     lowArenaStart);
  // Initialize mappers for the VeryLow and Low address ranges.
  auto& veryLowRange = getRange(AddrRangeClass::VeryLow);
  auto& lowRange = getRange(AddrRangeClass::Low);
  auto veryLowMapper =
    getMapperChain(veryLowRange,
                   (s.n1GPages != 0) ? 1 : 0,
                   true, s.n2MPages,            // 2M
                   true,                        // 4K
                   numa_node_set, 0);
  auto lowMapper =
    getMapperChain(lowRange,
                   (s.n1GPages > 1) ? (s.n1GPages - 1) : 0,
                   true, 0,                     // 2M
                   true,                        // 4K
                   numa_node_set, 1);
  veryLowRange.setLowMapper(veryLowMapper);
  lowRange.setLowMapper(lowMapper);

  auto veryLowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(veryLowRange, 0, numa_node_set);
  auto lowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(lowRange, 0, numa_node_set);
  veryLowRange.setHighMapper(veryLowColdMapper);
  lowRange.setHighMapper(lowColdMapper);

  auto ma = LowArena::CreateAt(&g_lowArena);
  ma->appendMapper(lowMapper);
  ma->appendMapper(veryLowMapper);
  low_arena = ma->id();
  low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowerArena);
  ma->appendMapper(veryLowMapper);
  ma->appendMapper(lowMapper);
  lower_arena = ma->id();
  lower_arena_flags = MALLOCX_ARENA(lower_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowColdArena);
  ma->appendMapper(lowColdMapper);
  ma->appendMapper(veryLowColdMapper);
  low_cold_arena = ma->id();
  low_cold_arena_flags = MALLOCX_ARENA(low_cold_arena) | MALLOCX_TCACHE_NONE;
}
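
// Create the high (Uncounted) arena and its cold counterpart.  The cold arena
// bumps downward from the top of the Uncounted range, and can additionally be
// backed by a file mapper (see enable_high_cold_file()).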
void setup_high_arena(PageSpec s) {
  auto& range = getRange(AddrRangeClass::Uncounted);
  auto mapper = getMapperChain(range, s.n1GPages,
                               true, s.n2MPages, // 2M pages can be added later
                               true,             // use normal pages
                               numa_node_set,
                               num_numa_nodes() / 2 + 1);
  range.setLowMapper(mapper);

  auto arena = HighArena::CreateAt(&g_highArena);
  arena->appendMapper(range.getLowMapper());
  high_arena = arena->id();

  auto& fileRange = getRange(AddrRangeClass::UncountedCold);
  cold_file_mapper = new BumpFileMapper(fileRange);
  fileRange.setLowMapper(cold_file_mapper);
  auto coldMapper =
    new BumpNormalMapper<Direction::HighToLow>(range, 0, numa_node_set);
  range.setHighMapper(coldMapper);
  auto coldArena = HighArena::CreateAt(&g_coldArena);
  coldArena->appendMapper(cold_file_mapper);
  coldArena->appendMapper(coldMapper);
  high_cold_arena = coldArena->id();
  high_cold_arena_flags = MALLOCX_ARENA(high_cold_arena) | MALLOCX_TCACHE_NONE;
}
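
// Reserve a 1G-aligned address range for jemalloc's default arena (arena 0)
// and back it exclusively with huge pages; normal 4K pages are intentionally
// left out of this mapper chain.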
void setup_arena0(PageSpec s) {
  size_t size = size1g * s.n1GPages + size2m * s.n2MPages;
  if (size == 0) return;
  // Give arena 0 some huge pages, starting at 2TB.
  auto ret = mmap(reinterpret_cast<void*>(kArena0Base),
                  size + size1g, PROT_NONE,
                  MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                  -1, 0);
  auto base = reinterpret_cast<uintptr_t>(ret);
  if (auto r = base % size1g) {         // align to 1G boundary
    base = base + size1g - r;
  }
  assertx(base % size1g == 0);

  auto a0 = PreMappedArena::AttachTo(low_malloc(sizeof(PreMappedArena)), 0,
                                     base, base + size, Reserved{});
  auto mapper = getMapperChain(*a0, s.n1GPages,
                               s.n2MPages, s.n2MPages,
                               false,
                               numa_node_set, 0);
  a0->setLowMapper(mapper);
  g_arena0 = a0;
}

// Set up extra arenas for use in non-VM threads, when we have short bursts of
// worker threads running, e.g., during deserialization of profile data.
static std::vector<std::pair<std::vector<DefaultArena*>,
                             std::atomic_uint*>> s_extra_arenas;
static unsigned s_extra_arena_per_node;
bool setup_extra_arenas(unsigned count) {
  if (count == 0) return false;
  // This may be called when we have many other threads running.  So hold the
  // lock while making changes.
  static std::mutex lock;
  std::lock_guard<std::mutex> g(lock);
  // Only the first call allocates the arenas.
  if (!s_extra_arenas.empty()) {
    return count <= s_extra_arenas.size() * s_extra_arenas[0].first.size();
  }
  // `count` needs to be a multiple of `num_numa_nodes()`; if it isn't, we
  // round it up to make it easy to balance across nodes.
  auto const nNodes = std::max(1u, num_numa_nodes());
  s_extra_arena_per_node = (count + nNodes - 1) / nNodes;
  assert(s_extra_arena_per_node >= 1);
  s_extra_arenas.resize(nNodes);
  for (unsigned n = 0; n < nNodes; ++n) {
    s_extra_arenas[n].first.resize(s_extra_arena_per_node);
    auto constexpr kArenaSize =
      (sizeof(DefaultArena) + alignof(DefaultArena) - 1)
        / alignof(DefaultArena) * alignof(DefaultArena);
    auto const allocSize = kArenaSize * s_extra_arena_per_node
                           + sizeof(std::atomic_uint);
    void* addr = mallocx_on_node(allocSize, n, alignof(DefaultArena));
    memset(addr, 0, allocSize);
    for (unsigned i = 0; i < s_extra_arena_per_node; ++i) {
      s_extra_arenas[n].first[i] = DefaultArena::CreateAt(addr);
      addr = (char*)addr + kArenaSize;
    }
    s_extra_arenas[n].second = static_cast<std::atomic_uint*>(addr);
  }
  return true;
}
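
// Pick one of the extra arenas for the given NUMA node, rotating through them
// using the node's atomic counter.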
DefaultArena* next_extra_arena(int node) {
  if (s_extra_arena_per_node == 0) return nullptr;
  if (node >= s_extra_arenas.size()) return nullptr;
  if (node < 0) node = 0;
  auto const n = static_cast<unsigned>(node);
  auto counter = s_extra_arenas[n].second;
  auto const next = counter->fetch_add(1, std::memory_order_relaxed);
  return s_extra_arenas[n].first[next % s_extra_arena_per_node];
}

void* huge_page_extent_alloc(extent_hooks_t* extent_hooks, void* addr,
                             size_t size, size_t alignment, bool* zero,
                             bool* commit, unsigned arena_ind) {
  // This is used for arena 0's extent_alloc.  No malloc / free allowed within
  // this function since reentrancy is not supported for a0's extent hooks.

  // Note that only metadata will use 2M alignment (size will be a multiple of
  // 2M as well).  Aligned allocation doesn't require alignment by default,
  // because of the way virtual memory is expanded with opt.retain (which is
  // the default).  The current extent hook API has no other way to tell if the
  // allocation is for metadata.  The next major jemalloc release will include
  // this information in the API.
  if (!jemallocMetadataCanUseHuge.load() || alignment != size2m) {
    goto default_alloc;
  }

  assert(a0ReservedBase != nullptr && (size & (size2m - 1)) == 0);
  if (arena_ind == 0) {
    size_t oldValue;
    while (size <= (oldValue = a0ReservedLeft.load())) {
      // Try placing a0 metadata on 1G huge pages.
      if (a0ReservedLeft.compare_exchange_weak(oldValue, oldValue - size)) {
        assert((oldValue & (size2m - 1)) == 0);
        return
          reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(a0ReservedBase) +
                                  (a0MetadataReservedSize - oldValue));
      }
    }
  } else if (auto ma = alloc::highArena()) {
    // For non arena 0: malloc / free allowed in this branch.
    void* ret = ma->extent_alloc(extent_hooks, addr, size, alignment, zero,
                                 commit, high_arena);
    if (ret != nullptr) return ret;
  }
default_alloc:
  return orig_alloc(extent_hooks, addr, size, alignment, zero,
                    commit, arena_ind);
}

/*
 * Customize arena 0's extent hook to use 1g pages for metadata.
 */
void setup_jemalloc_metadata_extent_hook(bool enable, bool enable_numa_arena,
                                         size_t reserved) {
#if !JEMALLOC_METADATA_1G_PAGES
  return;
#endif
  assert(!jemallocMetadataCanUseHuge.load());
  enableArenaMetadata1GPage = enable;
  enableNumaArenaMetadata1GPage = enable_numa_arena;
  a0MetadataReservedSize = reserved;

  auto ma = alloc::highArena();
  if (!ma) return;
  bool retain_enabled = false;
  mallctlRead("opt.retain", &retain_enabled);
  if (!enableArenaMetadata1GPage || !retain_enabled) return;

  bool zero = true, commit = true;
  void* ret = ma->extent_alloc(nullptr, nullptr, a0MetadataReservedSize, size2m,
                               &zero, &commit, high_arena);
  if (!ret) return;

  a0ReservedBase = ret;
  a0ReservedLeft.store(a0MetadataReservedSize);

  extent_hooks_t* orig_hooks;
  int err = mallctlRead<extent_hooks_t*, true>("arena.0.extent_hooks",
                                               &orig_hooks);
  if (err) return;

  orig_alloc = orig_hooks->alloc;
  huge_page_metadata_hooks = *orig_hooks;
  huge_page_metadata_hooks.alloc = &huge_page_extent_alloc;

  err = mallctlWrite<extent_hooks_t*, true>("arena.0.extent_hooks",
                                            &huge_page_metadata_hooks);
  if (err) return;

  jemallocMetadataCanUseHuge.store(true);
}
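
// Per-thread arena setup: create explicit tcaches for the high arena and, if
// the thread's NUMA node has a local arena, for that arena as well;
// optionally create the thread-local static arena.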
void arenas_thread_init() {
  if (high_arena_tcache == -1) {
    mallctlRead<int, true>("tcache.create", &high_arena_tcache);
    high_arena_flags =
      MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE(high_arena_tcache);
  }
  if (local_arena_tcache == -1) {
    local_arena = get_local_arena(s_numaNode);
    if (local_arena) {
      mallctlRead<int, true>("tcache.create", &local_arena_tcache);
      local_arena_flags =
        MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE(local_arena_tcache);
    }
  }
  if (s_enable_static_arena) {
    assertx(!tl_static_arena);
    constexpr size_t kStaticArenaChunkSize = 256 * 1024;
    static TaggedSlabList s_static_pool;
    tl_static_arena = new TLStaticArena(kStaticArenaChunkSize, &s_static_pool);
  }
}

void arenas_thread_flush() {
  // It is OK if flushing fails.
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", high_arena_tcache);
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", local_arena_tcache);
  }
}

void arenas_thread_exit() {
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", high_arena_tcache);
    high_arena_tcache = -1;
    // Ideally we shouldn't read high_arena_flags any more, but just in case.
    high_arena_flags = MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE_NONE;
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", local_arena_tcache);
    local_arena_tcache = -1;
    // Ideally we shouldn't read local_arena_flags any more, but just in case.
    local_arena_flags = MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE_NONE;
  }
  if (tl_static_arena) {
    delete tl_static_arena;
    tl_static_arena = nullptr;
  }
}

#endif // USE_JEMALLOC_EXTENT_HOOKS

std::vector<SlabManager*> s_slab_managers;
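
// Create one jemalloc arena per NUMA node, numbered consecutively after the
// existing arenas (base_arena).  When extent hooks are enabled and huge pages
// are available, also reserve a pre-mapped, huge-page-backed local arena per
// node; part of that reservation may be handed to the node's slab manager.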
void setup_local_arenas(PageSpec spec, unsigned slabs) {
  s_slab_managers.reserve(num_numa_nodes());
  slabs /= num_numa_nodes();

  mallctlRead<unsigned>("arenas.narenas", &base_arena); // throw upon failure
  // The default one per node.
  for (int i = 0; i < num_numa_nodes(); i++) {
    unsigned arena = 0;
    mallctlRead<unsigned>("arenas.create", &arena);
    always_assert(arena == base_arena + i);
    if (slabs) {
      auto mem = low_malloc(sizeof(SlabManager));
      s_slab_managers.push_back(new (mem) SlabManager);
    } else {
      s_slab_managers.push_back(nullptr);
    }
  }

#if USE_JEMALLOC_EXTENT_HOOKS
  spec.n1GPages = std::min(spec.n1GPages, get_huge1g_info().nr_hugepages);
  spec.n1GPages /= num_numa_nodes();
  spec.n2MPages = std::min(spec.n2MPages, get_huge2m_info().nr_hugepages);
  spec.n2MPages /= num_numa_nodes();
  const size_t reserveSize =
    spec.n1GPages * size1g + spec.n2MPages * size2m;
  if (reserveSize == 0) return;

  g_local_arenas.resize(num_numa_nodes(), 0);
  for (unsigned i = 0; i < num_numa_nodes(); ++i) {
    static_assert(kLocalArenaMinAddr % size1g == 0, "");
    auto const desiredBase = kLocalArenaMinAddr + i * kLocalArenaSizeLimit;
    // Try to get the desired address range, but don't use MAP_FIXED.
    auto ret = mmap(reinterpret_cast<void*>(desiredBase),
                    reserveSize + size1g, PROT_NONE,
                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                    -1, 0);
    if (ret == MAP_FAILED) {
      throw std::runtime_error{"mmap() failed to reserve address range"};
    }
    auto base = reinterpret_cast<uintptr_t>(ret);
    if (base % size1g) {                // adjust to start at 1GB boundary
      auto const newBase = (base + size1g - 1) & ~(size1g - 1);
      munmap(reinterpret_cast<void*>(base), newBase - base);
      base = newBase;
    }
    assert(base % size1g == 0);
    auto arena = PreMappedArena::CreateAt(low_malloc(sizeof(PreMappedArena)),
                                          base, base + reserveSize, Reserved{});
    auto mapper = getMapperChain(*arena,
                                 spec.n1GPages,
                                 (bool)spec.n2MPages,
                                 spec.n2MPages,
                                 false,         // don't use normal pages
                                 1u << i,
                                 i);
    // Allocate some slabs first, which are not given to the arena, but managed
    // separately by the slab manager.
    auto const totalSlabSize = std::min(slabs * kSlabSize, reserveSize);
    if (totalSlabSize) {
      auto slabRange = mapper->alloc(totalSlabSize, kSlabAlign);
      if (slabRange) {
        s_slab_managers[i]->addRange<true>(slabRange, totalSlabSize);
      }
    }
    if (totalSlabSize == reserveSize) continue;
    arena->setLowMapper(mapper);
    g_local_arenas[i] = arena;
  }
#endif
}

unsigned get_local_arena(uint32_t node) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (node >= g_local_arenas.size()) return 0;
  auto const arena = g_local_arenas[node];
  if (arena == nullptr) return 0;
  return arena->id();
#else
  return 0;
#endif
}

SlabManager* get_local_slab_manager(uint32_t node) {
  if (node >= s_slab_managers.size()) return nullptr;
  return s_slab_managers[node];
}

void shutdown_slab_managers() {
  for (auto slab_manager : s_slab_managers) {
    if (slab_manager) slab_manager->shutdown();
  }
}

#endif // USE_JEMALLOC

ssize_t get_free_slab_bytes() {
  ssize_t bytes = 0;
#ifdef USE_JEMALLOC
  for (auto const slabManager : s_slab_managers) {
    if (slabManager) {
      bytes += slabManager->bytes();
    }
  }
#endif // USE_JEMALLOC
  return bytes;
}
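
// Runs very early, before most other static initializers (see
// MAX_CONSTRUCTOR_PRIORITY below), to configure the allocator: NUMA setup,
// the low/high arenas, and huge-page reservations driven by the HHVM_*_PAGE
// environment variables when extent hooks are in use.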
struct JEMallocInitializer {
  JEMallocInitializer() {
    // The following comes from malloc_extension.cc in google-perftools
#ifdef __GLIBC__
    // GNU libc++ versions 3.3 and 3.4 obey the environment variables
    // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively.  Setting
    // one of these variables forces the STL default allocator to call
    // new() or delete() for each allocation or deletion.  Otherwise
    // the STL allocator tries to avoid the high cost of doing
    // allocations by pooling memory internally.
    setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
    setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);

    // Now we need to make the setenv 'stick', which it may not do since
    // the env is flakey before main() is called.  But luckily stl only
    // looks at this env var the first time it tries to do an alloc, and
    // caches what it finds.  So we just cause an stl alloc here.
    std::string dummy("I need to be allocated");
    dummy += "!";         // so the definition of dummy isn't optimized out
#endif  /* __GLIBC__ */

    // Enable backtracing through PHP frames (t9814472).
    setenv("UNW_RBP_ALWAYS_VALID", "1", false);

    init_numa();
#ifdef USE_JEMALLOC
#if !USE_JEMALLOC_EXTENT_HOOKS
    // Create the legacy low arena that uses brk() instead of mmap().  When
    // using newer versions of jemalloc, we use extent hooks to get more
    // control.  If the mallctl fails, it will always_assert in mallctlHelper.
    if (mallctlRead<unsigned, true>("arenas.create", &low_arena)) {
      return;
    }
    char buf[32];
    snprintf(buf, sizeof(buf), "arena.%u.dss", low_arena);
    if (mallctlWrite<const char*, true>(buf, "primary") != 0) {
      // Error; bail out.
      return;
    }
    low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;
    lower_arena = low_arena;
    lower_arena_flags = low_arena_flags;
    low_cold_arena = low_arena;
    low_cold_arena_flags = low_arena_flags;

    // We normally maintain the invariant that the region surrounding the
    // current brk is mapped huge, but we don't know yet whether huge pages
    // are enabled for low memory.  Round up to the start of a huge page,
    // and set the high water mark to one below it.
    constexpr size_t kHugePageSize = size2m;
    constexpr size_t kHugePageMask = kHugePageSize - 1;
    unsigned leftInPage = kHugePageSize - (uintptr_t(sbrk(0)) & kHugePageMask);
    (void) sbrk(leftInPage);
    assert((uintptr_t(sbrk(0)) & kHugePageMask) == 0);

#else // USE_JEMALLOC_EXTENT_HOOKS
    unsigned low_1g_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &low_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_1g_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &high_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned low_2m_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &low_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_2M_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_2m_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &high_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_2M_PAGE: %s\n", buffer);
        abort();
      }
    }

    HugePageInfo info = get_huge1g_info();
    unsigned remaining = static_cast<unsigned>(info.nr_hugepages);
    if (remaining == 0) {
      low_1g_pages = high_1g_pages = 0;
    } else if (low_1g_pages > 0 || high_1g_pages > 0) {
      KernelVersion version;
      if (version.m_major < 3 ||
          (version.m_major == 3 && version.m_minor < 9)) {
        // Older kernels need an explicit hugetlbfs mount point.
        find_hugetlbfs_path() || auto_mount_hugetlbfs();
      }
    }

    // Divide the 1G pages between the low and high 1G arenas.  We use at most
    // 2 1G pages for the low 1G arena; usually 1 is good enough.
    auto const origLow1G = low_1g_pages;
    auto const origHigh1G = high_1g_pages;
    if (low_1g_pages > 0) {
      if (low_1g_pages > 2) {
        low_1g_pages = 2;
      }
      if (low_1g_pages + high_1g_pages > remaining) {
        low_1g_pages = 1;
      }
      assert(remaining >= low_1g_pages);
      remaining -= low_1g_pages;
    }
    if (origLow1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for low arena\n",
              low_1g_pages, origLow1G);
    }
    setup_low_arena({low_1g_pages, low_2m_pages});

    if (high_1g_pages > remaining) {
      high_1g_pages = remaining;
    }
    if (origHigh1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for high arena\n",
              high_1g_pages, origHigh1G);
    }
    setup_high_arena({high_1g_pages, high_2m_pages});
    // Make sure high/low arenas are available to the current thread.
    arenas_thread_init();
#endif
    // Initialize global mibs.
    init_mallctl_mibs();
#endif
  }
};

#if defined(__GNUC__) && !defined(__APPLE__)
// Construct this object before any others.
// 101 is the highest priority allowed by the init_priority attribute.
// http://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/C_002b_002b-Attributes.html
#define MAX_CONSTRUCTOR_PRIORITY __attribute__((__init_priority__(101)))
#else
// init_priority is a gcc extension, so we can't use it on other compilers.
// However, since constructor ordering is only known to be an issue with
// GNU libc++ we're probably OK on other compilers so let the situation pass
// silently instead of issuing a warning.
#define MAX_CONSTRUCTOR_PRIORITY
#endif

static JEMallocInitializer initJEMalloc MAX_CONSTRUCTOR_PRIORITY;

void low_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  pages -= allocate2MPagesToRange(AddrRangeClass::VeryLow, pages);
  allocate2MPagesToRange(AddrRangeClass::Low, pages);
#endif
}

void high_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  allocate2MPagesToRange(AddrRangeClass::Uncounted, pages);
#endif
}

void enable_high_cold_file() {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->enable();
  }
#endif
}

void set_cold_file_dir(const char* dir) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->setDirectory(dir);
  }
#endif
}

///////////////////////////////////////////////////////////////////////////////
}

extern "C" {
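// Compile-time jemalloc configuration: a single automatic arena (additional
// arenas are created at runtime) and thread caches for allocations up to
// 2^16 bytes.  metadata_thp is disabled on jemalloc >= 5.1; huge pages for
// metadata are instead arranged via setup_jemalloc_metadata_extent_hook().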
const char* malloc_conf = "narenas:1,lg_tcache_max:16"
#if (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR >= 1) || \
    (JEMALLOC_VERSION_MAJOR > 5)    // requires jemalloc >= 5.1
    ",metadata_thp:disabled"
#endif
#ifdef ENABLE_HHPROF
    ",prof:true,prof_active:false,prof_thread_active_init:false"
#endif