Ignore captured frame after AFWH finishes
[hiphop-php.git] hphp/runtime/base/memory-manager.cpp
blob af3802a13573e767e4baa5d2f04035e031563605
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/base/memory-manager.h"

#include <algorithm>
#include <cstdint>
#include <limits>

#include "hphp/runtime/base/builtin-functions.h"
#include "hphp/runtime/base/exceptions.h"
#include "hphp/runtime/base/ini-setting.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stack-logger.h"
#include "hphp/runtime/base/surprise-flags.h"
#include "hphp/runtime/base/sweepable.h"
#include "hphp/runtime/base/thread-info.h"
#include "hphp/runtime/base/req-root.h"
#include "hphp/runtime/base/heap-graph.h"
#include "hphp/runtime/server/http-server.h"

#include "hphp/util/alloc.h"
#include "hphp/util/logger.h"
#include "hphp/util/process.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"

#include <folly/Random.h>
#include <folly/ScopeGuard.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>

#include "hphp/runtime/base/memory-manager-defs.h"

namespace HPHP {

const unsigned kInvalidSweepIndex = 0xffffffff;
__thread bool tl_sweeping;

TRACE_SET_MOD(mm);

//////////////////////////////////////////////////////////////////////

std::atomic<MemoryManager::ReqProfContext*>
  MemoryManager::s_trigger{nullptr};

#ifdef USE_JEMALLOC
bool MemoryManager::s_statsEnabled = false;
size_t MemoryManager::s_cactiveLimitCeiling = 0;

static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static size_t statsCactiveMib[2];
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;

void MemoryManager::threadStatsInit() {
  if (!mallctlnametomib) return;
  size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(statsCactiveMib) / sizeof(size_t);
  if (mallctlnametomib("stats.cactive", statsCactiveMib, &miblen)) {
    return;
  }
  MemoryManager::s_statsEnabled = true;

  // In threadStats() we wish to solve for cactiveLimit in:
  //
  //   footprint + cactiveLimit + headRoom == MemTotal
  //
  // However, headRoom comes from RuntimeOption::ServerMemoryHeadRoom, which
  // isn't initialized until after the code here runs.  Therefore, compute
  // s_cactiveLimitCeiling here in order to amortize the cost of introspecting
  // footprint and MemTotal.
  //
  //   cactiveLimit == (MemTotal - footprint) - headRoom
  //
  //   cactiveLimit == s_cactiveLimitCeiling - headRoom
  // where
  //   s_cactiveLimitCeiling == MemTotal - footprint
  size_t footprint = Process::GetCodeFootprint(getpid());
  size_t MemTotal = 0;
#ifndef __APPLE__
  size_t pageSize = size_t(sysconf(_SC_PAGESIZE));
  MemTotal = size_t(sysconf(_SC_PHYS_PAGES)) * pageSize;
#else
  int mib[2] = { CTL_HW, HW_MEMSIZE };
  u_int namelen = sizeof(mib) / sizeof(mib[0]);
  size_t len = sizeof(MemTotal);
  sysctl(mib, namelen, &MemTotal, &len, nullptr, 0);
#endif
  if (MemTotal > footprint) {
    MemoryManager::s_cactiveLimitCeiling = MemTotal - footprint;
  }
}

inline
void MemoryManager::threadStats(uint64_t*& allocated, uint64_t*& deallocated,
                                size_t*& cactive, size_t& cactiveLimit) {
  pthread_once(&threadStatsOnce, threadStatsInit);
  if (!MemoryManager::s_statsEnabled) return;

  size_t len = sizeof(allocated);
  if (mallctlbymib(threadAllocatedpMib,
                   sizeof(threadAllocatedpMib) / sizeof(size_t),
                   &allocated, &len, nullptr, 0)) {
    not_reached();
  }

  len = sizeof(deallocated);
  if (mallctlbymib(threadDeallocatedpMib,
                   sizeof(threadDeallocatedpMib) / sizeof(size_t),
                   &deallocated, &len, nullptr, 0)) {
    not_reached();
  }

  len = sizeof(cactive);
  if (mallctlbymib(statsCactiveMib,
                   sizeof(statsCactiveMib) / sizeof(size_t),
                   &cactive, &len, nullptr, 0)) {
    not_reached();
  }

  int64_t headRoom = RuntimeOption::ServerMemoryHeadRoom;
  // Compute cactiveLimit based on s_cactiveLimitCeiling, as computed in
  // threadStatsInit().
  if (headRoom != 0 && headRoom < MemoryManager::s_cactiveLimitCeiling) {
    cactiveLimit = MemoryManager::s_cactiveLimitCeiling - headRoom;
  } else {
    cactiveLimit = std::numeric_limits<size_t>::max();
  }
}
#endif

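// Worked example of the cactiveLimit algebra in threadStatsInit() and
// threadStats() above (illustrative numbers only): with MemTotal == 64 GB and
// a code footprint of 1 GB, s_cactiveLimitCeiling == 63 GB.  If
// ServerMemoryHeadRoom is later configured to 2 GB, threadStats() reports
// cactiveLimit == 61 GB; a headRoom of 0 leaves the limit disabled
// (cactiveLimit == SIZE_MAX).
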
static void* MemoryManagerInit() {
  // We store the free list pointers right at the start of each
  // object (overlapping whatever its first word holds), and we also clobber
  // _count as a free-object flag when the object is deallocated.  This
  // assert just makes sure they don't overflow.
  assert(FAST_REFCOUNT_OFFSET + sizeof(int) <=
         MemoryManager::smallSizeClass(1));
  MemoryManager::TlsWrapper tls;
  return (void*)tls.getNoCheck;
}

void* MemoryManager::TlsInitSetup = MemoryManagerInit();

void MemoryManager::Create(void* storage) {
  new (storage) MemoryManager();
}

void MemoryManager::Delete(MemoryManager* mm) {
  mm->~MemoryManager();
}

void MemoryManager::OnThreadExit(MemoryManager* mm) {
  mm->~MemoryManager();
}

MemoryManager::MemoryManager() {
#ifdef USE_JEMALLOC
  threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
#endif
  resetStatsImpl(true);
  setMemoryLimit(std::numeric_limits<int64_t>::max());
  resetGC(); // so each thread has a unique req_num at startup
  // make the circular lists empty.
  m_strings.next = m_strings.prev = &m_strings;
  m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;

  IniSetting::Bind(IniSetting::CORE, IniSetting::PHP_INI_ALL, "zend.enable_gc",
                   &m_gc_enabled);
}

MemoryManager::~MemoryManager() {
  if (debug) {
    // Check that every allocation in the heap has been freed before
    // destruction.
    forEachHeader([&](Header* h, size_t) {
      assert(h->kind() == HeaderKind::Free);
    });
  }
  // ~BigHeap releases its slabs/bigs.
}

void MemoryManager::resetRuntimeOptions() {
  if (debug) {
    checkHeap("resetRuntimeOptions");
    // check that every allocation in the heap has been freed before reset
    iterate([&](Header* h, size_t) {
      assert(h->kind() == HeaderKind::Free);
    });
  }
  MemoryManager::TlsWrapper::destroy(); // ~MemoryManager()
  MemoryManager::TlsWrapper::getCheck(); // new MemoryManager()
}

void MemoryManager::resetStatsImpl(bool isInternalCall) {
#ifdef USE_JEMALLOC
  FTRACE(1, "resetStatsImpl({}) pre:\n", isInternalCall);
  FTRACE(1, "usage: {}\ncapacity: {}\npeak usage: {}\npeak alloc: {}\n",
         m_stats.usage(), m_stats.capacity, m_stats.peakUsage,
         m_stats.peakCap);
  FTRACE(1, "total alloc: {}\nje alloc: {}\nje dealloc: {}\n",
         m_stats.totalAlloc, m_prevAllocated, m_prevDeallocated);
  FTRACE(1, "je debt: {}\n\n", m_stats.mallocDebt);
#else
  FTRACE(1, "resetStatsImpl({}) pre:\n"
         "usage: {}\ncapacity: {}\npeak usage: {}\npeak capacity: {}\n\n",
         isInternalCall, m_stats.usage(), m_stats.capacity, m_stats.peakUsage,
         m_stats.peakCap);
#endif
  if (isInternalCall) {
    m_statsIntervalActive = false;
    m_stats.mmUsage = 0;
    m_stats.auxUsage = 0;
    m_stats.capacity = 0;
    m_stats.peakUsage = 0;
    m_stats.peakCap = 0;
    m_stats.totalAlloc = 0;
    m_stats.peakIntervalUsage = 0;
    m_stats.peakIntervalCap = 0;
#ifdef USE_JEMALLOC
    m_enableStatsSync = false;
  } else if (!m_enableStatsSync) {
#else
  } else {
#endif
    // This is only set by the jemalloc stats sync, which we don't enable until
    // after this has been called.
    assert(m_stats.totalAlloc == 0);

    // The effect of this call is simply to ignore anything we've done *outside*
    // the MemoryManager allocator after we initialized, to avoid attributing
    // shared structure initialization that happens during hphp_thread_init()
    // to this session.
    //
    // We don't want to clear the other values because we do already have some
    // small-sized allocator usage and live slabs, and wiping them now would
    // result in negative values when we try to reconcile our accounting with
    // jemalloc.
#ifdef USE_JEMALLOC
    // Anything that was definitively allocated by the MemoryManager allocator
    // should be counted in this number, even if we're otherwise zeroing out
    // the count for each thread.
    m_stats.totalAlloc = s_statsEnabled ? m_stats.mallocDebt : 0;

    m_enableStatsSync = s_statsEnabled;
#else
    m_stats.totalAlloc = 0;
#endif
  }
#ifdef USE_JEMALLOC
  if (s_statsEnabled) {
    m_stats.mallocDebt = 0;
    m_prevDeallocated = *m_deallocated;
    m_prevAllocated = *m_allocated;
  }
#endif
#ifdef USE_JEMALLOC
  FTRACE(1, "resetStatsImpl({}) post:\n", isInternalCall);
  FTRACE(1, "usage: {}\ncapacity: {}\npeak usage: {}\npeak capacity: {}\n",
         m_stats.usage(), m_stats.capacity, m_stats.peakUsage,
         m_stats.peakCap);
  FTRACE(1, "total alloc: {}\nje alloc: {}\nje dealloc: {}\n",
         m_stats.totalAlloc, m_prevAllocated, m_prevDeallocated);
  FTRACE(1, "je debt: {}\n\n", m_stats.mallocDebt);
#else
  FTRACE(1, "resetStatsImpl({}) post:\n"
         "usage: {}\ncapacity: {}\npeak usage: {}\npeak capacity: {}\n\n",
         isInternalCall, m_stats.usage(), m_stats.capacity,
         m_stats.peakUsage, m_stats.peakCap);
#endif
}

void MemoryManager::refreshStatsHelperExceeded() {
  setSurpriseFlag(MemExceededFlag);
  m_couldOOM = false;
  if (RuntimeOption::LogNativeStackOnOOM) {
    log_native_stack("Exceeded memory limit");
  }
}

void MemoryManager::setMemThresholdCallback(size_t threshold) {
  m_memThresholdCallbackPeakUsage = threshold;
}

#ifdef USE_JEMALLOC
void MemoryManager::refreshStatsHelperStop() {
  HttpServer::Server->stop();
  // Increase the limit to the maximum possible value, so that this method
  // won't be called again.
  m_cactiveLimit = std::numeric_limits<size_t>::max();
}
#endif

/*
 * Refresh stats to reflect directly malloc()ed memory, and determine
 * whether the request memory limit has been exceeded.
 *
 * The stats parameter allows the updates to be applied to either
 * m_stats as in refreshStats() or to a separate MemoryUsageStats
 * struct as in getStatsSafe().
 *
 * The template variable live controls whether or not MemoryManager
 * member variables are updated and whether or not to call helper
 * methods in response to memory anomalies.
 */
template<bool live>
void MemoryManager::refreshStatsImpl(MemoryUsageStats& stats) {
#ifdef USE_JEMALLOC
  // Incrementally incorporate the difference between the previous and current
  // deltas into the memory usage statistic.  For reference, the total
  // malloced memory usage could be calculated as such, if delta0 were
  // recorded in resetStatsImpl():
  //
  //   int64 musage = delta - delta0;
  //
  // Note however, the slab allocator adds to m_stats.mallocDebt
  // when it calls malloc(), so that this function can avoid
  // double-counting the malloced memory.  Thus musage in the example
  // code may well substantially exceed m_stats.usage.
  if (m_enableStatsSync) {
    uint64_t jeDeallocated = *m_deallocated;
    uint64_t jeAllocated = *m_allocated;

    // We can't currently handle wrapping so make sure this isn't happening.
    assert(jeAllocated >= 0 &&
           jeAllocated <= std::numeric_limits<int64_t>::max());
    assert(jeDeallocated >= 0 &&
           jeDeallocated <= std::numeric_limits<int64_t>::max());

    // This is the delta between the current and the previous jemalloc reading.
    int64_t jeMMDeltaAllocated =
      int64_t(jeAllocated) - int64_t(m_prevAllocated);

    FTRACE(1, "Before stats sync:\n");
    FTRACE(1, "je alloc:\ncurrent: {}\nprevious: {}\ndelta with MM: {}\n",
      jeAllocated, m_prevAllocated, jeAllocated - m_prevAllocated);
    FTRACE(1, "je dealloc:\ncurrent: {}\nprevious: {}\ndelta with MM: {}\n",
      jeDeallocated, m_prevDeallocated, jeDeallocated - m_prevDeallocated);
    FTRACE(1, "usage: {}\ntotal (je) alloc: {}\nje debt: {}\n",
      stats.usage(), stats.totalAlloc, stats.mallocDebt);

    // Since these deltas potentially include memory allocated from another
    // thread but deallocated on this one, it is possible for these numbers to
    // go negative.
    int64_t jeDeltaAllocated =
      int64_t(jeAllocated) - int64_t(jeDeallocated);
    int64_t mmDeltaAllocated =
      int64_t(m_prevAllocated) - int64_t(m_prevDeallocated);
    FTRACE(1, "je delta:\ncurrent: {}\nprevious: {}\n",
      jeDeltaAllocated, mmDeltaAllocated);

    // Subtract the old jemalloc adjustment (delta0) and add the current one
    // (delta) to arrive at the new combined usage number.
    stats.auxUsage += jeDeltaAllocated - mmDeltaAllocated;
    // Remove the "debt" accrued from allocating the slabs so we don't double
    // count the slab-based allocations.
    stats.auxUsage -= stats.mallocDebt;

    stats.mallocDebt = 0;
    // We need to do the calculation instead of just setting it to jeAllocated
    // because of the MaskAlloc capability.
    stats.totalAlloc += jeMMDeltaAllocated;
    if (live) {
      m_prevAllocated = jeAllocated;
      m_prevDeallocated = jeDeallocated;
    }

    FTRACE(1, "After stats sync:\n");
    FTRACE(1, "usage: {}\ntotal (je) alloc: {}\n\n",
      stats.usage(), stats.totalAlloc);
  }
#endif
  assert(stats.limit > 0);
  if (live && stats.usage() > stats.limit && m_couldOOM) {
    refreshStatsHelperExceeded();
  }
  if (stats.usage() > stats.peakUsage) {
    // Check whether the process's active memory limit has been exceeded, and
    // if so, stop the server.
    //
    // Only check whether the total memory limit was exceeded if this request
    // is at a new high water mark.  This check could be performed regardless
    // of this request's current memory usage (because other request threads
    // could be to blame for the increased memory usage), but doing so would
    // measurably increase computation for little benefit.
#ifdef USE_JEMALLOC
    // (*m_cactive) consistency is achieved via atomic operations.  The fact
    // that we do not use an atomic operation here means that we could get a
    // stale read, but in practice that poses no problems for how we are
    // using the value.
    if (live && s_statsEnabled && *m_cactive > m_cactiveLimit) {
      refreshStatsHelperStop();
    }
#endif
    if (live &&
        stats.usage() > m_memThresholdCallbackPeakUsage &&
        stats.peakUsage <= m_memThresholdCallbackPeakUsage) {
      setSurpriseFlag(MemThresholdFlag);
    }

    stats.peakUsage = stats.usage();
  }
  if (live && m_statsIntervalActive) {
    stats.peakIntervalUsage = std::max(stats.peakIntervalUsage, stats.usage());
    stats.peakIntervalCap = std::max(stats.peakIntervalCap, stats.capacity);
  }
}

template void MemoryManager::refreshStatsImpl<true>(MemoryUsageStats& stats);
template void MemoryManager::refreshStatsImpl<false>(MemoryUsageStats& stats);

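// Worked example of the stats sync in refreshStatsImpl() (illustrative
// numbers): suppose the previous jemalloc reading was m_prevAllocated ==
// 100MB and m_prevDeallocated == 40MB, and the current reading is
// jeAllocated == 150MB and jeDeallocated == 60MB, of which 30MB of the new
// allocations were slabs already recorded in stats.mallocDebt.  Then
// auxUsage grows by (150 - 60) - (100 - 40) = 30MB, the 30MB of mallocDebt
// is subtracted back out so slab memory is not double-counted, mallocDebt is
// reset to 0, and totalAlloc grows by 150 - 100 = 50MB.
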
void MemoryManager::sweep() {
  assert(!sweeping());
  tl_sweeping = true;
  DEBUG_ONLY size_t num_sweepables = 0, num_natives = 0;

  // iterate until both sweep lists are empty. Entries can be added or
  // removed from either list during sweeping.
  do {
    while (!m_sweepables.empty()) {
      num_sweepables++;
      auto obj = m_sweepables.next();
      obj->unregister();
      obj->sweep();
    }
    while (!m_natives.empty()) {
      num_natives++;
      assert(m_natives.back()->sweep_index == m_natives.size() - 1);
      auto node = m_natives.back();
      m_natives.pop_back();
      auto obj = Native::obj(node);
      auto ndi = obj->getVMClass()->getNativeDataInfo();
      ndi->sweep(obj);
      // trash the native data but leave the header and object parsable
      assert(memset(node+1, kSmallFreeFill, node->obj_offset - sizeof(*node)));
    }
  } while (!m_sweepables.empty());

  DEBUG_ONLY auto napcs = m_apc_arrays.size();
  FTRACE(1, "sweep: sweepable {} native {} apc array {}\n",
         num_sweepables,
         num_natives,
         napcs);

  // decref apc arrays referenced by this request. This must happen here
  // (instead of in resetAllocator), because the sweep routine may use
  // g_context.
  while (!m_apc_arrays.empty()) {
    auto a = m_apc_arrays.back();
    m_apc_arrays.pop_back();
    a->sweep();
    if (debug) a->m_sweep_index = kInvalidSweepIndex;
  }

  if (debug) checkHeap("after MM::sweep");
}

void MemoryManager::resetAllocator() {
  assert(m_natives.empty() && m_sweepables.empty() && tl_sweeping);
  // decref apc strings referenced by this request
  DEBUG_ONLY auto nstrings = StringData::sweepAll();

  // free the heap
  m_heap.reset();

  // zero out freelists
  for (auto& list : m_freelists) list.head = nullptr;
  if (RuntimeOption::EvalQuarantine) {
    for (auto& list : m_quarantine) list.head = nullptr;
  }
  m_front = m_limit = 0;
  tl_sweeping = false;
  m_exiting = false;
  resetStatsImpl(true);
  setGCEnabled(RuntimeOption::EvalEnableGC);
  resetGC();
  FTRACE(1, "reset: strings {}\n", nstrings);
  if (debug) resetEagerGC();
}

void MemoryManager::flush() {
  always_assert(empty());
  m_heap.flush();
  m_apc_arrays = std::vector<APCLocalArray*>();
  m_natives = std::vector<NativeNode*>();
  m_root_handles = std::vector<req::root_handle*>{};
}

/*
 * req::malloc & friends implementation notes
 *
 * There are three kinds of allocations:
 *
 *  a) Big allocations.  (size >= kMaxSmallSize)
 *
 *     In this case we behave as a wrapper around the normal libc
 *     malloc/free.  We insert a MallocNode header at the front of the
 *     allocation in order to find these at sweep time (end of
 *     request) so we can give them back to libc.
 *
 *  b) Size-tracked small allocations.
 *
 *     This is used for the generic case, for callers who can't tell
 *     us the size of the allocation at free time.
 *
 *     In this situation, we put a MallocNode header at the front of
 *     the block that tells us the size for when we need to free it
 *     later.  We differentiate this from a MallocNode using the size
 *     field in either structure (they overlap at the same address).
 *
 *  c) Size-untracked small allocations.
 *
 *     Many callers have an easy time telling you how big the object
 *     was when they need to free it.  In this case we can avoid the
 *     MallocNode, which saves us some memory and also lets us give
 *     out 16-byte aligned pointers easily.
 *
 *     We know when we have one of these because it has to be freed
 *     through a different entry point.  (E.g. MM().freeSmallSize() or
 *     MM().freeBigSize().)
 *
 * When small blocks are freed (cases b and c), they're placed in the
 * appropriate size-segregated freelist.  Large blocks are immediately
 * passed back to libc via free.
 *
 * There are currently two kinds of freelist entries: entries where
 * there is already a valid MallocNode on the list (case b), and
 * entries where there isn't (case c).  The reason for the distinction
 * is that, when allocating for case b, you don't need to store the
 * MallocNode size again.  Much of the heap is going through case b at
 * the time of this writing, so it is a measurable regression to try
 * to just combine the free lists, but presumably we can move more to
 * case c and combine the lists eventually.
 */

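// Illustrative usage sketch (not part of the build) of the entry points
// described above; `ty' stands in for a type_scan::Index and Foo is a
// hypothetical request-local type:
//
//   void* p = req::malloc(n, ty);       // cases a/b: the size is recovered
//   req::free(p);                       // from the MallocNode header on free
//
//   void* q = MM().mallocSmallSize(sizeof(Foo));  // case c: the caller
//   MM().freeSmallSize(q, sizeof(Foo));           // tracks the size itself
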
const std::array<char*,NumHeaderKinds> header_names = {{
  "PackedArray", "MixedArray", "EmptyArray", "ApcArray",
  "GlobalsArray", "ProxyArray", "DictArray", "VecArray", "KeysetArray",
  "String", "Resource", "Ref",
  "Object", "WaitHandle", "AsyncFuncWH", "AwaitAllWH", "Closure",
  "Vector", "Map", "Set", "Pair", "ImmVector", "ImmMap", "ImmSet",
  "AsyncFuncFrame", "NativeData", "ClosureHdr",
  "SmallMalloc", "BigMalloc", "BigObj",
  "Free", "Hole"
}};

// initialize a Hole header in the unused memory between m_front and m_limit
void MemoryManager::initHole(void* ptr, uint32_t size) {
  FreeNode::InitFrom(ptr, size, HeaderKind::Hole);
}

void MemoryManager::initHole() {
  if ((char*)m_front < (char*)m_limit) {
    initHole(m_front, (char*)m_limit - (char*)m_front);
  }
}

// initialize the FreeNode header on all freelist entries.
void MemoryManager::initFree() {
  initHole();
  for (auto i = 0; i < kNumSmallSizes; i++) {
    auto size = smallIndex2Size(i);
    auto n = m_freelists[i].head;
    for (; n && n->kind() != HeaderKind::Free; n = n->next) {
      n->initHeader(HeaderKind::Free, size);
    }
    if (debug) {
      // ensure the freelist tail is already initialized.
      for (; n; n = n->next) {
        assert(n->kind() == HeaderKind::Free && n->size() == size);
      }
    }
  }
  m_heap.sortSlabs();
  m_heap.sortBigs();
}

void MemoryManager::beginQuarantine() {
  std::swap(m_freelists, m_quarantine);
}

// turn free blocks into holes, restore original freelists
void MemoryManager::endQuarantine() {
  for (auto i = 0; i < kNumSmallSizes; i++) {
    auto size = smallIndex2Size(i);
    while (auto n = m_freelists[i].maybePop()) {
      memset(n, 0x8a, size);
      initHole(n, size);
    }
  }
  std::swap(m_freelists, m_quarantine);
}

// test iterating objects in slabs
void MemoryManager::checkHeap(const char* phase) {
  size_t bytes = 0;
  std::vector<Header*> hdrs;
  PtrMap free_blocks, apc_arrays, apc_strings;
  size_t counts[NumHeaderKinds];
  for (unsigned i = 0; i < NumHeaderKinds; i++) counts[i] = 0;
  forEachHeader([&](Header* h, size_t alloc_size) {
    hdrs.push_back(&*h);
    bytes += alloc_size;
    auto kind = h->kind();
    counts[(int)kind]++;
    switch (kind) {
      case HeaderKind::Free:
        free_blocks.insert(h, alloc_size);
        break;
      case HeaderKind::Apc:
        if (h->apc_.m_sweep_index != kInvalidSweepIndex) {
          apc_arrays.insert(h, alloc_size);
        }
        break;
      case HeaderKind::String:
        if (h->str_.isProxy()) apc_strings.insert(h, alloc_size);
        break;
      case HeaderKind::Packed:
      case HeaderKind::Mixed:
      case HeaderKind::Dict:
      case HeaderKind::Empty:
      case HeaderKind::VecArray:
      case HeaderKind::Keyset:
      case HeaderKind::Globals:
      case HeaderKind::Proxy:
      case HeaderKind::Object:
      case HeaderKind::WaitHandle:
      case HeaderKind::AsyncFuncWH:
      case HeaderKind::AwaitAllWH:
      case HeaderKind::Closure:
      case HeaderKind::Vector:
      case HeaderKind::Map:
      case HeaderKind::Set:
      case HeaderKind::Pair:
      case HeaderKind::ImmVector:
      case HeaderKind::ImmMap:
      case HeaderKind::ImmSet:
      case HeaderKind::Resource:
      case HeaderKind::Ref:
      case HeaderKind::AsyncFuncFrame:
      case HeaderKind::NativeData:
      case HeaderKind::ClosureHdr:
      case HeaderKind::SmallMalloc:
      case HeaderKind::BigMalloc:
        break;
      case HeaderKind::BigObj:
      case HeaderKind::Hole:
        assert(false && "forEachHeader skips these kinds");
        break;
    }
  });

  // check the free lists
  free_blocks.prepare();
  size_t num_free_blocks = 0;
  for (auto i = 0; i < kNumSmallSizes; i++) {
    for (auto n = m_freelists[i].head; n; n = n->next) {
      assert(free_blocks.isHeader(n));
      ++num_free_blocks;
    }
  }
  assert(num_free_blocks == free_blocks.size());

  // check the apc array list
  assert(apc_arrays.size() == m_apc_arrays.size());
  apc_arrays.prepare();
  for (UNUSED auto a : m_apc_arrays) {
    assert(apc_arrays.isHeader(a));
  }

  // check the apc string list
  size_t num_apc_strings = 0;
  apc_strings.prepare();
  for (StringDataNode *next, *n = m_strings.next; n != &m_strings; n = next) {
    next = n->next;
    UNUSED auto const s = StringData::node2str(n);
    assert(s->isProxy());
    assert(apc_strings.isHeader(s));
    ++num_apc_strings;
  }
  assert(num_apc_strings == apc_strings.size());

  // heap check is done. If we are not exiting, check pointers using HeapGraph
  if (Trace::moduleEnabled(Trace::heapreport)) {
    auto g = makeHeapGraph(true /* include free blocks */);
    if (!exiting()) checkPointers(g, phase);
    if (Trace::moduleEnabled(Trace::heapreport, 2)) {
      printHeapReport(g, phase);
    }
  }
}

/*
 * Store slab tail bytes (if any) in freelists.
 */
inline void MemoryManager::storeTail(void* tail, uint32_t tailBytes) {
  void* rem = tail;
  for (uint32_t remBytes = tailBytes; remBytes > 0;) {
    uint32_t fragBytes = remBytes;
    assert(fragBytes >= kSmallSizeAlign);
    assert((fragBytes & kSmallSizeAlignMask) == 0);
    unsigned fragInd = smallSize2Index(fragBytes + 1) - 1;
    uint32_t fragUsable = smallIndex2Size(fragInd);
    auto frag = FreeNode::InitFrom((char*)rem + remBytes - fragUsable,
                                   fragUsable, HeaderKind::Hole);
    FTRACE(4, "MemoryManager::storeTail({}, {}): rem={}, remBytes={}, "
              "frag={}, fragBytes={}, fragUsable={}, fragInd={}\n", tail,
              (void*)uintptr_t(tailBytes), rem, (void*)uintptr_t(remBytes),
              frag, (void*)uintptr_t(fragBytes), (void*)uintptr_t(fragUsable),
              fragInd);
    m_freelists[fragInd].push(frag, fragUsable);
    remBytes -= fragUsable;
  }
}

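// Illustrative walk-through, assuming the smallest size classes are the
// 16-byte multiples 16, 32, ..., 112, 128 followed by 160 (the real classes
// come from smallIndex2Size()): storeTail(tail, 144) computes
// smallSize2Index(145) - 1, i.e. the 128-byte class, initializes a Hole
// FreeNode over the *last* 128 bytes of the tail and pushes it on that
// freelist; the remaining 16 bytes go to the 16-byte class on the next
// iteration, and the loop exits with remBytes == 0.
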
/*
 * Create nSplit contiguous regions and store them in the appropriate freelist.
 */
inline void MemoryManager::splitTail(void* tail, uint32_t tailBytes,
                                     unsigned nSplit, uint32_t splitUsable,
                                     unsigned splitInd) {
  assert(tailBytes >= kSmallSizeAlign);
  assert((tailBytes & kSmallSizeAlignMask) == 0);
  assert((splitUsable & kSmallSizeAlignMask) == 0);
  assert(nSplit * splitUsable <= tailBytes);
  assert(splitUsable == smallIndex2Size(splitInd));
  for (uint32_t i = nSplit; i--;) {
    auto split = FreeNode::InitFrom((char*)tail + i * splitUsable,
                                    splitUsable, HeaderKind::Hole);
    FTRACE(4, "MemoryManager::splitTail(tail={}, tailBytes={}, tailPast={}): "
              "split={}, splitUsable={}, splitInd={}\n", tail,
              (void*)uintptr_t(tailBytes), (void*)(uintptr_t(tail) + tailBytes),
              split, splitUsable, splitInd);
    m_freelists[splitInd].push(split, splitUsable);
  }
  void* rem = (void*)(uintptr_t(tail) + nSplit * splitUsable);
  assert(tailBytes >= nSplit * splitUsable);
  uint32_t remBytes = tailBytes - nSplit * splitUsable;
  assert(uintptr_t(rem) + remBytes == uintptr_t(tail) + tailBytes);
  storeTail(rem, remBytes);
}

/*
 * Get a new slab, then allocate nbytes from it and install it in our
 * slab list.  Return the newly allocated nbytes-sized block.
 */
NEVER_INLINE void* MemoryManager::newSlab(uint32_t nbytes) {
  if (UNLIKELY(m_stats.usage() > m_stats.limit)) {
    refreshStats();
  }
  requestGC();
  storeTail(m_front, (char*)m_limit - (char*)m_front);
  auto slab = m_heap.allocSlab(kSlabSize);
  assert((uintptr_t(slab.ptr) & kSmallSizeAlignMask) == 0);
  m_stats.mallocDebt += slab.size;
  m_stats.capacity += slab.size;
  m_stats.peakCap = std::max(m_stats.peakCap, m_stats.capacity);
  m_front = (void*)(uintptr_t(slab.ptr) + nbytes);
  m_limit = (void*)(uintptr_t(slab.ptr) + slab.size);
  FTRACE(3, "newSlab: adding slab at {} to limit {}\n", slab.ptr, m_limit);
  return slab.ptr;
}

/*
 * Allocate `bytes' from the current slab, aligned to kSmallSizeAlign.
 */
inline void* MemoryManager::slabAlloc(uint32_t bytes, unsigned index) {
  FTRACE(3, "slabAlloc({}, {}): m_front={}, m_limit={}\n", bytes, index,
         m_front, m_limit);
  uint32_t nbytes = smallIndex2Size(index);

  assert(bytes <= nbytes);
  assert(nbytes <= kSlabSize);
  assert((nbytes & kSmallSizeAlignMask) == 0);
  assert((uintptr_t(m_front) & kSmallSizeAlignMask) == 0);

  if (UNLIKELY(m_bypassSlabAlloc)) {
    // Stats correction; mallocBigSize() pulls stats from jemalloc.
    m_stats.mmUsage -= bytes;
    return mallocBigSize<FreeRequested>(nbytes).ptr;
  }

  void* ptr = m_front;
  {
    void* next = (void*)(uintptr_t(ptr) + nbytes);
    if (uintptr_t(next) <= uintptr_t(m_limit)) {
      m_front = next;
    } else {
      ptr = newSlab(nbytes);
    }
  }
  // Preallocate more of the same in order to amortize entry into this method.
  unsigned nSplit = kNContigTab[index] - 1;
  uintptr_t avail = uintptr_t(m_limit) - uintptr_t(m_front);
  if (UNLIKELY(nSplit * nbytes > avail)) {
    nSplit = avail / nbytes; // Expensive division.
  }
  if (nSplit > 0) {
    void* tail = m_front;
    uint32_t tailBytes = nSplit * nbytes;
    m_front = (void*)(uintptr_t(m_front) + tailBytes);
    splitTail(tail, tailBytes, nSplit, nbytes, index);
  }
  FTRACE(4, "slabAlloc({}, {}) --> ptr={}, m_front={}, m_limit={}\n", bytes,
         index, ptr, m_front, m_limit);
  return ptr;
}

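// Illustrative note (the kNContigTab entry used here is made up): for a
// 32-byte size class with kNContigTab[index] == 8, one trip through
// slabAlloc() returns a single 32-byte block and pre-carves up to 7 more
// 32-byte FreeNodes onto m_freelists[index] via splitTail(), so the next
// several requests for this class are served from the freelist fast path.
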
void* MemoryManager::mallocSmallSizeSlow(uint32_t bytes, unsigned index) {
  size_t nbytes = smallIndex2Size(index);
  unsigned nContig = kNContigTab[index];
  size_t contigMin = nContig * nbytes;
  unsigned contigInd = smallSize2Index(contigMin);
  for (unsigned i = contigInd; i < kNumSmallSizes; ++i) {
    FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}-->{}, {}): contigMin={}, "
              "contigInd={}, try i={}\n", bytes, nbytes, index, contigMin,
              contigInd, i);
    void* p = m_freelists[i].maybePop();
    if (p != nullptr) {
      FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}-->{}, {}): "
                "contigMin={}, contigInd={}, use i={}, size={}, p={}\n", bytes,
                nbytes, index, contigMin, contigInd, i, smallIndex2Size(i),
                p);
      // Split tail into preallocations and store them back into freelists.
      uint32_t availBytes = smallIndex2Size(i);
      uint32_t tailBytes = availBytes - nbytes;
      if (tailBytes > 0) {
        void* tail = (void*)(uintptr_t(p) + nbytes);
        splitTail(tail, tailBytes, nContig - 1, nbytes, index);
      }
      return p;
    }
  }

  // No available free list items; carve new space from the current slab.
  return slabAlloc(bytes, index);
}

inline void MemoryManager::updateBigStats() {
  // If we are using jemalloc, it is keeping track of allocations outside of
  // the slabs and the usage so we should force this after an allocation that
  // was too large for one of the existing slabs. When we're not using jemalloc
  // this check won't do anything so avoid the extra overhead.
  if (debug) requestEagerGC();
  if (use_jemalloc || UNLIKELY(m_stats.usage() > m_stats.limit)) {
    refreshStats();
  }
}

template<MemoryManager::MBS Mode> NEVER_INLINE
MemBlock MemoryManager::mallocBigSize(size_t bytes, HeaderKind kind,
                                      type_scan::Index ty) {
  if (debug) MM().requestEagerGC();
  auto block = Mode == ZeroFreeActual ? m_heap.callocBig(bytes, kind, ty) :
               m_heap.allocBig(bytes, kind, ty);
  // NB: We don't report the SweepNode size in the stats.
  auto const delta = Mode == FreeRequested ? bytes : block.size;
  m_stats.mmUsage += delta;
  // Adjust jemalloc otherwise we'll double count the direct allocation.
  m_stats.mallocDebt += delta;
  m_stats.capacity += block.size + sizeof(MallocNode);
  updateBigStats();
  FTRACE(3, "mallocBigSize: {} ({} requested, {} usable)\n",
         block.ptr, bytes, block.size);
  return block;
}

template NEVER_INLINE
MemBlock MemoryManager::mallocBigSize<MemoryManager::FreeRequested>(
  size_t, HeaderKind, type_scan::Index
);
template NEVER_INLINE
MemBlock MemoryManager::mallocBigSize<MemoryManager::FreeActual>(
  size_t, HeaderKind, type_scan::Index
);
template NEVER_INLINE
MemBlock MemoryManager::mallocBigSize<MemoryManager::ZeroFreeActual>(
  size_t, HeaderKind, type_scan::Index
);

MemBlock MemoryManager::resizeBig(MallocNode* n, size_t nbytes) {
  auto old_size = n->nbytes - sizeof(MallocNode);
  auto block = m_heap.resizeBig(n + 1, nbytes);
  m_stats.mmUsage += block.size - old_size;
  m_stats.mallocDebt += block.size - old_size;
  m_stats.capacity += block.size - old_size;
  updateBigStats();
  return block;
}

NEVER_INLINE
void MemoryManager::freeBigSize(void* vp, size_t bytes) {
  // Since we account for these direct allocations in our usage and adjust for
  // them on allocation, we also need to adjust for them negatively on free.
  m_stats.mmUsage -= bytes;
  m_stats.mallocDebt -= bytes;
  auto actual = static_cast<MallocNode*>(vp)[-1].nbytes;
  assert(bytes <= actual);
  m_stats.capacity -= actual;
  FTRACE(3, "freeBigSize: {} ({} bytes)\n", vp, bytes);
  m_heap.freeBig(vp);
}

// req::malloc api entry points, with support for malloc/free corner cases.
namespace req {

template<bool zero>
static void* allocate(size_t nbytes, type_scan::Index ty) {
  nbytes = std::max(nbytes, size_t(1));
  auto const npadded = nbytes + sizeof(MallocNode);
  if (LIKELY(npadded <= kMaxSmallSize)) {
    auto const ptr = static_cast<MallocNode*>(MM().mallocSmallSize(npadded));
    ptr->nbytes = npadded;
    ptr->initHeader(ty, HeaderKind::SmallMalloc, 0);
    return zero ? memset(ptr + 1, 0, nbytes) : ptr + 1;
  }
  auto constexpr mode = zero ? MemoryManager::ZeroFreeActual :
                        MemoryManager::FreeActual;
  auto block = MM().mallocBigSize<mode>(nbytes, HeaderKind::BigMalloc, ty);
  return block.ptr;
}

void* malloc(size_t nbytes, type_scan::Index tyindex) {
  return allocate<false>(nbytes, tyindex);
}

void* calloc(size_t count, size_t nbytes, type_scan::Index tyindex) {
  return allocate<true>(count * nbytes, tyindex);
}

void* realloc(void* ptr, size_t nbytes, type_scan::Index tyindex) {
  // first handle corner cases that degenerate to malloc() or free()
  if (!ptr) {
    return req::malloc(nbytes, tyindex);
  }
  if (!nbytes) {
    req::free(ptr);
    return nullptr;
  }
  FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
         ptr, nbytes, tyindex);
  auto const n = static_cast<MallocNode*>(ptr) - 1;
  if (LIKELY(n->nbytes <= kMaxSmallSize)) {
    // old block was small, cannot resize.
    auto newmem = req::malloc(nbytes, tyindex);
    auto copy_size = std::min(n->nbytes - sizeof(MallocNode), nbytes);
    newmem = memcpy(newmem, ptr, copy_size);
    MM().freeSmallSize(n, n->nbytes);
    return newmem;
  }
  // Ok, it's a big allocation.
  auto block = MM().resizeBig(n, nbytes);
  return block.ptr;
}

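// Usage sketch (illustrative; `ty' stands in for a type_scan::Index): the
// corner cases above make req::realloc behave like C realloc:
//
//   void* p = req::realloc(nullptr, 64, ty);  // same as req::malloc(64, ty)
//   p = req::realloc(p, 128, ty);             // small block: copy, then free
//   p = req::realloc(p, 0, ty);               // same as req::free(p);
//                                             // returns nullptr
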
char* strndup(const char* str, size_t len) {
  size_t n = std::min(len, strlen(str));
  char* ret = reinterpret_cast<char*>(
    req::malloc(n + 1, type_scan::kIndexUnknownNoPtrs)
  );
  memcpy(ret, str, n);
  ret[n] = 0;
  return ret;
}

void free(void* ptr) {
  if (!ptr) return;
  auto const n = static_cast<MallocNode*>(ptr) - 1;
  if (LIKELY(n->nbytes <= kMaxSmallSize)) {
    return MM().freeSmallSize(n, n->nbytes);
  }
  MM().freeBigSize(ptr, n->nbytes - sizeof(MallocNode));
}

} // namespace req

//////////////////////////////////////////////////////////////////////

void MemoryManager::addNativeObject(NativeNode* node) {
  if (debug) for (DEBUG_ONLY auto n : m_natives) assert(n != node);
  node->sweep_index = m_natives.size();
  m_natives.push_back(node);
}

void MemoryManager::removeNativeObject(NativeNode* node) {
  assert(node->sweep_index < m_natives.size());
  assert(m_natives[node->sweep_index] == node);
  auto index = node->sweep_index;
  auto last = m_natives.back();
  m_natives[index] = last;
  m_natives.pop_back();
  last->sweep_index = index;
}

void MemoryManager::addApcArray(APCLocalArray* a) {
  a->m_sweep_index = m_apc_arrays.size();
  m_apc_arrays.push_back(a);
}

void MemoryManager::removeApcArray(APCLocalArray* a) {
  assert(a->m_sweep_index < m_apc_arrays.size());
  assert(m_apc_arrays[a->m_sweep_index] == a);
  auto index = a->m_sweep_index;
  auto last = m_apc_arrays.back();
  m_apc_arrays[index] = last;
  m_apc_arrays.pop_back();
  last->m_sweep_index = index;
}

void MemoryManager::addSweepable(Sweepable* obj) {
  obj->enlist(&m_sweepables);
}

// defined here because memory-manager.h includes sweepable.h
Sweepable::Sweepable() {
  MM().addSweepable(this);
}

//////////////////////////////////////////////////////////////////////

void MemoryManager::resetCouldOOM(bool state) {
  clearSurpriseFlag(MemExceededFlag);
  m_couldOOM = state;
}

///////////////////////////////////////////////////////////////////////////////
// Request profiling.

bool MemoryManager::triggerProfiling(const std::string& filename) {
  auto trigger = new ReqProfContext();
  trigger->flag = true;
  trigger->filename = filename;

  ReqProfContext* expected = nullptr;

  if (!s_trigger.compare_exchange_strong(expected, trigger)) {
    delete trigger;
    return false;
  }
  return true;
}

void MemoryManager::requestInit() {
  MM().m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;

  // If the trigger has already been claimed, do nothing.
  auto trigger = s_trigger.exchange(nullptr);
  if (trigger == nullptr) return;

  always_assert(MM().empty());

  // Initialize the request-local context from the trigger.
  auto& profctx = MM().m_profctx;
  assert(!profctx.flag);

  MM().m_bypassSlabAlloc = true;
  profctx = *trigger;
  delete trigger;

#ifdef USE_JEMALLOC
  // Reset jemalloc stats.
  if (mallctlCall("prof.reset", true) != 0) {
    return;
  }

  // Enable jemalloc thread-local heap dumps.
  if (mallctlReadWrite("prof.active", &profctx.prof_active, true, true)
      != 0) {
    profctx = ReqProfContext{};
    return;
  }
  if (mallctlReadWrite("thread.prof.active", &profctx.thread_prof_active,
                       true, true) != 0) {
    mallctlWrite("prof.active", profctx.prof_active);
    profctx = ReqProfContext{};
    return;
  }
#endif
}

void MemoryManager::requestShutdown() {
  auto& profctx = MM().m_profctx;

  if (!profctx.flag) return;

#ifdef USE_JEMALLOC
  jemalloc_pprof_dump(profctx.filename, true);

  mallctlWrite("thread.prof.active", profctx.thread_prof_active);
  mallctlWrite("prof.active", profctx.prof_active);
#endif

  MM().m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
  MM().m_memThresholdCallbackPeakUsage = SIZE_MAX;
  profctx = ReqProfContext{};
}

/* static */ void MemoryManager::setupProfiling() {
  always_assert(MM().empty());
  MM().m_bypassSlabAlloc = true;
}

/* static */ void MemoryManager::teardownProfiling() {
  MM().m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
}

bool MemoryManager::isGCEnabled() {
  return m_gc_enabled;
}

void MemoryManager::setGCEnabled(bool isGCEnabled) {
  m_gc_enabled = isGCEnabled;
}

///////////////////////////////////////////////////////////////////////////////

void BigHeap::reset() {
  TRACE(1, "BigHeap-reset: slabs %lu bigs %lu\n", m_slabs.size(),
        m_bigs.size());
#ifdef USE_JEMALLOC
  auto do_free = [&](void* ptr) { dallocx(ptr, 0); };
#else
  auto do_free = [&](void* ptr) { free(ptr); };
#endif
  for (auto slab : m_slabs) do_free(slab.ptr);
  m_slabs.clear();
  for (auto n : m_bigs) do_free(n);
  m_bigs.clear();
}

void BigHeap::flush() {
  assert(empty());
  m_slabs = std::vector<MemBlock>{};
  m_bigs = std::vector<MallocNode*>{};
}

MemBlock BigHeap::allocSlab(size_t size) {
#ifdef USE_JEMALLOC
  void* slab = mallocx(size, 0);
#else
  void* slab = safe_malloc(size);
#endif
  m_slabs.push_back({slab, size});
  return {slab, size};
}

void BigHeap::enlist(MallocNode* n, HeaderKind kind,
                     size_t size, type_scan::Index tyindex) {
  n->initHeader(tyindex, kind, m_bigs.size());
  n->nbytes = size;
  m_bigs.push_back(n);
}

MemBlock BigHeap::allocBig(size_t bytes, HeaderKind kind,
                           type_scan::Index tyindex) {
#ifdef USE_JEMALLOC
  auto n = static_cast<MallocNode*>(mallocx(bytes + sizeof(MallocNode), 0));
  auto cap = sallocx(n, 0);
#else
  auto cap = bytes + sizeof(MallocNode);
  auto n = static_cast<MallocNode*>(safe_malloc(cap));
#endif
  enlist(n, kind, cap, tyindex);
  return {n + 1, cap - sizeof(MallocNode)};
}

MemBlock BigHeap::callocBig(size_t nbytes, HeaderKind kind,
                            type_scan::Index tyindex) {
#ifdef USE_JEMALLOC
  auto n = static_cast<MallocNode*>(
    mallocx(nbytes + sizeof(MallocNode), MALLOCX_ZERO)
  );
  auto cap = sallocx(n, 0);
#else
  auto cap = nbytes + sizeof(MallocNode);
  auto const n = static_cast<MallocNode*>(safe_calloc(cap, 1));
#endif
  enlist(n, kind, cap, tyindex);
  return {n + 1, nbytes};
}

bool BigHeap::contains(void* ptr) const {
  auto const ptrInt = reinterpret_cast<uintptr_t>(ptr);
  auto it = std::find_if(std::begin(m_slabs), std::end(m_slabs),
    [&] (MemBlock slab) {
      auto const baseInt = reinterpret_cast<uintptr_t>(slab.ptr);
      return ptrInt >= baseInt && ptrInt < baseInt + slab.size;
    }
  );
  return it != std::end(m_slabs);
}

void BigHeap::freeBig(void* ptr) {
  auto n = static_cast<MallocNode*>(ptr) - 1;
  auto i = n->index();
  auto last = m_bigs.back();
  last->index() = i;
  m_bigs[i] = last;
  m_bigs.pop_back();
#ifdef USE_JEMALLOC
  dallocx(n, 0);
#else
  free(n);
#endif
}

MemBlock BigHeap::resizeBig(void* ptr, size_t newsize) {
  // Since we don't know how big it is (i.e. how much data we should memcpy),
  // we have no choice but to ask malloc to realloc for us.
  auto const n = static_cast<MallocNode*>(ptr) - 1;
#ifdef USE_JEMALLOC
  auto const newNode = static_cast<MallocNode*>(
    rallocx(n, newsize + sizeof(MallocNode), 0)
  );
  newNode->nbytes = sallocx(newNode, 0);
#else
  auto const newNode = static_cast<MallocNode*>(
    safe_realloc(n, newsize + sizeof(MallocNode))
  );
  newNode->nbytes = newsize + sizeof(MallocNode);
#endif
  if (newNode != n) {
    m_bigs[newNode->index()] = newNode;
  }
  return {newNode + 1, newsize};
}

void BigHeap::sortSlabs() {
  std::sort(std::begin(m_slabs), std::end(m_slabs),
    [] (const MemBlock& l, const MemBlock& r) {
      assertx(static_cast<char*>(l.ptr) + l.size <= r.ptr ||
              static_cast<char*>(r.ptr) + r.size <= l.ptr);
      return l.ptr < r.ptr;
    }
  );
}

void BigHeap::sortBigs() {
  std::sort(std::begin(m_bigs), std::end(m_bigs));
  for (size_t i = 0, n = m_bigs.size(); i < n; ++i) {
    m_bigs[i]->index() = i;
  }
}

/*
 * To find `p', we sort the slabs, bisect them, then iterate the slab
 * containing `p'.  If there is no such slab, we bisect the bigs to try to find
 * a big containing `p'.
 *
 * If that fails, we return nullptr.
 */
Header* BigHeap::find(const void* p) {
  sortSlabs();
  auto const slab = std::lower_bound(
    std::begin(m_slabs), std::end(m_slabs), p,
    [] (const MemBlock& slab, const void* p) {
      return static_cast<const char*>(slab.ptr) + slab.size <= p;
    }
  );

  if (slab != std::end(m_slabs) && slab->ptr <= p) {
    // std::lower_bound() finds the first slab that is not less than `p'.  By
    // our comparison predicate, a slab is less than `p' iff its entire range
    // is below `p', so if the returned slab's start address is <= `p', then
    // the slab must contain `p'.  Within the slab, we just do a linear search.
    auto const slab_end = static_cast<char*>(slab->ptr) + slab->size;
    auto h = reinterpret_cast<char*>(slab->ptr);
    while (h < slab_end) {
      auto const hdr = reinterpret_cast<Header*>(h);
      auto const size = hdr->allocSize();
      if (p < h + size) return hdr;
      h += size;
    }
    // We know `p' is in the slab, so it must belong to one of the headers.
    always_assert(false);
  }

  sortBigs();

  auto const big = std::lower_bound(
    std::begin(m_bigs), std::end(m_bigs), p,
    [] (const MallocNode* big, const void* p) {
      return reinterpret_cast<const char*>(big) + big->nbytes <= p;
    }
  );

  if (big != std::end(m_bigs) && *big <= p) {
    auto const hdr = reinterpret_cast<Header*>(*big);
    if (hdr->kind() != HeaderKind::BigObj) {
      // `p' is part of the MallocNode.
      return hdr;
    }
    auto const sub = reinterpret_cast<Header*>(*big + 1);
    return p >= sub ? sub : hdr;
  }
  return nullptr;
}

///////////////////////////////////////////////////////////////////////////////

}