Heap Tracer
hphp/runtime/base/memory-manager.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/base/memory-manager.h"
18 #include <algorithm>
19 #include <cstdint>
20 #include <limits>
21 #include <sys/mman.h>
22 #include <unistd.h>
24 #include "hphp/runtime/base/builtin-functions.h"
25 #include "hphp/runtime/base/memory-profile.h"
26 #include "hphp/runtime/base/runtime-option.h"
27 #include "hphp/runtime/base/stack-logger.h"
28 #include "hphp/runtime/base/surprise-flags.h"
29 #include "hphp/runtime/base/sweepable.h"
30 #include "hphp/runtime/base/thread-info.h"
31 #include "hphp/runtime/server/http-server.h"
33 #include "hphp/util/alloc.h"
34 #include "hphp/util/logger.h"
35 #include "hphp/util/process.h"
36 #include "hphp/util/trace.h"
38 #include <folly/ScopeGuard.h>
39 #include "hphp/runtime/base/memory-manager-defs.h"
41 namespace HPHP {
43 TRACE_SET_MOD(smartalloc);
45 //////////////////////////////////////////////////////////////////////
47 std::atomic<MemoryManager::ReqProfContext*>
48 MemoryManager::s_trigger{nullptr};
50 // generate mmap flags for contiguous heap
51 uint32_t getRequestHeapFlags() {
52 struct stat buf;
54 // check whether MAP_UNINITIALIZED is supported
55 auto mapUninitializedSupported =
56 (stat("/sys/kernel/debug/fb_map_uninitialized", &buf) == 0);
57 auto mmapFlags = MAP_NORESERVE | MAP_ANON | MAP_PRIVATE;
59 /* Check whether mmap(2) supports the MAP_UNINITIALIZED flag. */
60 if (mapUninitializedSupported) {
61 mmapFlags |= MAP_UNINITIALIZED;
63 return mmapFlags;
66 static auto s_mmapFlags = getRequestHeapFlags();
68 #ifdef USE_JEMALLOC
69 bool MemoryManager::s_statsEnabled = false;
70 size_t MemoryManager::s_cactiveLimitCeiling = 0;
72 static size_t threadAllocatedpMib[2];
73 static size_t threadDeallocatedpMib[2];
74 static size_t statsCactiveMib[2];
75 static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
77 void MemoryManager::threadStatsInit() {
78 if (!mallctlnametomib) return;
79 size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
80 if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
81 return;
83 miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
84 if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
85 return;
87 miblen = sizeof(statsCactiveMib) / sizeof(size_t);
88 if (mallctlnametomib("stats.cactive", statsCactiveMib, &miblen)) {
89 return;
91 MemoryManager::s_statsEnabled = true;
93 // In threadStats() we wish to solve for cactiveLimit in:
95 // footprint + cactiveLimit + headRoom == MemTotal
97 // However, headRoom comes from RuntimeOption::ServerMemoryHeadRoom, which
98 // isn't initialized until after the code here runs. Therefore, compute
99 // s_cactiveLimitCeiling here in order to amortize the cost of introspecting
100 // footprint and MemTotal.
102 // cactiveLimit == (MemTotal - footprint) - headRoom
104 // cactiveLimit == s_cactiveLimitCeiling - headRoom
105 // where
106 // s_cactiveLimitCeiling == MemTotal - footprint
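// Worked example with assumed numbers (illustrative only): on a machine with
// MemTotal == 64 GB and a code footprint of 2 GB,
//   s_cactiveLimitCeiling == 64 GB - 2 GB == 62 GB
// and threadStats(), given ServerMemoryHeadRoom == 4 GB, later derives
//   cactiveLimit == 62 GB - 4 GB == 58 GB.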
107 size_t footprint = Process::GetCodeFootprint(Process::GetProcessId());
108 size_t MemTotal = 0;
109 #ifndef __APPLE__
110 size_t pageSize = size_t(sysconf(_SC_PAGESIZE));
111 MemTotal = size_t(sysconf(_SC_PHYS_PAGES)) * pageSize;
112 #else
113 int mib[2] = { CTL_HW, HW_MEMSIZE };
114 u_int namelen = sizeof(mib) / sizeof(mib[0]);
115 size_t len = sizeof(MemTotal);
116 sysctl(mib, namelen, &MemTotal, &len, nullptr, 0);
117 #endif
118 if (MemTotal > footprint) {
119 MemoryManager::s_cactiveLimitCeiling = MemTotal - footprint;
123 inline
124 void MemoryManager::threadStats(uint64_t*& allocated, uint64_t*& deallocated,
125 size_t*& cactive, size_t& cactiveLimit) {
126 pthread_once(&threadStatsOnce, threadStatsInit);
127 if (!MemoryManager::s_statsEnabled) return;
129 size_t len = sizeof(allocated);
130 if (mallctlbymib(threadAllocatedpMib,
131 sizeof(threadAllocatedpMib) / sizeof(size_t),
132 &allocated, &len, nullptr, 0)) {
133 not_reached();
136 len = sizeof(deallocated);
137 if (mallctlbymib(threadDeallocatedpMib,
138 sizeof(threadDeallocatedpMib) / sizeof(size_t),
139 &deallocated, &len, nullptr, 0)) {
140 not_reached();
143 len = sizeof(cactive);
144 if (mallctlbymib(statsCactiveMib,
145 sizeof(statsCactiveMib) / sizeof(size_t),
146 &cactive, &len, nullptr, 0)) {
147 not_reached();
150 int64_t headRoom = RuntimeOption::ServerMemoryHeadRoom;
151 // Compute cactiveLimit based on s_cactiveLimitCeiling, as computed in
152 // threadStatsInit().
153 if (headRoom != 0 && headRoom < MemoryManager::s_cactiveLimitCeiling) {
154 cactiveLimit = MemoryManager::s_cactiveLimitCeiling - headRoom;
155 } else {
156 cactiveLimit = std::numeric_limits<size_t>::max();
159 #endif
161 static void* MemoryManagerInit() {
162 // We store the free list pointers right at the start of each
163 // object, overlapping SmartHeader.data, and we also clobber _count
164 // as a free-object flag when the object is deallocated. This
165 // assert just makes sure they don't overflow.
166 assert(FAST_REFCOUNT_OFFSET + sizeof(int) <=
167 MemoryManager::smartSizeClass(1));
168 MemoryManager::TlsWrapper tls;
169 return (void*)tls.getNoCheck;
172 void* MemoryManager::TlsInitSetup = MemoryManagerInit();
174 void MemoryManager::Create(void* storage) {
175 new (storage) MemoryManager();
178 void MemoryManager::Delete(MemoryManager* mm) {
179 mm->~MemoryManager();
182 void MemoryManager::OnThreadExit(MemoryManager* mm) {
183 mm->~MemoryManager();
186 MemoryManager::MemoryManager()
187 : m_front(nullptr)
188 , m_limit(nullptr)
189 , m_sweeping(false) {
190 #ifdef USE_JEMALLOC
191 threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
192 #endif
193 resetStatsImpl(true);
194 m_stats.maxBytes = std::numeric_limits<int64_t>::max();
195 // make the circular-lists empty.
196 m_strings.next = m_strings.prev = &m_strings;
197 m_bypassSlabAlloc = RuntimeOption::DisableSmartAllocator;
200 void MemoryManager::resetRuntimeOptions() {
201 if (debug) {
202 checkHeap();
203 // check that every allocation in heap has been freed before reset
204 for (auto h = begin(), lim = end(); h != lim; ++h) {
205 if (h->kind_ == HeaderKind::Debug) {
206 auto h2 = h; ++h2;
207 if (h2 != lim) {
208 assert(h2->kind_ == HeaderKind::Free);
210 } else {
211 assert(h->kind_ == HeaderKind::Free ||
212 h->kind_ == HeaderKind::Hole);
216 MemoryManager::TlsWrapper::destroy();
217 MemoryManager::TlsWrapper::getCheck();
220 void MemoryManager::resetStatsImpl(bool isInternalCall) {
221 #ifdef USE_JEMALLOC
222 FTRACE(1, "resetStatsImpl({}) pre:\n", isInternalCall);
223 FTRACE(1, "usage: {}\nalloc: {}\npeak usage: {}\npeak alloc: {}\n",
224 m_stats.usage, m_stats.alloc, m_stats.peakUsage, m_stats.peakAlloc);
225 FTRACE(1, "total alloc: {}\nje alloc: {}\nje dealloc: {}\n",
226 m_stats.totalAlloc, m_prevAllocated, m_prevDeallocated);
227 FTRACE(1, "je debt: {}\n\n", m_stats.jemallocDebt);
228 #else
229 FTRACE(1, "resetStatsImpl({}) pre:\n"
230 "usage: {}\nalloc: {}\npeak usage: {}\npeak alloc: {}\n\n",
231 isInternalCall,
232 m_stats.usage, m_stats.alloc, m_stats.peakUsage, m_stats.peakAlloc);
233 #endif
234 if (isInternalCall) {
235 m_statsIntervalActive = false;
236 m_stats.usage = 0;
237 m_stats.alloc = 0;
238 m_stats.peakUsage = 0;
239 m_stats.peakAlloc = 0;
240 m_stats.totalAlloc = 0;
241 m_stats.peakIntervalUsage = 0;
242 m_stats.peakIntervalAlloc = 0;
243 #ifdef USE_JEMALLOC
244 m_enableStatsSync = false;
245 } else if (!m_enableStatsSync) {
246 #else
247 } else {
248 #endif
249 // This is only set by the jemalloc stats sync which we don't enable until
250 // after this has been called.
251 assert(m_stats.totalAlloc == 0);
252 #ifdef USE_JEMALLOC
253 assert(m_stats.jemallocDebt >= m_stats.alloc);
254 #endif
256 // The effect of this call is simply to ignore anything we've done *outside*
257 // the smart allocator after we initialized to avoid attributing shared
258 // structure initialization that happens during init_thread_locals() to this
259 // session.
261 // We don't want to clear the other values because we do already have some
262 // sized smart allocator usage and live slabs and wiping now will result in
263 // negative values when we try to reconcile our accounting with jemalloc.
264 #ifdef USE_JEMALLOC
265 // Anything that was definitively allocated by the smart allocator should
266 // be counted in this number even if we're otherwise zeroing out the count
267 // for each thread.
268 m_stats.totalAlloc = s_statsEnabled ? m_stats.jemallocDebt : 0;
270 m_enableStatsSync = s_statsEnabled;
271 #else
272 m_stats.totalAlloc = 0;
273 #endif
275 #ifdef USE_JEMALLOC
276 if (s_statsEnabled) {
277 m_stats.jemallocDebt = 0;
278 m_prevDeallocated = *m_deallocated;
279 m_prevAllocated = *m_allocated;
281 #endif
282 #ifdef USE_JEMALLOC
283 FTRACE(1, "resetStatsImpl({}) post:\n", isInternalCall);
284 FTRACE(1, "usage: {}\nalloc: {}\npeak usage: {}\npeak alloc: {}\n",
285 m_stats.usage, m_stats.alloc, m_stats.peakUsage, m_stats.peakAlloc);
286 FTRACE(1, "total alloc: {}\nje alloc: {}\nje dealloc: {}\n",
287 m_stats.totalAlloc, m_prevAllocated, m_prevDeallocated);
288 FTRACE(1, "je debt: {}\n\n", m_stats.jemallocDebt);
289 #else
290 FTRACE(1, "resetStatsImpl({}) post:\n"
291 "usage: {}\nalloc: {}\npeak usage: {}\npeak alloc: {}\n\n",
292 isInternalCall,
293 m_stats.usage, m_stats.alloc, m_stats.peakUsage, m_stats.peakAlloc);
294 #endif
297 void MemoryManager::refreshStatsHelperExceeded() {
298 setSurpriseFlag(MemExceededFlag);
299 m_couldOOM = false;
300 if (RuntimeOption::LogNativeStackOnOOM) {
301 log_native_stack("Exceeded memory limit");
305 #ifdef USE_JEMALLOC
306 void MemoryManager::refreshStatsHelperStop() {
307 HttpServer::Server->stop();
308 // Increase the limit to the maximum possible value, so that this method
309 // won't be called again.
310 m_cactiveLimit = std::numeric_limits<size_t>::max();
312 #endif
315 * Refresh stats to reflect directly malloc()ed memory, and determine
316 * whether the request memory limit has been exceeded.
318 * The stats parameter allows the updates to be applied to either
319 * m_stats as in refreshStats() or to a separate MemoryUsageStats
320 * struct as in getStatsSafe().
322 * The template variable live controls whether or not MemoryManager
323 * member variables are updated and whether or not to call helper
324 * methods in response to memory anomalies.
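* For example (a sketch of the two call sites named above): refreshStats()
* invokes refreshStatsImpl<true>(m_stats) and so updates member state, while
* getStatsSafe() applies the same refresh, via refreshStatsImpl<false>, to a
* separate copy of the stats so that no members or helpers are touched.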
326 template<bool live>
327 void MemoryManager::refreshStatsImpl(MemoryUsageStats& stats) {
328 #ifdef USE_JEMALLOC
329 // Incrementally incorporate the difference between the previous and current
330 // deltas into the memory usage statistic. For reference, the total
331 // malloced memory usage could be calculated as such, if delta0 were
332 // recorded in resetStatsImpl():
334 // int64 musage = delta - delta0;
336 // Note however, that SmartAllocator adds to m_stats.jemallocDebt
337 // when it calls malloc(), so that this function can avoid
338 // double-counting the malloced memory. Thus musage in the example
339 // code may well substantially exceed m_stats.usage.
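// Illustrative numbers (assumed, not measured): if the previous reading was
//   m_prevAllocated == 1000, m_prevDeallocated == 200   (delta0 == 800)
// and the current reading is
//   jeAllocated == 1500, jeDeallocated == 400            (delta == 1100)
// then usage grows by delta - delta0 == 300, minus whatever part of that 300
// was slab allocation already counted through m_stats.jemallocDebt.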
340 if (m_enableStatsSync) {
341 uint64_t jeDeallocated = *m_deallocated;
342 uint64_t jeAllocated = *m_allocated;
344 // We can't currently handle wrapping so make sure this isn't happening.
345 assert(jeAllocated >= 0 &&
346 jeAllocated <= std::numeric_limits<int64_t>::max());
347 assert(jeDeallocated >= 0 &&
348 jeDeallocated <= std::numeric_limits<int64_t>::max());
350 // This is the delta between the current and the previous jemalloc reading.
351 int64_t jeMMDeltaAllocated =
352 int64_t(jeAllocated) - int64_t(m_prevAllocated);
354 FTRACE(1, "Before stats sync:\n");
355 FTRACE(1, "je alloc:\ncurrent: {}\nprevious: {}\ndelta with MM: {}\n",
356 jeAllocated, m_prevAllocated, jeAllocated - m_prevAllocated);
357 FTRACE(1, "je dealloc:\ncurrent: {}\nprevious: {}\ndelta with MM: {}\n",
358 jeDeallocated, m_prevDeallocated, jeDeallocated - m_prevDeallocated);
359 FTRACE(1, "usage: {}\ntotal (je) alloc: {}\nje debt: {}\n",
360 stats.usage, stats.totalAlloc, stats.jemallocDebt);
362 if (!contiguous_heap) {
363 // Since these deltas potentially include memory allocated from another
364 // thread but deallocated on this one, it is possible for these numbers to
365 // go negative.
366 int64_t jeDeltaAllocated =
367 int64_t(jeAllocated) - int64_t(jeDeallocated);
368 int64_t mmDeltaAllocated =
369 int64_t(m_prevAllocated) - int64_t(m_prevDeallocated);
370 FTRACE(1, "je delta:\ncurrent: {}\nprevious: {}\n",
371 jeDeltaAllocated, mmDeltaAllocated);
373 // Subtract the old jemalloc adjustment (delta0) and add the current one
374 // (delta) to arrive at the new combined usage number.
375 stats.usage += jeDeltaAllocated - mmDeltaAllocated;
376 // Remove the "debt" accrued from allocating the slabs so we don't double
377 // count the slab-based allocations.
378 stats.usage -= stats.jemallocDebt;
381 stats.jemallocDebt = 0;
382 // We need to do the calculation instead of just setting it to jeAllocated
383 // because of the MaskAlloc capability.
384 stats.totalAlloc += jeMMDeltaAllocated;
385 if (live) {
386 m_prevAllocated = jeAllocated;
387 m_prevDeallocated = jeDeallocated;
390 FTRACE(1, "After stats sync:\n");
391 FTRACE(1, "usage: {}\ntotal (je) alloc: {}\n\n",
392 stats.usage, stats.totalAlloc);
394 #endif
395 if (stats.usage > stats.peakUsage) {
396 // NOTE: the peak memory usage monotonically increases, so there cannot
397 // be a second OOM exception in one request.
398 assert(stats.maxBytes > 0);
399 if (live && m_couldOOM && stats.usage > stats.maxBytes) {
400 refreshStatsHelperExceeded();
402 // Check whether the process's active memory limit has been exceeded, and
403 // if so, stop the server.
405 // Only check whether the total memory limit was exceeded if this request
406 // is at a new high water mark. This check could be performed regardless
407 // of this request's current memory usage (because other request threads
408 // could be to blame for the increased memory usage), but doing so would
409 // measurably increase computation for little benefit.
410 #ifdef USE_JEMALLOC
411 // (*m_cactive) consistency is achieved via atomic operations. The fact
412 // that we do not use an atomic operation here means that we could get a
413 // stale read, but in practice that poses no problems for how we are
414 // using the value.
415 if (live && s_statsEnabled && *m_cactive > m_cactiveLimit) {
416 refreshStatsHelperStop();
418 #endif
419 stats.peakUsage = stats.usage;
421 if (live && m_statsIntervalActive) {
422 if (stats.usage > stats.peakIntervalUsage) {
423 stats.peakIntervalUsage = stats.usage;
425 if (stats.alloc > stats.peakIntervalAlloc) {
426 stats.peakIntervalAlloc = stats.alloc;
431 template void MemoryManager::refreshStatsImpl<true>(MemoryUsageStats& stats);
432 template void MemoryManager::refreshStatsImpl<false>(MemoryUsageStats& stats);
434 void MemoryManager::sweep() {
435 assert(!sweeping());
436 if (debug) checkHeap();
437 if (debug) traceHeap();
438 m_sweeping = true;
439 SCOPE_EXIT { m_sweeping = false; };
440 DEBUG_ONLY size_t num_sweepables = 0, num_natives = 0;
442 // iterate until both sweep lists are empty. Entries can be added or
443 // removed from either list during sweeping.
444 do {
445 while (!m_sweepables.empty()) {
446 num_sweepables++;
447 auto obj = m_sweepables.next();
448 obj->unregister();
449 obj->sweep();
451 while (!m_natives.empty()) {
452 num_natives++;
453 assert(m_natives.back()->sweep_index == m_natives.size() - 1);
454 auto node = m_natives.back();
455 m_natives.pop_back();
456 auto obj = Native::obj(node);
457 auto ndi = obj->getVMClass()->getNativeDataInfo();
458 ndi->sweep(obj);
459 // trash the native data but leave the header and object parsable
460 assert(memset(node+1, kSmartFreeFill, node->obj_offset - sizeof(*node)));
462 } while (!m_sweepables.empty());
464 TRACE(1, "sweep: sweepable %lu native %lu\n", num_sweepables, num_natives);
465 if (debug) checkHeap();
468 void MemoryManager::resetAllocator() {
469 assert(m_natives.empty() && m_sweepables.empty());
470 // decref apc strings and arrays referenced by this request
471 DEBUG_ONLY auto napcs = m_apc_arrays.size();
472 while (!m_apc_arrays.empty()) {
473 auto a = m_apc_arrays.back();
474 m_apc_arrays.pop_back();
475 a->sweep();
477 DEBUG_ONLY auto nstrings = StringData::sweepAll();
479 // free the heap
480 m_heap.reset();
482 // zero out freelists
483 for (auto& i : m_freelists) i.head = nullptr;
484 m_front = m_limit = 0;
486 resetStatsImpl(true);
487 TRACE(1, "reset: apc-arrays %lu strings %u\n", napcs, nstrings);
490 void MemoryManager::flush() {
491 always_assert(empty());
492 m_heap.flush();
493 m_apc_arrays = std::vector<APCLocalArray*>();
494 m_natives = std::vector<NativeNode*>();
498 * smart_malloc & friends implementation notes
500 * There are three kinds of smart mallocation:
502 * a) Large allocations. (size >= kMaxSmartSize)
504 * In this case we behave as a wrapper around the normal libc
505 * malloc/free. We insert a SweepNode header at the front of the
506 * allocation in order to find these at sweep time (end of
507 * request) so we can give them back to libc.
509 * b) Size-tracked small allocations.
511 * This is used for the generic case, for callers who can't tell
512 * us the size of the allocation at free time.
514 * In this situation, we put a SmallNode header at the front of
515 * the block that tells us the size for when we need to free it
516 * later. We differentiate this from a SweepNode (for a big
517 * allocation) by assuming that no SweepNode::prev will point to
518 * an address in the first kMaxSmartSize bytes of virtual address
519 * space.
521 * c) Size-untracked small allocation
523 * Many callers have an easy time telling you how big the object
524 * was when they need to free it. In this case we can avoid the
525 * SmallNode, which saves us some memory and also lets us give
526 * out 16-byte aligned pointers easily.
528 * We know when we have one of these because it has to be freed
529 * through a different entry point. (E.g. MM().smartFreeSize or
530 * MM().smartFreeSizeBig.)
532 * When small blocks are freed (case b and c), they're placed in the
533 * appropriate size-segregated freelist. Large blocks are immediately
534 * passed back to libc via free.
536 * There are currently two kinds of freelist entries: entries where
537 * there is already a valid SmallNode on the list (case b), and
538 * entries where there isn't (case c). The reason for this is that
539 * that way, when allocating for case b, you don't need to store the
540 * SmallNode size again. Much of the heap is going through case b at
541 * the time of this writing, so it is a measurable regression to try
542 * to just combine the free lists, but presumably we can move more to
543 * case c and combine the lists eventually.
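*
* Illustrative call patterns (a sketch only; the sizes are hypothetical):
*
*   // generic path, cases (a) and (b): the size is tracked for the caller
*   void* p = smart_malloc(100);            // small: gets a SmallNode header
*   void* q = smart_malloc(kMaxSmartSize);  // padded size is big: big path
*   smart_free(p);
*   smart_free(q);
*
*   // case (c): the caller supplies the size at both ends, no SmallNode
*   void* r = MM().smartMallocSize(64);
*   MM().smartFreeSize(r, 64);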
546 inline void* MemoryManager::smartMalloc(size_t nbytes) {
547 auto const nbytes_padded = nbytes + sizeof(SmallNode);
548 if (LIKELY(nbytes_padded <= kMaxSmartSize)) {
549 auto const ptr = static_cast<SmallNode*>(smartMallocSize(nbytes_padded));
550 ptr->padbytes = nbytes_padded;
551 ptr->kind = HeaderKind::SmallMalloc;
552 return ptr + 1;
554 return smartMallocBig(nbytes);
557 union MallocNode {
558 BigNode big;
559 SmallNode small;
562 static_assert(sizeof(SmallNode) == sizeof(BigNode), "");
564 inline void MemoryManager::smartFree(void* ptr) {
565 assert(ptr != 0);
566 auto const n = static_cast<MallocNode*>(ptr) - 1;
567 auto const padbytes = n->small.padbytes;
568 if (LIKELY(padbytes <= kMaxSmartSize)) {
569 return smartFreeSize(&n->small, n->small.padbytes);
571 m_heap.freeBig(ptr);
574 inline void* MemoryManager::smartRealloc(void* ptr, size_t nbytes) {
575 FTRACE(3, "smartRealloc: {} to {}\n", ptr, nbytes);
576 assert(nbytes > 0);
577 auto const n = static_cast<MallocNode*>(ptr) - 1;
578 if (LIKELY(n->small.padbytes <= kMaxSmartSize)) {
579 void* newmem = smart_malloc(nbytes);
580 auto const copySize = std::min(
581 n->small.padbytes - sizeof(SmallNode),
582 nbytes
584 newmem = memcpy(newmem, ptr, copySize);
585 smart_free(ptr);
586 return newmem;
588 // Ok, it's a big allocation.
589 auto block = m_heap.resizeBig(ptr, nbytes);
590 refreshStats();
591 return block.ptr;
594 namespace {
595 DEBUG_ONLY const char* header_names[] = {
596 "Packed", "Struct", "Mixed", "Empty", "Apc", "Globals", "Proxy",
597 "String", "Object", "ResumableObj", "AwaitAllWH", "Resource", "Ref",
598 "Resumable", "Native", "SmallMalloc", "BigMalloc", "BigObj",
599 "Free", "Hole", "Debug"
601 static_assert(sizeof(header_names)/sizeof(*header_names) == NumHeaderKinds, "");
603 // Reverse lookup table from size class index back to block size.
604 struct SizeTable {
605 size_t table[kNumSmartSizes];
606 SizeTable() {
607 #define SMART_SIZE(i,d,s) table[i] = s;
608 SMART_SIZES
609 #undef SMART_SIZE
610 assert(table[27] == 4096 && table[28] == 0);
611 // pick up where the macros left off
612 auto i = 28;
613 auto s = 4096;
614 auto d = s/4;
615 for (; i < kNumSmartSizes; d *= 2) {
616 // each power of two size has 4 linear spaced size classes
617 table[i++] = (s += d);
618 if (i < kNumSmartSizes) table[i++] = (s += d);
619 if (i < kNumSmartSizes) table[i++] = (s += d);
620 if (i < kNumSmartSizes) table[i++] = (s += d);
623 static_assert(LG_SMART_SIZES_PER_DOUBLING == 2, "");
625 SizeTable s_index2size;
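// For reference, the constructor's loop continues the table past the
// macro-defined classes as follows (derived from the loop above):
//   4096 -> 5120, 6144, 7168, 8192      (d = 1024)
//        -> 10240, 12288, 14336, 16384  (d = 2048)
//        -> 20480, 24576, 28672, 32768  (d = 4096)
// i.e. four linearly spaced classes per power-of-two doubling, until all
// kNumSmartSizes entries are filled.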
629 // initialize a Hole header in the unused memory between m_front and m_limit
630 void MemoryManager::initHole() {
631 if ((char*)m_front < (char*)m_limit) {
632 auto hdr = static_cast<FreeNode*>(m_front);
633 hdr->kind = HeaderKind::Hole;
634 hdr->size = (char*)m_limit - (char*)m_front;
638 // initialize the FreeNode header on all freelist entries.
639 void MemoryManager::initFree() {
640 for (size_t i = 0; i < kNumSmartSizes; i++) {
641 auto size = s_index2size.table[i];
642 for (auto n = m_freelists[i].head; n; n = n->next) {
643 n->kind_size = HeaderKind::Free<<24 | size<<32;
648 BigHeap::iterator MemoryManager::begin() {
649 initHole();
650 initFree();
651 return m_heap.begin();
654 BigHeap::iterator MemoryManager::end() {
655 return m_heap.end();
658 // test iterating objects in slabs
659 void MemoryManager::checkHeap() {
660 size_t bytes=0;
661 std::vector<Header*> hdrs;
662 std::unordered_set<FreeNode*> free_blocks;
663 std::unordered_set<APCLocalArray*> apc_arrays;
664 std::unordered_set<StringData*> apc_strings;
665 size_t counts[NumHeaderKinds];
666 for (unsigned i=0; i < NumHeaderKinds; i++) counts[i] = 0;
667 for (auto h = begin(), lim = end(); h != lim; ++h) {
668 hdrs.push_back(&*h);
669 TRACE(2, "checkHeap: hdr %p\n", hdrs[hdrs.size()-1]);
670 bytes += h->size();
671 counts[(int)h->kind_]++;
672 switch (h->kind_) {
673 case HeaderKind::Debug: {
674 // the next block's parsed size should agree with DebugHeader
675 auto h2 = h; ++h2;
676 if (h2 != lim) {
677 assert(h2->kind_ != HeaderKind::Debug);
678 assert(h->debug_.returnedCap ==
679 MemoryManager::smartSizeClass(h2->size()));
681 break;
683 case HeaderKind::Free:
684 free_blocks.insert(&h->free_);
685 break;
686 case HeaderKind::Apc:
687 apc_arrays.insert(&h->apc_);
688 break;
689 case HeaderKind::String:
690 if (h->str_.isShared()) apc_strings.insert(&h->str_);
691 break;
692 case HeaderKind::Packed:
693 case HeaderKind::Struct:
694 case HeaderKind::Mixed:
695 case HeaderKind::Empty:
696 case HeaderKind::Globals:
697 case HeaderKind::Proxy:
698 case HeaderKind::Object:
699 case HeaderKind::ResumableObj:
700 case HeaderKind::AwaitAllWH:
701 case HeaderKind::Resource:
702 case HeaderKind::Ref:
703 case HeaderKind::Resumable:
704 case HeaderKind::Native:
705 case HeaderKind::SmallMalloc:
706 case HeaderKind::BigMalloc:
707 case HeaderKind::BigObj:
708 case HeaderKind::Hole:
709 break;
713 // check the free lists
714 for (size_t i = 0; i < kNumSmartSizes; i++) {
715 for (auto n = m_freelists[i].head; n; n = n->next) {
716 assert(free_blocks.find(n) != free_blocks.end());
717 free_blocks.erase(n);
720 assert(free_blocks.empty());
722 // check the apc array list
723 for (auto a : m_apc_arrays) {
724 assert(apc_arrays.find(a) != apc_arrays.end());
725 apc_arrays.erase(a);
727 assert(apc_arrays.empty());
729 // check the apc string list
730 for (StringDataNode *next, *n = m_strings.next; n != &m_strings; n = next) {
731 next = n->next;
732 auto const s = StringData::node2str(n);
733 assert(s->isShared());
734 assert(apc_strings.find(s) != apc_strings.end());
735 apc_strings.erase(s);
737 assert(apc_strings.empty());
739 TRACE(1, "checkHeap: %lu objects %lu bytes\n", hdrs.size(), bytes);
740 TRACE(1, "checkHeap-types: ");
741 for (unsigned i = 0; i < NumHeaderKinds; ++i) {
742 TRACE(1, "%s %lu%s", header_names[i], counts[i],
743 (i + 1 < NumHeaderKinds ? " " : "\n"));
748 * Get a new slab, then allocate nbytes from it and install it in our
749 * slab list. Return the newly allocated nbytes-sized block.
751 NEVER_INLINE void* MemoryManager::newSlab(size_t nbytes) {
752 if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
753 refreshStats();
755 initHole(); // enable parsing the leftover space in the old slab
756 if (debug && RuntimeOption::EvalCheckHeapOnAlloc) checkHeap();
757 if (debug) traceHeap();
758 auto slab = m_heap.allocSlab(kSlabSize);
759 assert((uintptr_t(slab.ptr) & kSmartSizeAlignMask) == 0);
760 m_stats.borrow(slab.size);
761 m_stats.alloc += slab.size;
762 if (m_stats.alloc > m_stats.peakAlloc) {
763 m_stats.peakAlloc = m_stats.alloc;
765 m_front = (void*)(uintptr_t(slab.ptr) + nbytes);
766 m_limit = (void*)(uintptr_t(slab.ptr) + slab.size);
767 FTRACE(3, "newSlab: adding slab at {} to limit {}\n", slab.ptr, m_limit);
768 return slab.ptr;
772 * Allocate `bytes' from the current slab, aligned to kSmartSizeAlign.
774 void* MemoryManager::slabAlloc(uint32_t bytes, unsigned index) {
775 FTRACE(3, "slabAlloc({}, {})\n", bytes, index);
776 size_t nbytes = debugAddExtra(smartSizeClass(bytes));
778 assert(nbytes <= kSlabSize);
779 assert((nbytes & kSmartSizeAlignMask) == 0);
780 assert((uintptr_t(m_front) & kSmartSizeAlignMask) == 0);
782 if (UNLIKELY(m_bypassSlabAlloc)) {
783 // Stats correction; smartMallocSizeBig() pulls stats from jemalloc.
784 m_stats.usage -= bytes;
785 // smartMallocSizeBig already wraps its allocation in a debug header, but
786 // the caller will try to do it again, so we need to adjust this pointer
787 // before returning it.
788 return ((char*)smartMallocSizeBig<false>(nbytes).ptr) - kDebugExtraSize;
791 void* ptr = m_front;
793 void* next = (void*)(uintptr_t(ptr) + nbytes);
794 if (uintptr_t(next) <= uintptr_t(m_limit)) {
795 m_front = next;
796 } else {
797 ptr = newSlab(nbytes);
800 // Preallocate more of the same in order to amortize entry into this method.
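// Illustration with assumed limits (the real constants may differ): if
// kSmartPreallocCountLimit were 8 and kSmartPreallocBytesLimit were 4096,
// a 256-byte size class would preallocate 8 blocks (8 * 256 <= 4096), while
// a 1024-byte size class would preallocate only 4096 / 1024 == 4 blocks,
// capped further below by the space left in the current slab.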
801 unsigned nPrealloc;
802 if (nbytes * kSmartPreallocCountLimit <= kSmartPreallocBytesLimit) {
803 nPrealloc = kSmartPreallocCountLimit;
804 } else {
805 nPrealloc = kSmartPreallocBytesLimit / nbytes;
808 void* front = (void*)(uintptr_t(m_front) + nPrealloc*nbytes);
809 if (uintptr_t(front) > uintptr_t(m_limit)) {
810 nPrealloc = ((uintptr_t)m_limit - uintptr_t(m_front)) / nbytes;
811 front = (void*)(uintptr_t(m_front) + nPrealloc*nbytes);
813 m_front = front;
815 for (void* p = (void*)(uintptr_t(m_front) - nbytes); p != ptr;
816 p = (void*)(uintptr_t(p) - nbytes)) {
817 auto usable = debugRemoveExtra(nbytes);
818 auto ptr = debugPostAllocate(p, usable, usable);
819 debugPreFree(ptr, usable, usable);
820 m_freelists[index].push(ptr, usable);
822 return ptr;
825 inline void MemoryManager::updateBigStats() {
826 // If we are using jemalloc, it is keeping track of allocations outside of
827 // the slabs and the usage so we should force this after an allocation that
828 // was too large for one of the existing slabs. When we're not using jemalloc
829 // this check won't do anything so avoid the extra overhead.
830 if (use_jemalloc || UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
831 refreshStats();
835 NEVER_INLINE
836 void* MemoryManager::smartMallocBig(size_t nbytes) {
837 assert(nbytes > 0);
838 auto block = m_heap.allocBig(nbytes, HeaderKind::BigMalloc);
839 updateBigStats();
840 return block.ptr;
843 template NEVER_INLINE
844 MemBlock MemoryManager::smartMallocSizeBig<true>(size_t);
845 template NEVER_INLINE
846 MemBlock MemoryManager::smartMallocSizeBig<false>(size_t);
848 template<bool callerSavesActualSize> NEVER_INLINE
849 MemBlock MemoryManager::smartMallocSizeBig(size_t bytes) {
850 auto block = m_heap.allocBig(debugAddExtra(bytes), HeaderKind::BigObj);
851 auto szOut = debugRemoveExtra(block.size);
852 #ifdef USE_JEMALLOC
853 // NB: We don't report the SweepNode size in the stats.
854 auto const delta = callerSavesActualSize ? szOut : bytes;
855 m_stats.usage += int64_t(delta);
856 // Adjust jemalloc otherwise we'll double count the direct allocation.
857 m_stats.borrow(delta);
858 #else
859 m_stats.usage += bytes;
860 #endif
861 updateBigStats();
862 auto ptrOut = debugPostAllocate(block.ptr, bytes, szOut);
863 FTRACE(3, "smartMallocSizeBig: {} ({} requested, {} usable)\n",
864 ptrOut, bytes, szOut);
865 return {ptrOut, szOut};
868 NEVER_INLINE
869 void* MemoryManager::smartCallocBig(size_t totalbytes) {
870 assert(totalbytes > 0);
871 auto block = m_heap.callocBig(totalbytes);
872 updateBigStats();
873 return block.ptr;
876 // smart_malloc api entry points, with support for malloc/free corner cases.
878 void* smart_malloc(size_t nbytes) {
879 auto& mm = MM();
880 auto const size = std::max(nbytes, size_t(1));
881 return mm.smartMalloc(size);
884 void* smart_calloc(size_t count, size_t nbytes) {
885 auto& mm = MM();
886 auto const totalBytes = std::max<size_t>(count * nbytes, 1);
887 if (totalBytes <= kMaxSmartSize) {
888 return memset(smart_malloc(totalBytes), 0, totalBytes);
890 return mm.smartCallocBig(totalBytes);
893 void* smart_realloc(void* ptr, size_t nbytes) {
894 auto& mm = MM();
895 if (!ptr) return smart_malloc(nbytes);
896 if (!nbytes) {
897 smart_free(ptr);
898 return nullptr;
900 return mm.smartRealloc(ptr, nbytes);
903 void smart_free(void* ptr) {
904 if (ptr) MM().smartFree(ptr);
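// Corner-case behavior, illustrated (sketch of the wrappers above):
//   void* p = smart_malloc(0);        // rounded up to a 1-byte request
//   p = smart_realloc(nullptr, 32);   // behaves like smart_malloc(32)
//   p = smart_realloc(p, 0);          // frees p and returns nullptr
//   smart_free(nullptr);              // no-op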
907 //////////////////////////////////////////////////////////////////////
909 void MemoryManager::addNativeObject(NativeNode* node) {
910 if (debug) for (DEBUG_ONLY auto n : m_natives) assert(n != node);
911 node->sweep_index = m_natives.size();
912 m_natives.push_back(node);
915 void MemoryManager::removeNativeObject(NativeNode* node) {
916 assert(node->sweep_index < m_natives.size());
917 assert(m_natives[node->sweep_index] == node);
918 auto index = node->sweep_index;
919 auto last = m_natives.back();
920 m_natives[index] = last;
921 m_natives.pop_back();
922 last->sweep_index = index;
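// (Unordered O(1) removal: the last native node is moved into the vacated
//  slot and its sweep_index patched; e.g. removing index 1 from [a,b,c,d]
//  leaves [a,d,c] with d->sweep_index == 1.)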
925 void MemoryManager::addApcArray(APCLocalArray* a) {
926 a->m_sweep_index = m_apc_arrays.size();
927 m_apc_arrays.push_back(a);
930 void MemoryManager::removeApcArray(APCLocalArray* a) {
931 assert(a->m_sweep_index < m_apc_arrays.size());
932 assert(m_apc_arrays[a->m_sweep_index] == a);
933 auto index = a->m_sweep_index;
934 auto last = m_apc_arrays.back();
935 m_apc_arrays[index] = last;
936 m_apc_arrays.pop_back();
937 last->m_sweep_index = index;
940 void MemoryManager::addSweepable(Sweepable* obj) {
941 obj->enlist(&m_sweepables);
944 // defined here because memory-manager.h includes sweepable.h
945 Sweepable::Sweepable() {
946 MM().addSweepable(this);
949 //////////////////////////////////////////////////////////////////////
951 #ifdef DEBUG
953 void* MemoryManager::debugPostAllocate(void* p,
954 size_t bytes,
955 size_t returnedCap) {
956 auto const header = static_cast<DebugHeader*>(p);
957 header->allocatedMagic = DebugHeader::kAllocatedMagic;
958 header->kind = HeaderKind::Debug;
959 header->requestedSize = bytes;
960 header->returnedCap = returnedCap;
961 return (void*)(uintptr_t(header) + kDebugExtraSize);
964 void* MemoryManager::debugPreFree(void* p,
965 size_t bytes,
966 size_t userSpecifiedBytes) {
967 auto const header = reinterpret_cast<DebugHeader*>(uintptr_t(p) -
968 kDebugExtraSize);
969 assert(checkPreFree(header, bytes, userSpecifiedBytes));
970 header->requestedSize = DebugHeader::kFreedMagic;
971 memset(p, kSmartFreeFill, bytes);
972 return header;
975 #endif
977 bool MemoryManager::checkPreFree(DebugHeader* p,
978 size_t bytes,
979 size_t userSpecifiedBytes) const {
980 assert(debug);
981 assert(p->allocatedMagic == DebugHeader::kAllocatedMagic);
983 if (userSpecifiedBytes != 0) {
984 // For size-specified frees, the size they report when freeing
985 // must be between the requested size and the actual capacity.
986 assert(userSpecifiedBytes >= p->requestedSize &&
987 userSpecifiedBytes <= p->returnedCap);
989 if (!m_bypassSlabAlloc && bytes != 0 && bytes <= kMaxSmartSize) {
990 assert(m_heap.contains(p));
993 return true;
996 void MemoryManager::logAllocation(void* p, size_t bytes) {
997 MemoryProfile::logAllocation(p, bytes);
1000 void MemoryManager::logDeallocation(void* p) {
1001 MemoryProfile::logDeallocation(p);
1004 void MemoryManager::resetCouldOOM(bool state) {
1005 clearSurpriseFlag(MemExceededFlag);
1006 m_couldOOM = state;
1010 ///////////////////////////////////////////////////////////////////////////////
1011 // Request profiling.
1013 bool MemoryManager::triggerProfiling(const std::string& filename) {
1014 auto trigger = new ReqProfContext();
1015 trigger->flag = true;
1016 trigger->filename = filename;
1018 ReqProfContext* expected = nullptr;
1020 if (!s_trigger.compare_exchange_strong(expected, trigger)) {
1021 delete trigger;
1022 return false;
1024 return true;
1027 void MemoryManager::requestInit() {
1028 auto trigger = s_trigger.exchange(nullptr);
1030 // If the trigger has already been claimed, do nothing.
1031 if (trigger == nullptr) return;
1033 always_assert(MM().empty());
1035 // Initialize the request-local context from the trigger.
1036 auto& profctx = MM().m_profctx;
1037 assert(!profctx.flag);
1039 MM().m_bypassSlabAlloc = true;
1040 profctx = *trigger;
1041 delete trigger;
1043 #ifdef USE_JEMALLOC
1044 bool active = true;
1045 size_t boolsz = sizeof(bool);
1047 // Reset jemalloc stats.
1048 if (mallctl("prof.reset", nullptr, nullptr, nullptr, 0)) {
1049 return;
1052 // Enable jemalloc thread-local heap dumps.
1053 if (mallctl("prof.active",
1054 &profctx.prof_active, &boolsz,
1055 &active, sizeof(bool))) {
1056 profctx = ReqProfContext{};
1057 return;
1059 if (mallctl("thread.prof.active",
1060 &profctx.thread_prof_active, &boolsz,
1061 &active, sizeof(bool))) {
1062 mallctl("prof.active", nullptr, nullptr,
1063 &profctx.prof_active, sizeof(bool));
1064 profctx = ReqProfContext{};
1065 return;
1067 #endif
1070 void MemoryManager::requestShutdown() {
1071 auto& profctx = MM().m_profctx;
1073 if (!profctx.flag) return;
1075 #ifdef USE_JEMALLOC
1076 jemalloc_pprof_dump(profctx.filename, true);
1078 mallctl("thread.prof.active", nullptr, nullptr,
1079 &profctx.thread_prof_active, sizeof(bool));
1080 mallctl("prof.active", nullptr, nullptr,
1081 &profctx.prof_active, sizeof(bool));
1082 #endif
1084 MM().m_bypassSlabAlloc = RuntimeOption::DisableSmartAllocator;
1085 profctx = ReqProfContext{};
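// Typical flow (illustrative; the call site that arms the trigger lives
// outside this file):
//   MemoryManager::triggerProfiling("/tmp/heap.prof");  // e.g. an admin command
//   // The next request to run requestInit() claims the trigger, resets
//   // jemalloc profiling state, and enables thread-local heap profiling;
//   // requestShutdown() then dumps to the requested file via
//   // jemalloc_pprof_dump() and restores the previous prof.active settings.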
1088 ///////////////////////////////////////////////////////////////////////////////
1090 void BigHeap::reset() {
1091 TRACE(1, "BigHeap-reset: slabs %lu bigs %lu\n", m_slabs.size(),
1092 m_bigs.size());
1093 for (auto slab : m_slabs) {
1094 free(slab.ptr);
1096 m_slabs.clear();
1097 for (auto n : m_bigs) {
1098 free(n);
1100 m_bigs.clear();
1103 void BigHeap::flush() {
1104 assert(empty());
1105 m_slabs = std::vector<MemBlock>{};
1106 m_bigs = std::vector<BigNode*>{};
1109 MemBlock BigHeap::allocSlab(size_t size) {
1110 void* slab = safe_malloc(size);
1111 m_slabs.push_back({slab, size});
1112 return {slab, size};
1115 void BigHeap::enlist(BigNode* n, HeaderKind kind, size_t size) {
1116 n->nbytes = size;
1117 n->kind = kind;
1118 n->index = m_bigs.size();
1119 m_bigs.push_back(n);
1122 MemBlock BigHeap::allocBig(size_t bytes, HeaderKind kind) {
1123 #ifdef USE_JEMALLOC
1124 auto n = static_cast<BigNode*>(mallocx(bytes + sizeof(BigNode), 0));
1125 auto cap = sallocx(n, 0);
1126 #else
1127 auto cap = bytes + sizeof(BigNode);
1128 auto n = static_cast<BigNode*>(safe_malloc(cap));
1129 #endif
1130 enlist(n, kind, cap);
1131 return {n + 1, cap - sizeof(BigNode)};
1134 MemBlock BigHeap::callocBig(size_t nbytes) {
1135 auto cap = nbytes + sizeof(BigNode);
1136 auto const n = static_cast<BigNode*>(safe_calloc(cap, 1));
1137 enlist(n, HeaderKind::BigMalloc, cap);
1138 return {n + 1, nbytes};
1141 bool BigHeap::contains(void* ptr) const {
1142 auto const ptrInt = reinterpret_cast<uintptr_t>(ptr);
1143 auto it = std::find_if(std::begin(m_slabs), std::end(m_slabs),
1144 [&] (MemBlock slab) {
1145 auto const baseInt = reinterpret_cast<uintptr_t>(slab.ptr);
1146 return ptrInt >= baseInt && ptrInt < baseInt + slab.size;
1149 return it != std::end(m_slabs);
1152 NEVER_INLINE
1153 void BigHeap::freeBig(void* ptr) {
1154 auto n = static_cast<BigNode*>(ptr) - 1;
1155 auto i = n->index;
1156 auto last = m_bigs.back();
1157 last->index = i;
1158 m_bigs[i] = last;
1159 m_bigs.pop_back();
1160 free(n);
1163 MemBlock BigHeap::resizeBig(void* ptr, size_t newsize) {
1164 // Since we don't know how big it is (i.e. how much data we should memcpy),
1165 // we have no choice but to ask malloc to realloc for us.
1166 auto const n = static_cast<BigNode*>(ptr) - 1;
1167 auto const newNode = static_cast<BigNode*>(
1168 safe_realloc(n, newsize + sizeof(BigNode))
1170 if (newNode != n) {
1171 m_bigs[newNode->index] = newNode;
1173 return {newNode + 1, newsize};
1176 BigHeap::iterator BigHeap::begin() {
1177 if (!m_slabs.empty()) return iterator{m_slabs.begin(), *this};
1178 return iterator{m_bigs.begin(), *this};
1181 BigHeap::iterator BigHeap::end() {
1182 return iterator{m_bigs.end(), *this};
1185 /////////////////////////////////////////////////////////////////////////
1186 // Contiguous Heap
1188 void ContiguousHeap::reset() {
1189 m_requestCount++;
1191 // if there is a new peak, store it
1192 if (m_peak < m_used) {
1193 m_peak = m_used;
1194 // convert usage to MB; used later for comparison with the water marks
1195 m_heapUsage = ((uintptr_t)m_peak - (uintptr_t) m_base) >> 20;
1198 // should the heap be reset?
1199 bool resetHeap = false;
1201 // check if we are above low water mark
1202 if (m_heapUsage > RuntimeOption::HeapLowWaterMark) {
1203 // check if we are above the high water mark
1204 if (m_heapUsage > RuntimeOption::HeapHighWaterMark) {
1205 // we are above high water mark... always reset
1206 resetHeap = true;
1207 } else {
1208 // if between watermarks, free based on request count and usage
1209 int requestCount = RuntimeOption::HeapResetCountBase;
1211 // Assumption: low and high water marks are power-of-2 aligned
1212 for( auto resetStep = RuntimeOption::HeapHighWaterMark / 2 ;
1213 resetStep > m_heapUsage ;
1214 resetStep /= 2 ) {
1215 requestCount *= RuntimeOption::HeapResetCountMultiple;
1217 if (requestCount <= m_requestCount) {
1218 resetHeap = true;
1224 if (resetHeap) {
1225 auto oldPeak = m_peak;
1226 m_peak -= ((m_peak - m_base) / 2);
1227 m_peak = (char*)((uintptr_t)m_peak & ~(s_pageSize - 1));
1228 if (madvise(m_peak,
1229 (uintptr_t)oldPeak - (uintptr_t)m_peak,
1230 MADV_DONTNEED) == 0)
1232 m_requestCount = 0;
1233 TRACE(1, "ContiguousHeap-reset: bytes %lu\n",
1234 (uintptr_t)m_end - (uintptr_t)m_peak);
1235 } else {
1236 TRACE(1,
1237 "ContiguousHeap-reset: madvise failed, trying again next request");
1239 } else {
1240 TRACE(1, "ContiguousHeap-reset: nothing released");
1242 m_used = m_base;
1243 m_freeList.next = nullptr;
1244 m_freeList.size = 0;
1245 always_assert(m_base);
1246 m_slabs.clear();
1247 m_bigs.clear();
1250 void ContiguousHeap::flush() {
1251 madvise(m_base, m_peak-m_base, MADV_DONTNEED);
1252 m_used = m_peak = m_base;
1253 m_freeList.size = 0;
1254 m_freeList.next = nullptr;
1255 m_slabs = std::vector<MemBlock>{};
1256 m_bigs = std::vector<BigNode*>{};
1259 MemBlock ContiguousHeap::allocSlab(size_t size) {
1260 size_t cap;
1261 void* slab = heapAlloc(size, cap);
1262 m_slabs.push_back({slab, cap});
1263 return {slab, cap};
1266 MemBlock ContiguousHeap::allocBig(size_t bytes, HeaderKind kind) {
1267 size_t cap;
1268 auto n = static_cast<BigNode*>(heapAlloc(bytes + sizeof(BigNode), cap));
1269 enlist(n, kind, cap);
1270 return {n + 1, cap - sizeof(BigNode)};
1273 MemBlock ContiguousHeap::callocBig(size_t nbytes) {
1274 size_t cap;
1275 auto const n = static_cast<BigNode*>(
1276 heapAlloc(nbytes + sizeof(BigNode), cap));
1277 memset(n, 0, cap);
1278 enlist(n, HeaderKind::BigMalloc, cap);
1279 return {n + 1, cap - sizeof(BigNode)};
1282 bool ContiguousHeap::contains(void* ptr) const {
1283 auto const ptrInt = reinterpret_cast<uintptr_t>(ptr);
1284 return ptrInt >= reinterpret_cast<uintptr_t>(m_base) &&
1285 ptrInt < reinterpret_cast<uintptr_t>(m_used);
1288 NEVER_INLINE
1289 void ContiguousHeap::freeBig(void* ptr) {
1290 // remove from big list
1291 auto n = static_cast<BigNode*>(ptr) - 1;
1292 auto i = n->index;
1293 auto last = m_bigs.back();
1294 auto size = n->nbytes;
1295 last->index = i;
1296 m_bigs[i] = last;
1297 m_bigs.pop_back();
1299 // free heap space
1300 // freed nodes are stored in an address-ordered freelist
1301 auto free = &m_freeList;
1302 auto node = reinterpret_cast<FreeNode*>(n);
1303 node->size = size;
1304 while (free->next != nullptr && free->next < node) {
1305 free = free->next;
1307 // Coalesce with adjacent free nodes where possible
1308 if ((uintptr_t)free + free->size + node->size == (uintptr_t)free->next) {
1309 free->size += node->size + free->next->size;
1310 free->next = free->next->next;
1311 } else if ((uintptr_t)free + free->size == (uintptr_t)node) {
1312 free->size += node->size;
1313 } else if ((uintptr_t)node + node->size == (uintptr_t)free->next){
1314 node->next = free->next->next;
1315 node->size += free->next->size;
1316 free->next = node;
1317 } else {
1318 node->next = free->next;
1319 free->next = node;
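// Worked example (hypothetical addresses/sizes): with a free list of
//   head -> 0x1000 (size 0x1000) -> 0x4000 (size 0x800)
// freeing a 0x1000-byte block at 0x3000 stops the walk after the 0x1000
// node; the new node ends exactly at 0x4000, so that entry is merged into it
// and the list becomes
//   head -> 0x1000 (size 0x1000) -> 0x3000 (size 0x1800).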
1323 MemBlock ContiguousHeap::resizeBig(void* ptr, size_t newsize) {
1324 // Since we don't know how big it is (i.e. how much data we should memcpy),
1325 // we have no choice but to ask malloc to realloc for us.
1326 auto const n = static_cast<BigNode*>(ptr) - 1;
1327 size_t cap = 0;
1328 BigNode* newNode = nullptr;
1329 if (n->nbytes >= newsize + sizeof(BigNode)) {
1330 newNode = n;
1331 } else {
1332 newNode = static_cast<BigNode*>(
1333 heapAlloc(newsize + sizeof(BigNode),cap)
1335 memcpy(newNode, ptr, n->nbytes);
1336 newNode->nbytes = cap;
1338 if (newNode != n) {
1339 m_bigs[newNode->index] = newNode;
1340 freeBig(n);
1342 return {newNode + 1, n->nbytes - sizeof(BigNode)};
1345 void* ContiguousHeap::heapAlloc(size_t nbytes, size_t &cap) {
1346 if (UNLIKELY(!m_base)) {
1347 // Lazy allocation of heap
1348 createRequestHeap();
1351 void* ptr = nullptr;
1352 auto alignedSize = (nbytes + s_pageSize - 1) & ~(s_pageSize - 1);
1354 // the free list is address-ordered, first-fit
1355 auto prev = &m_freeList;
1356 auto cur = m_freeList.next;
1357 while (cur != nullptr ) {
1358 if (cur->size >= alignedSize &&
1359 cur->size < alignedSize + kMaxSmartSize) {
1360 // found a freed heap node that fits the allocation and doesn't need to be split
1361 ptr = cur;
1362 prev->next = cur->next;
1363 cap = cur->size;
1364 return ptr;
1366 if (cur->size > alignedSize) {
1367 // split free heap node
1368 prev->next = reinterpret_cast<FreeNode*>(((char*)cur) + alignedSize);
1369 prev->next->next = cur->next;
1370 prev->next->size = cur->size - alignedSize;
1371 ptr = cur;
1372 cap = alignedSize;
1373 return ptr;
1375 prev = cur;
1376 cur = cur->next;
1378 ptr = (void*)m_used;
1379 m_used += alignedSize;
1380 cap = alignedSize;
1381 if (UNLIKELY(m_used > m_end)) {
1382 always_assert_flog(0,
1383 "Heap address space exhausted\nbase:{}\nend:{}\nused{}",
1384 m_base, m_end, m_used);
1385 // Throw exception when t4840214 is fixed
1386 // throw FatalErrorException("Request heap out of memory");
1387 } else if (UNLIKELY(m_used > m_OOMMarker)) {
1388 setSurpriseFlag(MemExceededFlag);
1390 return ptr;
1393 void ContiguousHeap::createRequestHeap() {
1394 // convert to bytes
1395 m_contiguousHeapSize = RuntimeOption::HeapSizeMB * 1024 * 1024;
1397 if (( m_base = (char*)mmap(NULL,
1398 m_contiguousHeapSize,
1399 PROT_WRITE | PROT_READ,
1400 s_mmapFlags,
1402 0)) != MAP_FAILED) {
1403 m_used = m_base;
1404 } else {
1405 always_assert_flog(0, "Heap Creation Failed");
1407 m_end = m_base + m_contiguousHeapSize;
1408 m_peak = m_base;
1409 m_OOMMarker = m_end - (m_contiguousHeapSize/2);
1410 m_freeList.next = nullptr;
1411 m_freeList.size = 0;
1414 ContiguousHeap::~ContiguousHeap(){
1415 flush();