// hphp/runtime/base/memory-manager.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/memory-manager.h"

#include <algorithm>
#include <cstdint>
#include <limits>

#include "hphp/runtime/base/builtin-functions.h"
#include "hphp/runtime/base/exceptions.h"
#include "hphp/runtime/base/ini-setting.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/stack-logger.h"
#include "hphp/runtime/base/surprise-flags.h"
#include "hphp/runtime/base/sweepable.h"
#include "hphp/runtime/base/thread-info.h"
#include "hphp/runtime/base/req-root.h"
#include "hphp/runtime/base/heap-graph.h"
#include "hphp/runtime/server/http-server.h"

#include "hphp/util/alloc.h"
#include "hphp/util/logger.h"
#include "hphp/util/process.h"
#include "hphp/util/ptr-map.h"
#include "hphp/util/struct-log.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"

#include <folly/CPortability.h>
#include <folly/Random.h>
#include <folly/ScopeGuard.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>

#include "hphp/runtime/base/memory-manager-defs.h"
namespace HPHP {

const unsigned kInvalidSweepIndex = 0xffffffff;
__thread bool tl_sweeping;
THREAD_LOCAL_FLAT(MemoryManager, tl_heap);
__thread size_t tl_heap_id; // thread's current heap instance id

TRACE_SET_MOD(mm);

//////////////////////////////////////////////////////////////////////

std::atomic<MemoryManager::ReqProfContext*>
  MemoryManager::s_trigger{nullptr};

bool MemoryManager::s_statsEnabled = false;

static std::atomic<size_t> s_heap_id; // global counter of heap instances

#ifdef USE_JEMALLOC
static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
void MemoryManager::threadStatsInit() {
  if (!mallctlnametomib) return;
  size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
    return;
  }
  MemoryManager::s_statsEnabled = true;
}

inline
void MemoryManager::threadStats(uint64_t*& allocated, uint64_t*& deallocated) {
  pthread_once(&threadStatsOnce, threadStatsInit);
  if (!MemoryManager::s_statsEnabled) return;

  size_t len = sizeof(allocated);
  if (mallctlbymib(threadAllocatedpMib,
                   sizeof(threadAllocatedpMib) / sizeof(size_t),
                   &allocated, &len, nullptr, 0)) {
    not_reached();
  }

  len = sizeof(deallocated);
  if (mallctlbymib(threadDeallocatedpMib,
                   sizeof(threadDeallocatedpMib) / sizeof(size_t),
                   &deallocated, &len, nullptr, 0)) {
    not_reached();
  }
}
#endif
MemoryManager::MemoryManager() {
#ifdef USE_JEMALLOC
  threadStats(m_allocated, m_deallocated);
#endif
  FTRACE(1, "heap-id {} new MM pid {}\n", tl_heap_id, getpid());
  resetAllStats();
  setMemoryLimit(std::numeric_limits<int64_t>::max());
  resetGC(); // so each thread has unique req_num at startup
  // make the circular-lists empty.
  m_strings.next = m_strings.prev = &m_strings;
  m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
  m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;
  IniSetting::Bind(IniSetting::CORE, IniSetting::PHP_INI_ALL, "zend.enable_gc",
                   &m_gc_enabled);
}

MemoryManager::~MemoryManager() {
  FTRACE(1, "heap-id {} ~MM\n", tl_heap_id);
  // TODO(T20916887): Enable this for one-bit refcounting.
  if (debug && !one_bit_refcount) {
    // Check that every object in the heap is free.
    forEachHeapObject([&](HeapObject* h, size_t) {
      assert_flog(h->kind() == HeaderKind::Free,
                  "{} still live in ~MemoryManager()",
                  header_names[size_t(h->kind())]);
    });
  }
  // ~SparseHeap releases its slabs/bigs.
}

void MemoryManager::resetRuntimeOptions() {
  if (debug) checkHeap("resetRuntimeOptions");
  void* mem = this;
  this->~MemoryManager();
  new (mem) MemoryManager();
}
void MemoryManager::traceStats(const char* event) {
  FTRACE(1, "heap-id {} {} ", tl_heap_id, event);
  if (use_jemalloc) {
    FTRACE(1, "mm-usage {} extUsage {} ",
           m_stats.mmUsage(), m_stats.extUsage);
    FTRACE(1, "capacity {} peak usage {} peak capacity {} ",
           m_stats.capacity(), m_stats.peakUsage, m_stats.peakCap);
    FTRACE(1, "total {} reset alloc-dealloc {} cur alloc-dealloc {}\n",
           m_stats.totalAlloc, m_resetAllocated - m_resetDeallocated,
           *m_allocated - *m_deallocated);
  } else {
    FTRACE(1, "usage: {} capacity: {} peak usage: {} peak capacity: {}\n",
           m_stats.usage(), m_stats.capacity(),
           m_stats.peakUsage, m_stats.peakCap);
  }
}
// Reset all memory stats counters, both internal and external; intended to
// be used between requests when the whole heap is being reset.
void MemoryManager::resetAllStats() {
  traceStats("resetAllStats pre");
  m_statsIntervalActive = false;
  m_stats.mm_debt = 0;
  m_stats.mm_allocated = 0;
  m_stats.mm_freed = 0;
  m_stats.extUsage = 0;
  m_stats.malloc_cap = 0;
  m_stats.mmap_cap = 0;
  m_stats.mmap_volume = 0;
  m_stats.peakUsage = 0;
  m_stats.peakCap = 0;
  m_stats.totalAlloc = 0;
  m_stats.peakIntervalUsage = 0;
  m_stats.peakIntervalCap = 0;
  if (m_bypassSlabAlloc) {
    totalSmallAllocs.insert(totalSmallAllocs.begin(),
                            totalSmallAllocs.size(), 0);
    currentSmallAllocs.insert(currentSmallAllocs.begin(),
                              currentSmallAllocs.size(), 0);
  }
  m_enableStatsSync = false;
  if (Trace::enabled) tl_heap_id = ++s_heap_id;
  if (s_statsEnabled) {
    m_resetDeallocated = *m_deallocated;
    m_resetAllocated = *m_allocated;
  }
  traceStats("resetAllStats post");
}
// Reset external allocation counters, but preserve MemoryManager counters.
// The effect of this call is simply to ignore anything we've done *outside*
// the MemoryManager allocator after we initialized, to avoid attributing
// shared structure initialization that happens during hphp_thread_init()
// to this session. Intended to be used once per request, early in the request
// lifetime, before PHP execution begins.
void MemoryManager::resetExternalStats() {
  traceStats("resetExternalStats pre");
  // extUsage and totalAlloc are only set by refreshStatsImpl, which we don't
  // enable until after this has been called.
  assert(m_enableStatsSync ||
         (m_stats.extUsage == 0 && m_stats.totalAlloc == 0));
  m_enableStatsSync = s_statsEnabled; // false if !use_jemalloc
  if (s_statsEnabled) {
    m_resetDeallocated = *m_deallocated;
    m_resetAllocated = *m_allocated - m_stats.malloc_cap;
    // By subtracting malloc_cap here, the next call to refreshStatsImpl()
    // will correctly include m_stats.malloc_cap in extUsage and totalAlloc.
  }
  traceStats("resetExternalStats post");
}

void MemoryManager::refreshStatsHelperExceeded() {
  setSurpriseFlag(MemExceededFlag);
  m_couldOOM = false;
  if (RuntimeOption::LogNativeStackOnOOM) {
    log_native_stack("Exceeded memory limit");
  }
}

void MemoryManager::setMemThresholdCallback(size_t threshold) {
  m_memThresholdCallbackPeakUsage = threshold;
}
/*
 * Refresh stats to reflect directly malloc()ed memory, and determine
 * whether the request memory limit has been exceeded.
 *
 * The stats parameter allows the updates to be applied to either
 * m_stats as in refreshStats() or to a separate MemoryUsageStats
 * struct as in getStatsCopy().
 *
 * The template variable live controls whether or not MemoryManager
 * member variables are updated and whether or not to call helper
 * methods in response to memory anomalies.
 */
void MemoryManager::refreshStatsImpl(MemoryUsageStats& stats) {
  // Incrementally incorporate the difference between the previous and current
  // deltas into the memory usage statistic. For reference, the total
  // malloced memory usage could be calculated as such, if delta0 were
  // recorded in resetAllStats():
  //
  //   int64 musage = delta - delta0;
  //
  // Note however, the slab allocator adds to m_stats.malloc_cap
  // when it calls malloc(), so that this function can avoid
  // double-counting the malloced memory. Thus musage in the example
  // code may well substantially exceed m_stats.usage.
  if (m_enableStatsSync) {
    // We can't currently handle wrapping so make sure this isn't happening.
    assert(*m_allocated <= uint64_t(std::numeric_limits<int64_t>::max()));
    assert(*m_deallocated <= uint64_t(std::numeric_limits<int64_t>::max()));
    const int64_t curAllocated = *m_allocated;
    const int64_t curDeallocated = *m_deallocated;

    // Since these deltas potentially include memory allocated from another
    // thread but deallocated on this one, it is possible for these numbers to
    // go negative.
    auto curUsage = curAllocated - curDeallocated;
    auto resetUsage = m_resetAllocated - m_resetDeallocated;

    FTRACE(1, "heap-id {} Before stats sync: ", tl_heap_id);
    FTRACE(1, "reset alloc-dealloc {} cur alloc-dealloc: {} alloc-change: {} ",
           resetUsage, curUsage, curAllocated - m_resetAllocated);
    FTRACE(1, "dealloc-change: {} ", curDeallocated - m_resetDeallocated);
    FTRACE(1, "mm usage {} extUsage {} totalAlloc {} capacity {}\n",
           stats.mmUsage(), stats.extUsage, stats.totalAlloc, stats.capacity());

    // External usage (allocated-deallocated) since the last resetStats().
    stats.extUsage = curUsage - resetUsage;

    // Calculate the allocation volume since the last reset.
    // We need to do the calculation instead of just setting it to curAllocated
    // because of the MaskAlloc capability, which updates m_resetAllocated.
    //
    // stats.mmap_volume is only used for mmap'd heap space; any malloc'd
    // space is included in curAllocated.
    stats.totalAlloc = curAllocated - m_resetAllocated + stats.mmap_volume;
    FTRACE(1, "heap-id {} after sync extUsage {} totalAlloc: {}\n",
           tl_heap_id, stats.extUsage, stats.totalAlloc);
  }
  assert(m_usageLimit > 0);
  auto usage = stats.usage();
  stats.peakUsage = std::max(stats.peakUsage, usage);
  if (m_statsIntervalActive) {
    stats.peakIntervalUsage = std::max(stats.peakIntervalUsage, usage);
    stats.peakIntervalCap = std::max(stats.peakIntervalCap, stats.capacity());
  }
}
/*
 * Refresh our internally stored m_stats, then check for OOM and the
 * memThresholdCallback.
 */
void MemoryManager::refreshStats() {
  refreshStatsImpl(m_stats);
  auto usage = m_stats.usage();
  if (usage > m_usageLimit && m_couldOOM) {
    refreshStatsHelperExceeded();
  }
  if (usage > m_memThresholdCallbackPeakUsage) {
    m_memThresholdCallbackPeakUsage = SIZE_MAX;
    setSurpriseFlag(MemThresholdFlag);
  }
}

/*
 * Calculate how many bytes of allocation should happen before the next
 * time the fast path is interrupted.
 */
void MemoryManager::updateMMDebt()
// TODO: T26068998 fix signed-integer-overflow undefined behavior
FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER("signed-integer-overflow") {
  auto const new_debt = m_nextGC - m_stats.mmUsage();
  m_stats.mm_allocated = m_stats.mm_allocated - m_stats.mm_debt + new_debt;
  m_stats.mm_debt = new_debt;
}
void MemoryManager::sweep() {
  assert(!sweeping());
  tl_sweeping = true;
  DEBUG_ONLY size_t num_sweepables = 0, num_natives = 0;

  // iterate until both sweep lists are empty. Entries can be added or
  // removed from either list during sweeping.
  do {
    while (!m_sweepables.empty()) {
      num_sweepables++;
      auto obj = m_sweepables.next();
      obj->unregister();
      obj->sweep();
    }
    while (!m_natives.empty()) {
      num_natives++;
      assert(m_natives.back()->sweep_index == m_natives.size() - 1);
      auto node = m_natives.back();
      m_natives.pop_back();
      auto obj = Native::obj(node);
      auto ndi = obj->getVMClass()->getNativeDataInfo();
      ndi->sweep(obj);
      // trash the native data but leave the header and object parsable
      assert(memset(node+1, kSmallFreeFill, node->obj_offset - sizeof(*node)));
    }
  } while (!m_sweepables.empty());

  DEBUG_ONLY auto napcs = m_apc_arrays.size();
  FTRACE(1, "heap-id {} sweep: sweepable {} native {} apc array {}\n",
         tl_heap_id,
         num_sweepables,
         num_natives,
         napcs);

  // decref apc arrays referenced by this request. This must happen here
  // (instead of in resetAllocator), because the sweep routine may use
  // g_context.
  while (!m_apc_arrays.empty()) {
    auto a = m_apc_arrays.back();
    m_apc_arrays.pop_back();
    a->sweep();
    if (debug) a->m_sweep_index = kInvalidSweepIndex;
  }

  if (debug) checkHeap("after MM::sweep");
}
void MemoryManager::resetAllocator() {
  assert(m_natives.empty() && m_sweepables.empty() && tl_sweeping);
  // decref apc strings referenced by this request
  DEBUG_ONLY auto nstrings = StringData::sweepAll();
  FTRACE(1, "heap-id {} resetAllocator: strings {}\n", tl_heap_id, nstrings);

  // free the heap
  m_heap.reset();

  // zero out freelists
  for (auto& list : m_freelists) list.head = nullptr;
  m_front = m_limit = 0;
  tl_sweeping = false;
  m_exiting = false;
  if (StructuredLog::coinflip(RuntimeOption::TotalAllocSampleF)) {
    publishStats("total", totalSmallAllocs, RuntimeOption::TotalAllocSampleF);
  }
  resetAllStats();
  setGCEnabled(RuntimeOption::EvalEnableGC);
  resetGC();
  if (debug) resetEagerGC();
}

void MemoryManager::flush() {
  always_assert(empty());
  m_heap.flush();
  m_apc_arrays = std::vector<APCLocalArray*>();
  m_natives = std::vector<NativeNode*>();
  m_root_handles = std::vector<req::root_handle*>{};
}
/*
 * req::malloc & friends implementation notes
 *
 * There are three kinds of allocations:
 *
 *  a) Big allocations.  (size >= kMaxSmallSize)
 *
 *     In this case we behave as a wrapper around the normal libc
 *     malloc/free.  We insert a MallocNode header at the front of the
 *     allocation in order to find these at sweep time (end of
 *     request) so we can give them back to libc.
 *
 *  b) Size-tracked small allocations.
 *
 *     This is used for the generic case, for callers who can't tell
 *     us the size of the allocation at free time.
 *
 *     In this situation, we put a MallocNode header at the front of
 *     the block that tells us the size for when we need to free it
 *     later.  We differentiate this from a MallocNode using the size
 *     field in either structure (they overlap at the same address).
 *
 *  c) Size-untracked small allocations.
 *
 *     Many callers have an easy time telling you how big the object
 *     was when they need to free it.  In this case we can avoid the
 *     MallocNode, which saves us some memory and also lets us give
 *     out 16-byte aligned pointers easily.
 *
 *     We know when we have one of these because it has to be freed
 *     through a different entry point.  (E.g. tl_heap->freeSmallSize() or
 *     tl_heap->freeBigSize().)
 *
 * When small blocks are freed (cases b and c), they're placed in the
 * appropriate size-segregated freelist.  Large blocks are immediately
 * passed back to libc via free.
 *
 * There are currently two kinds of freelist entries: entries where
 * there is already a valid MallocNode on the list (case b), and
 * entries where there isn't (case c).  The reason is that this way,
 * when allocating for case b, you don't need to store the MallocNode
 * size again.  Much of the heap is going through case b at the time
 * of this writing, so it is a measurable regression to try to just
 * combine the free lists, but presumably we can move more to case c
 * and combine the lists eventually.
 */
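// -------------------------------------------------------------------------
// Illustrative sketch only (not part of the upstream file): one way a caller
// could exercise each of the three allocation kinds described above. The
// entry points (req::malloc, req::free, mallocSmallSize, freeSmallSize,
// kMaxSmallSize) are the ones used elsewhere in this file; the helper
// function itself is hypothetical and never called.
namespace {
UNUSED void allocationKindsSketch() {
  // (a) big allocation: size >= kMaxSmallSize, handled by mallocBigSize()
  //     with a MallocNode header so it can be found and released at sweep.
  void* big = req::malloc(kMaxSmallSize + 64, type_scan::kIndexUnknownNoPtrs);

  // (b) size-tracked small allocation: req::malloc prepends a MallocNode so
  //     req::free() can recover the size later.
  void* tracked = req::malloc(100, type_scan::kIndexUnknownNoPtrs);

  // (c) size-untracked small allocation: no header; the caller must remember
  //     the size and free through freeSmallSize() with that same size.
  void* untracked = tl_heap->mallocSmallSize(64);

  tl_heap->freeSmallSize(untracked, 64);
  req::free(tracked);
  req::free(big);
}
} // namespace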
const std::array<char*,NumHeaderKinds> header_names = {{
  "PackedArray", "MixedArray", "EmptyArray", "ApcArray",
  "GlobalsArray", "DictArray", "VecArray", "KeysetArray",
  "String", "Resource", "Ref",
  "Object", "NativeObject", "WaitHandle", "AsyncFuncWH", "AwaitAllWH",
  "Closure", "Vector", "Map", "Set", "Pair", "ImmVector", "ImmMap", "ImmSet",
  "AsyncFuncFrame", "NativeData", "ClosureHdr",
  "SmallMalloc", "BigMalloc", "BigObj",
  "Free", "Hole", "Slab"
}};

// initialize a Hole header in the unused memory between m_front and m_limit
void MemoryManager::initHole(void* ptr, uint32_t size) {
  FreeNode::InitFrom(ptr, size, HeaderKind::Hole);
}

void MemoryManager::initFree() {
  if ((char*)m_front < (char*)m_limit) {
    initHole(m_front, (char*)m_limit - (char*)m_front);
    Slab::fromPtr(m_front)->setStart(m_front);
  }
  m_heap.sort();
  reinitFree();
}
void MemoryManager::reinitFree() {
  for (auto i = 0; i < kNumSmallSizes; i++) {
    auto size = sizeIndex2Size(i);
    auto n = m_freelists[i].head;
    for (; n && n->kind() != HeaderKind::Free; n = n->next) {
      n->initHeader_32(HeaderKind::Free, size);
    }
    if (debug) {
      // ensure the freelist tail is already initialized.
      for (; n; n = n->next) {
        assert(n->kind() == HeaderKind::Free && n->size() == size);
      }
    }
  }
}

MemoryManager::FreelistArray MemoryManager::beginQuarantine() {
  FreelistArray list;
  for (auto i = 0; i < kNumSmallSizes; ++i) {
    list[i].head = m_freelists[i].head;
    m_freelists[i].head = nullptr;
  }
  return list;
}

// turn free blocks into holes, restore original freelists
void MemoryManager::endQuarantine(FreelistArray&& list) {
  for (auto i = 0; i < kNumSmallSizes; i++) {
    auto size = sizeIndex2Size(i);
    while (auto n = m_freelists[i].likelyPop()) {
      memset(n, 0x8a, size);
      initHole(n, size);
    }
    m_freelists[i].head = list[i].head;
    list[i].head = nullptr;
  }
}
// test iterating objects in slabs
void MemoryManager::checkHeap(const char* phase) {
  size_t bytes = 0;
  std::vector<HeapObject*> hdrs;
  PtrMap<HeapObject*> free_blocks, apc_arrays, apc_strings;
  size_t counts[NumHeaderKinds];
  for (unsigned i = 0; i < NumHeaderKinds; i++) counts[i] = 0;
  forEachHeapObject([&](HeapObject* h, size_t alloc_size) {
    hdrs.push_back(h);
    bytes += alloc_size;
    auto kind = h->kind();
    counts[(int)kind]++;
    switch (kind) {
      case HeaderKind::Free:
        free_blocks.insert(h, alloc_size);
        break;
      case HeaderKind::Apc:
        if (static_cast<APCLocalArray*>(h)->m_sweep_index !=
            kInvalidSweepIndex) {
          apc_arrays.insert(h, alloc_size);
        }
        break;
      case HeaderKind::String:
        if (static_cast<StringData*>(h)->isProxy()) {
          apc_strings.insert(h, alloc_size);
        }
        break;
      case HeaderKind::Packed:
      case HeaderKind::Mixed:
      case HeaderKind::Dict:
      case HeaderKind::Empty:
      case HeaderKind::VecArray:
      case HeaderKind::Keyset:
      case HeaderKind::Globals:
      case HeaderKind::Object:
      case HeaderKind::NativeObject:
      case HeaderKind::WaitHandle:
      case HeaderKind::AsyncFuncWH:
      case HeaderKind::AwaitAllWH:
      case HeaderKind::Closure:
      case HeaderKind::Vector:
      case HeaderKind::Map:
      case HeaderKind::Set:
      case HeaderKind::Pair:
      case HeaderKind::ImmVector:
      case HeaderKind::ImmMap:
      case HeaderKind::ImmSet:
      case HeaderKind::Resource:
      case HeaderKind::Ref:
      case HeaderKind::AsyncFuncFrame:
      case HeaderKind::NativeData:
      case HeaderKind::ClosureHdr:
      case HeaderKind::Cpp:
      case HeaderKind::SmallMalloc:
      case HeaderKind::BigMalloc:
        break;
      case HeaderKind::BigObj:
      case HeaderKind::Hole:
      case HeaderKind::Slab:
        assert(false && "forEachHeapObject skips these kinds");
        break;
    }
  });

  // check the free lists
  free_blocks.prepare();
  size_t num_free_blocks = 0;
  for (auto i = 0; i < kNumSmallSizes; i++) {
    for (auto n = m_freelists[i].head; n; n = n->next) {
      assert(free_blocks.isStart(n));
      ++num_free_blocks;
    }
  }
  assert(num_free_blocks == free_blocks.size());

  // check the apc array list
  assert(apc_arrays.size() == m_apc_arrays.size());
  apc_arrays.prepare();
  for (UNUSED auto a : m_apc_arrays) {
    assert(apc_arrays.isStart(a));
  }

  // check the apc string list
  size_t num_apc_strings = 0;
  apc_strings.prepare();
  for (StringDataNode *next, *n = m_strings.next; n != &m_strings; n = next) {
    next = n->next;
    UNUSED auto const s = StringData::node2str(n);
    assert(s->isProxy());
    assert(apc_strings.isStart(s));
    ++num_apc_strings;
  }
  assert(num_apc_strings == apc_strings.size());

  // heap check is done. If we are not exiting, check pointers using HeapGraph
  if (Trace::moduleEnabled(Trace::heapreport)) {
    auto g = makeHeapGraph(true /* include free blocks */);
    if (!exiting()) checkPointers(g, phase);
    if (Trace::moduleEnabled(Trace::heapreport, 2)) {
      printHeapReport(g, phase);
    }
  }
}
// Filling the start bits one word at a time requires writing the mask for
// the appropriate size class, and left-shifting the mask each time to insert
// any necessary zeros not included in the previous word, for size classes
// that don't pack perfectly into 64 bits. Suppose:
//
//   d = size / kSmallSizeAlign = number of bits for the given size class
//
// In other words, when 64 % d != 0, we need to shift the mask slightly after
// each store, until the shift amount wraps. For example, using 8-bit words
// for brevity, the sequence of stores would be:
//
//   11111111 11111111 11111111 11111111            size=16  d=1 shift 0,0,0,0
//   10101010 10101010 10101010 10101010            size=32  d=2 shift 0,0,0,0
//   10010010 01001001 00100100 10010010            size=48  d=3 shift 0,1,2,0
//   10001000 10001000 10001000 10001000            size=64  d=4 shift 0,0,0,0
//   10000100 00100001 00001000 01000010 00010000   size=80  d=5 shift 0,2,4,1,3,0
//   10000010 00001000 00100000 10000010            size=96  d=6 shift 0,4,2,0
//   10000001 00000010 00000100 00001000 00010000 00100000 01000000 10000001
//                                                  size=112 d=7 shift 0,6,5,4,3,2,1,0
//   10000000 10000000                              size=128 d=8 shift 0,0

// build a bitmask-init table for size classes that fit at least one
// object per 64*kSmallSizeAlign bytes; this means they fit at least
// one start bit per 64 bits, supporting fast nContig initialization.

// masks_[i] = bitmask to store each time
std::array<uint64_t,Slab::kNumMasks> Slab::masks_;

// shifts_[i] = how much to shift masks_[i] after each store
std::array<uint8_t,Slab::kNumMasks> Slab::shifts_;

struct Slab::InitMasks {
  InitMasks() {
    static_assert(kSizeIndex2Size[kNumMasks - 1] <= 64 * kSmallSizeAlign, "");
    for (size_t i = 0; i < kNumMasks; i++) {
      auto const d = kSizeIndex2Size[i] / kSmallSizeAlign;
      for (size_t j = 0; j < 64; j += d) {
        masks_[i] |= 1ull << j;
      }
      shifts_[i] = d - 64 % d; // # of high-order zeros not in mask
    }
  }
};
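// A small self-contained sketch (hypothetical helper, never called) that only
// mirrors the 8-bit illustration in the comment above: with word width `w`
// and `d` bits per object, the k-th stored word appears to use the base mask
// shifted by (k * (d - w % d)) % d, which reproduces the "shift" sequences
// listed there (e.g. 0,1,2,0 for size=48/d=3 and 0,2,4,1,3,0 for size=80/d=5).
// This is a derived observation, not code used by the allocator.
namespace {
UNUSED std::vector<unsigned> startBitShiftSequence(unsigned w, unsigned d,
                                                   unsigned numWords) {
  std::vector<unsigned> seq;
  auto const step = d - w % d; // same formula as shifts_[i] above, with w == 64
  for (unsigned k = 0; k < numWords; ++k) {
    seq.push_back(k * step % d);
  }
  return seq;
}
} // namespace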
namespace {

Slab::InitMasks s_init_masks;

using FreelistArray = MemoryManager::FreelistArray;

alignas(64) constexpr size_t kContigTab[] = {
#define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
  ncontig * kSizeIndex2Size[index],
  SIZE_CLASSES
#undef SIZE_CLASS
};

alignas(64) const uint8_t kContigIndexTab[] = {
#define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
  (uint8_t)std::max(size_t(index+1),\
                    MemoryManager::size2Index(kContigTab[index])),
  SIZE_CLASSES
#undef SIZE_CLASS
};

/*
 * Store slab tail bytes (if any) in freelists.
 */
inline
void storeTail(FreelistArray& freelists, void* tail, size_t tailBytes,
               Slab* slab) {
  void* rem = tail;
  for (auto remBytes = tailBytes; remBytes > 0;) {
    auto fragBytes = remBytes;
    assert(fragBytes >= kSmallSizeAlign);
    assert((fragBytes & kSmallSizeAlignMask) == 0);
    auto fragInd = MemoryManager::size2Index(fragBytes + 1) - 1;
    auto fragUsable = MemoryManager::sizeIndex2Size(fragInd);
    auto frag = FreeNode::InitFrom((char*)rem + remBytes - fragUsable,
                                   fragUsable, HeaderKind::Hole);
    FTRACE(4, "storeTail({}, {}): rem={}, remBytes={}, "
              "frag={}, fragBytes={}, fragUsable={}, fragInd={}\n", tail,
              (void*)uintptr_t(tailBytes), rem, (void*)uintptr_t(remBytes),
              frag, (void*)uintptr_t(fragBytes), (void*)uintptr_t(fragUsable),
              fragInd);
    freelists[fragInd].push(frag);
    slab->setStart(frag);
    remBytes -= fragUsable;
  }
}
/*
 * Create split_bytes worth of contiguous regions, each of size splitUsable,
 * and store them in the appropriate freelist. In addition, initialize the
 * start-bits for the new objects.
 */
inline
void splitTail(FreelistArray& freelists, void* tail, size_t tailBytes,
               size_t split_bytes, size_t splitUsable, size_t index,
               Slab* slab) {
  assert(tailBytes >= kSmallSizeAlign);
  assert((tailBytes & kSmallSizeAlignMask) == 0);
  assert((splitUsable & kSmallSizeAlignMask) == 0);
  assert(split_bytes <= tailBytes);

  // initialize the free objects, and push them onto the freelist.
  auto head = freelists[index].head;
  auto rem = (char*)tail + split_bytes;
  for (auto next = rem - splitUsable; next >= tail; next -= splitUsable) {
    auto split = FreeNode::InitFrom(next, splitUsable, HeaderKind::Hole);
    FTRACE(4, "MemoryManager::splitTail(tail={}, tailBytes={}, tailPast={}): "
              "split={}, splitUsable={}\n", tail,
              (void*)uintptr_t(tailBytes), (void*)(uintptr_t(tail) + tailBytes),
              split, splitUsable);
    head = FreeNode::UninitFrom(split, head);
  }
  freelists[index].head = head;

  // initialize the start-bits for each object.
  slab->setStarts(tail, rem, splitUsable, index);

  auto remBytes = tailBytes - split_bytes;
  assert(uintptr_t(rem) + remBytes == uintptr_t(tail) + tailBytes);
  storeTail(freelists, rem, remBytes, slab);
}

} // namespace
/*
 * Get a new slab, then allocate nbytes from it and install it in our
 * slab list. Return the newly allocated nbytes-sized block.
 */
NEVER_INLINE void* MemoryManager::newSlab(size_t nbytes) {
  refreshStats();
  if (m_front < m_limit) {
    storeTail(m_freelists, m_front, (char*)m_limit - (char*)m_front,
              Slab::fromPtr(m_front));
  }
  auto mem = m_heap.allocSlab(m_stats);
  always_assert(reinterpret_cast<uintptr_t>(mem) % kSlabAlign == 0);
  auto slab = static_cast<Slab*>(mem);
  auto slab_start = slab->init();
  m_front = slab_start + nbytes; // allocate requested object
  // we can't use any space after slab->end() even if the allocator allows
  // (indicated by mem.size), because of the fixed-sized crossing map.
  m_limit = slab->end();
  FTRACE(3, "newSlab: adding slab at {} to limit {}\n", slab_start, m_limit);
  slab->setStart(slab_start);
  return slab_start;
}
/*
 * Allocate `bytes' from the current slab, aligned to kSmallSizeAlign.
 */
inline void* MemoryManager::slabAlloc(size_t nbytes, size_t index) {
  FTRACE(3, "slabAlloc({}, {}): m_front={}, m_limit={}\n", nbytes, index,
         m_front, m_limit);
  assert(nbytes == sizeIndex2Size(index));
  assert(nbytes <= kSlabSize);
  assert((uintptr_t(m_front) & kSmallSizeAlignMask) == 0);

  if (UNLIKELY(m_bypassSlabAlloc)) {
    totalSmallAllocs.resize(kNumSmallSizes, 0);
    currentSmallAllocs.resize(kNumSmallSizes, 0);
    ++totalSmallAllocs[index];
    ++currentSmallAllocs[index];
    if (StructuredLog::coinflip(RuntimeOption::PerAllocSampleF)) {
      publishStats("current", currentSmallAllocs,
                   RuntimeOption::PerAllocSampleF);
    }
    // Stats correction; mallocBigSize() updates m_stats. Add to mm_debt rather
    // than adding to mm_freed because we're adjusting for double-counting, not
    // actually freeing anything.
    m_stats.mm_debt += nbytes;
    return mallocBigSize<Unzeroed>(nbytes);
  }

  auto ptr = m_front;
  auto next = (void*)(uintptr_t(ptr) + nbytes);
  Slab* slab;
  if (uintptr_t(next) <= uintptr_t(m_limit)) {
    m_front = next;
    slab = Slab::fromPtr(ptr);
    slab->setStart(ptr);
  } else {
    ptr = newSlab(nbytes); // sets start bit at ptr
    slab = Slab::fromPtr(ptr);
  }
  // Preallocate more of the same in order to amortize entry into this method.
  auto split_bytes = kContigTab[index] - nbytes;
  auto avail = uintptr_t(m_limit) - uintptr_t(m_front);
  if (UNLIKELY(split_bytes > avail)) {
    split_bytes = avail - avail % nbytes; // Expensive division.
  }
  if (split_bytes > 0) {
    auto tail = m_front;
    m_front = (void*)(uintptr_t(tail) + split_bytes);
    splitTail(m_freelists, tail, split_bytes, split_bytes, nbytes, index, slab);
  }
  FTRACE(4, "slabAlloc({}, {}) --> ptr={}, m_front={}, m_limit={}\n", nbytes,
         index, ptr, m_front, m_limit);
  return ptr;
}
NEVER_INLINE
void* MemoryManager::mallocSmallIndexSlow(size_t bytes, size_t index) {
  checkGC();
  updateMMDebt();
  return mallocSmallIndexTail(bytes, index);
}

void* MemoryManager::mallocSmallSizeSlow(size_t nbytes, size_t index) {
  assert(nbytes == sizeIndex2Size(index));
  assert(!m_freelists[index].head); // freelist[index] is empty
  size_t contigInd = kContigIndexTab[index];
  for (auto i = contigInd; i < kNumSmallSizes; ++i) {
    FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): contigMin={}, "
              "contigInd={}, try i={}\n", nbytes, index, kContigTab[index],
              contigInd, i);
    if (auto p = m_freelists[i].unlikelyPop()) {
      assert(i > index); // because freelist[index] was empty
      assert(Slab::fromPtr(p)->isStart(p));
      FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): "
                "contigMin={}, contigInd={}, use i={}, size={}, p={}\n",
                nbytes, index, kContigTab[index], contigInd, i,
                sizeIndex2Size(i), p);
      // Split tail into preallocations and store them back into freelists.
      splitTail(m_freelists, (char*)p + nbytes, sizeIndex2Size(i) - nbytes,
                kContigTab[index] - nbytes, nbytes, index, Slab::fromPtr(p));
      return p;
    }
  }

  // No available free list items; carve new space from the current slab.
  return slabAlloc(nbytes, index);
}
inline void MemoryManager::updateBigStats() {
  // If we are using jemalloc, it is keeping track of allocations outside of
  // the slabs and the usage so we should force this after an allocation that
  // was too large for one of the existing slabs. When we're not using jemalloc
  // this check won't do anything so avoid the extra overhead.
  if (debug) requestEagerGC();
  refreshStats();
}

template<MemoryManager::MBS Mode> NEVER_INLINE
void* MemoryManager::mallocBigSize(size_t bytes, HeaderKind kind,
                                   type_scan::Index ty) {
  if (debug) tl_heap->requestEagerGC();
  auto ptr = Mode == Zeroed ? m_heap.callocBig(bytes, kind, ty, m_stats) :
             m_heap.allocBig(bytes, kind, ty, m_stats);
  updateBigStats();
  checkGC();
  FTRACE(3, "mallocBigSize: {} ({} requested)\n", ptr, bytes);
  return ptr;
}

template NEVER_INLINE
void* MemoryManager::mallocBigSize<MemoryManager::Unzeroed>(
  size_t, HeaderKind, type_scan::Index
);
template NEVER_INLINE
void* MemoryManager::mallocBigSize<MemoryManager::Zeroed>(
  size_t, HeaderKind, type_scan::Index
);

void* MemoryManager::resizeBig(MallocNode* n, size_t nbytes) {
  assert(n->kind() == HeaderKind::BigMalloc);
  auto block = m_heap.resizeBig(n + 1, nbytes, m_stats);
  updateBigStats();
  return block;
}

NEVER_INLINE
void MemoryManager::freeBigSize(void* vp) {
  FTRACE(3, "freeBigSize: {} ({} bytes)\n", vp,
         static_cast<MallocNode*>(vp)[-1].nbytes);
  m_heap.freeBig(vp, m_stats);
}
// req::malloc api entry points, with support for malloc/free corner cases.
namespace req {

using MBS = MemoryManager::MBS;

template<MBS Mode>
static void* allocate(size_t nbytes, type_scan::Index ty) {
  nbytes = std::max(nbytes, size_t(1));
  auto const npadded = nbytes + sizeof(MallocNode);
  if (LIKELY(npadded <= kMaxSmallSize)) {
    auto const ptr = static_cast<MallocNode*>(
      tl_heap->mallocSmallSize(npadded)
    );
    ptr->nbytes = npadded;
    ptr->initHeader_32_16(HeaderKind::SmallMalloc, 0, ty);
    return Mode == MBS::Zeroed ? memset(ptr + 1, 0, nbytes) : ptr + 1;
  }
  return tl_heap->mallocBigSize<Mode>(nbytes, HeaderKind::BigMalloc, ty);
}

void* malloc(size_t nbytes, type_scan::Index tyindex) {
  assert(type_scan::isKnownType(tyindex));
  return allocate<MBS::Unzeroed>(nbytes, tyindex);
}

void* calloc(size_t count, size_t nbytes, type_scan::Index tyindex) {
  assert(type_scan::isKnownType(tyindex));
  return allocate<MBS::Zeroed>(count * nbytes, tyindex);
}

void* malloc_untyped(size_t nbytes) {
  return tl_heap->mallocBigSize<MBS::Unzeroed>(
    std::max(nbytes, 1ul),
    HeaderKind::BigMalloc,
    type_scan::kIndexUnknown
  );
}

void* calloc_untyped(size_t count, size_t bytes) {
  return tl_heap->mallocBigSize<MBS::Zeroed>(
    std::max(count * bytes, 1ul),
    HeaderKind::BigMalloc,
    type_scan::kIndexUnknown
  );
}

void* realloc(void* ptr, size_t nbytes, type_scan::Index tyindex) {
  assert(type_scan::isKnownType(tyindex));
  if (!ptr) {
    return allocate<MBS::Unzeroed>(nbytes, tyindex);
  }
  if (!nbytes) {
    req::free(ptr);
    return nullptr;
  }
  FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
         ptr, nbytes, tyindex);
  auto const n = static_cast<MallocNode*>(ptr) - 1;
  assert(n->typeIndex() == tyindex);
  if (LIKELY(n->kind() == HeaderKind::SmallMalloc) ||
      UNLIKELY(nbytes + sizeof(MallocNode) <= kMaxSmallSize)) {
    // either the old or new block will be small; force a copy.
    auto newmem = allocate<MBS::Unzeroed>(nbytes, tyindex);
    auto copy_size = std::min(n->nbytes - sizeof(MallocNode), nbytes);
    newmem = memcpy(newmem, ptr, copy_size);
    req::free(ptr);
    return newmem;
  }
  // it's a big allocation.
  return tl_heap->resizeBig(n, nbytes);
}

void* realloc_untyped(void* ptr, size_t nbytes) {
  // first handle corner cases that degenerate to malloc() or free()
  if (!ptr) {
    return req::malloc_untyped(nbytes);
  }
  if (!nbytes) {
    req::free(ptr);
    return nullptr;
  }
  FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
         ptr, nbytes, type_scan::kIndexUnknown);
  auto const n = static_cast<MallocNode*>(ptr) - 1;
  assert(n->kind() == HeaderKind::BigMalloc);
  assert(n->typeIndex() == type_scan::kIndexUnknown);
  return tl_heap->resizeBig(n, nbytes);
}

char* strndup(const char* str, size_t len) {
  size_t n = std::min(len, strlen(str));
  char* ret = reinterpret_cast<char*>(
    req::malloc(n + 1, type_scan::kIndexUnknownNoPtrs)
  );
  memcpy(ret, str, n);
  ret[n] = 0;
  return ret;
}

void free(void* ptr) {
  if (!ptr) return;
  auto const n = static_cast<MallocNode*>(ptr) - 1;
  if (LIKELY(n->kind() == HeaderKind::SmallMalloc)) {
    return tl_heap->freeSmallSize(n, n->nbytes);
  }
  if (n->kind() == HeaderKind::Cpp) {
    return tl_heap->objFree(n, n->nbytes);
  }
  assert(n->kind() == HeaderKind::BigMalloc);
  tl_heap->freeBigSize(ptr);
}

} // namespace req
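// Illustrative sketch (hypothetical helper, never called, not part of the
// upstream file) of the malloc/free corner cases the entry points above are
// written to support: zero-byte requests still yield a usable pointer,
// realloc(nullptr, n) degenerates to malloc, realloc(p, 0) degenerates to
// free, and free(nullptr) is a no-op.
namespace {
UNUSED void reqMallocCornerCasesSketch() {
  // A zero-byte request is clamped to one byte, so a usable pointer comes back.
  void* p = req::malloc(0, type_scan::kIndexUnknownNoPtrs);
  req::free(p);

  // realloc with a null pointer degenerates to malloc ...
  p = req::realloc(nullptr, 32, type_scan::kIndexUnknownNoPtrs);
  // ... and realloc to zero bytes degenerates to free and returns nullptr.
  p = req::realloc(p, 0, type_scan::kIndexUnknownNoPtrs);
  assert(p == nullptr);

  // free(nullptr) is accepted and does nothing.
  req::free(nullptr);
}
} // namespace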
//////////////////////////////////////////////////////////////////////

void MemoryManager::addNativeObject(NativeNode* node) {
  if (debug) for (DEBUG_ONLY auto n : m_natives) assert(n != node);
  node->sweep_index = m_natives.size();
  m_natives.push_back(node);
}

void MemoryManager::removeNativeObject(NativeNode* node) {
  assert(node->sweep_index < m_natives.size());
  assert(m_natives[node->sweep_index] == node);
  auto index = node->sweep_index;
  auto last = m_natives.back();
  m_natives[index] = last;
  m_natives.pop_back();
  last->sweep_index = index;
}
void MemoryManager::addApcArray(APCLocalArray* a) {
  a->m_sweep_index = m_apc_arrays.size();
  m_apc_arrays.push_back(a);
}

void MemoryManager::removeApcArray(APCLocalArray* a) {
  assert(a->m_sweep_index < m_apc_arrays.size());
  assert(m_apc_arrays[a->m_sweep_index] == a);
  auto index = a->m_sweep_index;
  auto last = m_apc_arrays.back();
  m_apc_arrays[index] = last;
  m_apc_arrays.pop_back();
  last->m_sweep_index = index;
}

void MemoryManager::addSweepable(Sweepable* obj) {
  obj->enlist(&m_sweepables);
}

// defined here because memory-manager.h includes sweepable.h
Sweepable::Sweepable() {
  tl_heap->addSweepable(this);
}

//////////////////////////////////////////////////////////////////////

void MemoryManager::resetCouldOOM(bool state) {
  clearSurpriseFlag(MemExceededFlag);
  m_couldOOM = state;
}
///////////////////////////////////////////////////////////////////////////////
// Request profiling.

bool MemoryManager::triggerProfiling(const std::string& filename) {
  auto trigger = new ReqProfContext();
  trigger->flag = true;
  trigger->filename = filename;

  ReqProfContext* expected = nullptr;

  if (!s_trigger.compare_exchange_strong(expected, trigger)) {
    delete trigger;
    return false;
  }
  return true;
}
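// Usage sketch (hypothetical caller and file path, never invoked here): arm a
// one-shot heap profile for the next request. Because the trigger slot is
// claimed with a single compare-exchange, a second call made before any
// request consumes the pending trigger is reported as a failure.
namespace {
UNUSED void armHeapProfileOnceSketch() {
  if (!tl_heap->triggerProfiling("/tmp/heap-profile.out")) {
    Logger::Warning("heap profiling trigger already pending");
  }
}
} // namespace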
void MemoryManager::requestInit() {
  tl_heap->m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;

  // If the trigger has already been claimed, do nothing.
  auto trigger = s_trigger.exchange(nullptr);
  if (trigger == nullptr) return;

  always_assert(tl_heap->empty());

  // Initialize the request-local context from the trigger.
  auto& profctx = tl_heap->m_profctx;
  assert(!profctx.flag);

  tl_heap->m_bypassSlabAlloc = true;
  profctx = *trigger;
  delete trigger;

#ifdef USE_JEMALLOC
  // Reset jemalloc stats.
  if (mallctlCall("prof.reset", true) != 0) {
    return;
  }

  // Enable jemalloc thread-local heap dumps.
  if (mallctlReadWrite("prof.active", &profctx.prof_active, true, true)
      != 0) {
    profctx = ReqProfContext{};
    return;
  }
  if (mallctlReadWrite("thread.prof.active", &profctx.thread_prof_active,
                       true, true) != 0) {
    mallctlWrite("prof.active", profctx.prof_active);
    profctx = ReqProfContext{};
    return;
  }
#endif
}
void MemoryManager::requestShutdown() {
  auto& profctx = tl_heap->m_profctx;

  if (!profctx.flag) return;

#ifdef USE_JEMALLOC
  jemalloc_pprof_dump(profctx.filename, true);

  mallctlWrite("thread.prof.active", profctx.thread_prof_active);
  mallctlWrite("prof.active", profctx.prof_active);
#endif

  tl_heap->m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
  tl_heap->m_memThresholdCallbackPeakUsage = SIZE_MAX;
  profctx = ReqProfContext{};
}

/* static */ void MemoryManager::setupProfiling() {
  always_assert(tl_heap->empty());
  tl_heap->m_bypassSlabAlloc = true;
}

/* static */ void MemoryManager::teardownProfiling() {
  tl_heap->m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
}
bool MemoryManager::isGCEnabled() {
  return m_gc_enabled;
}

void MemoryManager::setGCEnabled(bool isGCEnabled) {
  m_gc_enabled = isGCEnabled;
  updateNextGc();
}

void MemoryManager::publishStats(const char* name,
                                 const std::vector<int64_t> &stats,
                                 uint32_t sampleRate) {
  if (stats.size() == 0) return;
  StructuredLogEntry log;
  for (size_t i = 0; i < stats.size(); ++i) {
    std::array<char, 32> log_name;
    snprintf(&log_name[0], log_name.size(), "%s[%lu]", name,
             kSizeIndex2Size[i]);
    log.setInt(&log_name[0], stats[i]);
  }
  log.setInt("sample_rate", sampleRate);
  StructuredLog::log("hhvm_allocs", log);
}

///////////////////////////////////////////////////////////////////////////////