hiphop-php.git: hphp/runtime/base/memory-manager.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/base/memory-manager.h"
18 #include <algorithm>
19 #include <cstdint>
20 #include <limits>
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/exceptions.h"
24 #include "hphp/runtime/base/ini-setting.h"
25 #include "hphp/runtime/base/runtime-option.h"
26 #include "hphp/runtime/base/stack-logger.h"
27 #include "hphp/runtime/base/surprise-flags.h"
28 #include "hphp/runtime/base/sweepable.h"
29 #include "hphp/runtime/base/request-info.h"
30 #include "hphp/runtime/base/req-root.h"
31 #include "hphp/runtime/base/heap-graph.h"
32 #include "hphp/runtime/server/http-server.h"
34 #include "hphp/util/alloc.h"
35 #include "hphp/util/logger.h"
36 #include "hphp/util/process.h"
37 #include "hphp/util/ptr-map.h"
38 #include "hphp/util/struct-log.h"
39 #include "hphp/util/timer.h"
40 #include "hphp/util/trace.h"
42 #include <folly/CPortability.h>
43 #include <folly/Random.h>
44 #include <folly/ScopeGuard.h>
45 #include <folly/portability/SysMman.h>
46 #include <folly/portability/Unistd.h>
48 #include "hphp/runtime/base/memory-manager-defs.h"
50 namespace HPHP {
52 const unsigned kInvalidSweepIndex = 0xffffffff;
53 __thread bool tl_sweeping;
54 THREAD_LOCAL_FLAT(MemoryManager, tl_heap);
55 __thread size_t tl_heap_id; // thread's current heap instance id
57 TRACE_SET_MOD(mm);
59 //////////////////////////////////////////////////////////////////////
61 std::atomic<MemoryManager::ReqProfContext*>
62 MemoryManager::s_trigger{nullptr};
64 bool MemoryManager::s_statsEnabled = false;
65 std::atomic<ssize_t> MemoryManager::s_req_heap_usage;
67 static std::atomic<size_t> s_heap_id; // global counter of heap instances
69 #ifdef USE_JEMALLOC
70 static size_t threadAllocatedpMib[2];
71 static size_t threadDeallocatedpMib[2];
72 static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
74 void MemoryManager::threadStatsInit() {
75 if (!mallctlnametomib) return;
76 size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
77 if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
78 return;
80 miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
81 if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
82 return;
84 MemoryManager::s_statsEnabled = true;
87 inline
88 void MemoryManager::threadStats(uint64_t*& allocated, uint64_t*& deallocated) {
89 pthread_once(&threadStatsOnce, threadStatsInit);
90 if (!MemoryManager::s_statsEnabled) return;
92 size_t len = sizeof(allocated);
93 if (mallctlbymib(threadAllocatedpMib,
94 sizeof(threadAllocatedpMib) / sizeof(size_t),
95 &allocated, &len, nullptr, 0)) {
96 not_reached();
99 len = sizeof(deallocated);
100 if (mallctlbymib(threadDeallocatedpMib,
101 sizeof(threadDeallocatedpMib) / sizeof(size_t),
102 &deallocated, &len, nullptr, 0)) {
103 not_reached();
106 #endif
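// (Descriptive note on the block above: threadStats() points m_allocated /
// m_deallocated at jemalloc's per-thread "thread.allocatedp" /
// "thread.deallocatedp" counters; traceStats() and refreshStatsImpl() below
// read them via *m_allocated and *m_deallocated.)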
108 MemoryManager::MemoryManager() {
109 #ifdef USE_JEMALLOC
110 threadStats(m_allocated, m_deallocated);
111 #endif
112 rl_gcdata.getCheck();
113 resetAllStats();
114 setMemoryLimit(std::numeric_limits<int64_t>::max());
115 resetGC(); // so each thread has unique req_num at startup
116 // make the circular-lists empty.
117 m_strings.next = m_strings.prev = &m_strings;
118 m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
119 m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;
120 IniSetting::Bind(IniSetting::CORE, IniSetting::PHP_INI_ALL, "zend.enable_gc",
121 &m_gc_enabled);
124 MemoryManager::~MemoryManager() {
125 FTRACE(1, "heap-id {} ~MM\n", tl_heap_id);
126 // TODO(T20916887): Enable this for one-bit refcounting.
127 if (debug && !one_bit_refcount) {
128 // Check that every object in the heap is free.
129 forEachHeapObject([&](HeapObject* h, size_t) {
130 assert_flog(h->kind() == HeaderKind::Free,
131 "{} still live in ~MemoryManager()",
132 header_names[size_t(h->kind())]);
135 // ~SparseHeap releases its slabs/bigs.
138 void MemoryManager::resetRuntimeOptions() {
139 if (debug) checkHeap("resetRuntimeOptions");
140 void* mem = this;
141 this->~MemoryManager();
142 new (mem) MemoryManager();
145 void MemoryManager::traceStats(const char* event) {
146 FTRACE(1, "heap-id {} {} ", tl_heap_id, event);
147 if (use_jemalloc) {
148 FTRACE(1, "mm-usage {} extUsage {} ",
149 m_stats.mmUsage(), m_stats.extUsage);
150 FTRACE(1, "capacity {} peak usage {} peak capacity {} ",
151 m_stats.capacity(), m_stats.peakUsage, m_stats.peakCap);
152 FTRACE(1, "total {} reset alloc-dealloc {} cur alloc-dealloc {}\n",
153 m_stats.totalAlloc, m_resetAllocated - m_resetDeallocated,
154 *m_allocated - *m_deallocated);
155 } else {
156 FTRACE(1, "usage: {} capacity: {} peak usage: {} peak capacity: {}\n",
157 m_stats.usage(), m_stats.capacity(),
158 m_stats.peakUsage, m_stats.peakCap);
162 // Reset all memory stats counters, both internal and external; intended to
163 // be used between requests when the whole heap is being reset.
164 void MemoryManager::resetAllStats() {
165 traceStats("resetAllStats pre");
166 m_statsIntervalActive = false;
167 m_stats.mm_udebt = 0;
168 m_stats.mm_uallocated = 0;
169 m_stats.mm_freed = 0;
170 m_stats.extUsage = 0;
171 m_stats.malloc_cap = 0;
172 m_stats.mmap_cap = 0;
173 m_stats.mmap_volume = 0;
174 m_stats.peakUsage = 0;
175 m_stats.peakCap = 0;
176 m_stats.totalAlloc = 0;
177 m_stats.peakIntervalUsage = 0;
178 m_stats.peakIntervalCap = 0;
179 m_enableStatsSync = false;
181   // Reset this memory manager's portion of the bytes used across all memory
182 // managers.
183 s_req_heap_usage.fetch_sub(m_lastUsage, std::memory_order_relaxed);
184 m_lastUsage = 0;
186 if (Trace::enabled) tl_heap_id = ++s_heap_id;
187 if (s_statsEnabled) {
188 m_resetDeallocated = *m_deallocated;
189 m_resetAllocated = *m_allocated;
191 traceStats("resetAllStats post");
194 // Reset external allocation counters, but preserve MemoryManager counters.
195 // The effect of this call is simply to ignore anything we've done *outside*
196 // the MemoryManager allocator after we initialized, to avoid attributing
197 // shared structure initialization that happens during hphp_thread_init()
198 // to this session. Intended to be used once per request, early in the request
199 // lifetime before PHP execution begins.
200 void MemoryManager::resetExternalStats() {
201 traceStats("resetExternalStats pre");
202 // extUsage and totalAlloc are only set by refreshStatsImpl, which we don't
203 // enable until after this has been called.
204 assertx(m_enableStatsSync ||
205 (m_stats.extUsage == 0 && m_stats.totalAlloc == 0));
206 m_enableStatsSync = s_statsEnabled; // false if !use_jemalloc
207 if (s_statsEnabled) {
208 m_resetDeallocated = *m_deallocated;
209 m_resetAllocated = *m_allocated - m_stats.malloc_cap;
210 // By subtracting malloc_cap here, the next call to refreshStatsImpl()
211 // will correctly include m_stats.malloc_cap in extUsage and totalAlloc.
213 traceStats("resetExternalStats post");
216 void MemoryManager::refreshStatsHelperExceeded() {
217 setSurpriseFlag(MemExceededFlag);
218 m_couldOOM = false;
219 if (RuntimeOption::LogNativeStackOnOOM) {
220 log_native_stack("Exceeded memory limit");
224 void MemoryManager::setMemThresholdCallback(size_t threshold) {
225 m_memThresholdCallbackPeakUsage = threshold;
229 * Refresh stats to reflect directly malloc()ed memory, and determine
230 * whether the request memory limit has been exceeded.
232 * The stats parameter allows the updates to be applied to either
233 * m_stats as in refreshStats() or to a separate MemoryUsageStats
234 * struct as in getStatsCopy().
236 void MemoryManager::refreshStatsImpl(MemoryUsageStats& stats) {
237 // Incrementally incorporate the difference between the previous and current
238 // deltas into the memory usage statistic. For reference, the total
239   // malloced memory usage could be calculated as follows, if delta0 were
240 // recorded in resetAllStats():
242 // int64 musage = delta - delta0;
244   // Note, however, that the slab allocator adds to m_stats.malloc_cap
245 // when it calls malloc(), so that this function can avoid
246 // double-counting the malloced memory. Thus musage in the example
247 // code may well substantially exceed m_stats.usage.
248 if (m_enableStatsSync) {
249 // We can't currently handle wrapping so make sure this isn't happening.
250 assertx(*m_allocated <= uint64_t(std::numeric_limits<int64_t>::max()));
251 assertx(*m_deallocated <= uint64_t(std::numeric_limits<int64_t>::max()));
252 const int64_t curAllocated = *m_allocated;
253 const int64_t curDeallocated = *m_deallocated;
255 // Since these deltas potentially include memory allocated from another
256 // thread but deallocated on this one, it is possible for these numbers to
257 // go negative.
258 auto curUsage = curAllocated - curDeallocated;
259 auto resetUsage = m_resetAllocated - m_resetDeallocated;
261 FTRACE(1, "heap-id {} Before stats sync: ", tl_heap_id);
262 FTRACE(1, "reset alloc-dealloc {} cur alloc-dealloc: {} alloc-change: {} ",
263 resetUsage, curUsage, curAllocated - m_resetAllocated);
264 FTRACE(1, "dealloc-change: {} ", curDeallocated - m_resetDeallocated);
265 FTRACE(1, "mm usage {} extUsage {} totalAlloc {} capacity {}\n",
266 stats.mmUsage(), stats.extUsage, stats.totalAlloc, stats.capacity());
268 // External usage (allocated-deallocated) since the last resetStats().
269 stats.extUsage = curUsage - resetUsage;
271 // Calculate the allocation volume since the last reset.
272 // We need to do the calculation instead of just setting it to curAllocated
273 // because of the MaskAlloc capability, which updates m_resetAllocated.
275 // stats.mmap_volume is only used for mmap'd heap space; any malloc'd
276 // space is included in curAllocated.
277 stats.totalAlloc = curAllocated - m_resetAllocated + stats.mmap_volume;
278 FTRACE(1, "heap-id {} after sync extUsage {} totalAlloc: {}\n",
279 tl_heap_id, stats.extUsage, stats.totalAlloc);
281 assertx(m_usageLimit > 0);
282 auto usage = stats.usage();
283 stats.peakUsage = std::max(stats.peakUsage, usage);
284 if (m_statsIntervalActive) {
285 stats.peakIntervalUsage = std::max(stats.peakIntervalUsage, usage);
286 stats.peakIntervalCap = std::max(stats.peakIntervalCap, stats.capacity());
291 * Refresh our internally stored m_stats, then check for OOM and the
292 * memThresholdCallback
294 void MemoryManager::refreshStats() {
295 refreshStatsImpl(m_stats);
296 auto usage = m_stats.usage();
297 s_req_heap_usage.fetch_add(usage - m_lastUsage, std::memory_order_relaxed);
298 m_lastUsage = usage;
299 if (usage > m_usageLimit && m_couldOOM) {
300 refreshStatsHelperExceeded();
301 } else if (usage > m_memThresholdCallbackPeakUsage) {
302 m_memThresholdCallbackPeakUsage = SIZE_MAX;
303 setSurpriseFlag(MemThresholdFlag);
307 void MemoryManager::recordStats(StructuredLogEntry& entry) {
308 auto const stats = getStatsCopy();
309 entry.ints["mem-peak-usage"] = stats.peakUsage;
310 entry.ints["mem-peak-capacity"] = stats.peakCap;
311 entry.ints["mem-total-alloc"] = stats.totalAlloc;
315 * Calculate how many bytes of allocation should happen before the next
316 * time the fast path is interrupted.
318 void MemoryManager::updateMMDebt() {
319 const int64_t delta_sample = m_nextSample - m_stats.mmAllocated();
320 const int64_t delta_gc = m_nextGC - m_stats.mmUsage();
321 auto const delta = static_cast<uint64_t>(std::min(delta_sample, delta_gc));
322 auto const new_debt = delta > std::numeric_limits<int64_t>::max() ? 0 : delta;
323 m_stats.mm_uallocated += new_debt - m_stats.mm_udebt;
324 m_stats.mm_udebt = new_debt;
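// (Reading of the debt mechanism, based on this function and on
// mallocSmallSizeSlow() below; the fast path itself lives outside this file:
// each small allocation presumably subtracts its size from the unsigned
// mm_udebt budget computed here. Once the smaller of the sampling and GC
// budgets is exhausted, the subtraction wraps past zero, mm_udebt exceeds
// int64_t's max, and mallocSmallSizeSlow() re-runs checkSampling()/checkGC()
// and then updateMMDebt() again.)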
327 void MemoryManager::sweep() {
328 assertx(!sweeping());
329 tl_sweeping = true;
330 DEBUG_ONLY size_t num_sweepables = 0, num_natives = 0;
332 // iterate until both sweep lists are empty. Entries can be added or
333 // removed from either list during sweeping.
334 do {
335 while (!m_sweepables.empty()) {
336 num_sweepables++;
337 auto obj = m_sweepables.next();
338 obj->unregister();
339 obj->sweep();
341 while (!m_natives.empty()) {
342 num_natives++;
343 assertx(m_natives.back()->sweep_index == m_natives.size() - 1);
344 auto node = m_natives.back();
345 m_natives.pop_back();
346 auto obj = Native::obj(node);
347 auto ndi = obj->getVMClass()->getNativeDataInfo();
348 ndi->sweep(obj);
349 // trash the native data but leave the header and object parsable
350 assertx(memset(node+1, kSmallFreeFill, node->obj_offset - sizeof(*node)));
352 } while (!m_sweepables.empty());
354 DEBUG_ONLY auto napcs = m_apc_arrays.size();
355 FTRACE(1, "heap-id {} sweep: sweepable {} native {} apc array {}\n",
356 tl_heap_id,
357 num_sweepables,
358 num_natives,
359 napcs);
361 // decref apc arrays referenced by this request. This must happen here
362 // (instead of in resetAllocator), because the sweep routine may use
363 // g_context.
364 while (!m_apc_arrays.empty()) {
365 auto a = m_apc_arrays.back();
366 m_apc_arrays.pop_back();
367 a->sweep();
368 if (debug) a->m_sweep_index = kInvalidSweepIndex;
371 if (debug) checkHeap("after MM::sweep");
374 void MemoryManager::resetAllocator() {
375 assertx(m_natives.empty() && m_sweepables.empty() && tl_sweeping);
376 // decref apc strings referenced by this request
377 DEBUG_ONLY auto nstrings = StringData::sweepAll();
378 FTRACE(1, "heap-id {} resetAllocator: strings {}\n", tl_heap_id, nstrings);
380 // free the heap
381 m_heap.reset();
383 // zero out freelists
384 for (auto& list : m_freelists) list.head = nullptr;
385 m_front = m_limit = 0;
386 tl_sweeping = false;
387 m_exiting = false;
388 resetAllStats();
389 setGCEnabled(RuntimeOption::EvalEnableGC);
390 resetGC();
391 if (debug) resetEagerGC();
394 void MemoryManager::flush() {
395 always_assert(empty());
396 m_heap.flush();
397 m_apc_arrays = std::vector<APCLocalArray*>();
398 m_natives = std::vector<NativeNode*>();
399 m_root_handles = std::vector<req::root_handle*>{};
403 * req::malloc & friends implementation notes
405 * There are three kinds of allocations:
407 * a) Big allocations. (size >= kMaxSmallSize)
409 * In this case we behave as a wrapper around the normal libc
410 * malloc/free. We insert a MallocNode header at the front of the
411 * allocation in order to find these at sweep time (end of
412 * request) so we can give them back to libc.
414 * b) Size-tracked small allocations.
416 * This is used for the generic case, for callers who can't tell
417 * us the size of the allocation at free time.
419 * In this situation, we put a MallocNode header at the front of
420 * the block that tells us the size for when we need to free it
421 * later. We differentiate this from a MallocNode using the size
422 * field in either structure (they overlap at the same address).
424  *   c) Size-untracked small allocations
426 * Many callers have an easy time telling you how big the object
427 * was when they need to free it. In this case we can avoid the
428  *      MallocNode, which saves us some memory and also lets us give
429 * out 16-byte aligned pointers easily.
431 * We know when we have one of these because it has to be freed
432 * through a different entry point. (E.g. tl_heap->freeSmallSize() or
433 * tl_heap->freeBigSize().)
435 * When small blocks are freed (case b and c), they're placed in the
436 * appropriate size-segregated freelist. Large blocks are immediately
437 * passed back to libc via free.
439 * There are currently two kinds of freelist entries: entries where
440 * there is already a valid MallocNode on the list (case b), and
442  * entries where there isn't (case c).  The reason for this is that,
443  * this way, when allocating for case b, you don't need to store the
443 * MallocNode size again. Much of the heap is going through case b at
444 * the time of this writing, so it is a measurable regression to try
445 * to just combine the free lists, but presumably we can move more to
446 * case c and combine the lists eventually.
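// Illustrative sketch of the entry points described above (the declarations
// live in memory-manager.h; this is just the shape of the API, not new code):
//
//   // case (a)/(b): size-tracked -- req::free() reads the MallocNode header
//   void* p = req::malloc(24, type_scan::kIndexUnknownNoPtrs);
//   req::free(p);
//
//   // case (c): size-untracked -- the caller passes the size back at free time
//   void* q = tl_heap->mallocSmallSize(kSmallSizeAlign);
//   tl_heap->freeSmallSize(q, kSmallSizeAlign);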
449 const std::array<char*,NumHeaderKinds> header_names = {{
450 "PackedArray", "MixedArray", "EmptyArray", "ApcArray", "GlobalsArray",
451 "RecordArray", "DictArray", "VecArray", "KeysetArray",
452 "String", "Resource", "ClsMeth", "Record",
453 "Object", "NativeObject", "WaitHandle", "AsyncFuncWH", "AwaitAllWH",
454 "Closure", "Vector", "Map", "Set", "Pair", "ImmVector", "ImmMap", "ImmSet",
455 "AsyncFuncFrame", "NativeData", "ClosureHdr", "MemoData", "Cpp",
456 "SmallMalloc", "BigMalloc",
457 "Free", "Hole", "Slab"
460 // initialize a Hole header in the unused memory between m_front and m_limit
461 void MemoryManager::initHole(void* ptr, uint32_t size) {
462 FreeNode::InitFrom(ptr, size, HeaderKind::Hole);
465 void MemoryManager::initFree() {
466 if ((char*)m_front < (char*)m_limit) {
467 initHole(m_front, (char*)m_limit - (char*)m_front);
468 Slab::fromPtr(m_front)->setStart(m_front);
470 reinitFree();
473 void MemoryManager::reinitFree() {
474 for (size_t i = 0, e = m_freelists.size(); i < e; i++) {
475 auto size = sizeIndex2Size(i);
476 auto n = m_freelists[i].head;
477 for (; n && n->kind() != HeaderKind::Free; n = n->next) {
478 n->initHeader_32(HeaderKind::Free, size);
480 if (debug) {
481 // ensure the freelist tail is already initialized.
482 for (; n; n = n->next) {
483 assertx(n->kind() == HeaderKind::Free && n->size() == size);
489 MemoryManager::FreelistArray MemoryManager::beginQuarantine() {
490 FreelistArray list;
491 for (size_t i = 0, n = list.size(); i < n; ++i) {
492 list[i].head = m_freelists[i].head;
493 m_freelists[i].head = nullptr;
495 return list;
498 // turn free blocks into holes, restore original freelists
499 void MemoryManager::endQuarantine(FreelistArray&& list) {
500 for (size_t i = 0, n = list.size(); i < n; i++) {
501 auto size = sizeIndex2Size(i);
502 while (auto n = m_freelists[i].likelyPop()) {
503 memset(n, 0x8a, size);
504 initHole(n, size);
506 m_freelists[i].head = list[i].head;
507 list[i].head = nullptr;
511 // test iterating objects in slabs
512 void MemoryManager::checkHeap(const char* phase) {
513 size_t bytes=0;
514 std::vector<HeapObject*> hdrs;
515 PtrMap<HeapObject*> free_blocks, apc_arrays, apc_strings;
516 size_t counts[NumHeaderKinds];
517 for (unsigned i=0; i < NumHeaderKinds; i++) counts[i] = 0;
518 forEachHeapObject([&](HeapObject* h, size_t alloc_size) {
519 hdrs.push_back(h);
520 bytes += alloc_size;
521 auto kind = h->kind();
522 counts[(int)kind]++;
523 switch (kind) {
524 case HeaderKind::Free:
525 free_blocks.insert(h, alloc_size);
526 break;
527 case HeaderKind::Apc:
528 if (static_cast<APCLocalArray*>(h)->m_sweep_index !=
529 kInvalidSweepIndex) {
530 apc_arrays.insert(h, alloc_size);
532 break;
533 case HeaderKind::String:
534 if (static_cast<StringData*>(h)->isProxy()) {
535 apc_strings.insert(h, alloc_size);
537 break;
538 case HeaderKind::Packed:
539 case HeaderKind::Mixed:
540 case HeaderKind::Dict:
541 case HeaderKind::Empty:
542 case HeaderKind::VecArray:
543 case HeaderKind::Keyset:
544 case HeaderKind::Globals:
545 case HeaderKind::Object:
546 case HeaderKind::NativeObject:
547 case HeaderKind::WaitHandle:
548 case HeaderKind::AsyncFuncWH:
549 case HeaderKind::AwaitAllWH:
550 case HeaderKind::Closure:
551 case HeaderKind::Vector:
552 case HeaderKind::Map:
553 case HeaderKind::Set:
554 case HeaderKind::Pair:
555 case HeaderKind::ImmVector:
556 case HeaderKind::ImmMap:
557 case HeaderKind::ImmSet:
558 case HeaderKind::Resource:
559 case HeaderKind::ClsMeth:
560 case HeaderKind::AsyncFuncFrame:
561 case HeaderKind::NativeData:
562 case HeaderKind::ClosureHdr:
563 case HeaderKind::MemoData:
564 case HeaderKind::Cpp:
565 case HeaderKind::SmallMalloc:
566 case HeaderKind::BigMalloc:
567 case HeaderKind::Record:
568 case HeaderKind::RecordArray:
569 break;
570 case HeaderKind::Hole:
571 case HeaderKind::Slab:
572 assertx(false && "forEachHeapObject skips these kinds");
573 break;
577 // check the free lists
578 free_blocks.prepare();
579 size_t num_free_blocks = 0;
580 for (auto& list : m_freelists) {
581 for (auto n = list.head; n; n = n->next) {
582 assertx(free_blocks.isStart(n));
583 ++num_free_blocks;
586 assertx(num_free_blocks == free_blocks.size());
588 // check the apc array list
589 assertx(apc_arrays.size() == m_apc_arrays.size());
590 apc_arrays.prepare();
591 for (UNUSED auto a : m_apc_arrays) {
592 assertx(apc_arrays.isStart(a));
595 // check the apc string list
596 size_t num_apc_strings = 0;
597 apc_strings.prepare();
598 for (StringDataNode *next, *n = m_strings.next; n != &m_strings; n = next) {
599 next = n->next;
600 UNUSED auto const s = StringData::node2str(n);
601 assertx(s->isProxy());
602 assertx(apc_strings.isStart(s));
603 ++num_apc_strings;
605 assertx(num_apc_strings == apc_strings.size());
607 // heap check is done. If we are not exiting, check pointers using HeapGraph
608 if (Trace::moduleEnabled(Trace::heapreport)) {
609 auto g = makeHeapGraph(true /* include free blocks */);
610 if (!exiting()) checkPointers(g, phase);
611 if (Trace::moduleEnabled(Trace::heapreport, 2)) {
612 printHeapReport(g, phase);
617 // Filling the start bits one word at a time requires writing the mask for
618 // the appropriate size class, and left-shifting the mask each time to insert
619 // any necessary zeros not included in the previous word, for size classes
620 // that don't pack perfectly into 64 bits. Suppose:
622 // d = size / kSmallSizeAlign = number of bits for the given size class
624 // In other words, when d%64 != 0, we need to shift the mask slightly after
625 // each store, until the shift amount wraps. For example, using 8-bit words
626 // for brevity, the sequence of stores would be:
628 // 11111111 11111111 11111111 11111111 size=16 d=1 shift 0,0,0,0
629 // 10101010 10101010 10101010 10101010 size=32 d=2 shift 0,0,0,0
630 // 10010010 01001001 00100100 10010010 size=48 d=3 shift 0,1,2,0
631 // 10001000 10001000 10001000 10001000 size=64 d=4 shift 0,0,0,0
632 // 10000100 00100001 00001000 01000010 00010000 size=80 d=5 shift 0,2,4,1,3,0
633 // 10000010 00001000 00100000 10000010 size=96 d=6 shift 0,4,2,0
634 // 10000001 00000010 00000100 00001000 00010000 00100000 01000000 10000001
635 // size=112 d=7 shift 0,6,5,4,3,2,1,0
636 // 10000000 10000000 size=128 d=8 shift 0,0
638 // build a bitmask-init table for size classes that fit at least one
639 // object per 64*kSmallSizeAlign bytes; this means they fit at least
640 // one start bit per 64 bits, supporting fast nContig initialization.
642 // masks_[i] = bitmask to store each time
643 std::array<uint64_t,Slab::kNumMasks> Slab::masks_;
645 // shifts_[i] = how much to shift masks_[i] after each store
646 std::array<uint8_t,Slab::kNumMasks> Slab::shifts_;
648 struct Slab::InitMasks {
649 InitMasks() {
650 static_assert(kSizeIndex2Size[kNumMasks - 1] <= 64 * kSmallSizeAlign, "");
651 for (size_t i = 0; i < kNumMasks; i++) {
652 auto const d = kSizeIndex2Size[i] / kSmallSizeAlign;
653 for (size_t j = 0; j < 64; j += d) {
654 masks_[i] |= 1ull << j;
656 shifts_[i] = d - 64 % d; // # of high-order zeros not in mask
661 namespace {
663 Slab::InitMasks s_init_masks;
665 using FreelistArray = MemoryManager::FreelistArray;
667 alignas(64) constexpr size_t kContigTab[] = {
668 #define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
669 ncontig * kSizeIndex2Size[index],
670 SIZE_CLASSES
671 #undef SIZE_CLASS
674 alignas(64) const uint8_t kContigIndexTab[] = {
675 #define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
676 (uint8_t)std::max(size_t(index+1),\
677 MemoryManager::size2Index(kContigTab[index])),
678 SIZE_CLASSES
679 #undef SIZE_CLASS
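// (Descriptive note, inferred from the uses below: kContigTab[i] is how many
// contiguous bytes slabAlloc() tries to carve out per entry into size class i,
// and kContigIndexTab[i] is the first size class mallocSmallSizeSlow() scans
// when freelist i itself is empty.)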
683 * Store slab tail bytes (if any) in freelists.
685 inline
686 void storeTail(FreelistArray& freelists, void* tail, size_t tailBytes,
687 Slab* slab) {
688 void* rem = tail;
689 for (auto remBytes = tailBytes; remBytes > 0;) {
690 auto fragBytes = remBytes;
691 assertx(fragBytes >= kSmallSizeAlign);
692 assertx((fragBytes & kSmallSizeAlignMask) == 0);
693 auto fragInd = MemoryManager::size2Index(fragBytes + 1) - 1;
694 auto fragUsable = MemoryManager::sizeIndex2Size(fragInd);
695 auto frag = FreeNode::InitFrom((char*)rem + remBytes - fragUsable,
696 fragUsable, HeaderKind::Hole);
697 FTRACE(4, "storeTail({}, {}): rem={}, remBytes={}, "
698 "frag={}, fragBytes={}, fragUsable={}, fragInd={}\n", tail,
699 (void*)uintptr_t(tailBytes), rem, (void*)uintptr_t(remBytes),
700 frag, (void*)uintptr_t(fragBytes), (void*)uintptr_t(fragUsable),
701 fragInd);
702 freelists[fragInd].push(frag);
703 slab->setStart(frag);
704 remBytes -= fragUsable;
709 * Create split_bytes worth of contiguous regions, each of size splitUsable,
710 * and store them in the appropriate freelist. In addition, initialize the
711 * start-bits for the new objects.
713 inline
714 void splitTail(FreelistArray& freelists, void* tail, size_t tailBytes,
715 size_t split_bytes, size_t splitUsable, size_t index,
716 Slab* slab) {
717 assertx(tailBytes >= kSmallSizeAlign);
718 assertx((tailBytes & kSmallSizeAlignMask) == 0);
719 assertx((splitUsable & kSmallSizeAlignMask) == 0);
720 assertx(split_bytes <= tailBytes);
722 // initialize the free objects, and push them onto the freelist.
723 auto head = freelists[index].head;
724 auto rem = (char*)tail + split_bytes;
725 for (auto next = rem - splitUsable; next >= tail; next -= splitUsable) {
726 auto split = FreeNode::InitFrom(next, splitUsable, HeaderKind::Hole);
727 FTRACE(4, "MemoryManager::splitTail(tail={}, tailBytes={}, tailPast={}): "
728 "split={}, splitUsable={}\n", tail,
729 (void*)uintptr_t(tailBytes), (void*)(uintptr_t(tail) + tailBytes),
730 split, splitUsable);
731 head = FreeNode::UninitFrom(split, head);
733 freelists[index].head = head;
735 // initialize the start-bits for each object.
736 slab->setStarts(tail, rem, splitUsable, index);
738 auto remBytes = tailBytes - split_bytes;
739 assertx(uintptr_t(rem) + remBytes == uintptr_t(tail) + tailBytes);
740 storeTail(freelists, rem, remBytes, slab);
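// (Worked example of the split above, assuming the numbers correspond to real
// size classes: splitTail(freelists, tail, tailBytes=1024, split_bytes=768,
// splitUsable=256, index, slab) pushes three 256-byte free objects at tail,
// tail+256, and tail+512 onto freelists[index], sets their start bits, then
// hands the remaining 256 bytes at tail+768 to storeTail().)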
745 * Get a new slab, then allocate nbytes from it and install it in our
746 * slab list. Return the newly allocated nbytes-sized block.
748 NEVER_INLINE void* MemoryManager::newSlab(size_t nbytes) {
749 refreshStats();
750 if (m_front < m_limit) {
751 storeTail(m_freelists, m_front, (char*)m_limit - (char*)m_front,
752 Slab::fromPtr(m_front));
754 auto mem = m_heap.allocSlab(m_stats);
755 always_assert(reinterpret_cast<uintptr_t>(mem) % kSlabAlign == 0);
756 auto slab = static_cast<Slab*>(mem);
757 auto slab_start = slab->init();
758 m_front = slab_start + nbytes; // allocate requested object
759 // we can't use any space after slab->end() even if the allocator allows
760   // (indicated by mem.size), because of the fixed-size crossing map.
761 m_limit = slab->end();
762 assertx(m_front <= m_limit);
763 FTRACE(3, "newSlab: adding slab at {} to limit {}\n", slab_start, m_limit);
764 slab->setStart(slab_start);
765 return slab_start;
769  * Allocate `nbytes' from the current slab, aligned to kSmallSizeAlign.
771 inline void* MemoryManager::slabAlloc(size_t nbytes, size_t index) {
772 FTRACE(3, "slabAlloc({}, {}): m_front={}, m_limit={}\n", nbytes, index,
773 m_front, m_limit);
774 assertx(nbytes == sizeIndex2Size(index));
775 assertx((uintptr_t(m_front) & kSmallSizeAlignMask) == 0);
777 auto ptr = m_front;
778 auto next = (void*)(uintptr_t(ptr) + nbytes);
779 Slab* slab;
780 if (uintptr_t(next) <= uintptr_t(m_limit)) {
781 m_front = next;
782 slab = Slab::fromPtr(ptr);
783 slab->setStart(ptr);
784 } else {
785 if (UNLIKELY(index >= kNumSmallSizes) || UNLIKELY(m_bypassSlabAlloc)) {
786 // Stats correction; mallocBigSize() updates m_stats. Add to mm_udebt
787 // rather than mm_freed because we're adjusting for double-counting, not
788 // actually freeing anything.
789 m_stats.mm_udebt += nbytes;
790 return mallocBigSize(nbytes);
792 ptr = newSlab(nbytes); // sets start bit at ptr
793 slab = Slab::fromPtr(ptr);
795 // Preallocate more of the same in order to amortize entry into this method.
796 auto split_bytes = kContigTab[index] - nbytes;
797 auto avail = uintptr_t(m_limit) - uintptr_t(m_front);
798 if (UNLIKELY(split_bytes > avail)) {
799 split_bytes = avail - avail % nbytes; // Expensive division.
801 if (split_bytes > 0) {
802 auto tail = m_front;
803 m_front = (void*)(uintptr_t(tail) + split_bytes);
804 splitTail(m_freelists, tail, split_bytes, split_bytes, nbytes, index, slab);
806 FTRACE(4, "slabAlloc({}, {}) --> ptr={}, m_front={}, m_limit={}\n", nbytes,
807 index, ptr, m_front, m_limit);
808 return ptr;
811 NEVER_INLINE
812 void* MemoryManager::mallocSmallSizeSlow(size_t nbytes, size_t index) {
813 assertx(nbytes == sizeIndex2Size(index));
815 if (UNLIKELY(m_stats.mm_udebt > std::numeric_limits<int64_t>::max())) {
816 checkSampling(nbytes);
817 // Must be here to check gc; might still have free objects.
818 checkGC();
819 updateMMDebt();
820 auto clamped = std::min(index, kNumSmallSizes);
821 if (auto p = m_freelists[clamped].unlikelyPop()) {
822 FTRACE(3, "mallocSmallSizeSlow: check gc {} -> {}\n", nbytes, p);
823 return p;
827 size_t contigInd = kContigIndexTab[index];
828 for (auto i = contigInd; i < kNumSmallSizes; ++i) {
829 FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): contigMin={}, "
830 "contigInd={}, try i={}\n", nbytes, index, kContigTab[index],
831 contigInd, i);
832 if (auto p = m_freelists[i].unlikelyPop()) {
833 assertx(i > index); // because freelist[index] was empty
834 assertx(Slab::fromPtr(p)->isStart(p));
835 FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): "
836 "contigMin={}, contigInd={}, use i={}, size={}, p={}\n",
837 nbytes, index, kContigTab[index], contigInd, i,
838 sizeIndex2Size(i), p);
839 // Split tail into preallocations and store them back into freelists.
840 splitTail(m_freelists, (char*)p + nbytes, sizeIndex2Size(i) - nbytes,
841 kContigTab[index] - nbytes, nbytes, index, Slab::fromPtr(p));
842 return p;
846 // No available free list items; carve new space from the current slab.
847 return slabAlloc(nbytes, index);
850 inline void MemoryManager::updateBigStats() {
851   // If we are using jemalloc, it keeps track of allocations outside of the
852   // slabs and their usage, so we should force a refresh after an allocation
853   // that was too large for one of the existing slabs. When we're not using
854   // jemalloc this check won't do anything, so avoid the extra overhead.
855 if (debug) requestEagerGC();
856 refreshStats();
859 NEVER_INLINE
860 void MemoryManager::freeSmallIndexSlow(void* ptr, size_t index, size_t bytes) {
861 if (UNLIKELY(index >= kNumSmallSizes) || UNLIKELY(m_bypassSlabAlloc)) {
862 return freeBigSize(ptr);
864 // copy of FreeList::push() fast path when head == nullptr
865 m_freelists[index].head = FreeNode::UninitFrom(ptr, nullptr);
866 m_stats.mm_freed += bytes;
869 NEVER_INLINE
870 void* MemoryManager::mallocBigSize(size_t bytes, bool zero) {
871 if (debug) tl_heap->requestEagerGC();
872 auto ptr = m_heap.allocBig(bytes, zero, m_stats);
873 updateBigStats();
874 checkSampling(bytes);
875 checkGC();
876 FTRACE(3, "mallocBigSize: {} ({} requested)\n", ptr, bytes);
877 return ptr;
880 MallocNode* MemoryManager::reallocBig(MallocNode* n, size_t nbytes) {
881 assertx(n->kind() == HeaderKind::BigMalloc);
882 auto n2 = static_cast<MallocNode*>(
883 m_heap.resizeBig(n, nbytes, m_stats)
885 n2->nbytes = nbytes;
886 updateBigStats();
887 return n2;
890 NEVER_INLINE
891 void MemoryManager::freeBigSize(void* vp) {
892 FTRACE(3, "freeBigSize: {}\n", vp);
893 m_heap.freeBig(vp, m_stats);
896 // req::malloc api entry points, with support for malloc/free corner cases.
897 namespace req {
899 static void* allocate(size_t nbytes, bool zero, type_scan::Index ty) {
900 nbytes = std::max(nbytes, size_t(1));
901 auto const npadded = nbytes + sizeof(MallocNode);
902 if (LIKELY(npadded <= kMaxSmallSize)) {
903 auto const ptr = static_cast<MallocNode*>(
904 tl_heap->mallocSmallSize(npadded)
906 ptr->initHeader_32_16(HeaderKind::SmallMalloc, 0, ty);
907 ptr->nbytes = npadded;
908 return zero ? memset(ptr + 1, 0, nbytes) : ptr + 1;
910 auto const ptr = static_cast<MallocNode*>(
911 tl_heap->mallocBigSize(npadded, zero)
913 ptr->initHeader_32_16(HeaderKind::BigMalloc, 0, ty);
914 ptr->nbytes = npadded;
915 return ptr + 1;
918 void* malloc(size_t nbytes, type_scan::Index tyindex) {
919 assertx(type_scan::isKnownType(tyindex));
920 return allocate(nbytes, false, tyindex);
923 void* calloc(size_t count, size_t nbytes, type_scan::Index tyindex) {
924 assertx(type_scan::isKnownType(tyindex));
925 return allocate(count * nbytes, true, tyindex);
928 void* malloc_untyped(size_t nbytes) {
929 auto n = static_cast<MallocNode*>(
930 tl_heap->mallocBigSize(std::max(nbytes + sizeof(MallocNode), 1ul), false)
932 n->initHeader_32_16(HeaderKind::BigMalloc, 0, type_scan::kIndexUnknown);
933 n->nbytes = nbytes + sizeof(MallocNode);
934 return n + 1;
937 void* calloc_untyped(size_t count, size_t bytes) {
938 auto nbytes = count * bytes + sizeof(MallocNode);
939 auto n = static_cast<MallocNode*>(
940 tl_heap->mallocBigSize(nbytes, true)
942 n->initHeader_32_16(HeaderKind::BigMalloc, 0, type_scan::kIndexUnknown);
943 n->nbytes = nbytes;
944 return n + 1;
947 void* realloc(void* ptr, size_t nbytes, type_scan::Index tyindex) {
948 assertx(type_scan::isKnownType(tyindex));
949 if (!ptr) {
950 return allocate(nbytes, false, tyindex);
952 if (!nbytes) {
953 req::free(ptr);
954 return nullptr;
956 FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
957 ptr, nbytes, tyindex);
958 auto const n = static_cast<MallocNode*>(ptr) - 1;
959 assertx(n->typeIndex() == tyindex);
960 auto new_size = nbytes + sizeof(MallocNode);
961 if (LIKELY(n->kind() == HeaderKind::SmallMalloc) ||
962 UNLIKELY(new_size <= kMaxSmallSize)) {
963 // either the old or new block will be small; force a copy.
964 auto newmem = allocate(nbytes, false, tyindex);
965 auto copy_size = std::min(n->nbytes - sizeof(MallocNode), nbytes);
966 newmem = memcpy(newmem, ptr, copy_size);
967 req::free(ptr);
968 return newmem;
970 // it's a big allocation.
971 auto n2 = tl_heap->reallocBig(n, new_size);
972 return n2 + 1;
975 void* realloc_untyped(void* ptr, size_t nbytes) {
976 // first handle corner cases that degenerate to malloc() or free()
977 if (!ptr) {
978 return req::malloc_untyped(nbytes);
980 if (!nbytes) {
981 req::free(ptr);
982 return nullptr;
984 FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
985 ptr, nbytes, type_scan::kIndexUnknown);
986 auto const n = static_cast<MallocNode*>(ptr) - 1;
987 assertx(n->kind() == HeaderKind::BigMalloc);
988 assertx(n->typeIndex() == type_scan::kIndexUnknown);
989 auto n2 = tl_heap->reallocBig(n, nbytes + sizeof(MallocNode));
990 return n2 + 1;
993 char* strndup(const char* str, size_t len) {
994 size_t n = std::min(len, strlen(str));
995 char* ret = reinterpret_cast<char*>(
996 req::malloc(n + 1, type_scan::kIndexUnknownNoPtrs)
998 memcpy(ret, str, n);
999 ret[n] = 0;
1000 return ret;
1003 void free(void* ptr) {
1004 if (!ptr) return;
1005 auto const n = static_cast<MallocNode*>(ptr) - 1;
1006 if (LIKELY(n->kind() == HeaderKind::SmallMalloc)) {
1007 return tl_heap->freeSmallSize(n, n->nbytes);
1009 if (n->kind() == HeaderKind::Cpp) {
1010 return tl_heap->objFree(n, n->nbytes);
1012 assertx(n->kind() == HeaderKind::BigMalloc);
1013 tl_heap->freeBigSize(n);
1016 } // namespace req
1018 //////////////////////////////////////////////////////////////////////
1020 void MemoryManager::addNativeObject(NativeNode* node) {
1021 if (debug) for (DEBUG_ONLY auto n : m_natives) assertx(n != node);
1022 node->sweep_index = m_natives.size();
1023 m_natives.push_back(node);
1026 void MemoryManager::removeNativeObject(NativeNode* node) {
1027 assertx(node->sweep_index < m_natives.size());
1028 assertx(m_natives[node->sweep_index] == node);
1029 auto index = node->sweep_index;
1030 auto last = m_natives.back();
1031 m_natives[index] = last;
1032 m_natives.pop_back();
1033 last->sweep_index = index;
1036 void MemoryManager::addApcArray(APCLocalArray* a) {
1037 a->m_sweep_index = m_apc_arrays.size();
1038 m_apc_arrays.push_back(a);
1041 void MemoryManager::removeApcArray(APCLocalArray* a) {
1042 assertx(a->m_sweep_index < m_apc_arrays.size());
1043 assertx(m_apc_arrays[a->m_sweep_index] == a);
1044 auto index = a->m_sweep_index;
1045 auto last = m_apc_arrays.back();
1046 m_apc_arrays[index] = last;
1047 m_apc_arrays.pop_back();
1048 last->m_sweep_index = index;
1051 void MemoryManager::addSweepable(Sweepable* obj) {
1052 obj->enlist(&m_sweepables);
1055 // defined here because memory-manager.h includes sweepable.h
1056 Sweepable::Sweepable() {
1057 tl_heap->addSweepable(this);
1060 //////////////////////////////////////////////////////////////////////
1062 void MemoryManager::resetCouldOOM(bool state) {
1063 clearSurpriseFlag(MemExceededFlag);
1064 m_couldOOM = state;
1068 ///////////////////////////////////////////////////////////////////////////////
1069 // Request profiling.
1071 bool MemoryManager::triggerProfiling(const std::string& filename) {
1072 auto trigger = new ReqProfContext();
1073 trigger->flag = true;
1074 trigger->filename = filename;
1076 ReqProfContext* expected = nullptr;
1078 if (!s_trigger.compare_exchange_strong(expected, trigger)) {
1079 delete trigger;
1080 return false;
1082 return true;
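// Example flow, pieced together from this file: a caller invokes
//   MemoryManager::triggerProfiling("/tmp/heap.prof");  // path is only an example
// which parks a ReqProfContext in s_trigger. The next requestInit() exchanges
// the trigger out, copies it into tl_heap->m_profctx, and (under USE_JEMALLOC)
// enables jemalloc heap profiling; requestShutdown() later dumps the profile to
// profctx.filename and restores the previous prof.active settings.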
1085 void MemoryManager::requestInit() {
1086 tl_heap->m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;
1088 if (RuntimeOption::EvalHeapAllocSampleRequests > 0 &&
1089 RuntimeOption::EvalHeapAllocSampleBytes > 0) {
1090 if (!folly::Random::rand32(RuntimeOption::EvalHeapAllocSampleRequests)) {
1091 tl_heap->m_nextSample =
1092 folly::Random::rand32(RuntimeOption::EvalHeapAllocSampleBytes);
1096 // If the trigger has already been claimed, do nothing.
1097 auto trigger = s_trigger.exchange(nullptr);
1098 if (trigger == nullptr) return;
1100 always_assert(tl_heap->empty());
1102 // Initialize the request-local context from the trigger.
1103 auto& profctx = tl_heap->m_profctx;
1104 assertx(!profctx.flag);
1106 tl_heap->m_bypassSlabAlloc = true;
1107 profctx = *trigger;
1108 delete trigger;
1110 #ifdef USE_JEMALLOC
1111 // Reset jemalloc stats.
1112 if (mallctlCall<true>("prof.reset") != 0) {
1113 return;
1116 // Enable jemalloc thread-local heap dumps.
1117 if (mallctlReadWrite<bool, true>("prof.active", &profctx.prof_active, true)
1118 != 0) {
1119 profctx = ReqProfContext{};
1120 return;
1122 if (mallctlReadWrite<bool, true>("thread.prof.active",
1123 &profctx.thread_prof_active,
1124 true) != 0) {
1125 mallctlWrite("prof.active", profctx.prof_active);
1126 profctx = ReqProfContext{};
1127 return;
1129 #endif
1132 void MemoryManager::requestShutdown() {
1133 if (tl_heap->m_nextSample != kNoNextSample) {
1134 reset_alloc_sampling();
1135 tl_heap->m_nextSample = kNoNextSample;
1138 auto& profctx = tl_heap->m_profctx;
1139 if (!profctx.flag) return;
1141 #ifdef USE_JEMALLOC
1142 jemalloc_pprof_dump(profctx.filename, true);
1144 mallctlWrite("thread.prof.active", profctx.thread_prof_active);
1145 mallctlWrite("prof.active", profctx.prof_active);
1146 #endif
1148 tl_heap->m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
1149 tl_heap->m_memThresholdCallbackPeakUsage = SIZE_MAX;
1150 profctx = ReqProfContext{};
1153 /* static */ void MemoryManager::setupProfiling() {
1154 always_assert(tl_heap->empty());
1155 tl_heap->m_bypassSlabAlloc = true;
1158 /* static */ void MemoryManager::teardownProfiling() {
1159 tl_heap->m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
1162 bool MemoryManager::isGCEnabled() {
1163 return m_gc_enabled;
1166 void MemoryManager::setGCEnabled(bool isGCEnabled) {
1167 m_gc_enabled = isGCEnabled;
1168 updateNextGc();
1171 ///////////////////////////////////////////////////////////////////////////////