2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/base/memory-manager.h"
22 #include "hphp/runtime/base/builtin-functions.h"
23 #include "hphp/runtime/base/exceptions.h"
24 #include "hphp/runtime/base/ini-setting.h"
25 #include "hphp/runtime/base/runtime-option.h"
26 #include "hphp/runtime/base/stack-logger.h"
27 #include "hphp/runtime/base/surprise-flags.h"
28 #include "hphp/runtime/base/sweepable.h"
29 #include "hphp/runtime/base/request-info.h"
30 #include "hphp/runtime/base/req-root.h"
31 #include "hphp/runtime/base/heap-graph.h"
32 #include "hphp/runtime/server/http-server.h"
34 #include "hphp/util/alloc.h"
35 #include "hphp/util/logger.h"
36 #include "hphp/util/process.h"
37 #include "hphp/util/ptr-map.h"
38 #include "hphp/util/struct-log.h"
39 #include "hphp/util/timer.h"
40 #include "hphp/util/trace.h"
42 #include <folly/CPortability.h>
43 #include <folly/Random.h>
44 #include <folly/ScopeGuard.h>
45 #include <folly/portability/SysMman.h>
46 #include <folly/portability/Unistd.h>
48 #include "hphp/runtime/base/memory-manager-defs.h"
// Sentinel sweep_index value meaning "not registered on any sweep list".
const unsigned kInvalidSweepIndex = 0xffffffff;
// True while MemoryManager::sweep() is running on this thread.
__thread bool tl_sweeping;
// The per-thread MemoryManager instance.
THREAD_LOCAL_FLAT(MemoryManager, tl_heap);
__thread size_t tl_heap_id; // thread's current heap instance id

//////////////////////////////////////////////////////////////////////

// One-shot trigger slot used to request heap profiling of a future request
// (installed by triggerProfiling, consumed via compare_exchange).
std::atomic<MemoryManager::ReqProfContext*>
  MemoryManager::s_trigger{nullptr};

// True once jemalloc's per-thread allocated/deallocated counter pointers
// have been resolved in threadStatsInit().
bool MemoryManager::s_statsEnabled = false;
// Sum of request-heap usage across all threads' MemoryManagers.
std::atomic<ssize_t> MemoryManager::s_req_heap_usage;

static std::atomic<size_t> s_heap_id; // global counter of heap instances

// Cached jemalloc mib paths for "thread.allocatedp"/"thread.deallocatedp",
// resolved once via pthread_once(threadStatsOnce, threadStatsInit).
static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
74 void MemoryManager::threadStatsInit() {
75 if (!mallctlnametomib
) return;
76 size_t miblen
= sizeof(threadAllocatedpMib
) / sizeof(size_t);
77 if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib
, &miblen
)) {
80 miblen
= sizeof(threadDeallocatedpMib
) / sizeof(size_t);
81 if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib
, &miblen
)) {
84 MemoryManager::s_statsEnabled
= true;
88 void MemoryManager::threadStats(uint64_t*& allocated
, uint64_t*& deallocated
) {
89 pthread_once(&threadStatsOnce
, threadStatsInit
);
90 if (!MemoryManager::s_statsEnabled
) return;
92 size_t len
= sizeof(allocated
);
93 if (mallctlbymib(threadAllocatedpMib
,
94 sizeof(threadAllocatedpMib
) / sizeof(size_t),
95 &allocated
, &len
, nullptr, 0)) {
99 len
= sizeof(deallocated
);
100 if (mallctlbymib(threadDeallocatedpMib
,
101 sizeof(threadDeallocatedpMib
) / sizeof(size_t),
102 &deallocated
, &len
, nullptr, 0)) {
// Construct the per-thread MemoryManager: wire up jemalloc thread counters,
// GC bookkeeping, the APC-string list, and per-thread ini bindings.
MemoryManager::MemoryManager() {
  threadStats(m_allocated, m_deallocated);
  // getCheck() touches the request-local GC data; presumably forces its lazy
  // initialization here — TODO confirm.
  rl_gcdata.getCheck();
  setMemoryLimit(std::numeric_limits<int64_t>::max());
  resetGC(); // so each thread has unique req_num at startup
  // make the circular-lists empty.
  m_strings.next = m_strings.prev = &m_strings;
  m_bypassSlabAlloc = RuntimeOption::DisableSmallAllocator;
  m_req_start_micros = HPHP::Timer::GetThreadCPUTimeNanos() / 1000;
  // NOTE(review): the remaining IniSetting::Bind arguments (the callback for
  // "zend.enable_gc") were dropped in this extraction — restore from
  // upstream before compiling.
  IniSetting::Bind(IniSetting::CORE, IniSetting::PHP_INI_ALL, "zend.enable_gc",
// Destructor: in debug builds, verify that only Free blocks remain in the
// heap before the slabs/bigs are released by ~SparseHeap.
MemoryManager::~MemoryManager() {
  FTRACE(1, "heap-id {} ~MM\n", tl_heap_id);
  // TODO(T20916887): Enable this for one-bit refcounting.
  if (debug && !one_bit_refcount) {
    // Check that every object in the heap is free.
    forEachHeapObject([&](HeapObject* h, size_t) {
      assert_flog(h->kind() == HeaderKind::Free,
                  "{} still live in ~MemoryManager()",
                  header_names[size_t(h->kind())]);
    // NOTE(review): the lambda/if closers ("});" and "}") were dropped in
    // this extraction — restore from upstream before compiling.
  // ~SparseHeap releases its slabs/bigs.
138 void MemoryManager::resetRuntimeOptions() {
139 if (debug
) checkHeap("resetRuntimeOptions");
141 this->~MemoryManager();
142 new (mem
) MemoryManager();
145 void MemoryManager::traceStats(const char* event
) {
146 FTRACE(1, "heap-id {} {} ", tl_heap_id
, event
);
148 FTRACE(1, "mm-usage {} extUsage {} ",
149 m_stats
.mmUsage(), m_stats
.extUsage
);
150 FTRACE(1, "capacity {} peak usage {} peak capacity {} ",
151 m_stats
.capacity(), m_stats
.peakUsage
, m_stats
.peakCap
);
152 FTRACE(1, "total {} reset alloc-dealloc {} cur alloc-dealloc {}\n",
153 m_stats
.totalAlloc
, m_resetAllocated
- m_resetDeallocated
,
154 *m_allocated
- *m_deallocated
);
156 FTRACE(1, "usage: {} capacity: {} peak usage: {} peak capacity: {}\n",
157 m_stats
.usage(), m_stats
.capacity(),
158 m_stats
.peakUsage
, m_stats
.peakCap
);
162 // Reset all memory stats counters, both internal and external; intended to
163 // be used between requests when the whole heap is being reset.
164 void MemoryManager::resetAllStats() {
165 traceStats("resetAllStats pre");
166 m_statsIntervalActive
= false;
167 m_stats
.mm_udebt
= 0;
168 m_stats
.mm_uallocated
= 0;
169 m_stats
.mm_freed
= 0;
170 m_stats
.extUsage
= 0;
171 m_stats
.malloc_cap
= 0;
172 m_stats
.mmap_cap
= 0;
173 m_stats
.mmap_volume
= 0;
174 m_stats
.peakUsage
= 0;
176 m_stats
.totalAlloc
= 0;
177 m_stats
.peakIntervalUsage
= 0;
178 m_stats
.peakIntervalCap
= 0;
179 m_enableStatsSync
= false;
181 // Reset this memory managers portion of the bytes used across all memory
183 s_req_heap_usage
.fetch_sub(m_lastUsage
, std::memory_order_relaxed
);
186 if (Trace::enabled
) tl_heap_id
= ++s_heap_id
;
187 if (s_statsEnabled
) {
188 m_resetDeallocated
= *m_deallocated
;
189 m_resetAllocated
= *m_allocated
;
191 traceStats("resetAllStats post");
194 // Reset external allocation counters, but preserve MemoryManager counters.
195 // The effect of this call is simply to ignore anything we've done *outside*
196 // the MemoryManager allocator after we initialized, to avoid attributing
197 // shared structure initialization that happens during hphp_thread_init()
198 // to this session. Intended to be used once per request, early in the request
199 // lifetime before PHP execution begins.
200 void MemoryManager::resetExternalStats() {
201 traceStats("resetExternalStats pre");
202 // extUsage and totalAlloc are only set by refreshStatsImpl, which we don't
203 // enable until after this has been called.
204 assertx(m_enableStatsSync
||
205 (m_stats
.extUsage
== 0 && m_stats
.totalAlloc
== 0));
206 m_enableStatsSync
= s_statsEnabled
; // false if !use_jemalloc
207 if (s_statsEnabled
) {
208 m_resetDeallocated
= *m_deallocated
;
209 m_resetAllocated
= *m_allocated
- m_stats
.malloc_cap
;
210 // By subtracting malloc_cap here, the next call to refreshStatsImpl()
211 // will correctly include m_stats.malloc_cap in extUsage and totalAlloc.
213 traceStats("resetExternalStats post");
216 void MemoryManager::refreshStatsHelperExceeded() {
217 setSurpriseFlag(MemExceededFlag
);
219 if (RuntimeOption::LogNativeStackOnOOM
) {
220 log_native_stack("Exceeded memory limit");
224 void MemoryManager::setMemThresholdCallback(size_t threshold
) {
225 m_memThresholdCallbackPeakUsage
= threshold
;
229 * Refresh stats to reflect directly malloc()ed memory, and determine
230 * whether the request memory limit has been exceeded.
232 * The stats parameter allows the updates to be applied to either
233 * m_stats as in refreshStats() or to a separate MemoryUsageStats
234 * struct as in getStatsCopy().
236 void MemoryManager::refreshStatsImpl(MemoryUsageStats
& stats
) {
237 // Incrementally incorporate the difference between the previous and current
238 // deltas into the memory usage statistic. For reference, the total
239 // malloced memory usage could be calculated as such, if delta0 were
240 // recorded in resetAllStats():
242 // int64 musage = delta - delta0;
244 // Note however, the slab allocator adds to m_stats.malloc_cap
245 // when it calls malloc(), so that this function can avoid
246 // double-counting the malloced memory. Thus musage in the example
247 // code may well substantially exceed m_stats.usage.
248 if (m_enableStatsSync
) {
249 // We can't currently handle wrapping so make sure this isn't happening.
250 assertx(*m_allocated
<= uint64_t(std::numeric_limits
<int64_t>::max()));
251 assertx(*m_deallocated
<= uint64_t(std::numeric_limits
<int64_t>::max()));
252 const int64_t curAllocated
= *m_allocated
;
253 const int64_t curDeallocated
= *m_deallocated
;
255 // Since these deltas potentially include memory allocated from another
256 // thread but deallocated on this one, it is possible for these numbers to
258 auto curUsage
= curAllocated
- curDeallocated
;
259 auto resetUsage
= m_resetAllocated
- m_resetDeallocated
;
261 FTRACE(1, "heap-id {} Before stats sync: ", tl_heap_id
);
262 FTRACE(1, "reset alloc-dealloc {} cur alloc-dealloc: {} alloc-change: {} ",
263 resetUsage
, curUsage
, curAllocated
- m_resetAllocated
);
264 FTRACE(1, "dealloc-change: {} ", curDeallocated
- m_resetDeallocated
);
265 FTRACE(1, "mm usage {} extUsage {} totalAlloc {} capacity {}\n",
266 stats
.mmUsage(), stats
.extUsage
, stats
.totalAlloc
, stats
.capacity());
268 // External usage (allocated-deallocated) since the last resetStats().
269 stats
.extUsage
= curUsage
- resetUsage
;
271 // Calculate the allocation volume since the last reset.
272 // We need to do the calculation instead of just setting it to curAllocated
273 // because of the MaskAlloc capability, which updates m_resetAllocated.
275 // stats.mmap_volume is only used for mmap'd heap space; any malloc'd
276 // space is included in curAllocated.
277 stats
.totalAlloc
= curAllocated
- m_resetAllocated
+ stats
.mmap_volume
;
278 FTRACE(1, "heap-id {} after sync extUsage {} totalAlloc: {}\n",
279 tl_heap_id
, stats
.extUsage
, stats
.totalAlloc
);
281 assertx(m_usageLimit
> 0);
282 auto usage
= stats
.usage();
283 stats
.peakUsage
= std::max(stats
.peakUsage
, usage
);
284 if (m_statsIntervalActive
) {
285 stats
.peakIntervalUsage
= std::max(stats
.peakIntervalUsage
, usage
);
286 stats
.peakIntervalCap
= std::max(stats
.peakIntervalCap
, stats
.capacity());
291 * Refresh our internally stored m_stats, then check for OOM and the
292 * memThresholdCallback
294 void MemoryManager::refreshStats() {
295 refreshStatsImpl(m_stats
);
296 auto usage
= m_stats
.usage();
297 s_req_heap_usage
.fetch_add(usage
- m_lastUsage
, std::memory_order_relaxed
);
299 if (usage
> m_usageLimit
&& m_couldOOM
) {
300 refreshStatsHelperExceeded();
301 } else if (usage
> m_memThresholdCallbackPeakUsage
) {
302 m_memThresholdCallbackPeakUsage
= SIZE_MAX
;
303 setSurpriseFlag(MemThresholdFlag
);
307 void MemoryManager::recordStats(StructuredLogEntry
& entry
) {
308 auto const stats
= getStatsCopy();
309 entry
.ints
["mem-peak-usage"] = stats
.peakUsage
;
310 entry
.ints
["mem-peak-capacity"] = stats
.peakCap
;
311 entry
.ints
["mem-total-alloc"] = stats
.totalAlloc
;
315 * Calculate how many bytes of allocation should happen before the next
316 * time the fast path is interrupted.
318 void MemoryManager::updateMMDebt() {
319 const int64_t delta_sample
= m_nextSample
- m_stats
.mmAllocated();
320 const int64_t delta_gc
= m_nextGC
- m_stats
.mmUsage();
321 auto const delta
= static_cast<uint64_t>(std::min(delta_sample
, delta_gc
));
322 auto const new_debt
= delta
> std::numeric_limits
<int64_t>::max() ? 0 : delta
;
323 m_stats
.mm_uallocated
+= new_debt
- m_stats
.mm_udebt
;
324 m_stats
.mm_udebt
= new_debt
;
// Run end-of-request sweep: invoke Sweepable and native sweep hooks until
// both lists drain, then decref APC arrays referenced by this request.
void MemoryManager::sweep() {
  assertx(!sweeping());
  // NOTE(review): several statements of this function (tl_sweeping toggle,
  // the obj->sweep()/ndi->sweep() calls, FTRACE arguments, counter updates,
  // and closing braces) were dropped in this extraction — restore from
  // upstream before compiling.
  DEBUG_ONLY size_t num_sweepables = 0, num_natives = 0;
  // iterate until both sweep lists are empty. Entries can be added or
  // removed from either list during sweeping.
    while (!m_sweepables.empty()) {
      auto obj = m_sweepables.next();
    while (!m_natives.empty()) {
      // Invariant visible below: the back entry's sweep_index is its slot.
      assertx(m_natives.back()->sweep_index == m_natives.size() - 1);
      auto node = m_natives.back();
      m_natives.pop_back();
      auto obj = Native::obj(node);
      auto ndi = obj->getVMClass()->getNativeDataInfo();
      // trash the native data but leave the header and object parsable
      assertx(memset(node+1, kSmallFreeFill, node->obj_offset - sizeof(*node)));
  } while (!m_sweepables.empty());

  DEBUG_ONLY auto napcs = m_apc_arrays.size();
  FTRACE(1, "heap-id {} sweep: sweepable {} native {} apc array {}\n",

  // decref apc arrays referenced by this request. This must happen here
  // (instead of in resetAllocator), because the sweep routine may use
  while (!m_apc_arrays.empty()) {
    auto a = m_apc_arrays.back();
    m_apc_arrays.pop_back();
    // Mark the entry as off-list so checkHeap's Apc accounting skips it.
    if (debug) a->m_sweep_index = kInvalidSweepIndex;
  if (debug) checkHeap("after MM::sweep");
// Reset the request-local allocator after sweeping: sweep APC strings,
// clear freelists, and re-arm GC state for the next request.
void MemoryManager::resetAllocator() {
  assertx(m_natives.empty() && m_sweepables.empty() && tl_sweeping);
  // decref apc strings referenced by this request
  DEBUG_ONLY auto nstrings = StringData::sweepAll();
  FTRACE(1, "heap-id {} resetAllocator: strings {}\n", tl_heap_id, nstrings);
  // NOTE(review): the statements that release the heap and reset stats
  // (original lines 379-382) were dropped in this extraction — restore from
  // upstream before compiling.

  // zero out freelists
  for (auto& list : m_freelists) list.head = nullptr;
  m_front = m_limit = 0;
  // NOTE(review): original lines 386-388, 390 and the closing lines were
  // also dropped here (tl_sweeping reset, resetGC, etc.).
  setGCEnabled(RuntimeOption::EvalEnableGC);
  if (debug) resetEagerGC();
// Release all retained capacity; only valid when the heap is empty.
// Replacing the vectors with freshly-constructed ones releases their
// backing storage (clear() would keep capacity).
void MemoryManager::flush() {
  always_assert(empty());
  // NOTE(review): a statement between the assertion and the vector resets
  // (original line 396) and the function's trailing lines (400-401) were
  // dropped in this extraction — restore from upstream before compiling.
  m_apc_arrays = std::vector<APCLocalArray*>();
  m_natives = std::vector<NativeNode*>();
  m_root_handles = std::vector<req::root_handle*>{};
403 * req::malloc & friends implementation notes
405 * There are three kinds of allocations:
407 * a) Big allocations. (size >= kMaxSmallSize)
409 * In this case we behave as a wrapper around the normal libc
410 * malloc/free. We insert a MallocNode header at the front of the
411 * allocation in order to find these at sweep time (end of
412 * request) so we can give them back to libc.
414 * b) Size-tracked small allocations.
416 * This is used for the generic case, for callers who can't tell
417 * us the size of the allocation at free time.
419 * In this situation, we put a MallocNode header at the front of
420 * the block that tells us the size for when we need to free it
421 * later. We differentiate this from a MallocNode using the size
422 * field in either structure (they overlap at the same address).
424 * c) Size-untracked small allocation
426 * Many callers have an easy time telling you how big the object
427 * was when they need to free it. In this case we can avoid the
428 * MallocNode, which saves us some memory and also let's us give
429 * out 16-byte aligned pointers easily.
431 * We know when we have one of these because it has to be freed
432 * through a different entry point. (E.g. tl_heap->freeSmallSize() or
433 * tl_heap->freeBigSize().)
435 * When small blocks are freed (case b and c), they're placed in the
436 * appropriate size-segregated freelist. Large blocks are immediately
437 * passed back to libc via free.
439 * There are currently two kinds of freelist entries: entries where
440 * there is already a valid MallocNode on the list (case b), and
441 * entries where there isn't (case c). The reason for this is that
442 * that way, when allocating for case b, you don't need to store the
443 * MallocNode size again. Much of the heap is going through case b at
444 * the time of this writing, so it is a measurable regression to try
445 * to just combine the free lists, but presumably we can move more to
446 * case c and combine the lists eventually.
// Human-readable name for each HeaderKind, indexed by size_t(kind)
// (see the assert_flog in ~MemoryManager). Order must match the
// HeaderKind enum.
const std::array<char*,NumHeaderKinds> header_names = {{
  "PackedArray", "MixedArray", "EmptyArray", "ApcArray", "GlobalsArray",
  "RecordArray", "DictArray", "VecArray", "KeysetArray",
  "String", "Resource", "ClsMeth", "Record",
  "Object", "NativeObject", "WaitHandle", "AsyncFuncWH", "AwaitAllWH",
  "Closure", "Vector", "Map", "Set", "Pair", "ImmVector", "ImmMap", "ImmSet",
  "AsyncFuncFrame", "NativeData", "ClosureHdr", "MemoData", "Cpp",
  "SmallMalloc", "BigMalloc",
  "Free", "Hole", "Slab"
  // NOTE(review): closing "}};" dropped in this extraction.
460 // initialize a Hole header in the unused memory between m_front and m_limit
461 void MemoryManager::initHole(void* ptr
, uint32_t size
) {
462 FreeNode::InitFrom(ptr
, size
, HeaderKind::Hole
);
465 void MemoryManager::initFree() {
466 if ((char*)m_front
< (char*)m_limit
) {
467 initHole(m_front
, (char*)m_limit
- (char*)m_front
);
468 Slab::fromPtr(m_front
)->setStart(m_front
);
473 void MemoryManager::reinitFree() {
474 for (size_t i
= 0, e
= m_freelists
.size(); i
< e
; i
++) {
475 auto size
= sizeIndex2Size(i
);
476 auto n
= m_freelists
[i
].head
;
477 for (; n
&& n
->kind() != HeaderKind::Free
; n
= n
->next
) {
478 n
->initHeader_32(HeaderKind::Free
, size
);
481 // ensure the freelist tail is already initialized.
482 for (; n
; n
= n
->next
) {
483 assertx(n
->kind() == HeaderKind::Free
&& n
->size() == size
);
489 MemoryManager::FreelistArray
MemoryManager::beginQuarantine() {
491 for (size_t i
= 0, n
= list
.size(); i
< n
; ++i
) {
492 list
[i
].head
= m_freelists
[i
].head
;
493 m_freelists
[i
].head
= nullptr;
498 // turn free blocks into holes, restore original freelists
499 void MemoryManager::endQuarantine(FreelistArray
&& list
) {
500 for (size_t i
= 0, n
= list
.size(); i
< n
; i
++) {
501 auto size
= sizeIndex2Size(i
);
502 while (auto n
= m_freelists
[i
].likelyPop()) {
503 memset(n
, 0x8a, size
);
506 m_freelists
[i
].head
= list
[i
].head
;
507 list
[i
].head
= nullptr;
// test iterating objects in slabs
// Debug-only heap verifier: walks every heap object, cross-checks the
// freelists, APC-array list, and APC-string list against what the walk
// found, then optionally builds a HeapGraph and checks pointers.
void MemoryManager::checkHeap(const char* phase) {
  // NOTE(review): the timer setup, "switch (kind)" scaffolding,
  // hdrs/counts updates, "break;" statements, and many closing braces were
  // dropped in this extraction — restore from upstream before compiling.
  std::vector<HeapObject*> hdrs;
  PtrMap<HeapObject*> free_blocks, apc_arrays, apc_strings;
  size_t counts[NumHeaderKinds];
  for (unsigned i=0; i < NumHeaderKinds; i++) counts[i] = 0;
  forEachHeapObject([&](HeapObject* h, size_t alloc_size) {
    auto kind = h->kind();
      case HeaderKind::Free:
        free_blocks.insert(h, alloc_size);
      case HeaderKind::Apc:
        // Only arrays still registered on m_apc_arrays are tracked;
        // swept entries carry kInvalidSweepIndex (see sweep()).
        if (static_cast<APCLocalArray*>(h)->m_sweep_index !=
            kInvalidSweepIndex) {
          apc_arrays.insert(h, alloc_size);
      case HeaderKind::String:
        // Proxy strings are APC-backed and tracked on m_strings.
        if (static_cast<StringData*>(h)->isProxy()) {
          apc_strings.insert(h, alloc_size);
      case HeaderKind::Packed:
      case HeaderKind::Mixed:
      case HeaderKind::Dict:
      case HeaderKind::Empty:
      case HeaderKind::VecArray:
      case HeaderKind::Keyset:
      case HeaderKind::Globals:
      case HeaderKind::Object:
      case HeaderKind::NativeObject:
      case HeaderKind::WaitHandle:
      case HeaderKind::AsyncFuncWH:
      case HeaderKind::AwaitAllWH:
      case HeaderKind::Closure:
      case HeaderKind::Vector:
      case HeaderKind::Map:
      case HeaderKind::Set:
      case HeaderKind::Pair:
      case HeaderKind::ImmVector:
      case HeaderKind::ImmMap:
      case HeaderKind::ImmSet:
      case HeaderKind::Resource:
      case HeaderKind::ClsMeth:
      case HeaderKind::AsyncFuncFrame:
      case HeaderKind::NativeData:
      case HeaderKind::ClosureHdr:
      case HeaderKind::MemoData:
      case HeaderKind::Cpp:
      case HeaderKind::SmallMalloc:
      case HeaderKind::BigMalloc:
      case HeaderKind::Record:
      case HeaderKind::RecordArray:
      case HeaderKind::Hole:
      case HeaderKind::Slab:
        assertx(false && "forEachHeapObject skips these kinds");

  // check the free lists
  free_blocks.prepare();
  size_t num_free_blocks = 0;
  for (auto& list : m_freelists) {
    for (auto n = list.head; n; n = n->next) {
      assertx(free_blocks.isStart(n));
  assertx(num_free_blocks == free_blocks.size());

  // check the apc array list
  assertx(apc_arrays.size() == m_apc_arrays.size());
  apc_arrays.prepare();
  for (UNUSED auto a : m_apc_arrays) {
    assertx(apc_arrays.isStart(a));

  // check the apc string list
  size_t num_apc_strings = 0;
  apc_strings.prepare();
  for (StringDataNode *next, *n = m_strings.next; n != &m_strings; n = next) {
    UNUSED auto const s = StringData::node2str(n);
    assertx(s->isProxy());
    assertx(apc_strings.isStart(s));
  assertx(num_apc_strings == apc_strings.size());

  // heap check is done. If we are not exiting, check pointers using HeapGraph
  if (Trace::moduleEnabled(Trace::heapreport)) {
    auto g = makeHeapGraph(true /* include free blocks */);
    if (!exiting()) checkPointers(g, phase);
    if (Trace::moduleEnabled(Trace::heapreport, 2)) {
      printHeapReport(g, phase);
617 // Filling the start bits one word at a time requires writing the mask for
618 // the appropriate size class, and left-shifting the mask each time to insert
619 // any necessary zeros not included in the previous word, for size classes
620 // that don't pack perfectly into 64 bits. Suppose:
622 // d = size / kSmallSizeAlign = number of bits for the given size class
624 // In other words, when d%64 != 0, we need to shift the mask slightly after
625 // each store, until the shift amount wraps. For example, using 8-bit words
626 // for brevity, the sequence of stores would be:
628 // 11111111 11111111 11111111 11111111 size=16 d=1 shift 0,0,0,0
629 // 10101010 10101010 10101010 10101010 size=32 d=2 shift 0,0,0,0
630 // 10010010 01001001 00100100 10010010 size=48 d=3 shift 0,1,2,0
631 // 10001000 10001000 10001000 10001000 size=64 d=4 shift 0,0,0,0
632 // 10000100 00100001 00001000 01000010 00010000 size=80 d=5 shift 0,2,4,1,3,0
633 // 10000010 00001000 00100000 10000010 size=96 d=6 shift 0,4,2,0
634 // 10000001 00000010 00000100 00001000 00010000 00100000 01000000 10000001
635 // size=112 d=7 shift 0,6,5,4,3,2,1,0
636 // 10000000 10000000 size=128 d=8 shift 0,0
638 // build a bitmask-init table for size classes that fit at least one
639 // object per 64*kSmallSizeAlign bytes; this means they fit at least
640 // one start bit per 64 bits, supporting fast nContig initialization.
642 // masks_[i] = bitmask to store each time
643 std::array
<uint64_t,Slab::kNumMasks
> Slab::masks_
;
645 // shifts_[i] = how much to shift masks_[i] after each store
646 std::array
<uint8_t,Slab::kNumMasks
> Slab::shifts_
;
648 struct Slab::InitMasks
{
650 static_assert(kSizeIndex2Size
[kNumMasks
- 1] <= 64 * kSmallSizeAlign
, "");
651 for (size_t i
= 0; i
< kNumMasks
; i
++) {
652 auto const d
= kSizeIndex2Size
[i
] / kSmallSizeAlign
;
653 for (size_t j
= 0; j
< 64; j
+= d
) {
654 masks_
[i
] |= 1ull << j
;
656 shifts_
[i
] = d
- 64 % d
; // # of high-order zeros not in mask
663 Slab::InitMasks s_init_masks
;
// Local alias for the MemoryManager's array of per-size-class freelists.
using FreelistArray = MemoryManager::FreelistArray;

// kContigTab[i]: number of contiguous bytes preallocated for size class i.
alignas(64) constexpr size_t kContigTab[] = {
#define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
  ncontig * kSizeIndex2Size[index],
// NOTE(review): the SIZE_CLASSES expansion, #undef, and closing "};" were
// dropped in this extraction — restore from upstream before compiling.

// kContigIndexTab[i]: smallest size-class index covering class i's
// contiguous preallocation run (but always > i).
alignas(64) const uint8_t kContigIndexTab[] = {
#define SIZE_CLASS(index, lg_grp, lg_delta, ndelta, lg_delta_lookup, ncontig) \
  (uint8_t)std::max(size_t(index+1),\
                    MemoryManager::size2Index(kContigTab[index])),
// NOTE(review): the SIZE_CLASSES expansion, #undef, and closing "};" were
// dropped here as well.
683 * Store slab tail bytes (if any) in freelists.
686 void storeTail(FreelistArray
& freelists
, void* tail
, size_t tailBytes
,
689 for (auto remBytes
= tailBytes
; remBytes
> 0;) {
690 auto fragBytes
= remBytes
;
691 assertx(fragBytes
>= kSmallSizeAlign
);
692 assertx((fragBytes
& kSmallSizeAlignMask
) == 0);
693 auto fragInd
= MemoryManager::size2Index(fragBytes
+ 1) - 1;
694 auto fragUsable
= MemoryManager::sizeIndex2Size(fragInd
);
695 auto frag
= FreeNode::InitFrom((char*)rem
+ remBytes
- fragUsable
,
696 fragUsable
, HeaderKind::Hole
);
697 FTRACE(4, "storeTail({}, {}): rem={}, remBytes={}, "
698 "frag={}, fragBytes={}, fragUsable={}, fragInd={}\n", tail
,
699 (void*)uintptr_t(tailBytes
), rem
, (void*)uintptr_t(remBytes
),
700 frag
, (void*)uintptr_t(fragBytes
), (void*)uintptr_t(fragUsable
),
702 freelists
[fragInd
].push(frag
);
703 slab
->setStart(frag
);
704 remBytes
-= fragUsable
;
709 * Create split_bytes worth of contiguous regions, each of size splitUsable,
710 * and store them in the appropriate freelist. In addition, initialize the
711 * start-bits for the new objects.
714 void splitTail(FreelistArray
& freelists
, void* tail
, size_t tailBytes
,
715 size_t split_bytes
, size_t splitUsable
, size_t index
,
717 assertx(tailBytes
>= kSmallSizeAlign
);
718 assertx((tailBytes
& kSmallSizeAlignMask
) == 0);
719 assertx((splitUsable
& kSmallSizeAlignMask
) == 0);
720 assertx(split_bytes
<= tailBytes
);
722 // initialize the free objects, and push them onto the freelist.
723 auto head
= freelists
[index
].head
;
724 auto rem
= (char*)tail
+ split_bytes
;
725 for (auto next
= rem
- splitUsable
; next
>= tail
; next
-= splitUsable
) {
726 auto split
= FreeNode::InitFrom(next
, splitUsable
, HeaderKind::Hole
);
727 FTRACE(4, "MemoryManager::splitTail(tail={}, tailBytes={}, tailPast={}): "
728 "split={}, splitUsable={}\n", tail
,
729 (void*)uintptr_t(tailBytes
), (void*)(uintptr_t(tail
) + tailBytes
),
731 head
= FreeNode::UninitFrom(split
, head
);
733 freelists
[index
].head
= head
;
735 // initialize the start-bits for each object.
736 slab
->setStarts(tail
, rem
, splitUsable
, index
);
738 auto remBytes
= tailBytes
- split_bytes
;
739 assertx(uintptr_t(rem
) + remBytes
== uintptr_t(tail
) + tailBytes
);
740 storeTail(freelists
, rem
, remBytes
, slab
);
745 * Get a new slab, then allocate nbytes from it and install it in our
746 * slab list. Return the newly allocated nbytes-sized block.
748 NEVER_INLINE
void* MemoryManager::newSlab(size_t nbytes
) {
750 if (m_front
< m_limit
) {
751 storeTail(m_freelists
, m_front
, (char*)m_limit
- (char*)m_front
,
752 Slab::fromPtr(m_front
));
754 auto mem
= m_heap
.allocSlab(m_stats
);
755 always_assert(reinterpret_cast<uintptr_t>(mem
) % kSlabAlign
== 0);
756 auto slab
= static_cast<Slab
*>(mem
);
757 auto slab_start
= slab
->init();
758 m_front
= slab_start
+ nbytes
; // allocate requested object
759 // we can't use any space after slab->end() even if the allocator allows
760 // (indiciated by mem.size), because of the fixed-sized crossing map.
761 m_limit
= slab
->end();
762 assertx(m_front
<= m_limit
);
763 FTRACE(3, "newSlab: adding slab at {} to limit {}\n", slab_start
, m_limit
);
764 slab
->setStart(slab_start
);
769 * Allocate `bytes' from the current slab, aligned to kSmallSizeAlign.
771 inline void* MemoryManager::slabAlloc(size_t nbytes
, size_t index
) {
772 FTRACE(3, "slabAlloc({}, {}): m_front={}, m_limit={}\n", nbytes
, index
,
774 assertx(nbytes
== sizeIndex2Size(index
));
775 assertx((uintptr_t(m_front
) & kSmallSizeAlignMask
) == 0);
778 auto next
= (void*)(uintptr_t(ptr
) + nbytes
);
780 if (uintptr_t(next
) <= uintptr_t(m_limit
)) {
782 slab
= Slab::fromPtr(ptr
);
785 if (UNLIKELY(index
>= kNumSmallSizes
) || UNLIKELY(m_bypassSlabAlloc
)) {
786 // Stats correction; mallocBigSize() updates m_stats. Add to mm_udebt
787 // rather than mm_freed because we're adjusting for double-counting, not
788 // actually freeing anything.
789 m_stats
.mm_udebt
+= nbytes
;
790 return mallocBigSize(nbytes
);
792 ptr
= newSlab(nbytes
); // sets start bit at ptr
793 slab
= Slab::fromPtr(ptr
);
795 // Preallocate more of the same in order to amortize entry into this method.
796 auto split_bytes
= kContigTab
[index
] - nbytes
;
797 auto avail
= uintptr_t(m_limit
) - uintptr_t(m_front
);
798 if (UNLIKELY(split_bytes
> avail
)) {
799 split_bytes
= avail
- avail
% nbytes
; // Expensive division.
801 if (split_bytes
> 0) {
803 m_front
= (void*)(uintptr_t(tail
) + split_bytes
);
804 splitTail(m_freelists
, tail
, split_bytes
, split_bytes
, nbytes
, index
, slab
);
806 FTRACE(4, "slabAlloc({}, {}) --> ptr={}, m_front={}, m_limit={}\n", nbytes
,
807 index
, ptr
, m_front
, m_limit
);
// Slow path for small allocation: settle sampling/GC debt, then scan
// larger freelists (splitting their blocks) before carving new slab space.
void* MemoryManager::mallocSmallSizeSlow(size_t nbytes, size_t index) {
  assertx(nbytes == sizeIndex2Size(index));
  // NOTE(review): several statements (the gc check referenced by the comment
  // below, the "return p;" branches, and loop closers) were dropped in this
  // extraction — restore from upstream before compiling.
  if (UNLIKELY(m_stats.mm_udebt > std::numeric_limits<int64_t>::max())) {
    checkSampling(nbytes);
    // Must be here to check gc; might still have free objects.
    auto clamped = std::min(index, kNumSmallSizes);
    if (auto p = m_freelists[clamped].unlikelyPop()) {
      FTRACE(3, "mallocSmallSizeSlow: check gc {} -> {}\n", nbytes, p);
  size_t contigInd = kContigIndexTab[index];
  for (auto i = contigInd; i < kNumSmallSizes; ++i) {
    FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): contigMin={}, "
           "contigInd={}, try i={}\n", nbytes, index, kContigTab[index],
    if (auto p = m_freelists[i].unlikelyPop()) {
      assertx(i > index); // because freelist[index] was empty
      assertx(Slab::fromPtr(p)->isStart(p));
      FTRACE(4, "MemoryManager::mallocSmallSizeSlow({}, {}): "
             "contigMin={}, contigInd={}, use i={}, size={}, p={}\n",
             nbytes, index, kContigTab[index], contigInd, i,
             sizeIndex2Size(i), p);
      // Split tail into preallocations and store them back into freelists.
      splitTail(m_freelists, (char*)p + nbytes, sizeIndex2Size(i) - nbytes,
                kContigTab[index] - nbytes, nbytes, index, Slab::fromPtr(p));

  // No available free list items; carve new space from the current slab.
  return slabAlloc(nbytes, index);
850 inline void MemoryManager::updateBigStats() {
851 // If we are using jemalloc, it is keeping track of allocations outside of
852 // the slabs and the usage so we should force this after an allocation that
853 // was too large for one of the existing slabs. When we're not using jemalloc
854 // this check won't do anything so avoid the extra overhead.
855 if (debug
) requestEagerGC();
860 void MemoryManager::freeSmallIndexSlow(void* ptr
, size_t index
, size_t bytes
) {
861 if (UNLIKELY(index
>= kNumSmallSizes
) || UNLIKELY(m_bypassSlabAlloc
)) {
862 return freeBigSize(ptr
);
864 // copy of FreeList::push() fast path when head == nullptr
865 m_freelists
[index
].head
= FreeNode::UninitFrom(ptr
, nullptr);
866 m_stats
.mm_freed
+= bytes
;
870 void* MemoryManager::mallocBigSize(size_t bytes
, bool zero
) {
871 if (debug
) tl_heap
->requestEagerGC();
872 auto ptr
= m_heap
.allocBig(bytes
, zero
, m_stats
);
874 checkSampling(bytes
);
876 FTRACE(3, "mallocBigSize: {} ({} requested)\n", ptr
, bytes
);
880 MallocNode
* MemoryManager::reallocBig(MallocNode
* n
, size_t nbytes
) {
881 assertx(n
->kind() == HeaderKind::BigMalloc
);
882 auto n2
= static_cast<MallocNode
*>(
883 m_heap
.resizeBig(n
, nbytes
, m_stats
)
891 void MemoryManager::freeBigSize(void* vp
) {
892 FTRACE(3, "freeBigSize: {}\n", vp
);
893 m_heap
.freeBig(vp
, m_stats
);
896 // req::malloc api entry points, with support for malloc/free corner cases.
899 static void* allocate(size_t nbytes
, bool zero
, type_scan::Index ty
) {
900 nbytes
= std::max(nbytes
, size_t(1));
901 auto const npadded
= nbytes
+ sizeof(MallocNode
);
902 if (LIKELY(npadded
<= kMaxSmallSize
)) {
903 auto const ptr
= static_cast<MallocNode
*>(
904 tl_heap
->mallocSmallSize(npadded
)
906 ptr
->initHeader_32_16(HeaderKind::SmallMalloc
, 0, ty
);
907 ptr
->nbytes
= npadded
;
908 return zero
? memset(ptr
+ 1, 0, nbytes
) : ptr
+ 1;
910 auto const ptr
= static_cast<MallocNode
*>(
911 tl_heap
->mallocBigSize(npadded
, zero
)
913 ptr
->initHeader_32_16(HeaderKind::BigMalloc
, 0, ty
);
914 ptr
->nbytes
= npadded
;
918 void* malloc(size_t nbytes
, type_scan::Index tyindex
) {
919 assertx(type_scan::isKnownType(tyindex
));
920 return allocate(nbytes
, false, tyindex
);
923 void* calloc(size_t count
, size_t nbytes
, type_scan::Index tyindex
) {
924 assertx(type_scan::isKnownType(tyindex
));
925 return allocate(count
* nbytes
, true, tyindex
);
928 void* malloc_untyped(size_t nbytes
) {
929 auto n
= static_cast<MallocNode
*>(
930 tl_heap
->mallocBigSize(std::max(nbytes
+ sizeof(MallocNode
), 1ul), false)
932 n
->initHeader_32_16(HeaderKind::BigMalloc
, 0, type_scan::kIndexUnknown
);
933 n
->nbytes
= nbytes
+ sizeof(MallocNode
);
937 void* calloc_untyped(size_t count
, size_t bytes
) {
938 auto nbytes
= count
* bytes
+ sizeof(MallocNode
);
939 auto n
= static_cast<MallocNode
*>(
940 tl_heap
->mallocBigSize(nbytes
, true)
942 n
->initHeader_32_16(HeaderKind::BigMalloc
, 0, type_scan::kIndexUnknown
);
947 void* realloc(void* ptr
, size_t nbytes
, type_scan::Index tyindex
) {
948 assertx(type_scan::isKnownType(tyindex
));
950 return allocate(nbytes
, false, tyindex
);
956 FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
957 ptr
, nbytes
, tyindex
);
958 auto const n
= static_cast<MallocNode
*>(ptr
) - 1;
959 assertx(n
->typeIndex() == tyindex
);
960 auto new_size
= nbytes
+ sizeof(MallocNode
);
961 if (LIKELY(n
->kind() == HeaderKind::SmallMalloc
) ||
962 UNLIKELY(new_size
<= kMaxSmallSize
)) {
963 // either the old or new block will be small; force a copy.
964 auto newmem
= allocate(nbytes
, false, tyindex
);
965 auto copy_size
= std::min(n
->nbytes
- sizeof(MallocNode
), nbytes
);
966 newmem
= memcpy(newmem
, ptr
, copy_size
);
970 // it's a big allocation.
971 auto n2
= tl_heap
->reallocBig(n
, new_size
);
975 void* realloc_untyped(void* ptr
, size_t nbytes
) {
976 // first handle corner cases that degenerate to malloc() or free()
978 return req::malloc_untyped(nbytes
);
984 FTRACE(3, "MemoryManager::realloc: {} to {} [type_index: {}]\n",
985 ptr
, nbytes
, type_scan::kIndexUnknown
);
986 auto const n
= static_cast<MallocNode
*>(ptr
) - 1;
987 assertx(n
->kind() == HeaderKind::BigMalloc
);
988 assertx(n
->typeIndex() == type_scan::kIndexUnknown
);
989 auto n2
= tl_heap
->reallocBig(n
, nbytes
+ sizeof(MallocNode
));
993 char* strndup(const char* str
, size_t len
) {
994 size_t n
= std::min(len
, strlen(str
));
995 char* ret
= reinterpret_cast<char*>(
996 req::malloc(n
+ 1, type_scan::kIndexUnknownNoPtrs
)
1003 void free(void* ptr
) {
1005 auto const n
= static_cast<MallocNode
*>(ptr
) - 1;
1006 if (LIKELY(n
->kind() == HeaderKind::SmallMalloc
)) {
1007 return tl_heap
->freeSmallSize(n
, n
->nbytes
);
1009 if (n
->kind() == HeaderKind::Cpp
) {
1010 return tl_heap
->objFree(n
, n
->nbytes
);
1012 assertx(n
->kind() == HeaderKind::BigMalloc
);
1013 tl_heap
->freeBigSize(n
);
1018 //////////////////////////////////////////////////////////////////////
1020 void MemoryManager::addNativeObject(NativeNode
* node
) {
1021 if (debug
) for (DEBUG_ONLY
auto n
: m_natives
) assertx(n
!= node
);
1022 node
->sweep_index
= m_natives
.size();
1023 m_natives
.push_back(node
);
1026 void MemoryManager::removeNativeObject(NativeNode
* node
) {
1027 assertx(node
->sweep_index
< m_natives
.size());
1028 assertx(m_natives
[node
->sweep_index
] == node
);
1029 auto index
= node
->sweep_index
;
1030 auto last
= m_natives
.back();
1031 m_natives
[index
] = last
;
1032 m_natives
.pop_back();
1033 last
->sweep_index
= index
;
1036 void MemoryManager::addApcArray(APCLocalArray
* a
) {
1037 a
->m_sweep_index
= m_apc_arrays
.size();
1038 m_apc_arrays
.push_back(a
);
1041 void MemoryManager::removeApcArray(APCLocalArray
* a
) {
1042 assertx(a
->m_sweep_index
< m_apc_arrays
.size());
1043 assertx(m_apc_arrays
[a
->m_sweep_index
] == a
);
1044 auto index
= a
->m_sweep_index
;
1045 auto last
= m_apc_arrays
.back();
1046 m_apc_arrays
[index
] = last
;
1047 m_apc_arrays
.pop_back();
1048 last
->m_sweep_index
= index
;
1051 void MemoryManager::addSweepable(Sweepable
* obj
) {
1052 obj
->enlist(&m_sweepables
);
1055 // defined here because memory-manager.h includes sweepable.h
1056 Sweepable::Sweepable() {
1057 tl_heap
->addSweepable(this);
1060 //////////////////////////////////////////////////////////////////////
// Clear the pending "memory exceeded" surprise flag so execution can
// continue. NOTE(review): the tail of this function (presumably re-arming
// OOM state from `state`) is elided in this extract — confirm upstream.
1062 void MemoryManager::resetCouldOOM(bool state
) {
1063 clearSurpriseFlag(MemExceededFlag
);
1068 ///////////////////////////////////////////////////////////////////////////////
1069 // Request profiling.
// Arm a one-shot request-profiling trigger: the next request to initialize
// will claim it (see requestInit's s_trigger.exchange) and dump a heap
// profile to `filename`. NOTE(review): the CAS-failure branch and return
// are elided in this extract — presumably it frees `trigger` and reports
// failure; confirm upstream.
1071 bool MemoryManager::triggerProfiling(const std::string
& filename
) {
// Heap-allocate the pending context; ownership passes to s_trigger if the
// CAS below succeeds.
1072 auto trigger
= new ReqProfContext();
1073 trigger
->flag
= true;
1074 trigger
->filename
= filename
;
// Install only if no trigger is already pending (CAS from nullptr).
1076 ReqProfContext
* expected
= nullptr;
1078 if (!s_trigger
.compare_exchange_strong(expected
, trigger
)) {
// Per-request heap initialization: records the request's starting CPU time,
// runs the heap-allocation sampling lottery, and — if a profiling trigger
// is pending — switches this request into jemalloc heap-profiling mode.
// NOTE(review): several lines are elided in this extract (error branches,
// trigger cleanup, closing braces) — confirm full control flow upstream.
1085 void MemoryManager::requestInit() {
// Request start time, in microseconds of thread CPU time.
1086 tl_heap
->m_req_start_micros
= HPHP::Timer::GetThreadCPUTimeNanos() / 1000;
// Sampling lottery: with probability 1/EvalHeapAllocSampleRequests, arm
// m_nextSample at a random byte offset within EvalHeapAllocSampleBytes.
1088 if (RuntimeOption::EvalHeapAllocSampleRequests
> 0 &&
1089 RuntimeOption::EvalHeapAllocSampleBytes
> 0) {
1090 if (!folly::Random::rand32(RuntimeOption::EvalHeapAllocSampleRequests
)) {
1091 tl_heap
->m_nextSample
=
1092 folly::Random::rand32(RuntimeOption::EvalHeapAllocSampleBytes
);
1096 // If the trigger has already been claimed, do nothing.
// Atomically claim (and clear) the pending profiling trigger, if any.
1097 auto trigger
= s_trigger
.exchange(nullptr);
1098 if (trigger
== nullptr) return;
// Profiling must begin on an empty request heap.
1100 always_assert(tl_heap
->empty());
1102 // Initialize the request-local context from the trigger.
1103 auto& profctx
= tl_heap
->m_profctx
;
1104 assertx(!profctx
.flag
);
// Bypass the slab (small-object) allocator while profiling — inferred from
// the flag name; confirm the intended effect upstream.
1106 tl_heap
->m_bypassSlabAlloc
= true;
1111 // Reset jemalloc stats.
1112 if (mallctlCall
<true>("prof.reset") != 0) {
1116 // Enable jemalloc thread-local heap dumps.
// Save the prior prof.active value into profctx so requestShutdown can
// restore it; on failure, abandon profiling by resetting profctx.
1117 if (mallctlReadWrite
<bool, true>("prof.active", &profctx
.prof_active
, true)
1119 profctx
= ReqProfContext
{};
1122 if (mallctlReadWrite
<bool, true>("thread.prof.active",
1123 &profctx
.thread_prof_active
,
// On failure here, roll back prof.active and abandon profiling.
1125 mallctlWrite("prof.active", profctx
.prof_active
);
1126 profctx
= ReqProfContext
{};
// Per-request heap shutdown: disarms allocation sampling, and if this
// request was heap-profiling, dumps the profile and restores jemalloc
// state. NOTE(review): some lines/braces are elided in this extract.
1132 void MemoryManager::requestShutdown() {
// Disarm allocation sampling if it was active for this request.
1133 if (tl_heap
->m_nextSample
!= kNoNextSample
) {
1134 reset_alloc_sampling();
1135 tl_heap
->m_nextSample
= kNoNextSample
;
1138 auto& profctx
= tl_heap
->m_profctx
;
// Nothing more to do unless this request was profiling.
1139 if (!profctx
.flag
) return;
// Write the jemalloc heap profile to the trigger-supplied filename.
1142 jemalloc_pprof_dump(profctx
.filename
, true);
// Restore the pre-request jemalloc profiling switches saved in requestInit.
1144 mallctlWrite("thread.prof.active", profctx
.thread_prof_active
);
1145 mallctlWrite("prof.active", profctx
.prof_active
);
// Restore configured allocator behavior and clear the profiling context.
1148 tl_heap
->m_bypassSlabAlloc
= RuntimeOption::DisableSmallAllocator
;
1149 tl_heap
->m_memThresholdCallbackPeakUsage
= SIZE_MAX
;
1150 profctx
= ReqProfContext
{};
1153 /* static */ void MemoryManager::setupProfiling() {
1154 always_assert(tl_heap
->empty());
1155 tl_heap
->m_bypassSlabAlloc
= true;
1158 /* static */ void MemoryManager::teardownProfiling() {
1159 tl_heap
->m_bypassSlabAlloc
= RuntimeOption::DisableSmallAllocator
;
1162 bool MemoryManager::isGCEnabled() {
1163 return m_gc_enabled
;
1166 void MemoryManager::setGCEnabled(bool isGCEnabled
) {
1167 m_gc_enabled
= isGCEnabled
;
1171 ///////////////////////////////////////////////////////////////////////////////