/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/apc-gc-manager.h"
#include "hphp/runtime/base/req-containers.h"
#include "hphp/runtime/base/mixed-array-defs.h"
#include "hphp/runtime/base/memory-manager-defs.h"
#include "hphp/runtime/base/heap-scan.h"
#include "hphp/runtime/base/thread-info.h"
#include "hphp/runtime/base/heap-graph.h"
#include "hphp/runtime/base/weakref-data.h"
#include "hphp/runtime/ext/weakref/weakref-data-handle.h"
#include "hphp/runtime/vm/vm-regs.h"
#include "hphp/util/alloc.h"
#include "hphp/util/bloom-filter.h"
#include "hphp/util/cycles.h"
#include "hphp/util/process.h"
#include "hphp/util/ptr-map.h"
#include "hphp/util/struct-log.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/util/type-scan.h"

#include <folly/Range.h>
#include <folly/portability/Unistd.h>
struct Counter {
  size_t count{0};
  size_t bytes{0};
  void operator+=(size_t n) { bytes += n; ++count; }
};
bool hasNativeData(const HeapObject* h) {
  assert(isObjectKind(h->kind()));
  return static_cast<const ObjectData*>(h)->getAttribute(
    ObjectData::HasNativeData
  );
}
constexpr auto MinMark = GCBits(1);
constexpr auto MaxMark = GCBits(3);
struct Collector {
  explicit Collector(HeapImpl& heap, APCGCManager* apcgc, GCBits mark_version)
    : heap_(heap), mark_version_{mark_version}, apcgc_(apcgc)
  {}
  template<bool apcgc> void traceRoots();
  template<bool apcgc> void trace();
  // drain the scanner, enqueue pointers
  // mark ambiguous pointers in the range [start,start+len)
  template<bool apcgc> void conservativeScan(const void* start, size_t len);
  bool marked(const HeapObject* h) {
    return h->marks() == mark_version_;
  }
  template<bool apcgc> void checkedEnqueue(const void* p);
  void enqueueWeak(const WeakRefDataHandle* p);
  template<bool apcgc> void finish_typescan();
  HdrBlock find(const void*);
  void enqueue(const HeapObject* h) {
    assert(h && h->kind() <= HeaderKind::BigMalloc &&
           h->kind() != HeaderKind::AsyncFuncWH &&
           h->kind() != HeaderKind::Closure);
    assert(!isObjectKind(h->kind()) || !hasNativeData(h));
    work_.push_back(h);
    max_worklist_ = std::max(max_worklist_, work_.size());
  }
  GCBits const mark_version_;
  Counter marked_, pinned_, unknown_; // bytes
  Counter cscanned_roots_, cscanned_; // bytes
  Counter xscanned_roots_, xscanned_; // bytes
  size_t init_ns_, initfree_ns_, roots_ns_, mark_ns_, sweep_ns_;
  size_t max_worklist_{0}; // max size of work_
  size_t freed_bytes_{0};
  PtrMap<const HeapObject*> ptrs_;
  type_scan::Scanner type_scanner_;
  std::vector<const HeapObject*> work_;
  std::vector<const WeakRefDataHandle*> weakRefs_;
  APCGCManager* const apcgc_;
};
// TODO(T20460162): The contiguous heap has a bitmap of which chunks of memory
// are allocated/free. And it can efficiently find the start of an allocated
// object using bit operations. So there is an opportunity to speed this up by
// directly accessing the bitmap instead of using PtrMap.
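//
// A rough sketch of that idea (hypothetical names; nothing below exists in
// this file yet): with one bit per 16-byte quantum marking allocation starts,
//
//   size_t q = (uintptr_t(ptr) - chunkBase) >> 4; // quantum containing ptr
//   while (!startBits.test(q)) --q;               // back up to the start bit
//   auto h = (HeapObject*)(chunkBase + (q << 4));
//
// the backward scan is a handful of word loads plus count-leading-zeros,
// versus the binary search that PtrMap::region() does today.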
HdrBlock Collector::find(const void* ptr) {
  if (auto r = ptrs_.region(ptr)) {
    auto h = const_cast<HeapObject*>(r->first);
    if (h->kind() == HeaderKind::Slab) {
      return Slab::fromHeader(h)->find(ptr);
    }
    if (h->kind() == HeaderKind::BigObj) {
      auto h2 = static_cast<MallocNode*>(h) + 1;
      return ptr >= h2 ? HdrBlock{h2, r->second - sizeof(MallocNode)} :
                         HdrBlock{nullptr, 0};
    }
    assert(h->kind() == HeaderKind::BigMalloc);
    return {h, r->second};
  }
  return {nullptr, 0};
}
DEBUG_ONLY bool checkEnqueuedKind(const HeapObject* h) {
  switch (h->kind()) {
    case HeaderKind::Apc:
    case HeaderKind::Globals:
    case HeaderKind::Proxy:
    case HeaderKind::Ref:
    case HeaderKind::Resource:
    case HeaderKind::Packed:
    case HeaderKind::Mixed:
    case HeaderKind::Dict:
    case HeaderKind::VecArray:
    case HeaderKind::Keyset:
    case HeaderKind::Empty:
    case HeaderKind::SmallMalloc:
    case HeaderKind::BigMalloc:
      break;
    case HeaderKind::Object:
    case HeaderKind::Vector:
    case HeaderKind::Map:
    case HeaderKind::Set:
    case HeaderKind::Pair:
    case HeaderKind::ImmVector:
    case HeaderKind::ImmMap:
    case HeaderKind::ImmSet:
    case HeaderKind::WaitHandle:
    case HeaderKind::AwaitAllWH:
      // Object kinds. None of these should have native-data, because if they
      // do, the mapped header should be for the NativeData prefix.
      assert(!hasNativeData(h));
      break;
    case HeaderKind::AsyncFuncFrame:
    case HeaderKind::NativeData:
    case HeaderKind::ClosureHdr:
      // these have inner objects, but we queued the outer one.
      break;
    case HeaderKind::String:
      // nothing to queue since strings don't have pointers
      break;
    case HeaderKind::Closure:
    case HeaderKind::AsyncFuncWH:
      // These header types should not be found during heap or slab iteration
      // because they are appended to ClosureHdr or AsyncFuncFrame.
    case HeaderKind::BigObj:
    case HeaderKind::Slab:
    case HeaderKind::Free:
    case HeaderKind::Hole:
      // These header types are not allocated objects; they are handled
      // earlier and should never be queued on the gc worklist.
      always_assert(false && "bad header kind");
      break;
  }
  return true;
}
template <bool apcgc>
void Collector::checkedEnqueue(const void* p) {
  auto r = find(p);
  if (auto h = r.ptr) {
    // enqueue h the first time. If it's an object with no pointers (eg String),
    // we'll skip it when we process the queue.
    auto old = h->marks();
    if (old != mark_version_) {
      h->setmarks(mark_version_);
      enqueue(h);
      assert(checkEnqueuedKind(h));
    }
  } else if (apcgc) {
    // If p doesn't belong to any APC data, APCGCManager won't do anything
    apcgc_->mark(p);
  }
}
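
// Note on the versioned marks used above (a worked example, not new logic):
// with mark_version_ == GCBits(2), an object still carrying GCBits(1) from
// the previous collection compares unequal, so it is stamped with 2 and
// enqueued exactly once; every later pointer to it sees 2 == 2 and is
// skipped. No separate "clear all marks" pass is ever needed.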
// Enqueue a weak pointer for possible clearing at the end of scanning.
void Collector::enqueueWeak(const WeakRefDataHandle* weak) {
  weakRefs_.emplace_back(weak);
}
// mark ambiguous pointers in the range [start,start+len). If the start or
// end is a partial word, don't scan that word.
template <bool apcgc>
void FOLLY_DISABLE_ADDRESS_SANITIZER
Collector::conservativeScan(const void* start, size_t len) {
  constexpr uintptr_t M{7}; // word size - 1
  auto s = (char**)((uintptr_t(start) + M) & ~M); // round up
  auto e = (char**)((uintptr_t(start) + len) & ~M); // round down
  cscanned_ += uintptr_t(e) - uintptr_t(s);
  for (; s < e; s++) {
    checkedEnqueue<apcgc>(
      // Mask off the upper 16-bits to handle things like
      // DiscriminatedPtr which stores things up there.
      (void*)(uintptr_t(*s) & (-1ULL >> 16))
    );
  }
}
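
// Worked example of the rounding above (illustrative addresses): for
// start = 0x1003 and len = 0x22, s = (0x1003 + 7) & ~7 = 0x1008 and
// e = (0x1003 + 0x22) & ~7 = 0x1020, so the partial words at [0x1003,0x1008)
// and [0x1020,0x1025) are never dereferenced and cscanned_ grows by 0x18.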
inline int64_t cpu_ns() {
  return HPHP::Timer::GetThreadCPUTimeNanos();
}
/*
 * If we have non-conservative scanners, we must treat all unknown
 * type-index allocations in the heap as roots. Why? The generated
 * scanners will only report a pointer if it knows the pointer can point
 * to an object on the request heap. It does this by tracking, via the
 * type-index mechanism, all types which are allocated by the allocation
 * functions. If an allocation has an unknown type-index, then by definition
 * we don't know which type it contains, and therefore the auto generated
 * scanners will never report a pointer to such a type.
 *
 * The only good way to solve this is to treat such allocations as roots
 * and conservative scan them. If we're conservative scanning everything,
 * we need to take no special action, as the above problem only applies to
 * auto generated scanners.
 */
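
/*
 * Illustrative case (not a specific allocation site): a raw request-heap
 * buffer allocated without a recorded type-index may hold the only pointer
 * to a live ArrayData. The generated exact scanners can never report that
 * pointer, so init() below marks and enqueues every unknown type-index
 * allocation up front, and it then gets scanned conservatively like a root.
 */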
// initially parse the heap to find valid objects and initialize metadata.
NEVER_INLINE void Collector::init() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { init_ns_ = cpu_ns() - t0; };
  initfree_ns_ = cpu_ns() - t0;
  heap_.iterate(
    [&](HeapObject* h, size_t size) { // onBig
      ptrs_.insert(h, size);
      if (h->kind() == HeaderKind::BigMalloc) {
        if (!type_scan::isKnownType(static_cast<MallocNode*>(h)->typeIndex())) {
          // unknown type-index: treat this allocation as a root
          h->setmarks(mark_version_);
          enqueue(h);
        }
      }
    },
    [&](HeapObject* h, size_t size) { // onSlab
      ptrs_.insert(h, size);
      Slab::fromHeader(h)->initCrossingMap();
    }
  );
}
template <bool apcgc>
void Collector::finish_typescan() {
  type_scanner_.finish(
    [this](const void* p, std::size_t size) {
      // we could extract the addresses of ambiguous ptrs, if desired.
      conservativeScan<apcgc>(p, size);
    },
    [this](const void** addr) {
      xscanned_ += sizeof(*addr);
      checkedEnqueue<apcgc>(*addr);
    },
    [this](const void* weak) {
      enqueueWeak(reinterpret_cast<const WeakRefDataHandle*>(weak));
    }
  );
}
template <bool apcgc>
NEVER_INLINE void Collector::traceRoots() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { roots_ns_ = cpu_ns() - t0; };
  iterateRoots([&](const void* p, size_t size, type_scan::Index tyindex) {
    type_scanner_.scanByIndex(tyindex, p, size);
    finish_typescan<apcgc>();
  });
  cscanned_roots_ = cscanned_;
  xscanned_roots_ = xscanned_;
}
template <bool apcgc>
NEVER_INLINE void Collector::trace() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { mark_ns_ = cpu_ns() - t0; };
  while (!work_.empty()) {
    auto h = work_.back();
    work_.pop_back();
    scanHeapObject(h, type_scanner_);
    finish_typescan<apcgc>();
  }
}
// another pass through the heap, this time using the PtrMap we computed
// in init(). Free and maybe quarantine unmarked objects.
NEVER_INLINE void Collector::sweep() {
  auto& mm = MM();
  auto const t0 = cpu_ns();
  auto const usage0 = mm.currentUsage();
  MemoryManager::FreelistArray quarantine;
  if (RuntimeOption::EvalQuarantine) quarantine = mm.beginQuarantine();
  SCOPE_EXIT {
    if (RuntimeOption::EvalQuarantine) mm.endQuarantine(std::move(quarantine));
    freed_bytes_ = usage0 - mm.currentUsage();
    sweep_ns_ = cpu_ns() - t0;
    assert(freed_bytes_ >= 0);
  };
  // Clear weak references as needed.
  while (!weakRefs_.empty()) {
    auto wref = weakRefs_.back();
    assert(wref->acquire_count == 0);
    assert(wref->wr_data);
    weakRefs_.pop_back();
    auto type = wref->wr_data->pointee.m_type;
    if (type == KindOfObject) {
      auto r = find(wref->wr_data->pointee.m_data.pobj);
      if (!marked(r.ptr)) {
        WeakRefData::invalidateWeakRef(uintptr_t(r.ptr));
      }
      continue;
    }
    assert(type == KindOfNull || type == KindOfUninit);
  }
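
  // Net effect, assuming the WeakRef extension's semantics: once
  // invalidateWeakRef() runs on an unmarked pointee, a PHP-level
  // WeakRef::valid() on that handle returns false and WeakRef::get()
  // returns null, rather than resurrecting an object we are about to free.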
  bool need_reinit_free = false;
  g_context->sweepDynPropTable([&](const ObjectData* obj) {
    if (need_reinit_free) mm.reinitFree();
    auto r = find(obj);
    // if we return true, call reinitFree() before calling find() again,
    // to ensure the heap remains walkable.
    return need_reinit_free = !r.ptr || !marked(r.ptr);
  });
  mm.sweepApcArrays([&](APCLocalArray* a) {
    return !marked(a);
  });
  mm.sweepApcStrings([&](StringData* s) {
    return !marked(s);
  });
  heap_.iterate(
    [&](HeapObject* big, size_t big_size) { // onBig
      if (big->kind() == HeaderKind::BigObj) {
        HeapObject* h2 = static_cast<MallocNode*>(big) + 1;
        if (!marked(h2) && h2->kind() != HeaderKind::SmallMalloc) {
          mm.freeBigSize(h2);
        }
      }
    },
    [&](HeapObject* big, size_t /*big_size*/) { // onSlab
      auto slab = Slab::fromHeader(big);
      slab->find_if((HeapObject*)slab->start(),
        [&](HeapObject* h, size_t h_size) {
          if (!marked(h) && !isFreeKind(h->kind()) &&
              h->kind() != HeaderKind::SmallMalloc) {
            mm.freeSmallSize(h, h_size);
          }
          return false; // keep scanning the rest of the slab
        });
    }
  );
  if (apcgc_) {
    // This should be removed after a global GC API is provided.
    // Currently we only do this sweeping when in script mode.
    apcgc_->sweep();
  }
}

thread_local bool t_eager_gc{false};
thread_local BloomFilter<256*1024> t_surprise_filter;
// Structured Logging

thread_local std::atomic<size_t> g_req_num;
__thread size_t t_req_num; // snapshot thread-local copy of g_req_num;
__thread size_t t_gc_num; // nth collection in this request.
__thread bool t_enable_samples;
__thread size_t t_trigger;
__thread int64_t t_req_age;
__thread MemoryUsageStats t_pre_stats;
StructuredLogEntry logCommon() {
  StructuredLogEntry sample;
  sample.setInt("req_num", t_req_num);
  sample.setInt("memory_limit", MM().getMemoryLimit());
  sample.setInt("usage", t_pre_stats.usage());
  sample.setInt("mm_usage", t_pre_stats.mmUsage);
  sample.setInt("aux_usage", t_pre_stats.auxUsage());
  sample.setInt("mm_capacity", t_pre_stats.capacity);
  sample.setInt("peak_usage", t_pre_stats.peakUsage);
  sample.setInt("peak_capacity", t_pre_stats.peakCap);
  sample.setInt("total_alloc", t_pre_stats.totalAlloc);
  return sample;
}
void logCollection(const char* phase, const Collector& collector) {
  constexpr auto MB = 1024*1024;
  if (Trace::moduleEnabledRelease(Trace::gc, 1)) {
    auto cscanned_heap = collector.cscanned_.bytes -
                         collector.cscanned_roots_.bytes;
    auto xscanned_heap = collector.xscanned_.bytes -
                         collector.xscanned_roots_.bytes;
    Trace::traceRelease(
      "gc age %ldms mmUsage %luM trigger %luM init %lums mark %lums "
      "marked %.1f%% pinned %.1f%% free %.1fM "
      "cscan-heap %.1fM xscan-heap %.1fM\n",
      t_req_age/1000,
      t_pre_stats.mmUsage/MB,
      t_trigger/MB,
      collector.init_ns_/1000000,
      collector.mark_ns_/1000000,
      100.0 * collector.marked_.bytes / t_pre_stats.mmUsage,
      100.0 * collector.pinned_.bytes / t_pre_stats.mmUsage,
      double(collector.freed_bytes_) / MB,
      double(cscanned_heap) / MB,
      double(xscanned_heap) / MB
    );
  }
  auto sample = logCommon();
  sample.setStr("phase", phase);
  std::string scanner(type_scan::hasNonConservative() ? "typescan" : "ts-cons");
  sample.setStr("scanner", !debug ? scanner : scanner + "-debug");
  sample.setInt("gc_num", t_gc_num);
  sample.setInt("req_age_micros", t_req_age);
  // timers of gc-sub phases
  sample.setInt("init_micros", collector.init_ns_/1000);
  sample.setInt("initfree_micros", collector.initfree_ns_/1000);
  sample.setInt("roots_micros", collector.roots_ns_/1000);
  sample.setInt("mark_micros", collector.mark_ns_/1000);
  sample.setInt("sweep_micros", collector.sweep_ns_/1000);
  // size metrics gathered during gc
  sample.setInt("allocd_span", collector.ptrs_.span().second);
  sample.setInt("marked_bytes", collector.marked_.bytes);
  sample.setInt("pinned_bytes", collector.pinned_.bytes);
  sample.setInt("unknown_bytes", collector.unknown_.bytes);
  sample.setInt("freed_bytes", collector.freed_bytes_);
  sample.setInt("trigger_bytes", t_trigger);
  sample.setInt("cscanned_roots", collector.cscanned_roots_.bytes);
  sample.setInt("xscanned_roots", collector.xscanned_roots_.bytes);
  sample.setInt("cscanned_heap",
                collector.cscanned_.bytes - collector.cscanned_roots_.bytes);
  sample.setInt("xscanned_heap",
                collector.xscanned_.bytes - collector.xscanned_roots_.bytes);
  sample.setInt("rds_normal_size", rds::normalSection().size());
  sample.setInt("rds_normal_count", rds::detail::s_normal_alloc_descs.size());
  sample.setInt("rds_local_size", rds::localSection().size());
  sample.setInt("rds_local_count", rds::detail::s_local_alloc_descs.size());
  sample.setInt("max_worklist", collector.max_worklist_);
  StructuredLog::log("hhvm_gc", sample);
}
void collectImpl(HeapImpl& heap, const char* phase, GCBits& mark_version) {
  if (t_eager_gc && RuntimeOption::EvalFilterGCPoints) {
    t_eager_gc = false;
    auto pc = vmpc();
    if (t_surprise_filter.test(pc)) {
      if (RuntimeOption::EvalGCForAPC) {
        if (!APCGCManager::getInstance().excessedGCTriggerBar()) {
          return;
        }
      } else {
        return;
      }
    }
    t_surprise_filter.insert(pc);
    TRACE(2, "eager gc %s at %p\n", phase, pc);
  } else {
    TRACE(2, "normal gc %s at %p\n", phase, vmpc());
  }
  t_enable_samples = StructuredLog::coinflip(RuntimeOption::EvalGCSampleRate);
  if (t_enable_samples) {
    t_pre_stats = MM().getStatsCopy(); // don't check or trigger OOM
  }
  mark_version = (mark_version == MaxMark) ? MinMark :
                 GCBits(uint8_t(mark_version) + 1);
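  // Versions cycle MinMark -> MaxMark -> MinMark (1, 2, 3, 1, ...), with
  // GCBits(0) left as the never-marked state of fresh allocations; bumping
  // the version "unmarks" every survivor of the previous collection without
  // touching each header.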
  Collector collector(
    heap,
    RuntimeOption::EvalGCForAPC ? &APCGCManager::getInstance() : nullptr,
    mark_version
  );
  collector.init();
  if (RuntimeOption::EvalGCForAPC) {
    collector.traceRoots<true>();
    collector.trace<true>();
  } else {
    collector.traceRoots<false>();
    collector.trace<false>();
  }
  collector.sweep();
  if (t_enable_samples) {
    logCollection(phase, collector);
  }
}
void MemoryManager::resetGC() {
  t_req_num = ++g_req_num;
  t_gc_num = 0;
  updateNextGc();
}
void MemoryManager::resetEagerGC() {
  if (RuntimeOption::EvalEagerGC && RuntimeOption::EvalFilterGCPoints) {
    t_surprise_filter.clear();
  }
}
void MemoryManager::requestEagerGC() {
  if (RuntimeOption::EvalEagerGC && rds::header()) {
    t_eager_gc = true;
    setSurpriseFlag(PendingGCFlag);
  }
}
void MemoryManager::requestGC() {
  if (this->isGCEnabled() && rds::header()) {
    if (m_stats.mmUsage > m_nextGc) {
      setSurpriseFlag(PendingGCFlag);
    }
  }
}
void MemoryManager::updateNextGc() {
  auto stats = getStatsCopy();
  auto mm_limit = m_usageLimit - stats.auxUsage();
  int64_t delta = (mm_limit - stats.mmUsage) *
                  RuntimeOption::EvalGCTriggerPct;
  delta = std::max(delta, RuntimeOption::EvalGCMinTrigger);
  m_nextGc = stats.mmUsage + delta;
}
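
// Worked example with made-up numbers: m_usageLimit = 1024M, auxUsage() = 0,
// mmUsage = 200M and EvalGCTriggerPct = 0.5 give delta = (1024M - 200M) * 0.5
// = 412M, so the next collection is requested once mmUsage crosses 612M.
// EvalGCMinTrigger puts a floor under delta so that tiny remaining headroom
// can't cause back-to-back collections.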
void MemoryManager::collect(const char* phase) {
  t_req_age = cpu_ns()/1000 - m_req_start_micros;
  t_trigger = m_nextGc;
  collectImpl(m_heap, phase, m_mark_version);
}
void MemoryManager::setMemoryLimit(size_t limit) {
  assert(limit <= (size_t)std::numeric_limits<int64_t>::max());
  m_usageLimit = limit;
}