GC: Rename Marker->Collector
hphp/runtime/base/heap-collect.cpp (blob e6a76a88aa52d74b9ed220593e5b502fe77938ae)
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/apc-gc-manager.h"
#include "hphp/runtime/base/req-containers.h"
#include "hphp/runtime/base/mixed-array-defs.h"
#include "hphp/runtime/base/memory-manager-defs.h"
#include "hphp/runtime/base/heap-scan.h"
#include "hphp/runtime/base/thread-info.h"
#include "hphp/runtime/base/heap-graph.h"
#include "hphp/runtime/base/weakref-data.h"
#include "hphp/runtime/ext/weakref/weakref-data-handle.h"
#include "hphp/runtime/vm/vm-regs.h"
#include "hphp/util/alloc.h"
#include "hphp/util/bloom-filter.h"
#include "hphp/util/cycles.h"
#include "hphp/util/process.h"
#include "hphp/util/ptr-map.h"
#include "hphp/util/struct-log.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/util/type-scan.h"

#include <algorithm>
#include <iterator>
#include <vector>
#include <folly/Range.h>
#include <folly/portability/Unistd.h>

namespace HPHP {
TRACE_SET_MOD(gc);

namespace {

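// High-level shape of a collection, as implemented below (see collectImpl()):
//   1. init()       - walk the heap, build the PtrMap and slab crossing maps,
//                     and treat unknown-type BigMalloc blocks as roots.
//   2. traceRoots() - scan VM roots via the type scanner.
//   3. trace()      - drain the worklist, transitively marking reachable objects.
//   4. sweep()      - free (or quarantine) everything left unmarked.
// This is a non-moving mark-sweep collector over the request (MM) heap.
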
struct Counter {
  size_t count{0};
  size_t bytes{0};
  void operator+=(size_t n) {
    bytes += n;
    count++;
  }
};

bool hasNativeData(const HeapObject* h) {
  assert(isObjectKind(h->kind()));
  return static_cast<const ObjectData*>(h)->getAttribute(
    ObjectData::HasNativeData
  );
}

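// Mark bits are versioned: each collection advances mark_version within
// [MinMark, MaxMark] (see collectImpl()), and an object counts as marked only
// when h->marks() == mark_version_. Stale mark bits from earlier collections
// therefore never need to be cleared.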
constexpr auto MinMark = GCBits(1);
constexpr auto MaxMark = GCBits(3);

struct Collector {
  explicit Collector(HeapImpl& heap, APCGCManager* apcgc, GCBits mark_version)
    : heap_(heap), mark_version_{mark_version}, apcgc_(apcgc)
  {}
  void init();
  template<bool apcgc> void traceRoots();
  template<bool apcgc> void trace();
  void sweep();

  // drain the scanner, enqueue pointers
  void finish_scan();

  // mark ambiguous pointers in the range [start,start+len)
  template<bool apcgc>
  void conservativeScan(const void* start, size_t len);

  bool marked(const HeapObject* h) {
    return h->marks() == mark_version_;
  }
  template<bool apcgc> void checkedEnqueue(const void* p);
  void enqueueWeak(const WeakRefDataHandle* p);
  template<bool apcgc> void finish_typescan();
  HdrBlock find(const void*);

  void enqueue(const HeapObject* h) {
    assert(h && h->kind() <= HeaderKind::BigMalloc &&
           h->kind() != HeaderKind::AsyncFuncWH &&
           h->kind() != HeaderKind::Closure);
    assert(!isObjectKind(h->kind()) || !hasNativeData(h));
    work_.push_back(h);
    max_worklist_ = std::max(max_worklist_, work_.size());
  }

  HeapImpl& heap_;
  GCBits const mark_version_;
  Counter marked_, pinned_, unknown_; // bytes
  Counter cscanned_roots_, cscanned_; // bytes
  Counter xscanned_roots_, xscanned_; // bytes
  size_t init_ns_, initfree_ns_, roots_ns_, mark_ns_, sweep_ns_;
  size_t max_worklist_{0}; // max size of work_
  size_t freed_bytes_{0};
  PtrMap<const HeapObject*> ptrs_;
  type_scan::Scanner type_scanner_;
  std::vector<const HeapObject*> work_;
  std::vector<const WeakRefDataHandle*> weakRefs_;
  APCGCManager* const apcgc_;
};

// TODO(T20460162): The contiguous heap has a bitmap of which chunks of memory
// are allocated/free. And it can efficiently find the start of an allocated
// object using bit operations. So there is an opportunity to speed this up by
// directly accessing the bitmap instead of using PtrMap.
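// find() maps an arbitrary (possibly interior) pointer to the HeapObject that
// contains it, returning {header, size}, or {nullptr, 0} when ptr does not
// point into any tracked slab or big allocation.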
HdrBlock Collector::find(const void* ptr) {
  if (auto r = ptrs_.region(ptr)) {
    auto h = const_cast<HeapObject*>(r->first);
    if (h->kind() == HeaderKind::Slab) {
      return Slab::fromHeader(h)->find(ptr);
    }
    if (h->kind() == HeaderKind::BigObj) {
      auto h2 = static_cast<MallocNode*>(h) + 1;
      return ptr >= h2 ? HdrBlock{h2, r->second - sizeof(MallocNode)} :
                         HdrBlock{nullptr, 0};
    }
    assert(h->kind() == HeaderKind::BigMalloc);
    return {h, r->second};
  }
  return {nullptr, 0};
}

DEBUG_ONLY bool checkEnqueuedKind(const HeapObject* h) {
  switch (h->kind()) {
    case HeaderKind::Apc:
    case HeaderKind::Globals:
    case HeaderKind::Proxy:
    case HeaderKind::Ref:
    case HeaderKind::Resource:
    case HeaderKind::Packed:
    case HeaderKind::Mixed:
    case HeaderKind::Dict:
    case HeaderKind::VecArray:
    case HeaderKind::Keyset:
    case HeaderKind::Empty:
    case HeaderKind::SmallMalloc:
    case HeaderKind::BigMalloc:
      break;
    case HeaderKind::Object:
    case HeaderKind::Vector:
    case HeaderKind::Map:
    case HeaderKind::Set:
    case HeaderKind::Pair:
    case HeaderKind::ImmVector:
    case HeaderKind::ImmMap:
    case HeaderKind::ImmSet:
    case HeaderKind::WaitHandle:
    case HeaderKind::AwaitAllWH:
      // Object kinds. None of these should have native-data, because if they
      // do, the mapped header should be for the NativeData prefix.
      assert(!hasNativeData(h));
      break;
    case HeaderKind::AsyncFuncFrame:
    case HeaderKind::NativeData:
    case HeaderKind::ClosureHdr:
      // these have inner objects, but we queued the outer one.
      break;
    case HeaderKind::String:
      // nothing to queue since strings don't have pointers
      break;
    case HeaderKind::Closure:
    case HeaderKind::AsyncFuncWH:
      // These header types should not be found during heap or slab iteration
      // because they are appended to ClosureHdr or AsyncFuncFrame.
    case HeaderKind::BigObj:
    case HeaderKind::Slab:
    case HeaderKind::Free:
    case HeaderKind::Hole:
      // These header types are not allocated objects; they are handled
      // earlier and should never be queued on the gc worklist.
      always_assert(false && "bad header kind");
      break;
  }
  return true;
}

template <bool apcgc>
void Collector::checkedEnqueue(const void* p) {
  auto r = find(p);
  if (auto h = r.ptr) {
    // enqueue h the first time. If it's an object with no pointers (eg String),
    // we'll skip it when we process the queue.
    auto old = h->marks();
    if (old != mark_version_) {
      h->setmarks(mark_version_);
      marked_ += r.size;
      pinned_ += r.size;
      enqueue(h);
      assert(checkEnqueuedKind(h));
    }
  } else if (apcgc) {
    // If p doesn't belong to any APC data, APCGCManager won't do anything
    apcgc_->mark(p);
  }
}

// Enqueue a weak pointer for possible clearing at the end of scanning.
void Collector::enqueueWeak(const WeakRefDataHandle* weak) {
  weakRefs_.emplace_back(weak);
}

// mark ambiguous pointers in the range [start,start+len). If the start or
// end is a partial word, don't scan that word.
template <bool apcgc>
void FOLLY_DISABLE_ADDRESS_SANITIZER
Collector::conservativeScan(const void* start, size_t len) {
  constexpr uintptr_t M{7}; // word size - 1
  auto s = (char**)((uintptr_t(start) + M) & ~M); // round up
  auto e = (char**)((uintptr_t(start) + len) & ~M); // round down
  cscanned_ += uintptr_t(e) - uintptr_t(s);
  for (; s < e; s++) {
    checkedEnqueue<apcgc>(
      // Mask off the upper 16-bits to handle things like
      // DiscriminatedPtr which stores things up there.
      (void*)(uintptr_t(*s) & (-1ULL >> 16))
    );
  }
}
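
// Worked example for the rounding in conservativeScan() above (illustrative
// addresses): with start == 0x1003 and len == 0x20, s rounds up to 0x1008 and
// e rounds down to 0x1020, so only the fully contained words are dereferenced.
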
inline int64_t cpu_ns() {
  return HPHP::Timer::GetThreadCPUTimeNanos();
}

/*
 * If we have non-conservative scanners, we must treat all unknown
 * type-index allocations in the heap as roots. Why? The generated
 * scanners will only report a pointer if they know the pointer can point
 * to an object on the request heap. They do this by tracking all types
 * which are allocated via the allocation functions via the type-index
 * mechanism. If an allocation has an unknown type-index, then by definition
 * we don't know which type it contains, and therefore the auto generated
 * scanners will never report a pointer to such a type.
 *
 * The only good way to solve this is to treat such allocations as roots
 * and conservatively scan them. If we're conservative scanning everything,
 * we need to take no special action, as the above problem only applies to
 * auto generated scanners.
 */

// initially parse the heap to find valid objects and initialize metadata.
NEVER_INLINE void Collector::init() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { init_ns_ = cpu_ns() - t0; };
  MM().initFree();
  initfree_ns_ = cpu_ns() - t0;

  heap_.iterate(
    [&](HeapObject* h, size_t size) { // onBig
      ptrs_.insert(h, size);
      if (h->kind() == HeaderKind::BigMalloc) {
        if (!type_scan::isKnownType(static_cast<MallocNode*>(h)->typeIndex())) {
          unknown_ += size;
          h->setmarks(mark_version_);
          enqueue(h);
        }
      }
    },
    [&](HeapObject* h, size_t size) { // onSlab
      ptrs_.insert(h, size);
      Slab::fromHeader(h)->initCrossingMap();
    }
  );
  ptrs_.prepare();
}

template <bool apcgc>
void Collector::finish_typescan() {
  type_scanner_.finish(
    [this](const void* p, std::size_t size) {
      // we could extract the addresses of ambiguous ptrs, if desired.
      conservativeScan<apcgc>(p, size);
    },
    [this](const void** addr) {
      xscanned_ += sizeof(*addr);
      checkedEnqueue<apcgc>(*addr);
    },
    [this](const void* weak) {
      enqueueWeak(reinterpret_cast<const WeakRefDataHandle*>(weak));
    }
  );
}

template <bool apcgc>
NEVER_INLINE void Collector::traceRoots() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { roots_ns_ = cpu_ns() - t0; };
  iterateRoots([&](const void* p, size_t size, type_scan::Index tyindex) {
    type_scanner_.scanByIndex(tyindex, p, size);
    finish_typescan<apcgc>();
  });
  cscanned_roots_ = cscanned_;
  xscanned_roots_ = xscanned_;
}

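// Transitive marking: pop objects off the worklist, scan each one with the
// type scanner, and let finish_typescan() enqueue any newly discovered heap
// objects, until the worklist is empty.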
template <bool apcgc>
NEVER_INLINE void Collector::trace() {
  auto const t0 = cpu_ns();
  SCOPE_EXIT { mark_ns_ = cpu_ns() - t0; };
  while (!work_.empty()) {
    auto h = work_.back();
    work_.pop_back();
    scanHeapObject(h, type_scanner_);
    finish_typescan<apcgc>();
  }
}

// another pass through the heap, this time using the PtrMap we computed
// in init(). Free and maybe quarantine unmarked objects.
NEVER_INLINE void Collector::sweep() {
  auto& mm = MM();
  auto const t0 = cpu_ns();
  auto const usage0 = mm.currentUsage();
  MemoryManager::FreelistArray quarantine;
  if (RuntimeOption::EvalQuarantine) quarantine = mm.beginQuarantine();
  SCOPE_EXIT {
    if (RuntimeOption::EvalQuarantine) mm.endQuarantine(std::move(quarantine));
    freed_bytes_ = usage0 - mm.currentUsage();
    sweep_ns_ = cpu_ns() - t0;
    assert(freed_bytes_ >= 0);
  };

  // Clear weak references as needed.
  while (!weakRefs_.empty()) {
    auto wref = weakRefs_.back();
    assert(wref->acquire_count == 0);
    assert(wref->wr_data);
    weakRefs_.pop_back();
    auto type = wref->wr_data->pointee.m_type;
    if (type == KindOfObject) {
      auto r = find(wref->wr_data->pointee.m_data.pobj);
      if (!marked(r.ptr)) {
        WeakRefData::invalidateWeakRef(uintptr_t(r.ptr));
        mm.reinitFree();
      }
      continue;
    }
    assert(type == KindOfNull || type == KindOfUninit);
  }

  bool need_reinit_free = false;
  g_context->sweepDynPropTable([&](const ObjectData* obj) {
    if (need_reinit_free) mm.reinitFree();
    auto r = find(obj);
    // if we return true, call reinitFree() before calling find() again,
    // to ensure the heap remains walkable.
    return need_reinit_free = !r.ptr || !marked(r.ptr);
  });

  mm.sweepApcArrays([&](APCLocalArray* a) {
    return !marked(a);
  });

  mm.sweepApcStrings([&](StringData* s) {
    return !marked(s);
  });

  mm.reinitFree();

  heap_.iterate(
    [&](HeapObject* big, size_t big_size) { // onBig
      if (big->kind() == HeaderKind::BigObj) {
        HeapObject* h2 = static_cast<MallocNode*>(big) + 1;
        if (!marked(h2) && h2->kind() != HeaderKind::SmallMalloc) {
          mm.freeBigSize(h2);
        }
      }
    },
    [&](HeapObject* big, size_t /*big_size*/) { // onSlab
      auto slab = Slab::fromHeader(big);
      slab->find_if((HeapObject*)slab->start(),
                    [&](HeapObject* h, size_t h_size) {
        if (!marked(h) && !isFreeKind(h->kind()) &&
            h->kind() != HeaderKind::SmallMalloc) {
          mm.freeSmallSize(h, h_size);
        }
        return false;
      });
    }
  );
  if (apcgc_) {
    // This should be removed after a global GC API is provided.
    // Currently we only do this sweeping in script mode.
    apcgc_->sweep();
  }
}

thread_local bool t_eager_gc{false};
thread_local BloomFilter<256*1024> t_surprise_filter;
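// With EvalFilterGCPoints, t_surprise_filter remembers which vmpc() values
// have already triggered an eager collection in this request, so a given GC
// point normally fires at most once (modulo bloom-filter false positives);
// resetEagerGC() clears it.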

// Structured Logging

thread_local std::atomic<size_t> g_req_num;
__thread size_t t_req_num; // snapshot thread-local copy of g_req_num;
__thread size_t t_gc_num; // nth collection in this request.
__thread bool t_enable_samples;
__thread size_t t_trigger;
__thread int64_t t_req_age;
__thread MemoryUsageStats t_pre_stats;

StructuredLogEntry logCommon() {
  StructuredLogEntry sample;
  sample.setInt("req_num", t_req_num);
  // MemoryUsageStats
  sample.setInt("memory_limit", MM().getMemoryLimit());
  sample.setInt("usage", t_pre_stats.usage());
  sample.setInt("mm_usage", t_pre_stats.mmUsage);
  sample.setInt("aux_usage", t_pre_stats.auxUsage());
  sample.setInt("mm_capacity", t_pre_stats.capacity);
  sample.setInt("peak_usage", t_pre_stats.peakUsage);
  sample.setInt("peak_capacity", t_pre_stats.peakCap);
  sample.setInt("total_alloc", t_pre_stats.totalAlloc);
  return sample;
}

void logCollection(const char* phase, const Collector& collector) {
  // log stuff
  constexpr auto MB = 1024*1024;
  if (Trace::moduleEnabledRelease(Trace::gc, 1)) {
    auto cscanned_heap = collector.cscanned_.bytes -
      collector.cscanned_roots_.bytes;
    auto xscanned_heap = collector.xscanned_.bytes -
      collector.xscanned_roots_.bytes;
    Trace::traceRelease(
      "gc age %ldms mmUsage %luM trigger %luM init %lums mark %lums "
      "marked %.1f%% pinned %.1f%% free %.1fM "
      "cscan-heap %.1fM xscan-heap %.1fM\n",
      t_req_age,
      t_pre_stats.mmUsage/MB,
      t_trigger/MB,
      collector.init_ns_/1000000,
      collector.mark_ns_/1000000,
      100.0 * collector.marked_.bytes / t_pre_stats.mmUsage,
      100.0 * collector.pinned_.bytes / t_pre_stats.mmUsage,
      double(collector.freed_bytes_) / MB,
      double(cscanned_heap) / MB,
      double(xscanned_heap) / MB
    );
  }
  auto sample = logCommon();
  sample.setStr("phase", phase);
  std::string scanner(type_scan::hasNonConservative() ? "typescan" : "ts-cons");
  sample.setStr("scanner", !debug ? scanner : scanner + "-debug");
  sample.setInt("gc_num", t_gc_num);
  sample.setInt("req_age_micros", t_req_age);
  // timers of gc-sub phases
  sample.setInt("init_micros", collector.init_ns_/1000);
  sample.setInt("initfree_micros", collector.initfree_ns_/1000);
  sample.setInt("roots_micros", collector.roots_ns_/1000);
  sample.setInt("mark_micros", collector.mark_ns_/1000);
  sample.setInt("sweep_micros", collector.sweep_ns_/1000);
  // size metrics gathered during gc
  sample.setInt("allocd_span", collector.ptrs_.span().second);
  sample.setInt("marked_bytes", collector.marked_.bytes);
  sample.setInt("pinned_bytes", collector.pinned_.bytes);
  sample.setInt("unknown_bytes", collector.unknown_.bytes);
  sample.setInt("freed_bytes", collector.freed_bytes_);
  sample.setInt("trigger_bytes", t_trigger);
  sample.setInt("cscanned_roots", collector.cscanned_roots_.bytes);
  sample.setInt("xscanned_roots", collector.xscanned_roots_.bytes);
  sample.setInt("cscanned_heap",
                collector.cscanned_.bytes - collector.cscanned_roots_.bytes);
  sample.setInt("xscanned_heap",
                collector.xscanned_.bytes - collector.xscanned_roots_.bytes);
  sample.setInt("rds_normal_size", rds::normalSection().size());
  sample.setInt("rds_normal_count", rds::detail::s_normal_alloc_descs.size());
  sample.setInt("rds_local_size", rds::localSection().size());
  sample.setInt("rds_local_count", rds::detail::s_local_alloc_descs.size());
  sample.setInt("max_worklist", collector.max_worklist_);
  StructuredLog::log("hhvm_gc", sample);
}

void collectImpl(HeapImpl& heap, const char* phase, GCBits& mark_version) {
  VMRegAnchor _;
  if (t_eager_gc && RuntimeOption::EvalFilterGCPoints) {
    t_eager_gc = false;
    auto pc = vmpc();
    if (t_surprise_filter.test(pc)) {
      if (RuntimeOption::EvalGCForAPC) {
        if (!APCGCManager::getInstance().excessedGCTriggerBar()) {
          return;
        }
      } else {
        return;
      }
    }
    t_surprise_filter.insert(pc);
    TRACE(2, "eager gc %s at %p\n", phase, pc);
    phase = "eager";
  } else {
    TRACE(2, "normal gc %s at %p\n", phase, vmpc());
  }
  if (t_gc_num == 0) {
    t_enable_samples = StructuredLog::coinflip(RuntimeOption::EvalGCSampleRate);
  }
  if (t_enable_samples) {
    t_pre_stats = MM().getStatsCopy(); // don't check or trigger OOM
  }
  mark_version = (mark_version == MaxMark) ? MinMark :
                 GCBits(uint8_t(mark_version) + 1);
  Collector collector(
    heap,
    RuntimeOption::EvalGCForAPC ? &APCGCManager::getInstance() : nullptr,
    mark_version
  );
  collector.init();
  if (RuntimeOption::EvalGCForAPC) {
    collector.traceRoots<true>();
    collector.trace<true>();
  } else {
    collector.traceRoots<false>();
    collector.trace<false>();
  }
  collector.sweep();
  if (t_enable_samples) {
    logCollection(phase, collector);
  }
  ++t_gc_num;
}

} // namespace

void MemoryManager::resetGC() {
  t_req_num = ++g_req_num;
  t_gc_num = 0;
  updateNextGc();
}

void MemoryManager::resetEagerGC() {
  if (RuntimeOption::EvalEagerGC && RuntimeOption::EvalFilterGCPoints) {
    t_surprise_filter.clear();
  }
}

void MemoryManager::requestEagerGC() {
  if (RuntimeOption::EvalEagerGC && rds::header()) {
    t_eager_gc = true;
    setSurpriseFlag(PendingGCFlag);
  }
}

void MemoryManager::requestGC() {
  if (this->isGCEnabled() && rds::header()) {
    if (m_stats.mmUsage > m_nextGc) {
      setSurpriseFlag(PendingGCFlag);
    }
  }
}

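// updateNextGc() sets the usage level at which requestGC() raises
// PendingGCFlag: it takes EvalGCTriggerPct of the headroom left under the
// request limit (at least EvalGCMinTrigger bytes) and adds it to current mm
// usage. Illustrative numbers, assuming a 128M limit, no aux usage, 16M in
// use, and GCTriggerPct = 0.5: delta = (128M - 16M) * 0.5 = 56M, so
// m_nextGc ~= 72M.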
void MemoryManager::updateNextGc() {
  auto stats = getStatsCopy();
  auto mm_limit = m_usageLimit - stats.auxUsage();
  int64_t delta = (mm_limit - stats.mmUsage) *
                  RuntimeOption::EvalGCTriggerPct;
  delta = std::max(delta, RuntimeOption::EvalGCMinTrigger);
  m_nextGc = stats.mmUsage + delta;
}

void MemoryManager::collect(const char* phase) {
  if (empty()) return;
  t_req_age = cpu_ns()/1000 - m_req_start_micros;
  t_trigger = m_nextGc;
  collectImpl(m_heap, phase, m_mark_version);
  updateNextGc();
}

void MemoryManager::setMemoryLimit(size_t limit) {
  assert(limit <= (size_t)std::numeric_limits<int64_t>::max());
  m_usageLimit = limit;
  updateNextGc();
}