/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/tc.h"
#include "hphp/runtime/vm/jit/tc-internal.h"

#include "hphp/runtime/vm/func.h"
#include "hphp/runtime/vm/treadmill.h"

#include "hphp/runtime/vm/jit/cg-meta.h"
#include "hphp/runtime/vm/jit/func-order.h"
#include "hphp/runtime/vm/jit/types.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/relocation.h"
#include "hphp/runtime/vm/jit/service-requests.h"
#include "hphp/runtime/vm/jit/smashable-instr.h"
#include "hphp/runtime/vm/jit/srcdb.h"
#include "hphp/runtime/vm/jit/tc-intercept.h"
#include "hphp/runtime/vm/jit/vasm-gen.h"

#include "hphp/util/arch.h"
#include "hphp/util/asm-x64.h"
#include "hphp/util/match.h"
#include "hphp/util/rds-local.h"
#include "hphp/util/trace.h"

#include "hphp/vixl/a64/instructions-a64.h"

#include <condition_variable>
#include <queue>  // for std::queue, used by s_jobq below
#include <thread> // for std::thread, used by s_reaper below

/*
 * This module implements garbage collection for the translation cache so that
 * unreachable translations may be overwritten by new translations.
 *
 * Unreachable translations are created by either:
 *  (1) freeing a function through the treadmill, or
 *  (2) replacing profiling translations in a SrcRec.
 *
 * SrcRecs and prologues are recorded as they are emitted into the TC so that
 * when their associated function becomes unreachable they may be freed. In the
 * case of profiling translations, these are sometimes freed eagerly when they
 * become unreachable, as they will be erased from their associated SrcRec and
 * are not tracked elsewhere.
 *
 * Function callers and inter-translation jumps are recorded so that they may
 * be smashed when space is reclaimed within the TC.
 *
 * Freed memory is tracked and allocated using the policy defined in DataBlock,
 * and allocation is performed in MCGenerator following the creation of a new
 * translation.
 *
 * Rather than emit new translations directly into freed memory, they are
 * written at the end of the TC and then relocated into freed memory. As the
 * space required for a translation is unknown until it is complete, this
 * strategy allows allocation of an appropriately sized block.
 *
 * Currently all allocation and deallocation is done eagerly; the performance
 * of this module therefore depends on accurately detecting unreachable
 * functions and translations.
 *
 * This module exports diagnostic data in the form of counts of smashed calls
 * and branches, and of recorded functions. Higher-level diagnostic data
 * exported by DataBlock may be of more use in tracking TC health; in
 * particular, the number of free bytes and free blocks gives a rough measure
 * of fragmentation within the allocator.
 *
 * See DataBlock for details about the allocation strategy and free-memory
 * tracking.
 */
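
/*
 * Rough usage sketch (illustrative only; these are the entry points defined
 * at the bottom of this file, and `func`/`locs` stand in for values the
 * caller already has). With RuntimeOption::EvalEnableReusableTC set:
 *
 *   recycleInit();                 // start the background reaper thread
 *   // ... JIT runs; recordFunc*()/recordJump() register TC metadata ...
 *   reclaimFunction(func);         // enqueue teardown of func's translations
 *   reclaimTranslations(std::move(locs)); // treadmill a batch of TransLocs
 *   recycleStop();                 // signal shutdown and join the reaper
 */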
namespace HPHP::jit::tc {

///////////////////////////////////////////////////////////////////////////////

TRACE_SET_MOD(reusetc);

namespace {
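// Per-function record of TC artifacts: the prologues and SrcRecs emitted for
// the function, plus the addresses of smashable calls made to it.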
struct FuncInfo {
  FuncInfo() = default;
  FuncInfo(FuncInfo&&) = default;
  FuncInfo& operator=(FuncInfo&&) = default;

  std::vector<TransLoc> prologues;
  std::vector<SrcRec*> srcRecs;
  jit::fast_set<TCA> callers;
};
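
// Target of a smashed call: the callee's FuncId and, when the call targeted a
// profiling prologue, its ProfTransRec (otherwise nullptr).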
struct SmashedCall {
  FuncId fid;
  ProfTransRec* rec;
};
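
// Guards the metadata tables below; acquire via lockData().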
std::mutex s_dataLock;

jit::fast_map<
  TCA /* toSmash */,
  SmashedCall /* target */
> s_smashedCalls;

jit::fast_map<
  TCA /* toSmash */,
  SrcRec* /* dest */
> s_smashedBranches;

// Keyed on FuncId as these are never reused
jit::fast_map<FuncId, FuncInfo> s_funcTCData;
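
// Reclamation work is deferred to a background "reaper" thread. A Job names
// either a whole function (FuncJob), a single SrcRec, or a single TransLoc
// to reclaim.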
struct FuncJob {
  const StringData* fname;
  FuncId fid;
};

using Job = boost::variant<FuncJob, const SrcRec*, TransLoc>;
std::atomic<bool> s_running{false};
std::queue<Job> s_jobq;
std::condition_variable s_qcv;
std::mutex s_qlock;
std::thread s_reaper;
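
// Push a job onto the reaper's queue and wake it.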
void enqueueJob(Job j) {
  std::unique_lock<std::mutex> l{s_qlock};
  s_jobq.emplace(j);
  l.unlock();
  s_qcv.notify_all();
}
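
// Block until a job is available or shutdown is requested; returns
// std::nullopt once s_running has been cleared by recycleStop().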
Optional<Job> dequeueJob() {
  std::unique_lock<std::mutex> l{s_qlock};
  s_qcv.wait(l, [] {
    return !s_running.load(std::memory_order_acquire) || !s_jobq.empty();
  });

  if (!s_running.load(std::memory_order_acquire)) return std::nullopt;
  assertx(!s_jobq.empty());
  auto ret = s_jobq.front();
  s_jobq.pop();
  return ret;
}

std::unique_lock<std::mutex> lockData() {
  return std::unique_lock<std::mutex>(s_dataLock);
}

/*
 * Removes meta-data about a caller to a proflogue from prof-data to ensure
 * that a call to an optimized translation isn't wrongly smashed later.
 */
void clearProfCaller(TCA toSmash, ProfTransRec* rec) {
  if (!rec || !rec->isProflogue()) return;

  auto lock = rec->lockCallerList();
  rec->removeMainCaller(toSmash);
}

/*
 * Erase any metadata referencing a call at address start and return the
 * SmashedCall record if the call referenced a ProfTransRec.
 */
Optional<SmashedCall> eraseSmashedCall(TCA start) {
  auto dataLock = lockData();
  auto it = s_smashedCalls.find(start);
  if (it != s_smashedCalls.end()) {
    auto scall = std::move(it->second);
    ITRACE(1, "Erasing smashed call mapping @ {} to ProfTransRec {}\n",
           start, scall.rec);
    s_funcTCData[scall.fid].callers.erase(start);
    s_smashedCalls.erase(it);
    if (scall.rec) return scall;
  }
  return std::nullopt;
}

/*
 * Clear bound branch and call data associated with range [start, end) in the
 * TC. Also sets all catch-traces to null to ensure that they are reset as
 * appropriate in any future translation (the unwinder always_asserts on null
 * catch trace annotations).
 */
void clearTCMaps(TCA start, TCA end) {
  auto const profData = jit::profData();
  deleteRangeInterceptTCA(start, end);
  while (start < end) {
    bool isBranch, isNop, isCall;
    size_t instSz;
    switch (arch()) {
      case Arch::ARM: {
        using namespace vixl;
        Instruction* instr = Instruction::Cast(start);
        isBranch = instr->IsCondBranchImm() || instr->IsUncondBranchImm() ||
                   instr->IsUncondBranchReg() || instr->IsCompareBranch() ||
                   instr->IsTestBranch();
        isNop = instr->Mask(SystemHintFMask) == SystemHintFixed &&
                instr->ImmHint() == NOP;
        isCall = instr->Mask(UnconditionalBranchMask) == BL ||
                 instr->Mask(UnconditionalBranchToRegisterMask) == BLR;
        instSz = vixl::kInstructionSize;
        break;
      }
      case Arch::X64: {
        x64::DecodedInstruction di(start);
        isBranch = di.isBranch();
        isNop = di.isNop();
        isCall = di.isCall();
        instSz = di.size();
        break;
      }
    }

    if (profData && (isBranch || isNop || isCall)) {
      auto const id = profData->clearJmpTransID(start);
      if (id != kInvalidTransID) {
        ITRACE(1, "Erasing jmpTransID @ {} to {}\n", start, id);
      }
    }

    FuncOrder::clearCallFuncId(start);

    eraseCatchTrace(start);
    eraseInlineStack(start);
    if (isCall) {
      if (auto call = eraseSmashedCall(start)) {
        clearProfCaller(start, call->rec);
      }
    }
    start += instSz;
  }
}

/*
 * Erase all metadata associated with branches to loc. This does not update
 * the associated IB records on the SrcRec for loc. Returns a vector of the
 * erased records.
 */
std::vector<std::pair<TCA, SrcRec*>> eraseSmashedBranches(TransLoc loc) {
  auto dataLock = lockData();
  std::vector<std::pair<TCA, SrcRec*>> ibRecs;
  for (auto it = s_smashedBranches.begin(); it != s_smashedBranches.end();) {
    auto br = it++;
    if (loc.contains(br->first)) {
      ITRACE(1, "Erasing smashed branch @ {} from SrcRec addr={}\n",
             br->first, (void*)br->second);
      ibRecs.emplace_back(*br);
      s_smashedBranches.erase(br);
    }
  }

  return ibRecs;
}

/*
 * Erase any metadata associated with loc from the TC.
 */
void clearTransLocMaps(TransLoc loc) {
  ITRACE(1, "Clearing translation meta-data\n");
  Trace::Indent _i2;
  clearTCMaps(loc.mainStart(), loc.mainEnd());
  clearTCMaps(loc.coldCodeStart(), loc.coldEnd());
  clearTCMaps(loc.frozenCodeStart(), loc.frozenEnd());
}

/*
 * DEBUG_ONLY: write ud2/int3 over a region of the TC beginning at start and
 * extending len bytes. Use info as the name of the associated CodeBlock.
 */
void clearRange(TCA start, size_t len, const char* info) {
  CodeBlock cb;
  cb.init(start, len, info);

  CGMeta fixups;
  SCOPE_EXIT {
    // In general, fixups should be empty at this point. However, a fallthru
    // instruction is appended to any empty block and, on ARM, fallthru
    // instructions add address immediates in fixups.addressImmediates.
    assertx(arch() == Arch::ARM || fixups.empty());
  };

  DataBlock db;
  Vauto vasm { cb, cb, db, fixups };
  vasm.unit().padding = true;
}

/*
 * Free loc from the TC. It will be made available for reuse immediately; all
 * associated metadata must be cleared prior to calling this function.
 */
void freeTransLoc(TransLoc loc) {
  auto codeLock = lockCode();
  auto& cache = code();
  cache.blockFor(loc.mainStart()).free(loc.mainStart(), loc.mainSize());
  cache.blockFor(loc.coldStart()).free(loc.coldStart(), loc.coldSize());
  if (loc.coldStart() != loc.frozenStart()) {
    cache.blockFor(loc.frozenStart()).free(loc.frozenStart(), loc.frozenSize());
  }

  if (debug) {
    // Ensure no one calls into the function
    clearRange(loc.mainStart(), loc.mainSize(), "Dead Main");
    clearRange(loc.coldStart(), loc.coldSize(), "Dead Cold");
    if (loc.coldStart() != loc.frozenStart()) {
      clearRange(loc.frozenStart(), loc.frozenSize(), "Dead Frozen");
    }
  }
}
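
/*
 * Reclaim loc synchronously: detach incoming branches, clear the
 * translation's metadata, and return its memory to the allocator. freedSr,
 * when non-null, is the SrcRec currently being freed by reclaimSrcRecSync;
 * its own incoming-branch records are left alone.
 */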
void reclaimTranslationSync(TransLoc loc, const SrcRec* freedSr = nullptr) {
  ITRACE(1, "Reclaiming translation M[{}, {}] C[{}, {}] F[{}, {}]\n",
         loc.mainStart(), loc.mainEnd(), loc.coldStart(), loc.coldEnd(),
         loc.frozenStart(), loc.frozenEnd());

  Trace::Indent _i;

  // Extract the SrcRecs with smashed branches first to avoid a rank violation
  // between the data lock and SrcRec lock.
  auto ibRecs = eraseSmashedBranches(loc);

  for (auto sr : ibRecs) {
    // When called from reclaimSrcRecSync, freedSr is the SrcRec being freed
    if (sr.second == freedSr) continue;
    sr.second->removeIncomingBranch(sr.first);
  }

  clearTransLocMaps(loc);

  // Do this last; it will make the TransLoc available for reuse.
  freeTransLoc(loc);
}

/*
 * Reclaim all translations associated with a SrcRec.
 */
void reclaimSrcRecSync(const SrcRec* rec) {
  auto srLock = rec->readlock();
  ITRACE(1, "Reclaiming SrcRec addr={}\n", (void*)rec);

  Trace::Indent _i;

  for (auto& loc : rec->translations()) {
    reclaimTranslationSync(loc, rec);
  }
}
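
// Asynchronous wrappers: hand the work to the reaper thread.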
void reclaimTranslation(TransLoc loc) { enqueueJob(loc); }
void reclaimSrcRec(const SrcRec* sr) { enqueueJob(sr); }
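
/*
 * Remove and return the FuncInfo for fid, erasing any smashed-call records
 * keyed by its callers. Returns std::nullopt if the function was never
 * recorded.
 */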
Optional<FuncInfo> eraseFuncInfo(FuncId fid) {
  auto dataLock = lockData();

  auto it = s_funcTCData.find(fid);
  if (it == s_funcTCData.end()) return std::nullopt;

  auto data = std::move(it->second);
  s_funcTCData.erase(it);

  for (auto& caller : data.callers) {
    s_smashedCalls.erase(caller);
  }

  return std::move(data);
}
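
/*
 * Tear down all TC state for the named function: unsmash its callers, then
 * treadmill reclamation of its prologues and SrcRecs.
 */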
void reclaimFunctionSync(const StringData* fname, FuncId fid) {
  ITRACE(1, "Tearing down func {} (id={})\n", fname->data(), fid);
  Trace::Indent _i;

  auto data = eraseFuncInfo(fid);
  auto& us = ustubs();

  if (!data) return;

  for (auto& caller : data->callers) {
    ITRACE(1, "Unsmashing call @ {}\n", caller);
    smashCall(caller, us.immutableBindCallStub);
  }

  // We just smashed all of those callers -- treadmill the free to avoid a
  // race (threads executing callers may end up inside the guard even though
  // the function is now unreachable). Once the following block runs the
  // guards should be unreachable.
  Treadmill::enqueue([fname, fid, data = std::move(*data)] {
    ITRACE(1, "Reclaiming func {} (id={})\n", fname, fid);
    Trace::Indent _i2;
    {
      ITRACE(1, "Reclaiming Prologues\n");
      Trace::Indent _i3;
      for (auto& loc : data.prologues) {
        reclaimTranslation(loc);
      }
    }

    for (auto* rec : data.srcRecs) {
      reclaimSrcRec(rec);
    }
  });
}
}

////////////////////////////////////////////////////////////////////////////////

int smashedCalls() { return s_smashedCalls.size(); }
int smashedBranches() { return s_smashedBranches.size(); }
int recordedFuncs() { return s_funcTCData.size(); }

namespace {
ServiceData::CounterCallback s_counters(
  [](std::map<std::string, int64_t>& counters) {
    if (!RuntimeOption::EvalEnableReusableTC) return;

    counters["jit.tc.smashed_calls"] = s_smashedCalls.size();
    counters["jit.tc.recorded_funcs"] = s_funcTCData.size();
    counters["jit.tc.smashed_branches"] = s_smashedBranches.size();
  }
);
}

////////////////////////////////////////////////////////////////////////////////
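
/*
 * Recording hooks: register callers, SrcRecs, prologues, and
 * cross-translation jumps as they are created, so they can be found (and
 * smashed) when the space is later reclaimed.
 */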
void recordFuncCaller(const Func* func, TCA toSmash, ProfTransRec* rec) {
  auto dataLock = lockData();

  FTRACE(1, "Recording smashed call @ {} to func {} (id = {})\n",
         toSmash, func->fullName()->data(), func->getFuncId());

  s_funcTCData[func->getFuncId()].callers.emplace(toSmash);
  s_smashedCalls[toSmash] = SmashedCall{func->getFuncId(), rec};
}

void recordFuncSrcRec(const Func* func, SrcRec* rec) {
  auto dataLock = lockData();

  FTRACE(1, "Recording SrcRec for func {} (id = {}) addr = {}\n",
         func->fullName()->data(), func->getFuncId(), (void*)rec);

  s_funcTCData[func->getFuncId()].srcRecs.emplace_back(rec);
}

void recordFuncPrologue(const Func* func, TransLoc loc) {
  auto dataLock = lockData();

  FTRACE(1, "Recording Prologue for func {} (id = {}) main={}\n",
         func->fullName()->data(), func->getFuncId(), loc.entry());

  s_funcTCData[func->getFuncId()].prologues.emplace_back(loc);
}

void recordJump(TCA toSmash, SrcRec* sr) {
  auto dataLock = lockData();

  FTRACE(1, "Recording smashed branch @ {} to SrcRec addr={}\n",
         toSmash, (void*)sr);

  s_smashedBranches[toSmash] = sr;
}

////////////////////////////////////////////////////////////////////////////////

void reclaimFunction(const Func* func) {
  enqueueJob(FuncJob {func->name(), func->getFuncId()});
}

void reclaimTranslations(GrowableVector<TransLoc>&& trans) {
  Treadmill::enqueue([trans = std::move(trans)]() mutable {
    for (auto& loc : trans) {
      reclaimTranslation(loc);
    }
  });
}
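
/*
 * Start the reaper thread. Each job is processed under a ProfData::Session so
 * profiling metadata remains accessible during reclamation.
 */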
void recycleInit() {
  if (!RuntimeOption::EvalEnableReusableTC) return;

  s_running.store(true, std::memory_order_release);
  s_reaper = std::thread([] {
    rds::local::init();
    SCOPE_EXIT { rds::local::fini(); };
    while (auto j = dequeueJob()) {
      ProfData::Session pds;
      match<void>(
        *j,
        [] (TransLoc loc) { reclaimTranslationSync(loc); },
        [] (const SrcRec* sr) { reclaimSrcRecSync(sr); },
        [] (FuncJob j) { reclaimFunctionSync(j.fname, j.fid); }
      );
    }
  });
}
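
/*
 * Signal shutdown, wake the reaper, and join it; any jobs still queued are
 * dropped, since dequeueJob() returns std::nullopt once s_running is cleared.
 */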
void recycleStop() {
  if (!s_running.load(std::memory_order_acquire)) return;
  s_running.store(false, std::memory_order_release);
  s_qcv.notify_all();
  s_reaper.join();
}

///////////////////////////////////////////////////////////////////////////////

}