Store num args instead of offset in prologue and func entry SrcKeys
[hiphop-php.git] / hphp / runtime / vm / jit / tc-recycle.cpp
blob76f60e229a00bd2c9192d7234cbe7886ac5d5187
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/tc.h"
18 #include "hphp/runtime/vm/jit/tc-internal.h"
20 #include "hphp/runtime/vm/func.h"
21 #include "hphp/runtime/vm/treadmill.h"
23 #include "hphp/runtime/vm/jit/cg-meta.h"
24 #include "hphp/runtime/vm/jit/func-order.h"
25 #include "hphp/runtime/vm/jit/types.h"
26 #include "hphp/runtime/vm/jit/prof-data.h"
27 #include "hphp/runtime/vm/jit/relocation.h"
28 #include "hphp/runtime/vm/jit/service-requests.h"
29 #include "hphp/runtime/vm/jit/smashable-instr.h"
30 #include "hphp/runtime/vm/jit/srcdb.h"
31 #include "hphp/runtime/vm/jit/vasm-gen.h"
33 #include "hphp/util/arch.h"
34 #include "hphp/util/asm-x64.h"
35 #include "hphp/util/match.h"
36 #include "hphp/util/rds-local.h"
37 #include "hphp/util/trace.h"
39 #include "hphp/vixl/a64/instructions-a64.h"
41 #include <condition_variable>
44 * This module implements garbage collection for the translation cache so that
45 * unreachable translations may be overridden by new translations.
47 * Unreachable translations are created by either:
48 * (1) Freeing a function through the treadmill
49 * (2) Replacing profiling translations in a SrcRec
51 * SrcRecs and prologues are recorded as they are emitted in to the TC so that
52 * when their associated function becomes unreachable they may be freed. In the
53 * case of profiling translations, these are sometimes freed eagerly when they
54 * become unreachable, as they will be erased from their associated SrcRec and
55 * are not tracked elsewhere.
57 * Function callers and inter-translation jumps are recorded so that they may
58 * be smashed when space is reclaimed with the TC.
60 * Freed memory is tracked and allocated using the policy defined in DataBlock,
61 * and allocation is performed in MCGenerator following the creation of a new
62 * translation.
64 * Rather than emit new translations directly into freed memory they are written
65 * at the end of the TC and then relocated into freed memory. As the space
66 * required for a translation will be unknown until it is complete this strategy
67 * allows allocation of an appropriately sized block.
69 * Currently all allocation and deallocation is done eagerly, therefore the
70 * performance of the module is dependent on accurately detecting unreachable
71 * functions and translations.
73 * This module exports diagnostic data in the form of counts of smashed calls
74 * and branches, and recorded functions. Higher level diagnostic data exported
75 * by DataBlock may be of more use in tracking TC health. In particular, the
76 * number of free bytes and free blocks give a rough measure of fragmentation
77 * within the allocator.
79 * See DataBlock for details about the allocation strategy and free memory
80 * tracking.
82 namespace HPHP::jit::tc {
84 ///////////////////////////////////////////////////////////////////////////////
86 TRACE_SET_MOD(reusetc);
88 namespace {
/*
 * Per-function record of the TC resources that must be reclaimed when the
 * function becomes unreachable. Stored in s_funcTCData, keyed by FuncId.
 * Move-only: declaring the move operations suppresses copying.
 */
struct FuncInfo {
  FuncInfo() = default;
  FuncInfo(FuncInfo&&) = default;
  FuncInfo& operator=(FuncInfo&&) = default;

  // TC locations of this function's prologues (see recordFuncPrologue).
  std::vector<TransLoc> prologues;
  // SrcRecs belonging to this function (see recordFuncSrcRec).
  std::vector<SrcRec*> srcRecs;
  // Addresses of smashed calls into this function; kept in sync with the
  // keys of s_smashedCalls under s_dataLock.
  jit::fast_set<TCA> callers;
/*
 * Target of a smashed call: the callee's FuncId plus its ProfTransRec.
 * rec may be null (eraseSmashedCall only returns entries with a non-null
 * rec), so check before dereferencing.
 */
struct SmashedCall {
  FuncId fid;
  ProfTransRec* rec;
// Guards the metadata maps below (s_smashedCalls, s_smashedBranches,
// s_funcTCData). Acquire via lockData().
std::mutex s_dataLock;

// Forward map: address of a smashed call -> its target, so the call can be
// un-smashed if the callee is reclaimed.
jit::fast_map<
  TCA /* toSmash */,
  SmashedCall /* target */
> s_smashedCalls;

// Map: address of a smashed branch -> the SrcRec it jumps into.
jit::fast_map<
  TCA /* toSmash */,
  SrcRec* /* dest */
> s_smashedBranches;

// Keyed on FuncId as these are never reused
jit::fast_map<FuncId, FuncInfo> s_funcTCData;

/*
 * Request to tear down a function's TC resources. The name is carried
 * separately so it can be used for tracing inside reclaimFunctionSync.
 */
struct FuncJob {
  const StringData* fname;
  FuncId fid;

// Work items processed by the background reaper thread (see recycleInit).
using Job = boost::variant<FuncJob, const SrcRec*, TransLoc>;
std::atomic<bool> s_running{false};
std::queue<Job> s_jobq;
std::condition_variable s_qcv;
std::mutex s_qlock;
std::thread s_reaper;
/*
 * Push a job onto the reaper queue and wake the reaper thread.
 * The queue lock is released before notifying so the woken thread can
 * acquire it without immediately blocking.
 */
void enqueueJob(Job j) {
  std::unique_lock<std::mutex> l{s_qlock};
  s_jobq.emplace(j);
  l.unlock();
  s_qcv.notify_all();
/*
 * Block until a job is available or shutdown is requested. Returns
 * std::nullopt once s_running has been cleared (see recycleStop), which is
 * how the reaper thread's loop terminates.
 */
Optional<Job> dequeueJob() {
  std::unique_lock<std::mutex> l{s_qlock};
  s_qcv.wait(l, [] {
    return !s_running.load(std::memory_order_acquire) || !s_jobq.empty();
  // Woken for shutdown rather than for work.
  if (!s_running.load(std::memory_order_relaxed)) return std::nullopt;
  assertx(!s_jobq.empty());
  auto ret = s_jobq.front();
  s_jobq.pop();
  return ret;
// Acquire s_dataLock, which guards the smashed-call/branch and per-function
// metadata maps.
std::unique_lock<std::mutex> lockData() {
  return std::unique_lock<std::mutex>(s_dataLock);
/*
 * Removes meta-data about a caller to a proflogue from prof-data to ensure that
 * a call to an optimized translation isn't wrongly smashed later.
 */
void clearProfCaller(TCA toSmash, ProfTransRec* rec) {
  // Only proflogues track their main callers; anything else is a no-op.
  if (!rec || !rec->isProflogue()) return;

  // The caller list has its own lock; hold it across the removal.
  auto lock = rec->lockCallerList();
  rec->removeMainCaller(toSmash);
/*
 * Erase any metadata referencing a call at address start and return the
 * SmashedCall record if the call referenced a ProfTransRec.
 */
Optional<SmashedCall> eraseSmashedCall(TCA start) {
  auto dataLock = lockData();
  auto it = s_smashedCalls.find(start);
  if (it != s_smashedCalls.end()) {
    auto scall = std::move(it->second);
    ITRACE(1, "Erasing smashed call mapping @ {} to ProfTransRec {}\n",
           start, scall.rec);
    // Keep the reverse mapping in the callee's FuncInfo consistent.
    s_funcTCData[scall.fid].callers.erase(start);
    s_smashedCalls.erase(it);
    // Only hand back entries that actually reference a ProfTransRec.
    if (scall.rec) return scall;
  return std::nullopt;
/*
 * Clear bound branch and call data associated with range [start, end) in the
 * TC. Also sets all catch-traces to null to ensure that they are reset as
 * appropriate in any future translation (the unwinder always_asserts on null
 * catch trace annotations).
 */
void clearTCMaps(TCA start, TCA end) {
  auto const profData = jit::profData();
  // Walk the range one decoded instruction at a time so that per-address
  // metadata (jmpTransIDs, catch traces, inline stacks, smashed calls) can
  // be erased at each instruction boundary.
  while (start < end) {
    bool isBranch, isNop, isCall;
    size_t instSz;
    switch (arch()) {
      case Arch::ARM: {
        using namespace vixl;
        Instruction* instr = Instruction::Cast(start);
        isBranch = instr->IsCondBranchImm() || instr->IsUncondBranchImm() ||
          instr->IsUncondBranchReg() || instr->IsCompareBranch() ||
          instr->IsTestBranch();
        isNop = instr->Mask(SystemHintFMask) == SystemHintFixed &&
          instr->ImmHint() == NOP;
        isCall = instr->Mask(UnconditionalBranchMask) == BL ||
          instr->Mask(UnconditionalBranchToRegisterMask) == BLR;
        // ARM instructions are fixed-width.
        instSz = vixl::kInstructionSize;
        break;
      case Arch::X64: {
        x64::DecodedInstruction di(start);
        isBranch = di.isBranch();
        isNop = di.isNop();
        isCall = di.isCall();
        instSz = di.size();
        break;
    // NOPs are considered too -- presumably because smashable jumps/calls
    // may have been overwritten with NOPs; confirm against smashable-instr.
    if (profData && (isBranch || isNop || isCall)) {
      auto const id = profData->clearJmpTransID(start);
      if (id != kInvalidTransID) {
        ITRACE(1, "Erasing jmpTransID @ {} to {}\n", start, id);
    FuncOrder::clearCallFuncId(start);
    // Null catch-trace/inline-stack annotations for this address (see the
    // function comment above).
    eraseCatchTrace(start);
    eraseInlineStack(start);
    if (isCall) {
      if (auto call = eraseSmashedCall(start)) {
        clearProfCaller(start, call->rec);
    start += instSz;
/*
 * Erase all metadata associated with branches to loc. This does not update the
 * associated IB records on the SrcRec for loc. Returns a vector of the erased
 * records.
 */
std::vector<std::pair<TCA, SrcRec*>> eraseSmashedBranches(TransLoc loc) {
  auto dataLock = lockData();
  std::vector<std::pair<TCA, SrcRec*>> ibRecs;
  for (auto it = s_smashedBranches.begin(); it != s_smashedBranches.end();) {
    // Advance before a potential erase() so the loop iterator stays valid.
    auto br = it++;
    if (loc.contains(br->first)) {
      ITRACE(1, "Erasing smashed branch @ {} from SrcRec addr={}\n",
             br->first, (void*)br->second);
      ibRecs.emplace_back(*br);
      s_smashedBranches.erase(br);
  return ibRecs;
/*
 * Erase any metadata associated with loc from the TC.
 */
void clearTransLocMaps(TransLoc loc) {
  ITRACE(1, "Clearing translation meta-data\n");
  Trace::Indent _i2;
  // A translation spans up to three code areas; scrub each of them.
  clearTCMaps(loc.mainStart(), loc.mainEnd());
  clearTCMaps(loc.coldCodeStart(), loc.coldEnd());
  clearTCMaps(loc.frozenCodeStart(), loc.frozenEnd());
/*
 * DEBUG_ONLY: write ud2/int3 over a region of the TC beginning at start and
 * extending length bytes. Use info as the name of the associated CodeBlock.
 */
void clearRange(TCA start, size_t len, const char* info) {
  CodeBlock cb;
  cb.init(start, len, info);

  // No fixups are expected from emitting pure padding.
  CGMeta fixups;
  SCOPE_EXIT { assertx(fixups.empty()); };

  DataBlock db;
  Vauto vasm { cb, cb, db, fixups };
  // Emitting an empty unit with padding enabled fills the block with trap
  // instructions.
  vasm.unit().padding = true;
/*
 * Free loc from the TC. It will be made available for reuse immediately, all
 * associated metadata must be cleared prior to calling this function.
 */
void freeTransLoc(TransLoc loc) {
  auto codeLock = lockCode();
  auto& cache = code();
  cache.blockFor(loc.mainStart()).free(loc.mainStart(), loc.mainSize());
  cache.blockFor(loc.coldStart()).free(loc.coldStart(), loc.coldSize());
  // Cold and frozen may share a start address; only free frozen separately
  // when they are distinct.
  if (loc.coldStart() != loc.frozenStart()) {
    cache.blockFor(loc.frozenStart()).free(loc.frozenStart(), loc.frozenSize());

  if (debug) {
    // Ensure no one calls into the function
    clearRange(loc.mainStart(), loc.mainSize(), "Dead Main");
    clearRange(loc.coldStart(), loc.coldSize(), "Dead Cold");
    if (loc.coldStart() != loc.frozenStart()) {
      clearRange(loc.frozenStart(), loc.frozenSize(), "Dead Frozen");
/*
 * Synchronously reclaim one translation: detach incoming branches, clear its
 * metadata, then return its memory to the allocator. freedSr, when non-null,
 * is the SrcRec currently being torn down (see reclaimSrcRecSync) and is
 * skipped when updating incoming-branch lists.
 */
void reclaimTranslationSync(TransLoc loc, const SrcRec* freedSr = nullptr) {
  ITRACE(1, "Reclaiming translation M[{}, {}] C[{}, {}] F[{}, {}]\n",
         loc.mainStart(), loc.mainEnd(), loc.coldStart(), loc.coldEnd(),
         loc.frozenStart(), loc.frozenEnd());

  Trace::Indent _i;

  // Extract the SrcRecs with smashed branches first to avoid a rank violation
  // between the data lock and SrcRec lock.
  auto ibRecs = eraseSmashedBranches(loc);

  for (auto sr : ibRecs) {
    // When called from reclaimSrcRec freedSr is the SrcRec being freed
    if (sr.second == freedSr) continue;
    sr.second->removeIncomingBranch(sr.first);

  clearTransLocMaps(loc);

  // Do this last, it will make the TransLoc available for reuse.
  freeTransLoc(loc);
/*
 * Reclaim all translations associated with a SrcRec.
 */
void reclaimSrcRecSync(const SrcRec* rec) {
  auto srLock = rec->readlock();
  ITRACE(1, "Reclaiming SrcRec addr={}\n", (void*)rec);

  Trace::Indent _i;

  for (auto& loc : rec->translations()) {
    // Pass rec so reclaimTranslationSync won't touch the incoming-branch
    // list of the SrcRec that is itself being freed.
    reclaimTranslationSync(loc, rec);
347 void reclaimTranslation(TransLoc loc) { enqueueJob(loc); }
348 void reclaimSrcRec(const SrcRec* sr) { enqueueJob(sr); }
/*
 * Remove and return the FuncInfo recorded for fid, also erasing the
 * smashed-call entries for its callers. Returns std::nullopt if no data was
 * ever recorded for fid.
 */
Optional<FuncInfo> eraseFuncInfo(FuncId fid) {
  auto dataLock = lockData();

  auto it = s_funcTCData.find(fid);
  if (it == s_funcTCData.end()) return std::nullopt;

  auto data = std::move(it->second);
  s_funcTCData.erase(it);

  // Drop the forward mappings for every caller we recorded.
  for (auto& caller : data.callers) {
    s_smashedCalls.erase(caller);

  // Explicit move: FuncInfo is move-only (its declared move operations
  // suppress copying).
  return std::move(data);
/*
 * Tear down all TC resources recorded for a function: immediately redirect
 * its smashed callers back to the bind-call stub, then treadmill the release
 * of its prologues and SrcRecs.
 */
void reclaimFunctionSync(const StringData* fname, FuncId fid) {
  ITRACE(1, "Tearing down func {} (id={})\n", fname->data(), fid);
  Trace::Indent _i;

  auto data = eraseFuncInfo(fid);
  auto& us = ustubs();

  // Nothing was ever recorded for this function.
  if (!data) return;

  // Point every recorded caller back at the generic bind-call stub.
  for (auto& caller : data->callers) {
    ITRACE(1, "Unsmashing call @ {}\n", caller);
    smashCall(caller, us.immutableBindCallStub);

  // We just smashed all of those callers-- treadmill the free to avoid a
  // race (threads executing callers may end up inside the guard even though
  // the function is now unreachable). Once the following block runs the guards
  // should be unreachable.
  Treadmill::enqueue([fname, fid, data = std::move(*data)] {
    ITRACE(1, "Reclaiming func {} (id={})\n", fname, fid);
    Trace::Indent _i2;

    ITRACE(1, "Reclaiming Prologues\n");
    Trace::Indent _i3;
    for (auto& loc : data.prologues) {
      reclaimTranslation(loc);

    for (auto* rec : data.srcRecs) {
      reclaimSrcRec(rec);
402 ////////////////////////////////////////////////////////////////////////////////
404 int smashedCalls() { return s_smashedCalls.size(); }
405 int smashedBranches() { return s_smashedBranches.size(); }
406 int recordedFuncs() { return s_funcTCData.size(); }
408 ////////////////////////////////////////////////////////////////////////////////
/*
 * Record a smashed call at toSmash targeting func so it can be un-smashed if
 * func is reclaimed. rec, when non-null, is handed to clearProfCaller when
 * the call's metadata is later erased.
 */
void recordFuncCaller(const Func* func, TCA toSmash, ProfTransRec* rec) {
  auto dataLock = lockData();

  FTRACE(1, "Recording smashed call @ {} to func {} (id = {})\n",
         toSmash, func->fullName()->data(), func->getFuncId());

  // Forward and reverse mappings are kept in sync under s_dataLock.
  s_funcTCData[func->getFuncId()].callers.emplace(toSmash);
  s_smashedCalls[toSmash] = SmashedCall{func->getFuncId(), rec};
/*
 * Record a SrcRec for func so its translations can be reclaimed when func
 * becomes unreachable.
 */
void recordFuncSrcRec(const Func* func, SrcRec* rec) {
  auto dataLock = lockData();

  FTRACE(1, "Recording SrcRec for func {} (id = {}) addr = {}\n",
         func->fullName()->data(), func->getFuncId(), (void*)rec);
  s_funcTCData[func->getFuncId()].srcRecs.emplace_back(rec);
/*
 * Record the TC location of a prologue emitted for func so it can be
 * reclaimed when func becomes unreachable.
 */
void recordFuncPrologue(const Func* func, TransLoc loc) {
  auto dataLock = lockData();

  FTRACE(1, "Recording Prologue for func {} (id = {}) main={}\n",
         func->fullName()->data(), func->getFuncId(), loc.entry());
  s_funcTCData[func->getFuncId()].prologues.emplace_back(loc);
/*
 * Record a smashed branch at toSmash targeting sr, so the branch's metadata
 * can be erased if the translation containing it is reclaimed.
 */
void recordJump(TCA toSmash, SrcRec* sr) {
  auto dataLock = lockData();

  FTRACE(1, "Recording smashed branch @ {} to SrcRec addr={}\n",
         toSmash, (void*)sr);
  s_smashedBranches[toSmash] = sr;
444 ////////////////////////////////////////////////////////////////////////////////
/*
 * Queue an entire function for asynchronous teardown by the reaper thread
 * (see reclaimFunctionSync for the work performed).
 */
void reclaimFunction(const Func* func) {
  enqueueJob(FuncJob {func->name(), func->getFuncId()});
/*
 * Treadmill a batch of translations for reclamation; each is enqueued for
 * the reaper thread once the treadmill fires.
 */
void reclaimTranslations(GrowableVector<TransLoc>&& trans) {
  // mutable: the moved-in vector is iterated from inside the lambda.
  Treadmill::enqueue([trans = std::move(trans)]() mutable {
    for (auto& loc : trans) {
      reclaimTranslation(loc);
/*
 * Start the background reaper thread that drains the job queue. No-op unless
 * reusable TC is enabled.
 */
void recycleInit() {
  if (!RuntimeOption::EvalEnableReusableTC) return;

  s_running.store(true, std::memory_order_release);
  s_reaper = std::thread([] {
    // The reaper needs its own RDS-local storage for the work it performs.
    rds::local::init();
    SCOPE_EXIT { rds::local::fini(); };
    // dequeueJob() returns std::nullopt once recycleStop() clears s_running.
    while (auto j = dequeueJob()) {
      // Hold a ProfData session across each job; the sync routines touch
      // prof-data.
      ProfData::Session pds;
      match<void>(
        [] (TransLoc loc) { reclaimTranslationSync(loc); },
        [] (const SrcRec* sr) { reclaimSrcRecSync(sr); },
        [] (FuncJob j) { reclaimFunctionSync(j.fname, j.fid); }
/*
 * Stop the reaper thread and wait for it to exit. Safe to call when the
 * thread was never started (s_running is still false).
 */
void recycleStop() {
  if (!s_running.load(std::memory_order_acquire)) return;
  s_running.store(false, std::memory_order_release);
  // Wake the reaper so it observes the cleared flag and exits its loop.
  s_qcv.notify_all();
  s_reaper.join();
485 ///////////////////////////////////////////////////////////////////////////////