2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/tc.h"
18 #include "hphp/runtime/vm/jit/tc-internal.h"
20 #include "hphp/runtime/vm/func.h"
21 #include "hphp/runtime/vm/treadmill.h"
23 #include "hphp/runtime/vm/jit/cg-meta.h"
24 #include "hphp/runtime/vm/jit/func-order.h"
25 #include "hphp/runtime/vm/jit/types.h"
26 #include "hphp/runtime/vm/jit/prof-data.h"
27 #include "hphp/runtime/vm/jit/relocation.h"
28 #include "hphp/runtime/vm/jit/service-requests.h"
29 #include "hphp/runtime/vm/jit/smashable-instr.h"
30 #include "hphp/runtime/vm/jit/srcdb.h"
31 #include "hphp/runtime/vm/jit/tc-intercept.h"
32 #include "hphp/runtime/vm/jit/vasm-gen.h"
34 #include "hphp/util/arch.h"
35 #include "hphp/util/asm-x64.h"
36 #include "hphp/util/match.h"
37 #include "hphp/util/rds-local.h"
38 #include "hphp/util/trace.h"
40 #include "hphp/vixl/a64/instructions-a64.h"
42 #include <condition_variable>
45 * This module implements garbage collection for the translation cache so that
46 * unreachable translations may be overwritten by new translations.
48 * Unreachable translations are created by either:
49 * (1) Freeing a function through the treadmill
50 * (2) Replacing profiling translations in a SrcRec
52 * SrcRecs and prologues are recorded as they are emitted into the TC so that
53 * when their associated function becomes unreachable they may be freed. In the
54 * case of profiling translations, these are sometimes freed eagerly when they
55 * become unreachable, as they will be erased from their associated SrcRec and
56 * are not tracked elsewhere.
58 * Function callers and inter-translation jumps are recorded so that they may
59 * be smashed when space is reclaimed within the TC.
61 * Freed memory is tracked and allocated using the policy defined in DataBlock,
62 * and allocation is performed in MCGenerator following the creation of a new
65 * Rather than emit new translations directly into freed memory they are written
66 * at the end of the TC and then relocated into freed memory. As the space
67 * required for a translation will be unknown until it is complete this strategy
68 * allows allocation of an appropriately sized block.
70 * Currently all allocation and deallocation is done eagerly, therefore the
71 * performance of the module is dependent on accurately detecting unreachable
72 * functions and translations.
74 * This module exports diagnostic data in the form of counts of smashed calls
75 * and branches, and recorded functions. Higher level diagnostic data exported
76 * by DataBlock may be of more use in tracking TC health. In particular, the
77 * number of free bytes and free blocks give a rough measure of fragmentation
78 * within the allocator.
80 * See DataBlock for details about the allocation strategy and free memory
83 namespace HPHP::jit::tc
{
85 ///////////////////////////////////////////////////////////////////////////////
87 TRACE_SET_MOD(reusetc
);
92 FuncInfo(FuncInfo
&&) = default;
93 FuncInfo
& operator=(FuncInfo
&&) = default;
95 std::vector
<TransLoc
> prologues
;
96 std::vector
<SrcRec
*> srcRecs
;
97 jit::fast_set
<TCA
> callers
;
105 std::mutex s_dataLock
;
109 SmashedCall
/* target */
117 // Keyed on FuncId as these are never reused
118 jit::fast_map
<FuncId
, FuncInfo
> s_funcTCData
;
121 const StringData
* fname
;
// A unit of reclamation work: one of a FuncJob, a SrcRec pointer or a TransLoc.
125 using Job
= boost::variant
<FuncJob
, const SrcRec
*, TransLoc
>;
// Run flag, initially false. It is read with acquire loads and written with
// release stores elsewhere in this file.
126 std::atomic
<bool> s_running
{false};
// Queue of pending jobs.
127 std::queue
<Job
> s_jobq
;
// Condition variable declared alongside the job queue.
128 std::condition_variable s_qcv
;
// Thread object; a std::thread is assigned to it further down in this file.
130 std::thread s_reaper
;
// Submit a job. Only the acquisition of s_qlock is visible in this excerpt.
// NOTE(review): the push onto s_jobq and any wake-up of the consumer
// presumably follow -- confirm against the full file.
132 void enqueueJob(Job j
) {
133 std::unique_lock
<std::mutex
> l
{s_qlock
};
// Fetch the job at the front of s_jobq, or return std::nullopt once s_running
// has been cleared. Runs with s_qlock held.
139 Optional
<Job
> dequeueJob() {
140 std::unique_lock
<std::mutex
> l
{s_qlock
};
// Predicate: true when s_running is false or a job is available.
// NOTE(review): presumably the body of a condition-variable wait; the
// enclosing call is not visible in this excerpt.
142 return !s_running
.load(std::memory_order_acquire
) || !s_jobq
.empty();
// A cleared run flag takes precedence: nullopt is returned even if jobs remain
// in the queue.
145 if (!s_running
.load(std::memory_order_acquire
)) return std::nullopt
;
// Still running, so the queue is expected to be non-empty here.
146 assertx(!s_jobq
.empty());
147 auto ret
= s_jobq
.front();
// Acquire s_dataLock and return the held lock to the caller.
152 std::unique_lock
<std::mutex
> lockData() {
153 return std::unique_lock
<std::mutex
>(s_dataLock
);
157 * Removes meta-data about a caller to a proflogue from prof-data to ensure that
158 * a call to an optimized translation isn't wrongly smashed later.
// Remove toSmash from rec's main-caller list. Does nothing when rec is null or
// rec->isProflogue() is false.
160 void clearProfCaller(TCA toSmash
, ProfTransRec
* rec
) {
161 if (!rec
|| !rec
->isProflogue()) return;
// Hold the record's caller-list lock while removing the caller.
163 auto lock
= rec
->lockCallerList();
164 rec
->removeMainCaller(toSmash
);
168 * Erase any metadata referencing a call at address start and return the
169 * SmashedCall record if the call referenced a ProfTransRec.
// Look up start in s_smashedCalls under the data lock. When an entry exists it
// is removed from s_smashedCalls and from the caller set of the function it
// was recorded against (s_funcTCData[fid].callers).
171 Optional
<SmashedCall
> eraseSmashedCall(TCA start
) {
172 auto dataLock
= lockData();
173 auto it
= s_smashedCalls
.find(start
);
174 if (it
!= s_smashedCalls
.end()) {
// Move the record out first; the map entry is erased below.
175 auto scall
= std::move(it
->second
);
176 ITRACE(1, "Erasing smashed call mapping @ {} to ProfTransRec {}\n",
178 s_funcTCData
[scall
.fid
].callers
.erase(start
);
179 s_smashedCalls
.erase(it
);
// The record is only handed back when it carries a ProfTransRec.
180 if (scall
.rec
) return scall
;
186 * Clear bound branch and call data associated with range [start, end) in the
187 * TC. Also sets all catch-traces to null to ensure that they are reset as
188 * appropriate in any future translation (the unwinder always_asserts on null
189 * catch trace annotations).
// Erase metadata for addresses in [start, end): intercept records for the
// whole range up front, then jump TransIDs, call FuncIds, catch traces, inline
// stacks and smashed-call records at each address the loop visits.
// NOTE(review): the code that advances start is not visible in this excerpt.
191 void clearTCMaps(TCA start
, TCA end
) {
192 auto const profData
= jit::profData();
193 deleteRangeInterceptTCA(start
, end
);
194 while (start
< end
) {
195 bool isBranch
, isNop
, isCall
;
// Classify the instruction with the vixl (A64) decoder.
// NOTE(review): the condition that selects this path over the x64 one below is
// not visible in this excerpt.
199 using namespace vixl
;
200 Instruction
* instr
= Instruction::Cast(start
);
201 isBranch
= instr
->IsCondBranchImm() || instr
->IsUncondBranchImm() ||
202 instr
->IsUncondBranchReg() || instr
->IsCompareBranch() ||
203 instr
->IsTestBranch();
204 isNop
= instr
->Mask(SystemHintFMask
) == SystemHintFixed
&&
205 instr
->ImmHint() == NOP
;
206 isCall
= instr
->Mask(UnconditionalBranchMask
) == BL
||
207 instr
->Mask(UnconditionalBranchToRegisterMask
) == BLR
;
208 instSz
= vixl::kInstructionSize
;
// Classify the instruction with the x64 decoder.
212 x64::DecodedInstruction
di(start
);
213 isBranch
= di
.isBranch();
215 isCall
= di
.isCall();
// Jump TransIDs are only cleared for branch, nop and call instructions, and
// only when profData is non-null.
221 if (profData
&& (isBranch
|| isNop
|| isCall
)) {
222 auto const id
= profData
->clearJmpTransID(start
);
223 if (id
!= kInvalidTransID
) {
224 ITRACE(1, "Erasing jmpTransID @ {} to {}\n", start
, id
);
228 FuncOrder::clearCallFuncId(start
);
230 eraseCatchTrace(start
);
231 eraseInlineStack(start
);
// eraseSmashedCall only yields a value for calls that referenced a
// ProfTransRec; in that case also drop this caller from the record.
233 if (auto call
= eraseSmashedCall(start
)) {
234 clearProfCaller(start
, call
->rec
);
242 * Erase all metadata associated with branches to loc. This does not update the
243 * associated IB records on the SrcRec for loc. Returns a vector of the erased
// Under the data lock, walk s_smashedBranches and, for every entry whose
// address lies inside loc, copy the (address, SrcRec*) pair into ibRecs and
// erase it from the map.
246 std::vector
<std::pair
<TCA
, SrcRec
*>> eraseSmashedBranches(TransLoc loc
) {
247 auto dataLock
= lockData();
248 std::vector
<std::pair
<TCA
, SrcRec
*>> ibRecs
;
// The loop header has no increment expression.
// NOTE(review): `br` is defined on a line not visible in this excerpt;
// presumably it captures the current iterator while `it` is advanced so that
// erasing `br` is safe -- confirm against the full file.
249 for (auto it
= s_smashedBranches
.begin(); it
!= s_smashedBranches
.end();) {
251 if (loc
.contains(br
->first
)) {
252 ITRACE(1, "Erasing smashed branch @ {} from SrcRec addr={}\n",
253 br
->first
, (void*)br
->second
);
254 ibRecs
.emplace_back(*br
);
255 s_smashedBranches
.erase(br
);
263 * Erase any metadata associated with loc from the TC.
// Run clearTCMaps over each of the three code ranges of loc: main, cold and
// frozen.
265 void clearTransLocMaps(TransLoc loc
) {
266 ITRACE(1, "Clearing translation meta-data\n");
268 clearTCMaps(loc
.mainStart(), loc
.mainEnd());
// The cold and frozen ranges start at coldCodeStart()/frozenCodeStart() here,
// not at the coldStart()/frozenStart() used when freeing.
269 clearTCMaps(loc
.coldCodeStart(), loc
.coldEnd());
270 clearTCMaps(loc
.frozenCodeStart(), loc
.frozenEnd());
274 * DEBUG_ONLY: write ud2/int3 over a region of the TC beginning at start and
275 * extending len bytes. Use info as the name of the associated CodeBlock.
// Initialise a code block over [start, start + len) named info and set up a
// Vauto over it with the unit's padding flag enabled.
// NOTE(review): the declarations of cb, db and fixups, and the instructions
// emitted through vasm, are not visible in this excerpt.
277 void clearRange(TCA start
, size_t len
, const char* info
) {
279 cb
.init(start
, len
, info
);
283 // In general, fixups should be empty at this point. However, a fallthru
284 // instruction is appended to any empty block and, on ARM, fallthru
285 // instructions add address immediates in the fixups.addressImmediates.
286 assertx(arch() == Arch::ARM
|| fixups
.empty());
// cb is passed twice to the Vauto constructor.
290 Vauto vasm
{ cb
, cb
, db
, fixups
};
291 vasm
.unit().padding
= true;
295 * Free loc from the TC. It will be made available for reuse immediately, all
296 * associated metadata must be cleared prior to calling this function.
// Return loc's main, cold and frozen ranges to the blocks that own them, then
// pass the same ranges to clearRange. Runs with the code lock held.
298 void freeTransLoc(TransLoc loc
) {
299 auto codeLock
= lockCode();
300 auto& cache
= code();
301 cache
.blockFor(loc
.mainStart()).free(loc
.mainStart(), loc
.mainSize());
302 cache
.blockFor(loc
.coldStart()).free(loc
.coldStart(), loc
.coldSize());
// The frozen range is freed only when it starts at a different address from
// the cold range.
303 if (loc
.coldStart() != loc
.frozenStart()) {
304 cache
.blockFor(loc
.frozenStart()).free(loc
.frozenStart(), loc
.frozenSize());
308 // Ensure no one calls into the function
309 clearRange(loc
.mainStart(), loc
.mainSize(), "Dead Main");
310 clearRange(loc
.coldStart(), loc
.coldSize(), "Dead Cold");
// Same cold/frozen start check as for the free above.
311 if (loc
.coldStart() != loc
.frozenStart()) {
312 clearRange(loc
.frozenStart(), loc
.frozenSize(), "Dead Frozen");
// Reclaim one translation: erase the smashed-branch records that fall inside
// loc, remove the matching incoming branches from their SrcRecs, then clear
// loc's remaining metadata. freedSr, when non-null, names a SrcRec whose
// incoming-branch list is left untouched.
317 void reclaimTranslationSync(TransLoc loc
, const SrcRec
* freedSr
= nullptr) {
318 ITRACE(1, "Reclaiming translation M[{}, {}] C[{}, {}] F[{}, {}]\n",
319 loc
.mainStart(), loc
.mainEnd(), loc
.coldStart(), loc
.coldEnd(),
320 loc
.frozenStart(), loc
.frozenEnd());
324 // Extract the SrcRecs with smashed branches first to avoid a rank violation
325 // between the data lock and SrcRec lock.
326 auto ibRecs
= eraseSmashedBranches(loc
);
// Each element is an (address, SrcRec*) pair.
328 for (auto sr
: ibRecs
) {
329 // When called from reclaimSrcRec freedSr is the SrcRec being freed
330 if (sr
.second
== freedSr
) continue;
331 sr
.second
->removeIncomingBranch(sr
.first
);
334 clearTransLocMaps(loc
);
336 // Do this last, it will make the TransLoc available for reuse.
341 * Reclaim all translations associated with a SrcRec.
// Reclaim every translation listed by rec->translations(), holding rec's read
// lock.
343 void reclaimSrcRecSync(const SrcRec
* rec
) {
344 auto srLock
= rec
->readlock();
345 ITRACE(1, "Reclaiming SrcRec addr={}\n", (void*)rec
);
// rec is passed as freedSr so that branches recorded against rec itself are
// skipped inside reclaimTranslationSync.
349 for (auto& loc
: rec
->translations()) {
350 reclaimTranslationSync(loc
, rec
);
// Queue loc as a job via enqueueJob instead of reclaiming it inline.
354 void reclaimTranslation(TransLoc loc
) { enqueueJob(loc
); }
// Queue sr as a job via enqueueJob instead of reclaiming it inline.
355 void reclaimSrcRec(const SrcRec
* sr
) { enqueueJob(sr
); }
// Remove and return the FuncInfo recorded for fid, or std::nullopt when none
// is recorded. Each caller address in the removed record is also erased from
// s_smashedCalls. Runs under the data lock.
357 Optional
<FuncInfo
> eraseFuncInfo(FuncId fid
) {
358 auto dataLock
= lockData();
360 auto it
= s_funcTCData
.find(fid
);
361 if (it
== s_funcTCData
.end()) return std::nullopt
;
// Move the record out before its map entry is erased.
363 auto data
= std::move(it
->second
);
364 s_funcTCData
.erase(it
);
366 for (auto& caller
: data
.callers
) {
367 s_smashedCalls
.erase(caller
);
370 return std::move(data
);
// Tear down the TC data recorded for function fid: erase its FuncInfo, re-smash
// each recorded caller to us.immutableBindCallStub, then hand the prologues
// and SrcRecs to the treadmill for deferred reclamation.
373 void reclaimFunctionSync(const StringData
* fname
, FuncId fid
) {
374 ITRACE(1, "Tearing down func {} (id={})\n", fname
->data(), fid
);
// NOTE(review): data is an Optional and is dereferenced below; no emptiness
// check is visible in this excerpt -- confirm one exists in the full file.
377 auto data
= eraseFuncInfo(fid
);
// NOTE(review): `us` is defined on a line not visible in this excerpt.
382 for (auto& caller
: data
->callers
) {
383 ITRACE(1, "Unsmashing call @ {}\n", caller
);
384 smashCall(caller
, us
.immutableBindCallStub
);
387 // We just smashed all of those callers-- treadmill the free to avoid a
388 // race (threads executing callers may end up inside the guard even though
389 // the function is now unreachable). Once the following block runs the guards
390 // should be unreachable.
// The FuncInfo is moved into the closure; fname and fid are captured by copy.
391 Treadmill::enqueue([fname
, fid
, data
= std::move(*data
)] {
// Unlike the trace above, this one passes fname itself, not fname->data().
392 ITRACE(1, "Reclaiming func {} (id={})\n", fname
, fid
);
395 ITRACE(1, "Reclaiming Prologues\n");
397 for (auto& loc
: data
.prologues
) {
398 reclaimTranslation(loc
);
// NOTE(review): the body of this loop is not visible in this excerpt.
402 for (auto* rec
: data
.srcRecs
) {
409 ////////////////////////////////////////////////////////////////////////////////
// Number of entries in s_smashedCalls, returned as int. Does not take the data
// lock.
411 int smashedCalls() { return s_smashedCalls
.size(); }
// Number of entries in s_smashedBranches, returned as int. Does not take the
// data lock.
412 int smashedBranches() { return s_smashedBranches
.size(); }
// Number of entries in s_funcTCData, returned as int. Does not take the data
// lock.
413 int recordedFuncs() { return s_funcTCData
.size(); }
// Counter callback that writes the sizes of the three tracking maps into the
// supplied counter map under the "jit.tc.*" keys. It writes nothing when
// EvalEnableReusableTC is off.
416 ServiceData::CounterCallback
s_counters(
417 [](std::map
<std::string
, int64_t>& counters
) {
418 if (!RuntimeOption::EvalEnableReusableTC
) return;
420 counters
["jit.tc.smashed_calls"] = s_smashedCalls
.size();
421 counters
["jit.tc.recorded_funcs"] = s_funcTCData
.size();
422 counters
["jit.tc.smashed_branches"] = s_smashedBranches
.size();
427 ////////////////////////////////////////////////////////////////////////////////
// Record that the call at toSmash targets func. The address is added to
// func's caller set, and s_smashedCalls maps it to the (FuncId, rec) pair.
// Runs under the data lock.
429 void recordFuncCaller(const Func
* func
, TCA toSmash
, ProfTransRec
* rec
) {
430 auto dataLock
= lockData();
432 FTRACE(1, "Recording smashed call @ {} to func {} (id = {})\n",
433 toSmash
, func
->fullName()->data(), func
->getFuncId());
// operator[] creates the FuncInfo entry if this FuncId has none yet.
435 s_funcTCData
[func
->getFuncId()].callers
.emplace(toSmash
);
// Assignment overwrites any record already stored for this address.
436 s_smashedCalls
[toSmash
] = SmashedCall
{func
->getFuncId(), rec
};
// Append rec to the SrcRec list kept for func in s_funcTCData. Runs under the
// data lock.
439 void recordFuncSrcRec(const Func
* func
, SrcRec
* rec
) {
440 auto dataLock
= lockData();
442 FTRACE(1, "Recording SrcRec for func {} (id = {}) addr = {}\n",
443 func
->fullName()->data(), func
->getFuncId(), (void*)rec
);
444 s_funcTCData
[func
->getFuncId()].srcRecs
.emplace_back(rec
);
// Append loc to the prologue list kept for func in s_funcTCData. Runs under
// the data lock.
447 void recordFuncPrologue(const Func
* func
, TransLoc loc
) {
448 auto dataLock
= lockData();
450 FTRACE(1, "Recording Prologue for func {} (id = {}) main={}\n",
451 func
->fullName()->data(), func
->getFuncId(), loc
.entry());
452 s_funcTCData
[func
->getFuncId()].prologues
.emplace_back(loc
);
// Record that the branch at toSmash targets sr. Runs under the data lock.
455 void recordJump(TCA toSmash
, SrcRec
* sr
) {
456 auto dataLock
= lockData();
458 FTRACE(1, "Recording smashed branch @ {} to SrcRec addr={}\n",
// Assignment overwrites any SrcRec already recorded for this address.
460 s_smashedBranches
[toSmash
] = sr
;
463 ////////////////////////////////////////////////////////////////////////////////
// Queue a FuncJob built from func's name and FuncId. Only those two values are
// placed in the job, not the Func pointer.
465 void reclaimFunction(const Func
* func
) {
466 enqueueJob(FuncJob
{func
->name(), func
->getFuncId()});
// Hand the vector to Treadmill::enqueue. When the callback runs, it calls
// reclaimTranslation on each element.
469 void reclaimTranslations(GrowableVector
<TransLoc
>&& trans
) {
// The vector is moved into the closure.
470 Treadmill::enqueue([trans
= std::move(trans
)]() mutable {
471 for (auto& loc
: trans
) {
472 reclaimTranslation(loc
);
479 if (!RuntimeOption::EvalEnableReusableTC
) return;
481 s_running
.store(true, std::memory_order_release
);
482 s_reaper
= std::thread([] {
484 SCOPE_EXIT
{ rds::local::fini(); };
485 while (auto j
= dequeueJob()) {
486 ProfData::Session pds
;
489 [] (TransLoc loc
) { reclaimTranslationSync(loc
); },
490 [] (const SrcRec
* sr
) { reclaimSrcRecSync(sr
); },
491 [] (FuncJob j
) { reclaimFunctionSync(j
.fname
, j
.fid
); }
498 if (!s_running
.load(std::memory_order_acquire
)) return;
499 s_running
.store(false, std::memory_order_release
);
504 ///////////////////////////////////////////////////////////////////////////////