Collect post-conditions at IR-gen time
hphp/runtime/vm/jit/mc-generator.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 */
17 #include "hphp/runtime/vm/jit/mc-generator.h"
18 #include "hphp/runtime/vm/jit/vtune-jit.h"
20 #include <cinttypes>
21 #include <assert.h>
22 #include <stdarg.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <sys/mman.h>
26 #include <unistd.h>
27 #include <unwind.h>
29 #include <algorithm>
30 #include <exception>
31 #include <memory>
32 #include <queue>
33 #include <string>
34 #include <strstream>
35 #include <unordered_set>
36 #include <vector>
38 #include <folly/Format.h>
39 #include <folly/MapUtil.h>
40 #include <folly/Optional.h>
41 #include <folly/String.h>
43 #include "hphp/util/abi-cxx.h"
44 #include "hphp/util/asm-x64.h"
45 #include "hphp/util/bitops.h"
46 #include "hphp/util/cycles.h"
47 #include "hphp/util/debug.h"
48 #include "hphp/util/disasm.h"
49 #include "hphp/util/maphuge.h"
50 #include "hphp/util/meta.h"
51 #include "hphp/util/process.h"
52 #include "hphp/util/rank.h"
53 #include "hphp/util/repo-schema.h"
54 #include "hphp/util/ringbuffer.h"
55 #include "hphp/util/timer.h"
56 #include "hphp/util/trace.h"
58 #include "hphp/runtime/base/arch.h"
59 #include "hphp/runtime/base/execution-context.h"
60 #include "hphp/runtime/base/rds.h"
61 #include "hphp/runtime/base/runtime-option-guard.h"
62 #include "hphp/runtime/base/runtime-option.h"
63 #include "hphp/runtime/base/stats.h"
64 #include "hphp/runtime/base/strings.h"
65 #include "hphp/runtime/base/zend-string.h"
66 #include "hphp/runtime/ext/ext_closure.h"
67 #include "hphp/runtime/ext/ext_generator.h"
68 #include "hphp/runtime/ext/std/ext_std_function.h"
69 #include "hphp/runtime/server/source-root-info.h"
70 #include "hphp/runtime/vm/bytecode.h"
71 #include "hphp/runtime/vm/debug/debug.h"
72 #include "hphp/runtime/vm/func.h"
73 #include "hphp/runtime/vm/jit/back-end-x64.h" // XXX Layering violation.
74 #include "hphp/runtime/vm/jit/check.h"
75 #include "hphp/runtime/vm/jit/code-gen.h"
76 #include "hphp/runtime/vm/jit/debug-guards.h"
77 #include "hphp/runtime/vm/jit/inlining-decider.h"
78 #include "hphp/runtime/vm/jit/irgen.h"
79 #include "hphp/runtime/vm/jit/normalized-instruction.h"
80 #include "hphp/runtime/vm/jit/opt.h"
81 #include "hphp/runtime/vm/jit/print.h"
82 #include "hphp/runtime/vm/jit/prof-data.h"
83 #include "hphp/runtime/vm/jit/region-selection.h"
84 #include "hphp/runtime/vm/jit/service-requests-inline.h"
85 #include "hphp/runtime/vm/jit/srcdb.h"
86 #include "hphp/runtime/vm/jit/timer.h"
87 #include "hphp/runtime/vm/jit/translate-region.h"
88 #include "hphp/runtime/vm/jit/translator-inline.h"
89 #include "hphp/runtime/vm/jit/vasm-emit.h"
90 #include "hphp/runtime/vm/jit/vasm-instr.h"
91 #include "hphp/runtime/vm/member-operations.h"
92 #include "hphp/runtime/vm/php-debug.h"
93 #include "hphp/runtime/vm/repo.h"
94 #include "hphp/runtime/vm/runtime.h"
95 #include "hphp/runtime/vm/srckey.h"
96 #include "hphp/runtime/vm/treadmill.h"
97 #include "hphp/runtime/vm/type-profile.h"
98 #include "hphp/runtime/vm/unwind.h"
100 #include "hphp/runtime/vm/jit/mc-generator-internal.h"
102 namespace HPHP { namespace jit {
104 TRACE_SET_MOD(mcg);
106 using namespace reg;
107 using namespace Trace;
108 using std::max;
110 #define TRANS_PERF_COUNTERS \
111 TPC(translate) \
112 TPC(retranslate) \
113 TPC(interp_bb) \
114 TPC(interp_instr) \
115 TPC(interp_one) \
116 TPC(max_trans) \
117 TPC(enter_tc) \
118 TPC(service_req)
120 #define TPC(n) "jit_" #n,
121 static const char* const kPerfCounterNames[] = {
122 TRANS_PERF_COUNTERS
123 };
124 #undef TPC
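// Illustrative sketch, not part of the original file: with TPC(n) defined
// as "jit_" #n "," above, TRANS_PERF_COUNTERS expands to one string literal
// per counter, e.g.:
//
//   static const char* const kPerfCounterNames[] = {
//     "jit_translate",
//     "jit_retranslate",
//     // ... one entry per TPC(...) line ...
//   };
//
// Presumably a matching TPC definition elsewhere generates the tpc_* enum
// (tpc_num_counters) that sizes s_perfCounters, keeping names and counter
// slots in sync.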
126 __thread int64_t s_perfCounters[tpc_num_counters];
127 static __thread size_t s_initialTCSize;
129 // The global MCGenerator object.
130 MCGenerator* mcg;
132 CppCall MCGenerator::getDtorCall(DataType type) {
133 switch (type) {
134 case KindOfString:
135 return CppCall::method(&StringData::release);
136 case KindOfArray:
137 return CppCall::method(&ArrayData::release);
138 case KindOfObject:
139 return CppCall::method(&ObjectData::release);
140 case KindOfResource:
141 return CppCall::method(&ResourceData::release);
142 case KindOfRef:
143 return CppCall::method(&RefData::release);
144 DT_UNCOUNTED_CASE:
145 case KindOfClass:
146 break;
147 }
148 not_reached();
149 }
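// Illustrative usage (hypothetical snippet, not in the original file): a
// caller emitting a destructor for a value whose DataType is known at JIT
// time might do:
//
//   auto const call = mcg->getDtorCall(KindOfString);
//   // `call` now wraps &StringData::release, ready to be lowered into a
//   // machine-level method call on the value's data pointer.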
151 ///////////////////////////////////////////////////////////////////////////////
153 bool shouldPGOFunc(const Func& func) {
154 if (!RuntimeOption::EvalJitPGO) return false;
156 // JITing pseudo-mains requires extra checks that blow up the IR. PGO
157 // can significantly increase the size of the regions, so disable it for
158 // pseudo-mains (so regions will be just tracelets).
159 if (func.isPseudoMain()) return false;
161 // Non-cloned closures simply contain prologues that redispatch to
162 // cloned closures. They don't contain a translation for the
163 // function entry, which is what triggers an Optimize retranslation.
164 // So don't generate profiling translations for them -- there's not
165 // much to do with PGO anyway here, since they just have prologues.
166 if (func.isClosureBody() && !func.isClonedClosure()) return false;
168 if (!RuntimeOption::EvalJitPGOHotOnly) return true;
169 return func.attrs() & AttrHot;
170 }
172 bool MCGenerator::profileSrcKey(SrcKey sk) const {
173 if (!shouldPGOFunc(*sk.func())) return false;
174 if (m_tx.profData()->optimized(sk.getFuncId())) return false;
175 if (m_tx.profData()->profiling(sk.getFuncId())) return true;
177 // Don't start profiling new functions if the size of either main or
178 // prof is already above Eval.JitAMaxUsage.
179 auto tcUsage = std::max(code.mainUsed(), code.profUsed());
180 if (tcUsage >= RuntimeOption::EvalJitAMaxUsage) {
181 return false;
182 }
184 return requestCount() <= RuntimeOption::EvalJitProfileRequests;
185 }
187 /*
188 * Invalidate the SrcDB entries for func's SrcKeys that have any
189 * Profile translation.
190 */
191 void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
192 assert(RuntimeOption::EvalJitPGO);
193 FuncId funcId = func->getFuncId();
194 for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
195 invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
196 }
197 }
199 TCA MCGenerator::retranslate(const TranslArgs& args) {
200 auto sr = m_tx.getSrcDB().find(args.sk);
201 always_assert(sr);
202 bool locked = sr->tryLock();
203 SCOPE_EXIT {
204 if (locked) sr->freeLock();
206 if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.sk)) {
207 // We are about to translate something known to be blacklisted by
208 // the debugger; exit early.
209 SKTRACE(1, args.sk, "retranslate abort due to debugger\n");
210 return nullptr;
212 LeaseHolder writer(Translator::WriteLease());
213 if (!writer || !shouldTranslate(args.sk.func())) return nullptr;
214 if (!locked) {
215 // Even though we knew above that we were going to skip
216 // doing another translation, we wait until we get the
217 // write lease, to avoid spinning through the tracelet
218 // guards again and again while another thread is writing
219 // to it.
220 return sr->getTopTranslation();
222 if (sr->translations().size() > RuntimeOption::EvalJitMaxTranslations) {
223 always_assert(sr->translations().size() ==
224 RuntimeOption::EvalJitMaxTranslations + 1);
225 return sr->getTopTranslation();
227 SKTRACE(1, args.sk, "retranslate\n");
229 m_tx.setMode(profileSrcKey(args.sk) ? TransKind::Profile : TransKind::Live);
230 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
232 return translate(args);
235 TCA MCGenerator::retranslateOpt(TransID transId, bool align) {
236 LeaseHolder writer(Translator::WriteLease());
237 if (!writer) return nullptr;
238 if (isDebuggerAttachedProcess()) return nullptr;
240 TRACE(1, "retranslateOpt: transId = %u\n", transId);
242 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
244 if (!m_tx.profData()->hasTransRec(transId)) return nullptr;
246 always_assert(m_tx.profData()->transRegion(transId) != nullptr);
248 auto func = m_tx.profData()->transFunc(transId);
249 auto funcId = func->getFuncId();
250 auto sk = m_tx.profData()->transSrcKey(transId);
252 if (m_tx.profData()->optimized(funcId)) return nullptr;
253 m_tx.profData()->setOptimized(funcId);
255 bool setFuncBody = func->getDVFunclets().size() == 0;
257 func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);
259 // Invalidate SrcDB's entries for all func's SrcKeys.
260 invalidateFuncProfSrcKeys(func);
262 // Regenerate the prologues and DV funclets before the actual function body.
263 TCA start = regeneratePrologues(func, sk);
265 // Regionize func and translate all its regions.
266 std::vector<RegionDescPtr> regions;
267 regionizeFunc(func, this, regions);
269 for (auto region : regions) {
270 m_tx.setMode(TransKind::Optimize);
271 always_assert(!region->empty());
272 auto regionSk = region->start();
273 auto translArgs = TranslArgs{regionSk, align};
274 translArgs.region = region;
276 if (setFuncBody && regionSk.offset() == func->base()) {
277 translArgs.setFuncBody = true;
278 setFuncBody = false;
280 auto regionStart = translate(translArgs);
281 if (start == nullptr && regionSk == sk) {
282 start = regionStart;
284 // Cloned closures' prologue tables point to the corresponding
285 // main/DV entry point. So update the prologue table when
286 // retranslating their entries.
287 if (func->isClonedClosure() && func->isEntry(regionSk.offset()) &&
288 regionStart) {
289 int entryNumParams = func->getEntryNumParams(regionSk.offset());
290 func->setPrologue(entryNumParams, regionStart);
294 m_tx.profData()->freeFuncData(funcId);
296 return start;
297 }
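// Summary sketch of the flow above (added commentary, not original): a
// REQ_RETRANSLATE_OPT for `transId` marks the function optimized exactly
// once, drops its profiling translations from the SrcDB, regenerates
// prologues and DV funclets, then emits one TransKind::Optimize translation
// per selected region; `start` is the entry point for the region whose
// SrcKey matches the requesting `sk`, if any.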
299 static bool liveFrameIsPseudoMain() {
300 ActRec* ar = (ActRec*)vmfp();
301 return ar->hasVarEnv() && ar->getVarEnv()->isGlobalScope();
302 }
304 /*
305 * Find or create a translation for sk. Returns TCA of "best" current
306 * translation. May return NULL if it is currently impossible to create
307 * a translation.
308 */
309 TCA
310 MCGenerator::getTranslation(const TranslArgs& args) {
311 auto sk = args.sk;
312 sk.func()->validate();
313 SKTRACE(2, sk,
314 "getTranslation: curUnit %s funcId %x offset %d\n",
315 sk.unit()->filepath()->data(),
316 sk.getFuncId(),
317 sk.offset());
318 SKTRACE(2, sk, " funcId: %x \n", sk.func()->getFuncId());
320 if (liveFrameIsPseudoMain() && !RuntimeOption::EvalJitPseudomain) {
321 SKTRACE(2, sk, "punting on pseudoMain\n");
322 return nullptr;
324 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
325 TCA tca = sr->getTopTranslation();
326 if (tca) {
327 SKTRACE(2, sk, "getTranslation: found %p\n", tca);
328 return tca;
331 return createTranslation(args);
335 MCGenerator::numTranslations(SrcKey sk) const {
336 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
337 return sr->translations().size();
339 return 0;
342 const StaticString
343 s_php_errormsg("php_errormsg"),
344 s_http_response_header("http_response_header");
346 bool MCGenerator::shouldTranslateNoSizeLimit(const Func* func) const {
347 // If we've hit Eval.JitGlobalTranslationLimit, then we stop translating.
348 if (m_numTrans >= RuntimeOption::EvalJitGlobalTranslationLimit) {
349 return false;
350 }
352 /*
353 * We don't support JIT compiling functions that use some super-dynamic php
354 * variables.
355 */
356 if (func->lookupVarId(s_php_errormsg.get()) != -1 ||
357 func->lookupVarId(s_http_response_header.get()) != -1) {
358 return false;
361 return true;
364 bool MCGenerator::shouldTranslate(const Func* func) const {
365 if (!shouldTranslateNoSizeLimit(func)) return false;
366 // Otherwise, follow the Eval.JitAMaxUsage limit. However, we do
367 // allow Optimize translations past that limit.
368 return code.mainUsed() < RuntimeOption::EvalJitAMaxUsage ||
369 m_tx.mode() == TransKind::Optimize;
373 static void populateLiveContext(RegionContext& ctx) {
374 typedef RegionDesc::Location L;
376 const ActRec* const fp {vmfp()};
377 const TypedValue* const sp {vmsp()};
379 for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
380 ctx.liveTypes.push_back(
381 { L::Local{i}, liveTVType(frame_local(fp, i)) }
382 );
383 }
385 int32_t stackOff = 0;
386 visitStackElems(
387 fp, sp, ctx.bcOffset,
388 [&](const ActRec* ar) {
389 // TODO(#2466980): when it's a Cls, we should pass the Class* in
390 // the Type.
391 auto const objOrCls =
392 ar->hasThis() ? Type::SubObj(ar->getThis()->getVMClass()) :
393 ar->hasClass() ? Type::Cls
394 : Type::Nullptr;
396 ctx.preLiveARs.push_back({
397 stackOff,
398 ar->m_func,
399 objOrCls
400 });
401 FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
402 stackOff += kNumActRecCells;
403 },
404 [&](const TypedValue* tv) {
405 ctx.liveTypes.push_back(
406 { L::Stack{ctx.spOffset - stackOff}, liveTVType(tv) }
407 );
408 stackOff++;
409 FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
410 }
411 );
412 }
414 TCA
415 MCGenerator::createTranslation(const TranslArgs& args) {
416 if (!shouldTranslate(args.sk.func())) return nullptr;
418 /*
419 * Try to become the writer. We delay this until we *know* we will have
420 * a need to create new translations, instead of just trying to win the
421 * lottery at the dawn of time. Hopefully lots of requests won't require
422 * any new translation.
423 */
424 auto sk = args.sk;
425 LeaseHolder writer(Translator::WriteLease());
426 if (!writer || !shouldTranslate(args.sk.func())) return nullptr;
428 if (auto sr = m_tx.getSrcDB().find(sk)) {
429 TCA tca = sr->getTopTranslation();
430 if (tca) {
431 // Handle extremely unlikely race; someone may have just already
432 // added the first instance of this SrcRec while we did a
433 // non-blocking wait on the write lease.
434 return tca;
435 } else {
436 // Since we are holding the write lease, we know that sk is properly
437 // initialized, except that it has no translations (due to
438 // replaceOldTranslations)
439 return retranslate(args);
443 // We put retranslate requests at the end of our slab to more frequently
444 // allow conditional jump fall-throughs
445 TCA astart = code.main().frontier();
446 TCA realColdStart = code.realCold().frontier();
447 TCA realFrozenStart = code.realFrozen().frontier();
448 TCA req = emitServiceReq(code.cold(), REQ_RETRANSLATE,
449 sk.offset(), TransFlags().packed);
450 SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
451 sk.unit(), sk.offset(), req);
452 SrcRec* sr = m_tx.getSrcRec(sk);
453 sr->setFuncInfo(sk.func());
454 sr->setAnchorTranslation(req);
456 size_t asize = code.main().frontier() - astart;
457 size_t realColdSize = code.realCold().frontier() - realColdStart;
458 size_t realFrozenSize = code.realFrozen().frontier() - realFrozenStart;
459 assert(asize == 0);
460 if (realColdSize && RuntimeOption::EvalDumpTCAnchors) {
461 TransRec tr(sk,
462 TransKind::Anchor,
463 astart, asize, realColdStart, realColdSize,
464 realFrozenStart, realFrozenSize);
465 m_tx.addTranslation(tr);
466 if (RuntimeOption::EvalJitUseVtuneAPI) {
467 reportTraceletToVtune(sk.unit(), sk.func(), tr);
470 if (m_tx.profData()) {
471 m_tx.profData()->addTransNonProf(TransKind::Anchor, sk);
473 assert(!m_tx.isTransDBEnabled() ||
474 m_tx.getTransRec(realColdStart)->kind == TransKind::Anchor);
477 return retranslate(args);
481 MCGenerator::lookupTranslation(SrcKey sk) const {
482 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
483 return sr->getTopTranslation();
485 return nullptr;
489 MCGenerator::translate(const TranslArgs& args) {
490 INC_TPC(translate);
492 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
493 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
494 assert(m_tx.mode() != TransKind::Invalid);
495 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
497 if (!shouldTranslate(args.sk.func())) return nullptr;
499 auto func = const_cast<Func*>(args.sk.func());
500 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
501 .profile(m_tx.mode() == TransKind::Profile)
502 .hot(RuntimeOption::EvalHotFuncCount &&
503 (func->attrs() & AttrHot) && m_tx.useAHot()));
505 auto start = translateWork(args);
507 if (args.setFuncBody) {
508 func->setFuncBody(start);
510 SKTRACE(1, args.sk, "translate moved head from %p to %p\n",
511 getTopTranslation(args.sk), start);
513 return start;
517 MCGenerator::getCallArrayPrologue(Func* func) {
518 TCA tca = func->getFuncBody();
519 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
521 DVFuncletsVec dvs = func->getDVFunclets();
523 if (dvs.size()) {
524 LeaseHolder writer(Translator::WriteLease());
525 if (!writer) return nullptr;
526 tca = func->getFuncBody();
527 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
528 tca = backEnd().emitCallArrayPrologue(func, dvs);
529 func->setFuncBody(tca);
530 } else {
531 SrcKey sk(func, func->base(), false);
532 auto args = TranslArgs{sk, false};
533 args.setFuncBody = true;
534 tca = mcg->getTranslation(args);
537 return tca;
540 void
541 MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
542 const Func* func) {
543 for (int i = 0; i < numPrologues; i++) {
544 if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
545 && backEnd().funcPrologueHasGuard(prologues[i], func)) {
546 backEnd().funcPrologueSmashGuard(prologues[i], func);
547 }
548 }
549 }
551 /*
552 * funcPrologue --
554 * Given a callee and a number of args, match up to the callee's
555 * argument expectations and dispatch.
557 * Call/return hand-shaking is a bit funny initially. At translation time,
558 * we don't necessarily know what function we're calling. For instance,
560 * f(g());
562 * Will lead to a set of basic blocks like:
564 * b1: pushfuncd "f"
565 * pushfuncd "g"
566 * fcall
567 * b2: fcall
569 * The fcall labelled "b2" above is not statically bindable in our
570 * execution model.
572 * We decouple the call work into a per-callsite portion, responsible
573 * for recording the return address, and a per-(callee, numArgs) portion,
574 * responsible for fixing up arguments and dispatching to remaining
575 * code. We call the per-callee portion a "prologue."
577 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
578 * we're running "between" basic blocks, with all VM registers sync'ed.
579 * However, we're also called in the middle of basic blocks, when dropping
580 * entries into func->m_prologues. So don't go around using the
581 * translation-time values of vmfp()/vmsp(), since they have an
582 * unpredictable relationship to the source.
583 */
584 bool
585 MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
586 TCA& prologue) const {
587 prologue = (TCA)func->getPrologue(paramIdx);
588 if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
589 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
590 func->fullName()->data(), paramIdx, prologue);
591 assert(isValidCodeAddress(prologue));
592 return true;
594 return false;
598 MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar,
599 bool forRegeneratePrologue) {
600 func->validate();
601 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
602 int const numParams = func->numNonVariadicParams();
603 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
605 bool const funcIsMagic = func->isMagic();
607 // Do a quick test before grabbing the write lease
608 TCA prologue;
609 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
611 Offset entry = func->getEntryForNumArgs(nPassed);
612 SrcKey funcBody(func, entry, false);
614 if (func->isClonedClosure()) {
615 assert(ar);
616 interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
617 auto tca = getTranslation(TranslArgs{funcBody, false});
618 tl_regState = VMRegState::DIRTY;
619 if (tca) {
620 // racy, but ok...
621 func->setPrologue(paramIndex, tca);
623 return tca;
626 LeaseHolder writer(Translator::WriteLease());
627 if (!writer) return nullptr;
629 // If we're regenerating a prologue, we want to check shouldTranslate()
630 // but ignore the code size limits. We still want to respect the global
631 // translation limit and other restrictions, though.
632 if (forRegeneratePrologue) {
633 if (!shouldTranslateNoSizeLimit(func)) return nullptr;
634 } else {
635 if (!shouldTranslate(func)) return nullptr;
638 // Double check the prologue array now that we have the write lease
639 // in case another thread snuck in and set the prologue already.
640 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
642 // We're coming from a BIND_CALL service request, so enable
643 // profiling if we haven't optimized the function entry yet.
644 assert(m_tx.mode() == TransKind::Invalid ||
645 m_tx.mode() == TransKind::Prologue);
646 if (m_tx.mode() == TransKind::Invalid && profileSrcKey(funcBody)) {
647 m_tx.setMode(TransKind::Proflogue);
648 } else {
649 m_tx.setMode(TransKind::Prologue);
651 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
653 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
654 .profile(m_tx.mode() == TransKind::Proflogue)
655 .hot(RuntimeOption::EvalHotFuncCount &&
656 (func->attrs() & AttrHot) && m_tx.useAHot()));
658 assert(m_fixups.empty());
659 // If we're close to a cache line boundary, just burn some space to
660 // try to keep the func and its body on fewer total lines.
661 if (((uintptr_t)code.main().frontier() & backEnd().cacheLineMask()) >=
662 (backEnd().cacheLineSize() / 2)) {
663 backEnd().moveToAlign(code.main(), MoveToAlignFlags::kCacheLineAlign);
665 m_fixups.m_alignFixups.emplace(
666 code.main().frontier(), std::make_pair(backEnd().cacheLineSize() / 2, 0));
668 // Careful: this isn't necessarily the real entry point. For funcIsMagic
669 // prologues, this is just a possible prologue.
670 TCA aStart = code.main().frontier();
671 TCA start = aStart;
672 TCA realColdStart = mcg->code.realCold().frontier();
673 TCA realFrozenStart = mcg->code.realFrozen().frontier();
675 auto const skFuncBody = backEnd().emitFuncPrologue(
676 code.main(), code.cold(), func, funcIsMagic, nPassed, start, aStart);
677 m_fixups.process(nullptr);
679 assert(backEnd().funcPrologueHasGuard(start, func));
680 TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
681 this, func->fullName()->data(), nPassed, start);
682 assert(isValidCodeAddress(start));
683 func->setPrologue(paramIndex, start);
685 assert(m_tx.mode() == TransKind::Prologue ||
686 m_tx.mode() == TransKind::Proflogue);
687 TransRec tr(skFuncBody,
688 m_tx.mode(),
689 aStart, code.main().frontier() - aStart,
690 realColdStart, code.realCold().frontier() - realColdStart,
691 realFrozenStart, code.realFrozen().frontier() - realFrozenStart);
692 m_tx.addTranslation(tr);
693 if (RuntimeOption::EvalJitUseVtuneAPI) {
694 reportTraceletToVtune(func->unit(), func, tr);
697 if (m_tx.profData()) {
698 m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
701 recordGdbTranslation(skFuncBody, func,
702 code.main(), aStart,
703 false, true);
704 recordBCInstr(OpFuncPrologue, aStart, code.main().frontier(), false);
706 m_numTrans++;
707 assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);
709 return start;
710 }
712 /*
713 * Given the proflogueTransId for a TransProflogue translation,
714 * regenerate the prologue (as a TransPrologue). Returns the starting
715 * address for the translation corresponding to triggerSk, if such
716 * translation is generated; otherwise returns nullptr.
717 */
718 TCA MCGenerator::regeneratePrologue(TransID prologueTransId, SrcKey triggerSk) {
719 Func* func = m_tx.profData()->transFunc(prologueTransId);
720 int nArgs = m_tx.profData()->prologueArgs(prologueTransId);
722 // Regenerate the prologue.
723 func->resetPrologue(nArgs);
724 m_tx.setMode(TransKind::Prologue);
725 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
726 auto const start = getFuncPrologue(
727 func,
728 nArgs,
729 nullptr /* ActRec */,
730 true /* regeneratePrologue */
732 if (!start) return nullptr;
734 func->setPrologue(nArgs, start);
736 // Smash callers of the old prologue with the address of the new one.
737 PrologueCallersRec* pcr =
738 m_tx.profData()->prologueCallers(prologueTransId);
739 for (TCA toSmash : pcr->mainCallers()) {
740 backEnd().smashCall(toSmash, start);
742 // If the prologue has a guard, then smash its guard-callers as well.
743 if (backEnd().funcPrologueHasGuard(start, func)) {
744 TCA guard = backEnd().funcPrologueToGuard(start, func);
745 for (TCA toSmash : pcr->guardCallers()) {
746 backEnd().smashCall(toSmash, guard);
749 pcr->clearAllCallers();
751 // If this prologue has a DV funclet, then generate a translation
752 // for the DV funclet right after the prologue. However, skip
753 // cloned closures because their prologues are actually the DV
754 // funclets already.
755 TCA triggerSkStart = nullptr;
756 if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
757 auto paramInfo = func->params()[nArgs];
758 if (paramInfo.hasDefaultValue()) {
759 m_tx.setMode(TransKind::Optimize);
760 SrcKey funcletSK(func, paramInfo.funcletOff, false);
761 auto funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
762 if (funcletTransId != kInvalidTransID) {
763 invalidateSrcKey(funcletSK);
764 auto args = TranslArgs{funcletSK, false};
765 args.transId = funcletTransId;
766 auto dvStart = translate(args);
767 if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
768 triggerSkStart = dvStart;
770 // Flag that this translation has been retranslated, so that
771 // it's not retranslated again along with the function body.
772 m_tx.profData()->setOptimized(funcletSK);
773 }
774 }
775 }
777 return triggerSkStart;
778 }
780 /*
781 * Regenerate all prologues of func that were previously generated.
782 * The prologues are sorted in ascending order of profile counters.
783 * For prologues with corresponding DV funclets, their corresponding
784 * DV funclet will be regenerated right after them. The idea is to
785 * generate the function body right after calling this function, so
786 * that all prologues are placed right before it, and with the hottest
787 * prologues closer to it.
789 * Returns the starting address for the translation corresponding to
790 * triggerSk, if such translation is generated; otherwise returns
791 * nullptr.
792 */
793 TCA MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
794 TCA triggerStart = nullptr;
795 std::vector<TransID> prologTransIDs;
797 for (int nArgs = 0; nArgs < func->numPrologues(); nArgs++) {
798 TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
799 if (tid != kInvalidTransID) {
800 prologTransIDs.push_back(tid);
804 std::sort(prologTransIDs.begin(), prologTransIDs.end(),
805 [&](TransID t1, TransID t2) -> bool {
806 // This will sort in ascending order. Note that transCounters start
807 // at JitPGOThreshold and count down.
808 return m_tx.profData()->transCounter(t1) >
809 m_tx.profData()->transCounter(t2);
810 });
812 for (TransID tid : prologTransIDs) {
813 TCA start = regeneratePrologue(tid, triggerSk);
814 if (triggerStart == nullptr && start != nullptr) {
815 triggerStart = start;
816 }
817 }
819 return triggerStart;
820 }
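// Worked example of the comparator above (assuming JitPGOThreshold = 100):
// a prologue entered 90 times has transCounter 100 - 90 = 10, while one
// entered 5 times has transCounter 95. Since 95 > 10, the cold prologue
// sorts first and the hottest one is regenerated last, ending up closest
// to the function body that is emitted right after this call.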
822 /*
823 * bindJmp --
825 * Runtime service handler that patches a jmp to the translation of
826 * u:dest from toSmash.
827 */
828 TCA
829 MCGenerator::bindJmp(TCA toSmash, SrcKey destSk, ServiceRequest req,
830 TransFlags trflags, bool& smashed) {
831 auto args = TranslArgs{destSk, false};
832 args.flags = trflags;
833 auto tDest = getTranslation(args);
834 if (!tDest) return nullptr;
836 LeaseHolder writer(Translator::WriteLease());
837 if (!writer) return tDest;
839 SrcRec* sr = m_tx.getSrcRec(destSk);
840 // The top translation may have changed while we waited for the
841 // write lease, so read it again. If it was replaced with a new
842 // translation, then bind to the new one. If it was invalidated,
843 // then don't bind the jump.
844 tDest = sr->getTopTranslation();
845 if (tDest == nullptr) return nullptr;
847 if (req == REQ_BIND_ADDR) {
848 auto addr = reinterpret_cast<TCA*>(toSmash);
849 if (*addr == tDest) {
850 // Already smashed
851 return tDest;
853 sr->chainFrom(IncomingBranch::addr(addr));
854 } else {
855 DecodedInstruction di(toSmash);
856 if (di.isBranch() && !di.isJmp()) {
857 auto jt = backEnd().jccTarget(toSmash);
858 assert(jt);
859 if (jt == tDest) {
860 // Already smashed
861 return tDest;
863 sr->chainFrom(IncomingBranch::jccFrom(toSmash));
864 } else {
865 assert(!backEnd().jccTarget(toSmash));
866 if (!backEnd().jmpTarget(toSmash)
867 || backEnd().jmpTarget(toSmash) == tDest) {
868 // Already smashed
869 return tDest;
871 sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
874 smashed = true;
875 return tDest;
878 /*
879 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
881 * 1: j<CC> stubJmpccFirst
882 * jmp stubJmpccFirst
884 * Our "taken" argument tells us whether the branch at 1: was taken or
885 * not; and therefore which of offTaken and offNotTaken to continue executing.
886 * If we did take the branch, we now rewrite the code so that the branch is
887 * straightened. This predicts that subsequent executions will go the same way
888 * as the first execution.
890 * jn<CC> stubJmpccSecond:offNotTaken
891 * nop5 ; fallthru, or jmp if there's already a translation.
892 * offTaken:
894 * If we did not take the branch, we leave the sense of the condition
895 * intact, while patching it up to go to the unexplored code:
897 * j<CC> stubJmpccSecond:offTaken
898 * nop5
899 * offNotTaken:
900 */
901 TCA
902 MCGenerator::bindJmpccFirst(TCA toSmash,
903 SrcKey skTaken, SrcKey skNotTaken,
904 bool taken,
905 bool& smashed) {
906 LeaseHolder writer(Translator::WriteLease());
907 if (!writer) return nullptr;
908 auto skWillExplore = taken ? skTaken : skNotTaken;
909 auto skWillDefer = taken ? skNotTaken : skTaken;
910 auto dest = skWillExplore;
911 auto cc = backEnd().jccCondCode(toSmash);
912 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
913 "taken %d\n",
914 skWillExplore.offset(), skWillDefer.offset(), cc, taken);
916 // We want the branch to point to whichever side has not been explored yet.
917 if (taken) cc = ccNegate(cc);
919 auto& cb = code.blockFor(toSmash);
920 Asm as { cb };
921 // It's not clear where the IncomingBranch should go if cb is code.frozen().
922 assert(&cb != &code.frozen());
924 // XXX Use of kJmp*Len here is a layering violation.
925 using namespace x64;
927 // can we just directly fall through?
928 // a jmp + jz takes 5 + 6 = 11 bytes
929 bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
930 !m_tx.getSrcDB().find(dest);
932 auto tDest = getTranslation(TranslArgs{dest, !fallThru});
933 if (!tDest) {
934 return 0;
937 if (backEnd().jmpTarget(toSmash + kJmpccLen)
938 != backEnd().jccTarget(toSmash)) {
939 // someone else already smashed this one. Ideally we would
940 // just re-execute from toSmash - except the flags will have
941 // been trashed.
942 return tDest;
945 TCA stub = emitEphemeralServiceReq(code.frozen(),
946 getFreeStub(code.frozen(),
947 &mcg->cgFixups()),
948 REQ_BIND_JMP,
949 RipRelative(toSmash),
950 skWillDefer.toAtomicInt(),
951 TransFlags{}.packed);
953 mcg->cgFixups().process(nullptr);
954 smashed = true;
955 assert(Translator::WriteLease().amOwner());
956 /*
957 * Roll over the jcc and the jmp/fallthru. E.g., from:
959 * toSmash: jcc <jmpccFirstStub>
960 * toSmash+6: jmp <jmpccFirstStub>
961 * toSmash+11: <probably the new translation == tdest>
963 * to:
965 * toSmash: j[n]z <jmpccSecondStub>
966 * toSmash+6: nop5
967 toSmash+11: newHotness
968 */
969 CodeCursor cg(cb, toSmash);
970 as.jcc(cc, stub);
971 m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
972 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
973 return tDest;
976 TCA MCGenerator::bindCall(ActRec* calleeFrame,
977 bool isImmutable,
978 SrcKey& sk,
979 ServiceRequest& req) {
980 TCA toSmash = backEnd().smashableCallFromReturn((TCA)calleeFrame->m_savedRip);
981 Func *func = const_cast<Func*>(calleeFrame->m_func);
982 int nArgs = calleeFrame->numArgs();
983 TRACE(2, "bindCall %s, ActRec %p\n",
984 func->fullName()->data(), calleeFrame);
985 TCA start = getFuncPrologue(func, nArgs);
986 TRACE(2, "bindCall -> %p\n", start);
987 if (!isImmutable) {
988 // We don't know we're calling the right function, so adjust start to point
989 // to the dynamic check of ar->m_func.
990 start = backEnd().funcPrologueToGuard(start, func);
991 } else {
992 TRACE(2, "bindCall immutably %s -> %p\n",
993 func->fullName()->data(), start);
995 if (start) {
996 LeaseHolder writer(Translator::WriteLease());
997 if (writer) {
998 // Someone else may have changed the func prologue while we waited for
999 // the write lease, so read it again.
1000 start = getFuncPrologue(func, nArgs);
1001 if (!isImmutable) start = backEnd().funcPrologueToGuard(start, func);
1003 if (start && backEnd().callTarget(toSmash) != start) {
1004 assert(backEnd().callTarget(toSmash));
1005 TRACE(2, "bindCall smash %p -> %p\n",
1006 toSmash, start);
1007 backEnd().smashCall(toSmash, start);
1008 // For functions to be PGO'ed, if their current prologues are still
1009 // profiling ones (living in code.prof()), then save toSmash as a
1010 // caller to the prologue, so that it can later be smashed to call a
1011 // new prologue when it's generated.
1012 int calleeNumParams = func->numNonVariadicParams();
1013 int calledPrologNumArgs = (nArgs <= calleeNumParams ?
1014 nArgs : calleeNumParams + 1);
1015 if (code.prof().contains(start)) {
1016 if (isImmutable) {
1017 m_tx.profData()->addPrologueMainCaller(
1018 func, calledPrologNumArgs, toSmash);
1019 } else {
1020 m_tx.profData()->addPrologueGuardCaller(
1021 func, calledPrologNumArgs, toSmash);
1026 // sk: stale, but doesn't matter since we have a valid start TCA.
1027 } else {
1028 // We need translator help; we're not at the callee yet, so roll back. The
1029 // prelude has done some work already, but it should be safe to redo.
1030 TRACE(2, "bindCall rollback smash %p -> %p\n",
1031 toSmash, start);
1033 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1034 liveFunc()->base() + calleeFrame->m_soff);
1036 sk = SrcKey{liveFunc(), fe->m_fcallOff, vmfp()->resumed()};
1038 // We're going to have to interpret the FCall, so make sure handleSRHelper
1039 // doesn't think we're coming back from a REQ_BIND_CALL when we finally
1040 // make it back to the TC.
1041 req = REQ_BIND_JMP;
1044 return start;
1047 namespace {
1048 class FreeRequestStubTrigger {
1049 TCA m_stub;
1050 public:
1051 explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
1052 TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
1054 void operator()() {
1055 TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
1056 if (mcg->freeRequestStub(m_stub) != true) {
1057 // If we can't free the stub, enqueue again to retry.
1058 TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
1059 Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
1060 }
1061 }
1062 };
1063 }
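// Added note (my understanding of the mechanism, not original commentary):
// Treadmill::enqueue defers the functor until every request that was in
// flight at enqueue time has finished, so a stub is only recycled once no
// thread can still be executing it; a failed free (no write lease) simply
// re-enqueues itself for another round trip.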
1065 #ifdef DEBUG
1067 struct DepthGuard {
1068 static __thread int m_depth;
1069 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
1070 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
1072 bool depthOne() const { return m_depth == 1; }
1073 };
1074 __thread int DepthGuard::m_depth;
1076 #else
1078 struct DepthGuard { bool depthOne() const { return false; } };
1080 #endif
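// DepthGuard is a debug-only RAII nesting counter: depthOne() is true only
// in the outermost enterTC invocation, which is where the write-lease
// "gremlin" stress test below is allowed to take the lease.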
1082 void
1083 MCGenerator::enterTC(TCA start, ActRec* stashedAR) {
1084 if (debug) {
1085 fflush(stdout);
1086 fflush(stderr);
1088 DepthGuard d;
1090 assert(isValidCodeAddress(start));
1091 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1092 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1094 Translator::WriteLease().gremlinUnlock();
1095 assert(!Translator::WriteLease().amOwner());
1097 INC_TPC(enter_tc);
1098 if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1099 auto skData = SrcKey{liveFunc(), vmpc(), liveResumed()}.toAtomicInt();
1100 Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
1103 tl_regState = VMRegState::DIRTY;
1104 backEnd().enterTCHelper(start, stashedAR);
1105 tl_regState = VMRegState::CLEAN;
1106 assert(isValidVMStackAddress(vmsp()));
1108 if (debug) {
1109 // Debugging code: cede the write lease half the time.
1110 if (RuntimeOption::EvalJitStressLease) {
1111 if (d.depthOne() && (rand() % 2) == 0) {
1112 Translator::WriteLease().gremlinLock();
1117 vmfp() = nullptr;
1120 TCA MCGenerator::handleServiceRequest(ServiceReqInfo& info) {
1121 assert_native_stack_aligned();
1122 tl_regState = VMRegState::CLEAN; // partially a lie: vmpc() isn't synced
1124 auto callToExit = [&] {
1125 tl_regState = VMRegState::DIRTY;
1126 return m_tx.uniqueStubs.callToExit;
1129 TCA start = nullptr;
1130 SrcKey sk;
1131 auto smashed = false;
1133 // If start is still nullptr at the end of this switch, we will enter the
1134 // interpreter at sk.
1135 switch (info.req) {
1136 case REQ_BIND_CALL: {
1137 auto calleeFrame = info.stashedAR;
1138 auto isImmutable = info.args[0].boolVal;
1139 start = bindCall(calleeFrame, isImmutable, sk, info.req);
1140 break;
1143 case REQ_BIND_JMP:
1144 case REQ_BIND_ADDR: {
1145 auto const toSmash = info.args[0].tca;
1146 sk = SrcKey::fromAtomicInt(info.args[1].sk);
1147 auto const trflags = info.args[2].trflags;
1148 start = bindJmp(toSmash, sk, info.req, trflags, smashed);
1149 break;
1152 case REQ_BIND_JMPCC_FIRST: {
1153 auto toSmash = info.args[0].tca;
1154 auto skTaken = SrcKey::fromAtomicInt(info.args[1].sk);
1155 auto skNotTaken = SrcKey::fromAtomicInt(info.args[2].sk);
1156 auto taken = info.args[3].boolVal;
1157 sk = taken ? skTaken : skNotTaken;
1158 start = bindJmpccFirst(toSmash, skTaken, skNotTaken, taken, smashed);
1159 break;
1162 case REQ_RETRANSLATE: {
1163 INC_TPC(retranslate);
1164 sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
1165 auto trflags = info.args[1].trflags;
1166 auto args = TranslArgs{sk, true};
1167 args.flags = trflags;
1168 start = retranslate(args);
1169 SKTRACE(2, sk, "retranslated @%p\n", start);
1170 break;
1173 case REQ_RETRANSLATE_OPT: {
1174 sk = SrcKey::fromAtomicInt(info.args[0].sk);
1175 auto transID = info.args[1].transID;
1176 start = retranslateOpt(transID, false);
1177 SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transID,
1178 start);
1179 break;
1182 case REQ_INTERPRET:
1183 // Leave start as nullptr and let the dispatchBB() happen down below.
1184 sk = SrcKey{liveFunc(), info.args[0].offset, liveResumed()};
1185 break;
1187 case REQ_POST_INTERP_RET: {
1188 // This is only responsible for the control-flow aspect of the Ret:
1189 // getting to the destination's translation, if any.
1190 auto ar = info.args[0].ar;
1191 auto caller = info.args[1].ar;
1192 assert(caller == vmfp());
1193 Unit* destUnit = caller->func()->unit();
1194 // Set PC so logging code in getTranslation doesn't get confused.
1195 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
1196 sk = SrcKey{caller->func(), vmpc(), caller->resumed()};
1197 start = getTranslation(TranslArgs{sk, true});
1198 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
1199 ar->m_func->fullName()->data(),
1200 caller->m_func->fullName()->data());
1201 break;
1204 case REQ_RESUME: {
1205 if (UNLIKELY(vmpc() == 0)) return callToExit();
1206 sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
1207 start = getTranslation(TranslArgs{sk, true});
1208 break;
1211 case REQ_STACK_OVERFLOW: {
1212 if (info.stashedAR->m_sfp == vmfp()) {
1213 /*
1214 * The normal case - we were called via FCall, or FCallArray. We need
1215 * to construct the pc of the fcall from the return address (which will
1216 * be after the fcall). Because fcall is a variable length instruction,
1217 * and because we sometimes delete instructions from the instruction
1218 * stream, we need to use fpi regions to find the fcall.
1219 */
1220 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1221 liveUnit()->offsetOf(vmpc()));
1222 vmpc() = liveUnit()->at(fe->m_fcallOff);
1223 assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
1224 raise_error("Stack overflow");
1225 } else {
1226 /*
1227 * We were called via re-entry. Leak the params and the actrec, and
1228 * tell the unwinder that there's nothing left to do in this "entry".
1229 */
1230 vmsp() = reinterpret_cast<Cell*>(info.stashedAR + 1);
1231 throw VMReenterStackOverflow();
1233 not_reached();
1237 if (smashed && info.stub) {
1238 Treadmill::enqueue(FreeRequestStubTrigger(info.stub));
1241 // If we don't have a starting address, interpret basic blocks until we end
1242 // up somewhere with a translation (which we may have created, if the lease
1243 // holder dropped it).
1244 while (!start) {
1245 vmpc() = sk.unit()->at(sk.offset());
1246 INC_TPC(interp_bb);
1247 HPHP::dispatchBB();
1248 if (!vmpc()) return callToExit();
1249 sk = SrcKey{liveFunc(), vmpc(), liveResumed()};
1250 start = getTranslation(TranslArgs{sk, true});
1253 if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1254 auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
1255 Trace::ringbufferEntry(RBTypeResumeTC, skData, (uint64_t)start);
1258 tl_regState = VMRegState::DIRTY;
1259 return start;
1260 }
1262 /*
1263 * Support for the stub freelist.
1264 */
1265 TCA FreeStubList::maybePop() {
1266 StubNode* ret = m_list;
1267 if (ret) {
1268 TRACE(1, "alloc stub %p\n", ret);
1269 m_list = ret->m_next;
1270 ret->m_freed = ~kStubFree;
1271 }
1272 return (TCA)ret;
1273 }
1275 void FreeStubList::push(TCA stub) {
1276 /*
1277 * A freed stub may be released by Treadmill more than once if multiple
1278 * threads execute the service request before it is freed. We detect
1279 * duplicates by marking freed stubs.
1280 */
1281 StubNode* n = reinterpret_cast<StubNode*>(stub);
1282 if (n->m_freed == kStubFree) {
1283 TRACE(1, "already freed stub %p\n", stub);
1284 return;
1285 }
1286 n->m_freed = kStubFree;
1287 n->m_next = m_list;
1288 TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
1289 m_list = n;
1290 }
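// Sketch of the intrusive layout assumed above (StubNode is presumably
// declared in the corresponding header; field names follow the uses here):
//
//   struct StubNode {
//     StubNode* m_next;
//     uint64_t  m_freed;  // == kStubFree while the stub sits on the list
//   };
//
// A freed stub's own bytes store the list node, so tracking free stubs
// needs no extra allocation.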
1292 bool
1293 MCGenerator::freeRequestStub(TCA stub) {
1294 LeaseHolder writer(Translator::WriteLease());
1295 /*
1296 * If we can't acquire the write lock, the caller
1297 * (FreeRequestStubTrigger) retries.
1298 */
1299 if (!writer) return false;
1300 assert(code.frozen().contains(stub));
1301 m_freeStubs.push(stub);
1302 return true;
1305 TCA MCGenerator::getFreeStub(CodeBlock& frozen, CodeGenFixups* fixups) {
1306 TCA ret = m_freeStubs.maybePop();
1307 if (ret) {
1308 Stats::inc(Stats::Astub_Reused);
1309 always_assert(m_freeStubs.m_list == nullptr ||
1310 code.isValidCodeAddress(TCA(m_freeStubs.m_list)));
1311 TRACE(1, "recycle stub %p\n", ret);
1312 } else {
1313 ret = frozen.frontier();
1314 Stats::inc(Stats::Astub_New);
1315 TRACE(1, "alloc new stub %p\n", ret);
1317 if (fixups) {
1318 fixups->m_reusedStubs.emplace_back(ret);
1320 return ret;
1323 TCA MCGenerator::getTranslatedCaller() const {
1324 DECLARE_FRAME_POINTER(fp);
1325 ActRec* framePtr = fp; // can't directly mutate the register-mapped one
1326 for (; framePtr; framePtr = framePtr->m_sfp) {
1327 TCA rip = (TCA)framePtr->m_savedRip;
1328 if (isValidCodeAddress(rip)) {
1329 return rip;
1332 return nullptr;
1335 void
1336 MCGenerator::syncWork() {
1337 assert(tl_regState == VMRegState::DIRTY);
1338 m_fixupMap.fixup(g_context.getNoCheck());
1339 tl_regState = VMRegState::CLEAN;
1340 Stats::inc(Stats::TC_Sync);
1343 // Get the address of the literal val in the global data section.
1344 // If it's not there, add it to the map in m_fixups, which will
1345 // be committed to m_literals when m_fixups.process() is called.
1346 const uint64_t*
1347 MCGenerator::allocLiteral(uint64_t val) {
1348 auto it = m_literals.find(val);
1349 if (it != m_literals.end()) {
1350 assert(*it->second == val);
1351 return it->second;
1353 auto& pending = m_fixups.m_literals;
1354 it = pending.find(val);
1355 if (it != pending.end()) {
1356 assert(*it->second == val);
1357 return it->second;
1359 auto addr = allocData<uint64_t>(sizeof(uint64_t), 1);
1360 *addr = val;
1361 return pending[val] = addr;
1362 }
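// Illustrative use (hypothetical): rather than materializing an 8-byte
// immediate at every site, a code generator can share one data-section
// slot per distinct value:
//
//   auto const lit = mcg->allocLiteral(0x123456789abcdef0ULL);
//   // emit a RIP-relative load from `lit` wherever the constant is needed.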
1364 bool
1365 MCGenerator::reachedTranslationLimit(SrcKey sk,
1366 const SrcRec& srcRec) const {
1367 if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
1368 INC_TPC(max_trans);
1369 if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
1370 const auto& tns = srcRec.translations();
1371 TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
1372 tns.size(), sk.unit()->filepath()->data(),
1373 sk.offset());
1374 SKTRACE(2, sk, "{\n");
1375 TCA topTrans = srcRec.getTopTranslation();
1376 for (size_t i = 0; i < tns.size(); ++i) {
1377 const TransRec* rec = m_tx.getTransRec(tns[i]);
1378 assert(rec);
1379 SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
1380 if (tns[i] == topTrans) {
1381 SKTRACE(2, sk, "%zd: *Top*\n", i);
1383 if (rec->kind == TransKind::Anchor) {
1384 SKTRACE(2, sk, "%zd: Anchor\n", i);
1385 } else {
1386 SKTRACE(2, sk, "%zd: guards {\n", i);
1387 for (unsigned j = 0; j < rec->guards.size(); ++j) {
1388 FTRACE(2, "{}\n", rec->guards[j]);
1390 SKTRACE(2, sk, "%zd } guards\n", i);
1393 SKTRACE(2, sk, "} /* Too many translations */\n");
1395 return true;
1398 return false;
1401 void
1402 MCGenerator::recordSyncPoint(CodeAddress frontier, Offset pcOff, Offset spOff) {
1403 m_fixups.m_pendingFixups.push_back(
1404 PendingFixup(frontier, Fixup(pcOff, spOff)));
1405 }
1407 /*
1408 * Equivalent to container.clear(), but guarantees to free
1409 * any memory associated with the container (eg clear
1410 * doesn't affect std::vector's capacity).
1411 */
1412 template <typename T> void ClearContainer(T& container) {
1413 T().swap(container);
1414 }
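// Usage note (added): the swap-with-a-temporary idiom above is the standard
// way to force a container to release its storage. For example, given a
// std::vector<int> v with a large capacity():
//
//   ClearContainer(v);  // v is now empty and, in practice, capacity() == 0
//
// whereas v.clear() alone may keep the backing allocation around.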
1416 void
1417 CodeGenFixups::process_only(
1418 GrowableVector<IncomingBranch>* inProgressTailBranches) {
1419 for (uint i = 0; i < m_pendingFixups.size(); i++) {
1420 TCA tca = m_pendingFixups[i].m_tca;
1421 assert(mcg->isValidCodeAddress(tca));
1422 mcg->fixupMap().recordFixup(tca, m_pendingFixups[i].m_fixup);
1424 ClearContainer(m_pendingFixups);
1426 for (auto const& pair : m_pendingCatchTraces) {
1427 mcg->catchTraceMap().insert(pair.first, pair.second);
1429 ClearContainer(m_pendingCatchTraces);
1431 for (auto const& elm : m_pendingJmpTransIDs) {
1432 mcg->getJmpToTransIDMap().insert(elm);
1434 ClearContainer(m_pendingJmpTransIDs);
1436 mcg->literals().insert(m_literals.begin(), m_literals.end());
1437 ClearContainer(m_literals);
1439 if (inProgressTailBranches) {
1440 m_inProgressTailJumps.swap(*inProgressTailBranches);
1442 assert(m_inProgressTailJumps.empty());
1445 void CodeGenFixups::clear() {
1446 ClearContainer(m_pendingFixups);
1447 ClearContainer(m_pendingCatchTraces);
1448 ClearContainer(m_pendingJmpTransIDs);
1449 ClearContainer(m_reusedStubs);
1450 ClearContainer(m_addressImmediates);
1451 ClearContainer(m_codePointers);
1452 ClearContainer(m_bcMap);
1453 ClearContainer(m_alignFixups);
1454 ClearContainer(m_inProgressTailJumps);
1455 ClearContainer(m_literals);
1458 bool CodeGenFixups::empty() const {
1459 return
1460 m_pendingFixups.empty() &&
1461 m_pendingCatchTraces.empty() &&
1462 m_pendingJmpTransIDs.empty() &&
1463 m_reusedStubs.empty() &&
1464 m_addressImmediates.empty() &&
1465 m_codePointers.empty() &&
1466 m_bcMap.empty() &&
1467 m_alignFixups.empty() &&
1468 m_inProgressTailJumps.empty() &&
1469 m_literals.empty();
1470 }
1472 TCA
1473 MCGenerator::translateWork(const TranslArgs& args) {
1474 Timer _t(Timer::translate);
1475 auto sk = args.sk;
1477 SKTRACE(1, sk, "translateWork\n");
1478 assert(m_tx.getSrcDB().find(sk));
1480 if (args.align) {
1481 mcg->backEnd().moveToAlign(code.main(),
1482 MoveToAlignFlags::kNonFallthroughAlign);
1485 TCA start = code.main().frontier();
1486 TCA coldStart = code.cold().frontier();
1487 TCA realColdStart = code.realCold().frontier();
1488 TCA DEBUG_ONLY frozenStart = code.frozen().frontier();
1489 TCA realFrozenStart = code.realFrozen().frontier();
1490 SrcRec& srcRec = *m_tx.getSrcRec(sk);
1491 TransKind transKindToRecord = TransKind::Interp;
1492 UndoMarker undoA(code.main());
1493 UndoMarker undoAcold(code.cold());
1494 UndoMarker undoAfrozen(code.frozen());
1495 UndoMarker undoGlobalData(code.data());
1497 setUseLLVM(
1498 // HHIRBytecodeControlFlow causes vmsp stack manipulations we can't handle
1499 // right now: t4810319
1500 !RuntimeOption::EvalHHIRBytecodeControlFlow &&
1501 (RuntimeOption::EvalJitLLVM > 1 ||
1502 (RuntimeOption::EvalJitLLVM && m_tx.mode() == TransKind::Optimize))
1504 SCOPE_EXIT {
1505 setUseLLVM(false);
1508 auto resetState = [&] {
1509 undoA.undo();
1510 undoAcold.undo();
1511 undoAfrozen.undo();
1512 undoGlobalData.undo();
1513 m_fixups.clear();
1516 auto assertCleanState = [&] {
1517 assert(code.main().frontier() == start);
1518 assert(code.frozen().frontier() == frozenStart);
1519 assert(m_fixups.empty());
1522 PostConditions pconds;
1523 RegionDescPtr region;
1524 if (!reachedTranslationLimit(sk, srcRec)) {
1525 // Attempt to create a region at this SrcKey
1526 if (m_tx.mode() == TransKind::Optimize) {
1527 assert(RuntimeOption::EvalJitPGO);
1528 region = args.region;
1529 if (region) {
1530 assert(!region->empty());
1531 } else {
1532 assert(isValidTransID(args.transId));
1533 region = selectHotRegion(args.transId, this);
1534 assert(region);
1535 if (region && region->empty()) region = nullptr;
1537 } else {
1538 assert(m_tx.mode() == TransKind::Profile ||
1539 m_tx.mode() == TransKind::Live);
1540 RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
1541 sk.resumed() };
1542 FTRACE(2, "populating live context for region\n");
1543 populateLiveContext(rContext);
1544 region = selectRegion(rContext, m_tx.mode());
1547 auto result = TranslateResult::Retry;
1548 auto regionInterps = RegionBlacklist{};
1549 auto const initSpOffset = region ? region->entry()->initialSpOffset()
1550 : liveSpOff();
1552 while (region && result == TranslateResult::Retry) {
1553 auto const transContext = TransContext {
1554 RuntimeOption::EvalJitPGO
1555 ? m_tx.profData()->curTransID()
1556 : kInvalidTransID,
1557 sk.offset(),
1558 initSpOffset,
1559 sk.resumed(),
1560 sk.func(),
1561 region.get()
1564 HTS hhbcTrans { transContext };
1565 FTRACE(1, "{}{:-^40}{}\n",
1566 color(ANSI_COLOR_BLACK, ANSI_BGCOLOR_GREEN),
1567 " HHIR during translation ",
1568 color(ANSI_COLOR_END));
1570 try {
1571 assertCleanState();
1572 result = translateRegion(hhbcTrans, *region, regionInterps, args.flags,
1573 pconds);
1574 FTRACE(2, "translateRegion finished with result {}\n", show(result));
1575 } catch (const std::exception& e) {
1576 FTRACE(1, "translateRegion failed with '{}'\n", e.what());
1577 result = TranslateResult::Failure;
1580 if (result != TranslateResult::Success) {
1581 // Translation failed or will be retried. Free resources for this
1582 // trace, rollback the translation cache frontiers, and discard any
1583 // pending fixups.
1584 resetState();
1587 if (result == TranslateResult::Failure) {
1588 // If the region translator failed for an Optimize translation, it's OK
1589 // to do a Live translation for the function entry. Otherwise, fall
1590 // back to Interp.
1591 if (m_tx.mode() == TransKind::Optimize) {
1592 if (sk.getFuncId() == liveFunc()->getFuncId() &&
1593 liveUnit()->contains(vmpc()) &&
1594 sk.offset() == liveUnit()->offsetOf(vmpc()) &&
1595 sk.resumed() == liveResumed()) {
1596 m_tx.setMode(TransKind::Live);
1597 RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
1598 sk.resumed() };
1599 FTRACE(2, "populating live context for region after failed "
1600 "optimize translation\n");
1601 populateLiveContext(rContext);
1602 region = selectRegion(rContext, m_tx.mode());
1603 } else {
1604 region.reset();
1610 if (!region) m_tx.setMode(TransKind::Interp);
1612 if (result == TranslateResult::Success) {
1613 assert(m_tx.mode() == TransKind::Live ||
1614 m_tx.mode() == TransKind::Profile ||
1615 m_tx.mode() == TransKind::Optimize);
1616 transKindToRecord = m_tx.mode();
1620 if (args.dryRun) {
1621 resetState();
1622 return start;
1625 if (transKindToRecord == TransKind::Interp) {
1626 assertCleanState();
1627 FTRACE(1, "emitting dispatchBB interp request for failed translation\n");
1628 backEnd().emitInterpReq(code.main(), code.cold(), sk);
1629 // Fall through.
1632 if (args.align) {
1633 m_fixups.m_alignFixups.emplace(
1634 start, std::make_pair(backEnd().cacheLineSize() - 1, 0));
1637 if (RuntimeOption::EvalProfileBC) {
1638 auto* unit = sk.unit();
1639 TransBCMapping prev{};
1640 for (auto& cur : m_fixups.m_bcMap) {
1641 if (!cur.aStart) continue;
1642 if (prev.aStart) {
1643 if (prev.bcStart < unit->bclen()) {
1644 recordBCInstr(unit->entry()[prev.bcStart],
1645 prev.aStart, cur.aStart, false);
1647 } else {
1648 recordBCInstr(OpTraceletGuard, start, cur.aStart, false);
1650 prev = cur;
1654 recordGdbTranslation(sk, sk.func(), code.main(), start,
1655 false, false);
1656 recordGdbTranslation(sk, sk.func(), code.cold(), coldStart,
1657 false, false);
1658 if (RuntimeOption::EvalJitPGO) {
1659 if (transKindToRecord == TransKind::Profile) {
1660 always_assert(region);
1661 m_tx.profData()->addTransProfile(region, pconds);
1662 } else {
1663 m_tx.profData()->addTransNonProf(transKindToRecord, sk);
1667 TransRec tr(sk, transKindToRecord,
1668 start, code.main().frontier() - start,
1669 realColdStart, code.realCold().frontier() - realColdStart,
1670 realFrozenStart, code.realFrozen().frontier() - realFrozenStart,
1671 region, m_fixups.m_bcMap,
1672 useLLVM());
1673 m_tx.addTranslation(tr);
1674 if (RuntimeOption::EvalJitUseVtuneAPI) {
1675 reportTraceletToVtune(sk.unit(), sk.func(), tr);
1678 GrowableVector<IncomingBranch> inProgressTailBranches;
1679 m_fixups.process(&inProgressTailBranches);
1681 // SrcRec::newTranslation() makes this code reachable. Do this last;
1682 // otherwise there's some chance of hitting in the reader threads whose
1683 // metadata is not yet visible.
1684 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
1685 start, sk.getFuncId(), sk.offset());
1686 srcRec.newTranslation(start, inProgressTailBranches);
1688 TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
1689 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
1690 Trace::traceRelease("%s", getUsageString().c_str());
1693 return start;
1696 void MCGenerator::traceCodeGen(HTS& hts) {
1697 auto& unit = hts.unit;
1699 auto finishPass = [&](const char* msg, int level) {
1700 printUnit(level, unit, msg, nullptr, hts.irb->guards());
1701 assert(checkCfg(unit));
1704 finishPass(" after initial translation ", kIRLevel);
1706 always_assert_flog(
1707 IMPLIES(cfgHasLoop(unit), RuntimeOption::EvalJitLoops),
1708 "IRUnit has loop but Eval.JitLoops=0"
1711 optimize(unit, *hts.irb, m_tx.mode());
1712 finishPass(" after optimizing ", kOptLevel);
1714 always_assert(this == mcg);
1715 genCode(unit);
1717 m_numTrans++;
1718 assert(m_numTrans <= RuntimeOption::EvalJitGlobalTranslationLimit);
1721 MCGenerator::MCGenerator()
1722 : m_backEnd(newBackEnd())
1723 , m_numTrans(0)
1724 , m_catchTraceMap(128)
1726 TRACE(1, "MCGenerator@%p startup\n", this);
1727 mcg = this;
1729 m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());
1731 static bool profileUp = false;
1732 if (!profileUp) {
1733 profileInit();
1734 profileUp = true;
1737 if (Trace::moduleEnabledRelease(Trace::printir) &&
1738 !RuntimeOption::EvalJit) {
1739 Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
1740 "Did you mean to run with -vEval.Jit=1?\n");
1742 if (Trace::moduleEnabledRelease(Trace::llvm, 1) ||
1743 RuntimeOption::EvalJitLLVMCounters) {
1744 g_bytecodesVasm.bind();
1745 g_bytecodesLLVM.bind();
1749 void MCGenerator::initUniqueStubs() {
1750 // Put the following stubs into ahot, rather than a.
1751 CodeCache::Selector cbSel(CodeCache::Selector::Args(code).
1752 hot(m_tx.useAHot()));
1753 m_tx.uniqueStubs = backEnd().emitUniqueStubs();
1754 m_fixups.process(nullptr); // in case we generated literals
1757 void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
1758 FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
1759 m_fixups.m_pendingCatchTraces.emplace_back(ip, block);
1762 folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
1763 TCA* found = m_catchTraceMap.find(ip);
1764 if (found) return *found;
1765 return folly::none;
1768 void MCGenerator::codeEmittedThisRequest(size_t& requestEntry,
1769 size_t& now) const {
1770 requestEntry = s_initialTCSize;
1771 now = code.totalUsed();
1774 void MCGenerator::requestInit() {
1775 tl_regState = VMRegState::CLEAN;
1776 Timer::RequestInit();
1777 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
1778 Stats::init();
1779 s_initialTCSize = code.totalUsed();
1782 void MCGenerator::requestExit() {
1783 always_assert(!Translator::WriteLease().amOwner());
1784 TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
1785 " kept, %15" PRId64 " grabbed\n",
1786 Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
1787 Translator::WriteLease().m_hintGrabbed);
1788 Stats::dump();
1789 Stats::clear();
1790 Timer::RequestExit();
1792 if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
1793 Trace::traceRelease("MCGenerator perf counters for %s:\n",
1794 g_context->getRequestUrl(50).c_str());
1795 for (int i = 0; i < tpc_num_counters; i++) {
1796 Trace::traceRelease("%-20s %10" PRId64 "\n",
1797 kPerfCounterNames[i], s_perfCounters[i]);
1799 Trace::traceRelease("\n");
1802 if (Trace::moduleEnabledRelease(Trace::llvm, 1)) {
1803 auto llvm = *g_bytecodesLLVM;
1804 auto total = llvm + *g_bytecodesVasm;
1805 Trace::ftraceRelease(
1806 "{:9} / {:9} bytecodes ({:6.2f}%) handled by LLVM backend for {}\n",
1807 llvm, total, llvm * 100.0 / total, g_context->getRequestUrl(50)
1812 bool
1813 MCGenerator::isPseudoEvent(const char* event) {
1814 for (auto name : kPerfCounterNames) {
1815 if (!strcmp(event, name)) {
1816 return true;
1819 return false;
1822 void
1823 MCGenerator::getPerfCounters(Array& ret) {
1824 for (int i = 0; i < tpc_num_counters; i++) {
1825 // Until Perflab can automatically scale the values we give it to
1826 // an appropriate range, we have to fudge these numbers so they
1827 // look more like reasonable hardware counter values.
1828 ret.set(String::FromCStr(kPerfCounterNames[i]),
1829 s_perfCounters[i] * 1000);
1832 for (auto const& pair : Timer::Counters()) {
1833 if (pair.second.total == 0 && pair.second.count == 0) continue;
1835 ret.set(String("jit_time_") + pair.first, pair.second.total);
1838 if (RuntimeOption::EvalJitLLVMCounters) {
1839 ret.set(String("jit_instr_vasm"), *g_bytecodesVasm);
1840 ret.set(String("jit_instr_llvm"), *g_bytecodesLLVM);
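// The array produced above thus carries three kinds of entries, derived
// directly from the code (shapes illustrative):
//
//   kPerfCounterNames[i]              => s_perfCounters[i] * 1000
//   "jit_time_" + <timer name>        => cumulative Timer total
//   "jit_instr_vasm" / "jit_instr_llvm" => per-backend bytecode counts
//                                          (only with Eval.JitLLVMCounters)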
1844 MCGenerator::~MCGenerator() {
1847 static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
1848 bool isAcold) {
1849 assert(cb.contains(addr));
1850 return Debug::TCRange(addr, cb.frontier(), isAcold);
1853 void MCGenerator::recordBCInstr(uint32_t op,
1854 const TCA addr,
1855 const TCA end,
1856 bool cold) {
1857 if (addr != end) {
1858 m_debugInfo.recordBCInstr(
1859 Debug::TCRange(addr, end, cold), op);
1863 void MCGenerator::recordGdbTranslation(SrcKey sk,
1864 const Func* srcFunc,
1865 const CodeBlock& cb,
1866 const TCA start,
1867 bool exit,
1868 bool inPrologue) {
1869 if (start != cb.frontier()) {
1870 assert(Translator::WriteLease().amOwner());
1871 if (!RuntimeOption::EvalJitNoGdb) {
1872 m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.cold()),
1873 srcFunc,
1874 reinterpret_cast<const Op*>(
1875 srcFunc->unit() ?
1876 srcFunc->unit()->at(sk.offset()) : nullptr
1878 exit, inPrologue);
1880 if (RuntimeOption::EvalPerfPidMap) {
1881 m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.cold()),
1882 srcFunc, exit, inPrologue);
1887 void MCGenerator::recordGdbStub(const CodeBlock& cb,
1888 const TCA start, const char* name) {
1889 if (!RuntimeOption::EvalJitNoGdb) {
1890 m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.cold()),
1891 name);
1895 std::vector<UsageInfo> MCGenerator::getUsageInfo() {
1896 std::vector<UsageInfo> tcUsageInfo;
1897 code.forEachBlock([&](const char* name, const CodeBlock& a) {
1898 tcUsageInfo.emplace_back(UsageInfo{std::string("code.") + name,
1899 a.used(),
1900 a.capacity(),
1901 true});
1903 tcUsageInfo.emplace_back(UsageInfo{
1904 "data",
1905 code.data().used(),
1906 code.data().capacity(),
1907 true});
1908 tcUsageInfo.emplace_back(UsageInfo{
1909 "RDS",
1910 rds::usedBytes(),
1911 RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
1912 false});
1913 tcUsageInfo.emplace_back(UsageInfo{
1914 "RDSLocal",
1915 rds::usedLocalBytes(),
1916 RuntimeOption::EvalJitTargetCacheSize * 3 / 4,
1917 false});
1918 tcUsageInfo.emplace_back(UsageInfo{
1919 "persistentRDS",
1920 rds::usedPersistentBytes(),
1921 RuntimeOption::EvalJitTargetCacheSize / 4,
1922 false});
1923 tcUsageInfo.emplace_back(UsageInfo{
1924 "cloned-closures",
1925 Func::s_totalClonedClosures,
1926 100000 /* dummy value -- there isn't really a capacity for this */});
1927 return tcUsageInfo;
1930 std::string MCGenerator::getUsageString() {
1931 std::string usage;
1932 size_t totalBlockSize = 0;
1933 size_t totalBlockCapacity = 0;
1934 auto addRow = [&](UsageInfo blockUsageInfo) {
1935 auto percent = blockUsageInfo.m_capacity ?
1936 100 * blockUsageInfo.m_used / blockUsageInfo.m_capacity : 0;
1937 usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
1938 blockUsageInfo.m_used,
1939 percent,
1940 blockUsageInfo.m_name).str();
1941 if (blockUsageInfo.m_global) {
1942 totalBlockSize += blockUsageInfo.m_used;
1943 totalBlockCapacity += blockUsageInfo.m_capacity;
1946 auto tcUsageInfo = getUsageInfo();
1947 for_each(tcUsageInfo.begin(), tcUsageInfo.end(), addRow);
1948 addRow(UsageInfo{"total", totalBlockSize, totalBlockCapacity, false});
1949 return usage;
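// A representative line of the report built above (numbers are made up):
//
//   mcg:    123456 bytes (12%) in code.main
//
// A synthetic "total" row summing the global blocks is appended last.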
1952 std::string MCGenerator::getTCAddrs() {
1953 std::string addrs;
1954 code.forEachBlock([&](const char* name, const CodeBlock& a) {
1955 addrs += folly::format("{}: {}\n", name, a.base()).str();
1957 return addrs;
1960 bool MCGenerator::addDbgGuards(const Unit* unit) {
1961 // TODO: refactor. This grabs the write lease and iterates through the
1962 // whole SrcDB...
1963 struct timespec tsBegin, tsEnd;
1965 BlockingLeaseHolder writer(Translator::WriteLease());
1966 if (!writer) {
1967 return false;
1969 assert(mcg->cgFixups().empty());
1970 HPHP::Timer::GetMonotonicTime(tsBegin);
1971 // The docs say even find() _could_ invalidate the iterator; in practice
1972 // it should be very rare, so go with it for now.
1973 for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
1974 it != m_tx.getSrcDB().end(); ++it) {
1975 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
1976 // We may have a SrcKey to a deleted function. NB: this may miss a
1977 // race with deleting a Func. See task #2826313.
1978 if (!Func::isFuncIdValid(sk.getFuncId())) continue;
1979 SrcRec* sr = it->second;
1980 if (sr->unitMd5() == unit->md5() &&
1981 !sr->hasDebuggerGuard() &&
1982 m_tx.isSrcKeyInBL(sk)) {
1983 addDbgGuardImpl(sk, sr);
1986 mcg->cgFixups().process(nullptr);
1988 HPHP::Timer::GetMonotonicTime(tsEnd);
1989 int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
1990 if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
1991 Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
1993 return true;
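// Install a debugger guard for a single bytecode location. Returns true
// if the guard is already in place or there is no translation yet to
// guard; returns false if the SrcKey isn't blacklisted (debug builds) or
// the write lease can't be acquired.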
1996 bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
1997 SrcKey sk(func, offset, resumed);
1999 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2000 if (sr->hasDebuggerGuard()) {
2001 return true;
2003 } else {
2004 // no translation yet
2005 return true;
2008 if (debug) {
2009 if (!m_tx.isSrcKeyInBL(sk)) {
2010 TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
2011 return false;
2014 BlockingLeaseHolder writer(Translator::WriteLease());
2015 if (!writer) {
2016 return false;
2018 assert(mcg->cgFixups().empty());
2020 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2021 addDbgGuardImpl(sk, sr);
2024 mcg->cgFixups().process(nullptr);
2025 return true;
2028 bool MCGenerator::dumpTCCode(const char* filename) {
2029 #define OPEN_FILE(F, SUFFIX) \
2030 std::string F ## name = std::string(filename).append(SUFFIX); \
2031 FILE* F = fopen(F ## name .c_str(),"wb"); \
2032 if (F == nullptr) return false; \
2033 SCOPE_EXIT{ fclose(F); };
2035 OPEN_FILE(ahotFile, "_ahot");
2036 OPEN_FILE(aFile, "_a");
2037 OPEN_FILE(aprofFile, "_aprof");
2038 OPEN_FILE(acoldFile, "_acold");
2039 OPEN_FILE(afrozenFile, "_afrozen");
2040 OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");
2042 #undef OPEN_FILE
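// For reference, OPEN_FILE(aFile, "_a") above expands to roughly:
//
//   std::string aFilename = std::string(filename).append("_a");
//   FILE* aFile = fopen(aFilename.c_str(), "wb");
//   if (aFile == nullptr) return false;
//   SCOPE_EXIT { fclose(aFile); };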
2044 // Dump each code block in turn, starting from the hot region.
2045 size_t count = code.hot().used();
2046 bool result = (fwrite(code.hot().base(), 1, count, ahotFile) == count);
2047 if (result) {
2048 count = code.main().used();
2049 result = (fwrite(code.main().base(), 1, count, aFile) == count);
2051 if (result) {
2052 count = code.prof().used();
2053 result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
2055 if (result) {
2056 count = code.cold().used();
2057 result = (fwrite(code.cold().base(), 1, count, acoldFile) == count);
2059 if (result) {
2060 count = code.frozen().used();
2061 result = (fwrite(code.frozen().base(), 1, count, afrozenFile) == count);
2063 return result;
2066 // Returns true on success
2067 bool MCGenerator::dumpTC(bool ignoreLease) {
2068 folly::Optional<BlockingLeaseHolder> writer;
2069 if (!ignoreLease) {
2070 writer.emplace(Translator::WriteLease());
2071 if (!*writer) return false;
2073 bool success = dumpTCData();
2074 if (success) {
2075 success = dumpTCCode("/tmp/tc_dump");
2077 return success;
2080 // Returns true on success
2081 bool tc_dump(void) {
2082 return mcg && mcg->dumpTC();
2085 // Returns true on success
2086 bool MCGenerator::dumpTCData() {
2087 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
2088 if (!tcDataFile) return false;
2090 if (!gzprintf(tcDataFile,
2091 "repo_schema = %s\n"
2092 "ahot.base = %p\n"
2093 "ahot.frontier = %p\n"
2094 "a.base = %p\n"
2095 "a.frontier = %p\n"
2096 "aprof.base = %p\n"
2097 "aprof.frontier = %p\n"
2098 "acold.base = %p\n"
2099 "acold.frontier = %p\n"
2100 "afrozen.base = %p\n"
2101 "afrozen.frontier = %p\n\n",
2102 kRepoSchemaId,
2103 code.hot().base(), code.hot().frontier(),
2104 code.main().base(), code.main().frontier(),
2105 code.prof().base(), code.prof().frontier(),
2106 code.cold().base(), code.cold().frontier(),
2107 code.frozen().base(), code.frozen().frontier())) {
2108 return false;
2111 if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
2112 m_tx.getCurrentTransID())) {
2113 return false;
2116 for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
2117 if (gzputs(tcDataFile,
2118 m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
2119 -1) {
2120 return false;
2124 gzclose(tcDataFile);
2125 return true;
2128 void MCGenerator::invalidateSrcKey(SrcKey sk) {
2129 assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
2130 assert(Translator::WriteLease().amOwner());
2131 /*
2132 * Reroute existing translations for SrcKey to an as-yet indeterminate
2133 * new one.
2134 */
2135 SrcRec* sr = m_tx.getSrcDB().find(sk);
2136 assert(sr);
2137 /*
2138 * Since previous translations aren't reachable from here, we know we
2139 * just created some garbage in the TC. We currently have no mechanism
2140 * to reclaim this.
2141 */
2142 sr->replaceOldTranslations();
2145 void MCGenerator::setJmpTransID(TCA jmp) {
2146 if (m_tx.mode() != TransKind::Profile) return;
2148 TransID transId = m_tx.profData()->curTransID();
2149 FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
2150 m_fixups.m_pendingJmpTransIDs.emplace_back(jmp, transId);
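// RelocationInfo maps addresses in the original code to addresses in the
// relocated code. Each entry stores a pair of targets because one address
// can be both the end of one recorded range and the start of the next,
// and the two views may land at different relocated addresses (worked
// example after adjustedAddressBefore() below).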
2153 void RelocationInfo::recordRange(TCA start, TCA end,
2154 TCA destStart, TCA destEnd) {
2155 m_srcRanges.emplace_back(start, end);
2156 m_dstRanges.emplace_back(destStart, destEnd);
2157 m_adjustedAddresses[start].second = destStart;
2158 m_adjustedAddresses[end].first = destEnd;
2161 void RelocationInfo::recordAddress(TCA src, TCA dest, int range) {
2162 m_adjustedAddresses.emplace(src, std::make_pair(dest, dest + range));
2165 TCA RelocationInfo::adjustedAddressAfter(TCA addr) const {
2166 auto it = m_adjustedAddresses.find(addr);
2167 if (it == m_adjustedAddresses.end()) return nullptr;
2169 return it->second.second;
2172 TCA RelocationInfo::adjustedAddressBefore(TCA addr) const {
2173 auto it = m_adjustedAddresses.find(addr);
2174 if (it == m_adjustedAddresses.end()) return nullptr;
2176 return it->second.first;
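// Worked example for the pair convention (hypothetical addresses): after
// recordRange(0x100, 0x120, 0x500, 0x530),
//
//   adjustedAddressAfter(0x100)  == 0x500   // 0x100 as a range start
//   adjustedAddressBefore(0x120) == 0x530   // 0x120 as a range end
//
// whereas adjustedAddressBefore(0x100) and adjustedAddressAfter(0x120)
// remain nullptr unless an adjacent range records them.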
2179 void RelocationInfo::rewind(TCA start, TCA end) {
2180 if (m_srcRanges.size() && m_srcRanges.back().first == start) {
2181 assert(m_dstRanges.size() == m_srcRanges.size());
2182 assert(m_srcRanges.back().second == end);
2183 m_srcRanges.pop_back();
2184 m_dstRanges.pop_back();
2186 auto it = m_adjustedAddresses.lower_bound(start);
2187 if (it == m_adjustedAddresses.end()) return;
2188 if (it->first == start) {
2189 // If it->second.first is set, start is also the end
2190 // of an existing region; don't erase it in that case.
2191 if (it->second.first) {
2192 it++->second.second = 0;
2193 } else {
2194 m_adjustedAddresses.erase(it++);
2197 while (it != m_adjustedAddresses.end() && it->first < end) {
2198 m_adjustedAddresses.erase(it++);
2200 if (it == m_adjustedAddresses.end()) return;
2201 if (it->first == end) {
2202 // Similar to start above, end could be the start of an
2203 // existing region.
2204 if (it->second.second) {
2205 it++->second.first = 0;
2206 } else {
2207 m_adjustedAddresses.erase(it++);
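// Both emitIncStat() overloads below bump a thread-local Stats counter
// from generated code: they compute the counter's displacement from the
// thread-local-storage base and emit an FS-segment-relative memory add
// (via the back end in the first overload, directly as vasm in the
// second).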
2212 void
2213 emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n, bool force) {
2214 if (!force && !Stats::enabled()) return;
2215 intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();
2217 mcg->backEnd().emitIncStat(cb, disp, n);
2220 void emitIncStat(Vout& v, Stats::StatCounter stat, int n, bool force) {
2221 if (!force && !Stats::enabled()) return;
2222 intptr_t disp = uintptr_t(&Stats::tl_counters[stat]) - tlsBase();
2223 v << addqim{n, Vptr{baseless(disp), Vptr::FS}, v.makeReg()};
2226 // Generic vasm service-request generator. Target-specific details
2227 // are hidden by the svcreq{} instruction.
2228 void emitServiceReq(Vout& v, TCA stub_block,
2229 ServiceRequest req, const ServiceReqArgVec& argv) {
2230 TRACE(3, "Emit Service Req %s(", serviceReqName(req));
2231 VregList args;
2232 for (auto& argInfo : argv) {
2233 switch (argInfo.m_kind) {
2234 case ServiceReqArgInfo::Immediate: {
2235 TRACE(3, "%" PRIx64 ", ", argInfo.m_imm);
2236 args.push_back(v.cns(argInfo.m_imm));
2237 break;
2239 default: {
2240 always_assert(false);
2241 break;
2245 v << svcreq{req, v.makeTuple(args), stub_block};
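// A minimal usage sketch, assuming the packServiceReqArgs() helper from
// service-requests-inline.h and using REQ_RETRANSLATE purely as an
// illustration:
//
//   ServiceReqArgVec argv;
//   packServiceReqArgs(argv, bcOffset);
//   emitServiceReq(v, stub_block, REQ_RETRANSLATE, argv);
//
// Note that only immediate arguments are handled here; any other
// ServiceReqArgInfo kind hits the always_assert above.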
2248 } // HPHP::jit
2250 } // HPHP