hphp/runtime/vm/jit/mc-generator.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16 #include "hphp/runtime/vm/jit/mc-generator.h"
  17 #include "hphp/runtime/vm/jit/vtune-jit.h"
  18
  19 #include "folly/MapUtil.h"
  20
  21 #include <cinttypes>
  22 #include <stdint.h>
  23 #include <assert.h>
  24 #include <unistd.h>
  25 #include <sys/mman.h>
  26 #include <strstream>
  27 #include <stdio.h>
  28 #include <stdarg.h>
  29 #include <string>
  30 #include <queue>
  31 #include <unwind.h>
  32 #include <unordered_set>
  33 #ifdef __FreeBSD__
  34 #include <sys/ucontext.h>
  35 #endif
  36
  37 #ifdef __FreeBSD__
  38 #define RIP_REGISTER(v) (v).mc_rip
  39 #elif defined(__APPLE__)
  40 #define RIP_REGISTER(v) (v)->__ss.__rip
  41 #elif defined(__x86_64__)
  42 #define RIP_REGISTER(v) (v).gregs[REG_RIP]
  43 #elif defined(__AARCH64EL__)
  44 #define RIP_REGISTER(v) (v).pc
  45 #else
  46 #error How is rip accessed on this architecture?
  47 #endif
  48
  49 #include <boost/bind.hpp>
  50 #include <boost/utility/typed_in_place_factory.hpp>
  51 #include <boost/range/adaptors.hpp>
  52 #include <boost/scoped_ptr.hpp>
  53 #include <algorithm>
  54 #include <exception>
  55 #include <memory>
  56 #include <vector>
  57
  58 #include "folly/Format.h"
  59 #include "folly/String.h"
  60
  61 #include "hphp/util/abi-cxx.h"
  62 #include "hphp/util/asm-x64.h"
  63 #include "hphp/util/bitops.h"
  64 #include "hphp/util/debug.h"
  65 #include "hphp/util/disasm.h"
  66 #include "hphp/util/maphuge.h"
  67 #include "hphp/util/rank.h"
  68 #include "hphp/util/ringbuffer.h"
  69 #include "hphp/util/timer.h"
  70 #include "hphp/util/trace.h"
  71 #include "hphp/util/meta.h"
  72 #include "hphp/util/process.h"
  73 #include "hphp/util/repo-schema.h"
  74 #include "hphp/util/cycles.h"
  75
  76 #include "hphp/vixl/a64/decoder-a64.h"
  77 #include "hphp/vixl/a64/disasm-a64.h"
  78 #include "hphp/vixl/a64/macro-assembler-a64.h"
  79 #include "hphp/vixl/a64/simulator-a64.h"
  80
  81 #include "hphp/runtime/vm/jit/abi-arm.h"
  82 #include "hphp/runtime/vm/jit/arch.h"
  83 #include "hphp/runtime/vm/jit/unique-stubs-arm.h"
  84 #include "hphp/runtime/vm/jit/unique-stubs-x64.h"
  85 #include "hphp/runtime/vm/bytecode.h"
  86 #include "hphp/runtime/vm/php-debug.h"
  87 #include "hphp/runtime/vm/runtime.h"
  88 #include "hphp/runtime/base/complex-types.h"
  89 #include "hphp/runtime/base/execution-context.h"
  90 #include "hphp/runtime/base/runtime-option.h"
  91 #include "hphp/runtime/base/runtime-option-guard.h"
  92 #include "hphp/runtime/base/strings.h"
  93 #include "hphp/runtime/server/source-root-info.h"
  94 #include "hphp/runtime/base/zend-string.h"
  95 #include "hphp/runtime/ext/ext_closure.h"
  96 #include "hphp/runtime/ext/ext_continuation.h"
  97 #include "hphp/runtime/ext/ext_function.h"
  98 #include "hphp/runtime/vm/debug/debug.h"
  99 #include "hphp/runtime/base/stats.h"
 100 #include "hphp/runtime/vm/pendq.h"
 101 #include "hphp/runtime/vm/srckey.h"
 102 #include "hphp/runtime/vm/treadmill.h"
 103 #include "hphp/runtime/vm/repo.h"
 104 #include "hphp/runtime/vm/type-profile.h"
 105 #include "hphp/runtime/vm/member-operations.h"
 106 #include "hphp/runtime/vm/jit/abi-x64.h"
 107 #include "hphp/runtime/vm/jit/check.h"
 108 #include "hphp/runtime/vm/jit/hhbc-translator.h"
 109 #include "hphp/runtime/vm/jit/ir-translator.h"
 110 #include "hphp/runtime/vm/jit/normalized-instruction.h"
 111 #include "hphp/runtime/vm/jit/opt.h"
 112 #include "hphp/runtime/vm/jit/print.h"
 113 #include "hphp/runtime/vm/jit/region-selection.h"
 114 #include "hphp/runtime/vm/jit/srcdb.h"
 115 #include "hphp/runtime/base/rds.h"
 116 #include "hphp/runtime/vm/jit/tracelet.h"
 117 #include "hphp/runtime/vm/jit/translator-inline.h"
 118 #include "hphp/runtime/vm/jit/unwind-arm.h"
 119 #include "hphp/runtime/vm/jit/unwind-x64.h"
 120 #include "hphp/runtime/vm/jit/code-gen-helpers-arm.h"
 121 #include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
 122 #include "hphp/runtime/vm/jit/code-gen.h"
 123 #include "hphp/runtime/vm/jit/service-requests-x64.h"
 124 #include "hphp/runtime/vm/jit/jump-smash.h"
 125 #include "hphp/runtime/vm/jit/func-prologues.h"
 126 #include "hphp/runtime/vm/jit/func-prologues-x64.h"
 127 #include "hphp/runtime/vm/jit/func-prologues-arm.h"
 128 #include "hphp/runtime/vm/jit/debug-guards.h"
 129 #include "hphp/runtime/vm/jit/timer.h"
 130 #include "hphp/runtime/vm/unwind.h"
 131
 132 #include "hphp/runtime/vm/jit/mc-generator-internal.h"
 133
 134 namespace HPHP {
 135 namespace JIT {
 136
 137 TRACE_SET_MOD(mcg);
 138
 139 using namespace reg;
 140 using namespace Trace;
 141 using std::max;
 142
 143 #define TRANS_PERF_COUNTERS \
 144   TPC(translate) \
 145   TPC(retranslate) \
 146   TPC(interp_bb) \
 147   TPC(interp_instr) \
 148   TPC(interp_one) \
 149   TPC(max_trans) \
 150   TPC(enter_tc) \
 151   TPC(service_req)
 152
 153 static const char* const kInstrCountMCGName = "instr_mcg";
 154 static const char* const kInstrCountIRName = "instr_hhir";
 155
 156 #define TPC(n) "jit_" #n,
 157 static const char* const kPerfCounterNames[] = {
 158   TRANS_PERF_COUNTERS
 159   kInstrCountMCGName,
 160   kInstrCountIRName,
 161 };
 162 #undef TPC
 163
 164 #define TPC(n) tpc_ ## n,
 165 enum TransPerfCounter {
 166   TRANS_PERF_COUNTERS
 167   tpc_num_counters
 168 };
 169 #undef TPC
 170 static __thread int64_t s_perfCounters[tpc_num_counters];
 171 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
 172
 173 // The global MCGenerator object.
 174 MCGenerator* mcg;
 175
 176 // Register dirtiness: thread-private.
 177 __thread VMRegState tl_regState = VMRegState::CLEAN;
 178
 179 CppCall MCGenerator::getDtorCall(DataType type) {
 180   switch (type) {
 181   case BitwiseKindOfString:
 182     return CppCall(getMethodPtr(&StringData::release));
 183   case KindOfArray:
 184     return CppCall(getMethodPtr(&ArrayData::release));
 185   case KindOfObject:
 186     return CppCall(getMethodPtr(&ObjectData::release));
 187   case KindOfResource:
 188     return CppCall(getMethodPtr(&ResourceData::release));
 189   case KindOfRef:
 190     return CppCall(getMethodPtr(&RefData::release));
 191   default:
 192     assert(false);
 193     NOT_REACHED();
 194   }
 195 }
 196
 197 bool MCGenerator::profileSrcKey(const SrcKey& sk) const {
 198   if (!sk.func()->shouldPGO()) return false;
 199
 200   if (m_tx.profData()->optimized(sk.getFuncId())) return false;
 201
 202   // If we've hit EvalJitProfileRequests, then don't emit profiling
 203   // translations that would trigger an optimizing retranslation.
 204   // This limits the duration of profiling.  For
 205   // non-retranslate-triggering SrcKeys, whose profiling translations
 206   // only increment a counter, it's OK to emit them past the
 207   // EvalJitProfileRequests threshold as long as we're already
 208   // profiling this function (next check below) but haven't
 209   // retranslated this function yet (checked above).
 210   bool triggersRetrans = sk.func()->isEntry(sk.offset());
 211   if (triggersRetrans &&
 212       requestCount() > RuntimeOption::EvalJitProfileRequests) {
 213     return false;
 214   }
 215
 216   // For translations that don't trigger a retranslation, only emit
 217   // them if we've already generated a retranslation-triggering
 218   // translation for its function.
 219   if (!triggersRetrans &&
 220       !m_tx.profData()->profiling(sk.getFuncId())) {
 221     return false;
 222   }
 223
 224   return true;
 225 }
 226
 227 bool MCGenerator::profilePrologue(const SrcKey& sk) const {
 228   if (!sk.func()->shouldPGO()) return false;
 229
 230   if (m_tx.profData()->optimized(sk.getFuncId())) return false;
 231
 232   // Proflogues don't trigger retranslation, so only emit them if
 233   // we've already generated a retranslation-triggering translation
 234   // for its function or if we're about to generate one (which
 235   // requires depends on requestCount(), see profileSrcKey()).
 236   return m_tx.profData()->profiling(sk.getFuncId()) ||
 237          requestCount() <= RuntimeOption::EvalJitProfileRequests;
 238 }
 239
 240 /*
 241  * Invalidate the SrcDB entries for func's SrcKeys that have any
 242  * Profile translation.
 243  */
 244 void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
 245   assert(RuntimeOption::EvalJitPGO);
 246   FuncId funcId = func->getFuncId();
 247   for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
 248     invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
 249   }
 250 }
 251
 252 TCA MCGenerator::retranslate(const TranslArgs& args) {
 253   SrcRec* sr = m_tx.getSrcDB().find(args.m_sk);
 254
 255   bool locked = sr->tryLock();
 256   SCOPE_EXIT {
 257     if (locked) sr->freeLock();
 258   };
 259   if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.m_sk)) {
 260     // We are about to translate something known to be blacklisted by
 261     // debugger, exit early
 262     SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
 263     return nullptr;
 264   }
 265   LeaseHolder writer(Translator::WriteLease());
 266   if (!writer || !shouldTranslate()) return nullptr;
 267   if (!locked) {
 268     // Even though we knew above that we were going to skip
 269     // doing another translation, we wait until we get the
 270     // write lease, to avoid spinning through the tracelet
 271     // guards again and again while another thread is writing
 272     // to it.
 273     return sr->getTopTranslation();
 274   }
 275   SKTRACE(1, args.m_sk, "retranslate\n");
 276
 277   m_tx.setMode(profileSrcKey(args.m_sk) ? TransProfile : TransLive);
 278   SCOPE_EXIT{ m_tx.setMode(TransInvalid); };
 279
 280   return translate(args);
 281 }
 282
 283 TCA MCGenerator::retranslateOpt(TransID transId, bool align) {
 284   LeaseHolder writer(Translator::WriteLease());
 285   if (!writer || !shouldTranslate()) return nullptr;
 286   if (isDebuggerAttachedProcess()) return nullptr;
 287
 288   TRACE(1, "retranslateOpt: transId = %u\n", transId);
 289
 290   SCOPE_EXIT{ m_tx.setMode(TransInvalid); };
 291
 292   always_assert(m_tx.profData()->transRegion(transId) != nullptr);
 293
 294   Func*       func = m_tx.profData()->transFunc(transId);
 295   FuncId    funcId = func->getFuncId();
 296   const SrcKey& sk = m_tx.profData()->transSrcKey(transId);
 297
 298   if (m_tx.profData()->optimized(funcId)) return nullptr;
 299   m_tx.profData()->setOptimized(funcId);
 300
 301   bool setFuncBody = func->getDVFunclets().size() == 0;
 302
 303   func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);
 304
 305   // Invalidate SrcDB's entries for all func's SrcKeys.
 306   invalidateFuncProfSrcKeys(func);
 307
 308   // Regenerate the prologues and DV funclets before the actual function body.
 309   TCA start = regeneratePrologues(func, sk);
 310
 311   // Regionize func and translate all its regions.
 312   std::vector<RegionDescPtr> regions;
 313   regionizeFunc(func, this, regions);
 314
 315   for (auto region : regions) {
 316     m_tx.setMode(TransOptimize);
 317     always_assert(region->blocks.size() > 0);
 318     SrcKey regionSk = region->blocks[0]->start();
 319     auto translArgs = TranslArgs(regionSk, align).region(region);
 320     if (setFuncBody && regionSk.offset() == func->base()) {
 321       translArgs.setFuncBody();
 322       setFuncBody = false;
 323     }
 324     TCA regionStart = translate(translArgs);
 325     if (start == nullptr && regionSk == sk) {
 326       assert(regionStart);
 327       start = regionStart;
 328     }
 329   }
 330   assert(start);
 331   return start;
 332 }
 333
 334 /*
 335  * Find or create a translation for sk. Returns TCA of "best" current
 336  * translation. May return NULL if it is currently impossible to create
 337  * a translation.
 338  */
 339 TCA
 340 MCGenerator::getTranslation(const TranslArgs& args) {
 341   auto sk = args.m_sk;
 342   sk.func()->validate();
 343   SKTRACE(2, sk,
 344           "getTranslation: curUnit %s funcId %x offset %d\n",
 345           sk.unit()->filepath()->data(),
 346           sk.getFuncId(),
 347           sk.offset());
 348   SKTRACE(2, sk, "   funcId: %x \n", sk.func()->getFuncId());
 349
 350   if (Translator::liveFrameIsPseudoMain()) {
 351     SKTRACE(2, sk, "punting on pseudoMain\n");
 352     return nullptr;
 353   }
 354   if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
 355     TCA tca = sr->getTopTranslation();
 356     if (tca) {
 357       SKTRACE(2, sk, "getTranslation: found %p\n", tca);
 358       return tca;
 359     }
 360   }
 361   return createTranslation(args);
 362 }
 363
 364 int
 365 MCGenerator::numTranslations(SrcKey sk) const {
 366   if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
 367     return sr->translations().size();
 368   }
 369   return 0;
 370 }
 371
 372 static void populateLiveContext(RegionContext& ctx) {
 373   typedef RegionDesc::Location L;
 374
 375   const ActRec*     const fp {g_context->getFP()};
 376   const TypedValue* const sp {g_context->getStack().top()};
 377
 378   for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
 379     ctx.liveTypes.push_back(
 380       { L::Local{i}, liveTVType(frame_local(fp, i)) }
 381     );
 382   }
 383
 384   uint32_t stackOff = 0;
 385   visitStackElems(
 386     fp, sp, ctx.bcOffset,
 387     [&](const ActRec* ar) {
 388       // TODO(#2466980): when it's a Cls, we should pass the Class* in
 389       // the Type.
 390       auto const objOrCls =
 391         ar->hasThis()  ? Type::Obj.specialize(ar->getThis()->getVMClass()) :
 392         ar->hasClass() ? Type::Cls
 393                        : Type::Nullptr;
 394
 395       ctx.preLiveARs.push_back(
 396         { stackOff,
 397           ar->m_func,
 398           objOrCls
 399         }
 400       );
 401       FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
 402
 403       stackOff += kNumActRecCells;
 404     },
 405     [&](const TypedValue* tv) {
 406       ctx.liveTypes.push_back(
 407         { L::Stack{stackOff, ctx.spOffset - stackOff}, liveTVType(tv) }
 408       );
 409       stackOff++;
 410       FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
 411     }
 412   );
 413 }
 414
 415 TCA
 416 MCGenerator::createTranslation(const TranslArgs& args) {
 417   if (!shouldTranslate()) return nullptr;
 418
 419   /*
 420    * Try to become the writer. We delay this until we *know* we will have
 421    * a need to create new translations, instead of just trying to win the
 422    * lottery at the dawn of time. Hopefully lots of requests won't require
 423    * any new translation.
 424    */
 425   auto sk = args.m_sk;
 426   LeaseHolder writer(Translator::WriteLease());
 427   if (!writer) return nullptr;
 428
 429   if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
 430     TCA tca = sr->getTopTranslation();
 431     if (tca) {
 432       // Handle extremely unlikely race; someone may have just already
 433       // added the first instance of this SrcRec while we did a
 434       // non-blocking wait on the write lease.
 435       return tca;
 436     } else {
 437       // Since we are holding the write lease, we know that sk is properly
 438       // initialized, except that it has no translations (due to
 439       // replaceOldTranslations)
 440       return retranslate(args);
 441     }
 442   }
 443
 444   // We put retranslate requests at the end of our slab to more frequently
 445   //   allow conditional jump fall-throughs
 446   TCA astart = code.main().frontier();
 447   TCA stubstart = code.stubs().frontier();
 448   TCA req = emitServiceReq(code.stubs(), REQ_RETRANSLATE, sk.offset());
 449   SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
 450           sk.unit(), sk.offset(), req);
 451   SrcRec* sr = m_tx.getSrcRec(sk);
 452   sr->setFuncInfo(sk.func());
 453   sr->setAnchorTranslation(req);
 454
 455   size_t asize = code.main().frontier() - astart;
 456   size_t stubsize = code.stubs().frontier() - stubstart;
 457   assert(asize == 0);
 458   if (stubsize && RuntimeOption::EvalDumpTCAnchors) {
 459     TransRec tr(sk, sk.unit()->md5(), TransAnchor,
 460                 astart, asize, stubstart, stubsize);
 461     m_tx.addTranslation(tr);
 462     if (RuntimeOption::EvalJitUseVtuneAPI) {
 463       reportTraceletToVtune(sk.unit(), sk.func(), tr);
 464     }
 465
 466     if (m_tx.profData()) {
 467       m_tx.profData()->addTransNonProf(TransAnchor, sk);
 468     }
 469     assert(!m_tx.isTransDBEnabled() ||
 470            m_tx.getTransRec(stubstart)->kind == TransAnchor);
 471   }
 472
 473   return retranslate(args);
 474 }
 475
 476 TCA
 477 MCGenerator::lookupTranslation(SrcKey sk) const {
 478   if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
 479     return sr->getTopTranslation();
 480   }
 481   return nullptr;
 482 }
 483
 484 TCA
 485 MCGenerator::translate(const TranslArgs& args) {
 486   INC_TPC(translate);
 487
 488   assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
 489   assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
 490   assert(m_tx.mode() != TransInvalid);
 491   SCOPE_EXIT{ m_tx.setMode(TransInvalid); };
 492
 493   if (!args.m_interp) {
 494     if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) {
 495       RuntimeOption::EvalJit = false;
 496       ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
 497       return nullptr;
 498     }
 499   }
 500
 501   Func* func = const_cast<Func*>(args.m_sk.func());
 502   CodeCache::Selector asmSel(CodeCache::Selector::Args(code)
 503                              .profile(m_tx.mode() == TransProfile)
 504                              .hot(func->attrs() & AttrHot));
 505
 506   if (args.m_align) {
 507     X64::moveToAlign(code.main(), kNonFallthroughAlign);
 508   }
 509
 510   TCA start = code.main().frontier();
 511
 512   if (RuntimeOption::EvalJitDryRuns &&
 513       (m_tx.mode() == TransLive || m_tx.mode() == TransProfile)) {
 514     auto const useRegion =
 515       RuntimeOption::EvalJitRegionSelector == "tracelet";
 516     always_assert(useRegion ||
 517                   RuntimeOption::EvalJitRegionSelector == "");
 518
 519     auto dryArgs = args;
 520
 521     dryArgs.dryRun(!useRegion);
 522     {
 523       // First, run translateWork with the tracelet region selector. If
 524       // useRegion == false, the generated code will be thrown away at the end.
 525       OPTION_GUARD(EvalJitRegionSelector, "tracelet");
 526       OPTION_GUARD(EvalHHIRRelaxGuards, true);
 527       OPTION_GUARD(EvalHHBCRelaxGuards, false);
 528       translateWork(dryArgs);
 529     }
 530
 531     dryArgs.dryRun(useRegion);
 532     {
 533       // Now translate with analyze(), throwing away the generated code if
 534       // useRegion == true.
 535       OPTION_GUARD(EvalJitRegionSelector, "");
 536       OPTION_GUARD(EvalHHIRRelaxGuards, false);
 537       OPTION_GUARD(EvalHHBCRelaxGuards, true);
 538       translateWork(dryArgs);
 539     }
 540   } else {
 541     translateWork(args);
 542   }
 543
 544   if (args.m_setFuncBody) {
 545     func->setFuncBody(start);
 546   }
 547   SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
 548           getTopTranslation(args.m_sk), start);
 549   return start;
 550 }
 551
 552 TCA
 553 MCGenerator::getCallArrayPrologue(Func* func) {
 554   TCA tca = func->getFuncBody();
 555   if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
 556
 557   DVFuncletsVec dvs = func->getDVFunclets();
 558
 559   if (dvs.size()) {
 560     LeaseHolder writer(Translator::WriteLease());
 561     if (!writer) return nullptr;
 562     tca = func->getFuncBody();
 563     if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
 564     switch (arch()) {
 565       case Arch::X64:
 566         tca = X64::emitCallArrayPrologue(func, dvs);
 567         break;
 568       case Arch::ARM:
 569         tca = ARM::emitCallArrayPrologue(func, dvs);
 570         break;
 571     }
 572     func->setFuncBody(tca);
 573   } else {
 574     SrcKey sk(func, func->base(), false);
 575     tca = mcg->getTranslation(TranslArgs(sk, false).setFuncBody());
 576   }
 577
 578   return tca;
 579 }
 580
 581 void
 582 MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
 583                                  const Func* func) {
 584   DEBUG_ONLY std::unique_ptr<LeaseHolder> writer;
 585   for (int i = 0; i < numPrologues; i++) {
 586     if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
 587         && funcPrologueHasGuard(prologues[i], func)) {
 588       if (debug) {
 589         /*
 590          * Unit's are sometimes created racily, in which case all
 591          * but the first are destroyed immediately. In that case,
 592          * the Funcs of the destroyed Units never need their
 593          * prologues smashing, and it would be a lock rank violation
 594          * to take the write lease here.
 595          * In all other cases, Funcs are destroyed via a delayed path
 596          * (treadmill) and the rank violation isn't an issue.
 597          *
 598          * Also note that we only need the write lease because we
 599          * mprotect the translation cache in debug builds.
 600          */
 601         if (!writer) {
 602           writer.reset(new LeaseHolder(Translator::WriteLease(),
 603                        LeaseAcquire::BLOCKING));
 604         }
 605       }
 606       switch (arch()) {
 607         case Arch::X64:
 608           X64::funcPrologueSmashGuard(prologues[i], func);
 609           break;
 610         case Arch::ARM:
 611           ARM::funcPrologueSmashGuard(prologues[i], func);
 612           break;
 613       }
 614     }
 615   }
 616 }
 617
 618 /*
 619  * funcPrologue --
 620  *
 621  * Given a callee and a number of args, match up to the callee's
 622  * argument expectations and dispatch.
 623  *
 624  * Call/return hand-shaking is a bit funny initially. At translation time,
 625  * we don't necessarily know what function we're calling. For instance,
 626  *
 627  *   f(g());
 628  *
 629  * Will lead to a set of basic blocks like:
 630  *
 631  * b1: pushfuncd "f"
 632  *     pushfuncd "g"
 633  *     fcall
 634  * b2: fcall
 635  *
 * The fcall labelled "b2" above is not statically bindable in our
 637  * execution model.
 638  *
 639  * We decouple the call work into a per-callsite portion, responsible
 640  * for recording the return address, and a per-(callee, numArgs) portion,
 641  * responsible for fixing up arguments and dispatching to remaining
 642  * code. We call the per-callee portion a "prologue."
 643  *
 644  * Also, we are called from two distinct environments. From REQ_BIND_CALL,
 645  * we're running "between" basic blocks, with all VM registers sync'ed.
 646  * However, we're also called in the middle of basic blocks, when dropping
 647  * entries into func->m_prologues. So don't go around using the
 648  * translation-time values of vmfp()/vmsp(), since they have an
 649  * unpredictable relationship to the source.
 650  */
 651 bool
 652 MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
 653                                  TCA& prologue) const {
 654   prologue = (TCA)func->getPrologue(paramIdx);
 655   if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
 656     TRACE(1, "cached prologue %s(%d) -> cached %p\n",
 657           func->fullName()->data(), paramIdx, prologue);
 658     assert(isValidCodeAddress(prologue));
 659     return true;
 660   }
 661   return false;
 662 }
 663
 664 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
 665   assert(tl_regState == VMRegState::DIRTY);
 666   tl_regState = VMRegState::CLEAN;
 667   vmfp() = (Cell*)ar;
 668   vmsp() = sp;
 669   vmpc() = ar->unit()->at(pcOff);
 670 }
 671
 672 TCA
 673 MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar) {
 674   func->validate();
 675   TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
 676   int const numParams = func->numNonVariadicParams();
 677   int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
 678
 679   bool const funcIsMagic = func->isMagic();
 680
 681   // Do a quick test before grabbing the write lease
 682   TCA prologue;
 683   if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
 684
 685   Offset entry = func->getEntryForNumArgs(nPassed);
 686   SrcKey funcBody(func, entry, false);
 687
 688   if (func->isClonedClosure()) {
 689     assert(ar);
 690     interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
 691     TCA tca = getTranslation(TranslArgs(funcBody, false));
 692     tl_regState = VMRegState::DIRTY;
 693     if (tca) {
 694       // racy, but ok...
 695       func->setPrologue(paramIndex, tca);
 696     }
 697     return tca;
 698   }
 699
 700   LeaseHolder writer(Translator::WriteLease());
 701   if (!writer || !shouldTranslate()) return nullptr;
 702
 703   // Double check the prologue array now that we have the write lease
 704   // in case another thread snuck in and set the prologue already.
 705   if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
 706
 707   // We're comming from a BIND_CALL service request, so enable
 708   // profiling if we haven't optimized the function entry yet.
 709   assert(m_tx.mode() == TransInvalid || m_tx.mode() == TransPrologue);
 710   if (m_tx.mode() == TransInvalid && profilePrologue(funcBody)) {
 711     m_tx.setMode(TransProflogue);
 712   } else {
 713     m_tx.setMode(TransPrologue);
 714   }
 715   SCOPE_EXIT{ m_tx.setMode(TransInvalid); };
 716
 717   CodeCache::Selector asmSel(CodeCache::Selector::Args(code)
 718                              .profile(m_tx.mode() == TransProflogue)
 719                              .hot(func->attrs() & AttrHot));
 720
 721   // If we're close to a cache line boundary, just burn some space to
 722   // try to keep the func and its body on fewer total lines.
 723   if (((uintptr_t)code.main().frontier() & kX64CacheLineMask) >= 32) {
 724     X64::moveToAlign(code.main(), kX64CacheLineSize);
 725   }
 726
 727   // Careful: this isn't necessarily the real entry point. For funcIsMagic
 728   // prologues, this is just a possible prologue.
 729   TCA aStart    = code.main().frontier();
 730   TCA start     = aStart;
 731   TCA stubStart = code.stubs().frontier();
 732
 733   auto const skFuncBody = [&] {
 734     switch (arch()) {
 735     case Arch::X64:
 736       return funcIsMagic
 737         ? X64::emitMagicFuncPrologue(func, nPassed, start)
 738         : X64::emitFuncPrologue(func, nPassed, start);
 739     case Arch::ARM:
 740       return ARM::emitFuncPrologue(
 741         code.main(), code.stubs(), func, funcIsMagic, nPassed, start, aStart
 742       );
 743     }
 744     not_reached();
 745   }();
 746
 747   assert(funcPrologueHasGuard(start, func));
 748   TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
 749         this, func->fullName()->data(), nPassed, start);
 750   assert(isValidCodeAddress(start));
 751   func->setPrologue(paramIndex, start);
 752
 753   assert(m_tx.mode() == TransPrologue || m_tx.mode() == TransProflogue);
 754   TransRec tr(skFuncBody, func->unit()->md5(),
 755               m_tx.mode(), aStart, code.main().frontier() - aStart,
 756               stubStart, code.stubs().frontier() - stubStart);
 757   m_tx.addTranslation(tr);
 758   if (RuntimeOption::EvalJitUseVtuneAPI) {
 759     reportTraceletToVtune(func->unit(), func, tr);
 760   }
 761
 762   if (m_tx.profData()) {
 763     m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
 764   }
 765
 766   recordGdbTranslation(skFuncBody, func,
 767                        code.main(), aStart,
 768                        false, true);
 769   recordBCInstr(OpFuncPrologue, code.main(), start);
 770
 771   return start;
 772 }
 773
 774 /**
 775  * Given the proflogueTransId for a TransProflogue translation,
 776  * regenerate the prologue (as a TransPrologue).  Returns the starting
 777  * address for the translation corresponding to triggerSk, if such
 778  * translation is generated; otherwise returns nullptr.
 779  */
 780 TCA MCGenerator::regeneratePrologue(TransID prologueTransId,
 781                                     SrcKey triggerSk) {
 782   Func* func = m_tx.profData()->transFunc(prologueTransId);
 783   int  nArgs = m_tx.profData()->prologueArgs(prologueTransId);
 784
 785   // Regenerate the prologue.
 786   func->resetPrologue(nArgs);
 787   m_tx.setMode(TransPrologue);
 788   SCOPE_EXIT { m_tx.setMode(TransInvalid); };
 789   TCA start = getFuncPrologue(func, nArgs);
 790   func->setPrologue(nArgs, start);
 791
 792   // Smash callers of the old prologue with the address of the new one.
 793   PrologueCallersRec* pcr =
 794     m_tx.profData()->prologueCallers(prologueTransId);
 795   for (TCA toSmash : pcr->mainCallers()) {
 796     smashCall(toSmash, start);
 797   }
 798   // If the prologue has a guard, then smash its guard-callers as well.
 799   if (funcPrologueHasGuard(start, func)) {
 800     TCA guard = funcPrologueToGuard(start, func);
 801     for (TCA toSmash : pcr->guardCallers()) {
 802       smashCall(toSmash, guard);
 803     }
 804   }
 805   pcr->clearAllCallers();
 806
 807   // If this prologue has a DV funclet, then generate a translation
 808   // for the DV funclet right after the prologue.  However, skip
 809   // cloned closures because their prologues are actually the DV
 810   // funclets already.
 811   TCA triggerSkStart = nullptr;
 812   if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
 813     auto paramInfo = func->params()[nArgs];
 814     if (paramInfo.hasDefaultValue()) {
 815       m_tx.setMode(TransOptimize);
 816       SrcKey  funcletSK(func, paramInfo.funcletOff(), false);
 817       TransID funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
 818       if (funcletTransId != InvalidID) {
 819         invalidateSrcKey(funcletSK);
 820         TCA dvStart = translate(TranslArgs(funcletSK, false).
 821                                 transId(funcletTransId));
 822         if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
 823           triggerSkStart = dvStart;
 824         }
 825         // Flag that this translation has been retranslated, so that
 826         // it's not retranslated again along with the function body.
 827         m_tx.profData()->setOptimized(funcletSK);
 828       }
 829     }
 830   }
 831
 832   return triggerSkStart;
 833 }
 834
 835 /**
 836  * Regenerate all prologues of func that were previously generated.
 837  * The prologues are sorted in ascending order of profile counters.
 838  * For prologues with corresponding DV funclets, their corresponding
 839  * DV funclet will be regenerated right after them.  The idea is to
 840  * generate the function body right after calling this function, so
 841  * that all prologues are placed right before it, and with the hottest
 842  * prologues closer to it.
 843  *
 844  * Returns the starting address for the translation corresponding to
 845  * triggerSk, if such translation is generated; otherwise returns
 846  * nullptr.
 847  */
 848 TCA MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
 849   TCA triggerStart = nullptr;
 850   std::vector<TransID> prologTransIDs;
 851
 852   auto const limit = func->numNonVariadicParams() + 1;
 853   for (int nArgs = 0; nArgs <= limit; nArgs++) {
 854     TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
 855     if (tid != InvalidID) {
 856       prologTransIDs.push_back(tid);
 857     }
 858   }
 859
 860   std::sort(prologTransIDs.begin(), prologTransIDs.end(),
 861           [&](TransID t1, TransID t2) -> bool {
 862             // This will sort in ascending order. Note that transCounters start
 863             // at JitPGOThreshold and count down.
 864             return m_tx.profData()->transCounter(t1) >
 865                    m_tx.profData()->transCounter(t2);
 866           });
 867
 868   for (TransID tid : prologTransIDs) {
 869     TCA start = regeneratePrologue(tid, triggerSk);
 870     if (triggerStart == nullptr && start != nullptr) {
 871       triggerStart = start;
 872     }
 873   }
 874
 875   return triggerStart;
 876 }
 877
 878 /*
 879  * bindJmp --
 880  *
 881  *   Runtime service handler that patches a jmp to the translation of
 882  *   u:dest from toSmash.
 883  */
 884 TCA
 885 MCGenerator::bindJmp(TCA toSmash, SrcKey destSk,
 886                      ServiceRequest req, bool& smashed) {
 887   TCA tDest = getTranslation(TranslArgs(destSk, false));
 888   if (!tDest) return nullptr;
 889   LeaseHolder writer(Translator::WriteLease());
 890   if (!writer) return tDest;
 891   SrcRec* sr = m_tx.getSrcRec(destSk);
 892   // The top translation may have changed while we waited for the
 893   // write lease, so read it again.  If it was replaced with a new
 894   // translation, then bind to the new one.  If it was invalidated,
 895   // then don't bind the jump.
 896   tDest = sr->getTopTranslation();
 897   if (tDest == nullptr) return nullptr;
 898
 899   if (req == REQ_BIND_ADDR) {
 900     auto addr = reinterpret_cast<TCA*>(toSmash);
 901     if (*addr == tDest) {
 902       // Already smashed
 903       return tDest;
 904     }
 905     sr->chainFrom(IncomingBranch::addr(addr));
 906   } else if (req == REQ_BIND_JCC || req == REQ_BIND_SIDE_EXIT) {
 907     auto jt = jccTarget(toSmash);
 908     assert(jt);
 909     if (jt == tDest) {
 910       // Already smashed
 911       return tDest;
 912     }
 913     sr->chainFrom(IncomingBranch::jccFrom(toSmash));
 914   } else {
 915     assert(!jccTarget(toSmash));
 916     if (!jmpTarget(toSmash) || jmpTarget(toSmash) == tDest) {
 917       // Already smashed
 918       return tDest;
 919     }
 920     sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
 921   }
 922   smashed = true;
 923   return tDest;
 924 }
 925
 926 /*
 927  * When we end a tracelet with a conditional jump, emitCondJmp first emits:
 928  *
 929  *   1:         j<CC> stubJmpccFirst
 930  *              jmp   stubJmpccFirst
 931  *
 932  * Our "taken" argument tells us whether the branch at 1: was taken or
 933  * not; and therefore which of offTaken and offNotTaken to continue executing.
 934  * If we did take the branch, we now rewrite the code so that the branch is
 935  * straightened. This predicts that subsequent executions will go the same way
 936  * as the first execution.
 937  *
 938  *              jn<CC> stubJmpccSecond:offNotTaken
 939  *              nop5   ; fallthru, or jmp if there's already a translation.
 940  * offTaken:
 941  *
 942  * If we did not take the branch, we leave the sense of the condition
 943  * intact, while patching it up to go to the unexplored code:
 944  *
 945  *              j<CC> stubJmpccSecond:offTaken
 946  *              nop5
 947  * offNotTaken:
      *
      * Returns the translation for the side being explored.  Returns nullptr
      * if the write lease could not be acquired or no translation is available.
      * smashed is set to true only when the jcc at toSmash was rewritten; if the
      * sequence was already smashed, the translation is returned and smashed is
      * left untouched.
 948  */
 949 TCA
 950 MCGenerator::bindJmpccFirst(TCA toSmash,
 951                             Offset offTaken, Offset offNotTaken,
 952                             bool taken,
 953                             ConditionCode cc,
 954                             bool& smashed) {
 955   const Func* f = liveFunc();
 956   LeaseHolder writer(Translator::WriteLease());
 957   if (!writer) return nullptr;
 958   Offset offWillExplore = taken ? offTaken : offNotTaken;
 959   Offset offWillDefer = taken ? offNotTaken : offTaken;
 960   SrcKey dest(f, offWillExplore, liveResumed());
 961   TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
 962         "taken %d\n",
 963         offWillExplore, offWillDefer, cc, taken);
 964
 965   // We want the branch to point to whichever side has not been explored
 966   // yet.
 967   if (taken) {
 968     cc = ccNegate(cc);
 969   }
 970
 971   auto& cb = code.blockFor(toSmash);
 972   Asm as { cb };
 973   // It's not clear where chainFrom should go if the block being patched is
       // the stubs block, so require that toSmash lives elsewhere.
 974   assert(&cb != &code.stubs());
 975
 976   using namespace X64;
 977
 978   // can we just directly fall through?
 979   // a jmp + jz takes 5 + 6 = 11 bytes
       // Falling through is only possible when the jcc+jmp pair ends exactly at
       // the block's frontier and dest has no entry in the SrcDB yet.
 980   bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
 981     !m_tx.getSrcDB().find(dest);
 982
 983   TCA tDest;
 984   tDest = getTranslation(TranslArgs(dest, !fallThru));
 985   if (!tDest) {
 986     return 0;
 987   }
 988
       // In the freshly emitted sequence the jcc and the jmp share one target;
       // if they now differ, the sequence has already been rewritten.
 989   if (jmpTarget(toSmash + kJmpccLen) != jccTarget(toSmash)) {
 990     // someone else already smashed this one. Ideally we would
 991     // just re-execute from toSmash - except the flags will have
 992     // been trashed.
 993     return tDest;
 994   }
 995
       // Stub that issues REQ_BIND_JMPCC_SECOND for the deferred side.
 996   TCA stub = emitEphemeralServiceReq(code.stubs(), getFreeStub(),
 997                                      REQ_BIND_JMPCC_SECOND, toSmash,
 998                                      offWillDefer, cc);
 999
1000   smashed = true;
1001   assert(Translator::WriteLease().amOwner());
1002   /*
1003    * Roll over the jcc and the jmp/fallthru. E.g., from:
1004    *
1005    *     toSmash:    jcc   <jmpccFirstStub>
1006    *     toSmash+6:  jmp   <jmpccFirstStub>
1007    *     toSmash+11: <probably the new translation == tdest>
1008    *
1009    * to:
1010    *
1011    *     toSmash:    j[n]z <jmpccSecondStub>
1012    *     toSmash+6:  nop5
1013    *     toSmash+11: newHotness
1014    */
       // NOTE(review): cg presumably repositions cb's frontier to toSmash for its
       // lifetime, so that as.jcc overwrites the old jcc and cb.frontier() below
       // is the address of the following jmp -- confirm against CodeCursor.
1015   CodeCursor cg(cb, toSmash);
1016   as.jcc(cc, stub);
1017   m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
1018   TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
1019   return tDest;
1020 }
1021
1022 // smashes a jcc to point to a new destination
1023 TCA
1024 MCGenerator::bindJmpccSecond(TCA toSmash, const Offset off,
1025                              ConditionCode cc, bool& smashed) {
1026   const Func* f = liveFunc();
1027   SrcKey dest(f, off, liveResumed());
1028   TCA branch = getTranslation(TranslArgs(dest, true));
1029   if (branch) {
1030     LeaseHolder writer(Translator::WriteLease());
1031     if (writer) {
1032       if (branch == jccTarget(toSmash)) {
1033         // already smashed
1034         return branch;
1035       } else {
1036         smashed = true;
1037         SrcRec* destRec = m_tx.getSrcRec(dest);
1038         destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
1039       }
1040     }
1041   }
1042   return branch;
1043 }
1044
1045 void MCGenerator::emitResolvedDeps(const ChangeMap& resolvedDeps) {
1046   for (const auto dep : resolvedDeps) {
1047     m_tx.irTrans()->assertType(dep.first, dep.second->rtt);
1048   }
1049 }
1050
1051 void
1052 MCGenerator::checkRefs(SrcKey sk,
1053                        const RefDeps& refDeps,
1054                        SrcRec& fail) {
1055   if (refDeps.size() == 0) {
1056     return;
1057   }
1058
1059   // Set up guards for each pushed ActRec that we've made reffiness
1060   // assumptions about
1061   for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
1062        it != refDeps.m_arMap.end(); ++it) {
1063     // Be careful! The actual Func might have fewer refs than the number
1064     // of args we're passing. To forestall this, we always prepare at
1065     // least 64 bits in the Func, and always fill out the refBitVec
1066     // to a multiple of 64 bits
1067
1068     int entryArDelta = it->first;
1069
1070     m_tx.irTrans()->hhbcTrans().guardRefs(entryArDelta,
1071                                           it->second.m_mask,
1072                                           it->second.m_vals);
1073   }
1074 }
1075
1076 namespace {
1077 class FreeRequestStubTrigger {
1078   TCA m_stub;
1079  public:
1080   explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
1081     TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
1082   }
1083   void operator()() {
1084     TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
1085     if (mcg->freeRequestStub(m_stub) != true) {
1086       // If we can't free the stub, enqueue again to retry.
1087       TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
1088       Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
1089     }
1090   }
1091 };
1092 }
1093
#ifdef DEBUG

// Debug-only scope guard that tracks, per thread, how many DepthGuards are
// currently alive, tracing each entry and exit.
struct DepthGuard {
  static __thread int m_depth;

  DepthGuard() {
    ++m_depth;
    TRACE(2, "DepthGuard: %d {\n", m_depth);
  }

  ~DepthGuard() {
    TRACE(2, "DepthGuard: %d }\n", m_depth);
    --m_depth;
  }

  // True when this is the only live DepthGuard on the current thread.
  bool depthOne() const { return m_depth == 1; }
};
__thread int DepthGuard::m_depth;

#else

// Non-debug stand-in: keeps no state and never reports depth one.
struct DepthGuard {
  bool depthOne() const { return false; }
};

#endif
1110
1111 /*
1112  * enterTCHelper does not save callee-saved registers except %rbp. This means
1113  * when we call it from C++, we have to tell gcc to clobber all the other
1114  * callee-saved registers.
      *
      * CALLEE_SAVED_BARRIER() expands to an empty asm statement whose clobber
      * list names those registers for the target architecture.  Building for an
      * architecture that is not listed here is a compile-time error.
1115  */
1116 #if defined(__x86_64__)
1117 #  define CALLEE_SAVED_BARRIER() \
1118   asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
1119 #elif defined(__AARCH64EL__)
1120 #  define CALLEE_SAVED_BARRIER() \
1121   asm volatile("" : : : "x19", "x20", "x21", "x22", "x23", "x24", "x25", \
1122                "x26", "x27", "x28")
1123 #else
1124 #  error What are the callee-saved registers on your system?
1125 #endif
1126
1127 /*
1128  * enterTCHelper is a handwritten assembly function that transfers control in
1129  * and out of the TC.
      *
      * The static_asserts below pin down the register assignments and the
      * numeric value of REQ_BIND_CALL that, per their messages, the assembly
      * depends on; if one fires, the assembly has to be updated to match.
      *
      * NOTE(review): the meaning of each argument is inferred from its name and
      * from the call in MCGenerator::enterTC (vmsp(), vmfp(), start, &info,
      * vmFirstAR(), RDS::tl_base) -- confirm against the assembly source.
1130  */
1131 static_assert(X64::rVmSp == rbx &&
1132               X64::rVmFp == rbp &&
1133               X64::rVmTl == r12 &&
1134               X64::rStashedAR == r15,
1135               "__enterTCHelper needs to be modified to use the correct ABI");
1136 static_assert(REQ_BIND_CALL == 0x1,
1137               "Update assembly test for REQ_BIND_CALL in __enterTCHelper");
1138 extern "C" void enterTCHelper(Cell* vm_sp,
1139                               Cell* vm_fp,
1140                               TCA start,
1141                               TReqInfo* infoPtr,
1142                               ActRec* firstAR,
1143                               void* targetCacheBase);
1144
1145 /*
1146  * A partial equivalent of enterTCHelper, used to set up the ARM simulator.
1147  */
1148 uintptr_t setupSimRegsAndStack(vixl::Simulator& sim,
1149                                uintptr_t saved_rStashedAr) {
1150   sim.   set_xreg(ARM::rGContextReg.code(), g_context.getNoCheck());
1151   sim.   set_xreg(ARM::rVmFp.code(), vmfp());
1152   sim.   set_xreg(ARM::rVmSp.code(), vmsp());
1153   sim.   set_xreg(ARM::rVmTl.code(), RDS::tl_base);
1154   sim.   set_xreg(ARM::rStashedAR.code(), saved_rStashedAr);
1155
1156   // Leave space for register spilling and MInstrState.
1157   sim.   set_sp(sim.sp() - kReservedRSPTotalSpace);
1158   assert(sim.is_on_stack(reinterpret_cast<void*>(sim.sp())));
1159
1160   auto spOnEntry = sim.sp();
1161
1162   // Push the link register onto the stack. The link register is
1163   // technically caller-saved; what this means in practice is that
1164   // non-leaf functions push it at the very beginning and pop it just
1165   // before returning (as opposed to just saving it around calls).
1166   sim.   set_sp(sim.sp() - 16);
1167   *reinterpret_cast<uint64_t*>(sim.sp()) = sim.lr();
1168
1169   return spOnEntry;
1170 }
1171
1172
     /*
      * Service-request record shared between MCGenerator::enterTC and the code
      * it enters: enterTC passes a pointer to one of these to enterTCHelper and
      * reads the request back out of it once control returns.
      *
      * NOTE(review): the field order and sizes are presumably relied upon by the
      * hand-written enterTCHelper assembly -- confirm before changing the layout.
      */
1173 struct TReqInfo {
       // Which service request was made; handleServiceRequest casts this to a
       // ServiceRequest.
1174   uintptr_t requestNum;
       // Request-specific arguments; their meaning depends on requestNum.
1175   uintptr_t args[5];
1176
1177   // Some TC registers need to be preserved across service requests.
1178   uintptr_t saved_rStashedAr;
1179
1180   // Stub addresses are passed back to allow us to recycle used stubs.
1181   TCA stubAddr;
1182 };
1183
1184
     /*
      * Enters the translation cache and keeps executing there, handling the
      * service requests that come back out, until the VM should stop running
      * at this nesting level.
      *
      * start: address in the TC to begin at, or nullptr.
      * data:  when start is non-null, the value stored in
      *        info.saved_rStashedAr (a non-null value also makes the initial
      *        request REQ_BIND_CALL); when start is nullptr, a pointer to the
      *        SrcKey to begin executing at.
      *
      * Returns when the interpreter runs out of PC, when REQ_EXIT is received,
      * or when handleServiceRequest() returns false.
      */
1185 void
1186 MCGenerator::enterTC(TCA start, void* data) {
1187   if (debug) {
1188     fflush(stdout);
1189     fflush(stderr);
1190   }
1191   DepthGuard d;
1192   TReqInfo info;
1193   SrcKey sk;
1194
1195   if (LIKELY(start != nullptr)) {
1196     info.requestNum = data ? REQ_BIND_CALL : -1;
1197     info.saved_rStashedAr = (uintptr_t)data;
1198   } else {
         // No TC address was supplied: data points at the SrcKey to run, so look
         // up (or create) its translation.
1199     info.requestNum = -1;
1200     info.saved_rStashedAr = 0;
1201     sk = *(SrcKey*)data;
1202     start = getTranslation(TranslArgs(sk, true));
1203   }
1204   for (;;) {
1205     assert(sizeof(Cell) == 16);
1206     assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1207     assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1208
1209     Translator::WriteLease().gremlinUnlock();
1210     // Keep dispatching until we end up somewhere the translator
1211     // recognizes, or we luck out and the leaseholder exits.
1212     while (!start) {
1213       TRACE(2, "enterTC forwarding BB to interpreter\n");
1214       g_context->m_pc = sk.unit()->at(sk.offset());
1215       INC_TPC(interp_bb);
1216       g_context->dispatchBB();
1217       PC newPc = g_context->getPC();
           // A null PC means there is nothing left to execute: clear the frame
           // pointer and leave.
1218       if (!newPc) { g_context->m_fp = 0; return; }
1219       sk = SrcKey(liveFunc(), newPc, liveResumed());
1220       start = getTranslation(TranslArgs(sk, true));
1221     }
1222     assert(start == m_tx.uniqueStubs.funcBodyHelperThunk ||
1223            isValidCodeAddress(start) ||
1224            (start == m_tx.uniqueStubs.fcallHelperThunk &&
1225             info.saved_rStashedAr == (uintptr_t)data));
1226     assert(!Translator::WriteLease().amOwner());
1227     const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
1228     func->validate();
1229     INC_TPC(enter_tc);
1230
1231     TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
1232           vmfp(), func->name()->data(), vmsp());
1233     tl_regState = VMRegState::DIRTY;
1234
1235     if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1236       auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
1237       Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
1238     }
1239
1240     switch (arch()) {
1241       case Arch::X64: {
1242         // We have to force C++ to spill anything that might be in a
1243         // callee-saved register (aside from rbp). enterTCHelper does not save
1244         // them.
1245         CALLEE_SAVED_BARRIER();
1246         enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
1247                       RDS::tl_base);
1248         CALLEE_SAVED_BARRIER();
1249         break;
1250       }
1251       case Arch::ARM: {
1252         // This is a pseudo-copy of the logic in enterTCHelper: it sets up the
1253         // simulator's registers and stack, runs the translation, and gets the
1254         // necessary information out of the registers when it's done.
1255
1256         vixl::PrintDisassembler disasm(std::cout);
1257         vixl::Decoder decoder;
1258         if (getenv("ARM_DISASM")) {
1259           decoder.AppendVisitor(&disasm);
1260         }
1261         vixl::Simulator sim(&decoder, std::cout);
1262         SCOPE_EXIT {
1263           Stats::inc(Stats::vixl_SimulatedInstr, sim.instr_count());
1264           Stats::inc(Stats::vixl_SimulatedLoad, sim.load_count());
1265           Stats::inc(Stats::vixl_SimulatedStore, sim.store_count());
1266         };
1267
1268         sim.set_exception_hook(ARM::simulatorExceptionHook);
1269
             // Register the simulator as active for the duration of this entry.
1270         g_context->m_activeSims.push_back(&sim);
1271         SCOPE_EXIT { g_context->m_activeSims.pop_back(); };
1272
1273         DEBUG_ONLY auto spOnEntry =
1274           setupSimRegsAndStack(sim, info.saved_rStashedAr);
1275
1276         // The handshake is different in the case of REQ_BIND_CALL. The code
1277         // we're jumping to expects to find a return address in x30, and a saved
1278         // return address on the stack.
1279         if (info.requestNum == REQ_BIND_CALL) {
1280           // Put the call's return address in the link register.
1281           auto* ar = reinterpret_cast<ActRec*>(info.saved_rStashedAr);
1282           sim.set_lr(ar->m_savedRip);
1283         }
1284
1285         std::cout.flush();
1286         sim.RunFrom(vixl::Instruction::Cast(start));
1287         std::cout.flush();
1288
1289         assert(sim.sp() == spOnEntry);
1290
             // Read the service request back out of the simulated registers.
1291         info.requestNum = sim.xreg(0);
1292         info.args[0] = sim.xreg(1);
1293         info.args[1] = sim.xreg(2);
1294         info.args[2] = sim.xreg(3);
1295         info.args[3] = sim.xreg(4);
1296         info.args[4] = sim.xreg(5);
1297         info.saved_rStashedAr = sim.xreg(ARM::rStashedAR.code());
1298
1299         info.stubAddr = reinterpret_cast<TCA>(sim.xreg(ARM::rAsm.code()));
1300         break;
1301       }
1302     }
1303
1304     assert(g_context->m_stack.isValidAddress((uintptr_t)vmsp()));
1305
1306     tl_regState = VMRegState::CLEAN; // Careful: pc isn't sync'ed yet.
1307     TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
1308           vmfp(), vmsp());
1309
1310     if (debug) {
1311       // Debugging code: cede the write lease half the time.
1312       if (RuntimeOption::EvalJitStressLease) {
1313         if (d.depthOne() && (rand() % 2) == 0) {
1314           Translator::WriteLease().gremlinLock();
1315         }
1316       }
1317       // Ensure that each case either returns, or drives start to a valid
1318       // value.
1319       start = TCA(0xbee5face);
1320     }
1321
1322     TRACE(2, "enterTC: request(%s) args: %" PRIxPTR " %" PRIxPTR " %"
1323              PRIxPTR " %" PRIxPTR " %" PRIxPTR "\n",
1324           serviceReqName(info.requestNum),
1325           info.args[0], info.args[1], info.args[2], info.args[3],
1326           info.args[4]);
1327
1328     if (LIKELY(info.requestNum == REQ_EXIT)) {
1329       vmfp() = nullptr;
1330       return;
1331     }
         // Any other request is serviced here; the handler updates start and sk
         // for the next iteration, or returns false to stop.
1332     if (!handleServiceRequest(info, start, sk)) return;
1333   }
1334 }
1335
1336 /*
1337  * The contract is that each case will set sk to the place where
1338  * execution should resume, and optionally set start to the hardware
1339  * translation of the resumption point (or otherwise set it to null).
1340  * Returns false if we need to halt this nesting of the VM.
1341  *
1342  * start and sk might be subtly different; i.e., there are cases where
1343  * start != NULL && start != getTranslation(sk). For instance,
1344  * REQ_BIND_CALL has not finished executing the OpCall when it gets
1345  * here, and has even done some work on its behalf. sk == OpFCall,
1346  * while start == the point in the TC that's "half-way through" the
1347  * Call instruction. If we punt to the interpreter, the interpreter
1348  * will redo some of the work that the translator has already done.
      *
      * When a request ends up smashing its call site and info.stubAddr is
      * set, the stub is handed to the Treadmill to be freed.
1349  */
1350 bool MCGenerator::handleServiceRequest(TReqInfo& info,
1351                                        TCA& start,
1352                                        SrcKey& sk) {
1353   const ServiceRequest requestNum =
1354     static_cast<ServiceRequest>(info.requestNum);
1355   auto* const args = info.args;
1356   assert(requestNum != REQ_EXIT);
1357   INC_TPC(service_req);
1358
1359   bool smashed = false;
1360   switch (requestNum) {
1361   case REQ_BIND_CALL: {
1362     ReqBindCall* req = reinterpret_cast<ReqBindCall*>(args[0]);
1363     ActRec* calleeFrame = reinterpret_cast<ActRec*>(args[1]);
1364     TCA toSmash = req->m_toSmash;
1365     Func *func = const_cast<Func*>(calleeFrame->m_func);
1366     int nArgs = req->m_nArgs;
1367     bool isImmutable = req->m_isImmutable;
1368     TRACE(2, "enterTC: bindCall %s, ActRec %p\n",
1369           func->fullName()->data(), calleeFrame);
1370     TCA dest = getFuncPrologue(func, nArgs);
1371     TRACE(2, "enterTC: bindCall -> %p\n", dest);
1372     if (!isImmutable) {
1373       // We don't know we're calling the right function, so adjust
1374       // dest to point to the dynamic check of ar->m_func.
1375       dest = funcPrologueToGuard(dest, func);
1376     } else {
1377       TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
1378             func->fullName()->data(), dest);
1379     }
1380     if (dest) {
1381       LeaseHolder writer(Translator::WriteLease());
1382       if (writer) {
1383         // Someone else may have changed the func prologue while we
1384         // waited for the write lease, so read it again.
1385         dest = getFuncPrologue(func, nArgs);
1386         assert(dest);
1387         if (!isImmutable) dest = funcPrologueToGuard(dest, func);
1388
1389         if (callTarget(toSmash) != dest) {
1390           TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
1391           smashCall(toSmash, dest);
1392           smashed = true;
1393           // For functions to be PGO'ed, if their current prologues
1394           // are still profiling ones (living in code.prof()), then
1395           // save toSmash as a caller to the prologue, so that it can
1396           // later be smashed to call a new prologue when it's generated.
1397           int calleeNumParams = func->numNonVariadicParams();
               // Calls passing more args than the callee has params all share
               // the prologue at index calleeNumParams + 1.
1398           int calledPrologNumArgs = (nArgs <= calleeNumParams ?
1399                                      nArgs :  calleeNumParams + 1);
1400           if (code.prof().contains(dest)) {
1401             if (isImmutable) {
1402               m_tx.profData()->addPrologueMainCaller(func, calledPrologNumArgs,
1403                                                      toSmash);
1404             } else {
1405               m_tx.profData()->addPrologueGuardCaller(func, calledPrologNumArgs,
1406                                                       toSmash);
1407             }
1408           }
1409         }
1410       }
1411       // sk: stale, but doesn't matter since we have a valid dest TCA.
1412     } else {
1413       // We need translator help; we're not at the callee yet, so
1414       // roll back. The prelude has done some work already, but it
1415       // should be safe to redo.
1416       TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
1417             toSmash, dest);
1418       sk = req->m_sourceInstr;
1419
1420       // EnterTCHelper pushes the return ip onto the stack when the
1421       // requestNum is REQ_BIND_CALL, but if start is NULL, it will
1422       // interpret in doFCall, so we clear out the requestNum in this
1423       // case to prevent enterTCHelper from pushing the return ip
1424       // onto the stack.
1425       info.requestNum = ~REQ_BIND_CALL;
1426     }
1427     start = dest;
1428   } break;
1429
1430   case REQ_BIND_SIDE_EXIT:
1431   case REQ_BIND_JMP:
1432   case REQ_BIND_JCC:
1433   case REQ_BIND_ADDR:
1434   {
1435     TCA toSmash = (TCA)args[0];
1436     Offset off = args[1];
1437     sk = SrcKey(liveFunc(), off, liveResumed());
1438     if (requestNum == REQ_BIND_SIDE_EXIT) {
1439       SKTRACE(3, sk, "side exit taken!\n");
1440     }
1441     start = bindJmp(toSmash, sk, requestNum, smashed);
1442   } break;
1443
1444   case REQ_BIND_JMPCC_FIRST: {
1445     TCA toSmash = (TCA)args[0];
1446     Offset offTaken = (Offset)args[1];
1447     Offset offNotTaken = (Offset)args[2];
1448     ConditionCode cc = ConditionCode(args[3]);
         // Only the low bit of args[4] says whether the branch was taken.
1449     bool taken = int64_t(args[4]) & 1;
1450     start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
1451                            taken, cc, smashed);
1452     // SrcKey: we basically need to emulate the fail
1453     sk = SrcKey(liveFunc(), taken ? offTaken : offNotTaken, liveResumed());
1454   } break;
1455
1456   case REQ_BIND_JMPCC_SECOND: {
1457     TCA toSmash = (TCA)args[0];
1458     Offset off = (Offset)args[1];
1459     ConditionCode cc = ConditionCode(args[2]);
1460     start = bindJmpccSecond(toSmash, off, cc, smashed);
1461     sk = SrcKey(liveFunc(), off, liveResumed());
1462   } break;
1463
1464   case REQ_RETRANSLATE_OPT: {
1465     auto ai = (SrcKey::AtomicInt)args[0];
1466     TransID transId = (TransID)args[1];
1467     sk = SrcKey::fromAtomicInt(ai);
1468     start = retranslateOpt(transId, false);
1469     SKTRACE(2, sk, "retranslated-OPT: transId = %d  start: @%p\n", transId,
1470             start);
1471     break;
1472   }
1473
1474   case REQ_RETRANSLATE: {
1475     INC_TPC(retranslate);
1476     sk = SrcKey(liveFunc(), (Offset)args[0], liveResumed());
1477     start = retranslate(TranslArgs(sk, true));
1478     SKTRACE(2, sk, "retranslated @%p\n", start);
1479   } break;
1480
1481   case REQ_INTERPRET: {
1482     Offset off = args[0];
1483     int numInstrs = args[1];
1484     g_context->m_pc = liveUnit()->at(off);
1485     /*
1486      * We know the compilation unit has not changed; basic blocks do
1487      * not span files. I claim even exceptions do not violate this
1488      * axiom.
1489      */
1490     assert(numInstrs >= 0);
1491     SKTRACE(5, SrcKey(liveFunc(), off, liveResumed()), "interp: enter\n");
1492     if (numInstrs) {
1493       s_perfCounters[tpc_interp_instr] += numInstrs;
1494       g_context->dispatchN(numInstrs);
1495     } else {
1496       // numInstrs == 0 means it wants to dispatch until BB ends
1497       INC_TPC(interp_bb);
1498       g_context->dispatchBB();
1499     }
1500     PC newPc = g_context->getPC();
         // A null PC means there is nothing left to run at this nesting level.
1501     if (!newPc) { g_context->m_fp = 0; return false; }
1502     SrcKey newSk(liveFunc(), newPc, liveResumed());
1503     SKTRACE(5, newSk, "interp: exit\n");
1504     sk = newSk;
1505     start = getTranslation(TranslArgs(newSk, true));
1506   } break;
1507
1508   case REQ_POST_INTERP_RET: {
1509     // This is only responsible for the control-flow aspect of the Ret:
1510     // getting to the destination's translation, if any.
1511     ActRec* ar = (ActRec*)args[0];
1512     ActRec* caller = (ActRec*)args[1];
1513     assert((Cell*) caller == vmfp());
1514     Unit* destUnit = caller->m_func->unit();
1515     // Set PC so logging code in getTranslation doesn't get confused.
1516     vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
1517     SrcKey dest(caller->func(), vmpc(), caller->resumed());
1518     sk = dest;
1519     start = getTranslation(TranslArgs(dest, true));
1520     TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
1521           ar->m_func->fullName()->data(),
1522           caller->m_func->fullName()->data());
1523   } break;
1524
1525   case REQ_RESUME: {
1526     if (UNLIKELY(vmpc() == 0)) {
1527       g_context->m_fp = 0;
1528       return false;
1529     }
1530     SrcKey dest(liveFunc(), vmpc(), liveResumed());
1531     sk = dest;
1532     start = getTranslation(TranslArgs(dest, true));
1533   } break;
1534
       // No break is needed for this case: one branch raises an error and the
       // other throws.
1535   case REQ_STACK_OVERFLOW:
1536     if (((ActRec*)info.saved_rStashedAr)->m_savedRbp == (uintptr_t)vmfp()) {
1537       /*
1538        * The normal case - we were called via FCall, or FCallArray.
1539        * We need to construct the pc of the fcall from the return
1540        * address (which will be after the fcall). Because fcall is
1541        * a variable length instruction, and because we sometimes
1542        * delete instructions from the instruction stream, we
1543        * need to use fpi regions to find the fcall.
1544        */
1545       const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1546         liveUnit()->offsetOf(vmpc()));
1547       vmpc() = liveUnit()->at(fe->m_fcallOff);
1548       assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
1549       raise_error("Stack overflow");
1550       NOT_REACHED();
1551     } else {
1552       /*
1553        * We were called via re-entry
1554        * Leak the params and the actrec, and tell the unwinder
1555        * that there's nothing left to do in this "entry".
1556        */
1557       vmsp() = (Cell*)((ActRec*)info.saved_rStashedAr + 1);
1558       throw VMReenterStackOverflow();
1559     }
1560
1561   case REQ_EXIT:
1562     not_reached();
1563   }
1564
       // The call site now points at its real destination, so the stub that
       // raised this request can be freed.  The free is routed through the
       // Treadmill via a FreeRequestStubTrigger.
1565   if (smashed && info.stubAddr) {
1566     Treadmill::enqueue(FreeRequestStubTrigger(info.stubAddr));
1567   }
1568
1569   return true;
1570 }
1571
1572 /*
1573  * Support for the stub freelist.
1574  */
1575 TCA FreeStubList::maybePop() {
1576   StubNode* ret = m_list;
1577   if (ret) {
1578     TRACE(1, "alloc stub %p\n", ret);
1579     m_list = ret->m_next;
1580     ret->m_freed = ~kStubFree;
1581   }
1582   return (TCA)ret;
1583 }
1584
1585 void FreeStubList::push(TCA stub) {
1586   /*
1587    * A freed stub may be released by Treadmill more than once if multiple
1588    * threads execute the service request before it is freed. We detect
1589    * duplicates by marking freed stubs
1590    */
1591   StubNode* n = (StubNode *)stub;
1592   if (n->m_freed == kStubFree) {
1593     TRACE(1, "already freed stub %p\n", stub);
1594     return;
1595   }
1596   n->m_freed = kStubFree;
1597   n->m_next = m_list;
1598   TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
1599   m_list = n;
1600 }
1601
1602 bool
1603 MCGenerator::freeRequestStub(TCA stub) {
1604   LeaseHolder writer(Translator::WriteLease());
1605   /*
1606    * If we can't acquire the write lock, the caller
1607    * (FreeRequestStubTrigger) retries
1608    */
1609   if (!writer) return false;
1610   assert(code.stubs().contains(stub));
1611   m_freeStubs.push(stub);
1612   return true;
1613 }
1614
1615 TCA MCGenerator::getFreeStub() {
1616   TCA ret = m_freeStubs.maybePop();
1617   if (ret) {
1618     Stats::inc(Stats::Astubs_Reused);
1619     assert(m_freeStubs.m_list == nullptr ||
1620            code.stubs().contains(TCA(m_freeStubs.m_list)));
1621     TRACE(1, "recycle stub %p\n", ret);
1622   } else {
1623     ret = code.stubs().frontier();
1624     Stats::inc(Stats::Astubs_New);
1625     TRACE(1, "alloc new stub %p\n", ret);
1626   }
1627   return ret;
1628 }
1629
     // Defines one interpOne<opcode>() function for every entry of the OPCODES
     // list by expanding the O() macro below once per opcode.
1630 #define O(opcode, imm, pusph, pop, flags) \
1631 /**
1632  * Each interpOne<opcode> function passes ar, sp and pcOff to
1633  * interp_set_regs, runs the interpreter for that one opcode, and then
1634  * returns a pointer to the current ExecutionContext.
1635  */  \
1636 ExecutionContext*                                                       \
1637 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) {                 \
1638   interp_set_regs(ar, sp, pcOff);                                       \
1639   SKTRACE(5, SrcKey(liveFunc(), vmpc(), liveResumed()), "%40s %p %p\n", \
1640           "interpOne" #opcode " before (fp,sp)",                        \
1641           vmfp(), vmsp());                                              \
1642   assert(*reinterpret_cast<const Op*>(vmpc()) == Op::opcode);           \
1643   auto const ec = g_context.getNoCheck();                               \
1644   Stats::inc(Stats::Instr_InterpOne ## opcode);                         \
1645   if (Trace::moduleEnabled(Trace::interpOne, 1)) {                      \
1646     static const StringData* cat = makeStaticString("interpOne");       \
1647     static const StringData* name = makeStaticString(#opcode);          \
1648     Stats::incStatGrouped(cat, name, 1);                                \
1649   }                                                                     \
1650   INC_TPC(interp_one)                                                   \
1651   /* Correct for over-counting in TC-stats. */                          \
1652   Stats::inc(Stats::Instr_TC, -1);                                      \
1653   ec->op##opcode();                                                     \
1654   /*
1655    * Only set regstate back to dirty if an exception is not
1656    * propagating.  If an exception is throwing, regstate for this call
1657    * is actually still correct, and we don't have information in the
1658    * fixup map for interpOne calls anyway.
1659    */ \
1660   tl_regState = VMRegState::DIRTY;                                      \
1661   return ec;                                                            \
1662 }
1663
1664 OPCODES
1665 #undef O
1666
1667 void* interpOneEntryPoints[] = {
1668 #define O(opcode, imm, pusph, pop, flags) \
1669   (void*)(interpOne ## opcode),
1670 OPCODES
1671 #undef O
1672 };
1673
1674 TCA MCGenerator::getTranslatedCaller() const {
1675   DECLARE_FRAME_POINTER(fp);
1676   ActRec* framePtr = fp;  // can't directly mutate the register-mapped one
1677   for (; framePtr; framePtr = (ActRec*)framePtr->m_savedRbp) {
1678     TCA rip = (TCA)framePtr->m_savedRip;
1679     if (isValidCodeAddress(rip)) {
1680       return rip;
1681     }
1682   }
1683   return nullptr;
1684 }
1685
1686 void
1687 MCGenerator::syncWork() {
1688   assert(tl_regState == VMRegState::DIRTY);
1689   m_fixupMap.fixup(g_context.getNoCheck());
1690   tl_regState = VMRegState::CLEAN;
1691   Stats::inc(Stats::TC_Sync);
1692 }
1693
1694 TCA
1695 MCGenerator::emitNativeTrampoline(TCA helperAddr) {
1696   auto& trampolines = code.trampolines();
1697   if (!trampolines.canEmit(kExpectedPerTrampolineSize)) {
1698     // not enough space to emit a trampoline, so just return the
1699     // helper address and emitCall will the emit the right sequence
1700     // to call it indirectly
1701     TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
1702     always_assert(false);
1703     return helperAddr;
1704   }
1705
1706   uint32_t index = m_numNativeTrampolines++;
1707   TCA trampAddr = trampolines.frontier();
1708   if (Stats::enabled()) {
1709     emitIncStat(trampolines, &Stats::tl_helper_counters[0], index);
1710     auto name = getNativeFunctionName(helperAddr);
1711     const size_t limit = 50;
1712     if (name.size() > limit) {
1713       name[limit] = '\0';
1714     }
1715
1716     // The duped string lives until process death intentionally.
1717     Stats::helperNames[index].store(strdup(name.c_str()),
1718                                     std::memory_order_release);
1719   }
1720
1721   Asm a { trampolines };
1722   a.    jmp    (helperAddr);
1723   a.    ud2    ();
1724
1725   m_trampolineMap[helperAddr] = trampAddr;
1726   recordBCInstr(OpNativeTrampoline, trampolines, trampAddr);
1727   if (RuntimeOption::EvalJitUseVtuneAPI) {
1728     reportTrampolineToVtune(trampAddr, trampolines.frontier() - trampAddr);
1729   }
1730
1731   return trampAddr;
1732 }
1733
1734 TCA
1735 MCGenerator::getNativeTrampoline(TCA helperAddr) {
1736   if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
1737     return helperAddr;
1738   }
1739   auto const trampAddr = (TCA)folly::get_default(m_trampolineMap, helperAddr);
1740   if (trampAddr) {
1741     return trampAddr;
1742   }
1743   return emitNativeTrampoline(helperAddr);
1744 }
1745
1746 bool
1747 MCGenerator::reachedTranslationLimit(SrcKey sk,
1748                                      const SrcRec& srcRec) const {
1749   if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
1750     INC_TPC(max_trans);
1751     if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
1752       const auto& tns = srcRec.translations();
1753       TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
1754             tns.size(), sk.unit()->filepath()->data(),
1755             sk.offset());
1756       SKTRACE(2, sk, "{\n");
1757       TCA topTrans = srcRec.getTopTranslation();
1758       for (size_t i = 0; i < tns.size(); ++i) {
1759         const TransRec* rec = m_tx.getTransRec(tns[i]);
1760         assert(rec);
1761         SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
1762         if (tns[i] == topTrans) {
1763           SKTRACE(2, sk, "%zd: *Top*\n", i);
1764         }
1765         if (rec->kind == TransAnchor) {
1766           SKTRACE(2, sk, "%zd: Anchor\n", i);
1767         } else {
1768           SKTRACE(2, sk, "%zd: guards {\n", i);
1769           for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
1770             TRACE(2, rec->dependencies[j]);
1771           }
1772           SKTRACE(2, sk, "%zd } guards\n", i);
1773         }
1774       }
1775       SKTRACE(2, sk, "} /* Too many translations */\n");
1776     }
1777     return true;
1778   }
1779
1780   return false;
1781 }
1782
1783 void
1784 MCGenerator::emitGuardChecks(SrcKey sk,
1785                              const ChangeMap& dependencies,
1786                              const RefDeps& refDeps,
1787                              SrcRec& fail) {
1788   if (Trace::moduleEnabled(Trace::stats, 2)) {
1789     emitIncStat(code.main(), Stats::TraceletGuard_enter);
1790   }
1791
1792   m_tx.irTrans()->hhbcTrans().emitRB(RBTypeTraceletGuards, sk);
1793   bool checkOuterTypeOnly = m_tx.mode() != TransProfile;
1794   for (auto const& dep : dependencies) {
1795     m_tx.irTrans()->checkType(dep.first, dep.second->rtt, checkOuterTypeOnly);
1796   }
1797
1798   checkRefs(sk, refDeps, fail);
1799
1800   if (Trace::moduleEnabled(Trace::stats, 2)) {
1801     emitIncStat(code.main(), Stats::TraceletGuard_execute);
1802   }
1803 }
1804
1805
1806 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
1807   if (!debug) return;
1808
1809   SrcKey sk = t.m_sk;
1810   DEBUG_ONLY auto unit = sk.unit();
1811
1812   TRACE(3, "----------------------------------------------\n");
1813   TRACE(3, "  Translating from file %s:%d %s at %p:\n",
1814         unit->filepath()->data(),
1815         unit->getLineNumber(sk.offset()),
1816         sk.func()->name()->data(),
1817         postGuards);
1818   TRACE(3, "  preconds:\n");
1819   TRACE(3, "    types:\n");
1820   for (DepMap::const_iterator i = t.m_dependencies.begin();
1821        i != t.m_dependencies.end(); ++i) {
1822     TRACE(3, "      %-5s\n", i->second->pretty().c_str());
1823   }
1824   if (t.m_refDeps.size() != 0) {
1825     TRACE(3, "    refs:\n");
1826     for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
1827         i != t.m_refDeps.m_arMap.end();
1828         ++i) {
1829       TRACE(3, "      (ActRec %" PRId64 " : %-5s)\n", i->first,
1830         i->second.pretty().c_str());
1831     }
1832   }
1833   TRACE(3, "  postconds:\n");
1834   for (ChangeMap::const_iterator i = t.m_changes.begin();
1835        i != t.m_changes.end(); ++i) {
1836     TRACE(3, "    %-5s\n", i->second->pretty().c_str());
1837   }
1838   for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
1839     TRACE(3, "  %6d: %s\n", ni->source.offset(),
1840       instrToString((Op*)ni->pc()).c_str());
1841     if (ni->breaksTracelet) break;
1842   }
1843   TRACE(3, "----------------------------------------------\n");
1844   if (Trace::moduleEnabled(Trace::mcg, 5)) {
1845     // prettyStack() expects to use vmpc(). Leave it in the state we
1846     // found it since this code is debug-only, and we don't want behavior
1847     // to vary across the optimized/debug builds.
1848     PC oldPC = vmpc();
1849     vmpc() = unit->at(sk.offset());
1850     TRACE(3, g_context->prettyStack(std::string(" mcg ")));
1851     vmpc() = oldPC;
1852     TRACE(3, "----------------------------------------------\n");
1853   }
1854 }
1855
/*
 * Produce a translation for args.m_sk and publish it on that SrcKey's SrcRec.
 *
 * Unless args.m_interp is set or the SrcRec has reached its translation
 * limit, this first tries to translate a region (supplied in args, selected
 * from profiling data, or selected from the live context, depending on the
 * translator mode) and falls back to translating a Tracelet.  If no
 * translation succeeds, a stub requesting interpretation of the code is
 * emitted instead, so the resulting translation kind is TransInterp.
 *
 * With args.m_dryRun set, everything emitted is rolled back and the function
 * returns without recording or publishing anything.
 */
void
MCGenerator::translateWork(const TranslArgs& args) {
  Timer _t(Timer::translate);
  auto sk = args.m_sk;
  std::unique_ptr<Tracelet> tp;

  SKTRACE(1, sk, "translateWork\n");
  assert(m_tx.getSrcDB().find(sk));

  // Remember where each code area stood before emitting anything, both for
  // the TransRec bookkeeping at the end and so a failure can be rolled back.
  TCA        start = code.main().frontier();
  TCA        stubStart = code.stubs().frontier();
  SrcRec&    srcRec = *m_tx.getSrcRec(sk);
  TransKind  transKind = TransInterp;
  UndoMarker undoA(code.main());
  UndoMarker undoAstubs(code.stubs());
  UndoMarker undoGlobalData(code.data());

  // Discards everything a (partial) translation attempt may have produced.
  auto resetState = [&] {
    undoA.undo();
    undoAstubs.undo();
    undoGlobalData.undo();
    m_fixupMap.clearPendingFixups();
    m_pendingCatchTraces.clear();
    m_bcMap.clear();
    srcRec.clearInProgressTailJumps();
  };

  // Debug-only check that no leftovers of a previous attempt remain.
  auto assertCleanState = [&] {
    assert(code.main().frontier() == start);
    assert(code.stubs().frontier() == stubStart);
    assert(m_fixupMap.pendingFixupsEmpty());
    assert(m_pendingCatchTraces.empty());
    assert(m_bcMap.empty());
    assert(srcRec.inProgressTailJumps().empty());
  };

  PostConditions pconds;
  RegionDescPtr region;
  if (!args.m_interp && !reachedTranslationLimit(sk, srcRec)) {
    // Attempt to create a region at this SrcKey
    if (m_tx.mode() == TransOptimize) {
      assert(RuntimeOption::EvalJitPGO);
      region = args.m_region;
      if (region) {
        assert(region->blocks.size() > 0);
      } else {
        // No region was handed in: pick one from the profiling data.
        TransID transId = args.m_transId;
        assert(transId != InvalidID);
        region = selectHotRegion(transId, this);
        assert(region);
        // An empty region is treated the same as having no region at all.
        if (region && region->blocks.size() == 0) region = nullptr;
      }
    } else {
      assert(m_tx.mode() == TransProfile || m_tx.mode() == TransLive);
      tp = m_tx.analyze(sk);
      // TODO(#4150507): use sk.resumed() instead of liveResumed()?
      RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
                               liveResumed() };
      FTRACE(2, "populating live context for region\n");
      populateLiveContext(rContext);
      region = selectRegion(rContext, tp.get(), m_tx.mode());

      if (RuntimeOption::EvalJitCompareRegions &&
          RuntimeOption::EvalJitRegionSelector == "tracelet") {
        // Re-analyze with guard relaxation on
        OPTION_GUARD(EvalHHBCRelaxGuards, 1);
        OPTION_GUARD(EvalHHIRRelaxGuards, 0);
        auto legacyRegion = selectTraceletLegacy(rContext.spOffset,
                                                 *m_tx.analyze(sk));
        if (!region) {
          Trace::ftraceRelease("{:-^60}\nCouldn't select tracelet region "
                               "for:\n{}", "", show(*legacyRegion));
        } else {
          diffRegions(*region, *legacyRegion);
        }
      }
    }

    Translator::TranslateResult result = Translator::Retry;
    Translator::RegionBlacklist regionInterps;
    Offset initSpOffset = region ? region->blocks[0]->initialSpOffset()
                                 : liveSpOff();
    bool bcControlFlow = RuntimeOption::EvalHHIRBytecodeControlFlow;

    // Keep attempting a translation for as long as the translator asks for a
    // retry.
    while (result == Translator::Retry) {
      // TODO(#4150507): use sk.resumed() instead of liveResumed()?
      m_tx.traceStart(sk.offset(), initSpOffset, liveResumed(), sk.func());

      // Try translating a region if we have one, then fall back to using the
      // Tracelet.
      if (region) {
        try {
          assertCleanState();
          result = m_tx.translateRegion(*region, bcControlFlow, regionInterps);

          // If we're profiling, grab the postconditions so we can
          // use them in region selection whenever we decide to retranslate.
          if (m_tx.mode() == TransProfile && result == Translator::Success &&
              RuntimeOption::EvalJitPGOUsePostConditions) {
            pconds = m_tx.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
          }

          FTRACE(2, "translateRegion finished with result {}\n",
                 Translator::translateResultName(result));
        } catch (ControlFlowFailedExc& cfe) {
          // Translating with bytecode control flow failed: retry the region
          // with that feature switched off.
          FTRACE(2, "translateRegion with control flow failed: '{}'\n",
                 cfe.what());
          always_assert(bcControlFlow &&
            "control flow translation failed, but control flow not enabled");
          bcControlFlow = false;
          result = Translator::Retry;
        } catch (const std::exception& e) {
          FTRACE(1, "translateRegion failed with '{}'\n", e.what());
          result = Translator::Failure;
        }
        if (result == Translator::Failure) {
          // Start over with a fresh trace and clean code/metadata state
          // before falling back to the Tracelet below.
          m_tx.traceFree();
          // TODO(#4150507): use sk.resumed() instead of liveResumed()?
          m_tx.traceStart(sk.offset(), liveSpOff(), liveResumed(), sk.func());
          resetState();
        }
      }
      if (!region || result == Translator::Failure) {
        // If the region translator failed for an Optimize
        // translation, it's OK to do a Live translation for the
        // function entry.  We lazily create the tracelet here in this
        // case.
        if (m_tx.mode() == TransOptimize) {
          if (sk.getFuncId() == liveFunc()->getFuncId() &&
              liveUnit()->contains(vmpc()) &&
              sk.offset() == liveUnit()->offsetOf(vmpc())) {
            m_tx.setMode(TransLive);
            tp = m_tx.analyze(sk);
          } else {
            // sk does not match the live VM position, so give up on
            // translating and leave transKind at TransInterp.
            m_tx.setMode(TransInterp);
            m_tx.traceFree();
            break;
          }
        }
        FTRACE(1, "trying translateTracelet\n");
        assertCleanState();
        result = translateTracelet(*tp);

        // If we're profiling, grab the postconditions so we can
        // use them in region selection whenever we decide to
        // retranslate.
        if (m_tx.mode() == TransProfile && result == Translator::Success &&
            RuntimeOption::EvalJitPGOUsePostConditions) {
          pconds = m_tx.irTrans()->hhbcTrans().irBuilder().getKnownTypes();
        }
      }

      if (result != Translator::Success) {
        // Translation failed. Free resources for this trace, rollback the
        // translation cache frontiers, and discard any pending fixups.
        resetState();
      }
      m_tx.traceFree();
    }

    if (result == Translator::Success) {
      assert(m_tx.mode() == TransLive    ||
             m_tx.mode() == TransProfile ||
             m_tx.mode() == TransOptimize);
      transKind = m_tx.mode();
    }
  }

  if (args.m_dryRun) {
    // Dry runs never keep what was emitted and never publish anything.
    resetState();
    return;
  }

  if (transKind == TransInterp) {
    // No real translation was produced: emit a jump to a service request
    // asking for the code at sk to be interpreted instead.
    assertCleanState();
    auto interpOps = tp ? tp->m_numOpcodes : 1;
    FTRACE(1, "emitting {}-instr interp request for failed translation\n",
           interpOps);
    switch (arch()) {
      case Arch::X64: {
        Asm a { code.main() };
        // Add a counter for the translation if requested
        if (RuntimeOption::EvalJitTransCounters) {
          X64::emitTransCounterInc(a);
        }
        a.    jmp(emitServiceReq(code.stubs(), REQ_INTERPRET,
                                 sk.offset(), interpOps));
        break;
      }
      case Arch::ARM: {
        if (RuntimeOption::EvalJitTransCounters) {
          vixl::MacroAssembler a { code.main() };
          ARM::emitTransCounterInc(a);
        }
        // This jump won't be smashed, but a far jump on ARM requires the same
        // code sequence.
        emitSmashableJump(
          code.main(),
          emitServiceReq(code.stubs(), REQ_INTERPRET,
                         sk.offset(), interpOps),
          CC_None
        );
        break;
      }
    }
    // Fall through to the bookkeeping shared with successful translations.
  }

  m_fixupMap.processPendingFixups();
  processPendingCatchTraces();

  // Record metadata describing the code emitted into main and stubs since
  // `start` / `stubStart`.
  TransRec tr(sk, sk.unit()->md5(), transKind, tp.get(), start,
              code.main().frontier() - start, stubStart,
              code.stubs().frontier() - stubStart,
              m_bcMap);
  m_tx.addTranslation(tr);
  if (RuntimeOption::EvalJitUseVtuneAPI) {
    reportTraceletToVtune(sk.unit(), sk.func(), tr);
  }
  m_bcMap.clear();

  recordGdbTranslation(sk, sk.func(), code.main(), start,
                       false, false);
  recordGdbTranslation(sk, sk.func(), code.stubs(), stubStart,
                       false, false);
  if (RuntimeOption::EvalJitPGO) {
    if (transKind == TransProfile) {
      if (!region) {
        // The profiling translation came from a Tracelet; build a region
        // from it so the profile data always has one.
        assert(tp);
        region = selectTraceletLegacy(liveSpOff(), *tp);
      }
      m_tx.profData()->addTransProfile(region, pconds);
    } else {
      m_tx.profData()->addTransNonProf(transKind, sk);
    }
  }
  // SrcRec::newTranslation() makes this code reachable. Do this last;
  // otherwise there's some chance of hitting in the reader threads whose
  // metadata is not yet visible.
  TRACE(1, "newTranslation: %p  sk: (func %d, bcOff %d)\n",
        start, sk.getFuncId(), sk.offset());
  srcRec.newTranslation(start);
  TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
  if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
    Trace::traceRelease("%s", getUsage().c_str());
  }
}
2103
2104 Translator::TranslateResult
2105 MCGenerator::translateTracelet(Tracelet& t) {
2106   if (RuntimeOption::EvalJitRegionSelector != "") {
2107     // In order to properly simulate a post-Tracelet world, refuse to translate
2108     // Tracelets when a region selector is active.
2109     return Translator::Failure;
2110   }
2111
2112   Timer _t(Timer::translateTracelet);
2113
2114   FTRACE(2, "attempting to translate tracelet:\n{}\n", t.toString());
2115   assert(!Translator::liveFrameIsPseudoMain());
2116   const SrcKey &sk = t.m_sk;
2117   SrcRec& srcRec = *m_tx.getSrcRec(sk);
2118   HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
2119   bool profilingFunc = false;
2120
2121   assert(srcRec.inProgressTailJumps().size() == 0);
2122   try {
2123     emitResolvedDeps(t.m_resolvedDeps);
2124     {
2125       emitGuardChecks(sk, t.m_dependencies, t.m_refDeps, srcRec);
2126
2127       dumpTranslationInfo(t, code.main().frontier());
2128
2129       // after guards, add a counter for the translation if requested
2130       if (RuntimeOption::EvalJitTransCounters) {
2131         ht.emitIncTransCounter();
2132       }
2133
2134       if (m_tx.mode() == TransProfile) {
2135         if (t.func()->isEntry(sk.offset())) {
2136           ht.emitCheckCold(m_tx.profData()->curTransID());
2137           profilingFunc = true;
2138         } else {
2139           ht.emitIncProfCounter(m_tx.profData()->curTransID());
2140         }
2141       }
2142
2143       ht.emitRB(RBTypeTraceletBody, t.m_sk);
2144       emitIncStat(code.main(), Stats::Instr_TC, t.m_numOpcodes);
2145     }
2146
2147     // Profiling on function entry.
2148     if (t.m_sk.offset() == t.func()->base()) {
2149       ht.profileFunctionEntry("Normal");
2150     }
2151
2152     /*
2153      * Profiling on the shapes of tracelets that are whole functions.
2154      * (These are the things we might consider trying to support
2155      * inlining.)
2156      */
2157     [&]{
2158       static const bool enabled = Stats::enabledAny() &&
2159                                   getenv("HHVM_STATS_FUNCSHAPE");
2160       if (!enabled) return;
2161       if (t.m_sk.offset() != t.func()->base()) return;
2162       if (auto last = t.m_instrStream.last) {
2163         if (last->op() != OpRetC && last->op() != OpRetV &&
2164             last->op() != OpCreateCont && last->op() != OpAsyncSuspend) {
2165           return;
2166         }
2167       }
2168       ht.profileSmallFunctionShape(traceletShape(t));
2169     }();
2170
2171     Timer irGenTimer(Timer::translateTracelet_irGeneration);
2172     Unit::MetaHandle metaHand;
2173     // Translate each instruction in the tracelet
2174     for (auto* ni = t.m_instrStream.first; ni && !ht.hasExit();
2175          ni = ni->next) {
2176       ht.setBcOff(ni->source.offset(),
2177                   ni->breaksTracelet && !ht.isInlining());
2178       readMetaData(metaHand, *ni, m_tx.irTrans()->hhbcTrans(),
2179                    m_tx.mode() == TransProfile, MetaMode::Legacy);
2180
2181       try {
2182         SKTRACE(1, ni->source, "HHIR: translateInstr\n");
2183         assert(!(m_tx.mode() ==
2184                TransProfile && ni->outputPredicted && ni->next));
2185         m_tx.irTrans()->translateInstr(*ni);
2186       } catch (FailedIRGen& fcg) {
2187         always_assert(!ni->interp);
2188         ni->interp = true;
2189         FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2190                "after failing to generate ir: {} \n\n",
2191                m_tx.getCurrentTransID(), ni->toString(), fcg.what());
2192         return Translator::Retry;
2193       }
2194       assert(ni->source.offset() >= t.func()->base());
2195       // We sometimes leave the tail of a truncated tracelet in place to aid
2196       // analysis, but breaksTracelet is authoritative.
2197       if (ni->breaksTracelet || m_tx.irTrans()->hhbcTrans().hasExit()) break;
2198     }
2199     m_tx.traceEnd();
2200     irGenTimer.end();
2201
2202     try {
2203       traceCodeGen();
2204       TRACE(1, "HHIR: SUCCEEDED to generate code for Translation %d\n\n\n",
2205             m_tx.getCurrentTransID());
2206       if (profilingFunc) m_tx.profData()->setProfiling(t.func()->getFuncId());
2207       return Translator::Success;
2208     } catch (FailedCodeGen& fcg) {
2209       // Code-gen failed. Search for the bytecode instruction that caused the
2210       // problem, flag it to be interpreted, and retranslate the tracelet.
2211       SrcKey sk{fcg.vmFunc, fcg.bcOff, fcg.resumed};
2212
2213       for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
2214         if (ni->source == sk) {
2215           always_assert_log(
2216             !ni->interp,
2217             [&] {
2218               std::ostringstream oss;
2219               oss << folly::format("code generation failed with {}\n",
2220                                    fcg.what());
2221               print(oss, m_tx.irTrans()->hhbcTrans().unit());
2222               return oss.str();
2223             });
2224
2225           ni->interp = true;
2226           FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2227                  "after failing to code-gen \n\n",
2228                  m_tx.getCurrentTransID(), ni->toString(), fcg.what());
2229           return Translator::Retry;
2230         }
2231       }
2232       throw fcg;
2233     }
2234   } catch (FailedCodeGen& fcg) {
2235     TRACE(1, "HHIR: FAILED to generate code for Translation %d "
2236           "@ %s:%d (%s)\n", m_tx.getCurrentTransID(),
2237           fcg.file, fcg.line, fcg.func);
2238     // HHIR:TODO Remove extra TRACE and adjust tools
2239     TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2240           fcg.file, fcg.line, fcg.func);
2241   } catch (FailedIRGen& x) {
2242     TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2243           x.file, x.line, x.func);
2244   } catch (const FailedAssertion& fa) {
2245     fa.print();
2246     StackTraceNoHeap::AddExtraLogging(
2247       "Assertion failure",
2248       folly::format("{}\n\nActive Unit:\n{}\n",
2249                     fa.summary, ht.unit().toString()).str());
2250     abort();
2251   } catch (const FailedTraceGen& e) {
2252     FTRACE(1, "HHIR: FAILED to translate whole unit: {}\n",
2253            e.what());
2254   }
2255   return Translator::Failure;
2256 }
2257
2258 void MCGenerator::traceCodeGen() {
2259   HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
2260   auto& unit = ht.unit();
2261
2262   auto finishPass = [&](const char* msg, int level) {
2263     dumpTrace(level, unit, msg, nullptr, nullptr, ht.irBuilder().guards());
2264     assert(checkCfg(unit));
2265   };
2266
2267   finishPass(" after initial translation ", kIRLevel);
2268
2269   optimize(unit, ht.irBuilder(), m_tx.mode());
2270   finishPass(" after optimizing ", kOptLevel);
2271
2272   auto regs = allocateRegs(unit);
2273   assert(checkRegisters(unit, regs)); // calls checkCfg internally.
2274
2275   recordBCInstr(OpTraceletGuard, code.main(), code.main().frontier());
2276   genCode(code.main(), code.stubs(), unit, &m_bcMap, this, regs);
2277
2278   m_numHHIRTrans++;
2279 }
2280
2281 MCGenerator::MCGenerator()
2282   : m_numNativeTrampolines(0)
2283   , m_numHHIRTrans(0)
2284   , m_catchTraceMap(128)
2285 {
2286   TRACE(1, "MCGenerator@%p startup\n", this);
2287   mcg = this;
2288
2289   m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());
2290
2291   static bool profileUp = false;
2292   if (!profileUp) {
2293     profileInit();
2294     profileUp = true;
2295   }
2296
2297   if (Trace::moduleEnabledRelease(Trace::printir) &&
2298       !RuntimeOption::EvalJit) {
2299     Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
2300                         "Did you mean to run with -vEval.Jit=1?\n");
2301   }
2302 }
2303
2304 void MCGenerator::initUniqueStubs() {
2305   // Put the following stubs into ahot, rather than a.
2306   CodeCache::Selector asmSel(CodeCache::Selector::Args(code).hot(true));
2307   switch (arch()) {
2308     case Arch::X64:
2309       m_tx.uniqueStubs = X64::emitUniqueStubs();
2310       break;
2311     case Arch::ARM:
2312       m_tx.uniqueStubs = ARM::emitUniqueStubs();
2313       break;
2314   }
2315 }
2316
2317 void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
2318   FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
2319   m_pendingCatchTraces.emplace_back(ip, block);
2320 }
2321
2322 void MCGenerator::processPendingCatchTraces() {
2323   for (auto const& pair : m_pendingCatchTraces) {
2324     m_catchTraceMap.insert(pair.first, pair.second);
2325   }
2326   m_pendingCatchTraces.clear();
2327 }
2328
2329 folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
2330   TCA* found = m_catchTraceMap.find(ip);
2331   if (found) return *found;
2332   return folly::none;
2333 }
2334
2335 void MCGenerator::requestInit() {
2336   tl_regState = VMRegState::CLEAN;
2337   Timer::RequestInit();
2338   PendQ::drain();
2339   m_tx.requestResetHighLevelTranslator();
2340   Treadmill::startRequest();
2341   memset(&s_perfCounters, 0, sizeof(s_perfCounters));
2342   Stats::init();
2343 }
2344
2345 void MCGenerator::requestExit() {
2346   if (Translator::WriteLease().amOwner()) {
2347     Translator::WriteLease().drop();
2348   }
2349   TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
2350             " kept, %15" PRId64 " grabbed\n",
2351             Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
2352             Translator::WriteLease().m_hintGrabbed);
2353   PendQ::drain();
2354   Treadmill::finishRequest();
2355   Stats::dump();
2356   Stats::clear();
2357   Timer::RequestExit();
2358
2359   if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
2360     Trace::traceRelease("MCGenerator perf counters for %s:\n",
2361                         g_context->getRequestUrl(50).c_str());
2362     for (int i = 0; i < tpc_num_counters; i++) {
2363       Trace::traceRelease("%-20s %10" PRId64 "\n",
2364                           kPerfCounterNames[i], s_perfCounters[i]);
2365     }
2366     Trace::traceRelease("\n");
2367   }
2368 }
2369
2370 bool
2371 MCGenerator::isPseudoEvent(const char* event) {
2372   for (auto name : kPerfCounterNames) {
2373     if (!strcmp(event, name)) {
2374       return true;
2375     }
2376   }
2377   return false;
2378 }
2379
2380 void
2381 MCGenerator::getPerfCounters(Array& ret) {
2382   for (int i = 0; i < tpc_num_counters; i++) {
2383     // Until Perflab can automatically scale the values we give it to
2384     // an appropriate range, we have to fudge these numbers so they
2385     // look more like reasonable hardware counter values.
2386     ret.set(String::FromCStr(kPerfCounterNames[i]),
2387             s_perfCounters[i] * 1000);
2388   }
2389
2390   if (RuntimeOption::EnableInstructionCounts) {
2391     auto doCounts = [&](unsigned begin, const char* const name) {
2392       int64_t count = 0;
2393       for (; begin < Stats::Instr_InterpOneHighInvalid;
2394            begin += STATS_PER_OPCODE) {
2395         count += Stats::tl_counters[Stats::StatCounter(begin)];
2396       }
2397       ret.set(String::FromCStr(name), count);
2398     };
2399
2400     doCounts(Stats::Instr_TranslLowInvalid + STATS_PER_OPCODE,
2401              kInstrCountMCGName);
2402     doCounts(Stats::Instr_TranslIRPostLowInvalid + STATS_PER_OPCODE,
2403              kInstrCountIRName);
2404   }
2405
2406   for (auto const& pair : Timer::Counters()) {
2407     if (pair.second.total == 0 && pair.second.count == 0) continue;
2408
2409     ret.set(String("jit_time_") + pair.first, pair.second.total);
2410   }
2411 }
2412
2413 MCGenerator::~MCGenerator() {
2414 }
2415
2416 static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
2417                                 bool isAstubs) {
2418   assert(cb.contains(addr));
2419   return Debug::TCRange(addr, cb.frontier(), isAstubs);
2420 }
2421
2422 void MCGenerator::recordBCInstr(uint32_t op,
2423                                 const CodeBlock& cb,
2424                                 const TCA addr) {
2425   if (addr != cb.frontier()) {
2426     m_debugInfo.recordBCInstr(Debug::TCRange(addr, cb.frontier(),
2427                                              &cb == &code.stubs()), op);
2428   }
2429 }
2430
2431 void MCGenerator::recordGdbTranslation(SrcKey sk,
2432                                        const Func* srcFunc,
2433                                        const CodeBlock& cb,
2434                                        const TCA start,
2435                                        bool exit,
2436                                        bool inPrologue) {
2437   if (start != cb.frontier()) {
2438     assert(Translator::WriteLease().amOwner());
2439     if (!RuntimeOption::EvalJitNoGdb) {
2440       m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.stubs()),
2441                                  srcFunc,
2442                                  reinterpret_cast<const Op*>(
2443                                    srcFunc->unit() ?
2444                                      srcFunc->unit()->at(sk.offset()) : nullptr
2445                                  ),
2446                                  exit, inPrologue);
2447     }
2448     if (RuntimeOption::EvalPerfPidMap) {
2449       m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.stubs()),
2450                                 srcFunc, exit, inPrologue);
2451     }
2452   }
2453 }
2454
2455 void MCGenerator::recordGdbStub(const CodeBlock& cb,
2456                                 const TCA start, const char* name) {
2457   if (!RuntimeOption::EvalJitNoGdb) {
2458     m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.stubs()),
2459                            name);
2460   }
2461 }
2462
2463 std::string MCGenerator::getUsage() {
2464   std::string usage;
2465   size_t totalBlockSize = 0;
2466   size_t totalBlockCapacity = 0;
2467
2468   auto addRow = [&](const std::string& name, size_t used, size_t capacity) {
2469     totalBlockSize += used;
2470     totalBlockCapacity += capacity;
2471     auto percent = capacity ? 100 * used / capacity : 0;
2472     usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
2473                            used, percent, name).str();
2474   };
2475   code.forEachBlock([&](const char* name, const CodeBlock& a) {
2476     addRow(std::string("code.") + name, a.used(), a.capacity());
2477   });
2478   addRow("data", code.data().used(), code.data().capacity());
2479   addRow("RDS", RDS::usedBytes(),
2480          RuntimeOption::EvalJitTargetCacheSize * 3 / 4);
2481   addRow("persistentRDS", RDS::usedPersistentBytes(),
2482          RuntimeOption::EvalJitTargetCacheSize / 4);
2483   addRow("total",
2484          totalBlockSize + code.data().used() +
2485          RDS::usedBytes() + RDS::usedPersistentBytes(),
2486          totalBlockCapacity + code.data().capacity() +
2487          RuntimeOption::EvalJitTargetCacheSize);
2488
2489   return usage;
2490 }
2491
2492 std::string MCGenerator::getTCAddrs() {
2493   std::string addrs;
2494   code.forEachBlock([&](const char* name, const CodeBlock& a) {
2495       addrs += folly::format("{}: {}\n", name, a.base()).str();
2496   });
2497   return addrs;
2498 }
2499
2500 bool MCGenerator::addDbgGuards(const Unit* unit) {
2501   // TODO refactor
2502   // It grabs the write lease and iterating through whole SrcDB...
2503   bool locked = Translator::WriteLease().acquire(true);
2504   if (!locked) {
2505     return false;
2506   }
2507   struct timespec tsBegin, tsEnd;
2508   HPHP::Timer::GetMonotonicTime(tsBegin);
2509   // Doc says even find _could_ invalidate iterator, in pactice it should
2510   // be very rare, so go with it now.
2511   for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
2512        it != m_tx.getSrcDB().end(); ++it) {
2513     SrcKey const sk = SrcKey::fromAtomicInt(it->first);
2514     // We may have a SrcKey to a deleted function. NB: this may miss a
2515     // race with deleting a Func. See task #2826313.
2516     if (!Func::isFuncIdValid(sk.getFuncId())) continue;
2517     SrcRec* sr = it->second;
2518     if (sr->unitMd5() == unit->md5() &&
2519         !sr->hasDebuggerGuard() &&
2520         m_tx.isSrcKeyInBL(sk)) {
2521       addDbgGuardImpl(sk, sr);
2522     }
2523   }
2524   Translator::WriteLease().drop();
2525   HPHP::Timer::GetMonotonicTime(tsEnd);
2526   int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
2527   if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
2528     Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
2529   }
2530   return true;
2531 }
2532
2533 bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
2534   SrcKey sk(func, offset, resumed);
2535   {
2536     if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2537       if (sr->hasDebuggerGuard()) {
2538         return true;
2539       }
2540     } else {
2541       // no translation yet
2542       return true;
2543     }
2544   }
2545   if (debug) {
2546     if (!m_tx.isSrcKeyInBL(sk)) {
2547       TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
2548       return false;
2549     }
2550   }
2551   bool locked = Translator::WriteLease().acquire(true);
2552   if (!locked) {
2553     return false;
2554   }
2555   {
2556     if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2557       addDbgGuardImpl(sk, sr);
2558     }
2559   }
2560   Translator::WriteLease().drop();
2561   return true;
2562 }
2563
2564 bool MCGenerator::dumpTCCode(const char* filename) {
2565 #define OPEN_FILE(F, SUFFIX)                                    \
2566   std::string F ## name = std::string(filename).append(SUFFIX); \
2567   FILE* F = fopen(F ## name .c_str(),"wb");                     \
2568   if (F == nullptr) return false;                               \
2569   SCOPE_EXIT{ fclose(F); };
2570
2571   OPEN_FILE(aFile,          "_a");
2572   OPEN_FILE(aprofFile,      "_aprof");
2573   OPEN_FILE(astubFile,      "_astub");
2574   OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");
2575
2576 #undef OPEN_FILE
2577
2578   // dump starting from the trampolines; this assumes CodeCache places
2579   // trampolines before the translation cache
2580   size_t count = code.main().frontier() - code.trampolines().base();
2581   bool result = (fwrite(code.trampolines().base(), 1, count, aFile) == count);
2582   if (result) {
2583     count = code.prof().used();
2584     result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
2585   }
2586   if (result) {
2587     count = code.stubs().used();
2588     result = (fwrite(code.stubs().base(), 1, count, astubFile) == count);
2589   }
2590   if (result) {
2591     for (auto const& pair : m_trampolineMap) {
2592       void* helperAddr = pair.first;
2593       void* trampAddr = pair.second;
2594       auto functionName = getNativeFunctionName(helperAddr);
2595       fprintf(helperAddrFile,"%10p %10p %s\n",
2596               trampAddr, helperAddr,
2597               functionName.c_str());
2598     }
2599   }
2600   return result;
2601 }
2602
2603 // Returns true on success
2604 bool MCGenerator::dumpTC(bool ignoreLease) {
2605   if (!ignoreLease && !Translator::WriteLease().acquire(true)) return false;
2606   bool success = dumpTCData();
2607   if (success) {
2608     success = dumpTCCode("/tmp/tc_dump");
2609   }
2610   if (!ignoreLease) Translator::WriteLease().drop();
2611   return success;
2612 }
2613
2614 // Returns true on success
2615 bool tc_dump(void) {
2616   return mcg && mcg->dumpTC();
2617 }
2618
2619 // Returns true on success
2620 bool MCGenerator::dumpTCData() {
2621   gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
2622   if (!tcDataFile) return false;
2623
2624   if (!gzprintf(tcDataFile,
2625                 "repo_schema     = %s\n"
2626                 "a.base          = %p\n"
2627                 "a.frontier      = %p\n"
2628                 "aprof.base      = %p\n"
2629                 "aprof.frontier  = %p\n"
2630                 "astubs.base     = %p\n"
2631                 "astubs.frontier = %p\n\n",
2632                 kRepoSchemaId,
2633                 code.trampolines().base(), code.main().frontier(),
2634                 code.prof().base(), code.prof().frontier(),
2635                 code.stubs().base(), code.stubs().frontier())) {
2636     return false;
2637   }
2638
2639   if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
2640                 m_tx.getCurrentTransID())) {
2641     return false;
2642   }
2643
2644   for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
2645     if (gzputs(tcDataFile,
2646                m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
2647          -1) {
2648       return false;
2649     }
2650   }
2651
2652   gzclose(tcDataFile);
2653   return true;
2654 }
2655
2656 void MCGenerator::invalidateSrcKey(SrcKey sk) {
2657   assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
2658   assert(Translator::WriteLease().amOwner());
2659   /*
2660    * Reroute existing translations for SrcKey to an as-yet indeterminate
2661    * new one.
2662    */
2663   SrcRec* sr = m_tx.getSrcDB().find(sk);
2664   assert(sr);
2665   /*
2666    * Since previous translations aren't reachable from here, we know we
2667    * just created some garbage in the TC. We currently have no mechanism
2668    * to reclaim this.
2669    */
2670   sr->replaceOldTranslations();
2671 }
2672
2673 void MCGenerator::setJmpTransID(TCA jmp) {
2674   if (m_tx.mode() != TransProfile) return;
2675
2676   TransID transId = m_tx.profData()->curTransID();
2677   FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
2678   m_jmpToTransID[jmp] = transId;
2679 }
2680
2681 void
2682 emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n, bool force) {
2683   if (!force && !Stats::enabled()) return;
2684   intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();
2685
2686   if (arch() == Arch::X64) {
2687     X64Assembler a { cb };
2688
2689     a.    pushf ();
2690     //    addq $n, [%fs:disp]
2691     a.    fs().addq(n, baseless(disp));
2692     a.    popf  ();
2693   } else if (arch() == Arch::ARM) {
2694     using ARM::rAsm;
2695     using ARM::rAsm2;
2696     vixl::MacroAssembler a { cb };
2697
2698     a.    Mrs   (rAsm2, vixl::TPIDR_EL0);
2699     a.    Ldr   (rAsm, rAsm2[disp]);
2700     a.    Add   (rAsm, rAsm, n);
2701     a.    Str   (rAsm, rAsm2[disp]);
2702   } else {
2703     not_implemented();
2704   }
2705 }
2706
2707 } // HPHP::JIT
2708
2709 } // HPHP