Relocate to arbitrary alignment
hphp/runtime/vm/jit/mc-generator.cpp
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/runtime/vm/jit/mc-generator.h"
17 #include "hphp/runtime/vm/jit/vtune-jit.h"
19 #include "folly/MapUtil.h"
21 #include <cinttypes>
22 #include <stdint.h>
23 #include <assert.h>
24 #include <unistd.h>
25 #include <sys/mman.h>
26 #include <strstream>
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <string>
30 #include <queue>
31 #include <unwind.h>
32 #include <unordered_set>
34 #include <algorithm>
35 #include <exception>
36 #include <memory>
37 #include <vector>
39 #include "folly/Format.h"
40 #include "folly/String.h"
42 #include "hphp/util/abi-cxx.h"
43 #include "hphp/util/disasm.h"
44 #include "hphp/util/bitops.h"
45 #include "hphp/util/debug.h"
46 #include "hphp/util/maphuge.h"
47 #include "hphp/util/rank.h"
48 #include "hphp/util/ringbuffer.h"
49 #include "hphp/util/timer.h"
50 #include "hphp/util/trace.h"
51 #include "hphp/util/meta.h"
52 #include "hphp/util/process.h"
53 #include "hphp/util/repo-schema.h"
54 #include "hphp/util/cycles.h"
56 #include "hphp/runtime/vm/bytecode.h"
57 #include "hphp/runtime/vm/php-debug.h"
58 #include "hphp/runtime/vm/runtime.h"
59 #include "hphp/runtime/base/arch.h"
60 #include "hphp/runtime/base/complex-types.h"
61 #include "hphp/runtime/base/execution-context.h"
62 #include "hphp/runtime/base/runtime-option.h"
63 #include "hphp/runtime/base/runtime-option-guard.h"
64 #include "hphp/runtime/base/strings.h"
65 #include "hphp/runtime/server/source-root-info.h"
66 #include "hphp/runtime/base/zend-string.h"
67 #include "hphp/runtime/ext/ext_closure.h"
68 #include "hphp/runtime/ext/ext_generator.h"
69 #include "hphp/runtime/ext/ext_function.h"
70 #include "hphp/runtime/vm/debug/debug.h"
71 #include "hphp/runtime/base/stats.h"
72 #include "hphp/runtime/vm/pendq.h"
73 #include "hphp/runtime/vm/srckey.h"
74 #include "hphp/runtime/vm/treadmill.h"
75 #include "hphp/runtime/vm/repo.h"
76 #include "hphp/runtime/vm/type-profile.h"
77 #include "hphp/runtime/vm/member-operations.h"
78 #include "hphp/runtime/vm/jit/check.h"
79 #include "hphp/runtime/vm/jit/hhbc-translator.h"
80 #include "hphp/runtime/vm/jit/ir-translator.h"
81 #include "hphp/runtime/vm/jit/normalized-instruction.h"
82 #include "hphp/runtime/vm/jit/opt.h"
83 #include "hphp/runtime/vm/jit/print.h"
84 #include "hphp/runtime/vm/jit/region-selection.h"
85 #include "hphp/runtime/vm/jit/srcdb.h"
86 #include "hphp/runtime/base/rds.h"
87 #include "hphp/runtime/vm/jit/tracelet.h"
88 #include "hphp/runtime/vm/jit/translator-inline.h"
89 #include "hphp/runtime/vm/jit/code-gen.h"
90 #include "hphp/runtime/vm/jit/service-requests-inline.h"
91 #include "hphp/runtime/vm/jit/back-end-x64.h" // XXX Layering violation.
92 #include "hphp/runtime/vm/jit/debug-guards.h"
93 #include "hphp/runtime/vm/jit/timer.h"
94 #include "hphp/runtime/vm/unwind.h"
96 #include "hphp/runtime/vm/jit/mc-generator-internal.h"
98 namespace HPHP {
99 namespace JIT {
101 TRACE_SET_MOD(mcg);
103 using namespace reg;
104 using namespace Trace;
105 using std::max;
107 #define TRANS_PERF_COUNTERS \
108 TPC(translate) \
109 TPC(retranslate) \
110 TPC(interp_bb) \
111 TPC(interp_instr) \
112 TPC(interp_one) \
113 TPC(max_trans) \
114 TPC(enter_tc) \
115 TPC(service_req)
117 #define TPC(n) "jit_" #n,
118 static const char* const kPerfCounterNames[] = {
119 TRANS_PERF_COUNTERS
121 #undef TPC
123 #define TPC(n) tpc_ ## n,
124 enum TransPerfCounter {
125 TRANS_PERF_COUNTERS
126 tpc_num_counters
128 #undef TPC
129 static __thread int64_t s_perfCounters[tpc_num_counters];
130 #define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
132 static __thread size_t s_initialTCSize;
134 // The global MCGenerator object.
135 MCGenerator* mcg;
137 CppCall MCGenerator::getDtorCall(DataType type) {
138 switch (type) {
139 case BitwiseKindOfString:
140 return CppCall::method(&StringData::release);
141 case KindOfArray:
142 return CppCall::method(&ArrayData::release);
143 case KindOfObject:
144 return CppCall::method(&ObjectData::release);
145 case KindOfResource:
146 return CppCall::method(&ResourceData::release);
147 case KindOfRef:
148 return CppCall::method(&RefData::release);
149 default:
150 assert(false);
151 NOT_REACHED();
155 bool MCGenerator::profileSrcKey(const SrcKey& sk) const {
156 if (!sk.func()->shouldPGO()) return false;
157 if (m_tx.profData()->optimized(sk.getFuncId())) return false;
158 if (m_tx.profData()->profiling(sk.getFuncId())) return true;
159 return requestCount() <= RuntimeOption::EvalJitProfileRequests;
163 * Invalidate the SrcDB entries for func's SrcKeys that have any
164 * Profile translation.
166 void MCGenerator::invalidateFuncProfSrcKeys(const Func* func) {
167 assert(RuntimeOption::EvalJitPGO);
168 FuncId funcId = func->getFuncId();
169 for (auto tid : m_tx.profData()->funcProfTransIDs(funcId)) {
170 invalidateSrcKey(m_tx.profData()->transSrcKey(tid));
174 TCA MCGenerator::retranslate(const TranslArgs& args) {
175 SrcRec* sr = m_tx.getSrcDB().find(args.m_sk);
176 always_assert(sr);
177 bool locked = sr->tryLock();
178 SCOPE_EXIT {
179 if (locked) sr->freeLock();
181 if (isDebuggerAttachedProcess() && m_tx.isSrcKeyInBL(args.m_sk)) {
182 // We are about to translate something known to be blacklisted by
 183     // the debugger; exit early.
184 SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
185 return nullptr;
187 LeaseHolder writer(Translator::WriteLease());
188 if (!writer || !shouldTranslate()) return nullptr;
189 if (!locked) {
190 // Even though we knew above that we were going to skip
191 // doing another translation, we wait until we get the
192 // write lease, to avoid spinning through the tracelet
193 // guards again and again while another thread is writing
194 // to it.
195 return sr->getTopTranslation();
197 SKTRACE(1, args.m_sk, "retranslate\n");
199 m_tx.setMode(profileSrcKey(args.m_sk) ? TransKind::Profile
200 : TransKind::Live);
201 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
203 return translate(args);
206 TCA MCGenerator::retranslateOpt(TransID transId, bool align) {
207 LeaseHolder writer(Translator::WriteLease());
208 if (!writer) return nullptr;
209 if (isDebuggerAttachedProcess()) return nullptr;
211 TRACE(1, "retranslateOpt: transId = %u\n", transId);
213 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
215 always_assert(m_tx.profData()->transRegion(transId) != nullptr);
217 Func* func = m_tx.profData()->transFunc(transId);
218 FuncId funcId = func->getFuncId();
219 const SrcKey& sk = m_tx.profData()->transSrcKey(transId);
221 if (m_tx.profData()->optimized(funcId)) return nullptr;
222 m_tx.profData()->setOptimized(funcId);
224 bool setFuncBody = func->getDVFunclets().size() == 0;
226 func->setFuncBody(m_tx.uniqueStubs.funcBodyHelperThunk);
228 // Invalidate SrcDB's entries for all func's SrcKeys.
229 invalidateFuncProfSrcKeys(func);
231 // Regenerate the prologues and DV funclets before the actual function body.
232 TCA start = regeneratePrologues(func, sk);
234 // Regionize func and translate all its regions.
235 std::vector<RegionDescPtr> regions;
236 regionizeFunc(func, this, regions);
238 for (auto region : regions) {
239 m_tx.setMode(TransKind::Optimize);
240 always_assert(region->blocks.size() > 0);
241 SrcKey regionSk = region->blocks[0]->start();
242 auto translArgs = TranslArgs(regionSk, align).region(region);
243 if (setFuncBody && regionSk.offset() == func->base()) {
244 translArgs.setFuncBody();
245 setFuncBody = false;
247 TCA regionStart = translate(translArgs);
248 if (start == nullptr && regionSk == sk) {
249 assert(regionStart);
250 start = regionStart;
252 // Cloned closures' prologue tables point to the corresponding
253 // main/DV entry point. So update the prologue table when
254 // retranslating their entries.
255 if (func->isClonedClosure() && func->isEntry(regionSk.offset())) {
256 int entryNumParams = func->getEntryNumParams(regionSk.offset());
257 func->setPrologue(entryNumParams, regionStart);
260 assert(start);
261 return start;
265 * Find or create a translation for sk. Returns TCA of "best" current
266 * translation. May return NULL if it is currently impossible to create
267 * a translation.
270 MCGenerator::getTranslation(const TranslArgs& args) {
271 auto sk = args.m_sk;
272 sk.func()->validate();
273 SKTRACE(2, sk,
274 "getTranslation: curUnit %s funcId %x offset %d\n",
275 sk.unit()->filepath()->data(),
276 sk.getFuncId(),
277 sk.offset());
278 SKTRACE(2, sk, " funcId: %x \n", sk.func()->getFuncId());
280 if (Translator::liveFrameIsPseudoMain() &&
281 !RuntimeOption::EvalJitPseudomain) {
282 SKTRACE(2, sk, "punting on pseudoMain\n");
283 return nullptr;
285 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
286 TCA tca = sr->getTopTranslation();
287 if (tca) {
288 SKTRACE(2, sk, "getTranslation: found %p\n", tca);
289 return tca;
292 return createTranslation(args);
296 MCGenerator::numTranslations(SrcKey sk) const {
297 if (const SrcRec* sr = m_tx.getSrcDB().find(sk)) {
298 return sr->translations().size();
300 return 0;
303 static void populateLiveContext(RegionContext& ctx) {
304 typedef RegionDesc::Location L;
306 const ActRec* const fp {vmfp()};
307 const TypedValue* const sp {vmsp()};
309 for (uint32_t i = 0; i < fp->m_func->numLocals(); ++i) {
310 ctx.liveTypes.push_back(
311 { L::Local{i}, liveTVType(frame_local(fp, i)) }
315 uint32_t stackOff = 0;
316 visitStackElems(
317 fp, sp, ctx.bcOffset,
318 [&](const ActRec* ar) {
319 // TODO(#2466980): when it's a Cls, we should pass the Class* in
320 // the Type.
321 auto const objOrCls =
322 ar->hasThis() ? Type::Obj.specialize(ar->getThis()->getVMClass()) :
323 ar->hasClass() ? Type::Cls
324 : Type::Nullptr;
326 ctx.preLiveARs.push_back(
327 { stackOff,
328 ar->m_func,
329 objOrCls
332 FTRACE(2, "added prelive ActRec {}\n", show(ctx.preLiveARs.back()));
334 stackOff += kNumActRecCells;
336 [&](const TypedValue* tv) {
337 ctx.liveTypes.push_back(
338 { L::Stack{stackOff, ctx.spOffset - stackOff}, liveTVType(tv) }
340 stackOff++;
341 FTRACE(2, "added live type {}\n", show(ctx.liveTypes.back()));
347 MCGenerator::createTranslation(const TranslArgs& args) {
348 if (!shouldTranslate()) return nullptr;
351 * Try to become the writer. We delay this until we *know* we will have
352 * a need to create new translations, instead of just trying to win the
353 * lottery at the dawn of time. Hopefully lots of requests won't require
354 * any new translation.
356 auto sk = args.m_sk;
357 LeaseHolder writer(Translator::WriteLease());
358 if (!writer || !shouldTranslate()) return nullptr;
360 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
361 TCA tca = sr->getTopTranslation();
362 if (tca) {
363 // Handle extremely unlikely race; someone may have just already
364 // added the first instance of this SrcRec while we did a
365 // non-blocking wait on the write lease.
366 return tca;
367 } else {
368 // Since we are holding the write lease, we know that sk is properly
369 // initialized, except that it has no translations (due to
370 // replaceOldTranslations)
371 return retranslate(args);
375 // We put retranslate requests at the end of our slab to more frequently
376 // allow conditional jump fall-throughs
377 TCA astart = code.main().frontier();
378 TCA realColdStart = code.realCold().frontier();
379 TCA realFrozenStart = code.realFrozen().frontier();
380 TCA req = emitServiceReq(code.cold(), REQ_RETRANSLATE, sk.offset());
381 SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
382 sk.unit(), sk.offset(), req);
383 SrcRec* sr = m_tx.getSrcRec(sk);
384 sr->setFuncInfo(sk.func());
385 sr->setAnchorTranslation(req);
387 size_t asize = code.main().frontier() - astart;
388 size_t realColdSize = code.realCold().frontier() - realColdStart;
389 size_t realFrozenSize = code.realFrozen().frontier() - realFrozenStart;
390 assert(asize == 0);
391 if (realColdSize && RuntimeOption::EvalDumpTCAnchors) {
392 TransRec tr(sk,
393 TransKind::Anchor,
394 astart, asize, realColdStart, realColdSize,
395 realFrozenStart, realFrozenSize);
396 m_tx.addTranslation(tr);
397 if (RuntimeOption::EvalJitUseVtuneAPI) {
398 reportTraceletToVtune(sk.unit(), sk.func(), tr);
401 if (m_tx.profData()) {
402 m_tx.profData()->addTransNonProf(TransKind::Anchor, sk);
404 assert(!m_tx.isTransDBEnabled() ||
405 m_tx.getTransRec(realColdStart)->kind == TransKind::Anchor);
408 return retranslate(args);
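/*
 * The find / lease / re-find sequence above is a double-checked locking
 * pattern. A minimal sketch of the same shape, with hypothetical names
 * (Cache, acquireLease, emit) standing in for the real SrcDB/LeaseHolder/
 * translate machinery:
 *
 *   TCA getOrCreate(Cache& cache, SrcKey sk) {
 *     if (TCA tca = cache.find(sk)) return tca;  // fast path, no lock taken
 *     auto lease = acquireLease();               // may fail; don't block
 *     if (!lease) return nullptr;                // caller interprets instead
 *     if (TCA tca = cache.find(sk)) return tca;  // re-check under the lease
 *     return emit(cache, sk);                    // we won; do the real work
 *   }
 */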
412 MCGenerator::lookupTranslation(SrcKey sk) const {
413 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
414 return sr->getTopTranslation();
416 return nullptr;
420 MCGenerator::translate(const TranslArgs& args) {
421 INC_TPC(translate);
423 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
424 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
425 assert(m_tx.mode() != TransKind::Invalid);
426 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
428 if (!args.m_interp) {
429 if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) {
430 RuntimeOption::EvalJit = false;
431 ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
432 return nullptr;
436 Func* func = const_cast<Func*>(args.m_sk.func());
437 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
438 .profile(m_tx.mode() == TransKind::Profile)
439 .hot((func->attrs() & AttrHot) && m_tx.useAHot()));
441 if (args.m_align) {
442 mcg->backEnd().moveToAlign(code.main(),
443 MoveToAlignFlags::kNonFallthroughAlign);
446 TCA start = code.main().frontier();
448 if (RuntimeOption::EvalJitDryRuns &&
449 (m_tx.mode() == TransKind::Live || m_tx.mode() == TransKind::Profile)) {
450 auto const useRegion =
451 RuntimeOption::EvalJitRegionSelector == "tracelet";
452 always_assert(useRegion ||
453 RuntimeOption::EvalJitRegionSelector == "");
455 auto dryArgs = args;
457 dryArgs.dryRun(!useRegion);
459 // First, run translateWork with the tracelet region selector. If
460 // useRegion == false, the generated code will be thrown away at the end.
461 OPTION_GUARD(EvalJitRegionSelector, "tracelet");
462 OPTION_GUARD(EvalHHIRRelaxGuards, true);
463 OPTION_GUARD(EvalHHBCRelaxGuards, false);
464 translateWork(dryArgs);
467 dryArgs.dryRun(useRegion);
469 // Now translate with analyze(), throwing away the generated code if
470 // useRegion == true.
471 OPTION_GUARD(EvalJitRegionSelector, "");
472 OPTION_GUARD(EvalHHIRRelaxGuards, false);
473 OPTION_GUARD(EvalHHBCRelaxGuards, true);
474 translateWork(dryArgs);
476 } else {
477 translateWork(args);
480 if (args.m_setFuncBody) {
481 func->setFuncBody(start);
483 SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
484 getTopTranslation(args.m_sk), start);
486 return start;
490 MCGenerator::getCallArrayPrologue(Func* func) {
491 TCA tca = func->getFuncBody();
492 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
494 DVFuncletsVec dvs = func->getDVFunclets();
496 if (dvs.size()) {
497 LeaseHolder writer(Translator::WriteLease());
498 if (!writer) return nullptr;
499 tca = func->getFuncBody();
500 if (tca != m_tx.uniqueStubs.funcBodyHelperThunk) return tca;
501 tca = mcg->backEnd().emitCallArrayPrologue(func, dvs);
502 func->setFuncBody(tca);
503 } else {
504 SrcKey sk(func, func->base(), false);
505 tca = mcg->getTranslation(TranslArgs(sk, false).setFuncBody());
508 return tca;
511 void
512 MCGenerator::smashPrologueGuards(TCA* prologues, int numPrologues,
513 const Func* func) {
514 DEBUG_ONLY std::unique_ptr<LeaseHolder> writer;
515 for (int i = 0; i < numPrologues; i++) {
516 if (prologues[i] != m_tx.uniqueStubs.fcallHelperThunk
517 && backEnd().funcPrologueHasGuard(prologues[i], func)) {
518 if (debug) {
 520        * Units are sometimes created racily, in which case all
521 * but the first are destroyed immediately. In that case,
522 * the Funcs of the destroyed Units never need their
523 * prologues smashing, and it would be a lock rank violation
524 * to take the write lease here.
525 * In all other cases, Funcs are destroyed via a delayed path
526 * (treadmill) and the rank violation isn't an issue.
528 if (!writer) {
529 writer.reset(new LeaseHolder(Translator::WriteLease(),
530 LeaseAcquire::BLOCKING));
533 mcg->backEnd().funcPrologueSmashGuard(prologues[i], func);
539 * funcPrologue --
541 * Given a callee and a number of args, match up to the callee's
542 * argument expectations and dispatch.
544 * Call/return hand-shaking is a bit funny initially. At translation time,
545 * we don't necessarily know what function we're calling. For instance,
547 * f(g());
 549  * will lead to a set of basic blocks like:
551 * b1: pushfuncd "f"
552 * pushfuncd "g"
553 * fcall
554 * b2: fcall
 556  * The fcall labelled "b2" above is not statically bindable in our
557 * execution model.
559 * We decouple the call work into a per-callsite portion, responsible
560 * for recording the return address, and a per-(callee, numArgs) portion,
561 * responsible for fixing up arguments and dispatching to remaining
562 * code. We call the per-callee portion a "prologue."
564 * Also, we are called from two distinct environments. From REQ_BIND_CALL,
565 * we're running "between" basic blocks, with all VM registers sync'ed.
566 * However, we're also called in the middle of basic blocks, when dropping
567 * entries into func->m_prologues. So don't go around using the
568 * translation-time values of vmfp()/vmsp(), since they have an
569 * unpredictable relationship to the source.
571 bool
572 MCGenerator::checkCachedPrologue(const Func* func, int paramIdx,
573 TCA& prologue) const {
574 prologue = (TCA)func->getPrologue(paramIdx);
575 if (prologue != m_tx.uniqueStubs.fcallHelperThunk) {
576 TRACE(1, "cached prologue %s(%d) -> cached %p\n",
577 func->fullName()->data(), paramIdx, prologue);
578 assert(isValidCodeAddress(prologue));
579 return true;
581 return false;
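/*
 * The prologue table behaves like a memo cache indexed by an adjusted
 * argument count: nPassed <= numParams selects slot nPassed, and any
 * over-supplied call shares slot numParams + 1 (see getFuncPrologue
 * below). A sketch of the lookup discipline (emitPrologue is illustrative;
 * the other names match the code below):
 *
 *   TCA lookupOrEmit(Func* f, int nPassed) {
 *     int numParams = f->numNonVariadicParams();
 *     int idx = nPassed <= numParams ? nPassed : numParams + 1;
 *     TCA p = f->getPrologue(idx);
 *     if (p != fcallHelperThunk) return p;   // hit: prologue already emitted
 *     p = emitPrologue(f, nPassed);          // miss: generate under the lease
 *     f->setPrologue(idx, p);                // publish for future callers
 *     return p;
 *   }
 */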
584 static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
585 assert(tl_regState == VMRegState::DIRTY);
586 tl_regState = VMRegState::CLEAN;
587 vmfp() = ar;
588 vmsp() = sp;
589 vmpc() = ar->unit()->at(pcOff);
593 MCGenerator::getFuncPrologue(Func* func, int nPassed, ActRec* ar,
594 bool ignoreTCLimit) {
595 func->validate();
596 TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
597 int const numParams = func->numNonVariadicParams();
598 int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
600 bool const funcIsMagic = func->isMagic();
602 // Do a quick test before grabbing the write lease
603 TCA prologue;
604 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
606 Offset entry = func->getEntryForNumArgs(nPassed);
607 SrcKey funcBody(func, entry, false);
609 if (func->isClonedClosure()) {
610 assert(ar);
611 interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
612 TCA tca = getTranslation(TranslArgs(funcBody, false));
613 tl_regState = VMRegState::DIRTY;
614 if (tca) {
615 // racy, but ok...
616 func->setPrologue(paramIndex, tca);
618 return tca;
621 LeaseHolder writer(Translator::WriteLease());
622 if (!writer) return nullptr;
623 if (!ignoreTCLimit && !shouldTranslate()) return nullptr;
625 // Double check the prologue array now that we have the write lease
626 // in case another thread snuck in and set the prologue already.
627 if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
 629   // We're coming from a BIND_CALL service request, so enable
630 // profiling if we haven't optimized the function entry yet.
631 assert(m_tx.mode() == TransKind::Invalid ||
632 m_tx.mode() == TransKind::Prologue);
633 if (m_tx.mode() == TransKind::Invalid && profileSrcKey(funcBody)) {
634 m_tx.setMode(TransKind::Proflogue);
635 } else {
636 m_tx.setMode(TransKind::Prologue);
638 SCOPE_EXIT{ m_tx.setMode(TransKind::Invalid); };
640 CodeCache::Selector cbSel(CodeCache::Selector::Args(code)
641 .profile(m_tx.mode() == TransKind::Proflogue)
642 .hot((func->attrs() & AttrHot) && m_tx.useAHot()));
644 // If we're close to a cache line boundary, just burn some space to
645 // try to keep the func and its body on fewer total lines.
646 if (((uintptr_t)code.main().frontier() & backEnd().cacheLineMask()) >=
647 (backEnd().cacheLineSize() / 2)) {
648 backEnd().moveToAlign(code.main(), MoveToAlignFlags::kCacheLineAlign);
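/*
 * Worked example, assuming a 64-byte cache line: with the frontier at
 * offset 40 within its line, 40 >= 64/2 holds, so we pad 24 bytes and the
 * prologue starts on a fresh line; at offset 24 we would not pad, since
 * the prologue likely still fits in the current line anyway.
 */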
651 // Careful: this isn't necessarily the real entry point. For funcIsMagic
652 // prologues, this is just a possible prologue.
653 TCA aStart = code.main().frontier();
654 TCA start = aStart;
655 TCA realColdStart = mcg->code.realCold().frontier();
656 TCA realFrozenStart = mcg->code.realFrozen().frontier();
658 auto const skFuncBody = [&] {
659 assert(m_fixups.empty());
660 auto ret = backEnd().emitFuncPrologue(code.main(), code.cold(), func,
661 funcIsMagic, nPassed,
662 start, aStart);
663 m_fixups.process();
664 return ret;
665 }();
667 assert(backEnd().funcPrologueHasGuard(start, func));
668 TRACE(2, "funcPrologue mcg %p %s(%d) setting prologue %p\n",
669 this, func->fullName()->data(), nPassed, start);
670 assert(isValidCodeAddress(start));
671 func->setPrologue(paramIndex, start);
673 assert(m_tx.mode() == TransKind::Prologue ||
674 m_tx.mode() == TransKind::Proflogue);
675 TransRec tr(skFuncBody,
676 m_tx.mode(),
677 aStart, code.main().frontier() - aStart,
678 realColdStart, code.realCold().frontier() - realColdStart,
679 realFrozenStart, code.realFrozen().frontier() - realFrozenStart);
680 m_tx.addTranslation(tr);
681 if (RuntimeOption::EvalJitUseVtuneAPI) {
682 reportTraceletToVtune(func->unit(), func, tr);
685 if (m_tx.profData()) {
686 m_tx.profData()->addTransPrologue(m_tx.mode(), skFuncBody, paramIndex);
689 recordGdbTranslation(skFuncBody, func,
690 code.main(), aStart,
691 false, true);
692 recordBCInstr(OpFuncPrologue, code.main(), start, false);
694 return start;
698 * Given the proflogueTransId for a TransProflogue translation,
699 * regenerate the prologue (as a TransPrologue). Returns the starting
700 * address for the translation corresponding to triggerSk, if such
701 * translation is generated; otherwise returns nullptr.
703 TCA MCGenerator::regeneratePrologue(TransID prologueTransId,
704 SrcKey triggerSk) {
705 Func* func = m_tx.profData()->transFunc(prologueTransId);
706 int nArgs = m_tx.profData()->prologueArgs(prologueTransId);
708 // Regenerate the prologue.
709 func->resetPrologue(nArgs);
710 m_tx.setMode(TransKind::Prologue);
711 SCOPE_EXIT { m_tx.setMode(TransKind::Invalid); };
712 TCA start = getFuncPrologue(func, nArgs, nullptr /* ActRec */,
713 true /* ignoreTCLimit */);
714 func->setPrologue(nArgs, start);
716 // Smash callers of the old prologue with the address of the new one.
717 PrologueCallersRec* pcr =
718 m_tx.profData()->prologueCallers(prologueTransId);
719 for (TCA toSmash : pcr->mainCallers()) {
720 backEnd().smashCall(toSmash, start);
722 // If the prologue has a guard, then smash its guard-callers as well.
723 if (backEnd().funcPrologueHasGuard(start, func)) {
724 TCA guard = backEnd().funcPrologueToGuard(start, func);
725 for (TCA toSmash : pcr->guardCallers()) {
726 backEnd().smashCall(toSmash, guard);
729 pcr->clearAllCallers();
731 // If this prologue has a DV funclet, then generate a translation
732 // for the DV funclet right after the prologue. However, skip
733 // cloned closures because their prologues are actually the DV
734 // funclets already.
735 TCA triggerSkStart = nullptr;
736 if (nArgs < func->numNonVariadicParams() && !func->isClonedClosure()) {
737 auto paramInfo = func->params()[nArgs];
738 if (paramInfo.hasDefaultValue()) {
739 m_tx.setMode(TransKind::Optimize);
740 SrcKey funcletSK(func, paramInfo.funcletOff, false);
741 TransID funcletTransId = m_tx.profData()->dvFuncletTransId(func, nArgs);
742 if (funcletTransId != kInvalidTransID) {
743 invalidateSrcKey(funcletSK);
744 TCA dvStart = translate(TranslArgs(funcletSK, false).
745 transId(funcletTransId));
746 if (dvStart && !triggerSkStart && funcletSK == triggerSk) {
747 triggerSkStart = dvStart;
749 // Flag that this translation has been retranslated, so that
750 // it's not retranslated again along with the function body.
751 m_tx.profData()->setOptimized(funcletSK);
756 return triggerSkStart;
760 * Regenerate all prologues of func that were previously generated.
761 * The prologues are sorted in ascending order of profile counters.
762 * For prologues with corresponding DV funclets, their corresponding
763 * DV funclet will be regenerated right after them. The idea is to
764 * generate the function body right after calling this function, so
765 * that all prologues are placed right before it, and with the hottest
766 * prologues closer to it.
768 * Returns the starting address for the translation corresponding to
769 * triggerSk, if such translation is generated; otherwise returns
770 * nullptr.
772 TCA MCGenerator::regeneratePrologues(Func* func, SrcKey triggerSk) {
773 TCA triggerStart = nullptr;
774 std::vector<TransID> prologTransIDs;
776 for (int nArgs = 0; nArgs < func->numPrologues(); nArgs++) {
777 TransID tid = m_tx.profData()->prologueTransId(func, nArgs);
778 if (tid != kInvalidTransID) {
779 prologTransIDs.push_back(tid);
783 std::sort(prologTransIDs.begin(), prologTransIDs.end(),
784 [&](TransID t1, TransID t2) -> bool {
785 // This will sort in ascending order. Note that transCounters start
786 // at JitPGOThreshold and count down.
787 return m_tx.profData()->transCounter(t1) >
788 m_tx.profData()->transCounter(t2);
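/*
 * Worked example of the comparator, assuming JitPGOThreshold is 5000: a
 * prologue entered 4900 times has a counter of 100, one entered 10 times
 * has 4990. Sorting by descending counter therefore emits the cold
 * prologue (4990) first and the hot one (100) last, nearest the body.
 */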
791 for (TransID tid : prologTransIDs) {
792 TCA start = regeneratePrologue(tid, triggerSk);
793 if (triggerStart == nullptr && start != nullptr) {
794 triggerStart = start;
798 return triggerStart;
802 * bindJmp --
804 * Runtime service handler that patches a jmp to the translation of
805 * u:dest from toSmash.
808 MCGenerator::bindJmp(TCA toSmash, SrcKey destSk,
809 ServiceRequest req, bool& smashed) {
810 TCA tDest = getTranslation(TranslArgs(destSk, false));
811 if (!tDest) return nullptr;
812 LeaseHolder writer(Translator::WriteLease());
813 if (!writer) return tDest;
814 SrcRec* sr = m_tx.getSrcRec(destSk);
815 // The top translation may have changed while we waited for the
816 // write lease, so read it again. If it was replaced with a new
817 // translation, then bind to the new one. If it was invalidated,
818 // then don't bind the jump.
819 tDest = sr->getTopTranslation();
820 if (tDest == nullptr) return nullptr;
822 if (req == REQ_BIND_ADDR) {
823 auto addr = reinterpret_cast<TCA*>(toSmash);
824 if (*addr == tDest) {
825 // Already smashed
826 return tDest;
828 sr->chainFrom(IncomingBranch::addr(addr));
829 } else if (req == REQ_BIND_JCC || req == REQ_BIND_SIDE_EXIT) {
830 auto jt = backEnd().jccTarget(toSmash);
831 assert(jt);
832 if (jt == tDest) {
833 // Already smashed
834 return tDest;
836 sr->chainFrom(IncomingBranch::jccFrom(toSmash));
837 } else {
838 assert(!backEnd().jccTarget(toSmash));
839 if (!backEnd().jmpTarget(toSmash)
840 || backEnd().jmpTarget(toSmash) == tDest) {
841 // Already smashed
842 return tDest;
844 sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
846 smashed = true;
847 return tDest;
851 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
853 * 1: j<CC> stubJmpccFirst
854 * jmp stubJmpccFirst
856 * Our "taken" argument tells us whether the branch at 1: was taken or
857 * not; and therefore which of offTaken and offNotTaken to continue executing.
858 * If we did take the branch, we now rewrite the code so that the branch is
859 * straightened. This predicts that subsequent executions will go the same way
860 * as the first execution.
862 * jn<CC> stubJmpccSecond:offNotTaken
863 * nop5 ; fallthru, or jmp if there's already a translation.
864 * offTaken:
866 * If we did not take the branch, we leave the sense of the condition
867 * intact, while patching it up to go to the unexplored code:
869 * j<CC> stubJmpccSecond:offTaken
870 * nop5
871 * offNotTaken:
874 MCGenerator::bindJmpccFirst(TCA toSmash,
875 Offset offTaken, Offset offNotTaken,
876 bool taken,
877 ConditionCode cc,
878 bool& smashed) {
879 const Func* f = liveFunc();
880 LeaseHolder writer(Translator::WriteLease());
881 if (!writer) return nullptr;
882 Offset offWillExplore = taken ? offTaken : offNotTaken;
883 Offset offWillDefer = taken ? offNotTaken : offTaken;
884 SrcKey dest(f, offWillExplore, liveResumed());
885 TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
886 "taken %d\n",
887 offWillExplore, offWillDefer, cc, taken);
889 // We want the branch to point to whichever side has not been explored
890 // yet.
891 if (taken) {
892 cc = ccNegate(cc);
895 auto& cb = code.blockFor(toSmash);
896 Asm as { cb };
 897   // It's not clear where the IncomingBranch should go if cb is code.frozen()
898 assert(&cb != &code.frozen());
900 // XXX Use of kJmp*Len here is a layering violation.
901 using namespace X64;
903 // can we just directly fall through?
904 // a jmp + jz takes 5 + 6 = 11 bytes
905 bool fallThru = toSmash + kJmpccLen + kJmpLen == cb.frontier() &&
906 !m_tx.getSrcDB().find(dest);
908 TCA tDest;
909 tDest = getTranslation(TranslArgs(dest, !fallThru));
910 if (!tDest) {
911 return 0;
914 if (backEnd().jmpTarget(toSmash + kJmpccLen)
915 != backEnd().jccTarget(toSmash)) {
916 // someone else already smashed this one. Ideally we would
917 // just re-execute from toSmash - except the flags will have
918 // been trashed.
919 return tDest;
922 TCA stub = emitEphemeralServiceReq(code.frozen(),
923 getFreeStub(code.frozen(), nullptr),
924 REQ_BIND_JMPCC_SECOND,
925 RipRelative(toSmash),
926 offWillDefer, cc);
928 smashed = true;
929 assert(Translator::WriteLease().amOwner());
931 * Roll over the jcc and the jmp/fallthru. E.g., from:
933 * toSmash: jcc <jmpccFirstStub>
934 * toSmash+6: jmp <jmpccFirstStub>
935 * toSmash+11: <probably the new translation == tdest>
937 * to:
939 * toSmash: j[n]z <jmpccSecondStub>
940 * toSmash+6: nop5
941 * toSmash+11: newHotness
943 CodeCursor cg(cb, toSmash);
944 as.jcc(cc, stub);
945 m_tx.getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(cb.frontier()));
946 TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
947 return tDest;
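/*
 * Concrete instance of the straightening above (illustrative): if the
 * tracelet ended in "jz <stub>" and the first execution took the branch,
 * cc is negated to jnz, so the rewrite is "jnz <stub for offNotTaken>"
 * followed by a nop5 that falls through into the new translation of the
 * taken side.
 */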
950 // smashes a jcc to point to a new destination
952 MCGenerator::bindJmpccSecond(TCA toSmash, const Offset off,
953 ConditionCode cc, bool& smashed) {
954 const Func* f = liveFunc();
955 SrcKey dest(f, off, liveResumed());
956 TCA branch = getTranslation(TranslArgs(dest, true));
957 if (branch) {
958 LeaseHolder writer(Translator::WriteLease());
959 if (writer) {
960 if (branch == backEnd().jccTarget(toSmash)) {
961 // already smashed
962 return branch;
963 } else {
964 smashed = true;
965 SrcRec* destRec = m_tx.getSrcRec(dest);
966 destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
970 return branch;
973 void MCGenerator::emitResolvedDeps(const ChangeMap& resolvedDeps) {
974 for (const auto dep : resolvedDeps) {
975 m_tx.irTrans()->assertType(dep.first, dep.second->rtt);
979 void
980 MCGenerator::checkRefs(SrcKey sk,
981 const RefDeps& refDeps,
982 SrcRec& fail) {
983 if (refDeps.size() == 0) {
984 return;
987 // Set up guards for each pushed ActRec that we've made reffiness
988 // assumptions about
989 for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
990 it != refDeps.m_arMap.end(); ++it) {
991 // Be careful! The actual Func might have fewer refs than the number
992 // of args we're passing. To forestall this, we always prepare at
993 // least 64 bits in the Func, and always fill out the refBitVec
994 // to a multiple of 64 bits
996 int entryArDelta = it->first;
998 m_tx.irTrans()->hhbcTrans().guardRefs(entryArDelta,
999 it->second.m_mask,
1000 it->second.m_vals);
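/*
 * Sketch of the check each reffiness guard performs at runtime (assumed
 * semantics of m_mask/m_vals): only argument positions we actually made
 * assumptions about are compared, 64 bits at a time:
 *
 *   bool refsMatch(uint64_t calleeRefBits, uint64_t mask, uint64_t vals) {
 *     return (calleeRefBits & mask) == vals;  // guarded positions must agree
 *   }
 */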
1004 namespace {
1005 class FreeRequestStubTrigger {
1006 TCA m_stub;
1007 public:
1008 explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
1009 TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
1011 void operator()() {
1012 TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
1013 if (mcg->freeRequestStub(m_stub) != true) {
1014 // If we can't free the stub, enqueue again to retry.
1015 TRACE(3, "FreeStubTrigger: write lease failed, requeueing %p\n", m_stub);
1016 Treadmill::enqueue(FreeRequestStubTrigger(m_stub));
1022 #ifdef DEBUG
1024 struct DepthGuard {
1025 static __thread int m_depth;
1026 DepthGuard() { m_depth++; TRACE(2, "DepthGuard: %d {\n", m_depth); }
1027 ~DepthGuard() { TRACE(2, "DepthGuard: %d }\n", m_depth); m_depth--; }
1029 bool depthOne() const { return m_depth == 1; }
1031 __thread int DepthGuard::m_depth;
1033 #else
1035 struct DepthGuard { bool depthOne() const { return false; } };
1037 #endif
1039 void
1040 MCGenerator::enterTC(TCA start, void* data) {
1041 if (debug) {
1042 fflush(stdout);
1043 fflush(stderr);
1045 DepthGuard d;
1046 TReqInfo info;
1047 SrcKey sk;
1049 if (LIKELY(start != nullptr)) {
1050 info.requestNum = data ? REQ_BIND_CALL : -1;
1051 info.saved_rStashedAr = (uintptr_t)data;
1052 } else {
1053 info.requestNum = -1;
1054 info.saved_rStashedAr = 0;
1055 sk = *(SrcKey*)data;
1056 start = getTranslation(TranslArgs(sk, true));
1058 for (;;) {
1059 assert(sizeof(Cell) == 16);
1060 assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
1061 assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
1063 Translator::WriteLease().gremlinUnlock();
1064 // Keep dispatching until we end up somewhere the translator
1065 // recognizes, or we luck out and the leaseholder exits.
1066 while (!start) {
1067 TRACE(2, "enterTC forwarding BB to interpreter\n");
1068 vmpc() = sk.unit()->at(sk.offset());
1069 INC_TPC(interp_bb);
1070 g_context->dispatchBB();
1071 PC newPc = vmpc();
1072 if (!newPc) { vmfp() = 0; return; }
1073 sk = SrcKey(liveFunc(), newPc, liveResumed());
1074 start = getTranslation(TranslArgs(sk, true));
1076 assert(start == m_tx.uniqueStubs.funcBodyHelperThunk ||
1077 isValidCodeAddress(start) ||
1078 (start == m_tx.uniqueStubs.fcallHelperThunk &&
1079 info.saved_rStashedAr == (uintptr_t)data));
1080 assert(!Translator::WriteLease().amOwner());
1081 const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
1082 func->validate();
1083 INC_TPC(enter_tc);
1085 TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
1086 vmfp(), func->name()->data(), vmsp());
1087 tl_regState = VMRegState::DIRTY;
1089 if (Trace::moduleEnabledRelease(Trace::ringbuffer, 1)) {
1090 auto skData = sk.valid() ? sk.toAtomicInt() : uint64_t(-1LL);
1091 Trace::ringbufferEntry(RBTypeEnterTC, skData, (uint64_t)start);
1094 mcg->backEnd().enterTCHelper(start, info);
1095 assert(isValidVMStackAddress(vmRegsUnsafe().stack.top()));
1097 tl_regState = VMRegState::CLEAN; // Careful: pc isn't sync'ed yet.
1098 TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
1099 vmfp(), vmsp());
1101 if (debug) {
1102 // Debugging code: cede the write lease half the time.
1103 if (RuntimeOption::EvalJitStressLease) {
1104 if (d.depthOne() && (rand() % 2) == 0) {
1105 Translator::WriteLease().gremlinLock();
1108 // Ensure that each case either returns, or drives start to a valid
1109 // value.
1110 start = TCA(0xbee5face);
1113 TRACE(2, "enterTC: request(%s) args: %" PRIxPTR " %" PRIxPTR " %"
1114 PRIxPTR " %" PRIxPTR " %" PRIxPTR "\n",
1115 serviceReqName(info.requestNum),
1116 info.args[0], info.args[1], info.args[2], info.args[3],
1117 info.args[4]);
1119 if (LIKELY(info.requestNum == REQ_EXIT)) {
1120 vmfp() = nullptr;
1121 return;
1123 if (!handleServiceRequest(info, start, sk)) return;
1128 * The contract is that each case will set sk to the place where
1129 * execution should resume, and optionally set start to the hardware
1130 * translation of the resumption point (or otherwise set it to null).
1131 * Returns false if we need to halt this nesting of the VM.
1133 * start and sk might be subtly different; i.e., there are cases where
1134 * start != NULL && start != getTranslation(sk). For instance,
1135 * REQ_BIND_CALL has not finished executing the OpCall when it gets
1136 * here, and has even done some work on its behalf. sk == OpFCall,
1137 * while start == the point in the TC that's "half-way through" the
1138 * Call instruction. If we punt to the interpreter, the interpreter
1139 * will redo some of the work that the translator has already done.
1141 bool MCGenerator::handleServiceRequest(TReqInfo& info,
1142 TCA& start,
1143 SrcKey& sk) {
1144 const ServiceRequest requestNum =
1145 static_cast<ServiceRequest>(info.requestNum);
1146 auto* const args = info.args;
1147 assert(requestNum != REQ_EXIT);
1148 INC_TPC(service_req);
1150 bool smashed = false;
1151 switch (requestNum) {
1152 case REQ_BIND_CALL: {
1153 ReqBindCall* req = reinterpret_cast<ReqBindCall*>(args[0]);
1154 ActRec* calleeFrame = reinterpret_cast<ActRec*>(args[1]);
1155 TCA toSmash = req->m_toSmash;
1156 Func *func = const_cast<Func*>(calleeFrame->m_func);
1157 int nArgs = req->m_nArgs;
1158 bool isImmutable = req->m_isImmutable;
1159 TRACE(2, "enterTC: bindCall %s, ActRec %p\n",
1160 func->fullName()->data(), calleeFrame);
1161 TCA dest = getFuncPrologue(func, nArgs);
1162 TRACE(2, "enterTC: bindCall -> %p\n", dest);
1163 if (!isImmutable) {
1164       // We don't know that we're calling the right function, so adjust
1165 // dest to point to the dynamic check of ar->m_func.
1166 dest = backEnd().funcPrologueToGuard(dest, func);
1167 } else {
1168 TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
1169 func->fullName()->data(), dest);
1171 if (dest) {
1172 LeaseHolder writer(Translator::WriteLease());
1173 if (writer) {
1174 // Someone else may have changed the func prologue while we
1175 // waited for the write lease, so read it again.
1176 dest = getFuncPrologue(func, nArgs);
1177 assert(dest);
1178 if (!isImmutable) dest = backEnd().funcPrologueToGuard(dest, func);
1180 if (backEnd().callTarget(toSmash) != dest) {
1181 assert(backEnd().callTarget(toSmash));
1182 TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
1183 backEnd().smashCall(toSmash, dest);
1184 smashed = true;
1185 // For functions to be PGO'ed, if their current prologues
1186 // are still profiling ones (living in code.prof()), then
1187 // save toSmash as a caller to the prologue, so that it can
1188 // later be smashed to call a new prologue when it's generated.
1189 int calleeNumParams = func->numNonVariadicParams();
1190 int calledPrologNumArgs = (nArgs <= calleeNumParams ?
1191 nArgs : calleeNumParams + 1);
1192 if (code.prof().contains(dest)) {
1193 if (isImmutable) {
1194 m_tx.profData()->addPrologueMainCaller(func, calledPrologNumArgs,
1195 toSmash);
1196 } else {
1197 m_tx.profData()->addPrologueGuardCaller(func, calledPrologNumArgs,
1198 toSmash);
1203 // sk: stale, but doesn't matter since we have a valid dest TCA.
1204 } else {
1205 // We need translator help; we're not at the callee yet, so
1206 // roll back. The prelude has done some work already, but it
1207 // should be safe to redo.
1208 TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
1209 toSmash, dest);
1210 sk = req->m_sourceInstr;
1212 // EnterTCHelper pushes the return ip onto the stack when the
1213 // requestNum is REQ_BIND_CALL, but if start is NULL, it will
1214 // interpret in doFCall, so we clear out the requestNum in this
1215 // case to prevent enterTCHelper from pushing the return ip
1216 // onto the stack.
1217 info.requestNum = ~REQ_BIND_CALL;
1219 start = dest;
1220 } break;
1222 case REQ_BIND_SIDE_EXIT:
1223 case REQ_BIND_JMP:
1224 case REQ_BIND_JCC:
1225 case REQ_BIND_ADDR:
1227 TCA toSmash = (TCA)args[0];
1228 auto ai = static_cast<SrcKey::AtomicInt>(args[1]);
1229 sk = SrcKey::fromAtomicInt(ai);
1230 if (requestNum == REQ_BIND_SIDE_EXIT) {
1231 SKTRACE(3, sk, "side exit taken!\n");
1233 start = bindJmp(toSmash, sk, requestNum, smashed);
1234 } break;
1236 case REQ_BIND_JMPCC_FIRST: {
1237 TCA toSmash = (TCA)args[0];
1238 Offset offTaken = (Offset)args[1];
1239 Offset offNotTaken = (Offset)args[2];
1240 ConditionCode cc = ConditionCode(args[3]);
1241 bool taken = int64_t(args[4]) & 1;
1242 start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
1243 taken, cc, smashed);
1244 // SrcKey: we basically need to emulate the fail
1245 sk = SrcKey(liveFunc(), taken ? offTaken : offNotTaken, liveResumed());
1246 } break;
1248 case REQ_BIND_JMPCC_SECOND: {
1249 TCA toSmash = (TCA)args[0];
1250 Offset off = (Offset)args[1];
1251 ConditionCode cc = ConditionCode(args[2]);
1252 start = bindJmpccSecond(toSmash, off, cc, smashed);
1253 sk = SrcKey(liveFunc(), off, liveResumed());
1254 } break;
1256 case REQ_RETRANSLATE_OPT: {
1257 auto ai = static_cast<SrcKey::AtomicInt>(args[0]);
1258 TransID transId = (TransID)args[1];
1259 sk = SrcKey::fromAtomicInt(ai);
1260 start = retranslateOpt(transId, false);
1261 SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transId,
1262 start);
1263 break;
1266 case REQ_RETRANSLATE: {
1267 INC_TPC(retranslate);
1268 sk = SrcKey(liveFunc(), (Offset)args[0], liveResumed());
1269 start = retranslate(TranslArgs(sk, true));
1270 SKTRACE(2, sk, "retranslated @%p\n", start);
1271 } break;
1273 case REQ_INTERPRET: {
1274 Offset off = args[0];
1275 vmpc() = liveUnit()->at(off);
1277 * We know the compilation unit has not changed; basic blocks do
1278 * not span files. I claim even exceptions do not violate this
1279 * axiom.
1281 SKTRACE(5, SrcKey(liveFunc(), off, liveResumed()), "interp: enter\n");
1282 // dispatch until BB ends
1283 INC_TPC(interp_bb);
1284 g_context->dispatchBB();
1285 PC newPc = vmpc();
1286 if (!newPc) { vmfp() = 0; return false; }
1287 SrcKey newSk(liveFunc(), newPc, liveResumed());
1288 SKTRACE(5, newSk, "interp: exit\n");
1289 sk = newSk;
1290 start = getTranslation(TranslArgs(newSk, true));
1291 } break;
1293 case REQ_POST_INTERP_RET: {
1294 // This is only responsible for the control-flow aspect of the Ret:
1295 // getting to the destination's translation, if any.
1296 ActRec* ar = (ActRec*)args[0];
1297 ActRec* caller = (ActRec*)args[1];
1298 assert(caller == vmfp());
1299 Unit* destUnit = caller->m_func->unit();
1300 // Set PC so logging code in getTranslation doesn't get confused.
1301 vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
1302 SrcKey dest(caller->func(), vmpc(), caller->resumed());
1303 sk = dest;
1304 start = getTranslation(TranslArgs(dest, true));
1305 TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
1306 ar->m_func->fullName()->data(),
1307 caller->m_func->fullName()->data());
1308 } break;
1310 case REQ_RESUME: {
1311 if (UNLIKELY(vmpc() == 0)) {
1312 vmfp() = 0;
1313 return false;
1315 SrcKey dest(liveFunc(), vmpc(), liveResumed());
1316 sk = dest;
1317 start = getTranslation(TranslArgs(dest, true));
1318 } break;
1320 case REQ_STACK_OVERFLOW:
1321 if (((ActRec*)info.saved_rStashedAr)->m_sfp == (ActRec*)vmfp()) {
1323 * The normal case - we were called via FCall, or FCallArray.
1324 * We need to construct the pc of the fcall from the return
1325 * address (which will be after the fcall). Because fcall is
1326 * a variable length instruction, and because we sometimes
1327 * delete instructions from the instruction stream, we
1328 * need to use fpi regions to find the fcall.
1330 const FPIEnt* fe = liveFunc()->findPrecedingFPI(
1331 liveUnit()->offsetOf(vmpc()));
1332 vmpc() = liveUnit()->at(fe->m_fcallOff);
1333 assert(isFCallStar(*reinterpret_cast<const Op*>(vmpc())));
1334 raise_error("Stack overflow");
1335 NOT_REACHED();
1336 } else {
1338 * We were called via re-entry
1339 * Leak the params and the actrec, and tell the unwinder
1340 * that there's nothing left to do in this "entry".
1342 vmsp() = (Cell*)((ActRec*)info.saved_rStashedAr + 1);
1343 throw VMReenterStackOverflow();
1346 case REQ_EXIT:
1347 not_reached();
1350 assert(start != TCA(0xbee5face));
1351 if (smashed && info.stubAddr) {
1352 Treadmill::enqueue(FreeRequestStubTrigger(info.stubAddr));
1355 return true;
1359 * Support for the stub freelist.
1361 TCA FreeStubList::maybePop() {
1362 StubNode* ret = m_list;
1363 if (ret) {
1364 TRACE(1, "alloc stub %p\n", ret);
1365 m_list = ret->m_next;
1366 ret->m_freed = ~kStubFree;
1368 return (TCA)ret;
1371 void FreeStubList::push(TCA stub) {
1373 * A freed stub may be released by Treadmill more than once if multiple
1374 * threads execute the service request before it is freed. We detect
1375  * duplicates by marking freed stubs.
1377 StubNode* n = (StubNode *)stub;
1378 if (n->m_freed == kStubFree) {
1379 TRACE(1, "already freed stub %p\n", stub);
1380 return;
1382 n->m_freed = kStubFree;
1383 n->m_next = m_list;
1384 TRACE(1, "free stub %p (-> %p)\n", stub, m_list);
1385 m_list = n;
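/*
 * The freelist is an intrusive LIFO: a freed stub's leading bytes are
 * reused as its StubNode header, so no side allocation is needed. A
 * sketch of a push/pop round trip (illustrative; ignores the treadmill):
 *
 *   FreeStubList fl;
 *   fl.push(stubA);         // stubA marked kStubFree, becomes the head
 *   fl.push(stubA);         // duplicate detected via the mark; ignored
 *   TCA s = fl.maybePop();  // returns stubA and clears the mark
 *   assert(s == stubA && fl.maybePop() == nullptr);
 */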
1388 bool
1389 MCGenerator::freeRequestStub(TCA stub) {
1390 LeaseHolder writer(Translator::WriteLease());
1392 * If we can't acquire the write lock, the caller
1393 * (FreeRequestStubTrigger) retries
1395 if (!writer) return false;
1396 assert(code.frozen().contains(stub));
1397 m_freeStubs.push(stub);
1398 return true;
1401 TCA MCGenerator::getFreeStub(CodeBlock& frozen, CodeGenFixups* fixups) {
1402 TCA ret = m_freeStubs.maybePop();
1403 if (ret) {
1404 Stats::inc(Stats::Astub_Reused);
1405 always_assert(m_freeStubs.m_list == nullptr ||
1406 code.isValidCodeAddress(TCA(m_freeStubs.m_list)));
1407 TRACE(1, "recycle stub %p\n", ret);
1408 if (fixups) {
1409 fixups->m_reusedStubs.emplace_back(ret);
1411 } else {
1412 ret = frozen.frontier();
1413 Stats::inc(Stats::Astub_New);
1414 TRACE(1, "alloc new stub %p\n", ret);
1416 return ret;
1419 #define O(opcode, imm, push, pop, flags) \
1421  * The interpOne methods save m_pc, m_fp, and m_sp into the
1422  * ExecutionContext, call into the interpreter, and then return a
1423  * pointer to the current ExecutionContext.
1424 */ \
1425 ExecutionContext* \
1426 interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
1427 interp_set_regs(ar, sp, pcOff); \
1428 SKTRACE(5, SrcKey(liveFunc(), vmpc(), liveResumed()), "%40s %p %p\n", \
1429 "interpOne" #opcode " before (fp,sp)", \
1430 vmfp(), vmsp()); \
1431 assert(*reinterpret_cast<const Op*>(vmpc()) == Op::opcode); \
1432 auto const ec = g_context.getNoCheck(); \
1433 Stats::inc(Stats::Instr_InterpOne ## opcode); \
1434 if (Trace::moduleEnabled(Trace::interpOne, 1)) { \
1435 static const StringData* cat = makeStaticString("interpOne"); \
1436 static const StringData* name = makeStaticString(#opcode); \
1437 Stats::incStatGrouped(cat, name, 1); \
1439 INC_TPC(interp_one) \
1440 /* Correct for over-counting in TC-stats. */ \
1441 Stats::inc(Stats::Instr_TC, -1); \
1442 ec->op##opcode(); \
1444 * Only set regstate back to dirty if an exception is not
1445 * propagating. If an exception is throwing, regstate for this call
1446 * is actually still correct, and we don't have information in the
1447 * fixup map for interpOne calls anyway.
1448 */ \
1449 tl_regState = VMRegState::DIRTY; \
1450 return ec; \
1453 OPCODES
1454 #undef O
1456 void* interpOneEntryPoints[] = {
1457 #define O(opcode, imm, push, pop, flags) \
1458 (void*)(interpOne ## opcode),
1459 OPCODES
1460 #undef O
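/*
 * For a hypothetical opcode Foo, the expansion above defines
 * interpOneFoo(ar, sp, pcOff), which syncs the VM registers, bumps the
 * stats counters, runs ec->opFoo(), re-dirties tl_regState, and returns
 * ec; the table entry is just (void*)(interpOneFoo).
 */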
1463 TCA MCGenerator::getTranslatedCaller() const {
1464 DECLARE_FRAME_POINTER(fp);
1465 ActRec* framePtr = fp; // can't directly mutate the register-mapped one
1466 for (; framePtr; framePtr = framePtr->m_sfp) {
1467 TCA rip = (TCA)framePtr->m_savedRip;
1468 if (isValidCodeAddress(rip)) {
1469 return rip;
1472 return nullptr;
1475 void
1476 MCGenerator::syncWork() {
1477 assert(tl_regState == VMRegState::DIRTY);
1478 m_fixupMap.fixup(g_context.getNoCheck());
1479 tl_regState = VMRegState::CLEAN;
1480 Stats::inc(Stats::TC_Sync);
1484 MCGenerator::emitNativeTrampoline(TCA helperAddr) {
1485 auto& trampolines = code.trampolines();
1486 if (!trampolines.canEmit(kExpectedPerTrampolineSize)) {
1487 // not enough space to emit a trampoline, so just return the
1488     // helper address and emitCall will then emit the right sequence
1489 // to call it indirectly
1490 TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
1491 return helperAddr;
1494 uint32_t index = m_numNativeTrampolines++;
1495 TCA trampAddr = trampolines.frontier();
1496 if (Stats::enabled()) {
1497 emitIncStat(trampolines, &Stats::tl_helper_counters[0], index);
1498 auto name = getNativeFunctionName(helperAddr);
1499 const size_t limit = 50;
1500 if (name.size() > limit) {
1501 name[limit] = '\0';
1504 // The duped string lives until process death intentionally.
1505 Stats::helperNames[index].store(strdup(name.c_str()),
1506 std::memory_order_release);
1509 Asm a { trampolines };
1510 // Move the 64-bit immediate address to rax, then jmp. If clobbering
1511 // rax is a problem, we could do an rip-relative call with the address
1512 // stored in the data section with no extra registers; but it has
1513 // worse memory locality.
1514 a. emitImmReg(helperAddr, rax);
1515 a. jmp (rax);
1516 a. ud2(); // hint that the jump doesn't go here.
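/*
 * Size check (x64 encodings): mov rax, imm64 is 10 bytes, jmp rax is 2,
 * and ud2 is 2, so each trampoline costs about 14 bytes plus the optional
 * stats increment; hence the canEmit(kExpectedPerTrampolineSize) guard
 * above.
 */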
1518 m_trampolineMap[helperAddr] = trampAddr;
1519 recordBCInstr(OpNativeTrampoline, trampolines, trampAddr, false);
1520 if (RuntimeOption::EvalJitUseVtuneAPI) {
1521 reportTrampolineToVtune(trampAddr, trampolines.frontier() - trampAddr);
1524 return trampAddr;
1528 MCGenerator::getNativeTrampoline(TCA helperAddr) {
1529 if (!RuntimeOption::EvalJitTrampolines && !Stats::enabled()) {
1530 return helperAddr;
1532 auto const trampAddr = (TCA)folly::get_default(m_trampolineMap, helperAddr);
1533 if (trampAddr) {
1534 return trampAddr;
1536 return emitNativeTrampoline(helperAddr);
1539 bool
1540 MCGenerator::reachedTranslationLimit(SrcKey sk,
1541 const SrcRec& srcRec) const {
1542 if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
1543 INC_TPC(max_trans);
1544 if (debug && Trace::moduleEnabled(Trace::mcg, 2)) {
1545 const auto& tns = srcRec.translations();
1546 TRACE(1, "Too many (%zd) translations: %s, BC offset %d\n",
1547 tns.size(), sk.unit()->filepath()->data(),
1548 sk.offset());
1549 SKTRACE(2, sk, "{\n");
1550 TCA topTrans = srcRec.getTopTranslation();
1551 for (size_t i = 0; i < tns.size(); ++i) {
1552 const TransRec* rec = m_tx.getTransRec(tns[i]);
1553 assert(rec);
1554 SKTRACE(2, sk, "%zd %p\n", i, tns[i]);
1555 if (tns[i] == topTrans) {
1556 SKTRACE(2, sk, "%zd: *Top*\n", i);
1558 if (rec->kind == TransKind::Anchor) {
1559 SKTRACE(2, sk, "%zd: Anchor\n", i);
1560 } else {
1561 SKTRACE(2, sk, "%zd: guards {\n", i);
1562 for (unsigned j = 0; j < rec->guards.size(); ++j) {
1563 FTRACE(2, "{}\n", rec->guards[j]);
1565 SKTRACE(2, sk, "%zd } guards\n", i);
1568 SKTRACE(2, sk, "} /* Too many translations */\n");
1570 return true;
1573 return false;
1576 void
1577 MCGenerator::emitGuardChecks(SrcKey sk,
1578 const ChangeMap& resolvedDeps,
1579 const ChangeMap& dependencies,
1580 const RefDeps& refDeps,
1581 SrcRec& fail) {
1582 if (Trace::moduleEnabled(Trace::stats, 2)) {
1583 emitIncStat(code.main(), Stats::TraceletGuard_enter);
1586 m_tx.irTrans()->hhbcTrans().emitRB(RBTypeTraceletGuards, sk);
1587 bool checkOuterTypeOnly = m_tx.mode() != TransKind::Profile;
1588 for (auto const& dep : dependencies) {
1590 * In some cases, we may have relaxed a guard to be the same as
1591 * something we knew from static analysis (in resolvedDeps)---in
1592 * this case skip emitting it.
1594 * Note: this could probably also check whether we knew something
1595 * /better/ from static analysis than what we are trying to guard
1596 * on. This code is on its way out and the tracelet region
1597 * selector doesn't have these issues, though, so we haven't tried
1598 * that here.
1600 auto const it = resolvedDeps.find(dep.first);
1601 if (it != end(resolvedDeps)) {
1602 if (it->second->rtt == dep.second->rtt) {
1603 continue;
1606 m_tx.irTrans()->checkType(dep.first, dep.second->rtt, checkOuterTypeOnly);
1609 checkRefs(sk, refDeps, fail);
1611 if (Trace::moduleEnabled(Trace::stats, 2)) {
1612 emitIncStat(code.main(), Stats::TraceletGuard_execute);
1617 void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
1618 if (!debug) return;
1620 SrcKey sk = t.m_sk;
1621 DEBUG_ONLY auto unit = sk.unit();
1623 TRACE(3, "----------------------------------------------\n");
1624 TRACE(3, " Translating from file %s:%d %s at %p:\n",
1625 unit->filepath()->data(),
1626 unit->getLineNumber(sk.offset()),
1627 sk.func()->name()->data(),
1628 postGuards);
1629 TRACE(3, " preconds:\n");
1630 TRACE(3, " types:\n");
1631 for (DepMap::const_iterator i = t.m_dependencies.begin();
1632 i != t.m_dependencies.end(); ++i) {
1633 TRACE(3, " %-5s\n", i->second->pretty().c_str());
1635 if (t.m_refDeps.size() != 0) {
1636 TRACE(3, " refs:\n");
1637 for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
1638 i != t.m_refDeps.m_arMap.end();
1639 ++i) {
1640 TRACE(3, " (ActRec %" PRId64 " : %-5s)\n", i->first,
1641 i->second.pretty().c_str());
1644 TRACE(3, " postconds:\n");
1645 for (ChangeMap::const_iterator i = t.m_changes.begin();
1646 i != t.m_changes.end(); ++i) {
1647 TRACE(3, " %-5s\n", i->second->pretty().c_str());
1649 for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
1650 TRACE(3, " %6d: %s\n", ni->source.offset(),
1651 instrToString((Op*)ni->pc()).c_str());
1652 if (ni->breaksTracelet) break;
1654 TRACE(3, "----------------------------------------------\n");
1655 if (Trace::moduleEnabled(Trace::mcg, 5)) {
1656 // prettyStack() expects to use vmpc(). Leave it in the state we
1657 // found it since this code is debug-only, and we don't want behavior
1658 // to vary across the optimized/debug builds.
1659 PC oldPC = vmpc();
1660 vmpc() = unit->at(sk.offset());
1661 TRACE(3, g_context->prettyStack(std::string(" mcg ")));
1662 vmpc() = oldPC;
1663 TRACE(3, "----------------------------------------------\n");
1667 void
1668 MCGenerator::recordSyncPoint(CodeAddress frontier, Offset pcOff, Offset spOff) {
1669 m_fixups.m_pendingFixups.push_back(
1670 PendingFixup(frontier, Fixup(pcOff, spOff)));
1673 void
1674 CodeGenFixups::process() {
1675 for (uint i = 0; i < m_pendingFixups.size(); i++) {
1676 TCA tca = m_pendingFixups[i].m_tca;
1677 assert(mcg->isValidCodeAddress(tca));
1678 mcg->fixupMap().recordFixup(tca, m_pendingFixups[i].m_fixup);
1680 m_pendingFixups.clear();
1682 for (auto const& pair : m_pendingCatchTraces) {
1683 mcg->catchTraceMap().insert(pair.first, pair.second);
1685 m_pendingCatchTraces.clear();
1687 for (auto const& elm : m_pendingJmpTransIDs) {
1688 mcg->getJmpToTransIDMap().insert(elm);
1690 m_pendingJmpTransIDs.clear();
1692 * Currently these are only used by the relocator,
1693 * so there's nothing left to do here.
1695 * Once we try to relocate live code, we'll need to
1696 * store compact forms of these for later.
1698 m_reusedStubs.clear();
1699 m_addressImmediates.clear();
1700 m_codePointers.clear();
1701 m_bcMap.clear();
1702 m_alignFixups.clear();
1705 void CodeGenFixups::clear() {
1706 m_pendingFixups.clear();
1707 m_pendingCatchTraces.clear();
1708 m_pendingJmpTransIDs.clear();
1709 m_reusedStubs.clear();
1710 m_addressImmediates.clear();
1711 m_codePointers.clear();
1712 m_bcMap.clear();
1713 m_alignFixups.clear();
1716 bool CodeGenFixups::empty() const {
1717 return
1718 m_pendingFixups.empty() &&
1719 m_pendingCatchTraces.empty() &&
1720 m_pendingJmpTransIDs.empty() &&
1721 m_reusedStubs.empty() &&
1722 m_addressImmediates.empty() &&
1723 m_codePointers.empty() &&
1724 m_bcMap.empty() &&
1725 m_alignFixups.empty();
1728 void
1729 MCGenerator::translateWork(const TranslArgs& args) {
1730 Timer _t(Timer::translate);
1731 auto sk = args.m_sk;
1732 std::unique_ptr<Tracelet> tlet;
1734 SKTRACE(1, sk, "translateWork\n");
1735 assert(m_tx.getSrcDB().find(sk));
1737 TCA start = code.main().frontier();
1738 TCA coldStart = code.cold().frontier();
1739 TCA realColdStart = code.realCold().frontier();
1740 TCA DEBUG_ONLY frozenStart = code.frozen().frontier();
1741 TCA realFrozenStart = code.realFrozen().frontier();
1742 SrcRec& srcRec = *m_tx.getSrcRec(sk);
1743 TransKind transKindToRecord = TransKind::Interp;
1744 UndoMarker undoA(code.main());
1745 UndoMarker undoAcold(code.cold());
1746 UndoMarker undoAfrozen(code.frozen());
1747 UndoMarker undoGlobalData(code.data());
1749 auto resetState = [&] {
1750 undoA.undo();
1751 undoAcold.undo();
1752 undoAfrozen.undo();
1753 undoGlobalData.undo();
1754 m_fixups.clear();
1755 srcRec.clearInProgressTailJumps();
1758 auto assertCleanState = [&] {
1759 assert(code.main().frontier() == start);
1760 assert(code.frozen().frontier() == frozenStart);
1761 assert(m_fixups.empty());
1762 assert(srcRec.inProgressTailJumps().empty());
1765 PostConditions pconds;
1766 RegionDescPtr region;
1767 if (!args.m_interp && !reachedTranslationLimit(sk, srcRec)) {
1768 // Attempt to create a region at this SrcKey
1769 if (m_tx.mode() == TransKind::Optimize) {
1770 assert(RuntimeOption::EvalJitPGO);
1771 region = args.m_region;
1772 if (region) {
1773 assert(region->blocks.size() > 0);
1774 } else {
1775 TransID transId = args.m_transId;
1776 assert(transId != kInvalidTransID);
1777 region = selectHotRegion(transId, this);
1778 assert(region);
1779 if (region && region->blocks.size() == 0) region = nullptr;
1781 } else {
1782 assert(m_tx.mode() == TransKind::Profile ||
1783 m_tx.mode() == TransKind::Live);
1784 RegionContext rContext { sk.func(), sk.offset(), liveSpOff(),
1785 sk.resumed() };
1786 FTRACE(2, "populating live context for region\n");
1787 populateLiveContext(rContext);
1788 region = selectRegion(rContext, [&]{ return m_tx.analyze(sk); },
1789 m_tx.mode());
1791 if (RuntimeOption::EvalJitCompareRegions &&
1792 RuntimeOption::EvalJitRegionSelector == "tracelet") {
1793 // Re-analyze with guard relaxation on
1794 OPTION_GUARD(EvalHHBCRelaxGuards, 1);
1795 OPTION_GUARD(EvalHHIRRelaxGuards, 0);
1796 auto legacyRegion = selectTraceletLegacy(rContext.spOffset,
1797 *m_tx.analyze(sk));
1798 if (!region) {
1799 Trace::ftraceRelease("{:-^60}\nCouldn't select tracelet region "
1800 "for:\n{}", "", show(*legacyRegion));
1801 } else {
1802 diffRegions(*region, *legacyRegion);
1807 Translator::TranslateResult result = Translator::Retry;
1808 Translator::RegionBlacklist regionInterps;
1809 Offset const initSpOffset = region ? region->blocks[0]->initialSpOffset()
1810 : liveSpOff();
1811 bool bcControlFlow = RuntimeOption::EvalHHIRBytecodeControlFlow;
1813 auto const transContext = TransContext {
1814 RuntimeOption::EvalJitPGO
1815 ? m_tx.profData()->curTransID()
1816 : kInvalidTransID,
1817 sk.offset(),
1818 initSpOffset,
1819 sk.resumed(),
1820 sk.func()
1823 while (result == Translator::Retry) {
1824 m_tx.traceStart(transContext);
1826 // Try translating a region if we have one, then fall back to using the
1827 // Tracelet.
1828 if (region) {
1829 try {
1830 assertCleanState();
1831 result = m_tx.translateRegion(*region, bcControlFlow, regionInterps);
1833 // If we're profiling, grab the postconditions so we can
1834 // use them in region selection whenever we decide to retranslate.
1835 if (m_tx.mode() == TransKind::Profile &&
1836 result == Translator::Success &&
1837 RuntimeOption::EvalJitPGOUsePostConditions) {
1838 pconds = m_tx.irTrans()->hhbcTrans().unit().postConditions();
1841 FTRACE(2, "translateRegion finished with result {}\n",
1842 Translator::translateResultName(result));
1843 } catch (ControlFlowFailedExc& cfe) {
1844 FTRACE(2, "translateRegion with control flow failed: '{}'\n",
1845 cfe.what());
1846 always_assert(bcControlFlow &&
1847 "control flow translation failed, but control flow not enabled");
1848 bcControlFlow = false;
1849 result = Translator::Retry;
1850 } catch (const std::exception& e) {
1851 FTRACE(1, "translateRegion failed with '{}'\n", e.what());
1852 result = Translator::Failure;
1854 if (result == Translator::Failure) {
1855 m_tx.traceFree();
1856 m_tx.traceStart(transContext);
1857 resetState();
1860 if (!region || result == Translator::Failure) {
1861 // If the region translator failed for an Optimize translation, it's OK
1862 // to do a Live translation for the function entry.
1863 if (m_tx.mode() == TransKind::Optimize) {
1864 if (sk.getFuncId() == liveFunc()->getFuncId() &&
1865 liveUnit()->contains(vmpc()) &&
1866 sk.offset() == liveUnit()->offsetOf(vmpc()) &&
1867 sk.resumed() == liveResumed()) {
1868 m_tx.setMode(TransKind::Live);
1869 } else {
1870 m_tx.setMode(TransKind::Interp);
1871 m_tx.traceFree();
1872 break;
1875 FTRACE(1, "trying translateTracelet\n");
1876 assertCleanState();
1877 if (!tlet) tlet = m_tx.analyze(sk);
1878 result = translateTracelet(*tlet);
1880 // If we're profiling, grab the postconditions so we can
1881 // use them in region selection whenever we decide to
1882 // retranslate.
1883 if (m_tx.mode() == TransKind::Profile &&
1884 result == Translator::Success &&
1885 RuntimeOption::EvalJitPGOUsePostConditions) {
1886 pconds = m_tx.irTrans()->hhbcTrans().unit().postConditions();
1890 if (result != Translator::Success) {
 1891       // Translation failed. Free resources for this trace, roll back the
 1892       // translation cache frontiers, and discard any pending fixups.
1893 resetState();
1895 m_tx.traceFree();
1898 if (result == Translator::Success) {
1899 assert(m_tx.mode() == TransKind::Live ||
1900 m_tx.mode() == TransKind::Profile ||
1901 m_tx.mode() == TransKind::Optimize);
1902 transKindToRecord = m_tx.mode();
1906 if (args.m_dryRun) {
1907 resetState();
1908 return;
1911 if (transKindToRecord == TransKind::Interp) {
1912 assertCleanState();
1913 FTRACE(1, "emitting dispatchBB interp request for failed translation\n");
1914 mcg->backEnd().emitInterpReq(code.main(), code.cold(), sk);
1915 // Fall through.
1918 recordGdbTranslation(sk, sk.func(), code.main(), start,
1919 false, false);
1920 recordGdbTranslation(sk, sk.func(), code.cold(), coldStart,
1921 false, false);
1922 if (RuntimeOption::EvalJitPGO) {
1923 if (transKindToRecord == TransKind::Profile) {
1924 if (!region) {
1925 always_assert(tlet);
1926 region = selectTraceletLegacy(liveSpOff(), *tlet);
1928 m_tx.profData()->addTransProfile(region, pconds);
1929 } else {
1930 m_tx.profData()->addTransNonProf(transKindToRecord, sk);
1934 TransRec tr(sk, transKindToRecord,
1935 start, code.main().frontier() - start,
1936 realColdStart, code.realCold().frontier() - realColdStart,
1937 realFrozenStart, code.realFrozen().frontier() - realFrozenStart,
1938 region, tlet.get(), m_fixups.m_bcMap);
1939 m_tx.addTranslation(tr);
1940 if (RuntimeOption::EvalJitUseVtuneAPI) {
1941 reportTraceletToVtune(sk.unit(), sk.func(), tr);
1944 m_fixups.process();
 1946   // SrcRec::newTranslation() makes this code reachable. Do this last;
 1947   // otherwise reader threads could hit the new translation before its
 1948   // metadata is visible.
1949 TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
1950 start, sk.getFuncId(), sk.offset());
1951 srcRec.newTranslation(start);
1952 TRACE(1, "mcg: %zd-byte tracelet\n", code.main().frontier() - start);
1953 if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
1954 Trace::traceRelease("%s", getUsage().c_str());
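/*
 * translateWork() above, in outline (a summary of the control flow, not
 * additional behavior):
 *
 *   1. Select a region: a PGO hot region for Optimize translations, or
 *      the configured region selector for Profile/Live.
 *   2. Loop while the translator asks for Retry: try the region first,
 *      falling back to the legacy Tracelet path; on Failure, roll back
 *      the code frontiers and pending fixups via resetState().
 *   3. If nothing succeeded, emit an interp request stub instead.
 *   4. Record GDB/VTune/profile metadata and the TransRec, process the
 *      fixups, and only then publish via srcRec.newTranslation(start).
 */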
1958 Translator::TranslateResult
1959 MCGenerator::translateTracelet(Tracelet& t) {
 1960   if (!RuntimeOption::EvalJitRegionSelector.empty()) {
1961 // In order to properly simulate a post-Tracelet world, refuse to translate
1962 // Tracelets when a region selector is active.
1963 return Translator::Failure;
1966 Timer _t(Timer::translateTracelet);
1968 FTRACE(2, "attempting to translate tracelet:\n{}\n", t.toString());
1969 const SrcKey &sk = t.m_sk;
1970 SrcRec& srcRec = *m_tx.getSrcRec(sk);
1971 HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
1972 bool profilingFunc = false;
1974 assert(srcRec.inProgressTailJumps().empty());
1975 try {
1976 emitResolvedDeps(t.m_resolvedDeps);
1978 emitGuardChecks(sk, t.m_resolvedDeps,
1979 t.m_dependencies, t.m_refDeps, srcRec);
1980 ht.endGuards();
1982 dumpTranslationInfo(t, code.main().frontier());
1984 // after guards, add a counter for the translation if requested
1985 if (RuntimeOption::EvalJitTransCounters) {
1986 ht.emitIncTransCounter();
1989 if (m_tx.mode() == TransKind::Profile) {
1990 profilingFunc = true;
1991 if (t.func()->isEntry(sk.offset())) {
1992 ht.emitCheckCold(m_tx.profData()->curTransID());
1993 } else {
1994 ht.emitIncProfCounter(m_tx.profData()->curTransID());
1998 ht.emitRB(RBTypeTraceletBody, t.m_sk);
1999 emitIncStat(code.main(), Stats::Instr_TC, t.m_numOpcodes);
2002 // Profiling on function entry.
2003 if (t.m_sk.offset() == t.func()->base()) {
2004 ht.profileFunctionEntry("Normal");
 2005     }
 2006
 2007     /*
 2008      * Profiling on the shapes of tracelets that are whole functions.
 2009      * (These are the things we might consider trying to support
 2010      * inlining.)
 2011      */
2012 [&]{
2013 static const bool enabled = Stats::enabledAny() &&
2014 getenv("HHVM_STATS_FUNCSHAPE");
2015 if (!enabled) return;
2016 if (t.m_sk.offset() != t.func()->base()) return;
2017 if (auto last = t.m_instrStream.last) {
2018 if (last->op() != OpRetC && last->op() != OpRetV &&
2019 last->op() != OpCreateCont) {
2020 return;
2023 ht.profileSmallFunctionShape(traceletShape(t));
2024 }();
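// The immediately-invoked lambda above keeps the early returns local to
// the stats probe, and the function-local static means the
// getenv("HHVM_STATS_FUNCSHAPE") check is evaluated only once per process.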
2026 Timer irGenTimer(Timer::translateTracelet_irGeneration);
2027 // Translate each instruction in the tracelet
2028 for (auto* ni = t.m_instrStream.first; ni && !ht.hasExit();
2029 ni = ni->next) {
2030 ht.setBcOff(ni->source.offset(),
2031 ni->breaksTracelet && !ht.isInlining());
2032 if (isAlwaysNop(ni->op())) ni->noOp = true;
2034 try {
2035 SKTRACE(1, ni->source, "HHIR: translateInstr\n");
2036 assert(!(m_tx.mode() ==
2037 TransKind::Profile && ni->outputPredicted && ni->next));
2038 m_tx.irTrans()->translateInstr(*ni);
2039 } catch (FailedIRGen& fcg) {
2040 always_assert(!ni->interp);
2041 ni->interp = true;
2042 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2043 "after failing to generate ir: {} \n\n",
2044 m_tx.getCurrentTransID(), ni->toString(), fcg.what());
2045 return Translator::Retry;
2047 assert(ni->source.offset() >= t.func()->base());
2048 // We sometimes leave the tail of a truncated tracelet in place to aid
2049 // analysis, but breaksTracelet is authoritative.
2050 if (ni->breaksTracelet || m_tx.irTrans()->hhbcTrans().hasExit()) break;
2052 m_tx.traceEnd();
2053 irGenTimer.end();
2055 try {
2056 traceCodeGen();
2057 TRACE(1, "HHIR: SUCCEEDED to generate code for Translation %d\n\n\n",
2058 m_tx.getCurrentTransID());
2059 if (profilingFunc) m_tx.profData()->setProfiling(t.func()->getFuncId());
2060 return Translator::Success;
2061 } catch (FailedCodeGen& fcg) {
2062 // Code-gen failed. Search for the bytecode instruction that caused the
2063 // problem, flag it to be interpreted, and retranslate the tracelet.
2064 SrcKey sk{fcg.vmFunc, fcg.bcOff, fcg.resumed};
2066 for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
2067 if (ni->source == sk) {
2068 always_assert_log(
2069 !ni->interp,
2070 [&] {
2071 std::ostringstream oss;
2072 oss << folly::format("code generation failed with {}\n",
2073 fcg.what());
2074 print(oss, m_tx.irTrans()->hhbcTrans().unit());
2075 return oss.str();
2078 ni->interp = true;
2079 FTRACE(1, "HHIR: RETRY Translation {}: will interpOne BC instr {} "
2080 "after failing to code-gen \n\n",
2081 m_tx.getCurrentTransID(), ni->toString(), fcg.what());
2082 return Translator::Retry;
2085 throw fcg;
2086 } catch (const DataBlockFull& dbFull) {
2087 if (dbFull.name == "hot") {
2088 always_assert_flog(tx().useAHot(), "data block = {}\nmessage: {}\n",
2089 dbFull.name, dbFull.what());
2090 tx().setUseAHot(false);
2091 // We can't return Retry here because the code block selection
2092 // will still say hot.
2093 return Translator::Failure;
2095 throw dbFull;
2097 } catch (FailedCodeGen& fcg) {
2098 TRACE(1, "HHIR: FAILED to generate code for Translation %d "
2099 "@ %s:%d (%s)\n", m_tx.getCurrentTransID(),
2100 fcg.file, fcg.line, fcg.func);
2101 // HHIR:TODO Remove extra TRACE and adjust tools
2102 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2103 fcg.file, fcg.line, fcg.func);
2104 } catch (FailedIRGen& x) {
2105 TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
2106 x.file, x.line, x.func);
2107 } catch (const FailedAssertion& fa) {
2108 fa.print();
2109 StackTraceNoHeap::AddExtraLogging(
2110 "Assertion failure",
2111 folly::format("{}\n\nActive Unit:\n{}\n",
2112 fa.summary, ht.unit().toString()).str());
2113 abort();
2114 } catch (const FailedTraceGen& e) {
2115 FTRACE(1, "HHIR: FAILED to translate whole unit: {}\n",
2116 e.what());
2117 } catch (const DataBlockFull& dbFull) {
2118 FTRACE(1, "HHIR: FAILED due to full data block: {}\n", dbFull.name);
2119 if (dbFull.name == "hot") {
2120 assert(tx().useAHot());
2121 tx().setUseAHot(false);
2122 } else {
2123 always_assert_flog(0, "data block = {}\nmessage: {}\n",
2124 dbFull.name, dbFull.what());
2128 return Translator::Failure;
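/*
 * TranslateResult contract as exercised above (sketch):
 *   Success - machine code was emitted; the caller records and publishes it.
 *   Retry   - a bytecode instruction was flagged for interpretation
 *             (ni->interp = true); translateWork()'s retry loop rolls back
 *             and translates again.
 *   Failure - give up on this strategy; the caller falls back through
 *             region -> tracelet -> interp request.
 */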
2131 void MCGenerator::traceCodeGen() {
2132 HhbcTranslator& ht = m_tx.irTrans()->hhbcTrans();
2133 auto& unit = ht.unit();
2135 auto finishPass = [&](const char* msg, int level) {
2136 printUnit(level, unit, msg, nullptr, nullptr, ht.irBuilder().guards());
2137 assert(checkCfg(unit));
2140 finishPass(" after initial translation ", kIRLevel);
2142 optimize(unit, ht.irBuilder(), m_tx.mode());
2143 finishPass(" after optimizing ", kOptLevel);
2144 if (m_tx.mode() == TransKind::Profile &&
2145 RuntimeOption::EvalJitPGOUsePostConditions) {
2146 unit.collectPostConditions();
2149 auto regs = allocateRegs(unit);
2150 assert(checkRegisters(unit, regs)); // calls checkCfg internally.
2152 recordBCInstr(OpTraceletGuard, code.main(), code.main().frontier(), false);
2153 genCode(unit, this, regs);
2155 m_numHHIRTrans++;
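// The pipeline above, in order: print and check the freshly built HHIR
// unit, optimize() it, optionally collect postconditions for PGO region
// selection, allocate registers (checkRegisters re-validates the CFG),
// and finally genCode() emits machine code into the current code blocks.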
2158 MCGenerator::MCGenerator()
2159 : m_backEnd(newBackEnd())
2160 , m_numNativeTrampolines(0)
2161 , m_numHHIRTrans(0)
2162 , m_catchTraceMap(128)
2164 TRACE(1, "MCGenerator@%p startup\n", this);
2165 mcg = this;
2167 m_unwindRegistrar = register_unwind_region(code.base(), code.codeSize());
2169 static bool profileUp = false;
2170 if (!profileUp) {
2171 profileInit();
2172 profileUp = true;
2175 if (Trace::moduleEnabledRelease(Trace::printir) &&
2176 !RuntimeOption::EvalJit) {
2177 Trace::traceRelease("TRACE=printir is set but the jit isn't on. "
2178 "Did you mean to run with -vEval.Jit=1?\n");
2182 void MCGenerator::initUniqueStubs() {
2183 // Put the following stubs into ahot, rather than a.
2184 CodeCache::Selector cbSel(CodeCache::Selector::Args(code).
2185 hot(m_tx.useAHot()));
2186 m_tx.uniqueStubs = mcg->backEnd().emitUniqueStubs();
2189 void MCGenerator::registerCatchBlock(CTCA ip, TCA block) {
2190 FTRACE(1, "registerCatchBlock: afterCall: {} block: {}\n", ip, block);
2191 m_fixups.m_pendingCatchTraces.emplace_back(ip, block);
2194 folly::Optional<TCA> MCGenerator::getCatchTrace(CTCA ip) const {
2195 TCA* found = m_catchTraceMap.find(ip);
2196 if (found) return *found;
2197 return folly::none;
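/*
 * Sketch of the consuming side during unwinding (hypothetical shape; the
 * real unwinder lives elsewhere):
 *
 *   if (auto block = mcg->getCatchTrace(returnIP)) {
 *     resumeAt(*block);  // hypothetical: enter the catch trace for this call
 *   } else {
 *     // no catch trace registered: keep unwinding past this frame
 *   }
 */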
2200 void MCGenerator::codeEmittedThisRequest(size_t& requestEntry,
2201 size_t& now) const {
2202 requestEntry = s_initialTCSize;
2203 now = code.totalUsed();
2206 void MCGenerator::requestInit() {
2207 tl_regState = VMRegState::CLEAN;
2208 Timer::RequestInit();
2209 PendQ::drain();
2210 Treadmill::startRequest();
2211 memset(&s_perfCounters, 0, sizeof(s_perfCounters));
2212 Stats::init();
2213 s_initialTCSize = code.totalUsed();
2216 void MCGenerator::requestExit() {
2217 if (Translator::WriteLease().amOwner()) {
2218 Translator::WriteLease().drop();
2220 TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
2221 " kept, %15" PRId64 " grabbed\n",
2222 Process::GetThreadIdForTrace(), Translator::WriteLease().m_hintKept,
2223 Translator::WriteLease().m_hintGrabbed);
2224 PendQ::drain();
2225 Treadmill::finishRequest();
2226 Stats::dump();
2227 Stats::clear();
2228 Timer::RequestExit();
2230 if (Trace::moduleEnabledRelease(Trace::mcgstats, 1)) {
2231 Trace::traceRelease("MCGenerator perf counters for %s:\n",
2232 g_context->getRequestUrl(50).c_str());
2233 for (int i = 0; i < tpc_num_counters; i++) {
2234 Trace::traceRelease("%-20s %10" PRId64 "\n",
2235 kPerfCounterNames[i], s_perfCounters[i]);
2237 Trace::traceRelease("\n");
2241 bool
2242 MCGenerator::isPseudoEvent(const char* event) {
2243 for (auto name : kPerfCounterNames) {
2244 if (!strcmp(event, name)) {
2245 return true;
2248 return false;
2251 void
2252 MCGenerator::getPerfCounters(Array& ret) {
2253 for (int i = 0; i < tpc_num_counters; i++) {
2254 // Until Perflab can automatically scale the values we give it to
2255 // an appropriate range, we have to fudge these numbers so they
2256 // look more like reasonable hardware counter values.
2257 ret.set(String::FromCStr(kPerfCounterNames[i]),
2258 s_perfCounters[i] * 1000);
2261 for (auto const& pair : Timer::Counters()) {
2262 if (pair.second.total == 0 && pair.second.count == 0) continue;
2264 ret.set(String("jit_time_") + pair.first, pair.second.total);
2268 MCGenerator::~MCGenerator() {
2271 static Debug::TCRange rangeFrom(const CodeBlock& cb, const TCA addr,
2272 bool isAcold) {
2273 assert(cb.contains(addr));
2274 return Debug::TCRange(addr, cb.frontier(), isAcold);
2277 void MCGenerator::recordBCInstr(uint32_t op,
2278 const CodeBlock& cb,
2279 const TCA addr,
2280 bool cold) {
2281 if (addr != cb.frontier()) {
2282 m_debugInfo.recordBCInstr(Debug::TCRange(addr, cb.frontier(),
2283 cold), op);
2287 void MCGenerator::recordGdbTranslation(SrcKey sk,
2288 const Func* srcFunc,
2289 const CodeBlock& cb,
2290 const TCA start,
2291 bool exit,
2292 bool inPrologue) {
2293 if (start != cb.frontier()) {
2294 assert(Translator::WriteLease().amOwner());
2295 if (!RuntimeOption::EvalJitNoGdb) {
2296 m_debugInfo.recordTracelet(rangeFrom(cb, start, &cb == &code.cold()),
2297 srcFunc,
2298 reinterpret_cast<const Op*>(
2299 srcFunc->unit() ?
2300 srcFunc->unit()->at(sk.offset()) : nullptr
2302 exit, inPrologue);
2304 if (RuntimeOption::EvalPerfPidMap) {
2305 m_debugInfo.recordPerfMap(rangeFrom(cb, start, &cb == &code.cold()),
2306 srcFunc, exit, inPrologue);
2311 void MCGenerator::recordGdbStub(const CodeBlock& cb,
2312 const TCA start, const char* name) {
2313 if (!RuntimeOption::EvalJitNoGdb) {
2314 m_debugInfo.recordStub(rangeFrom(cb, start, &cb == &code.cold()),
2315 name);
2319 std::string MCGenerator::getUsage() {
2320 std::string usage;
2321 size_t totalBlockSize = 0;
2322 size_t totalBlockCapacity = 0;
2324 auto addRow = [&](const std::string& name, size_t used, size_t capacity) {
2325 totalBlockSize += used;
2326 totalBlockCapacity += capacity;
2327 auto percent = capacity ? 100 * used / capacity : 0;
2328 usage += folly::format("mcg: {:9} bytes ({}%) in {}\n",
2329 used, percent, name).str();
2331 code.forEachBlock([&](const char* name, const CodeBlock& a) {
2332 addRow(std::string("code.") + name, a.used(), a.capacity());
2334 // Report code.stubs usage = code.cold + code.frozen usage, so
2335 // ODS doesn't break.
2336 auto const stubsUsed = code.realCold().used() + code.realFrozen().used();
2337 auto const stubsCapacity = code.realCold().capacity() +
2338 code.realFrozen().capacity();
2339 addRow(std::string("code.stubs"), stubsUsed, stubsCapacity);
2341 addRow("data", code.data().used(), code.data().capacity());
2342 addRow("RDS", RDS::usedBytes(),
2343 RuntimeOption::EvalJitTargetCacheSize * 3 / 4);
2344 addRow("RDSLocal", RDS::usedLocalBytes(),
2345 RuntimeOption::EvalJitTargetCacheSize * 3 / 4);
2346 addRow("persistentRDS", RDS::usedPersistentBytes(),
2347 RuntimeOption::EvalJitTargetCacheSize / 4);
2348 addRow("total",
2349 totalBlockSize + code.data().used() +
2350 RDS::usedBytes() + RDS::usedPersistentBytes(),
2351 totalBlockCapacity + code.data().capacity() +
2352 RuntimeOption::EvalJitTargetCacheSize);
2354 return usage;
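/*
 * Example of the report format produced above (illustrative numbers only):
 *
 *   mcg:   1234567 bytes (12%) in code.main
 *   mcg:    345678 bytes (4%) in code.stubs
 *   mcg:  23456789 bytes (31%) in total
 */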
2357 std::string MCGenerator::getTCAddrs() {
2358 std::string addrs;
2359 code.forEachBlock([&](const char* name, const CodeBlock& a) {
2360 addrs += folly::format("{}: {}\n", name, a.base()).str();
2362 return addrs;
2365 bool MCGenerator::addDbgGuards(const Unit* unit) {
 2366   // TODO: refactor. This grabs the write lease and iterates through the
 2367   // whole SrcDB...
2368 bool locked = Translator::WriteLease().acquire(true);
2369 if (!locked) {
2370 return false;
2372 assert(mcg->cgFixups().empty());
2373 struct timespec tsBegin, tsEnd;
2374 HPHP::Timer::GetMonotonicTime(tsBegin);
 2375   // The docs say even find() _could_ invalidate iterators; in practice
 2376   // that should be very rare, so we go with it for now.
2377 for (SrcDB::const_iterator it = m_tx.getSrcDB().begin();
2378 it != m_tx.getSrcDB().end(); ++it) {
2379 SrcKey const sk = SrcKey::fromAtomicInt(it->first);
2380 // We may have a SrcKey to a deleted function. NB: this may miss a
2381 // race with deleting a Func. See task #2826313.
2382 if (!Func::isFuncIdValid(sk.getFuncId())) continue;
2383 SrcRec* sr = it->second;
2384 if (sr->unitMd5() == unit->md5() &&
2385 !sr->hasDebuggerGuard() &&
2386 m_tx.isSrcKeyInBL(sk)) {
2387 addDbgGuardImpl(sk, sr);
2390 mcg->cgFixups().process();
2391 Translator::WriteLease().drop();
2392 HPHP::Timer::GetMonotonicTime(tsEnd);
2393 int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
2394 if (Trace::moduleEnabledRelease(Trace::mcg, 5)) {
2395 Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
2397 return true;
2400 bool MCGenerator::addDbgGuard(const Func* func, Offset offset, bool resumed) {
2401 SrcKey sk(func, offset, resumed);
2403 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2404 if (sr->hasDebuggerGuard()) {
2405 return true;
2407 } else {
2408 // no translation yet
2409 return true;
2412 if (debug) {
2413 if (!m_tx.isSrcKeyInBL(sk)) {
 2414       TRACE(5, "calling addDbgGuard on PC that is not in blacklist\n");
2415 return false;
2418 bool locked = Translator::WriteLease().acquire(true);
2419 if (!locked) {
2420 return false;
2422 assert(mcg->cgFixups().empty());
2424 if (SrcRec* sr = m_tx.getSrcDB().find(sk)) {
2425 addDbgGuardImpl(sk, sr);
2428 mcg->cgFixups().process();
2429 Translator::WriteLease().drop();
2430 return true;
2433 bool MCGenerator::dumpTCCode(const char* filename) {
2434 #define OPEN_FILE(F, SUFFIX) \
2435 std::string F ## name = std::string(filename).append(SUFFIX); \
2436 FILE* F = fopen(F ## name .c_str(),"wb"); \
2437 if (F == nullptr) return false; \
2438 SCOPE_EXIT{ fclose(F); };
2440 OPEN_FILE(aFile, "_a");
2441 OPEN_FILE(aprofFile, "_aprof");
2442 OPEN_FILE(acoldFile, "_acold");
2443 OPEN_FILE(afrozenFile, "_afrozen");
2444 OPEN_FILE(helperAddrFile, "_helpers_addrs.txt");
2446 #undef OPEN_FILE
2448 // dump starting from the trampolines; this assumes CodeCache places
2449 // trampolines before the translation cache
2450 size_t count = code.main().frontier() - code.trampolines().base();
2451 bool result = (fwrite(code.trampolines().base(), 1, count, aFile) == count);
2452 if (result) {
2453 count = code.prof().used();
2454 result = (fwrite(code.prof().base(), 1, count, aprofFile) == count);
2456 if (result) {
2457 count = code.cold().used();
2458 result = (fwrite(code.cold().base(), 1, count, acoldFile) == count);
2460 if (result) {
2461 count = code.frozen().used();
2462 result = (fwrite(code.frozen().base(), 1, count, afrozenFile) == count);
2464 if (result) {
2465 for (auto const& pair : m_trampolineMap) {
2466 void* helperAddr = pair.first;
2467 void* trampAddr = pair.second;
2468 auto functionName = getNativeFunctionName(helperAddr);
2469 fprintf(helperAddrFile,"%10p %10p %s\n",
2470 trampAddr, helperAddr,
2471 functionName.c_str());
2474 return result;
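/*
 * With the "/tmp/tc_dump" prefix passed by dumpTC() below, this writes:
 *   /tmp/tc_dump_a                  main code, from the trampolines on
 *   /tmp/tc_dump_aprof              profiling code
 *   /tmp/tc_dump_acold              cold code
 *   /tmp/tc_dump_afrozen            frozen code
 *   /tmp/tc_dump_helpers_addrs.txt  trampoline -> helper address table
 */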
2477 // Returns true on success
2478 bool MCGenerator::dumpTC(bool ignoreLease) {
2479 if (!ignoreLease && !Translator::WriteLease().acquire(true)) return false;
2480 bool success = dumpTCData();
2481 if (success) {
2482 success = dumpTCCode("/tmp/tc_dump");
2484 if (!ignoreLease) Translator::WriteLease().drop();
2485 return success;
2488 // Returns true on success
 2489 bool tc_dump() {
2490 return mcg && mcg->dumpTC();
2493 // Returns true on success
2494 bool MCGenerator::dumpTCData() {
2495 gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
2496 if (!tcDataFile) return false;
2498 if (!gzprintf(tcDataFile,
2499 "repo_schema = %s\n"
2500 "a.base = %p\n"
2501 "a.frontier = %p\n"
2502 "aprof.base = %p\n"
2503 "aprof.frontier = %p\n"
2504 "acold.base = %p\n"
2505 "acold.frontier = %p\n"
2506 "afrozen.base = %p\n"
2507 "afrozen.frontier = %p\n\n",
2508 kRepoSchemaId,
2509 code.trampolines().base(), code.main().frontier(),
2510 code.prof().base(), code.prof().frontier(),
2511 code.cold().base(), code.cold().frontier(),
2512 code.frozen().base(), code.frozen().frontier())) {
2513 return false;
2516 if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
2517 m_tx.getCurrentTransID())) {
2518 return false;
2521 for (TransID t = 0; t < m_tx.getCurrentTransID(); t++) {
2522 if (gzputs(tcDataFile,
2523 m_tx.getTransRec(t)->print(m_tx.getTransCounter(t)).c_str()) ==
2524 -1) {
2525 return false;
2529 gzclose(tcDataFile);
2530 return true;
2533 void MCGenerator::invalidateSrcKey(SrcKey sk) {
2534 assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO);
2535 assert(Translator::WriteLease().amOwner());
 2536   /*
 2537    * Reroute existing translations for SrcKey to an as-yet indeterminate
 2538    * new one.
 2539    */
2540 SrcRec* sr = m_tx.getSrcDB().find(sk);
2541 assert(sr);
 2542   /*
 2543    * Since previous translations aren't reachable from here, we know we
 2544    * just created some garbage in the TC. We currently have no mechanism
 2545    * to reclaim this.
 2546    */
2547 sr->replaceOldTranslations();
2550 void MCGenerator::setJmpTransID(TCA jmp) {
2551 if (m_tx.mode() != TransKind::Profile) return;
2553 TransID transId = m_tx.profData()->curTransID();
2554 FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId);
2555 m_fixups.m_pendingJmpTransIDs.emplace_back(jmp, transId);
2558 void RelocationInfo::recordAddress(TCA src, TCA dest, int range) {
2559 assert(m_destSize == size_t(-1) || dest - m_dest >= m_destSize);
2560 m_destSize = dest - m_dest;
2561 m_adjustedAddresses.emplace(src, std::make_pair(dest, range));
2564 TCA RelocationInfo::adjustedAddressAfter(TCA addr) const {
2565 if (size_t(addr - m_start) > size_t(m_end - m_start)) {
2566 return nullptr;
2569 auto it = m_adjustedAddresses.find(addr);
2570 if (it == m_adjustedAddresses.end()) return nullptr;
2572 return it->second.first + it->second.second;
2575 TCA RelocationInfo::adjustedAddressBefore(TCA addr) const {
2576 if (size_t(addr - m_start) > size_t(m_end - m_start)) {
2577 return nullptr;
2580 auto it = m_adjustedAddresses.find(addr);
2581 if (it == m_adjustedAddresses.end()) return nullptr;
2583 return it->second.first;
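/*
 * The two lookups differ only in whether the recorded range is added: for
 * an address recorded via recordAddress(src, dest, range),
 * adjustedAddressBefore(src) yields dest, while adjustedAddressAfter(src)
 * yields dest + range. The distinction matters when the same source
 * address denotes both the end of one relocated run and the start of the
 * next, since those map to different spots once the copied code changes
 * size.
 */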
2586 void
2587 emitIncStat(CodeBlock& cb, uint64_t* tl_table, uint index, int n, bool force) {
2588 if (!force && !Stats::enabled()) return;
2589 intptr_t disp = uintptr_t(&tl_table[index]) - tlsBase();
2591 mcg->backEnd().emitIncStat(cb, disp, n);
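// The displacement computed above is the distance from the thread-local
// base to this thread's copy of the counter; that distance is identical
// in every thread, so the emitted increment can address the counter
// relative to the TLS base register without per-thread patching. On
// x86-64 this would be something like (assuming %fs-based TLS):
//
//   addq $n, %fs:disp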
2594 } // HPHP::JIT
2596 } // HPHP