hphp/runtime/vm/jit/vasm-x64.cpp

   1 /*
   2    +----------------------------------------------------------------------+
   3    | HipHop for PHP                                                       |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15 */
  16
  17 #include "hphp/runtime/vm/jit/vasm-emit.h"
  18
  19 #include "hphp/runtime/base/arch.h"
  20 #include "hphp/runtime/vm/jit/back-end-x64.h"
  21 #include "hphp/runtime/vm/jit/block.h"
  22 #include "hphp/runtime/vm/jit/code-gen-helpers-x64.h"
  23 #include "hphp/runtime/vm/jit/code-gen.h"
  24 #include "hphp/runtime/vm/jit/func-prologues-x64.h"
  25 #include "hphp/runtime/vm/jit/mc-generator.h"
  26 #include "hphp/runtime/vm/jit/print.h"
  27 #include "hphp/runtime/vm/jit/prof-data.h"
  28 #include "hphp/runtime/vm/jit/service-requests-inline.h"
  29 #include "hphp/runtime/vm/jit/target-cache.h"
  30 #include "hphp/runtime/vm/jit/timer.h"
  31 #include "hphp/runtime/vm/jit/vasm.h"
  32 #include "hphp/runtime/vm/jit/vasm-instr.h"
  33 #include "hphp/runtime/vm/jit/vasm-print.h"
  34 #include "hphp/runtime/vm/jit/vasm-unit.h"
  35 #include "hphp/runtime/vm/jit/vasm-util.h"
  36 #include "hphp/runtime/vm/jit/vasm-visit.h"
  37
  38 #include <algorithm>
  39
  40 TRACE_SET_MOD(vasm);
  41
  42 namespace HPHP { namespace jit {
  43 ///////////////////////////////////////////////////////////////////////////////
  44
  45 using namespace reg;
  46 using namespace x64;
  47
  48 namespace x64 { struct ImmFolder; }
  49
  50 namespace {
  51 ///////////////////////////////////////////////////////////////////////////////
  52
  53 struct Vgen {
  54   Vgen(const Vunit& u, Vasm::AreaList& areas, AsmInfo* asmInfo)
  55     : unit(u)
  56     , backend(mcg->backEnd())
  57     , areas(areas)
  58     , m_asmInfo(asmInfo) {
  59     addrs.resize(u.blocks.size());
  60     points.resize(u.next_point);
  61   }
  62   void emit(jit::vector<Vlabel>&);
  63
  64  private:
  65   template<class Inst> void emit(const Inst& i) {
  66     always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
  67                        vinst_names[Vinstr(i).op], size_t(current));
  68   }
  69   // intrinsics
  70   void emit(const bindaddr& i);
  71   void emit(const bindcall& i);
  72   void emit(const bindjcc1st& i);
  73   void emit(const bindjcc& i);
  74   void emit(const bindjmp& i);
  75   void emit(const callstub& i);
  76   void emit(const contenter& i);
  77   void emit(const copy& i);
  78   void emit(const copy2& i);
  79   void emit(const debugtrap& i) { a->int3(); }
  80   void emit(const fallthru& i) {}
  81   void emit(const ldimmb& i);
  82   void emit(const ldimml& i);
  83   void emit(const ldimmq& i);
  84   void emit(const ldimmqs& i);
  85   void emit(const fallback& i);
  86   void emit(const fallbackcc& i);
  87   void emit(const load& i);
  88   void emit(const mccall& i);
  89   void emit(const mcprep& i);
  90   void emit(const nothrow& i);
  91   void emit(const store& i);
  92   void emit(const syncpoint& i);
  93   void emit(const unwind& i);
  94   void emit(const landingpad& i) {}
  95   void emit(const vretm& i);
  96   void emit(const vret& i);
  97
  98   // instructions
  99   void emit(andb i) { commuteSF(i); a->andb(i.s0, i.d); }
 100   void emit(andbi i) { binary(i); a->andb(i.s0, i.d); }
 101   void emit(const andbim& i) { a->andb(i.s, i.m); }
 102   void emit(andl i) { commuteSF(i); a->andl(i.s0, i.d); }
 103   void emit(andli i) { binary(i); a->andl(i.s0, i.d); }
 104   void emit(andq i) { commuteSF(i); a->andq(i.s0, i.d); }
 105   void emit(andqi i) { binary(i); a->andq(i.s0, i.d); }
 106   void emit(addli i) { binary(i); a->addl(i.s0, i.d); }
 107   void emit(const addlm& i) { a->addl(i.s0, i.m); }
 108   void emit(addq i) { commuteSF(i); a->addq(i.s0, i.d); }
 109   void emit(addqi i) { binary(i); a->addq(i.s0, i.d); }
 110   void emit(const addqim& i);
 111   void emit(addsd i) { commute(i); a->addsd(i.s0, i.d); }
 112   void emit(const call& i);
 113   void emit(const callm& i) { a->call(i.target); }
 114   void emit(const callr& i) { a->call(i.target); }
 115   void emit(const cloadq& i);
 116   void emit(const cmovq& i);
 117   void emit(const cmpb& i) { a->cmpb(i.s0, i.s1); }
 118   void emit(const cmpbi& i) { a->cmpb(i.s0, i.s1); }
 119   void emit(const cmpbim& i) { a->cmpb(i.s0, i.s1); }
 120   void emit(const cmpl& i) { a->cmpl(i.s0, i.s1); }
 121   void emit(const cmpli& i) { a->cmpl(i.s0, i.s1); }
 122   void emit(const cmplim& i) { a->cmpl(i.s0, i.s1); }
 123   void emit(const cmplm& i) { a->cmpl(i.s0, i.s1); }
 124   void emit(const cmpq& i) { a->cmpq(i.s0, i.s1); }
 125   void emit(const cmpqi& i) { a->cmpq(i.s0, i.s1); }
 126   void emit(const cmpqim& i) { a->cmpq(i.s0, i.s1); }
 127   void emit(const cmpqims& i);
 128   void emit(const cmpqm& i) { a->cmpq(i.s0, i.s1); }
 129   void emit(cmpsd i) { noncommute(i); a->cmpsd(i.s0, i.d, i.pred); }
 130   void emit(const cqo& i) { a->cqo(); }
 131   void emit(const cvttsd2siq& i) { a->cvttsd2siq(i.s, i.d); }
 132   void emit(const cvtsi2sd& i);
 133   void emit(const cvtsi2sdm& i);
 134   void emit(decl i) { unary(i); a->decl(i.d); }
 135   void emit(const declm& i) { a->decl(i.m); }
 136   void emit(decq i) { unary(i); a->decq(i.d); }
 137   void emit(const decqm& i) { a->decq(i.m); }
 138   void emit(divsd i) { noncommute(i); a->divsd(i.s0, i.d); }
 139   void emit(imul i) { commuteSF(i); a->imul(i.s0, i.d); }
 140   void emit(const idiv& i) { a->idiv(i.s); }
 141   void emit(incl i) { unary(i); a->incl(i.d); }
 142   void emit(const inclm& i) { a->incl(i.m); }
 143   void emit(incq i) { unary(i); a->incq(i.d); }
 144   void emit(const incqm& i) { a->incq(i.m); }
 145   void emit(const incqmlock& i) { a->lock(); a->incq(i.m); }
 146   void emit(const incwm& i) { a->incw(i.m); }
 147   void emit(const jcc& i);
 148   void emit(const jcci& i);
 149   void emit(const jmp& i);
 150   void emit(const jmpr& i) { a->jmp(i.target); }
 151   void emit(const jmpm& i) { a->jmp(i.target); }
 152   void emit(const jmpi& i) { a->jmp(i.target); }
 153   void emit(const lea& i);
 154   void emit(const leap& i) { a->lea(i.s, i.d); }
 155   void emit(const loadups& i) { a->movups(i.s, i.d); }
 156   void emit(const loadtqb& i) { a->loadb(i.s, i.d); }
 157   void emit(const loadl& i) { a->loadl(i.s, i.d); }
 158   void emit(const loadqp& i) { a->loadq(i.s, i.d); }
 159   void emit(const loadsd& i) { a->movsd(i.s, i.d); }
 160   void emit(const loadzbl& i) { a->loadzbl(i.s, i.d); }
 161   void emit(const loadzbq& i) { a->loadzbl(i.s, Reg32(i.d)); }
 162   void emit(const loadzlq& i) { a->loadl(i.s, Reg32(i.d)); }
 163   void emit(const movb& i) { a->movb(i.s, i.d); }
 164   void emit(const movl& i) { a->movl(i.s, i.d); }
 165   void emit(const movzbl& i) { a->movzbl(i.s, i.d); }
 166   void emit(const movzbq& i) { a->movzbl(i.s, Reg32(i.d)); }
 167   void emit(mulsd i) { commute(i); a->mulsd(i.s0, i.d); }
 168   void emit(neg i) { unary(i); a->neg(i.d); }
 169   void emit(const nop& i) { a->nop(); }
 170   void emit(not i) { unary(i); a->not(i.d); }
 171   void emit(notb i) { unary(i); a->notb(i.d); }
 172   void emit(const orwim& i) { a->orw(i.s0, i.m); }
 173   void emit(orq i) { commuteSF(i); a->orq(i.s0, i.d); }
 174   void emit(orqi i) { binary(i); a->orq(i.s0, i.d); }
 175   void emit(const orqim& i) { a->orq(i.s0, i.m); }
 176   void emit(const pop& i) { a->pop(i.d); }
 177   void emit(const popm& i) { a->pop(i.d); }
 178   void emit(psllq i) { binary(i); a->psllq(i.s0, i.d); }
 179   void emit(psrlq i) { binary(i); a->psrlq(i.s0, i.d); }
 180   void emit(const push& i) { a->push(i.s); }
 181   void emit(const roundsd& i) { a->roundsd(i.dir, i.s, i.d); }
 182   void emit(const ret& i) { a->ret(); }
 183   void emit(const sarq& i) { unary(i); a->sarq(i.d); }
 184   void emit(sarqi i) { binary(i); a->sarq(i.s0, i.d); }
 185   void emit(const setcc& i) { a->setcc(i.cc, i.d); }
 186   void emit(shlli i) { binary(i); a->shll(i.s0, i.d); }
 187   void emit(shlq i) { unary(i); a->shlq(i.d); }
 188   void emit(shlqi i) { binary(i); a->shlq(i.s0, i.d); }
 189   void emit(shrli i) { binary(i); a->shrl(i.s0, i.d); }
 190   void emit(shrqi i) { binary(i); a->shrq(i.s0, i.d); }
 191   void emit(const sqrtsd& i) { a->sqrtsd(i.s, i.d); }
 192   void emit(const storeups& i) { a->movups(i.s, i.m); }
 193   void emit(const storeb& i) { a->storeb(i.s, i.m); }
 194   void emit(const storebi& i);
 195   void emit(const storel& i) { a->storel(i.s, i.m); }
 196   void emit(const storeli& i) { a->storel(i.s, i.m); }
 197   void emit(const storeqi& i) { a->storeq(i.s, i.m); }
 198   void emit(const storesd& i) { a->movsd(i.s, i.m); }
 199   void emit(const storew& i) { a->storew(i.s, i.m); }
 200   void emit(const storewi& i) { a->storew(i.s, i.m); }
 201   void emit(subbi i) { binary(i); a->subb(i.s0, i.d); }
 202   void emit(subl i) { noncommute(i); a->subl(i.s0, i.d); }
 203   void emit(subli i) { binary(i); a->subl(i.s0, i.d); }
 204   void emit(subq i) { noncommute(i); a->subq(i.s0, i.d); }
 205   void emit(subqi i) { binary(i); a->subq(i.s0, i.d); }
 206   void emit(subsd i) { noncommute(i); a->subsd(i.s0, i.d); }
 207   void emit(const testb& i) { a->testb(i.s0, i.s1); }
 208   void emit(const testbi& i) { a->testb(i.s0, i.s1); }
 209   void emit(const testbim& i) { a->testb(i.s0, i.s1); }
 210   void emit(const testwim& i);
 211   void emit(const testl& i) { a->testl(i.s0, i.s1); }
 212   void emit(const testli& i) { a->testl(i.s0, i.s1); }
 213   void emit(const testlim& i);
 214   void emit(const testq& i) { a->testq(i.s0, i.s1); }
 215   void emit(const testqm& i) { a->testq(i.s0, i.s1); }
 216   void emit(const testqim& i);
 217   void emit(const ucomisd& i) { a->ucomisd(i.s0, i.s1); }
 218   void emit(const ud2& i) { a->ud2(); }
 219   void emit(unpcklpd i) { noncommute(i); a->unpcklpd(i.s0, i.d); }
 220   void emit(xorb i) { commuteSF(i); a->xorb(i.s0, i.d); }
 221   void emit(xorbi i) { binary(i); a->xorb(i.s0, i.d); }
 222   void emit(xorq i) { commuteSF(i); a->xorq(i.s0, i.d); }
 223   void emit(xorqi i) { binary(i); a->xorq(i.s0, i.d); }
 224
 225   // helpers
 226   void prep(Reg8 s, Reg8 d) { if (s != d) a->movb(s, d); }
 227   void prep(Reg32 s, Reg32 d) { if (s != d) a->movl(s, d); }
 228   void prep(Reg64 s, Reg64 d) { if (s != d) a->movq(s, d); }
 229   void prep(RegXMM s, RegXMM d) { if (s != d) a->movdqa(s, d); }
 230   CodeAddress start(Vlabel b) {
 231     auto area = unit.blocks[b].area;
 232     return areas[(int)area].start;
 233   }
 234   CodeBlock& main() { return area(AreaIndex::Main).code; }
 235   CodeBlock& cold() { return area(AreaIndex::Cold).code; }
 236   CodeBlock& frozen() { return area(AreaIndex::Frozen).code; }
 237   template<class Inst> void unary(Inst& i) { prep(i.s, i.d); }
 238   template<class Inst> void binary(Inst& i) { prep(i.s1, i.d); }
 239   template<class Inst> void commuteSF(Inst&);
 240   template<class Inst> void commute(Inst&);
 241   template<class Inst> void noncommute(Inst&);
 242
 243 private:
 244   Vasm::Area& area(AreaIndex i) {
 245     assertx((unsigned)i < areas.size());
 246     return areas[(unsigned)i];
 247   }
 248
 249 private:
 250   struct LabelPatch { CodeAddress instr; Vlabel target; };
 251   struct PointPatch { CodeAddress instr; Vpoint pos; };
 252   const Vunit& unit;
 253   BackEnd& backend;
 254   Vasm::AreaList& areas;
 255   AsmInfo* m_asmInfo;
 256   X64Assembler* a;
 257   Vlabel current{0}, next{0}; // in linear order
 258   jit::vector<CodeAddress> addrs, points;
 259   jit::vector<LabelPatch> jccs, jmps, calls, catches;
 260   jit::vector<PointPatch> ldpoints;
 261   jit::hash_map<uint64_t,uint64_t*> constants;
 262 };
 263
 264 // prepare a binary op that is not commutative.  s0 must be a different
 265 // register than s1 so we don't clobber it.
 266 template<class Inst> void Vgen::noncommute(Inst& i) {
 267   assertx(i.s1 == i.d || i.s0 != i.d); // do not clobber s0
 268   binary(i);
 269 }
 270
 271 // prepare a binary op that is commutative. Swap operands if the dest is s0.
 272 template<class Inst> void Vgen::commuteSF(Inst& i) {
 273   if (i.s1 != i.d && i.s0 == i.d) {
 274     i = Inst{i.s1, i.s0, i.d, i.sf};
 275   } else {
 276     binary(i);
 277   }
 278 }
 279
 280 // prepare a binary op that is commutative. Swap operands if the dest is s0.
 281 template<class Inst> void Vgen::commute(Inst& i) {
 282   if (i.s1 != i.d && i.s0 == i.d) {
 283     i = Inst{i.s1, i.s0, i.d};
 284   } else {
 285     binary(i);
 286   }
 287 }
 288
 289 ///////////////////////////////////////////////////////////////////////////////
 290
 291 void emitSimdImm(X64Assembler* a, int64_t val, Vreg d) {
 292   if (val == 0) {
 293     a->pxor(d, d); // does not modify flags
 294   } else {
 295     auto addr = mcg->allocLiteral(val);
 296     a->movsd(rip[(intptr_t)addr], d);
 297   }
 298 }
 299
 300 ///////////////////////////////////////////////////////////////////////////////
 301
 302 void Vgen::emit(const addqim& i) {
 303   if (i.m.seg == Vptr::FS) a->fs();
 304   a->addq(i.s0, i.m.mr());
 305 }
 306
 307 void Vgen::emit(const call& i) {
 308   // warning: this is a copy of emitCall(TCA) in code-gen-helpers-x64.cpp
 309   if (a->jmpDeltaFits(i.target)) {
 310     a->call(i.target);
 311   } else {
 312     // can't do a near call; store address in data section.
 313     // call by loading the address using rip-relative addressing.  This
 314     // assumes the data section is near the current code section.  Since
 315     // this sequence is directly in-line, rip-relative like this is
 316     // more compact than loading a 64-bit immediate.
 317     auto addr = mcg->allocLiteral((uint64_t)i.target);
 318     a->call(rip[(intptr_t)addr]);
 319   }
 320 }
 321
 322 void Vgen::emit(const cloadq& i) {
 323   auto m = i.t;
 324   always_assert(!m.index.isValid()); // not supported, but could be later.
 325   if (i.f != i.d) {
 326     if (i.d == m.base) {
 327       // We can't move f over d or we'll clobber the Vptr we need to load from.
 328       // Since cload does the load unconditionally anyway, we can just load and
 329       // cmov.
 330       a->loadq(i.t, i.d);
 331       a->cmov_reg64_reg64(ccNegate(i.cc), i.f, i.d);
 332       return;
 333     }
 334     a->movq(i.f, i.d);
 335   }
 336   a->cload_reg64_disp_reg64(i.cc, m.base, m.disp, i.d);
 337 }
 338
 339 // add s0 s1 d => mov s1->d; d += s0
 340 // cmov cc s d => if cc { mov s->d }
 341 void Vgen::emit(const cmovq& i) {
 342   if (i.f != i.d && i.t == i.d) {
 343     // negate the condition and swap t/f operands so we dont clobber i.t
 344     return emit(cmovq{ccNegate(i.cc), i.sf, i.t, i.f, i.d});
 345   } else {
 346     prep(i.f, i.d);
 347   }
 348   a->cmov_reg64_reg64(i.cc, i.t, i.d);
 349 }
 350
 351 void Vgen::emit(const contenter& i) {
 352   Label Stub, End;
 353   Reg64 fp = i.fp, target = i.target;
 354   a->jmp8(End);
 355
 356   asm_label(*a, Stub);
 357   a->pop(fp[AROFF(m_savedRip)]);
 358   a->jmp(target);
 359
 360   asm_label(*a, End);
 361   a->call(Stub);
 362   // m_savedRip will point here.
 363   emit(unwind{{i.targets[0], i.targets[1]}});
 364 }
 365
 366 void Vgen::emit(const copy& i) {
 367   if (i.s == i.d) return;
 368   if (i.s.isGP()) {
 369     if (i.d.isGP()) {                 // GP => GP
 370       a->movq(i.s, i.d);
 371     } else {                             // GP => XMM
 372       assertx(i.d.isSIMD());
 373       // This generates a movq x86 instruction, which zero extends
 374       // the 64-bit value in srcReg into a 128-bit XMM register
 375       a->movq_rx(i.s, i.d);
 376     }
 377   } else {
 378     if (i.d.isGP()) {                 // XMM => GP
 379       a->movq_xr(i.s, i.d);
 380     } else {                             // XMM => XMM
 381       assertx(i.d.isSIMD());
 382       // This copies all 128 bits in XMM,
 383       // thus avoiding partial register stalls
 384       a->movdqa(i.s, i.d);
 385     }
 386   }
 387 }
 388
 389 void Vgen::emit(const copy2& i) {
 390   assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
 391   auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
 392   assertx(d0 != d1);
 393   if (d0 == s1) {
 394     if (d1 == s0) {
 395       a->xchgq(d0, d1);
 396     } else {
 397       // could do this in a simplify pass
 398       if (s1 != d1) a->movq(s1, d1); // save s1 first; d1 != s0
 399       if (s0 != d0) a->movq(s0, d0);
 400     }
 401   } else {
 402     // could do this in a simplify pass
 403     if (s0 != d0) a->movq(s0, d0);
 404     if (s1 != d1) a->movq(s1, d1);
 405   }
 406 }
 407
 408 void Vgen::emit(const bindaddr& i) {
 409   *i.dest = emitBindAddr(a->code(), frozen(), i.dest, i.sk, i.spOff);
 410   mcg->setJmpTransID(TCA(i.dest));
 411 }
 412
 413 void Vgen::emit(const bindcall& i) {
 414   mcg->backEnd().prepareForSmash(a->code(), kCallLen);
 415   a->call(i.stub);
 416   emit(unwind{{i.targets[0], i.targets[1]}});
 417 }
 418
 419 void Vgen::emit(const bindjcc1st& i) {
 420   emitBindJmpccFirst(a->code(), frozen(), i.cc, i.targets[0], i.targets[1],
 421                      i.spOff);
 422 }
 423
 424 void Vgen::emit(const bindjcc& i) {
 425   emitBindJ(
 426     a->code(),
 427     frozen(),
 428     i.cc,
 429     i.target,
 430     i.spOff,
 431     i.trflags
 432   );
 433 }
 434
 435 void Vgen::emit(const bindjmp& i) {
 436   emitBindJ(
 437     a->code(),
 438     frozen(),
 439     CC_None,
 440     i.target,
 441     i.spOff,
 442     i.trflags
 443   );
 444 }
 445
 446 void Vgen::emit(const callstub& i) {
 447   emit(call{i.target, i.args});
 448 }
 449
 450 void Vgen::emit(const cmpqims& i) {
 451   backend.prepareForSmash(a->code(), kCmpLen);
 452   a->cmpq(i.s0, i.s1);
 453 }
 454
 455 void Vgen::emit(const fallback& i) {
 456   emit(fallbackcc{CC_None, InvalidReg, i.dest, i.trflags, i.args});
 457 }
 458
 459 void Vgen::emit(const fallbackcc& i) {
 460   auto const destSR = mcg->tx().getSrcRec(i.dest);
 461   if (!i.trflags.packed) {
 462     destSR->emitFallbackJump(a->code(), i.cc);
 463   } else {
 464     destSR->emitFallbackJumpCustom(a->code(), frozen(), i.dest, i.trflags);
 465   }
 466 }
 467
 468 void Vgen::emit(const ldimmb& i) {
 469   // ldimmb is for Vconst::Byte, which is treated as unsigned uint8_t
 470   auto val = i.s.b();
 471   if (i.d.isGP()) {
 472     Vreg8 d = i.d;
 473     if (val == 0 && !i.saveflags) {
 474       a->xorb(d, d);
 475     } else {
 476       a->movb(val, d);
 477     }
 478   } else {
 479     emitSimdImm(a, uint8_t(val), i.d);
 480   }
 481 }
 482
 483 void Vgen::emit(const ldimml& i) {
 484   // ldimml is for Vconst::Long, which is treated as unsigned uint32_t
 485   auto val = i.s.l();
 486   if (i.d.isGP()) {
 487     Vreg32 d = i.d;
 488     if (val == 0 && !i.saveflags) {
 489       a->xorl(d, d);
 490     } else {
 491       a->movl(val, d);
 492     }
 493   } else {
 494     emitSimdImm(a, uint32_t(val), i.d);
 495   }
 496 }
 497
 498 void Vgen::emit(const ldimmq& i) {
 499   auto val = i.s.q();
 500   if (i.d.isGP()) {
 501     if (val == 0) {
 502       Reg64 d = i.d;
 503       if (i.saveflags) {
 504         a->movl(0, r32(d));
 505       } else {
 506         a->xorl(r32(d), r32(d));
 507       }
 508     } else {
 509       a->emitImmReg(i.s, i.d);
 510     }
 511   } else {
 512     emitSimdImm(a, val, i.d);
 513   }
 514 }
 515
 516 void Vgen::emit(const ldimmqs& i) {
 517   backend.prepareForSmash(a->code(), kMovLen);
 518   a->movq(0xdeadbeeffeedface, i.d);
 519
 520   auto immp = reinterpret_cast<uintptr_t*>(a->frontier()) - 1;
 521   *immp = i.s.q();
 522 }
 523
 524 void Vgen::emit(const load& i) {
 525   if (i.s.seg == Vptr::FS) a->fs();
 526   auto mref = i.s.mr();
 527   if (i.d.isGP()) {
 528     a->loadq(mref, i.d);
 529   } else {
 530     assertx(i.d.isSIMD());
 531     a->movsd(mref, i.d);
 532   }
 533 }
 534
 535 void Vgen::emit(const mccall& i) {
 536   backend.prepareForSmash(a->code(), kCallLen);
 537   a->call(i.target);
 538 }
 539
 540 // emit smashable mov as part of method cache callsite
 541 void Vgen::emit(const mcprep& i) {
 542   /*
 543    * For the first time through, set the cache to hold the address
 544    * of the movq (*2 + 1), so we can find the movq from the handler.
 545    *
 546    * We set the low bit for two reasons: the Class* will never be a valid
 547    * Class*, so we'll always miss the inline check before it's smashed, and
 548    * handlePrimeCacheMiss can tell it's not been smashed yet
 549    */
 550   emit(ldimmqs{0x8000000000000000u, i.d});
 551
 552   auto movAddr = reinterpret_cast<uintptr_t>(a->frontier()) - x64::kMovLen;
 553   auto immAddr = reinterpret_cast<uintptr_t*>(movAddr + x64::kMovImmOff);
 554
 555   *immAddr = (movAddr << 1) | 1;
 556   mcg->cgFixups().m_addressImmediates.insert(reinterpret_cast<TCA>(~movAddr));
 557 }
 558
 559 void Vgen::emit(const storebi& i) {
 560   if (i.m.seg == Vptr::FS) a->fs();
 561   a->storeb(i.s, i.m.mr());
 562 }
 563
 564 void Vgen::emit(const store& i) {
 565   if (i.s.isGP()) {
 566     a->storeq(i.s, i.d);
 567   } else {
 568     assertx(i.s.isSIMD());
 569     a->movsd(i.s, i.d);
 570   }
 571 }
 572
 573 void Vgen::emit(const syncpoint& i) {
 574   FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a->frontier(),
 575          i.fix.pcOffset, i.fix.spOffset);
 576   mcg->recordSyncPoint(a->frontier(), i.fix.pcOffset,
 577                        i.fix.spOffset);
 578 }
 579
 580 void Vgen::emit(const testwim& i) {
 581   // If there's only 1 byte of meaningful bits in the mask, we can adjust the
 582   // pointer offset and use testbim instead.
 583   int off = 0;
 584   uint16_t newMask = i.s0.w();
 585   while (newMask > 0xff && !(newMask & 0xff)) {
 586     off++;
 587     newMask >>= 8;
 588   }
 589
 590   if (newMask > 0xff) {
 591     a->testw(i.s0, i.s1);
 592   } else {
 593     emit(testbim{int8_t(newMask), i.s1 + off, i.sf});
 594   }
 595 }
 596
 597 void Vgen::emit(const testlim& i) {
 598   a->testl(i.s0, i.s1);
 599 }
 600
 601 void Vgen::emit(const testqim& i) {
 602   // The immediate is 32 bits, sign-extended to 64. If the sign bit isn't set,
 603   // we can get the same results by emitting a testlim.
 604   if (i.s0.l() < 0) {
 605     a->testq(i.s0, i.s1);
 606   } else {
 607     emit(testlim{i.s0, i.s1, i.sf});
 608   }
 609 }
 610
 611 void Vgen::emit(const nothrow& i) {
 612   // register a null catch trace at this position, telling the unwinder that
 613   // the function call returning to here isn't allowed to throw.
 614   mcg->registerCatchBlock(a->frontier(), nullptr);
 615 }
 616
 617 void Vgen::emit(const unwind& i) {
 618   // Unwind instructions terminate blocks with calls that can throw, and have
 619   // the edges to catch (unwinder) blocks and fall-through blocks.
 620   catches.push_back({a->frontier(), i.targets[1]});
 621   emit(jmp{i.targets[0]});
 622 }
 623
 624 void Vgen::emit(const vretm& i) {
 625   a->push(i.retAddr);
 626   a->loadq(i.prevFp, i.d);
 627   a->ret();
 628 }
 629
 630 void Vgen::emit(const vret& i) {
 631   a->push(i.retAddr);
 632   a->ret();
 633 }
 634
 635 // overall emitter
 636 void Vgen::emit(jit::vector<Vlabel>& labels) {
 637   // Some structures here track where we put things just for debug printing.
 638   struct Snippet {
 639     const IRInstruction* origin;
 640     TcaRange range;
 641   };
 642   struct BlockInfo {
 643     jit::vector<Snippet> snippets;
 644   };
 645
 646   // This is under the printir tracemod because it mostly shows you IR and
 647   // machine code, not vasm and machine code (not implemented).
 648   bool shouldUpdateAsmInfo = !!m_asmInfo;
 649
 650   std::vector<TransBCMapping>* bcmap = nullptr;
 651   if (mcg->tx().isTransDBEnabled() || RuntimeOption::EvalJitUseVtuneAPI) {
 652     bcmap = &mcg->cgFixups().m_bcMap;
 653   }
 654
 655   jit::vector<jit::vector<BlockInfo>> areaToBlockInfos;
 656   if (shouldUpdateAsmInfo) {
 657     areaToBlockInfos.resize(areas.size());
 658     for (auto& r : areaToBlockInfos) {
 659       r.resize(unit.blocks.size());
 660     }
 661   }
 662
 663   for (int i = 0, n = labels.size(); i < n; ++i) {
 664     assertx(checkBlockEnd(unit, labels[i]));
 665
 666     auto b = labels[i];
 667     auto& block = unit.blocks[b];
 668     X64Assembler as { area(block.area).code };
 669     a = &as;
 670     auto blockStart = a->frontier();
 671     addrs[b] = blockStart;
 672
 673     {
 674       // Compute the next block we will emit into the current area.
 675       auto cur_start = start(labels[i]);
 676       auto j = i + 1;
 677       while (j < labels.size() && cur_start != start(labels[j])) {
 678         j++;
 679       }
 680       next = j < labels.size() ? labels[j] : Vlabel(unit.blocks.size());
 681       current = b;
 682     }
 683
 684     const IRInstruction* currentOrigin = nullptr;
 685     auto blockInfo = shouldUpdateAsmInfo
 686       ? &areaToBlockInfos[unsigned(block.area)][b]
 687       : nullptr;
 688     auto start_snippet = [&](const Vinstr& inst) {
 689       if (!shouldUpdateAsmInfo) return;
 690
 691       blockInfo->snippets.push_back(
 692         Snippet { inst.origin, TcaRange { a->code().frontier(), nullptr } }
 693       );
 694     };
 695     auto finish_snippet = [&] {
 696       if (!shouldUpdateAsmInfo) return;
 697
 698       if (!blockInfo->snippets.empty()) {
 699         auto& snip = blockInfo->snippets.back();
 700         snip.range = TcaRange { snip.range.start(), a->code().frontier() };
 701       }
 702     };
 703
 704     for (auto& inst : block.code) {
 705       if (currentOrigin != inst.origin) {
 706         finish_snippet();
 707         start_snippet(inst);
 708         currentOrigin = inst.origin;
 709       }
 710
 711       if (bcmap && inst.origin) {
 712         auto sk = inst.origin->marker().sk();
 713         if (bcmap->empty() ||
 714             bcmap->back().md5 != sk.unit()->md5() ||
 715             bcmap->back().bcStart != sk.offset()) {
 716           bcmap->push_back(TransBCMapping{sk.unit()->md5(), sk.offset(),
 717                                           main().frontier(), cold().frontier(),
 718                                           frozen().frontier()});
 719         }
 720       }
 721
 722       switch (inst.op) {
 723 #define O(name, imms, uses, defs) \
 724         case Vinstr::name: emit(inst.name##_); break;
 725         VASM_OPCODES
 726 #undef O
 727       }
 728     }
 729
 730     finish_snippet();
 731   }
 732
 733   for (auto& p : jccs) {
 734     assertx(addrs[p.target]);
 735     X64Assembler::patchJcc(p.instr, addrs[p.target]);
 736   }
 737   for (auto& p : jmps) {
 738     assertx(addrs[p.target]);
 739     X64Assembler::patchJmp(p.instr, addrs[p.target]);
 740   }
 741   for (auto& p : calls) {
 742     assertx(addrs[p.target]);
 743     X64Assembler::patchCall(p.instr, addrs[p.target]);
 744   }
 745   for (auto& p : catches) {
 746     mcg->registerCatchBlock(p.instr, addrs[p.target]);
 747   }
 748   for (auto& p : ldpoints) {
 749     auto after_lea = p.instr + 7;
 750     auto d = points[p.pos] - after_lea;
 751     assertx(deltaFits(d, sz::dword));
 752     ((int32_t*)after_lea)[-1] = d;
 753   }
 754
 755   if (!shouldUpdateAsmInfo) {
 756     return;
 757   }
 758
 759   for (auto i = 0; i < areas.size(); ++i) {
 760     auto& blockInfos = areaToBlockInfos[i];
 761     for (auto const blockID : labels) {
 762       auto const& blockInfo = blockInfos[static_cast<size_t>(blockID)];
 763       if (blockInfo.snippets.empty()) continue;
 764
 765       const IRInstruction* currentOrigin = nullptr;
 766       for (auto const& snip : blockInfo.snippets) {
 767         if (currentOrigin != snip.origin && snip.origin) {
 768           currentOrigin = snip.origin;
 769         }
 770
 771         m_asmInfo->updateForInstruction(
 772           currentOrigin,
 773           static_cast<AreaIndex>(i),
 774           snip.range.start(),
 775           snip.range.end());
 776       }
 777     }
 778   }
 779 }
 780
 781 void Vgen::emit(const cvtsi2sd& i) {
 782   a->pxor(i.d, i.d);
 783   a->cvtsi2sd(i.s, i.d);
 784 }
 785
 786 void Vgen::emit(const cvtsi2sdm& i) {
 787   a->pxor(i.d, i.d);
 788   a->cvtsi2sd(i.s, i.d);
 789 }
 790
 791 void Vgen::emit(const jcc& i) {
 792   if (i.targets[1] != i.targets[0]) {
 793     if (next == i.targets[1]) {
 794       return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
 795     }
 796     auto taken = i.targets[1];
 797     jccs.push_back({a->frontier(), taken});
 798     a->jcc(i.cc, a->frontier());
 799   }
 800   emit(jmp{i.targets[0]});
 801 }
 802
 803 void Vgen::emit(const jcci& i) {
 804   a->jcc(i.cc, i.taken);
 805   emit(jmp{i.target});
 806 }
 807
 808 void Vgen::emit(const jmp& i) {
 809   if (next == i.target) return;
 810   jmps.push_back({a->frontier(), i.target});
 811   a->jmp(a->frontier());
 812 }
 813
 814 void Vgen::emit(const lea& i) {
 815   // could do this in a simplify pass
 816   if (i.s.disp == 0 && i.s.base.isValid() && !i.s.index.isValid()) {
 817     emit(copy{i.s.base, i.d});
 818   } else {
 819     a->lea(i.s, i.d);
 820   }
 821 }
 822
 823 /*
 824  * Move all the elements of in into out, replacing count elements of out
 825  * starting at idx. in be will be cleared at the end.
 826  *
 827  * Example: vector_splice([1, 2, 3, 4, 5], 2, 1, [10, 11, 12]) will change out
 828  * to [1, 2, 10, 11, 12, 4, 5].
 829  */
 830 template<typename V>
 831 void vector_splice(V& out, size_t idx, size_t count, V& in) {
 832   auto out_size = out.size();
 833
 834   // Start by making room in out for the new elements.
 835   out.resize(out.size() + in.size() - count);
 836
 837   // Move everything after the to-be-overwritten elements to the new end.
 838   std::move_backward(out.begin() + idx + count, out.begin() + out_size,
 839                      out.end());
 840
 841   // Move the new elements in
 842   std::move(in.begin(), in.end(), out.begin() + idx);
 843   in.clear();
 844 }
 845
 846 // Lower svcreq{} by making copies to abi registers explicit, saving
 847 // vm regs, and returning to the VM. svcreq{} is guaranteed to be
 848 // at the end of a block, so we can just keep appending to the same
 849 // block.
 850 void lower_svcreq(Vunit& unit, Vlabel b, const Vinstr& inst) {
 851   assertx(unit.tuples[inst.svcreq_.extraArgs].size() < kNumServiceReqArgRegs);
 852   auto svcreq = inst.svcreq_; // copy it
 853   auto origin = inst.origin;
 854   auto& argv = unit.tuples[svcreq.extraArgs];
 855   unit.blocks[b].code.pop_back(); // delete the svcreq instruction
 856   Vout v(unit, b, origin);
 857
 858   RegSet arg_regs = svcreq.args;
 859   VregList arg_dests;
 860   for (int i = 0, n = argv.size(); i < n; ++i) {
 861     PhysReg d{serviceReqArgRegs[i]};
 862     arg_dests.push_back(d);
 863     arg_regs |= d;
 864   }
 865   v << copyargs{svcreq.extraArgs, v.makeTuple(arg_dests)};
 866   if (svcreq.stub_block) {
 867     v << leap{rip[(int64_t)svcreq.stub_block], rAsm};
 868   } else {
 869     v << ldimmq{0, rAsm}; // because persist flag
 870   }
 871   v << ldimmq{svcreq.req, rdi};
 872   arg_regs |= rAsm | rdi | rVmFp | rVmSp;
 873
 874   v << jmpi{TCA(handleSRHelper), arg_regs};
 875 }
 876
 877 void lowerSrem(Vunit& unit, Vlabel b, size_t iInst) {
 878   auto const& inst = unit.blocks[b].code[iInst];
 879   auto const& srem = inst.srem_;
 880   auto scratch = unit.makeScratchBlock();
 881   SCOPE_EXIT { unit.freeScratchBlock(scratch); };
 882   Vout v(unit, scratch, inst.origin);
 883   v << copy{srem.s0, rax};
 884   v << cqo{};                      // sign-extend rax => rdx:rax
 885   v << idiv{srem.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
 886   v << copy{rdx, srem.d};
 887
 888   vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
 889 }
 890
 891 template<typename FromOp, typename ToOp>
 892 void lowerShift(Vunit& unit, Vlabel b, size_t iInst) {
 893   auto const& inst = unit.blocks[b].code[iInst];
 894   auto const& shift = inst.get<FromOp>();
 895   auto scratch = unit.makeScratchBlock();
 896   SCOPE_EXIT { unit.freeScratchBlock(scratch); };
 897   Vout v(unit, scratch, inst.origin);
 898   v << copy{shift.s0, rcx};
 899   v << ToOp{shift.s1, shift.d, shift.sf};
 900
 901   vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
 902 }
 903
 904 void lowerAbsdbl(Vunit& unit, Vlabel b, size_t iInst) {
 905   auto const& inst = unit.blocks[b].code[iInst];
 906   auto const& absdbl = inst.absdbl_;
 907   auto scratch = unit.makeScratchBlock();
 908   SCOPE_EXIT { unit.freeScratchBlock(scratch); };
 909   Vout v(unit, scratch, inst.origin);
 910
 911   // clear the high bit
 912   auto tmp = v.makeReg();
 913   v << psllq{1, absdbl.s, tmp};
 914   v << psrlq{1, tmp, absdbl.d};
 915
 916   vector_splice(unit.blocks[b].code, iInst, 1, unit.blocks[scratch].code);
 917 }
 918
 919 void lowerVcall(Vunit& unit, Vlabel b, size_t iInst) {
 920   auto& blocks = unit.blocks;
 921   auto& inst = blocks[b].code[iInst];
 922   auto const is_vcall = inst.op == Vinstr::vcall;
 923   auto const vcall = inst.vcall_;
 924   auto const vinvoke = inst.vinvoke_;
 925
 926   // Extract all the relevant information from the appropriate instruction.
 927   auto const is_smashable = !is_vcall && vinvoke.smashable;
 928   auto const call = is_vcall ? vcall.call : vinvoke.call;
 929   auto const& vargs = unit.vcallArgs[is_vcall ? vcall.args : vinvoke.args];
 930   auto const& stkArgs = vargs.stkArgs;
 931   auto const dests = unit.tuples[is_vcall ? vcall.d : vinvoke.d];
 932   auto const fixup = is_vcall ? vcall.fixup : vinvoke.fixup;
 933   auto const destType = is_vcall ? vcall.destType : vinvoke.destType;
 934
 935   auto scratch = unit.makeScratchBlock();
 936   SCOPE_EXIT { unit.freeScratchBlock(scratch); };
 937   Vout v(unit, scratch, inst.origin);
 938
 939   int32_t const adjust = (stkArgs.size() & 0x1) ? sizeof(uintptr_t) : 0;
 940   if (adjust) v << subqi{adjust, reg::rsp, reg::rsp, v.makeReg()};
 941
 942   // Push stack arguments, in reverse order.
 943   for (int i = stkArgs.size() - 1; i >= 0; --i) v << push{stkArgs[i]};
 944
 945   // Get the arguments in the proper registers.
 946   RegSet argRegs;
 947   auto doArgs = [&](const VregList& srcs, const PhysReg argNames[]) {
 948     VregList argDests;
 949     for (size_t i = 0; i < srcs.size(); ++i) {
 950       auto reg = argNames[i];
 951       argDests.push_back(reg);
 952       argRegs |= reg;
 953     }
 954     if (argDests.size()) {
 955       v << copyargs{v.makeTuple(srcs),
 956                     v.makeTuple(std::move(argDests))};
 957     }
 958   };
 959   doArgs(vargs.args, argNumToRegName);
 960   doArgs(vargs.simdArgs, argNumToSIMDRegName);
 961
 962   // Emit the call.
 963   if (is_smashable) v << mccall{(TCA)call.address(), argRegs};
 964   else              emitCall(v, call, argRegs);
 965
 966   // Handle fixup and unwind information.
 967   if (fixup.isValid()) v << syncpoint{fixup};
 968
 969   if (!is_vcall) {
 970     auto& targets = vinvoke.targets;
 971     v << unwind{{targets[0], targets[1]}};
 972
 973     // Insert an lea fixup for any stack args at the beginning of the catch
 974     // block.
 975     if (auto rspOffset = ((stkArgs.size() + 1) & ~1) * sizeof(uintptr_t)) {
 976       auto& taken = unit.blocks[targets[1]].code;
 977       assertx(taken.front().op == Vinstr::landingpad ||
 978              taken.front().op == Vinstr::jmp);
 979       Vinstr v{lea{rsp[rspOffset], rsp}};
 980       v.origin = taken.front().origin;
 981       if (taken.front().op == Vinstr::jmp) {
 982         taken.insert(taken.begin(), v);
 983       } else {
 984         taken.insert(taken.begin() + 1, v);
 985       }
 986     }
 987
 988     // Write out the code so far to the end of b. Remaining code will be
 989     // emitted to the next block.
 990     vector_splice(blocks[b].code, iInst, 1, blocks[scratch].code);
 991   } else if (vcall.nothrow) {
 992     v << nothrow{};
 993   }
 994
 995   // Copy the call result to the destination register(s)
 996   switch (destType) {
 997     case DestType::TV: {
 998       // rax contains m_type and m_aux but we're expecting just the type in
 999       // the lower bits, so shift the type result register.
1000       static_assert(offsetof(TypedValue, m_data) == 0, "");
1001       static_assert(offsetof(TypedValue, m_type) == 8, "");
1002       if (dests.size() == 2) {
1003         v << copy2{reg::rax, reg::rdx, dests[0], dests[1]};
1004       } else {
1005         // We have cases where we statically know the type but need the value
1006         // from native call. Even if the type does not really need a register
1007         // (e.g., InitNull), a Vreg is still allocated in assignRegs(), so the
1008         // following assertion holds.
1009         assertx(dests.size() == 1);
1010         v << copy{reg::rax, dests[0]};
1011       }
1012       break;
1013     }
1014     case DestType::SIMD: {
1015       // rax contains m_type and m_aux but we're expecting just the type in
1016       // the lower bits, so shift the type result register.
1017       static_assert(offsetof(TypedValue, m_data) == 0, "");
1018       static_assert(offsetof(TypedValue, m_type) == 8, "");
1019       assertx(dests.size() == 1);
1020       pack2(v, reg::rax, reg::rdx, dests[0]);
1021       break;
1022     }
1023     case DestType::SSA:
1024     case DestType::Byte:
1025       // copy the single-register result to dests[0]
1026       assertx(dests.size() == 1);
1027       assertx(dests[0].isValid());
1028       v << copy{reg::rax, dests[0]};
1029       break;
1030     case DestType::None:
1031       assertx(dests.empty());
1032       break;
1033     case DestType::Dbl:
1034       // copy the single-register result to dests[0]
1035       assertx(dests.size() == 1);
1036       assertx(dests[0].isValid());
1037       v << copy{reg::xmm0, dests[0]};
1038       break;
1039   }
1040
1041   if (stkArgs.size() > 0) {
1042     v << addqi{safe_cast<int32_t>(stkArgs.size() * sizeof(uintptr_t)
1043                                   + adjust),
1044                reg::rsp,
1045                reg::rsp,
1046                v.makeReg()};
1047   }
1048
1049   // Insert new instructions to the appropriate block
1050   if (is_vcall) {
1051     vector_splice(blocks[b].code, iInst, 1, blocks[scratch].code);
1052   } else {
1053     vector_splice(blocks[vinvoke.targets[0]].code, 0, 0,
1054                   blocks[scratch].code);
1055   }
1056 }
1057
1058 void lower_vcallstub(Vunit& unit, Vlabel b) {
1059   auto& code = unit.blocks[b].code;
1060   // vcallstub can only appear at the end of a block.
1061   auto const inst = code.back().get<vcallstub>();
1062   auto const origin = code.back().origin;
1063
1064   auto argRegs = inst.args;
1065   auto const& srcs = unit.tuples[inst.extraArgs];
1066   jit::vector<Vreg> dsts;
1067   for (int i = 0; i < srcs.size(); ++i) {
1068     dsts.emplace_back(argNumToRegName[i]);
1069     argRegs |= argNumToRegName[i];
1070   }
1071
1072   code.back() = copyargs{unit.makeTuple(srcs), unit.makeTuple(std::move(dsts))};
1073   code.emplace_back(callstub{inst.target, argRegs});
1074   code.back().origin = origin;
1075   code.emplace_back(unwind{{inst.targets[0], inst.targets[1]}});
1076   code.back().origin = origin;
1077 }
1078
1079 /*
1080  * Lower a few abstractions to facilitate straightforward x64 codegen.
1081  */
1082 void lowerForX64(Vunit& unit, const Abi& abi) {
1083   Timer _t(Timer::vasm_lower);
1084
1085   // This pass relies on having no critical edges in the unit.
1086   splitCriticalEdges(unit);
1087
1088   // Scratch block can change blocks allocation, hence cannot use regular
1089   // iterators.
1090   auto& blocks = unit.blocks;
1091
1092   PostorderWalker{unit}.dfs([&](Vlabel ib) {
1093     assertx(!blocks[ib].code.empty());
1094     auto& back = blocks[ib].code.back();
1095     if (back.op == Vinstr::svcreq) {
1096       lower_svcreq(unit, Vlabel{ib}, blocks[ib].code.back());
1097     } else if (back.op == Vinstr::vcallstub) {
1098       lower_vcallstub(unit, Vlabel{ib});
1099     }
1100
1101     for (size_t ii = 0; ii < blocks[ib].code.size(); ++ii) {
1102       auto& inst = blocks[ib].code[ii];
1103       switch (inst.op) {
1104         case Vinstr::vcall:
1105         case Vinstr::vinvoke:
1106           lowerVcall(unit, Vlabel{ib}, ii);
1107           break;
1108
1109         case Vinstr::srem:
1110           lowerSrem(unit, Vlabel{ib}, ii);
1111           break;
1112
1113         case Vinstr::sar:
1114           lowerShift<sar, sarq>(unit, Vlabel{ib}, ii);
1115           break;
1116
1117         case Vinstr::shl:
1118           lowerShift<shl, shlq>(unit, Vlabel{ib}, ii);
1119           break;
1120
1121         case Vinstr::absdbl:
1122           lowerAbsdbl(unit, Vlabel{ib}, ii);
1123           break;
1124
1125         case Vinstr::defvmsp:
1126           inst = copy{rVmSp, inst.defvmsp_.d};
1127           break;
1128
1129         case Vinstr::syncvmsp:
1130           inst = copy{inst.syncvmsp_.s, rVmSp};
1131           break;
1132
1133         case Vinstr::movtqb:
1134           inst = copy{inst.movtqb_.s, inst.movtqb_.d};
1135           break;
1136
1137         case Vinstr::movtql:
1138           inst = copy{inst.movtql_.s, inst.movtql_.d};
1139           break;
1140
1141         case Vinstr::countbytecode:
1142           inst = incqm{inst.countbytecode_.base[g_bytecodesVasm.handle()],
1143                        inst.countbytecode_.sf};
1144           break;
1145
1146         default:
1147           break;
1148       }
1149     }
1150   });
1151
1152   printUnit(kVasmLowerLevel, "after lower for X64", unit);
1153 }
1154
1155 ///////////////////////////////////////////////////////////////////////////////
1156 }
1157
1158 void optimizeX64(Vunit& unit, const Abi& abi) {
1159   Timer timer(Timer::vasm_optimize);
1160
1161   removeTrivialNops(unit);
1162   fuseBranches(unit);
1163   optimizeJmps(unit);
1164   optimizeExits(unit);
1165
1166   lowerForX64(unit, abi);
1167
1168   if (!unit.constants.empty()) {
1169     foldImms<x64::ImmFolder>(unit);
1170   }
1171   {
1172     Timer timer(Timer::vasm_copy);
1173     optimizeCopies(unit, abi);
1174   }
1175   if (unit.needsRegAlloc()) {
1176     Timer timer(Timer::vasm_xls);
1177     removeDeadCode(unit);
1178     allocateRegisters(unit, abi);
1179   }
1180   if (unit.blocks.size() > 1) {
1181     Timer timer(Timer::vasm_jumps);
1182     optimizeJmps(unit);
1183   }
1184 }
1185
1186 void emitX64(const Vunit& unit, Vasm::AreaList& areas, AsmInfo* asmInfo) {
1187   static thread_local bool busy;
1188   always_assert(!busy);
1189   busy = true;
1190   SCOPE_EXIT { busy = false; };
1191
1192   Timer timer(Timer::vasm_gen);
1193   auto blocks = layoutBlocks(unit);
1194   Vgen(unit, areas, asmInfo).emit(blocks);
1195 }
1196
1197 ///////////////////////////////////////////////////////////////////////////////
1198 }}