/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_UTIL_ASM_X64_H_
#define incl_HPHP_UTIL_ASM_X64_H_

#include <type_traits>

#include "hphp/util/atomic.h"
#include "hphp/util/data-block.h"
#include "hphp/util/immed.h"
#include "hphp/util/safe-cast.h"
#include "hphp/util/trace.h"
/*
 * An experimental macro assembler for x64, that strives for low coupling to
 * the runtime environment.
 *
 * There are more complete assemblers out there; if you use this one
 * yourself, expect not to find all the instructions you wanted to use. You
 * may have to go spelunking in the Intel manuals:
 *
 *   http://www.intel.com/products/processor/manuals/
 *
 * If you're looking for something more fully baked, here are some options
 * to consider:
 *
 *   1. Nanojit or llvm, both of which translate abstract virtual machine
 *      instructions to the native target architecture, or
 *
 *   2. The embedded assemblers from v8, the Sun JVM, etc.
 */
/*
 * Some members cannot be const because their values aren't known in
 * an initialization list. Like the opposite of the "mutable" keyword.
 * This declares this property to readers.
 */
#define logical_const /* nothing */
namespace HPHP { namespace jit {

#define TRACEMOD ::HPHP::Trace::asmx64

//////////////////////////////////////////////////////////////////////

struct MemoryRef;
struct RIPRelativeRef;
struct ScaledIndex;
struct ScaledIndexDisp;
struct DispReg;

const uint8_t kOpsizePrefix = 0x66;
struct Reg64 {
  explicit constexpr Reg64(int rn) : rn(rn) {}

  // Integer conversion is allowed but only explicitly. (It's not
  // unusual to want to printf registers, etc. Just cast it first.)
  explicit constexpr operator int() const { return rn; }

  MemoryRef operator[](intptr_t disp) const;
  MemoryRef operator[](Reg64) const;
  MemoryRef operator[](ScaledIndex) const;
  MemoryRef operator[](ScaledIndexDisp) const;
  MemoryRef operator[](DispReg) const;

  constexpr bool operator==(Reg64 o) const { return rn == o.rn; }
  constexpr bool operator!=(Reg64 o) const { return rn != o.rn; }

private:
  int rn;
};
#define SIMPLE_REGTYPE(What)                                        \
  struct What {                                                     \
    explicit constexpr What(int rn) : rn(rn) {}                     \
    explicit constexpr operator int() const { return rn; }          \
    constexpr bool operator==(What o) const { return rn == o.rn; }  \
    constexpr bool operator!=(What o) const { return rn != o.rn; }  \
  private:                                                          \
    int rn;                                                         \
  };

SIMPLE_REGTYPE(Reg32);
SIMPLE_REGTYPE(Reg16);
SIMPLE_REGTYPE(Reg8);
SIMPLE_REGTYPE(RegXMM);
SIMPLE_REGTYPE(RegSF);

#undef SIMPLE_REGTYPE
struct RegRIP {
  RIPRelativeRef operator[](intptr_t disp) const;
};

// Convert between physical registers of different sizes
inline Reg8 rbyte(Reg32 r)  { return Reg8(int(r)); }
inline Reg8 rbyte(Reg64 r)  { return Reg8(int(r)); }
inline Reg16 r16(Reg8 r)    { return Reg16(int(r)); }
inline Reg32 r32(Reg8 r)    { return Reg32(int(r)); }
inline Reg32 r32(Reg16 r)   { return Reg32(int(r)); }
inline Reg32 r32(Reg32 r)   { return r; }
inline Reg32 r32(Reg64 r)   { return Reg32(int(r)); }
inline Reg64 r64(Reg8 r)    { return Reg64(int(r)); }
inline Reg64 r64(Reg16 r)   { return Reg64(int(r)); }
inline Reg64 r64(Reg32 r)   { return Reg64(int(r)); }
inline Reg64 r64(Reg64 r)   { return r; }
//////////////////////////////////////////////////////////////////////

/*
 * The following structures define intermediate types for various
 * addressing modes. They overload some operators to allow using
 * registers to look somewhat like pointers.
 *
 * E.g. rax[rbx*2 + 3] or *(rax + rbx*2 + 3).
 *
 * These operators are not defined commutatively; the thought is that
 * this mandates the order you would normally write them in a .S, but
 * it could be changed if this proves undesirable.
 */
// reg*x
struct ScaledIndex {
  explicit ScaledIndex(Reg64 index, intptr_t scale)
    : index(index)
    , scale(scale)
  {
    assert((scale == 0x1 || scale == 0x2 || scale == 0x4 || scale == 0x8) &&
           "Invalid index register scaling (must be 1,2,4 or 8).");
    assert(int(index) != -1 && "invalid register");
  }

  Reg64 index;
  intptr_t scale;
};
// reg*x + disp
struct ScaledIndexDisp {
  explicit ScaledIndexDisp(ScaledIndex si, intptr_t disp)
    : si(si)
    , disp(disp)
  {}

  ScaledIndexDisp operator+(intptr_t x) const {
    return ScaledIndexDisp(si, disp + x);
  }
  ScaledIndexDisp operator-(intptr_t x) const {
    return ScaledIndexDisp(si, disp - x);
  }

  ScaledIndex si;
  intptr_t disp;
};
// reg+x
struct DispReg {
  explicit DispReg(Reg64 base, intptr_t disp = 0)
    : base(base)
    , disp(disp)
  {
    assert(int(base) != -1 && "invalid register");
  }

  // Constructor for baseless().
  explicit DispReg(intptr_t disp)
    : base(-1)
    , disp(disp)
  {}

  MemoryRef operator*() const;
  MemoryRef operator[](intptr_t) const;

  DispReg operator+(intptr_t x) const {
    return DispReg(base, disp + x);
  }
  DispReg operator-(intptr_t x) const {
    return DispReg(base, disp - x);
  }

  Reg64 base;
  intptr_t disp;
};
// reg + reg*x + y
struct IndexedDispReg {
  explicit IndexedDispReg(Reg64 base, ScaledIndex sr)
    : base(base)
    , index(sr.index)
    , scale(sr.scale)
    , disp(0)
  {}

  explicit IndexedDispReg(DispReg r)
    : base(r.base)
    , index(-1)
    , scale(1)
    , disp(r.disp)
  {}

  // Constructor for baseless()
  explicit IndexedDispReg(ScaledIndexDisp sid)
    : base(-1)
    , index(sid.si.index)
    , scale(sid.si.scale)
    , disp(sid.disp)
  {}

  MemoryRef operator*() const;
  MemoryRef operator[](intptr_t disp) const;

  IndexedDispReg operator+(intptr_t disp) const {
    auto ret = *this;
    ret.disp += disp;
    return ret;
  }
  IndexedDispReg operator-(intptr_t disp) const {
    auto ret = *this;
    ret.disp -= disp;
    return ret;
  }

  Reg64 base;
  Reg64 index;
  int scale;
  intptr_t disp; // TODO #4613274: should be int32_t
};
// rip+x
struct DispRIP {
  explicit DispRIP(intptr_t disp) : disp(disp) {}

  RIPRelativeRef operator*() const;
  RIPRelativeRef operator[](intptr_t x) const;

  DispRIP operator+(intptr_t x) const {
    return DispRIP(disp + x);
  }
  DispRIP operator-(intptr_t x) const {
    return DispRIP(disp - x);
  }

  intptr_t disp;
};
// *(reg + x)
struct MemoryRef {
  explicit MemoryRef(DispReg dr) : r(dr) {}
  explicit MemoryRef(IndexedDispReg idr) : r(idr) {}
  IndexedDispReg r;
};

// *(rip + x)
struct RIPRelativeRef {
  explicit RIPRelativeRef(DispRIP r) : r(r) {}
  DispRIP r;
};
inline MemoryRef IndexedDispReg::operator*() const {
  return MemoryRef(*this);
}

inline MemoryRef IndexedDispReg::operator[](intptr_t x) const {
  return *(*this + x);
}

inline MemoryRef DispReg::operator*() const {
  return MemoryRef(*this);
}

inline MemoryRef DispReg::operator[](intptr_t x) const {
  return *(*this + x);
}

inline RIPRelativeRef DispRIP::operator*() const {
  return RIPRelativeRef(*this);
}

inline RIPRelativeRef DispRIP::operator[](intptr_t x) const {
  return *(*this + x);
}
inline DispReg operator+(Reg64 r, intptr_t d) { return DispReg(r, d); }
inline DispReg operator-(Reg64 r, intptr_t d) { return DispReg(r, -d); }
inline DispRIP operator+(RegRIP r, intptr_t d) { return DispRIP(d); }
inline DispRIP operator-(RegRIP r, intptr_t d) { return DispRIP(-d); }
inline ScaledIndex operator*(Reg64 r, int scale) {
  return ScaledIndex(r, scale);
}
inline IndexedDispReg operator+(Reg64 base, ScaledIndex sr) {
  return IndexedDispReg(base, sr);
}
inline ScaledIndexDisp operator+(ScaledIndex si, intptr_t disp) {
  return ScaledIndexDisp(si, disp);
}
inline IndexedDispReg operator+(Reg64 b, Reg64 i) {
  return b + ScaledIndex(i, 0x1);
}

inline MemoryRef operator*(Reg64 r)  { return MemoryRef(DispReg(r)); }
inline DispRIP operator*(RegRIP r)   { return DispRIP(0); }
inline MemoryRef Reg64::operator[](intptr_t disp) const {
  return *(*this + disp);
}

inline MemoryRef Reg64::operator[](Reg64 idx) const {
  return *(*this + idx * 1);
}

inline MemoryRef Reg64::operator[](ScaledIndex si) const {
  return *(*this + si);
}

inline MemoryRef Reg64::operator[](DispReg dr) const {
  return *(*this + ScaledIndex(dr.base, 0x1) + dr.disp);
}

inline MemoryRef Reg64::operator[](ScaledIndexDisp sid) const {
  return *(*this + sid.si + sid.disp);
}

inline RIPRelativeRef RegRIP::operator[](intptr_t disp) const {
  return *(*this + disp);
}
/*
 * Used for the x64 addressing mode where there is a displacement,
 * possibly with a scaled index, but no base register.
 */
inline MemoryRef baseless(intptr_t disp) { return *(DispReg { disp }); }
inline MemoryRef baseless(ScaledIndexDisp sid) {
  return *(IndexedDispReg { sid });
}
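
// Illustrative sketch (not part of the API): the overloads above let
// address expressions compose step by step, using the registers declared
// in namespace reg below.
//
//   using namespace reg;
//   auto m1 = *rax;                    // MemoryRef for 0(%rax)
//   auto m2 = rax[0x10];               // MemoryRef for 0x10(%rax)
//   auto m3 = rax[rbx*4 + 8];          // MemoryRef for 8(%rax,%rbx,4)
//   auto m4 = baseless(rcx*8 + 0x20);  // 0x20(,%rcx,8), no base register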
//////////////////////////////////////////////////////////////////////

namespace reg {
  constexpr Reg64 rax(0);
  constexpr Reg64 rcx(1);
  constexpr Reg64 rdx(2);
  constexpr Reg64 rbx(3);
  constexpr Reg64 rsp(4);
  constexpr Reg64 rbp(5);
  constexpr Reg64 rsi(6);
  constexpr Reg64 rdi(7);

  constexpr Reg64 r8 (8);
  constexpr Reg64 r9 (9);
  constexpr Reg64 r10(10);
  constexpr Reg64 r11(11);
  constexpr Reg64 r12(12);
  constexpr Reg64 r13(13);
  constexpr Reg64 r14(14);
  constexpr Reg64 r15(15);

  constexpr RegRIP rip = RegRIP();

  constexpr Reg32 eax (0);
  constexpr Reg32 ecx (1);
  constexpr Reg32 edx (2);
  constexpr Reg32 ebx (3);
  constexpr Reg32 esp (4);
  constexpr Reg32 ebp (5);
  constexpr Reg32 esi (6);
  constexpr Reg32 edi (7);
  constexpr Reg32 r8d (8);
  constexpr Reg32 r9d (9);
  constexpr Reg32 r10d(10);
  constexpr Reg32 r11d(11);
  constexpr Reg32 r12d(12);
  constexpr Reg32 r13d(13);
  constexpr Reg32 r14d(14);
  constexpr Reg32 r15d(15);

  constexpr Reg16 ax  (0);
  constexpr Reg16 cx  (1);
  constexpr Reg16 dx  (2);
  constexpr Reg16 bx  (3);
  constexpr Reg16 sp  (4);
  constexpr Reg16 bp  (5);
  constexpr Reg16 si  (6);
  constexpr Reg16 di  (7);
  constexpr Reg16 r8w (8);
  constexpr Reg16 r9w (9);
  constexpr Reg16 r10w(10);
  constexpr Reg16 r11w(11);
  constexpr Reg16 r12w(12);
  constexpr Reg16 r13w(13);
  constexpr Reg16 r14w(14);
  constexpr Reg16 r15w(15);

  constexpr Reg8 al  (0);
  constexpr Reg8 cl  (1);
  constexpr Reg8 dl  (2);
  constexpr Reg8 bl  (3);
  constexpr Reg8 spl (4);
  constexpr Reg8 bpl (5);
  constexpr Reg8 sil (6);
  constexpr Reg8 dil (7);
  constexpr Reg8 r8b (8);
  constexpr Reg8 r9b (9);
  constexpr Reg8 r10b(10);
  constexpr Reg8 r11b(11);
  constexpr Reg8 r12b(12);
  constexpr Reg8 r13b(13);
  constexpr Reg8 r14b(14);
  constexpr Reg8 r15b(15);

  // Reminder: these registers may not be mixed in any instruction
  // using a REX prefix (i.e. anything using r8-r15, spl, bpl, sil,
  // dil, etc).
  constexpr Reg8 ah(0x80 | 4);
  constexpr Reg8 ch(0x80 | 5);
  constexpr Reg8 dh(0x80 | 6);
  constexpr Reg8 bh(0x80 | 7);

  constexpr RegXMM xmm0(0);
  constexpr RegXMM xmm1(1);
  constexpr RegXMM xmm2(2);
  constexpr RegXMM xmm3(3);
  constexpr RegXMM xmm4(4);
  constexpr RegXMM xmm5(5);
  constexpr RegXMM xmm6(6);
  constexpr RegXMM xmm7(7);
  constexpr RegXMM xmm8(8);
  constexpr RegXMM xmm9(9);
  constexpr RegXMM xmm10(10);
  constexpr RegXMM xmm11(11);
  constexpr RegXMM xmm12(12);
  constexpr RegXMM xmm13(13);
  constexpr RegXMM xmm14(14);
  constexpr RegXMM xmm15(15);
#define X(x) if (r == x) return "%"#x
  inline const char* regname(Reg64 r) {
    X(rax); X(rbx); X(rcx); X(rdx); X(rsp); X(rbp); X(rsi); X(rdi);
    X(r8); X(r9); X(r10); X(r11); X(r12); X(r13); X(r14); X(r15);
    return nullptr;
  }
  inline const char* regname(Reg32 r) {
    X(eax); X(ecx); X(edx); X(ebx); X(esp); X(ebp); X(esi); X(edi);
    X(r8d); X(r9d); X(r10d); X(r11d); X(r12d); X(r13d); X(r14d); X(r15d);
    return nullptr;
  }
  inline const char* regname(Reg16 r) {
    X(ax); X(cx); X(dx); X(bx); X(sp); X(bp); X(si); X(di);
    X(r8w); X(r9w); X(r10w); X(r11w); X(r12w); X(r13w); X(r14w); X(r15w);
    return nullptr;
  }
  inline const char* regname(Reg8 r) {
    X(al); X(cl); X(dl); X(bl); X(spl); X(bpl); X(sil); X(dil);
    X(r8b); X(r9b); X(r10b); X(r11b); X(r12b); X(r13b); X(r14b); X(r15b);
    X(ah); X(ch); X(dh); X(bh);
    return nullptr;
  }
  inline const char* regname(RegXMM r) {
    X(xmm0); X(xmm1); X(xmm2); X(xmm3); X(xmm4); X(xmm5); X(xmm6);
    X(xmm7); X(xmm8); X(xmm9); X(xmm10); X(xmm11); X(xmm12); X(xmm13);
    X(xmm14); X(xmm15);
    return nullptr;
  }
  inline const char* regname(RegSF r) {
    return "%flags";
  }
#undef X

}
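
// For illustration only: regname is handy when tracing emitted code, e.g.
//
//   printf("scratch register is %s\n", regname(reg::r10));  // "%r10"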
//////////////////////////////////////////////////////////////////////

enum X64InstrFlags {
  IF_REVERSE     = 0x0001, // The operand encoding for some instructions is
                           // "backwards" in x64; these instructions are
                           // called "reverse" instructions. There are a few
                           // details about emitting "reverse" instructions:
                           // (1) for the R_M address mode, we use the MR
                           // opcode, (2) for M_R and R address modes, we use
                           // the RM opcode, and (3) for the R_R address mode,
                           // we still use MR opcode, but we have to swap the
                           // first argument and the second argument.

  IF_TWOBYTEOP   = 0x0002, // Some instructions have two byte opcodes. For
                           // these instructions, an additional byte (0x0F) is
                           // emitted before the standard opcode byte.

  IF_JCC         = 0x0004, // instruction is jcc
  IF_IMUL        = 0x0008, // instruction is imul
  IF_HAS_IMM8    = 0x0010, // instruction has an encoding that takes an 8-bit
                           // immediate
  IF_SHIFT       = 0x0020, // instruction is rol, ror, rcl, rcr, shl, shr, sar
  IF_RET         = 0x0040, // instruction is ret
  IF_SHIFTD      = 0x0080, // instruction is shld, shrd
  IF_NO_REXW     = 0x0100, // rexW prefix is not needed
  IF_MOV         = 0x0200, // instruction is mov
  IF_COMPACTR    = 0x0400, // instruction supports compact-R encoding
  IF_RAX         = 0x0800, // instruction supports special rax encoding
  IF_XCHG        = 0x1000, // instruction is xchg (not xchgb)
  IF_BYTEREG     = 0x2000, // instruction is movzbq, movsbq
  IF_66PREFIXED  = 0x4000, // instruction requires a mandatory 0x66 prefix
  IF_F3PREFIXED  = 0x8000, // instruction requires a mandatory 0xf3 prefix
  IF_F2PREFIXED  = 0x10000, // instruction requires a mandatory 0xf2 prefix
  IF_THREEBYTEOP = 0x20000, // instruction requires a 0x0F 0x3A prefix
  IF_ROUND       = 0x40000, // instruction is round(sp)d
};
/*
  Address mode to table index map:
      Table index 0 <- R_R / M_R(n) / R_M(r) / R(n)
      Table index 1 <- R_M(n) / M_R(r) / R(r)
      Table index 2 <- I / R_I / M_I / R_R_I / M_R_I / R_M_I
      Table index 3 <- "/digit" value used by the above address modes
      Table index 4 <- special R_I (for rax)
      Table index 5 <- compact-R / none

  (n) - for normal instructions only (IF_REVERSE flag is not set)
  (r) - for reverse instructions only (IF_REVERSE flag is set)

  0xF1 is used to indicate invalid opcodes.
*/

struct X64Instr {
  unsigned char table[6];
  unsigned long flags;
};
//                                    0    1    2    3    4    5      flags
const X64Instr instr_divsd =    { { 0x5E,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_movups =   { { 0x10,0x11,0xF1,0x00,0xF1,0xF1 }, 0x0103  };
const X64Instr instr_movdqa =   { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103  };
const X64Instr instr_movdqu =   { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103  };
const X64Instr instr_movsd =    { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_gpr2xmm =  { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmm2gpr =  { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmmsub =   { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmadd =   { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmmul =   { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmsqrt =  { { 0x51,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_ucomisd =  { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_pxor =     { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_psrlq =    { { 0xF1,0xF1,0x73,0x02,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_psllq =    { { 0xF1,0xF1,0x73,0x06,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_cvtsi2sd = { { 0x2a,0x2a,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_cvttsd2si ={ { 0x2c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_lddqu =    { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
const X64Instr instr_unpcklpd = { { 0x14,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_jmp =      { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910  };
const X64Instr instr_call =     { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900  };
const X64Instr instr_push =     { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510  };
const X64Instr instr_pop =      { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500  };
const X64Instr instr_inc =      { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_dec =      { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_not =      { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_notb =     { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_neg =      { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_negb =     { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_add =      { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810  };
const X64Instr instr_addb =     { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810  };
const X64Instr instr_sub =      { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810  };
const X64Instr instr_subb =     { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810  };
const X64Instr instr_and =      { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810  };
const X64Instr instr_andb =     { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810  };
const X64Instr instr_or =       { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810  };
const X64Instr instr_orb =      { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810  };
const X64Instr instr_xor =      { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810  };
const X64Instr instr_xorb =     { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810  };
const X64Instr instr_mov =      { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600  };
const X64Instr instr_movb =     { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610  };
const X64Instr instr_test =     { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800  };
const X64Instr instr_testb =    { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810  };
const X64Instr instr_cmp =      { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810  };
const X64Instr instr_cmpb =     { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810  };
const X64Instr instr_sbb =      { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810  };
const X64Instr instr_sbbb =     { { 0x18,0x1A,0x80,0x03,0x1C,0xF1 }, 0x0810  };
const X64Instr instr_adc =      { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810  };
const X64Instr instr_lea =      { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchgb =    { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchg =     { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000  };
const X64Instr instr_imul =     { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019  };
const X64Instr instr_mul =      { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_div =      { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_idiv =     { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_cdq =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400  };
const X64Instr instr_ret =      { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540  };
const X64Instr instr_jcc =      { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114  };
const X64Instr instr_cmovcc =   { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_setcc =    { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102  };
const X64Instr instr_movswx =   { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movsbx =   { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_movzwx =   { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movzbx =   { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_cwde =     { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400  };
const X64Instr instr_cqo =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0000  };
const X64Instr instr_rol =      { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_ror =      { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcl =      { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcr =      { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shl =      { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shr =      { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_sar =      { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_xadd =     { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_cmpxchg =  { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_nop =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500  };
const X64Instr instr_shld =     { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_shrd =     { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_int3 =     { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500  };
const X64Instr instr_roundsd =  { { 0xF1,0xF1,0x0b,0x00,0xF1,0xF1 }, 0x64112 };
const X64Instr instr_cmpsd =    { { 0xF1,0xF1,0xC2,0xF1,0xF1,0xF1 }, 0x10112 };
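
// Reading the table above, using instr_add as an example (a sketch of the
// scheme, not extra functionality): index 0 (0x01) is the M_R/R_R opcode
// ("add r/m64, r64"), index 1 (0x03) the R_M opcode, index 2 (0x81) the
// immediate form, index 3 (0x00) the "/digit" for that form, index 4 (0x05)
// the short "add rax, imm32" encoding, and index 5 is unused (0xF1). The
// flags 0x0810 are IF_RAX | IF_HAS_IMM8: add has both the rax shortcut and
// a sign-extended imm8 form (opcode 0x83, i.e. table[2] | 2).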
enum class RoundDirection : ssize_t {
  nearest  = 0,
  floor    = 1,
  ceil     = 2,
  truncate = 3,
};

const char* show(RoundDirection);

enum class ComparisonPred : uint8_t {
  // True if...
  eq_ord = 0,    // ...operands are ordered AND equal
  ne_unord = 4,  // ...operands are unordered OR unequal
};
enum ConditionCode {
  CC_None = -1,
  CC_O    = 0x00,
  CC_NO   = 0x01,

  CC_B    = 0x02,
  CC_NAE  = 0x02,
  CC_AE   = 0x03,
  CC_NB   = 0x03,
  CC_NC   = 0x03,

  CC_E    = 0x04,
  CC_Z    = 0x04,
  CC_NE   = 0x05,
  CC_NZ   = 0x05,

  CC_BE   = 0x06,
  CC_NA   = 0x06,
  CC_A    = 0x07,
  CC_NBE  = 0x07,

  CC_S    = 0x08,
  CC_NS   = 0x09,

  CC_P    = 0x0A,
  CC_NP   = 0x0B,

  CC_L    = 0x0C,
  CC_NGE  = 0x0C,
  CC_GE   = 0x0D,
  CC_NL   = 0x0D,

  CC_LE   = 0x0E,
  CC_NG   = 0x0E,
  CC_G    = 0x0F,
  CC_NLE  = 0x0F,
};

// names of condition codes, indexable by the ConditionCode enum value.
extern const char* cc_names[];
inline ConditionCode ccNegate(ConditionCode c) {
  return ConditionCode(int(c) ^ 1); // And you thought x86 was irregular!
}
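
// For example (illustrative only): condition codes come in inverse pairs
// differing only in the low bit, so ccNegate(CC_E) == CC_NE
// (0x04 ^ 1 == 0x05) and ccNegate(CC_L) == CC_GE (0x0C ^ 1 == 0x0D).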
///////////////////////////////////////////////////////////////////////////////

struct Label;

/*
 * Copyright (c) 2009, Andrew J. Paroski
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * The names of the contributors may not be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL ANDREW J. PAROSKI BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
struct X64Assembler {
private:
  friend struct Label;

  /*
   * Type for register numbers, independent of the size we're going to
   * be using it as. Also, the same register number may mean different
   * physical registers for different instructions (e.g. xmm0 and rax
   * are both 0). Only for internal use in X64Assembler.
   */
  enum class RegNumber : int {};
  static const RegNumber noreg = RegNumber(-1);
public:
  explicit X64Assembler(CodeBlock& cb) : codeBlock(cb) {}

  X64Assembler(const X64Assembler&) = delete;
  X64Assembler& operator=(const X64Assembler&) = delete;

  CodeBlock& code() const { return codeBlock; }

  CodeAddress base() const {
    return codeBlock.base();
  }

  CodeAddress frontier() const {
    return codeBlock.frontier();
  }

  CodeAddress toDestAddress(CodeAddress addr) const {
    return codeBlock.toDestAddress(addr);
  }

  void setFrontier(CodeAddress newFrontier) {
    codeBlock.setFrontier(newFrontier);
  }

  size_t capacity() const {
    return codeBlock.capacity();
  }

  size_t used() const {
    return codeBlock.used();
  }

  size_t available() const {
    return codeBlock.available();
  }

  bool contains(CodeAddress addr) const {
    return codeBlock.contains(addr);
  }

  bool empty() const {
    return codeBlock.empty();
  }

  void clear() {
    codeBlock.clear();
  }

  bool canEmit(size_t nBytes) const {
    assert(capacity() >= used());
    return nBytes < (capacity() - used());
  }
  /*
   * The following section defines the main interface for emitting
   * x64.
   *
   * Simple Examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   */
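
  /*
   * A fuller sketch of typical use (illustrative; how the CodeBlock gets
   * pointed at writable, executable memory is up to the caller and not
   * shown here):
   *
   *   CodeBlock cb;
   *   // ... initialize cb ...
   *   X64Assembler a { cb };
   *   a.  movq (reg::rdi, reg::rax);   // return the first argument ...
   *   a.  addq (0x1, reg::rax);        // ... plus one
   *   a.  ret  ();
   */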
#define BYTE_LOAD_OP(name, instr)                                     \
  void name##b(MemoryRef m, Reg8 r)        { instrMR(instr, m, r); }

#define LOAD_OP(name, instr)                                          \
  void name##q(MemoryRef m, Reg64 r)       { instrMR(instr, m, r); }  \
  void name##l(MemoryRef m, Reg32 r)       { instrMR(instr, m, r); }  \
  void name##w(MemoryRef m, Reg16 r)       { instrMR(instr, m, r); }  \
  void name##q(RIPRelativeRef m, Reg64 r)  { instrMR(instr, m, r); }  \
  BYTE_LOAD_OP(name, instr##b)

#define BYTE_STORE_OP(name, instr)                                    \
  void name##b(Reg8 r, MemoryRef m)        { instrRM(instr, r, m); }  \
  void name##b(Immed i, MemoryRef m)       { instrIM8(instr, i, m); }

#define STORE_OP(name, instr)                                         \
  void name##w(Immed i, MemoryRef m)       { instrIM16(instr, i, m); } \
  void name##l(Immed i, MemoryRef m)       { instrIM32(instr, i, m); } \
  void name##w(Reg16 r, MemoryRef m)       { instrRM(instr, r, m); }  \
  void name##l(Reg32 r, MemoryRef m)       { instrRM(instr, r, m); }  \
  void name##q(Reg64 r, MemoryRef m)       { instrRM(instr, r, m); }  \
  BYTE_STORE_OP(name, instr ## b)

#define BYTE_REG_OP(name, instr)                                      \
  void name##b(Reg8 r1, Reg8 r2)           { instrRR(instr, r1, r2); } \
  void name##b(Immed i, Reg8 r)            { instrIR(instr, i, r); }

#define REG_OP(name, instr)                                           \
  void name##q(Reg64 r1, Reg64 r2)         { instrRR(instr, r1, r2); } \
  void name##l(Reg32 r1, Reg32 r2)         { instrRR(instr, r1, r2); } \
  void name##w(Reg16 r1, Reg16 r2)         { instrRR(instr, r1, r2); } \
  void name##l(Immed i, Reg32 r)           { instrIR(instr, i, r); }  \
  void name##w(Immed i, Reg16 r)           { instrIR(instr, i, r); }  \
  BYTE_REG_OP(name, instr##b)

  /*
   * For when we have a memory operand and the operand size is
   * 64 bits, only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64_STORE_OP(name, instr)       \
  void name##q(Immed i, MemoryRef m) {    \
    return instrIM(instr, i, m);          \
  }

  /*
   * For instructions other than movq, even when the operand size is
   * 64 bits only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64R_OP(name, instr)            \
  void name##q(Immed imm, Reg64 r) {      \
    always_assert(imm.fits(sz::dword));   \
    return instrIR(instr, imm, r);        \
  }

#define FULL_OP(name, instr)      \
  LOAD_OP(name, instr)            \
  STORE_OP(name, instr)           \
  REG_OP(name, instr)             \
  IMM64_STORE_OP(name, instr)     \
  IMM64R_OP(name, instr)
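
// To illustrate the expansion (a sketch, not generated code): FULL_OP(add,
// instr_add) below produces addq/addl/addw/addb overloads covering
// memory-to-register loads, register- and immediate-to-memory stores,
// register-register forms, and immediate-to-register forms, all routed
// through the instrMR/instrRM/instrRR/instrIR/instrIM helpers.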
  // We rename x64's mov to store and load for improved code
  // readability.
  LOAD_OP        (load,  instr_mov)
  STORE_OP       (store, instr_mov)
  IMM64_STORE_OP (store, instr_mov)
  REG_OP         (mov,   instr_mov)

  FULL_OP(add, instr_add)
  FULL_OP(xor, instr_xor)
  FULL_OP(sub, instr_sub)
  FULL_OP(and, instr_and)
  FULL_OP(or,  instr_or)
  FULL_OP(test,instr_test)
  FULL_OP(cmp, instr_cmp)
  FULL_OP(sbb, instr_sbb)

#undef IMM64_STORE_OP
#undef IMM64R_OP
#undef FULL_OP
#undef REG_OP
#undef STORE_OP
#undef LOAD_OP
#undef BYTE_LOAD_OP
#undef BYTE_STORE_OP
#undef BYTE_REG_OP
  // 64-bit immediates work with mov to a register.
  void movq(Immed64 imm, Reg64 r) { instrIR(instr_mov, imm, r); }

  // movzbx is a special snowflake. We don't have movzbq because it behaves
  // exactly the same as movzbl but takes an extra byte.
  void loadzbl(MemoryRef m, Reg32 r)  { instrMR(instr_movzbx, m, rbyte(r)); }
  void movzbl(Reg8 src, Reg32 dest)   { emitRR32(instr_movzbx, rn(src), rn(dest)); }
  void movsbl(Reg8 src, Reg32 dest)   { emitRR(instr_movsbx, rn(src), rn(dest)); }
  void movzwl(Reg16 src, Reg32 dest)  { emitRR32(instr_movzwx, rn(src), rn(dest)); }

  void loadsbq(MemoryRef m, Reg64 r)  { instrMR(instr_movsbx, m, r); }
  void movsbq(Reg8 src, Reg64 dest)   { emitRR(instr_movsbx, rn(src), rn(dest)); }
  void lea(MemoryRef p, Reg64 reg)      { instrMR(instr_lea, p, reg); }
  void lea(RIPRelativeRef p, Reg64 reg) { instrMR(instr_lea, p, reg); }

  void xchgq(Reg64 r1, Reg64 r2) { instrRR(instr_xchg, r1, r2); }
  void xchgl(Reg32 r1, Reg32 r2) { instrRR(instr_xchg, r1, r2); }
  void xchgb(Reg8 r1, Reg8 r2)   { instrRR(instr_xchgb, r1, r2); }

  void imul(Reg64 r1, Reg64 r2)  { instrRR(instr_imul, r1, r2); }

  void push(Reg64 r)  { instrR(instr_push, r); }
  void pushl(Reg32 r) { instrR(instr_push, r); }
  void pop (Reg64 r)  { instrR(instr_pop,  r); }
  void idiv(Reg64 r)  { instrR(instr_idiv, r); }
  void incq(Reg64 r)  { instrR(instr_inc,  r); }
  void incl(Reg32 r)  { instrR(instr_inc,  r); }
  void incw(Reg16 r)  { instrR(instr_inc,  r); }
  void decq(Reg64 r)  { instrR(instr_dec,  r); }
  void decl(Reg32 r)  { instrR(instr_dec,  r); }
  void decw(Reg16 r)  { instrR(instr_dec,  r); }
  void notb(Reg8 r)   { instrR(instr_notb, r); }
  void not(Reg64 r)   { instrR(instr_not,  r); }
  void neg(Reg64 r)   { instrR(instr_neg,  r); }
  void negb(Reg8 r)   { instrR(instr_negb, r); }
  void ret()          { emit(instr_ret); }
  void ret(Immed i)   { emitI(instr_ret, i.w(), sz::word); }
  void cqo()          { emit(instr_cqo); }
  void nop()          { emit(instr_nop); }
  void int3()         { emit(instr_int3); }
  void ud2()          { byte(0x0f); byte(0x0b); }
  void pushf()        { byte(0x9c); }
  void popf()         { byte(0x9d); }
  void lock()         { byte(0xF0); }

  void push(MemoryRef m) { instrM(instr_push, m); }
  void pop (MemoryRef m) { instrM(instr_pop,  m); }
  void incq(MemoryRef m) { instrM(instr_inc,  m); }
  void incl(MemoryRef m) { instrM32(instr_inc, m); }
  void incw(MemoryRef m) { instrM16(instr_inc, m); }
  void decq(MemoryRef m) { instrM(instr_dec,  m); }
  void decl(MemoryRef m) { instrM32(instr_dec, m); }
  void decw(MemoryRef m) { instrM16(instr_dec, m); }

  void push(Immed64 i) { emitI(instr_push, i.q()); }
  void movups(RegXMM x, MemoryRef m)      { instrRM(instr_movups, x, m); }
  void movups(MemoryRef m, RegXMM x)      { instrMR(instr_movups, m, x); }
  void movdqu(RegXMM x, MemoryRef m)      { instrRM(instr_movdqu, x, m); }
  void movdqu(MemoryRef m, RegXMM x)      { instrMR(instr_movdqu, m, x); }
  void movdqa(RegXMM x, RegXMM y)         { instrRR(instr_movdqa, x, y); }
  void movdqa(RegXMM x, MemoryRef m)      { instrRM(instr_movdqa, x, m); }
  void movdqa(MemoryRef m, RegXMM x)      { instrMR(instr_movdqa, m, x); }
  void movsd (RegXMM x, RegXMM y)         { instrRR(instr_movsd, x, y); }
  void movsd (RegXMM x, MemoryRef m)      { instrRM(instr_movsd, x, m); }
  void movsd (MemoryRef m, RegXMM x)      { instrMR(instr_movsd, m, x); }
  void movsd (RIPRelativeRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
  void lddqu (MemoryRef m, RegXMM x)      { instrMR(instr_lddqu, m, x); }
  void unpcklpd(RegXMM s, RegXMM d)       { instrRR(instr_unpcklpd, d, s); }

  void rorq (Immed i, Reg64 r) { instrIR(instr_ror, i, r); }
  void shlq (Immed i, Reg64 r) { instrIR(instr_shl, i, r); }
  void shrq (Immed i, Reg64 r) { instrIR(instr_shr, i, r); }
  void sarq (Immed i, Reg64 r) { instrIR(instr_sar, i, r); }
  void shll (Immed i, Reg32 r) { instrIR(instr_shl, i, r); }
  void shrl (Immed i, Reg32 r) { instrIR(instr_shr, i, r); }
  void shlw (Immed i, Reg16 r) { instrIR(instr_shl, i, r); }
  void shrw (Immed i, Reg16 r) { instrIR(instr_shr, i, r); }

  void shlq (Reg64 r) { instrR(instr_shl, r); }
  void sarq (Reg64 r) { instrR(instr_sar, r); }

  void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
    emitIRR(instr_roundsd, rn(dst), rn(src), ssize_t(d));
  }

  void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
    emitIRR(instr_cmpsd, rn(dst), rn(src), ssize_t(pred));
  }
  /*
   * Control-flow directives. Primitive labeling/patching facilities
   * are available, as well as slightly higher-level ones via the
   * Label class.
   */

  bool jmpDeltaFits(CodeAddress dest) {
    int64_t delta = dest - (codeBlock.frontier() + 5);
    return deltaFits(delta, sz::dword);
  }
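
  // Illustrative note (not part of the API): a rel32 jmp is five bytes
  // (0xE9 + imm32), so the delta is measured from the end of the would-be
  // instruction. E.g. a jmp at frontier 0x1000 targeting 0x2000 would
  // encode the displacement 0x2000 - 0x1005 = 0xFFB.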
  void jmp(Reg64 r)           { instrR(instr_jmp, r); }
  void jmp(MemoryRef m)       { instrM(instr_jmp, m); }
  void jmp(RIPRelativeRef m)  { instrM(instr_jmp, m); }
  void call(Reg64 r)          { instrR(instr_call, r); }
  void call(MemoryRef m)      { instrM(instr_call, m); }
  void call(RIPRelativeRef m) { instrM(instr_call, m); }

  void jmp8(CodeAddress dest) { emitJ8(instr_jmp, ssize_t(dest)); }

  void jmp(CodeAddress dest) {
    always_assert(dest && jmpDeltaFits(dest));
    emitJ32(instr_jmp, ssize_t(dest));
  }

  void call(CodeAddress dest) {
    always_assert(dest && jmpDeltaFits(dest));
    emitJ32(instr_call, ssize_t(dest));
  }

  void jcc(ConditionCode cond, CodeAddress dest) {
    emitCJ32(instr_jcc, cond, (ssize_t)dest);
  }

  void jcc8(ConditionCode cond, CodeAddress dest) {
    emitCJ8(instr_jcc, cond, (ssize_t)dest);
  }
  void jmpAuto(CodeAddress dest) {
    auto delta = dest - (codeBlock.frontier() + 2);
    if (deltaFits(delta, sz::byte)) {
      jmp8(dest);
    } else {
      jmp(dest);
    }
  }

  void jccAuto(ConditionCode cc, CodeAddress dest) {
    auto delta = dest - (codeBlock.frontier() + 2);
    if (deltaFits(delta, sz::byte)) {
      jcc8(cc, dest);
    } else {
      jcc(cc, dest);
    }
  }

  void call(Label&);
  void jmp(Label&);
  void jmp8(Label&);
  void jcc(ConditionCode, Label&);
  void jcc8(ConditionCode, Label&);
#define CCS              \
  CC(o,   CC_O)          \
  CC(no,  CC_NO)         \
  CC(nae, CC_NAE)        \
  CC(ae,  CC_AE)         \
  CC(nb,  CC_NB)         \
  CC(e,   CC_E)          \
  CC(z,   CC_Z)          \
  CC(ne,  CC_NE)         \
  CC(nz,  CC_NZ)         \
  CC(b,   CC_B)          \
  CC(be,  CC_BE)         \
  CC(nbe, CC_NBE)        \
  CC(s,   CC_S)          \
  CC(ns,  CC_NS)         \
  CC(p,   CC_P)          \
  CC(np,  CC_NP)         \
  CC(nge, CC_NGE)        \
  CC(g,   CC_G)          \
  CC(l,   CC_L)          \
  CC(ge,  CC_GE)         \
  CC(nl,  CC_NL)         \
  CC(ng,  CC_NG)         \
  CC(le,  CC_LE)         \
  CC(nle, CC_NLE)

#define CC(_nm, _code)                                        \
  void j ## _nm(CodeAddress dest)      { jcc(_code, dest); }  \
  void j ## _nm ## 8(CodeAddress dest) { jcc8(_code, dest); } \
  void j ## _nm(Label&);                                      \
  void j ## _nm ## 8(Label&);
  CCS
#undef CC
  void setcc(int cc, Reg8 byteReg) {
    emitCR(instr_setcc, cc, rn(byteReg), sz::byte);
  }

#define CC(_nm, _cond)              \
  void set ## _nm(Reg8 byteReg) {   \
    setcc(_cond, byteReg);          \
  }
  CCS
#undef CC
  void psllq(Immed i, RegXMM r) { emitIR(instr_psllq, rn(r), i.b()); }
  void psrlq(Immed i, RegXMM r) { emitIR(instr_psrlq, rn(r), i.b()); }

  void movq_rx(Reg64 rSrc, RegXMM rdest) {
    emitRR(instr_gpr2xmm, rn(rdest), rn(rSrc));
  }
  void movq_xr(RegXMM rSrc, Reg64 rdest) {
    emitRR(instr_xmm2gpr, rn(rSrc), rn(rdest));
  }

  void addsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmadd, rn(srcdest), rn(src));
  }
  void mulsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmmul, rn(srcdest), rn(src));
  }
  void subsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmsub, rn(srcdest), rn(src));
  }
  void pxor(RegXMM src, RegXMM srcdest) {
    emitRR(instr_pxor, rn(srcdest), rn(src));
  }
  void cvtsi2sd(Reg64 src, RegXMM dest) {
    emitRR(instr_cvtsi2sd, rn(dest), rn(src));
  }
  void cvtsi2sd(MemoryRef m, RegXMM dest) {
    instrMR(instr_cvtsi2sd, m, dest);
  }
  void ucomisd(RegXMM l, RegXMM r) {
    emitRR(instr_ucomisd, rn(l), rn(r));
  }
  void sqrtsd(RegXMM src, RegXMM dest) {
    emitRR(instr_xmmsqrt, rn(dest), rn(src));
  }

  void divsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_divsd, rn(srcdest), rn(src));
  }
  void cvttsd2siq(RegXMM src, Reg64 dest) {
    emitRR(instr_cvttsd2si, rn(dest), rn(src));
  }
  /*
   * The following utility functions do more than emit specific code.
   * (E.g. combine common idioms or patterns, smash code, etc.)
   */

  void emitImmReg(Immed64 imm, Reg64 dest) {
    if (imm.q() == 0) {
      // Zeros the top bits also.
      xorl(r32(dest), r32(dest));
      return;
    }
    if (LIKELY(imm.q() > 0 && imm.fits(sz::dword))) {
      // This will zero out the high-order bits.
      movl(imm.l(), r32(dest));
      return;
    }
    movq(imm.q(), dest);
  }
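
  // For example (a sketch of the three cases): emitImmReg(0, rax) emits
  // "xorl %eax, %eax"; emitImmReg(0x7f, rax) emits "movl $0x7f, %eax",
  // relying on the implicit zeroing of the upper half of the register;
  // anything else falls through to the full movq(Immed64, Reg64) form.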
  static void patchJcc(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
    assert(jmp[0] == 0x0F && (jmp[1] & 0xF0) == 0x80);
    ssize_t diff = dest - (from + 6);
    *(int32_t*)(jmp + 2) = safe_cast<int32_t>(diff);
  }

  static void patchJcc8(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
    assert((jmp[0] & 0xF0) == 0x70);
    ssize_t diff = dest - (from + 2);  // one for opcode, one for offset
    *(int8_t*)(jmp + 1) = safe_cast<int8_t>(diff);
  }

  static void patchJmp(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
    assert(jmp[0] == 0xE9);
    ssize_t diff = dest - (from + 5);
    *(int32_t*)(jmp + 1) = safe_cast<int32_t>(diff);
  }

  static void patchJmp8(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
    assert(jmp[0] == 0xEB);
    ssize_t diff = dest - (from + 2);  // one for opcode, one for offset
    *(int8_t*)(jmp + 1) = safe_cast<int8_t>(diff);
  }

  static void patchCall(CodeAddress call, CodeAddress from, CodeAddress dest) {
    assert(call[0] == 0xE8);
    ssize_t diff = dest - (from + 5);
    *(int32_t*)(call + 1) = safe_cast<int32_t>(diff);
  }
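
  // Sketch of intended use (the surrounding bookkeeping is assumed, not
  // part of this header): after emitting a rel32 jmp at address addr, a
  // later pass can retarget it in place:
  //
  //   X64Assembler::patchJmp(a.toDestAddress(addr), addr, newDest);
  //   // addr[0] stays 0xE9; the four bytes after it become
  //   // newDest - (addr + 5)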
  void emitInt3s(int n) {
    for (auto i = 0; i < n; ++i) {
      byte(0xcc);
    }
  }

  void emitNop(int n) {
    if (n == 0) return;
    static const uint8_t nops[][9] = {
      { },
      { 0x90 },
      { 0x66, 0x90 },
      { 0x0f, 0x1f, 0x00 },
      { 0x0f, 0x1f, 0x40, 0x00 },
      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
    };
    // While n >= 9, emit 9 byte NOPs
    while (n >= 9) {
      bytes(9, nops[9]);
      n -= 9;
    }
    bytes(n, nops[n]);
  }
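
  // E.g. (illustrative): emitNop(13) emits one 9-byte NOP followed by the
  // 4-byte NOP 0x0F 0x1F 0x40 0x00, keeping the padding to two instructions.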
  /*
   * Low-level emitter functions.
   *
   * These functions are the core of the assembler, and can also be
   * used directly.
   */

  void byte(uint8_t b) {
    codeBlock.byte(b);
  }
  void word(uint16_t w) {
    codeBlock.word(w);
  }
  void dword(uint32_t dw) {
    codeBlock.dword(dw);
  }
  void qword(uint64_t qw) {
    codeBlock.qword(qw);
  }
  void bytes(size_t n, const uint8_t* bs) {
    codeBlock.bytes(n, bs);
  }
  // op %r
  // ------
  // Restrictions:
  //     r cannot be set to 'none'
  ALWAYS_INLINE
  void emitCR(X64Instr op, int jcond, RegNumber regN, int opSz = sz::qword) {
    assert(regN != noreg);
    int r = int(regN);

    // Opsize prefix
    if (opSz == sz::word) {
      byte(kOpsizePrefix);
    }

    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // If the instruction supports compact-R mode, use that
    if (op.flags & IF_COMPACTR) {
      byte(op.table[5] | (r & 7));
      return;
    }
    char opcode = (op.flags & IF_REVERSE) ? op.table[1] : op.table[0];
    char rval = op.table[3];
    // Handle two byte opcodes
    if (op.flags & IF_TWOBYTEOP) byte(0x0F);
    byte(opcode | jcond);
    emitModrm(3, rval, r);
  }
  ALWAYS_INLINE
  void emitR(X64Instr op, RegNumber r, int opSz = sz::qword) {
    emitCR(op, 0, r, opSz);
  }

  ALWAYS_INLINE
  void emitR32(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::dword);
  }

  ALWAYS_INLINE
  void emitR16(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::word);
  }
  // op %r2, %r1
  // -----------
  // Restrictions:
  //     r1 cannot be set to noreg
  //     r2 cannot be set to noreg
  ALWAYS_INLINE
  void emitCRR(X64Instr op, int jcond, RegNumber rn1, RegNumber rn2,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    prefixBytes(op.flags, opSz);
    // The xchg instruction is special; we have compact encodings for
    // exchanging with rax or eax.
    if (op.flags & IF_XCHG) {
      if (r1 == int(reg::rax)) {
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
        assert(!(op.flags & IF_BYTEREG));
        if (r2 & 8) rex |= (reverse ? 4 : 1);
        if (rex) byte(0x40 | rex);
        // The first register is rax, so emit the opcode with the second
        // register id embedded
        byte(op.table[5] | (r2 & 7));
        return;
      } else if (r2 == int(reg::rax)) {
        reverse = !reverse;
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) {
          rex |= 8;
        }
        if (r1 & 8) rex |= (reverse ? 1 : 4);
        if (rex) byte(0x40 | rex);
        // The second register is rax, so emit the opcode with the first
        // register id embedded
        byte(op.table[5] | (r1 & 7));
        return;
      }
    }
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    // movzbx's first operand is a bytereg regardless of operand size
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // For two byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
    byte(op.table[0] | jcond);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
  }
  ALWAYS_INLINE
  void emitCRR32(X64Instr op, int jcond, RegNumber r1, RegNumber r2) {
    emitCRR(op, jcond, r1, r2, sz::dword);
  }

  ALWAYS_INLINE
  void emitRR(X64Instr op, RegNumber r1, RegNumber r2, int opSz = sz::qword) {
    emitCRR(op, 0, r1, r2, opSz);
  }

  ALWAYS_INLINE
  void emitRR32(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::dword);
  }

  ALWAYS_INLINE
  void emitRR16(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::word);
  }

  ALWAYS_INLINE
  void emitRR8(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::byte);
  }
  // op $imm, %r
  // -----------
  // Restrictions:
  //     r cannot be set to noreg
  ALWAYS_INLINE
  void emitIR(X64Instr op, RegNumber rname, ssize_t imm,
              int opSz = sz::qword) {
    assert(rname != noreg);
    int r = int(rname);
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // Determine the size of the immediate. This might change opSz so
    // do it first.
    int immSize;
    if ((op.flags & IF_MOV) && opSz == sz::qword) {
      immSize = computeImmediateSizeForMovRI64(op, imm, opSz);
    } else {
      immSize = computeImmediateSize(op, imm, opSz);
    }
    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Use the special rax encoding if the instruction supports it
    if (r == int(reg::rax) && immSize == sz::dword &&
        (op.flags & IF_RAX)) {
      byte(op.table[4]);
      emitImmediate(op, imm, immSize);
      return;
    }
    // Use the compact-R encoding if the operand size and the immediate
    // size are the same
    if ((op.flags & IF_COMPACTR) && immSize == opSz) {
      byte(op.table[5] | (r & 7));
      emitImmediate(op, imm, immSize);
      return;
    }
    // For two byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
    int rval = op.table[3];
    // shift/rotate instructions have special opcode when
    // immediate is 1
    if ((op.flags & IF_SHIFT) != 0 && imm == 1) {
      byte(0xd1);
      emitModrm(3, rval, r);
      // don't emit immediate
      return;
    }
    int opcode = (immSize == sz::byte && opSz != sz::byte) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    emitModrm(3, rval, r);
    emitImmediate(op, imm, immSize);
  }
  ALWAYS_INLINE
  void emitIR32(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, imm, sz::dword);
  }

  ALWAYS_INLINE
  void emitIR16(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int16_t>(imm), sz::word);
  }

  ALWAYS_INLINE
  void emitIR8(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int8_t>(imm), sz::byte);
  }
  // op $imm, %r2, %r1
  // -----------------
  // Restrictions:
  //     r1 cannot be set to noreg
  //     r2 cannot be set to noreg
  ALWAYS_INLINE
  void emitIRR(X64Instr op, RegNumber rn1, RegNumber rn2, ssize_t imm,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    if (op.flags & IF_TWOBYTEOP || op.flags & IF_THREEBYTEOP) byte(0x0F);
    if (op.flags & IF_THREEBYTEOP) byte(0x3a);
    int opcode = (immSize == sz::byte && opSz != sz::byte &&
                  (op.flags & IF_ROUND) == 0) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
    emitImmediate(op, imm, immSize);
  }
  ALWAYS_INLINE
  void emitCI(X64Instr op, int jcond, ssize_t imm, int opSz = sz::qword) {
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // REX
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    // Emit opcode
    if ((op.flags & IF_JCC) != 0) {
      // jcc is weird so we handle it separately
      if (immSize != sz::byte) {
        byte(0x0F);
        byte(jcond | 0x80);
      } else {
        byte(jcond | 0x70);
      }
    } else {
      int opcode = (immSize == sz::byte && opSz != sz::byte) ?
        (op.table[2] | 2) : op.table[2];
      byte(jcond | opcode);
    }
    emitImmediate(op, imm, immSize);
  }

  ALWAYS_INLINE
  void emitI(X64Instr op, ssize_t imm, int opSz = sz::qword) {
    emitCI(op, 0, imm, opSz);
  }
  ALWAYS_INLINE
  void emitJ8(X64Instr op, ssize_t imm) {
    assert((op.flags & IF_JCC) == 0);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode and 8-bit immediate
    byte(0xEB);
    byte(safe_cast<int8_t>(delta));
  }

  ALWAYS_INLINE
  void emitCJ8(X64Instr op, int jcond, ssize_t imm) {
    // this is for jcc only
    assert(op.flags & IF_JCC);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode
    byte(jcond | 0x70);
    // Emit 8-bit offset
    byte(safe_cast<int8_t>(delta));
  }
  ALWAYS_INLINE
  void emitJ32(X64Instr op, ssize_t imm) {
    // call and jmp are supported, jcc is not supported
    assert((op.flags & IF_JCC) == 0);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 5));
    uint8_t* bdelta = (uint8_t*)&delta;
    uint8_t instr[] = { op.table[2],
                        bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(5, instr);
  }

  ALWAYS_INLINE
  void emitCJ32(X64Instr op, int jcond, ssize_t imm) {
    // jcc is supported, call and jmp are not supported
    assert(op.flags & IF_JCC);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 6));
    uint8_t* bdelta = (uint8_t*)&delta;
    uint8_t instr[6] = { 0x0f, uint8_t(0x80 | jcond),
                         bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(6, instr);
  }
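
  // Worked example (illustrative): emitCJ32(instr_jcc, CC_Z, target) with
  // the frontier at F emits 0x0F 0x84 followed by the little-endian 32-bit
  // value target - (F + 6), since the whole instruction is six bytes long.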
1595 // op disp(%br,%ir,s)
1596 // (for reverse == false, hasImmediate == false, r == noreg)
1597 // op $imm, disp(%br,%ir,s)
1598 // (for reverse == false, hasImmediate == true, r == noreg)
1599 // op %r, disp(%br,%ir,s)
1600 // (for reverse == false, hasImmediate == false, r != noreg)
1601 // op $imm, %r, disp(%br,%ir,s)
1602 // (for reverse == false, hasImmediate == true, r != noreg)
1603 // op disp(%br,%ir,s), %r
1604 // (for reverse == true, hasImmediate == false, r != noreg)
1605 // op $imm, disp(%br,%ir,s), %r
1606 // (for reverse == true, hasImmediate == true, r != noreg)
1607 // -----------------------------------------------------------------
1608 // Restrictions:
1609 // ir cannot be set to 'sp'
1610 ALWAYS_INLINE
1611 void emitCMX(X64Instr op, int jcond, RegNumber brName, RegNumber irName,
1612 int s, int64_t disp,
1613 RegNumber rName,
1614 bool reverse = false,
1615 ssize_t imm = 0,
1616 bool hasImmediate = false,
1617 int opSz = sz::qword,
1618 bool ripRelative = false) {
1619 assert(irName != rn(reg::rsp));
1621 int ir = int(irName);
1622 int r = int(rName);
1623 int br = int(brName);
1625 // The opsize prefix can be placed here, if the instruction
1626 // deals with words.
1627 // When an instruction has a manditory prefix, it goes before the
1628 // REX byte if we end up needing one.
1629 prefixBytes(op.flags, opSz);
1631 // Determine immSize from the 'hasImmediate' flag
1632 int immSize = sz::nosize;
1633 if (hasImmediate) {
1634 immSize = computeImmediateSize(op, imm, opSz);
1636 if ((op.flags & IF_REVERSE) != 0) reverse = !reverse;
1637 // Determine if we need to use a two byte opcode;
1638 // imul is weird so we have a special case for it
1639 bool twoByteOpcode = ((op.flags & IF_TWOBYTEOP) != 0) ||
1640 ((op.flags & IF_IMUL) != 0 && rName != noreg &&
1641 immSize == sz::nosize);
1642 // Again, imul is weird
1643 if ((op.flags & IF_IMUL) != 0 && rName != noreg) {
1644 reverse = !reverse;
1646 // The wily rex byte, a multipurpose extension to the opcode space for x64
1647 unsigned char rex = 0;
1648 if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
1650 bool highByteReg = false;
1651 // XXX: This IF_BYTEREG check is a special case for movzbl: we currently
1652 // encode it using an opSz of sz::byte but it doesn't actually have a
1653 // byte-sized operand like other instructions can.
1654 if (!(op.flags & IF_BYTEREG) && opSz == sz::byte && rName != noreg) {
1655 if (byteRegNeedsRex(r)) {
1656 rex |= 0x40;
1658 r = byteRegEncodeNumber(r, highByteReg);
1661 if (rName != noreg && (r & 8)) rex |= 4;
1662 if (irName != noreg && (ir & 8)) rex |= 2;
1663 if (brName != noreg && (br & 8)) rex |= 1;
1664 if (rex) {
1665 byte(0x40 | rex);
1666 if (highByteReg) byteRegMisuse();
1668 // Emit the opcode
1669 if (immSize != sz::nosize) {
1670 if (twoByteOpcode) byte(0x0F);
1671 if (immSize == sz::byte && opSz != sz::byte) {
1672 byte(op.table[2] | 2 | jcond);
1673 } else {
1674 byte(op.table[2] | jcond);
1676 } else {
1677 if (twoByteOpcode) byte(0x0F);
1678 int opcode;
1679 if ((op.flags & IF_IMUL) != 0) {
1680 opcode = (rName == noreg) ? op.table[1] : op.table[0];
1681 } else {
1682 opcode = reverse ? op.table[1] : op.table[0];
1684 byte(opcode | jcond);
1686 // SIB byte if:
1687 // 1. We're using an index register.
1688 // 2. The base register is rsp-like.
1689 // 3. We're doing a baseless disp access and it is not rip-relative.
1690 bool sibIsNeeded =
1691 ir != int(noreg) || /* 1 */
1692 br == int(reg::rsp) || br == int(reg::r12) || /* 2 */
1693 (br == int(noreg) && !ripRelative);
1694 // If there is no register and no immediate, use the /r value
1695 if (r == int(noreg)) r = op.table[3];
1696 // If noreg was specified for 'ir', we use
1697 // the encoding for the sp register
1698 if (ir == int(noreg)) ir = 4;
1699 int dispSize = sz::nosize;
1700 if (disp != 0) {
1701 if (!ripRelative && disp <= 127 && disp >= -128) {
1702 dispSize = sz::byte;
1703 } else {
1704 dispSize = sz::dword;
1707 // Set 'mod' based on the size of the displacement
1708 int mod;
1709 switch (dispSize) {
1710 case sz::nosize: mod = 0; break;
1711 case sz::byte: mod = 1; break;
1712 default: mod = 2; break;
1714 // Handle special cases for 'br'
1715 if (br == int(noreg)) {
1716 // If noreg was specified for 'br', we use the encoding
1717 // for the rbp register (or rip, if we're emitting a
1718 // rip-relative instruction), and we must set mod=0 and
1719 // "upgrade" to a DWORD-sized displacement
1720 br = 5;
1721 mod = 0;
1722 dispSize = sz::dword;
1723 } else if ((br & 7) == 5 && dispSize == sz::nosize) {
1724 // If br == rbp and no displacement was specified, we
1725 // must "upgrade" to using a 1-byte displacement value
1726 dispSize = sz::byte;
1727 mod = 1;
    // Emit modr/m and the sib
    if (sibIsNeeded) {
      // s:                               0  1  2   3  4   5   6   7  8
      static const int scaleLookup[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3 };
      assert(s > 0 && s <= 8);
      int scale = scaleLookup[s];
      assert(scale != -1);
      emitModrm(mod, r, 4);
      byte((scale << 6) | ((ir & 7) << 3) | (br & 7));
    } else {
      emitModrm(mod, r, br);
    }
    // Emit displacement if needed
    if (dispSize == sz::dword) {
      if (ripRelative) {
        disp -= (int64_t)codeBlock.frontier() + immSize + dispSize;
      }
      dword(disp);
    } else if (dispSize == sz::byte) {
      byte(disp & 0xff);
    }
    // Emit immediate if needed
    if (immSize != sz::nosize) {
      emitImmediate(op, imm, immSize);
    }
  }
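  /*
   * A worked example of the path above (ours, not from the original
   * comments): storing %rdi to 0x8(%rax) via the mov opcode 0x89 emits
   *
   *   rex    0x48   0x40 | W (8)
   *   opcode 0x89   mov r/m64, r64
   *   modrm  0x78   mod=1 (disp8), reg=rdi (7), rm=rax (0)
   *   disp8  0x08
   *
   * i.e. the byte sequence 48 89 78 08. A base of rsp or r12 instead
   * forces a SIB byte (case 2 above), and a baseless displacement goes
   * through the rbp/rip row of the modr/m table (br = 5, mod = 0).
   */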
  ALWAYS_INLINE
  void emitIM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitIM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::byte);
  }

  ALWAYS_INLINE
  void emitIM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::word);
  }

  ALWAYS_INLINE
  void emitIM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, sz::dword);
  }

  ALWAYS_INLINE
  void emitRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              RegNumber r, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, opSz);
  }

  ALWAYS_INLINE
  void emitRM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitRM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitRM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::byte);
  }

  ALWAYS_INLINE
  void emitCMR(X64Instr op, int jcond, RegNumber br, RegNumber ir,
               int s, int disp, RegNumber r, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, r, true, 0, false, opSz);
  }

  ALWAYS_INLINE
  void emitMR(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
              RegNumber r, int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, opSz, ripRelative);
  }

  ALWAYS_INLINE
  void emitMR32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitMR16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitMR8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::byte);
  }

  ALWAYS_INLINE
  void emitIRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitIMR(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, true, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitM(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
             int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, opSz,
            ripRelative);
  }

  ALWAYS_INLINE
  void emitM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitCM(X64Instr op, int jcond, RegNumber br,
              RegNumber ir, int s, int disp, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, noreg, false, 0, false, opSz);
  }
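  // A note on the naming scheme above (our summary): each letter after
  // "emit" names an operand -- I an immediate, M a memory operand, R a
  // register, and a leading C a condition code folded into the opcode.
  // So emitIM stores an immediate to memory, emitRM a register to memory
  // (reverse = false), and emitMR loads memory into a register
  // (reverse = true).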
  // emit (with no arguments)
  ALWAYS_INLINE
  void emit(X64Instr op) {
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    byte(op.table[5]);
  }

  // Segment register prefixes.
  X64Assembler& fs() { byte(0x64); return *this; }
  X64Assembler& gs() { byte(0x65); return *this; }
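  // The prefix byte applies to the next instruction emitted, so these are
  // meant to be chained; a hypothetical fs-relative load might read:
  //
  //   a.fs().loadq(rax[0x30], rbx);   // 64 48 8b 58 30
  //
  // (Operands illustrative only; loadq is one of the typed instruction
  // wrappers defined elsewhere in this file.)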
 public:
  /*
   * The following functions use a naming convention for an older API
   * to the assembler; conditional loads and moves haven't yet been
   * ported.
   */

  // CMOVcc [rbase + off], rdest
  inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
                                     int off, Reg64 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/);
  }

  inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
                                     int off, Reg32 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/,
            0 /*imm*/,
            false /*hasImmediate*/,
            sz::dword /*opSz*/);
  }

  inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
    emitCRR(instr_cmovcc, cc, rn(rsrc), rn(rdest));
  }
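  // E.g. (operands hypothetical): cload_reg64_disp_reg64(CC_E, rax, 16, rdx)
  // assembles `cmove 16(%rax), %rdx` -- the load takes effect only when the
  // equal condition holds.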
 private:
  bool byteRegNeedsRex(int rn) const {
    // Without a rex, 4 through 7 mean the high 8-bit byte registers.
    return rn >= 4 && rn <= 7;
  }

  int byteRegEncodeNumber(int rn, bool& seenHigh) const {
    // We flag a bit in ah, ch, dh, bh so byteRegNeedsRex doesn't
    // trigger.
    if (rn & 0x80) seenHigh = true;
    return rn & ~0x80;
  }

  // In 64-bit mode, you can't mix accesses to high byte registers
  // with low byte registers other than al,cl,bl,dl. We assert this.
  void byteRegMisuse() const {
    assert(!"High byte registers can't be used with new x64 registers, or"
            " anything requiring a REX prefix");
  }
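  // Background for the helpers above: encodings 4-7 in a byte instruction
  // mean ah/ch/dh/bh when no REX prefix is present, but spl/bpl/sil/dil
  // once any REX byte appears. The 0x80 marker bit is this assembler's own
  // tag for the high-byte names, which is why they can never be combined
  // with an operand that forces a REX prefix.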
  int computeImmediateSize(X64Instr op,
                           ssize_t imm,
                           int opsize = sz::dword) {
    // Most instructions take a 32-bit or 16-bit immediate,
    // depending on the presence of the opsize prefix (0x66).
    int immSize = opsize == sz::word ? sz::word : sz::dword;
    // ret always takes a 16-bit immediate.
    if (op.flags & IF_RET) {
      immSize = sz::word;
    }
    // Use an 8-bit immediate if the instruction supports it and if
    // the immediate value fits in a byte.
    if (deltaFits(imm, sz::byte) && (op.flags & IF_HAS_IMM8) != 0) {
      immSize = sz::byte;
    }
    return immSize;
  }

  void emitModrm(int x, int y, int z) {
    byte((x << 6) | ((y & 7) << 3) | (z & 7));
  }
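  // The modr/m byte packs three fields: mod in bits 7-6, reg in bits 5-3,
  // and r/m in bits 2-0. E.g. emitModrm(1, 7, 0) yields the 0x78 byte from
  // the worked mov example above (disp8, rdi, rax).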
  /*
   * The mov instruction supports an 8 byte immediate for the RI
   * address mode when opSz is qword. It also supports a 4-byte
   * immediate with opSz qword (the immediate is sign-extended).
   *
   * On the other hand, if it fits in 32-bits as an unsigned, we can
   * change opSz to dword, which will zero the top 4 bytes instead of
   * sign-extending.
   */
  int computeImmediateSizeForMovRI64(X64Instr op, ssize_t imm, int& opSz) {
    assert(opSz == sz::qword);
    if (deltaFits(imm, sz::dword)) {
      return computeImmediateSize(op, imm);
    }
    if (magFits(imm, sz::dword)) {
      opSz = sz::dword;
      return sz::dword;
    }
    return sz::qword;
  }
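  // E.g. (values ours): imm = -1 fits as a signed dword, so the usual
  // sign-extended imm32 form is used. imm = 0xffffffff does not, but its
  // magnitude fits in 32 unsigned bits, so the dword-sized mov is emitted
  // and the hardware zero-extends it. Anything larger, like 0x123456789a,
  // needs the full 8-byte immediate.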
  void emitImmediate(X64Instr op, ssize_t imm, int immSize) {
    if (immSize == sz::nosize) {
      return;
    }
    if ((op.flags & (IF_SHIFT | IF_SHIFTD)) == 0) {
      if (immSize == sz::dword) {
        dword(imm);
      } else if (immSize == sz::byte) {
        byte(imm);
      } else if (immSize == sz::word) {
        word(imm);
      } else {
        qword(imm);
      }
    } else {
      // We always use a byte-sized immediate for shift instructions.
      byte(imm);
    }
  }

  void prefixBytes(unsigned long flags, int opSz) {
    if (opSz == sz::word && !(flags & IF_RET)) byte(kOpsizePrefix);
    if (flags & IF_66PREFIXED) byte(0x66);
    if (flags & IF_F2PREFIXED) byte(0xF2);
    if (flags & IF_F3PREFIXED) byte(0xF3);
  }
 private:
  RegNumber rn(Reg8 r)   { return RegNumber(int(r)); }
  RegNumber rn(Reg16 r)  { return RegNumber(int(r)); }
  RegNumber rn(Reg32 r)  { return RegNumber(int(r)); }
  RegNumber rn(Reg64 r)  { return RegNumber(int(r)); }
  RegNumber rn(RegXMM r) { return RegNumber(int(r)); }

  // Wraps a bunch of the emit* functions to make using them with the
  // typed wrappers more terse. We should have these replace
  // the emit functions eventually.

#define UMR(m)  rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
#define URIP(m) noreg, noreg, sz::byte, m.r.disp
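  // So, e.g., instrRM(op, r, m) below expands to
  //   emitRM(op, rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp, rn(r));
  // the macros just splat a MemoryRef's fields into the RegNumber-based
  // argument lists.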
  void instrR(X64Instr op, Reg64 r)  { emitR(op, rn(r)); }
  void instrR(X64Instr op, Reg32 r)  { emitR32(op, rn(r)); }
  void instrR(X64Instr op, Reg16 r)  { emitR16(op, rn(r)); }
  void instrR(X64Instr op, Reg8 r)   { emitR(op, rn(r), sz::byte); }
  void instrRR(X64Instr op, Reg64 x, Reg64 y)   { emitRR(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg32 x, Reg32 y)   { emitRR32(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg16 x, Reg16 y)   { emitRR16(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg8 x, Reg8 y)     { emitRR8(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
  void instrM(X64Instr op, MemoryRef m)      { emitM(op, UMR(m)); }
  void instrM(X64Instr op, RIPRelativeRef m) { emitM(op, URIP(m),
                                                     sz::qword, true); }
  void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
  void instrM16(X64Instr op, MemoryRef m) { emitM16(op, UMR(m)); }
  void instrRM(X64Instr op,
               Reg64 r,
               MemoryRef m) { emitRM(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg32 r,
               MemoryRef m) { emitRM32(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg16 r,
               MemoryRef m) { emitRM16(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg8 r,
               MemoryRef m) { emitRM8(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               RegXMM x,
               MemoryRef m) { emitRM(op, UMR(m), rn(x)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg64 r) { emitMR(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg32 r) { emitMR32(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg16 r) { emitMR16(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg8 r) { emitMR8(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               RegXMM x) { emitMR(op, UMR(m), rn(x)); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               Reg64 r) { emitMR(op, URIP(m), rn(r),
                                 sz::qword, true); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               RegXMM r) { emitMR(op, URIP(m), rn(r),
                                  sz::qword, true); }
  void instrIR(X64Instr op, Immed64 i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg32 r) {
    emitIR32(op, rn(r), i.l());
  }
  void instrIR(X64Instr op, Immed i, Reg16 r) {
    emitIR16(op, rn(r), i.w());
  }
  void instrIR(X64Instr op, Immed i, Reg8 r) {
    emitIR8(op, rn(r), i.b());
  }

  void instrIM(X64Instr op, Immed i, MemoryRef m) {
    emitIM(op, UMR(m), i.q());
  }
  void instrIM32(X64Instr op, Immed i, MemoryRef m) {
    emitIM32(op, UMR(m), i.l());
  }
  void instrIM16(X64Instr op, Immed i, MemoryRef m) {
    emitIM16(op, UMR(m), i.w());
  }
  void instrIM8(X64Instr op, Immed i, MemoryRef m) {
    emitIM8(op, UMR(m), i.b());
  }

#undef UMR
#undef URIP
  CodeBlock& codeBlock;
};

//////////////////////////////////////////////////////////////////////
struct Label {
  explicit Label()
    : m_a(nullptr)
    , m_address(nullptr)
  {}

  ~Label() {
    if (!m_toPatch.empty()) {
      assert(m_a && m_address && "Label had jumps but was never set");
    }
    for (auto& ji : m_toPatch) {
      auto realSrc = ji.a->toDestAddress(ji.addr);
      switch (ji.type) {
      case Branch::Jmp:  ji.a->patchJmp(realSrc, ji.addr, m_address);  break;
      case Branch::Jmp8: ji.a->patchJmp8(realSrc, ji.addr, m_address); break;
      case Branch::Jcc:  ji.a->patchJcc(realSrc, ji.addr, m_address);  break;
      case Branch::Jcc8: ji.a->patchJcc8(realSrc, ji.addr, m_address); break;
      case Branch::Call: ji.a->patchCall(realSrc, ji.addr, m_address); break;
      }
    }
  }
  Label(const Label&) = delete;
  Label& operator=(const Label&) = delete;

  void jmp(X64Assembler& a) {
    addJump(&a, Branch::Jmp);
    a.jmp(m_address ? m_address : a.frontier());
  }

  void jmp8(X64Assembler& a) {
    addJump(&a, Branch::Jmp8);
    a.jmp8(m_address ? m_address : a.frontier());
  }

  void jcc(X64Assembler& a, ConditionCode cc) {
    addJump(&a, Branch::Jcc);
    a.jcc(cc, m_address ? m_address : a.frontier());
  }

  void jcc8(X64Assembler& a, ConditionCode cc) {
    addJump(&a, Branch::Jcc8);
    a.jcc8(cc, m_address ? m_address : a.frontier());
  }

  void call(X64Assembler& a) {
    addJump(&a, Branch::Call);
    a.call(m_address ? m_address : a.frontier());
  }

  void jmpAuto(X64Assembler& a) {
    assert(m_address);
    auto delta = m_address - (a.frontier() + 2);
    if (deltaFits(delta, sz::byte)) {
      jmp8(a);
    } else {
      jmp(a);
    }
  }

  void jccAuto(X64Assembler& a, ConditionCode cc) {
    assert(m_address);
    auto delta = m_address - (a.frontier() + 2);
    if (deltaFits(delta, sz::byte)) {
      jcc8(a, cc);
    } else {
      jcc(a, cc);
    }
  }

  friend void asm_label(X64Assembler& a, Label& l) {
    assert(!l.m_address && !l.m_a && "Label was already set");
    l.m_a = &a;
    l.m_address = a.frontier();
  }
 private:
  enum class Branch {
    Jcc,
    Jcc8,
    Jmp,
    Jmp8,
    Call
  };

  struct JumpInfo {
    Branch type;
    X64Assembler* a;
    CodeAddress addr;
  };

 private:
  void addJump(X64Assembler* a, Branch type) {
    if (m_address) return;
    JumpInfo info;
    info.type = type;
    info.a = a;
    info.addr = a->codeBlock.frontier();
    m_toPatch.push_back(info);
  }

 private:
  X64Assembler* m_a;
  CodeAddress m_address;
  std::vector<JumpInfo> m_toPatch;
};
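/*
 * Minimal usage sketch (identifiers hypothetical): jumps to a Label may be
 * emitted before the label is bound; they are recorded in m_toPatch and
 * patched when the Label goes out of scope.
 *
 *   Label done;
 *   a.jcc8(CC_E, done);        // forward branch, target not yet known
 *   a.addq(1, reg::rax);
 *   asm_label(a, done);        // bind to the current frontier
 *   // ~Label patches the recorded jcc8 to the bound address.
 */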
inline void X64Assembler::jmp(Label& l)  { l.jmp(*this); }
inline void X64Assembler::jmp8(Label& l) { l.jmp8(*this); }
inline void X64Assembler::jcc(ConditionCode c, Label& l)  { l.jcc(*this, c); }
inline void X64Assembler::jcc8(ConditionCode c, Label& l) { l.jcc8(*this, c); }
inline void X64Assembler::call(Label& l) { l.call(*this); }

#define CC(nm, code)                                                    \
  inline void X64Assembler::j##nm(Label& l)    { l.jcc(*this, code); }  \
  inline void X64Assembler::j##nm##8(Label& l) { l.jcc8(*this, code); }
  CCS
#undef CC
//////////////////////////////////////////////////////////////////////

/*
 * Select the code block which contains a given address.
 *
 * E.g.:
 *
 *   CodeBlock& cb = codeBlockChoose(toPatch, mainBlock, coldBlock);
 */
inline CodeBlock& codeBlockChoose(CodeAddress addr) {
  always_assert_flog(false,
                     "address {} was not part of any known code block", addr);
}
template<class... Blocks>
CodeBlock& codeBlockChoose(CodeAddress addr, CodeBlock& a, Blocks&... as) {
  if (a.contains(addr)) return a;
  return codeBlockChoose(addr, as...);
}
//////////////////////////////////////////////////////////////////////

namespace x64 {

struct DecodedInstruction {
  DecodedInstruction(uint8_t* ip, uint8_t* base)
    : m_base(base)
  { decode(ip); }

  explicit DecodedInstruction(uint8_t* ip) : DecodedInstruction(ip, ip) {}

  std::string toString();
  size_t size() { return m_size; }

  bool hasPicOffset() const { return m_flags.picOff; }
  uint8_t* picAddress() const;
  bool setPicAddress(uint8_t* target);

  bool hasOffset() const { return m_offSz != 0; }
  int32_t offset() const;

  bool hasImmediate() const { return m_immSz; }
  int64_t immediate() const;
  bool setImmediate(int64_t value);
  bool isNop() const;
  bool isBranch(bool allowCond = true) const;
  bool isCall() const;
  bool isJmp() const;
  bool isLea() const;
  ConditionCode jccCondCode() const;
  bool shrinkBranch();
  void widenBranch();
  uint8_t getModRm() const;

 private:
  void decode(uint8_t* ip);
  bool decodePrefix(uint8_t* ip);
  int decodeRexVexXop(uint8_t* ip);
  int decodeOpcode(uint8_t* ip);
  void determineOperandsMap0(uint8_t* ip);
  void determineOperandsMap1(uint8_t* ip);
  void determineOperandsMap2(uint8_t* ip);
  void determineOperandsMap3(uint8_t* ip);
  int decodeModRm(uint8_t* ip);
  int decodeImm(uint8_t* ip);

  // We may wish to decode an instruction whose address is m_ip, but treat all
  // PIC references as relative to m_base.
  uint8_t* m_base;

  uint8_t* m_ip;
  uint32_t m_size;
  union {
    uint32_t m_flagsVal;
    struct {
      uint32_t lock      : 1;
      uint32_t repNE     : 1;
      uint32_t rep       : 1;

      uint32_t cs        : 1;
      uint32_t ss        : 1;
      uint32_t ds        : 1;
      uint32_t es        : 1;
      uint32_t fs        : 1;
      uint32_t gs        : 1;
      uint32_t bTaken    : 1;
      uint32_t bNotTaken : 1;

      uint32_t opndSzOvr : 1;
      uint32_t addrSzOvr : 1;

      uint32_t rex       : 1;
      uint32_t vex       : 1;
      uint32_t xop       : 1;

      uint32_t w         : 1;
      uint32_t r         : 1;
      uint32_t x         : 1;
      uint32_t b         : 1;
      uint32_t l         : 1;

      uint32_t def64     : 1;
      uint32_t immIsAddr : 1;
      uint32_t picOff    : 1;
      uint32_t hasModRm  : 1;
      uint32_t hasSib    : 1;
    } m_flags;
  };

  uint8_t m_map_select;
  uint8_t m_xtra_op;
  uint8_t m_opcode;
  uint8_t m_immSz;
  uint8_t m_offSz;
};
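/*
 * Usage sketch (ours): decode the instruction at ip, inspect it, and
 * optionally retarget a rip-relative (PIC) reference:
 *
 *   DecodedInstruction di(ip);
 *   if (di.isCall() && di.hasPicOffset()) {
 *     di.setPicAddress(newTarget);   // newTarget: hypothetical
 *   }
 *   ip += di.size();
 */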
#undef TRACEMOD
#undef logical_const
#undef CCS

} // namespace x64
}} // namespace HPHP::jit

#endif