/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#ifndef incl_HPHP_UTIL_ASM_X64_LEGACY_H_
#define incl_HPHP_UTIL_ASM_X64_LEGACY_H_

namespace HPHP { namespace jit {

//////////////////////////////////////////////////////////////////////

enum X64InstrFlags {
  IF_REVERSE     = 0x0001,  // The operand encoding for some instructions is
                            // "backwards" in x64; these instructions are
                            // called "reverse" instructions. There are a few
                            // details about emitting "reverse" instructions:
                            // (1) for the R_M address mode, we use the MR
                            // opcode, (2) for M_R and R address modes, we use
                            // the RM opcode, and (3) for the R_R address mode,
                            // we still use MR opcode, but we have to swap the
                            // first argument and the second argument.

  IF_TWOBYTEOP   = 0x0002,  // Some instructions have two byte opcodes. For
                            // these instructions, an additional byte (0x0F) is
                            // emitted before the standard opcode byte.

  IF_JCC         = 0x0004,  // instruction is jcc
  IF_IMUL        = 0x0008,  // instruction is imul
  IF_HAS_IMM8    = 0x0010,  // instruction has an encoding that takes an 8-bit
                            // immediate
  IF_SHIFT       = 0x0020,  // instruction is rol, ror, rcl, rcr, shl, shr, sar
  IF_RET         = 0x0040,  // instruction is ret
  IF_SHIFTD      = 0x0080,  // instruction is shld, shrd
  IF_NO_REXW     = 0x0100,  // rexW prefix is not needed
  IF_MOV         = 0x0200,  // instruction is mov
  IF_COMPACTR    = 0x0400,  // instruction supports compact-R encoding
  IF_RAX         = 0x0800,  // instruction supports special rax encoding
  IF_XCHG        = 0x1000,  // instruction is xchg (not xchgb)
  IF_BYTEREG     = 0x2000,  // instruction is movzbq, movsbq
  IF_66PREFIXED  = 0x4000,  // instruction requires a mandatory 0x66 prefix
  IF_F3PREFIXED  = 0x8000,  // instruction requires a mandatory 0xf3 prefix
  IF_F2PREFIXED  = 0x10000, // instruction requires a mandatory 0xf2 prefix
  IF_THREEBYTEOP = 0x20000, // instruction requires a 0x0F 0x3[8A] prefix
  IF_ROUND       = 0x40000, // instruction is round(sp)d
};

/*
  Address mode to table index map:
      Table index 0 <- R_R / M_R(n) / R_M(r) / R(n)
      Table index 1 <- R_M(n) / M_R(r) / R(r)
      Table index 2 <- I / R_I / M_I / R_R_I / M_R_I / R_M_I
      Table index 3 <- "/digit" value used by the above address modes
      Table index 4 <- special R_I (for rax)
      Table index 5 <- compact-R / none

      (n) - for normal instructions only (IF_REVERSE flag is not set)
      (r) - for reverse instructions only (IF_REVERSE flag is set)

      0xF1 is used to indicate invalid opcodes.
*/

struct X64Instr {
  unsigned char table[6];
  unsigned long flags;
};
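
// Worked example (illustrative, not part of the original table comments):
// instr_add below is { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 }.
//   table[0] = 0x01  ADD r/m, r      (R_R and M_R forms)
//   table[1] = 0x03  ADD r, r/m      (R_M form)
//   table[2] = 0x81  ADD r/m, imm32  (immediate forms; |2 gives the imm8 form)
//   table[3] = 0x00  the "/0" digit placed in the modrm reg field
//   table[4] = 0x05  ADD rax, imm32  (special rax encoding)
//   table[5] = 0xF1  no compact-R encoding
// flags 0x0810 = IF_HAS_IMM8 | IF_RAX.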

const X64Instr instr_divsd =     { { 0x5E,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_movups =    { { 0x10,0x11,0xF1,0x00,0xF1,0xF1 }, 0x0103  };
const X64Instr instr_movdqa =    { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103  };
const X64Instr instr_movdqu =    { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103  };
const X64Instr instr_movsd =     { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_gpr2xmm =   { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmm2gpr =   { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmmsub =    { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmadd =    { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmmul =    { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmsqrt =   { { 0x51,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_ucomisd =   { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_pxor =      { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_psrlq =     { { 0xF1,0xF1,0x73,0x02,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_psllq =     { { 0xF1,0xF1,0x73,0x06,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_cvtsi2sd =  { { 0x2a,0x2a,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_cvttsd2si = { { 0x2c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_lddqu =     { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
const X64Instr instr_unpcklpd =  { { 0x14,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_jmp =       { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910  };
const X64Instr instr_call =      { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900  };
const X64Instr instr_push =      { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510  };
const X64Instr instr_pop =       { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500  };
const X64Instr instr_inc =       { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_dec =       { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_not =       { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_notb =      { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_neg =       { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_negb =      { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_add =       { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810  };
const X64Instr instr_addb =      { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810  };
const X64Instr instr_sub =       { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810  };
const X64Instr instr_subb =      { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810  };
const X64Instr instr_and =       { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810  };
const X64Instr instr_andb =      { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810  };
const X64Instr instr_or =        { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810  };
const X64Instr instr_orb =       { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810  };
const X64Instr instr_xor =       { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810  };
const X64Instr instr_xorb =      { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810  };
const X64Instr instr_mov =       { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600  };
const X64Instr instr_movb =      { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610  };
const X64Instr instr_test =      { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800  };
const X64Instr instr_testb =     { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810  };
const X64Instr instr_cmp =       { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810  };
const X64Instr instr_cmpb =      { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810  };
const X64Instr instr_sbb =       { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810  };
const X64Instr instr_sbbb =      { { 0x18,0x1A,0x80,0x03,0x1C,0xF1 }, 0x0810  };
const X64Instr instr_adc =       { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810  };
const X64Instr instr_lea =       { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchgb =     { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchg =      { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000  };
const X64Instr instr_imul =      { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019  };
const X64Instr instr_mul =       { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_div =       { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_idiv =      { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_cdq =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400  };
const X64Instr instr_ret =       { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540  };
const X64Instr instr_jcc =       { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114  };
const X64Instr instr_cmovcc =    { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_setcc =     { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102  };
const X64Instr instr_movswx =    { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movsbx =    { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_movzwx =    { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movzbx =    { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_cwde =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400  };
const X64Instr instr_cqo =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0000  };
const X64Instr instr_rol =       { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_ror =       { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcl =       { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcr =       { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shl =       { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shr =       { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_sar =       { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_xadd =      { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_cmpxchg =   { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_nop =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500  };
const X64Instr instr_shld =      { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_shrd =      { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_int3 =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500  };
const X64Instr instr_roundsd =   { { 0xF1,0xF1,0x0b,0x00,0xF1,0xF1 }, 0x64112 };
const X64Instr instr_cmpsd =     { { 0xF1,0xF1,0xC2,0xF1,0xF1,0xF1 }, 0x10112 };
const X64Instr instr_crc32 =     { { 0xF1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x30001 };

///////////////////////////////////////////////////////////////////////////////

/*
 * Copyright (c) 2009, Andrew J. Paroski
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * The names of the contributors may not be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL ANDREW J. PAROSKI BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

struct X64Assembler final : public X64AssemblerBase {
  explicit X64Assembler(CodeBlock& cb) : X64AssemblerBase(cb) {}

  X64Assembler(const X64Assembler&) = delete;
  X64Assembler& operator=(const X64Assembler&) = delete;

  /*
   * The following section defines the main interface for emitting
   * x64 instructions.
   *
   * Simple examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   */

#define BYTE_LOAD_OP(name, instr)                                     \
  void name##b(MemoryRef m, Reg8 r)  { instrMR(instr, m, r); }

#define LOAD_OP(name, instr)                                          \
  void name##q(MemoryRef m, Reg64 r) { instrMR(instr, m, r); }        \
  void name##l(MemoryRef m, Reg32 r) { instrMR(instr, m, r); }        \
  void name##w(MemoryRef m, Reg16 r) { instrMR(instr, m, r); }        \
  void name##q(RIPRelativeRef m, Reg64 r) { instrMR(instr, m, r); }   \
  BYTE_LOAD_OP(name, instr##b)

#define BYTE_STORE_OP(name, instr)                                    \
  void name##b(Reg8 r, MemoryRef m)  { instrRM(instr, r, m); }        \
  void name##b(Immed i, MemoryRef m) { instrIM8(instr, i, m); }

#define STORE_OP(name, instr)                                         \
  void name##w(Immed i, MemoryRef m) { instrIM16(instr, i, m); }      \
  void name##l(Immed i, MemoryRef m) { instrIM32(instr, i, m); }      \
  void name##w(Reg16 r, MemoryRef m) { instrRM(instr, r, m); }        \
  void name##l(Reg32 r, MemoryRef m) { instrRM(instr, r, m); }        \
  void name##q(Reg64 r, MemoryRef m) { instrRM(instr, r, m); }        \
  BYTE_STORE_OP(name, instr ## b)

#define BYTE_REG_OP(name, instr)                                      \
  void name##b(Reg8 r1, Reg8 r2) { instrRR(instr, r1, r2); }          \
  void name##b(Immed i, Reg8 r)  { instrIR(instr, i, r); }

#define REG_OP(name, instr)                                           \
  void name##q(Reg64 r1, Reg64 r2) { instrRR(instr, r1, r2); }        \
  void name##l(Reg32 r1, Reg32 r2) { instrRR(instr, r1, r2); }        \
  void name##w(Reg16 r1, Reg16 r2) { instrRR(instr, r1, r2); }        \
  void name##l(Immed i, Reg32 r)   { instrIR(instr, i, r); }          \
  void name##w(Immed i, Reg16 r)   { instrIR(instr, i, r); }          \
  BYTE_REG_OP(name, instr##b)

  /*
   * For when we have a memory operand and the operand size is
   * 64 bits, only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64_STORE_OP(name, instr)            \
  void name##q(Immed i, MemoryRef m) {         \
    return instrIM(instr, i, m);               \
  }

  /*
   * For instructions other than movq, even when the operand size is
   * 64 bits, only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64R_OP(name, instr)                 \
  void name##q(Immed imm, Reg64 r) {           \
    always_assert(imm.fits(sz::dword));        \
    return instrIR(instr, imm, r);             \
  }

#define FULL_OP(name, instr)                   \
  LOAD_OP(name, instr)                         \
  STORE_OP(name, instr)                        \
  REG_OP(name, instr)                          \
  IMM64_STORE_OP(name, instr)                  \
  IMM64R_OP(name, instr)
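
  // Illustrative note: FULL_OP(add, instr_add) below expands to the whole
  // addq/addl/addw/addb family -- register-register, load (memory to
  // register), store (register or immediate to memory), and
  // immediate-to-register forms -- all dispatching through the instr_add
  // opcode table above.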

  // We rename x64's mov to store and load for improved code
  // readability.
  LOAD_OP        (load, instr_mov)
  STORE_OP       (store, instr_mov)
  IMM64_STORE_OP (store, instr_mov)
  REG_OP         (mov, instr_mov)

  FULL_OP(add, instr_add)
  FULL_OP(xor, instr_xor)
  FULL_OP(sub, instr_sub)
  FULL_OP(and, instr_and)
  FULL_OP(or,  instr_or)
  FULL_OP(test, instr_test)
  FULL_OP(cmp, instr_cmp)
  FULL_OP(sbb, instr_sbb)

#undef IMM64_STORE_OP

  // 64-bit immediates work with mov to a register.
  void movq(Immed64 imm, Reg64 r) { instrIR(instr_mov, imm, r); }

  // movzbx is a special snowflake. We don't have movzbq because it behaves
  // exactly the same as movzbl but takes an extra byte.
  void loadzbl(MemoryRef m, Reg32 r)  { instrMR(instr_movzbx, m, r); }
  void movzbl(Reg8 src, Reg32 dest)   { emitRR32(instr_movzbx,
                                                 rn(src), rn(dest)); }
  void movsbl(Reg8 src, Reg32 dest)   { emitRR(instr_movsbx,
                                               rn(src), rn(dest)); }
  void movzwl(Reg16 src, Reg32 dest)  { emitRR32(instr_movzwx,
                                                 rn(src), rn(dest)); }

  void loadsbq(MemoryRef m, Reg64 r)  { instrMR(instr_movsbx, m, r); }
  void movsbq(Reg8 src, Reg64 dest)   { emitRR(instr_movsbx,
                                               rn(src), rn(dest)); }
  void crc32q(Reg64 src, Reg64 dest)  { instrRR(instr_crc32, src, dest); }

  void lea(MemoryRef p, Reg64 reg)      { instrMR(instr_lea, p, reg); }
  void lea(RIPRelativeRef p, Reg64 reg) { instrMR(instr_lea, p, reg); }

  void xchgq(Reg64 r1, Reg64 r2) { instrRR(instr_xchg,  r1, r2); }
  void xchgl(Reg32 r1, Reg32 r2) { instrRR(instr_xchg,  r1, r2); }
  void xchgb(Reg8 r1, Reg8 r2)   { instrRR(instr_xchgb, r1, r2); }

  void imul(Reg64 r1, Reg64 r2)  { instrRR(instr_imul, r1, r2); }

  void push(Reg64 r)  { instrR(instr_push, r); }
  void pushl(Reg32 r) { instrR(instr_push, r); }
  void pop (Reg64 r)  { instrR(instr_pop,  r); }
  void idiv(Reg64 r)  { instrR(instr_idiv, r); }
  void incq(Reg64 r)  { instrR(instr_inc,  r); }
  void incl(Reg32 r)  { instrR(instr_inc,  r); }
  void incw(Reg16 r)  { instrR(instr_inc,  r); }
  void decq(Reg64 r)  { instrR(instr_dec,  r); }
  void decl(Reg32 r)  { instrR(instr_dec,  r); }
  void decw(Reg16 r)  { instrR(instr_dec,  r); }
  void notb(Reg8 r)   { instrR(instr_notb, r); }
  void not(Reg64 r)   { instrR(instr_not,  r); }
  void neg(Reg64 r)   { instrR(instr_neg,  r); }
  void negb(Reg8 r)   { instrR(instr_negb, r); }
  void ret()          { emit(instr_ret); }
  void ret(Immed i)   { emitI(instr_ret, i.w(), sz::word); }
  void cqo()          { emit(instr_cqo); }
  void nop()          { emit(instr_nop); }
  void int3()         { emit(instr_int3); }
  void ud2()          { byte(0x0f); byte(0x0b); }
  void pushf()        { byte(0x9c); }
  void popf()         { byte(0x9d); }
  void lock()         { byte(0xF0); }

  void push(MemoryRef m) { instrM(instr_push, m); }
  void pop (MemoryRef m) { instrM(instr_pop,  m); }
  void incq(MemoryRef m) { instrM(instr_inc,  m); }
  void incl(MemoryRef m) { instrM32(instr_inc, m); }
  void incw(MemoryRef m) { instrM16(instr_inc, m); }
  void decqlock(MemoryRef m) { lock(); decq(m); }
  void decq(MemoryRef m) { instrM(instr_dec,  m); }
  void decl(MemoryRef m) { instrM32(instr_dec, m); }
  void decw(MemoryRef m) { instrM16(instr_dec, m); }

  void push(Immed64 i) { emitI(instr_push, i.q()); }

  void movups(RegXMM x, MemoryRef m)      { instrRM(instr_movups, x, m); }
  void movups(MemoryRef m, RegXMM x)      { instrMR(instr_movups, m, x); }
  void movdqu(RegXMM x, MemoryRef m)      { instrRM(instr_movdqu, x, m); }
  void movdqu(MemoryRef m, RegXMM x)      { instrMR(instr_movdqu, m, x); }
  void movdqa(RegXMM x, RegXMM y)         { instrRR(instr_movdqa, x, y); }
  void movdqa(RegXMM x, MemoryRef m)      { instrRM(instr_movdqa, x, m); }
  void movdqa(MemoryRef m, RegXMM x)      { instrMR(instr_movdqa, m, x); }
  void movsd (RegXMM x, RegXMM y)         { instrRR(instr_movsd, x, y); }
  void movsd (RegXMM x, MemoryRef m)      { instrRM(instr_movsd, x, m); }
  void movsd (MemoryRef m, RegXMM x)      { instrMR(instr_movsd, m, x); }
  void movsd (RIPRelativeRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
  void lddqu (MemoryRef m, RegXMM x)      { instrMR(instr_lddqu, m, x); }
  void unpcklpd(RegXMM s, RegXMM d)       { instrRR(instr_unpcklpd, d, s); }

  void rorq (Immed i, Reg64 r) { instrIR(instr_ror, i, r); }
  void shlq (Immed i, Reg64 r) { instrIR(instr_shl, i, r); }
  void shrq (Immed i, Reg64 r) { instrIR(instr_shr, i, r); }
  void sarq (Immed i, Reg64 r) { instrIR(instr_sar, i, r); }
  void shll (Immed i, Reg32 r) { instrIR(instr_shl, i, r); }
  void shrl (Immed i, Reg32 r) { instrIR(instr_shr, i, r); }
  void shlw (Immed i, Reg16 r) { instrIR(instr_shl, i, r); }
  void shrw (Immed i, Reg16 r) { instrIR(instr_shr, i, r); }

  void shlq (Reg64 r) { instrR(instr_shl, r); }
  void shrq (Reg64 r) { instrR(instr_shr, r); }
  void sarq (Reg64 r) { instrR(instr_sar, r); }

  void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
    emitIRR(instr_roundsd, rn(dst), rn(src), ssize_t(d));
  }

  void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
    emitIRR(instr_cmpsd, rn(dst), rn(src), ssize_t(pred));
  }

  /*
   * Control-flow directives. Primitive labeling/patching facilities
   * are available, as well as slightly higher-level ones via the
   * Label class.
   */
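
  // Rough usage sketch (illustrative): jcc8(CC_E, target) emits the two-byte
  // short form (opcode + rel8), while jcc(CC_E, target) emits the six-byte
  // 0x0F 0x8x + rel32 form; jmp(target) asserts jmpDeltaFits(target) because
  // it always uses a rel32 displacement.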

  void jmp(Reg64 r)           { instrR(instr_jmp, r); }
  void jmp(MemoryRef m)       { instrM(instr_jmp, m); }
  void jmp(RIPRelativeRef m)  { instrM(instr_jmp, m); }
  void call(Reg64 r)          { instrR(instr_call, r); }
  void call(MemoryRef m)      { instrM(instr_call, m); }
  void call(RIPRelativeRef m) { instrM(instr_call, m); }

  void jmp8(CodeAddress dest) { emitJ8(instr_jmp, ssize_t(dest)); }

  void jmp(CodeAddress dest) {
    always_assert_flog(dest && jmpDeltaFits(dest), "Bad Jmp: {}", dest);
    emitJ32(instr_jmp, ssize_t(dest));
  }

  void call(CodeAddress dest) {
    always_assert(dest && jmpDeltaFits(dest));
    emitJ32(instr_call, ssize_t(dest));
  }

  void jcc(ConditionCode cond, CodeAddress dest) {
    emitCJ32(instr_jcc, cond, (ssize_t)dest);
  }

  void jcc8(ConditionCode cond, CodeAddress dest) {
    emitCJ8(instr_jcc, cond, (ssize_t)dest);
  }

  using X64AssemblerBase::call;
  using X64AssemblerBase::jmp;
  using X64AssemblerBase::jmp8;
  using X64AssemblerBase::jcc;
  using X64AssemblerBase::jcc8;

  void setcc(int cc, Reg8 byteReg) {
    emitCR(instr_setcc, cc, rn(byteReg), sz::byte);
  }

  void psllq(Immed i, RegXMM r) { emitIR(instr_psllq, rn(r), i.b()); }
  void psrlq(Immed i, RegXMM r) { emitIR(instr_psrlq, rn(r), i.b()); }

  void movq_rx(Reg64 rSrc, RegXMM rdest) {
    emitRR(instr_gpr2xmm, rn(rdest), rn(rSrc));
  }
  void movq_xr(RegXMM rSrc, Reg64 rdest) {
    emitRR(instr_xmm2gpr, rn(rSrc), rn(rdest));
  }

  void addsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmadd, rn(srcdest), rn(src));
  }
  void mulsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmmul, rn(srcdest), rn(src));
  }
  void subsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmsub, rn(srcdest), rn(src));
  }
  void pxor(RegXMM src, RegXMM srcdest) {
    emitRR(instr_pxor, rn(srcdest), rn(src));
  }
  void cvtsi2sd(Reg64 src, RegXMM dest) {
    emitRR(instr_cvtsi2sd, rn(dest), rn(src));
  }
  void cvtsi2sd(MemoryRef m, RegXMM dest) {
    instrMR(instr_cvtsi2sd, m, dest);
  }
  void ucomisd(RegXMM l, RegXMM r) {
    emitRR(instr_ucomisd, rn(l), rn(r));
  }
  void sqrtsd(RegXMM src, RegXMM dest) {
    emitRR(instr_xmmsqrt, rn(dest), rn(src));
  }

  void divsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_divsd, rn(srcdest), rn(src));
  }
  void cvttsd2siq(RegXMM src, Reg64 dest) {
    emitRR(instr_cvttsd2si, rn(dest), rn(src));
  }

  void emitInt3s(int n) {
    for (auto i = 0; i < n; ++i) {
      byte(0xcc);
    }
  }

  void emitNop(int n) {
    if (n == 0) return;
    static const uint8_t nops[][9] = {
      { },
      { 0x90 },
      { 0x66, 0x90 },
      { 0x0f, 0x1f, 0x00 },
      { 0x0f, 0x1f, 0x40, 0x00 },
      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
    };
    // While n >= 9, emit 9 byte NOPs
    while (n >= 9) {
      bytes(9, nops[9]);
      n -= 9;
    }
    bytes(n, nops[n]);
  }

  void pad() {
    while (available() >= 2) ud2();
    if (available() > 0) int3();
    assertx(available() == 0);
  }

  X64Assembler& prefix(const MemoryRef& mr) {
    static const uint8_t prefixes[] = {
      0,    // Segment::DS never gets a prefix (see the check below)
      0x64, // Segment::FS prefix
      0x65  // Segment::GS prefix
    };
    if (mr.segment != Segment::DS) {
      byte(prefixes[int(mr.segment)]);
    }
    return *this;
  }

  /*
   * Low-level emitter functions.
   *
   * These functions are the core of the assembler, and can also be
   * used directly.
   */
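
  // Note on naming (added for orientation, inferred from the definitions
  // below): the emit* suffixes describe the operands -- R = register,
  // M = memory (base/index/scale/disp), I = immediate, C = condition code,
  // J = jump target -- so emitIR is immediate-to-register, emitMR is
  // memory-to-register, and emitCMX is the general conditional/memory form.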

  // r cannot be set to 'none'
  void emitCR(X64Instr op, int jcond, RegNumber regN, int opSz = sz::qword) {
    assert(regN != noreg);
    int r = int(regN);

    // Opsize prefix
    if (opSz == sz::word) {
      byte(kOpsizePrefix);
    }

    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // If the instruction supports compact-R mode, use that
    if (op.flags & IF_COMPACTR) {
      byte(op.table[5] | (r & 7));
      return;
    }
    char opcode = (op.flags & IF_REVERSE) ? op.table[1] : op.table[0];
    char rval = op.table[3];
    // Handle two byte opcodes
    if (op.flags & IF_TWOBYTEOP) byte(0x0F);
    byte(opcode | jcond);
    emitModrm(3, rval, r);
  }
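
  // Illustrative trace (added, not from the original comments): push(rbp)
  // reaches emitCR(instr_push, 0, rn(rbp)); instr_push has IF_COMPACTR and
  // IF_NO_REXW set, so emitCR takes the compact-R branch and emits the single
  // byte 0x50 | 5 == 0x55.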

  void emitR(X64Instr op, RegNumber r, int opSz = sz::qword) {
    emitCR(op, 0, r, opSz);
  }

  void emitR32(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::dword);
  }

  void emitR16(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::word);
  }

  // r1 cannot be set to noreg
  // r2 cannot be set to noreg
  void emitCRR(X64Instr op, int jcond, RegNumber rn1, RegNumber rn2,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    prefixBytes(op.flags, opSz);
    // The xchg instruction is special; we have compact encodings for
    // exchanging with rax or eax.
    if (op.flags & IF_XCHG) {
      if (r1 == int(reg::rax)) {
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
        assert(!(op.flags & IF_BYTEREG));
        if (r2 & 8) rex |= (reverse ? 4 : 1);
        if (rex) byte(0x40 | rex);
        // If the first register is rax, emit opcode with the second
        // register id embedded
        byte(op.table[5] | (r2 & 7));
        return;
      } else if (r2 == int(reg::rax)) {
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) {
          rex |= 8;
        }
        if (r1 & 8) rex |= (reverse ? 1 : 4);
        if (rex) byte(0x40 | rex);
        // If the second register is rax, emit opcode with the first
        // register id embedded
        byte(op.table[5] | (r1 & 7));
        return;
      }
    }
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    // movzbx's first operand is a bytereg regardless of operand size
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // For two/three byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL | IF_THREEBYTEOP)) != 0) byte(0x0F);
    if ((op.flags & IF_THREEBYTEOP) != 0) byte(0x38);
    byte(op.table[0] | jcond);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
  }

  void emitCRR32(X64Instr op, int jcond, RegNumber r1, RegNumber r2) {
    emitCRR(op, jcond, r1, r2, sz::dword);
  }

  void emitRR(X64Instr op, RegNumber r1, RegNumber r2, int opSz = sz::qword) {
    emitCRR(op, 0, r1, r2, opSz);
  }

  void emitRR32(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::dword);
  }

  void emitRR16(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::word);
  }

  void emitRR8(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::byte);
  }

  // r cannot be set to noreg
  void emitIR(X64Instr op, RegNumber rname, ssize_t imm,
              int opSz = sz::qword) {
    assert(rname != noreg);
    int r = int(rname);
    prefixBytes(op.flags, opSz);
    // Determine the size of the immediate. This might change opSz.
    int immSize;
    if ((op.flags & IF_MOV) && opSz == sz::qword) {
      immSize = computeImmediateSizeForMovRI64(op, imm, opSz);
    } else {
      immSize = computeImmediateSize(op, imm, opSz);
    }
    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Use the special rax encoding if the instruction supports it
    if (r == int(reg::rax) && immSize == sz::dword &&
        (op.flags & IF_RAX)) {
      byte(op.table[4]);
      emitImmediate(op, imm, immSize);
      return;
    }
    // Use the compact-R encoding if the operand size and the immediate
    // size are the same
    if ((op.flags & IF_COMPACTR) && immSize == opSz) {
      byte(op.table[5] | (r & 7));
      emitImmediate(op, imm, immSize);
      return;
    }
    // For two byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
    int rval = op.table[3];
    // shift/rotate instructions have a special opcode when the
    // immediate is 1
    if ((op.flags & IF_SHIFT) != 0 && imm == 1) {
      byte(0xd1);
      emitModrm(3, rval, r);
      // don't emit immediate
      return;
    }
    int opcode = (immSize == sz::byte && opSz != sz::byte) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    emitModrm(3, rval, r);
    emitImmediate(op, imm, immSize);
  }

  void emitIR32(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, imm, sz::dword);
  }

  void emitIR16(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int16_t>(imm), sz::word);
  }

  void emitIR8(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int8_t>(imm), sz::byte);
  }

  // r1 cannot be set to noreg
  // r2 cannot be set to noreg
  void emitIRR(X64Instr op, RegNumber rn1, RegNumber rn2, ssize_t imm,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    prefixBytes(op.flags, opSz);
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    if (op.flags & IF_TWOBYTEOP || op.flags & IF_THREEBYTEOP) byte(0x0F);
    if (op.flags & IF_THREEBYTEOP) byte(0x3a);
    int opcode = (immSize == sz::byte && opSz != sz::byte &&
                  (op.flags & IF_ROUND) == 0) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
    emitImmediate(op, imm, immSize);
  }

  void emitCI(X64Instr op, int jcond, ssize_t imm, int opSz = sz::qword) {
    // Prefix
    prefixBytes(op.flags, opSz);
    // REX
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    // Emit opcode
    if ((op.flags & IF_JCC) != 0) {
      // jcc is weird so we handle it separately
      if (immSize != sz::byte) {
        byte(0x0F);
        byte(jcond | 0x80);
      } else {
        byte(jcond | 0x70);
      }
    } else {
      int opcode = (immSize == sz::byte && opSz != sz::byte) ?
        (op.table[2] | 2) : op.table[2];
      byte(jcond | opcode);
    }
    emitImmediate(op, imm, immSize);
  }

  void emitI(X64Instr op, ssize_t imm, int opSz = sz::qword) {
    emitCI(op, 0, imm, opSz);
  }

  void emitJ8(X64Instr op, ssize_t imm) {
    assert((op.flags & IF_JCC) == 0);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode and 8-bit immediate
    byte(0xEB);
    byte(safe_cast<int8_t>(delta));
  }

  void emitCJ8(X64Instr op, int jcond, ssize_t imm) {
    // this is for jcc only
    assert(op.flags & IF_JCC);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode and 8-bit immediate
    byte(jcond | 0x70);
    byte(safe_cast<int8_t>(delta));
  }

  void emitJ32(X64Instr op, ssize_t imm) {
    // call and jmp are supported, jcc is not supported
    assert((op.flags & IF_JCC) == 0);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 5));
    uint8_t* bdelta = (uint8_t*)&delta;
    uint8_t instr[] = { op.table[2],
                        bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(5, instr);
  }

  void emitCJ32(X64Instr op, int jcond, ssize_t imm) {
    // jcc is supported, call and jmp are not supported
    assert(op.flags & IF_JCC);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 6));
    uint8_t* bdelta = (uint8_t*)&delta;
    uint8_t instr[6] = { 0x0f, uint8_t(0x80 | jcond),
                         bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(6, instr);
  }

  // op disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == false, r == noreg)
  // op $imm, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == true,  r == noreg)
  // op %r, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == false, r != noreg)
  // op $imm, %r, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == true,  r != noreg)
  // op disp(%br,%ir,s), %r
  //   (for reverse == true,  hasImmediate == false, r != noreg)
  // op $imm, disp(%br,%ir,s), %r
  //   (for reverse == true,  hasImmediate == true,  r != noreg)
  // -----------------------------------------------------------------
  // ir cannot be set to 'sp'
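
  // Illustrative trace (added for orientation): a.storeq(rcx, rax[0xc]) goes
  // through instrRM -> emitRM -> emitCMX(instr_mov, 0, rn(rax), noreg,
  // m.r.scale, 0xc, rn(rcx), false, 0, false), i.e. the "op %r, disp(%br)"
  // form with no index register and no immediate.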

  void emitCMX(X64Instr op, int jcond, RegNumber brName, RegNumber irName,
               int s, int64_t disp,
               RegNumber rName,
               bool reverse = false,
               ssize_t imm = 0,
               bool hasImmediate = false,
               int opSz = sz::qword,
               bool ripRelative = false) {
    assert(irName != rn(reg::rsp));

    int ir = int(irName);
    int r = int(rName);
    int br = int(brName);

    // The opsize prefix can be placed here, if the instruction
    // deals with words.
    // When an instruction has a mandatory prefix, it goes before the
    // REX byte if we end up needing one.
    prefixBytes(op.flags, opSz);

    // Determine immSize from the 'hasImmediate' flag
    int immSize = sz::nosize;
    if (hasImmediate) {
      immSize = computeImmediateSize(op, imm, opSz);
    }
    if ((op.flags & IF_REVERSE) != 0) reverse = !reverse;
    // Determine if we need to use a two byte opcode;
    // imul is weird so we have a special case for it
    bool twoByteOpcode = ((op.flags & IF_TWOBYTEOP) != 0) ||
      ((op.flags & IF_IMUL) != 0 && rName != noreg &&
       immSize == sz::nosize);
    // Again, imul is weird
    if ((op.flags & IF_IMUL) != 0 && rName != noreg) {
      reverse = !reverse;
    }
    // The wily rex byte, a multipurpose extension to the opcode space for x64
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;

    bool highByteReg = false;
    // XXX: This IF_BYTEREG check is a special case for movzbl: we currently
    // encode it using an opSz of sz::byte but it doesn't actually have a
    // byte-sized operand like other instructions can.
    if (!(op.flags & IF_BYTEREG) && opSz == sz::byte && rName != noreg) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }

    if (rName != noreg && (r & 8)) rex |= 4;
    if (irName != noreg && (ir & 8)) rex |= 2;
    if (brName != noreg && (br & 8)) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Emit the opcode
    if (immSize != sz::nosize) {
      if (twoByteOpcode) byte(0x0F);
      if (immSize == sz::byte && opSz != sz::byte) {
        byte(op.table[2] | 2 | jcond);
      } else {
        byte(op.table[2] | jcond);
      }
    } else {
      if (twoByteOpcode) byte(0x0F);
      int opcode;
      if ((op.flags & IF_IMUL) != 0) {
        opcode = (rName == noreg) ? op.table[1] : op.table[0];
      } else {
        opcode = reverse ? op.table[1] : op.table[0];
      }
      byte(opcode | jcond);
    }
    // A SIB byte is needed if:
    // 1. We're using an index register.
    // 2. The base register is rsp-like.
    // 3. We're doing a baseless disp access and it is not rip-relative.
    bool sibIsNeeded =
      ir != int(noreg) ||                            /* 1 */
      br == int(reg::rsp) || br == int(reg::r12) ||  /* 2 */
      (br == int(noreg) && !ripRelative);            /* 3 */
    // If there is no register and no immediate, use the /r value
    if (r == int(noreg)) r = op.table[3];
    // If noreg was specified for 'ir', we use
    // the encoding for the sp register
    if (ir == int(noreg)) ir = 4;
    int dispSize = sz::nosize;
    if (disp != 0) {
      if (!ripRelative && disp <= 127 && disp >= -128) {
        dispSize = sz::byte;
      } else {
        dispSize = sz::dword;
      }
    }
    // Set 'mod' based on the size of the displacement
    int mod;
    switch (dispSize) {
      case sz::nosize: mod = 0; break;
      case sz::byte:   mod = 1; break;
      default:         mod = 2; break;
    }
    // Handle special cases for 'br'
    if (br == int(noreg)) {
      // If noreg was specified for 'br', we use the encoding
      // for the rbp register (or rip, if we're emitting a
      // rip-relative instruction), and we must set mod=0 and
      // "upgrade" to a DWORD-sized displacement
      br = 5;
      mod = 0;
      dispSize = sz::dword;
    } else if ((br & 7) == 5 && dispSize == sz::nosize) {
      // If br == rbp and no displacement was specified, we
      // must "upgrade" to using a 1-byte displacement value
      dispSize = sz::byte;
      mod = 1;
    }
    // Emit modr/m and the sib
    if (sibIsNeeded) {
      // s:                               0   1  2   3  4   5   6   7   8
      static const int scaleLookup[] = { -1,  0, 1, -1, 2, -1, -1, -1,  3 };
      assert(s > 0 && s <= 8);
      int scale = scaleLookup[s];
      assert(scale != -1);
      emitModrm(mod, r, 4);
      byte((scale << 6) | ((ir & 7) << 3) | (br & 7));
    } else {
      emitModrm(mod, r, br);
    }
    // Emit displacement if needed
    if (dispSize == sz::dword) {
      if (ripRelative) {
        disp -= (int64_t)codeBlock.frontier() + immSize + dispSize;
      }
      dword(disp);
    } else if (dispSize == sz::byte) {
      byte(disp & 0xff);
    }
    // Emit immediate if needed
    if (immSize != sz::nosize) {
      emitImmediate(op, imm, immSize);
    }
  }

  void emitIM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, opSz);
  }

  void emitIM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::byte);
  }

  void emitIM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::word);
  }

  void emitIM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, sz::dword);
  }

  void emitRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              RegNumber r, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, opSz);
  }

  void emitRM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::dword);
  }

  void emitRM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::word);
  }

  void emitRM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::byte);
  }

  void emitCMR(X64Instr op, int jcond, RegNumber br, RegNumber ir,
               int s, int disp, RegNumber r, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, r, true, 0, false, opSz);
  }

  void emitMR(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
              RegNumber r, int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, opSz, ripRelative);
  }

  void emitMR32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::dword);
  }

  void emitMR16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::word);
  }

  void emitMR8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::byte);
  }

  void emitIRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, imm, true, opSz);
  }

  void emitIMR(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, true, imm, true, opSz);
  }

  void emitM(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
             int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, opSz,
            ripRelative);
  }

  void emitM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::dword);
  }

  void emitM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::word);
  }

  void emitCM(X64Instr op, int jcond, RegNumber br,
              RegNumber ir, int s, int disp, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, noreg, false, 0, false, opSz);
  }

  // emit (with no arguments)
  void emit(X64Instr op) {
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    byte(op.table[5]);
  }

  /*
   * The following functions use a naming convention for an older API
   * to the assembler; conditional loads and moves haven't yet been
   * ported over.
   */

  // CMOVcc [rbase + off], rdest
  inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
                                     int off, Reg64 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/);
  }
  inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
                                     int off, Reg32 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/,
            0 /*imm*/,
            false /*hasImmediate*/,
            sz::dword /*opSz*/);
  }
  inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
    emitCRR(instr_cmovcc, cc, rn(rsrc), rn(rdest));
  }

  bool byteRegNeedsRex(int rn) const {
    // Without a rex, 4 through 7 mean the high 8-bit byte registers.
    return rn >= 4 && rn <= 7;
  }
  int byteRegEncodeNumber(int rn, bool& seenHigh) const {
    // We flag a bit in ah, ch, dh, bh so byteRegNeedsRex doesn't
    // trigger for them.
    if (rn & 0x80) seenHigh = true;
    return rn & ~0x80;
  }
  // In 64-bit mode, you can't mix accesses to high byte registers
  // with low byte registers other than al,cl,bl,dl. We assert this.
  void byteRegMisuse() const {
    assert(!"High byte registers can't be used with new x64 registers, or"
            " anything requiring a REX prefix");
  }

  int computeImmediateSize(X64Instr op,
                           ssize_t imm,
                           int opsize = sz::dword) {
    // Most instructions take a 32-bit or 16-bit immediate,
    // depending on the presence of the opsize prefix (0x66).
    int immSize = opsize == sz::word ? sz::word : sz::dword;
    // ret always takes a 16-bit immediate.
    if (op.flags & IF_RET) {
      immSize = sz::word;
    }
    // Use an 8-bit immediate if the instruction supports it and if
    // the immediate value fits in a byte
    if (deltaFits(imm, sz::byte) && (op.flags & IF_HAS_IMM8) != 0) {
      immSize = sz::byte;
    }
    return immSize;
  }
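
  // Illustrative note: addq(0x1, rbx) ends up here with instr_add, which has
  // IF_HAS_IMM8, so a one-byte immediate is chosen and emitIR selects the
  // 0x83 (0x81 | 2) opcode form; an add of a value that doesn't fit in a byte
  // falls back to the 32-bit immediate form.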

  void emitModrm(int x, int y, int z) {
    byte((x << 6) | ((y & 7) << 3) | (z & 7));
  }

  /*
   * The mov instruction supports an 8 byte immediate for the RI
   * address mode when opSz is qword. It also supports a 4-byte
   * immediate with opSz qword (the immediate is sign-extended).
   *
   * On the other hand, if it fits in 32-bits as an unsigned, we can
   * change opSz to dword, which will zero the top 4 bytes instead of
   * sign-extending.
   */
  int computeImmediateSizeForMovRI64(X64Instr op, ssize_t imm, int& opSz) {
    assert(opSz == sz::qword);
    if (deltaFits(imm, sz::dword)) {
      return computeImmediateSize(op, imm);
    }
    if (magFits(imm, sz::dword)) {
      opSz = sz::dword;
      return sz::dword;
    }
    return sz::qword;
  }
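
  // Illustrative examples: a movq of 0x7fffffff to a register can use a
  // 32-bit immediate; a value such as 0xffffffff doesn't fit as a signed
  // dword but does fit unsigned, so opSz is narrowed to dword (which
  // zero-extends); anything wider uses the full 8-byte immediate form.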

  void emitImmediate(X64Instr op, ssize_t imm, int immSize) {
    if (immSize == sz::nosize) {
      return;
    }
    if ((op.flags & (IF_SHIFT | IF_SHIFTD)) == 0) {
      if (immSize == sz::dword) {
        dword(imm);
      } else if (immSize == sz::byte) {
        byte(imm);
      } else if (immSize == sz::word) {
        word(imm);
      } else {
        qword(imm);
      }
    } else {
      // we always use a byte-sized immediate for shift instructions
      byte(imm);
    }
  }

  void prefixBytes(unsigned long flags, int opSz) {
    if (opSz == sz::word && !(flags & IF_RET)) byte(kOpsizePrefix);
    if (flags & IF_66PREFIXED) byte(0x66);
    if (flags & IF_F2PREFIXED) byte(0xF2);
    if (flags & IF_F3PREFIXED) byte(0xF3);
  }

  // Wraps a bunch of the emit* functions to make using them with the
  // typed wrappers more terse. We should have these replace
  // the emit functions eventually.

#define UMR(m) rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
#define URIP(m) noreg, noreg, sz::byte, m.r.disp
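
  // Illustrative expansion: for a MemoryRef such as rax[0xc], UMR(m) passes
  // rn(rax) as the base, the (absent) index register, the scale, and 0xc as
  // the displacement to the emit* helpers; URIP(m) is the same idea for
  // rip-relative operands, which have no base or index register.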

  void instrR(X64Instr op, Reg64 r)  { emitR(op, rn(r)); }
  void instrR(X64Instr op, Reg32 r)  { emitR32(op, rn(r)); }
  void instrR(X64Instr op, Reg16 r)  { emitR16(op, rn(r)); }
  void instrR(X64Instr op, Reg8 r)   { emitR(op, rn(r), sz::byte); }
  void instrRR(X64Instr op, Reg64 x, Reg64 y)   { emitRR(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg32 x, Reg32 y)   { emitRR32(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg16 x, Reg16 y)   { emitRR16(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg8 x, Reg8 y)     { emitRR8(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
  void instrM(X64Instr op, MemoryRef m)         { emitM(op, UMR(m)); }
  void instrM(X64Instr op, RIPRelativeRef m)    { emitM(op, URIP(m),
                                                        sz::qword, true); }
  void instrM32(X64Instr op, MemoryRef m)       { emitM32(op, UMR(m)); }
  void instrM16(X64Instr op, MemoryRef m)       { emitM16(op, UMR(m)); }

  void instrRM(X64Instr op,
               Reg64 r,
               MemoryRef m)        { emitRM(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg32 r,
               MemoryRef m)        { emitRM32(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg16 r,
               MemoryRef m)        { emitRM16(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg8 r,
               MemoryRef m)        { emitRM8(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               RegXMM x,
               MemoryRef m)        { emitRM(op, UMR(m), rn(x)); }

  void instrMR(X64Instr op,
               MemoryRef m,
               Reg64 r)            { emitMR(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg32 r)            { emitMR32(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg16 r)            { emitMR16(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg8 r)             { emitMR8(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               RegXMM x)           { emitMR(op, UMR(m), rn(x)); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               Reg64 r)            { emitMR(op, URIP(m), rn(r),
                                            sz::qword, true); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               RegXMM r)           { emitMR(op, URIP(m), rn(r),
                                            sz::qword, true); }

  void instrIR(X64Instr op, Immed64 i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg32 r) {
    emitIR32(op, rn(r), i.l());
  }
  void instrIR(X64Instr op, Immed i, Reg16 r) {
    emitIR16(op, rn(r), i.w());
  }
  void instrIR(X64Instr op, Immed i, Reg8 r) {
    emitIR8(op, rn(r), i.b());
  }

  void instrIM(X64Instr op, Immed i, MemoryRef m) {
    emitIM(op, UMR(m), i.q());
  }
  void instrIM32(X64Instr op, Immed i, MemoryRef m) {
    emitIM32(op, UMR(m), i.l());
  }
  void instrIM16(X64Instr op, Immed i, MemoryRef m) {
    emitIM16(op, UMR(m), i.w());
  }
  void instrIM8(X64Instr op, Immed i, MemoryRef m) {
    emitIM8(op, UMR(m), i.b());
  }