/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#pragma once

namespace HPHP { namespace jit {

//////////////////////////////////////////////////////////////////////
enum X64InstrFlags {
  IF_REVERSE    = 0x0001, // The operand encoding for some instructions is
                          // "backwards" in x64; these instructions are
                          // called "reverse" instructions. There are a few
                          // details about emitting "reverse" instructions:
                          // (1) for the R_M address mode, we use the MR
                          // opcode, (2) for M_R and R address modes, we use
                          // the RM opcode, and (3) for the R_R address mode,
                          // we still use MR opcode, but we have to swap the
                          // first argument and the second argument.

  IF_TWOBYTEOP  = 0x0002, // Some instructions have two byte opcodes. For
                          // these instructions, an additional byte (0x0F) is
                          // emitted before the standard opcode byte.

  IF_JCC        = 0x0004, // instruction is jcc
  IF_IMUL       = 0x0008, // instruction is imul
  IF_HAS_IMM8   = 0x0010, // instruction has an encoding that takes an 8-bit
                          // immediate
  IF_SHIFT      = 0x0020, // instruction is rol, ror, rcl, rcr, shl, shr, sar
  IF_RET        = 0x0040, // instruction is ret
  IF_SHIFTD     = 0x0080, // instruction is shld, shrd
  IF_NO_REXW    = 0x0100, // rexW prefix is not needed
  IF_MOV        = 0x0200, // instruction is mov
  IF_COMPACTR   = 0x0400, // instruction supports compact-R encoding
  IF_RAX        = 0x0800, // instruction supports special rax encoding
  IF_XCHG       = 0x1000, // instruction is xchg (not xchgb)
  IF_BYTEREG    = 0x2000, // instruction is movzbq, movsbq
  IF_66PREFIXED = 0x4000, // instruction requires a mandatory 0x66 prefix
  IF_F3PREFIXED = 0x8000, // instruction requires a mandatory 0xf3 prefix
  IF_F2PREFIXED = 0x10000, // instruction requires a mandatory 0xf2 prefix
  IF_THREEBYTEOP = 0x20000, // instruction requires a 0x0F 0x3[8A] prefix
  IF_ROUND      = 0x40000, // instruction is round(sp)d
};
/*
  Address mode to table index map:
      Table index 0 <- R_R / M_R(n) / R_M(r) / R(n)
      Table index 1 <- R_M(n) / M_R(r) / R(r)
      Table index 2 <- I / R_I / M_I / R_R_I / M_R_I / R_M_I
      Table index 3 <- "/digit" value used by the above address modes
      Table index 4 <- special R_I (for rax)
      Table index 5 <- compact-R / none

  (n) - for normal instructions only (IF_REVERSE flag is not set)
  (r) - for reverse instructions only (IF_REVERSE flag is set)

  0xF1 is used to indicate invalid opcodes.
*/
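
// Worked example (illustrative): reading instr_add's row in the table
// below,
//     { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 }
// index 0 (0x01) is the MR-form opcode (add r/m64, r64), index 1 (0x03)
// the RM form, index 2 (0x81) the immediate form, index 3 (0x00) its "/0"
// digit, index 4 (0x05) the rax-special form, and index 5 (0xF1) marks
// that no compact-R encoding exists. The flags 0x0810 decode to
// IF_HAS_IMM8 | IF_RAX.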
struct X64Instr {
  unsigned char table[6];
  unsigned long flags;
};
//                                        0    1    2    3    4    5     flags
const X64Instr instr_divsd =     { { 0x5E,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_movups =    { { 0x10,0x11,0xF1,0x00,0xF1,0xF1 }, 0x0103  };
const X64Instr instr_movdqa =    { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103  };
const X64Instr instr_movdqu =    { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103  };
const X64Instr instr_movsd =     { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_gpr2xmm =   { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmm2gpr =   { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002  };
const X64Instr instr_xmmsub =    { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmadd =    { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmmul =    { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmsqrt =   { { 0x51,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_ucomisd =   { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_pxor =      { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_psrlq =     { { 0xF1,0xF1,0x73,0x02,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_psllq =     { { 0xF1,0xF1,0x73,0x06,0xF1,0xF1 }, 0x4112  };
const X64Instr instr_cvtsi2sd =  { { 0x2a,0x2a,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_cvttsd2si = { { 0x2c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_lddqu =     { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
const X64Instr instr_unpcklpd =  { { 0x14,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102  };
const X64Instr instr_jmp =       { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910  };
const X64Instr instr_call =      { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900  };
const X64Instr instr_push =      { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510  };
const X64Instr instr_pop =       { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500  };
const X64Instr instr_inc =       { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_dec =       { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_not =       { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_notb =      { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_neg =       { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_negb =      { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_add =       { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810  };
const X64Instr instr_addb =      { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810  };
const X64Instr instr_sub =       { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810  };
const X64Instr instr_subb =      { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810  };
const X64Instr instr_and =       { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810  };
const X64Instr instr_andb =      { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810  };
const X64Instr instr_or =        { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810  };
const X64Instr instr_orb =       { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810  };
const X64Instr instr_xor =       { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810  };
const X64Instr instr_xorb =      { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810  };
const X64Instr instr_mov =       { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600  };
const X64Instr instr_movb =      { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610  };
const X64Instr instr_test =      { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800  };
const X64Instr instr_testb =     { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810  };
const X64Instr instr_cmp =       { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810  };
const X64Instr instr_cmpb =      { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810  };
const X64Instr instr_sbb =       { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810  };
const X64Instr instr_sbbb =      { { 0x18,0x1A,0x80,0x03,0x1C,0xF1 }, 0x0810  };
const X64Instr instr_adc =       { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810  };
const X64Instr instr_lea =       { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchgb =     { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_xchg =      { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000  };
const X64Instr instr_imul =      { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019  };
const X64Instr instr_mul =       { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_div =       { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_idiv =      { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000  };
const X64Instr instr_cdq =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400  };
const X64Instr instr_ret =       { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540  };
const X64Instr instr_jcc =       { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114  };
const X64Instr instr_cmovcc =    { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_setcc =     { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102  };
const X64Instr instr_movswx =    { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movsbx =    { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_movzwx =    { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003  };
const X64Instr instr_movzbx =    { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003  };
const X64Instr instr_cwde =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400  };
const X64Instr instr_cqo =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0000  };
const X64Instr instr_rol =       { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_ror =       { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcl =       { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_rcr =       { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shl =       { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_shr =       { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_sar =       { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020  };
const X64Instr instr_xadd =      { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_cmpxchg =   { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_nop =       { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500  };
const X64Instr instr_shld =      { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_shrd =      { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082  };
const X64Instr instr_int3 =      { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500  };
const X64Instr instr_roundsd =   { { 0xF1,0xF1,0x0b,0x00,0xF1,0xF1 }, 0x64112 };
const X64Instr instr_cmpsd =     { { 0xF1,0xF1,0xC2,0xF1,0xF1,0xF1 }, 0x10112 };
const X64Instr instr_crc32 =     { { 0xF1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x30001 };
const X64Instr instr_prefetch =  { { 0x18,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0002  };
const X64Instr instr_btr =       { { 0xB3,0xF1,0xBA,0x06,0xF1,0xF1 }, 0x0012  };
///////////////////////////////////////////////////////////////////////////////

/*
 * Copyright (c) 2009, Andrew J. Paroski
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * The names of the contributors may not be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL ANDREW J. PAROSKI BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
struct X64Assembler final : public X64AssemblerBase {
public:
  explicit X64Assembler(CodeBlock& cb) : X64AssemblerBase(cb) {}

  X64Assembler(const X64Assembler&) = delete;
  X64Assembler& operator=(const X64Assembler&) = delete;

  /*
   * The following section defines the main interface for emitting
   * x64.
   *
   * Simple Examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   */
#define BYTE_LOAD_OP(name, instr)                                     \
  void name##b(MemoryRef m, Reg8 r)  { instrMR(instr, m, r); }

#define LOAD_OP(name, instr)                                          \
  void name##q(MemoryRef m, Reg64 r) { instrMR(instr, m, r); }        \
  void name##l(MemoryRef m, Reg32 r) { instrMR(instr, m, r); }        \
  void name##w(MemoryRef m, Reg16 r) { instrMR(instr, m, r); }        \
  void name##q(RIPRelativeRef m, Reg64 r) { instrMR(instr, m, r); }   \
  BYTE_LOAD_OP(name, instr##b)

#define BYTE_STORE_OP(name, instr)                                    \
  void name##b(Reg8 r, MemoryRef m)  { instrRM(instr, r, m); }        \
  void name##b(Immed i, MemoryRef m) { instrIM8(instr, i, m); }

#define STORE_OP(name, instr)                                         \
  void name##w(Immed i, MemoryRef m) { instrIM16(instr, i, m); }      \
  void name##l(Immed i, MemoryRef m) { instrIM32(instr, i, m); }      \
  void name##w(Reg16 r, MemoryRef m) { instrRM(instr, r, m); }        \
  void name##l(Reg32 r, MemoryRef m) { instrRM(instr, r, m); }        \
  void name##q(Reg64 r, MemoryRef m) { instrRM(instr, r, m); }        \
  BYTE_STORE_OP(name, instr ## b)

#define BYTE_REG_OP(name, instr)                                      \
  void name##b(Reg8 r1, Reg8 r2)     { instrRR(instr, r1, r2); }      \
  void name##b(Immed i, Reg8 r)      { instrIR(instr, i, r); }

#define REG_OP(name, instr)                                           \
  void name##q(Reg64 r1, Reg64 r2)   { instrRR(instr, r1, r2); }      \
  void name##l(Reg32 r1, Reg32 r2)   { instrRR(instr, r1, r2); }      \
  void name##w(Reg16 r1, Reg16 r2)   { instrRR(instr, r1, r2); }      \
  void name##l(Immed i, Reg32 r)     { instrIR(instr, i, r); }        \
  void name##w(Immed i, Reg16 r)     { instrIR(instr, i, r); }        \
  BYTE_REG_OP(name, instr##b)
  /*
   * For when we have a memory operand and the operand size is
   * 64-bits, only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64_STORE_OP(name, instr)             \
  void name##q(Immed i, MemoryRef m) {          \
    return instrIM(instr, i, m);                \
  }

  /*
   * For instructions other than movq, even when the operand size is
   * 64 bits only a 32-bit (sign-extended) immediate is supported.
   */
#define IMM64R_OP(name, instr)                  \
  void name##q(Immed imm, Reg64 r) {            \
    always_assert(imm.fits(sz::dword));         \
    return instrIR(instr, imm, r);              \
  }
#define FULL_OP(name, instr)                    \
  LOAD_OP(name, instr)                          \
  STORE_OP(name, instr)                         \
  REG_OP(name, instr)                           \
  IMM64_STORE_OP(name, instr)                   \
  IMM64R_OP(name, instr)
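
  // For illustration (a sketch of what the preprocessor produces):
  // FULL_OP(add, instr_add) generates the whole overload family, e.g.
  //   void addq(MemoryRef m, Reg64 r);   // load-op form
  //   void addq(Reg64 r, MemoryRef m);   // store-op form
  //   void addq(Reg64 r1, Reg64 r2);     // reg-reg form
  //   void addq(Immed i, MemoryRef m);   // imm-to-memory form
  //   void addq(Immed imm, Reg64 r);     // imm-to-reg form
  // plus the l/w/b width variants defined by the macros above.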
  // We rename x64's mov to store and load for improved code
  // readability.
  LOAD_OP        (load,  instr_mov)
  STORE_OP       (store, instr_mov)
  IMM64_STORE_OP (store, instr_mov)
  REG_OP         (mov,   instr_mov)

  FULL_OP(add, instr_add)
  FULL_OP(xor, instr_xor)
  FULL_OP(sub, instr_sub)
  FULL_OP(and, instr_and)
  FULL_OP(or,  instr_or)
  FULL_OP(test,instr_test)
  FULL_OP(cmp, instr_cmp)
  FULL_OP(sbb, instr_sbb)

#undef IMM64R_OP
#undef FULL_OP
#undef REG_OP
#undef STORE_OP
#undef LOAD_OP
#undef BYTE_LOAD_OP
#undef BYTE_STORE_OP
#undef BYTE_REG_OP
#undef IMM64_STORE_OP
  // 64-bit immediates work with mov to a register.
  void movq(Immed64 imm, Reg64 r) { instrIR(instr_mov, imm, r); }

  // movzbx is a special snowflake. We don't have movzbq because it behaves
  // exactly the same as movzbl but takes an extra byte.
  void loadzbl(MemoryRef m, Reg32 r)  { instrMR(instr_movzbx,
                                                m, rbyte(r)); }
  void loadzwl(MemoryRef m, Reg32 r)  { instrMR(instr_movzwx, m, r); }
  void movzbl(Reg8 src, Reg32 dest)   { emitRR32(instr_movzbx,
                                                 rn(src), rn(dest)); }
  void movsbl(Reg8 src, Reg32 dest)   { emitRR(instr_movsbx,
                                               rn(src), rn(dest)); }
  void movzwl(Reg16 src, Reg32 dest)  { emitRR32(instr_movzwx,
                                                 rn(src), rn(dest)); }

  void loadsbq(MemoryRef m, Reg64 r)  { instrMR(instr_movsbx,
                                                m, r); }
  void movsbq(Reg8 src, Reg64 dest)   { emitRR(instr_movsbx,
                                               rn(src), rn(dest)); }
  void crc32q(Reg64 src, Reg64 dest)  { instrRR(instr_crc32, src, dest); }

  void lea(MemoryRef p, Reg64 reg)      { instrMR(instr_lea, p, reg); }
  void lea(RIPRelativeRef p, Reg64 reg) { instrMR(instr_lea, p, reg); }

  void xchgq(Reg64 r1, Reg64 r2) { instrRR(instr_xchg, r1, r2); }
  void xchgl(Reg32 r1, Reg32 r2) { instrRR(instr_xchg, r1, r2); }
  void xchgb(Reg8 r1, Reg8 r2)   { instrRR(instr_xchgb, r1, r2); }

  void imul(Reg64 r1, Reg64 r2)  { instrRR(instr_imul, r1, r2); }

  void push(Reg64 r)  { instrR(instr_push, r); }
  void pushl(Reg32 r) { instrR(instr_push, r); }
  void pop (Reg64 r)  { instrR(instr_pop,  r); }
  void idiv(Reg64 r)  { instrR(instr_idiv, r); }
  void incq(Reg64 r)  { instrR(instr_inc,  r); }
  void incl(Reg32 r)  { instrR(instr_inc,  r); }
  void incw(Reg16 r)  { instrR(instr_inc,  r); }
  void decq(Reg64 r)  { instrR(instr_dec,  r); }
  void decl(Reg32 r)  { instrR(instr_dec,  r); }
  void decw(Reg16 r)  { instrR(instr_dec,  r); }
  void notb(Reg8 r)   { instrR(instr_notb, r); }
  void not(Reg64 r)   { instrR(instr_not,  r); }
  void neg(Reg64 r)   { instrR(instr_neg,  r); }
  void negb(Reg8 r)   { instrR(instr_negb, r); }
  void ret()          { emit(instr_ret); }
  void ret(Immed i)   { emitI(instr_ret, i.w(), sz::word); }
  void cqo()          { emit(instr_cqo); }
  void nop()          { emit(instr_nop); }
  void int3()         { emit(instr_int3); }
  void ud2()          { byte(0x0f); byte(0x0b); }
  void pushf()        { byte(0x9c); }
  void popf()         { byte(0x9d); }
  void lock()         { byte(0xF0); }
  void push(MemoryRef m)     { instrM(instr_push, m); }
  void pop (MemoryRef m)     { instrM(instr_pop,  m); }
  void prefetch(MemoryRef m) { instrM(instr_prefetch, m); }
  void incq(MemoryRef m)     { instrM(instr_inc,  m); }
  void incl(MemoryRef m)     { instrM32(instr_inc, m); }
  void incw(MemoryRef m)     { instrM16(instr_inc, m); }
  void decqlock(MemoryRef m) { lock(); decq(m); }
  void decq(MemoryRef m)     { instrM(instr_dec,  m); }
  void decl(MemoryRef m)     { instrM32(instr_dec, m); }
  void decw(MemoryRef m)     { instrM16(instr_dec, m); }

  void push(Immed64 i) { emitI(instr_push, i.q()); }
  void movups(RegXMM x, MemoryRef m)      { instrRM(instr_movups, x, m); }
  void movups(MemoryRef m, RegXMM x)      { instrMR(instr_movups, m, x); }
  void movdqu(RegXMM x, MemoryRef m)      { instrRM(instr_movdqu, x, m); }
  void movdqu(MemoryRef m, RegXMM x)      { instrMR(instr_movdqu, m, x); }
  void movdqa(RegXMM x, RegXMM y)         { instrRR(instr_movdqa, x, y); }
  void movdqa(RegXMM x, MemoryRef m)      { instrRM(instr_movdqa, x, m); }
  void movdqa(MemoryRef m, RegXMM x)      { instrMR(instr_movdqa, m, x); }
  void movsd (RegXMM x, RegXMM y)         { instrRR(instr_movsd, x, y); }
  void movsd (RegXMM x, MemoryRef m)      { instrRM(instr_movsd, x, m); }
  void movsd (MemoryRef m, RegXMM x)      { instrMR(instr_movsd, m, x); }
  void movsd (RIPRelativeRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
  void lddqu (MemoryRef m, RegXMM x)      { instrMR(instr_lddqu, m, x); }
  void unpcklpd(RegXMM s, RegXMM d)       { instrRR(instr_unpcklpd, d, s); }

  void rorq (Immed i, Reg64 r) { instrIR(instr_ror, i, r); }
  void shlq (Immed i, Reg64 r) { instrIR(instr_shl, i, r); }
  void shrq (Immed i, Reg64 r) { instrIR(instr_shr, i, r); }
  void sarq (Immed i, Reg64 r) { instrIR(instr_sar, i, r); }
  void shll (Immed i, Reg32 r) { instrIR(instr_shl, i, r); }
  void shrl (Immed i, Reg32 r) { instrIR(instr_shr, i, r); }
  void shlw (Immed i, Reg16 r) { instrIR(instr_shl, i, r); }
  void shrw (Immed i, Reg16 r) { instrIR(instr_shr, i, r); }

  void shlq (Reg64 r) { instrR(instr_shl, r); }
  void shrq (Reg64 r) { instrR(instr_shr, r); }
  void sarq (Reg64 r) { instrR(instr_sar, r); }

  void btrq (Immed i, Reg64 r) { instrIR(instr_btr, i, r); }
  void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
    emitIRR(instr_roundsd, rn(dst), rn(src), ssize_t(d));
  }

  void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
    emitIRR(instr_cmpsd, rn(dst), rn(src), ssize_t(pred));
  }

  /*
   * Control-flow directives. Primitive labeling/patching facilities
   * are available, as well as slightly higher-level ones via the
   * Label class.
   */
  void jmp(Reg64 r)           { instrR(instr_jmp, r); }
  void jmp(MemoryRef m)       { instrM(instr_jmp, m); }
  void jmp(RIPRelativeRef m)  { instrM(instr_jmp, m); }
  void call(Reg64 r)          { instrR(instr_call, r); }
  void call(MemoryRef m)      { instrM(instr_call, m); }
  void call(RIPRelativeRef m) { instrM(instr_call, m); }

  void jmp8(CodeAddress dest) { emitJ8(instr_jmp, ssize_t(dest)); }

  void jmp(CodeAddress dest) {
    always_assert_flog(dest && jmpDeltaFits(dest), "Bad Jmp: {}", dest);
    emitJ32(instr_jmp, ssize_t(dest));
  }

  void call(CodeAddress dest) {
    always_assert(dest && jmpDeltaFits(dest));
    emitJ32(instr_call, ssize_t(dest));
  }

  void jcc(ConditionCode cond, CodeAddress dest) {
    emitCJ32(instr_jcc, cond, (ssize_t)dest);
  }

  void jcc8(ConditionCode cond, CodeAddress dest) {
    emitCJ8(instr_jcc, cond, (ssize_t)dest);
  }

  using X64AssemblerBase::call;
  using X64AssemblerBase::jmp;
  using X64AssemblerBase::jmp8;
  using X64AssemblerBase::jcc;
  using X64AssemblerBase::jcc8;
  void setcc(int cc, Reg8 byteReg) {
    emitCR(instr_setcc, cc, rn(byteReg), sz::byte);
  }

  void psllq(Immed i, RegXMM r) { emitIR(instr_psllq, rn(r), i.b()); }
  void psrlq(Immed i, RegXMM r) { emitIR(instr_psrlq, rn(r), i.b()); }

  void movq_rx(Reg64 rSrc, RegXMM rdest) {
    emitRR(instr_gpr2xmm, rn(rdest), rn(rSrc));
  }
  void movq_xr(RegXMM rSrc, Reg64 rdest) {
    emitRR(instr_xmm2gpr, rn(rSrc), rn(rdest));
  }

  void addsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmadd, rn(srcdest), rn(src));
  }
  void mulsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmmul, rn(srcdest), rn(src));
  }
  void subsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_xmmsub, rn(srcdest), rn(src));
  }
  void pxor(RegXMM src, RegXMM srcdest) {
    emitRR(instr_pxor, rn(srcdest), rn(src));
  }
  void cvtsi2sd(Reg64 src, RegXMM dest) {
    emitRR(instr_cvtsi2sd, rn(dest), rn(src));
  }
  void cvtsi2sd(MemoryRef m, RegXMM dest) {
    instrMR(instr_cvtsi2sd, m, dest);
  }
  void ucomisd(RegXMM l, RegXMM r) {
    emitRR(instr_ucomisd, rn(l), rn(r));
  }
  void sqrtsd(RegXMM src, RegXMM dest) {
    emitRR(instr_xmmsqrt, rn(dest), rn(src));
  }

  void divsd(RegXMM src, RegXMM srcdest) {
    emitRR(instr_divsd, rn(srcdest), rn(src));
  }
  void cvttsd2siq(RegXMM src, Reg64 dest) {
    emitRR(instr_cvttsd2si, rn(dest), rn(src));
  }
  void emitInt3s(int n) {
    for (auto i = 0; i < n; ++i) {
      byte(0xcc);
    }
  }

  void emitNop(int n) {
    if (n == 0) return;
    static const uint8_t nops[][9] = {
      { },
      { 0x90 },
      { 0x66, 0x90 },
      { 0x0f, 0x1f, 0x00 },
      { 0x0f, 0x1f, 0x40, 0x00 },
      { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
      { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
    };
    // While n >= 9, emit 9 byte NOPs
    while (n >= 9) {
      bytes(9, nops[9]);
      n -= 9;
    }
    bytes(n, nops[n]);
  }
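
  // For example, emitNop(12) emits one 9-byte NOP followed by the 3-byte
  // form (0x0f 0x1f 0x00), padding 12 bytes with just two instructions.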
  void pad() {
    while (available() >= 2) ud2();
    if (available() > 0) int3();
    assertx(available() == 0);
  }

  ALWAYS_INLINE
  X64Assembler& prefix(const MemoryRef& mr) {
    static const uint8_t prefixes[] = {
      0xFF, // unused
      0x64, // Segment::FS prefix
      0x65  // Segment::GS prefix
    };
    if (mr.segment != Segment::DS) {
      byte(prefixes[int(mr.segment)]);
    }
    return *this;
  }
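
  // E.g. a MemoryRef tagged with Segment::FS causes the 0x64 segment
  // override byte above to be emitted ahead of the instruction proper;
  // plain DS accesses emit nothing extra.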
  /*
   * Low-level emitter functions.
   *
   * These functions are the core of the assembler, and can also be
   * used directly.
   */

  // op %r
  // ------
  // Restrictions:
  //     r cannot be set to 'none'
  ALWAYS_INLINE
  void emitCR(X64Instr op, int jcond, RegNumber regN, int opSz = sz::qword) {
    assert(regN != noreg);
    int r = int(regN);

    // Opsize prefix
    if (opSz == sz::word) {
      byte(kOpsizePrefix);
    }

    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // If the instruction supports compact-R mode, use that
    if (op.flags & IF_COMPACTR) {
      byte(op.table[5] | (r & 7));
      return;
    }
    char opcode = (op.flags & IF_REVERSE) ? op.table[1] : op.table[0];
    char rval = op.table[3];
    // Handle two byte opcodes
    if (op.flags & IF_TWOBYTEOP) byte(0x0F);
    byte(opcode | jcond);
    emitModrm(3, rval, r);
  }
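
  // Worked example (illustrative): incq(r13) reaches emitCR via emitR with
  // instr_inc. opSz is qword so REX.W is set, and r13 needs REX.B, giving
  // the prefix 0x49; the opcode is table[0] = 0xFF with /0 from table[3],
  // so emitModrm(3, 0, 13) yields 0xC5. Emitted bytes: 49 FF C5.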
  ALWAYS_INLINE
  void emitR(X64Instr op, RegNumber r, int opSz = sz::qword) {
    emitCR(op, 0, r, opSz);
  }

  ALWAYS_INLINE
  void emitR32(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::dword);
  }

  ALWAYS_INLINE
  void emitR16(X64Instr op, RegNumber r) {
    emitCR(op, 0, r, sz::word);
  }
  // op %r2, %r1
  // -----------
  // Restrictions:
  //     r1 cannot be set to noreg
  //     r2 cannot be set to noreg
  ALWAYS_INLINE
  void emitCRR(X64Instr op, int jcond, RegNumber rn1, RegNumber rn2,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    prefixBytes(op.flags, opSz);
    // The xchg instruction is special; we have compact encodings for
    // exchanging with rax or eax.
    if (op.flags & IF_XCHG) {
      if (r1 == int(reg::rax)) {
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
        assert(!(op.flags & IF_BYTEREG));
        if (r2 & 8) rex |= (reverse ? 4 : 1);
        if (rex) byte(0x40 | rex);
        // If the second register is rax, emit opcode with the first
        // register id embedded
        byte(op.table[5] | (r2 & 7));
        return;
      } else if (r2 == int(reg::rax)) {
        reverse = !reverse;
        // REX
        unsigned char rex = 0;
        if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) {
          rex |= 8;
        }
        if (r1 & 8) rex |= (reverse ? 1 : 4);
        if (rex) byte(0x40 | rex);
        // If the first register is rax, emit opcode with the second
        // register id embedded
        byte(op.table[5] | (r1 & 7));
        return;
      }
    }
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    // movzbx's first operand is a bytereg regardless of operand size
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // For two/three byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL | IF_THREEBYTEOP)) != 0) byte(0x0F);
    if ((op.flags & IF_THREEBYTEOP) != 0) byte(0x38);
    byte(op.table[0] | jcond);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
  }

  ALWAYS_INLINE
  void emitCRR32(X64Instr op, int jcond, RegNumber r1, RegNumber r2) {
    emitCRR(op, jcond, r1, r2, sz::dword);
  }

  ALWAYS_INLINE
  void emitRR(X64Instr op, RegNumber r1, RegNumber r2, int opSz = sz::qword) {
    emitCRR(op, 0, r1, r2, opSz);
  }

  ALWAYS_INLINE
  void emitRR32(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::dword);
  }

  ALWAYS_INLINE
  void emitRR16(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::word);
  }

  ALWAYS_INLINE
  void emitRR8(X64Instr op, RegNumber r1, RegNumber r2) {
    emitCRR(op, 0, r1, r2, sz::byte);
  }
  // op $imm, %r
  // -----------
  // Restrictions:
  //     r cannot be set to noreg
  ALWAYS_INLINE
  void emitIR(X64Instr op, RegNumber rname, ssize_t imm,
              int opSz = sz::qword) {
    assert(rname != noreg);
    int r = int(rname);
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // Determine the size of the immediate. This might change opSz so
    // do it first.
    int immSize;
    if ((op.flags & IF_MOV) && opSz == sz::qword) {
      immSize = computeImmediateSizeForMovRI64(op, imm, opSz);
    } else {
      immSize = computeImmediateSize(op, imm, opSz);
    }
    // REX
    unsigned char rex = 0;
    bool highByteReg = false;
    if (opSz == sz::byte) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    if (r & 8) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Use the special rax encoding if the instruction supports it
    if (r == int(reg::rax) && immSize == sz::dword &&
        (op.flags & IF_RAX)) {
      byte(op.table[4]);
      emitImmediate(op, imm, immSize);
      return;
    }
    // Use the compact-R encoding if the operand size and the immediate
    // size are the same
    if ((op.flags & IF_COMPACTR) && immSize == opSz) {
      byte(op.table[5] | (r & 7));
      emitImmediate(op, imm, immSize);
      return;
    }
    // For two byte opcodes
    if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
    int rval = op.table[3];
    // shift/rotate instructions have special opcode when
    // immediate is 1
    if ((op.flags & IF_SHIFT) != 0 && imm == 1) {
      byte(0xd1);
      emitModrm(3, rval, r);
      // don't emit immediate
      return;
    }
    int opcode = (immSize == sz::byte && opSz != sz::byte) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    emitModrm(3, rval, r);
    emitImmediate(op, imm, immSize);
  }
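
  // Worked example (illustrative): addq(0x7f, rbx) lands here with
  // instr_add. The immediate fits in a byte and instr_add has IF_HAS_IMM8,
  // so the opcode becomes table[2] | 2 = 0x83, and the bytes emitted are
  // 48 83 C3 7F (REX.W, opcode, modrm /0 on rbx, imm8).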
  ALWAYS_INLINE
  void emitIR32(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, imm, sz::dword);
  }

  ALWAYS_INLINE
  void emitIR16(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int16_t>(imm), sz::word);
  }

  ALWAYS_INLINE
  void emitIR8(X64Instr op, RegNumber r, ssize_t imm) {
    emitIR(op, r, safe_cast<int8_t>(imm), sz::byte);
  }
  // op $imm, %r2, %r1
  // -----------------
  // Restrictions:
  //     r1 cannot be set to noreg
  //     r2 cannot be set to noreg
  ALWAYS_INLINE
  void emitIRR(X64Instr op, RegNumber rn1, RegNumber rn2, ssize_t imm,
               int opSz = sz::qword) {
    assert(rn1 != noreg && rn2 != noreg);
    int r1 = int(rn1);
    int r2 = int(rn2);
    bool reverse = ((op.flags & IF_REVERSE) != 0);
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // REX
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
    bool highByteReg = false;
    if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
      if (byteRegNeedsRex(r1) ||
          (!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
        rex |= 0x40;
      }
      r1 = byteRegEncodeNumber(r1, highByteReg);
      r2 = byteRegEncodeNumber(r2, highByteReg);
    }
    if (r1 & 8) rex |= (reverse ? 1 : 4);
    if (r2 & 8) rex |= (reverse ? 4 : 1);
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    if (op.flags & IF_TWOBYTEOP || op.flags & IF_THREEBYTEOP) byte(0x0F);
    if (op.flags & IF_THREEBYTEOP) byte(0x3a);
    int opcode = (immSize == sz::byte && opSz != sz::byte &&
                  (op.flags & IF_ROUND) == 0) ?
      (op.table[2] | 2) : op.table[2];
    byte(opcode);
    if (reverse) {
      emitModrm(3, r2, r1);
    } else {
      emitModrm(3, r1, r2);
    }
    emitImmediate(op, imm, immSize);
  }
  ALWAYS_INLINE
  void emitCI(X64Instr op, int jcond, ssize_t imm, int opSz = sz::qword) {
    // Opsize prefix
    prefixBytes(op.flags, opSz);
    // REX
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    // Determine the size of the immediate
    int immSize = computeImmediateSize(op, imm, opSz);
    // Emit opcode
    if ((op.flags & IF_JCC) != 0) {
      // jcc is weird so we handle it separately
      if (immSize != sz::byte) {
        byte(0x0F);
        byte(jcond | 0x80);
      } else {
        byte(jcond | 0x70);
      }
    } else {
      int opcode = (immSize == sz::byte && opSz != sz::byte) ?
        (op.table[2] | 2) : op.table[2];
      byte(jcond | opcode);
    }
    emitImmediate(op, imm, immSize);
  }

  ALWAYS_INLINE
  void emitI(X64Instr op, ssize_t imm, int opSz = sz::qword) {
    emitCI(op, 0, imm, opSz);
  }
  ALWAYS_INLINE
  void emitJ8(X64Instr op, ssize_t imm) {
    assert((op.flags & IF_JCC) == 0);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode and 8-bit immediate
    byte(0xEB);
    byte(safe_cast<int8_t>(delta));
  }

  ALWAYS_INLINE
  void emitCJ8(X64Instr op, int jcond, ssize_t imm) {
    // this is for jcc only
    assert(op.flags & IF_JCC);
    ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
    // Emit opcode
    byte(jcond | 0x70);
    // Emit 8-bit offset
    byte(safe_cast<int8_t>(delta));
  }

  ALWAYS_INLINE
  void emitJ32(X64Instr op, ssize_t imm) {
    // call and jmp are supported, jcc is not supported
    assert((op.flags & IF_JCC) == 0);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 5));
    uint8_t *bdelta = (uint8_t*)&delta;
    uint8_t instr[] = { op.table[2],
      bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(5, instr);
  }

  ALWAYS_INLINE
  void emitCJ32(X64Instr op, int jcond, ssize_t imm) {
    // jcc is supported, call and jmp are not supported
    assert(op.flags & IF_JCC);
    int32_t delta =
      safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 6));
    uint8_t* bdelta = (uint8_t*)&delta;
    uint8_t instr[6] = { 0x0f, uint8_t(0x80 | jcond),
      bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
    bytes(6, instr);
  }
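
  // Note the delta math above: each branch is relative to the end of the
  // instruction being emitted, so the short forms subtract frontier() + 2
  // (opcode + rel8), emitJ32 subtracts frontier() + 5 (opcode + rel32),
  // and emitCJ32 subtracts frontier() + 6 (0x0F prefix + opcode + rel32).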
  // op disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == false, r == noreg)
  // op $imm, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == true,  r == noreg)
  // op %r, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == false, r != noreg)
  // op $imm, %r, disp(%br,%ir,s)
  //   (for reverse == false, hasImmediate == true,  r != noreg)
  // op disp(%br,%ir,s), %r
  //   (for reverse == true,  hasImmediate == false, r != noreg)
  // op $imm, disp(%br,%ir,s), %r
  //   (for reverse == true,  hasImmediate == true,  r != noreg)
  // -----------------------------------------------------------------
  // Restrictions:
  //     ir cannot be set to 'sp'
  ALWAYS_INLINE
  void emitCMX(X64Instr op, int jcond, RegNumber brName, RegNumber irName,
               int s, int64_t disp,
               RegNumber rName,
               bool reverse = false,
               ssize_t imm = 0,
               bool hasImmediate = false,
               int opSz = sz::qword,
               bool ripRelative = false) {
    assert(irName != rn(reg::rsp));

    int ir = int(irName);
    int r = int(rName);
    int br = int(brName);

    // The opsize prefix can be placed here, if the instruction
    // deals with words.
    // When an instruction has a mandatory prefix, it goes before the
    // REX byte if we end up needing one.
    prefixBytes(op.flags, opSz);

    // Determine immSize from the 'hasImmediate' flag
    int immSize = sz::nosize;
    if (hasImmediate) {
      immSize = computeImmediateSize(op, imm, opSz);
    }
    if ((op.flags & IF_REVERSE) != 0) reverse = !reverse;
    // Determine if we need to use a two byte opcode;
    // imul is weird so we have a special case for it
    bool twoByteOpcode = ((op.flags & IF_TWOBYTEOP) != 0) ||
      ((op.flags & IF_IMUL) != 0 && rName != noreg &&
       immSize == sz::nosize);
    // Again, imul is weird
    if ((op.flags & IF_IMUL) != 0 && rName != noreg) {
      reverse = !reverse;
    }
    // The wily rex byte, a multipurpose extension to the opcode space for x64
    unsigned char rex = 0;
    if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;

    bool highByteReg = false;
    // XXX: This IF_BYTEREG check is a special case for movzbl: we currently
    // encode it using an opSz of sz::byte but it doesn't actually have a
    // byte-sized operand like other instructions can.
    if (!(op.flags & IF_BYTEREG) && opSz == sz::byte && rName != noreg) {
      if (byteRegNeedsRex(r)) {
        rex |= 0x40;
      }
      r = byteRegEncodeNumber(r, highByteReg);
    }

    if (rName != noreg && (r & 8)) rex |= 4;
    if (irName != noreg && (ir & 8)) rex |= 2;
    if (brName != noreg && (br & 8)) rex |= 1;
    if (rex) {
      byte(0x40 | rex);
      if (highByteReg) byteRegMisuse();
    }
    // Emit the opcode
    if (immSize != sz::nosize) {
      if (twoByteOpcode) byte(0x0F);
      if (immSize == sz::byte && opSz != sz::byte) {
        byte(op.table[2] | 2 | jcond);
      } else {
        byte(op.table[2] | jcond);
      }
    } else {
      if (twoByteOpcode) byte(0x0F);
      int opcode;
      if ((op.flags & IF_IMUL) != 0) {
        opcode = (rName == noreg) ? op.table[1] : op.table[0];
      } else {
        opcode = reverse ? op.table[1] : op.table[0];
      }
      byte(opcode | jcond);
    }
    // SIB byte if:
    //   1. We're using an index register.
    //   2. The base register is rsp-like.
    //   3. We're doing a baseless disp access and it is not rip-relative.
    bool sibIsNeeded =
      ir != int(noreg) ||                           /* 1 */
      br == int(reg::rsp) || br == int(reg::r12) || /* 2 */
      (br == int(noreg) && !ripRelative);
    // If there is no register and no immediate, use the /r value
    if (r == int(noreg)) r = op.table[3];
    // If noreg was specified for 'ir', we use
    // the encoding for the sp register
    if (ir == int(noreg)) ir = 4;
    int dispSize = sz::nosize;
    if (disp != 0) {
      if (!ripRelative && disp <= 127 && disp >= -128) {
        dispSize = sz::byte;
      } else {
        dispSize = sz::dword;
      }
    }
    // Set 'mod' based on the size of the displacement
    int mod;
    switch (dispSize) {
      case sz::nosize: mod = 0; break;
      case sz::byte:   mod = 1; break;
      default:         mod = 2; break;
    }
    // Handle special cases for 'br'
    if (br == int(noreg)) {
      // If noreg was specified for 'br', we use the encoding
      // for the rbp register (or rip, if we're emitting a
      // rip-relative instruction), and we must set mod=0 and
      // "upgrade" to a DWORD-sized displacement
      br = 5;
      mod = 0;
      dispSize = sz::dword;
    } else if ((br & 7) == 5 && dispSize == sz::nosize) {
      // If br == rbp and no displacement was specified, we
      // must "upgrade" to using a 1-byte displacement value
      dispSize = sz::byte;
      mod = 1;
    }
    // Emit modr/m and the sib
    if (sibIsNeeded) {
      // s:                               0   1  2   3  4   5   6   7  8
      static const int scaleLookup[] = { -1,  0, 1, -1, 2, -1, -1, -1, 3 };
      assert(s > 0 && s <= 8);
      int scale = scaleLookup[s];
      assert(scale != -1);
      emitModrm(mod, r, 4);
      byte((scale << 6) | ((ir & 7) << 3) | (br & 7));
    } else {
      emitModrm(mod, r, br);
    }
    // Emit displacement if needed
    if (dispSize == sz::dword) {
      if (ripRelative) {
        disp -= (int64_t)codeBlock.frontier() + immSize + dispSize;
        always_assert(deltaFits(disp, sz::dword));
      }
      dword(disp);
    } else if (dispSize == sz::byte) {
      byte(disp & 0xff);
    }
    // Emit immediate if needed
    if (immSize != sz::nosize) {
      emitImmediate(op, imm, immSize);
    }
  }
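
  // Worked example (illustrative): storeq(rcx, rax[0xc]) reaches emitCMX
  // with br = rax, ir = noreg, disp = 0xc, r = rcx. No SIB byte is needed
  // (no index register, base is not rsp-like), the displacement fits in a
  // byte so mod = 1, and the bytes come out as 48 89 48 0C (REX.W, mov MR
  // opcode 0x89, modrm, disp8).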
  ALWAYS_INLINE
  void emitIM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitIM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::byte);
  }

  ALWAYS_INLINE
  void emitIM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
            sz::word);
  }

  ALWAYS_INLINE
  void emitIM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                ssize_t imm) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, sz::dword);
  }

  ALWAYS_INLINE
  void emitRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
              RegNumber r, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, opSz);
  }

  ALWAYS_INLINE
  void emitRM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitRM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitRM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::byte);
  }

  ALWAYS_INLINE
  void emitCMR(X64Instr op, int jcond, RegNumber br, RegNumber ir,
               int s, int disp, RegNumber r, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, r, true, 0, false, opSz);
  }

  ALWAYS_INLINE
  void emitMR(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
              RegNumber r, int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, opSz, ripRelative);
  }

  ALWAYS_INLINE
  void emitMR32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitMR16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
                RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitMR8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r) {
    emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::byte);
  }

  ALWAYS_INLINE
  void emitIRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, false, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitIMR(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
               RegNumber r, ssize_t imm, int opSz = sz::qword) {
    emitCMX(op, 0, br, ir, s, disp, r, true, imm, true, opSz);
  }

  ALWAYS_INLINE
  void emitM(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
             int opSz = sz::qword, bool ripRelative = false) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, opSz,
            ripRelative);
  }

  ALWAYS_INLINE
  void emitM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::dword);
  }

  ALWAYS_INLINE
  void emitM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
    emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::word);
  }

  ALWAYS_INLINE
  void emitCM(X64Instr op, int jcond, RegNumber br,
              RegNumber ir, int s, int disp, int opSz = sz::qword) {
    emitCMX(op, jcond, br, ir, s, disp, noreg, false, 0, false, opSz);
  }

  // emit (with no arguments)
  ALWAYS_INLINE
  void emit(X64Instr op) {
    if ((op.flags & IF_NO_REXW) == 0) {
      byte(0x48);
    }
    byte(op.table[5]);
  }
public:
  /*
   * The following functions use a naming convention for an older API
   * to the assembler; conditional loads and moves haven't yet been
   * ported.
   */

  // CMOVcc [rbase + off], rdest
  inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
                                     int off, Reg64 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/);
  }
  inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
                                     int off, Reg32 rdest) {
    emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
            false /*reverse*/,
            0 /*imm*/,
            false /*hasImmediate*/,
            sz::dword /*opSz*/);
  }
  inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
    emitCRR(instr_cmovcc, cc, rn(rsrc), rn(rdest));
  }
private:
  bool byteRegNeedsRex(int rn) const {
    // Without a rex, 4 through 7 mean the high 8-bit byte registers.
    return rn >= 4 && rn <= 7;
  }
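
  // E.g. register number 4 encoded without any REX byte selects ah, while
  // the same number behind a REX prefix (even the bare 0x40) selects spl;
  // that's why byte ops on spl/bpl/sil/dil force at least an empty REX.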
  int byteRegEncodeNumber(int rn, bool& seenHigh) const {
    // We flag a bit in ah, ch, dh, bh so byteRegNeedsRex doesn't
    // trigger.
    if (rn & 0x80) seenHigh = true;
    return rn & ~0x80;
  }

  // In 64-bit mode, you can't mix accesses to high byte registers
  // with low byte registers other than al,cl,bl,dl. We assert this.
  void byteRegMisuse() const {
    assert(!"High byte registers can't be used with new x64 registers, or"
            " anything requiring a REX prefix");
  }

  int computeImmediateSize(X64Instr op,
                           ssize_t imm,
                           int opsize = sz::dword) {
    // Most instructions take a 32-bit or 16-bit immediate,
    // depending on the presence of the opsize prefix (0x66).
    int immSize = opsize == sz::word ? sz::word : sz::dword;
    // ret always takes a 16-bit immediate.
    if (op.flags & IF_RET) {
      immSize = sz::word;
    }
    // Use an 8-bit immediate if the instruction supports it and if
    // the immediate value fits in a byte
    if (deltaFits(imm, sz::byte) && (op.flags & IF_HAS_IMM8) != 0) {
      immSize = sz::byte;
    }
    return immSize;
  }
  void emitModrm(int x, int y, int z) {
    byte((x << 6) | ((y & 7) << 3) | (z & 7));
  }

  /*
   * The mov instruction supports an 8 byte immediate for the RI
   * address mode when opSz is qword. It also supports a 4-byte
   * immediate with opSz qword (the immediate is sign-extended).
   *
   * On the other hand, if it fits in 32-bits as an unsigned, we can
   * change opSz to dword, which will zero the top 4 bytes instead of
   * sign-extending.
   */
  int computeImmediateSizeForMovRI64(X64Instr op, ssize_t imm, int& opSz) {
    assert(opSz == sz::qword);
    if (deltaFits(imm, sz::dword)) {
      return computeImmediateSize(op, imm);
    }
    if (magFits(imm, sz::dword)) {
      opSz = sz::dword;
      return sz::dword;
    }
    return sz::qword;
  }
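
  // For instance, movq(0xffffffffLL, rax) doesn't fit a signed dword but
  // does fit an unsigned one, so opSz drops to dword and the compact-R
  // encoding B8 FF FF FF FF (mov eax, imm32, which zero-extends) is used
  // instead of the 10-byte movabs form.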
  void emitImmediate(X64Instr op, ssize_t imm, int immSize) {
    if (immSize == sz::nosize) {
      return;
    }
    if ((op.flags & (IF_SHIFT | IF_SHIFTD)) == 0) {
      if (immSize == sz::dword) {
        dword(imm);
      } else if (immSize == sz::byte) {
        byte(imm);
      } else if (immSize == sz::word) {
        word(imm);
      } else {
        qword(imm);
      }
    } else {
      // we always use a byte-sized immediate for shift instructions
      byte(imm);
    }
  }

  void prefixBytes(unsigned long flags, int opSz) {
    if (opSz == sz::word && !(flags & IF_RET)) byte(kOpsizePrefix);
    if (flags & IF_66PREFIXED) byte(0x66);
    if (flags & IF_F2PREFIXED) byte(0xF2);
    if (flags & IF_F3PREFIXED) byte(0xF3);
  }
private:
  // Wraps a bunch of the emit* functions to make using them with the
  // typed wrappers more terse. We should have these replace
  // the emit functions eventually.

#define UMR(m)  rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
#define URIP(m) noreg, noreg, sz::byte, m.r.disp

  void instrR(X64Instr op, Reg64 r)  { emitR(op, rn(r)); }
  void instrR(X64Instr op, Reg32 r)  { emitR32(op, rn(r)); }
  void instrR(X64Instr op, Reg16 r)  { emitR16(op, rn(r)); }
  void instrR(X64Instr op, Reg8 r)   { emitR(op, rn(r), sz::byte); }
  void instrRR(X64Instr op, Reg64 x, Reg64 y)   { emitRR(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg32 x, Reg32 y)   { emitRR32(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg16 x, Reg16 y)   { emitRR16(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, Reg8 x, Reg8 y)     { emitRR8(op, rn(x), rn(y)); }
  void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
  void instrM(X64Instr op, MemoryRef m)         { emitM(op, UMR(m)); }
  void instrM(X64Instr op, RIPRelativeRef m)    { emitM(op, URIP(m),
                                                        sz::qword, true); }
  void instrM32(X64Instr op, MemoryRef m)       { emitM32(op, UMR(m)); }
  void instrM16(X64Instr op, MemoryRef m)       { emitM16(op, UMR(m)); }
  void instrRM(X64Instr op,
               Reg64 r,
               MemoryRef m)   { emitRM(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg32 r,
               MemoryRef m)   { emitRM32(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg16 r,
               MemoryRef m)   { emitRM16(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               Reg8 r,
               MemoryRef m)   { emitRM8(op, UMR(m), rn(r)); }
  void instrRM(X64Instr op,
               RegXMM x,
               MemoryRef m)   { emitRM(op, UMR(m), rn(x)); }

  void instrMR(X64Instr op,
               MemoryRef m,
               Reg64 r)       { emitMR(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg32 r)       { emitMR32(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg16 r)       { emitMR16(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               Reg8 r)        { emitMR8(op, UMR(m), rn(r)); }
  void instrMR(X64Instr op,
               MemoryRef m,
               RegXMM x)      { emitMR(op, UMR(m), rn(x)); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               Reg64 r)       { emitMR(op, URIP(m), rn(r),
                                       sz::qword, true); }
  void instrMR(X64Instr op,
               RIPRelativeRef m,
               RegXMM r)      { emitMR(op, URIP(m), rn(r),
                                       sz::qword, true); }

  void instrIR(X64Instr op, Immed64 i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg64 r) {
    emitIR(op, rn(r), i.q());
  }
  void instrIR(X64Instr op, Immed i, Reg32 r) {
    emitIR32(op, rn(r), i.l());
  }
  void instrIR(X64Instr op, Immed i, Reg16 r) {
    emitIR16(op, rn(r), i.w());
  }
  void instrIR(X64Instr op, Immed i, Reg8 r) {
    emitIR8(op, rn(r), i.b());
  }

  void instrIM(X64Instr op, Immed i, MemoryRef m) {
    emitIM(op, UMR(m), i.q());
  }
  void instrIM32(X64Instr op, Immed i, MemoryRef m) {
    emitIM32(op, UMR(m), i.l());
  }
  void instrIM16(X64Instr op, Immed i, MemoryRef m) {
    emitIM16(op, UMR(m), i.w());
  }
  void instrIM8(X64Instr op, Immed i, MemoryRef m) {
    emitIM8(op, UMR(m), i.b());
  }
#undef UMR
#undef URIP
};

//////////////////////////////////////////////////////////////////////

}}