This fixes a bug in PHP/HH's crypt_blowfish implementation that can cause a short...
[hiphop-php.git] / hphp / util / asm-x64-intelxed.h
blob7ec4e7b46168ec2d4f7ce48e6394a02ae877f021
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 2018 Intel Corporation |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
17 #pragma once
19 extern "C" {
20 #include <xed-interface.h>
23 #include <tbb/concurrent_unordered_map.h>
26 * A macro assembler for x64, based on the Intel XED library, that strives
27 * for low coupling to the runtime environment and ease of extendability.
29 namespace HPHP::jit {
/*
 * One-time global initialization for the XED library: xed_tables_init()
 * must run before any encode request is built.  Instantiate one XedInit
 * (e.g. as a static) to guarantee the tables are initialized.
 */
struct XedInit {
  XedInit() {
    xed_tables_init();
  }
};
38 ///////////////////////////////////////////////////////////////////////////////
struct XedAssembler final : public X64AssemblerBase {
private:
  // Encoder state shared by every request: 64-bit long mode with
  // 64-bit addressing.
  static constexpr xed_state_t kXedState = {
    XED_MACHINE_MODE_LONG_64,
    XED_ADDRESS_WIDTH_64b
  };

  // Destination address for the next emitted instruction; asserts there
  // is room in the code block for a maximum-size instruction.
  CodeAddress dest() const {
    codeBlock.assertCanEmit(XED_MAX_INSTRUCTION_BYTES);
    return codeBlock.toDestAddress(codeBlock.frontier());
  }

  // Placeholder RIP-relative operand used when probing instruction
  // lengths before the real displacement is known.
  static constexpr auto nullrip = RIPRelativeRef(DispRIP(0));

public:
  explicit XedAssembler(CodeBlock& cb) : X64AssemblerBase(cb) {}

  XedAssembler(const XedAssembler&) = delete;
  XedAssembler& operator=(const XedAssembler&) = delete;
  /*
   * The following section defines the main interface for emitting
   * x64.
   *
   * Simple Examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   */

  // Byte-sized load from memory into a byte register.
#define BYTE_LOAD_OP(name, instr)                                     \
  void name##b(MemoryRef m, Reg8 r) { xedInstrMR(instr, m, r); }

  // Loads of every operand size, plus a RIP-relative 64-bit form.
#define LOAD_OP(name, instr)                                          \
  void name##q(MemoryRef m, Reg64 r) { xedInstrMR(instr, m, r); }     \
  void name##l(MemoryRef m, Reg32 r) { xedInstrMR(instr, m, r); }     \
  void name##w(MemoryRef m, Reg16 r) { xedInstrMR(instr, m, r); }     \
  void name##q(RIPRelativeRef m, Reg64 r) { xedInstrMR(instr, m, r); }\
  BYTE_LOAD_OP(name, instr)

  // Byte-sized stores (register or immediate source).
#define BYTE_STORE_OP(name, instr)                                    \
  void name##b(Reg8 r, MemoryRef m)  { xedInstrRM(instr, r, m); }     \
  void name##b(Immed i, MemoryRef m) { xedInstrIM(instr, i, m,        \
                                                  sz::byte); }

  // Stores of every operand size.  IMMPROP (defined just before each
  // instantiation batch below) selects the immediate sizes legal for
  // the instruction being instantiated.
#define STORE_OP(name, instr)                                         \
  void name##w(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::word,                         \
                                    sz::word | sz::byte), sz::word);  \
  }                                                                   \
  void name##l(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::dword,                        \
                                    sz::dword | sz::byte), sz::dword);\
  }                                                                   \
  void name##w(Reg16 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  void name##l(Reg32 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  void name##q(Reg64 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  BYTE_STORE_OP(name, instr)

  // Byte-sized register-register / immediate-register forms.
#define BYTE_REG_OP(name, instr)                                      \
  void name##b(Reg8 r1, Reg8 r2) { xedInstrRR(instr, r1, r2);}        \
  void name##b(Immed i, Reg8 r)  { xedInstrIR(instr, i, r); }

  // Register-register and immediate-register forms for all sizes.
#define REG_OP(name, instr)                                           \
  void name##q(Reg64 r1, Reg64 r2)   { xedInstrRR(instr, r1, r2); }   \
  void name##l(Reg32 r1, Reg32 r2)   { xedInstrRR(instr, r1, r2); }   \
  void name##w(Reg16 r1, Reg16 r2)   { xedInstrRR(instr, r1, r2); }   \
  void name##l(Immed i, Reg32 r) {                                    \
    xedInstrIR(instr, i, r, IMMPROP(sz::dword,                        \
                                    sz::dword | sz::byte));           \
  }                                                                   \
  void name##w(Immed i, Reg16 r) {                                    \
    xedInstrIR(instr, i, r, IMMPROP(sz::word,                         \
                                    sz::word | sz::byte));            \
  }                                                                   \
  BYTE_REG_OP(name, instr)

  // 64-bit store of a (sign-extended) immediate.
#define IMM64_STORE_OP(name, instr)                                   \
  void name##q(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::dword,                        \
                                    sz::dword | sz::byte), sz::qword);\
  }

  // 64-bit reg forms only accept immediates that fit in 32 bits.
#define IMM64R_OP(name, instr)                                        \
  void name##q(Immed imm, Reg64 r) {                                  \
    always_assert(imm.fits(sz::dword));                               \
    xedInstrIR(instr, imm, r, IMMPROP(sz::dword,                      \
                                      sz::dword | sz::byte));         \
  }

  // The full complement of forms for a binary ALU instruction.
#define FULL_OP(name, instr)                                          \
  LOAD_OP(name, instr)                                                \
  STORE_OP(name, instr)                                               \
  REG_OP(name, instr)                                                 \
  IMM64_STORE_OP(name, instr)                                         \
  IMM64R_OP(name, instr)

  // We rename x64's mov to store and load for improved code
  // readability.
#define IMMPROP(size, allsizes) size
  LOAD_OP        (load, XED_ICLASS_MOV)
  STORE_OP       (store,XED_ICLASS_MOV)
  IMM64_STORE_OP (store,XED_ICLASS_MOV)
  REG_OP         (mov,  XED_ICLASS_MOV)
  FULL_OP        (test, XED_ICLASS_TEST)
#undef IMMPROP

  // ALU ops may shrink immediates to the smallest size that fits.
#define IMMPROP(size, allsizes) allsizes
  FULL_OP(add, XED_ICLASS_ADD)
  FULL_OP(xor, XED_ICLASS_XOR)
  FULL_OP(sub, XED_ICLASS_SUB)
  FULL_OP(and, XED_ICLASS_AND)
  FULL_OP(or,  XED_ICLASS_OR)
  FULL_OP(cmp, XED_ICLASS_CMP)
  FULL_OP(sbb, XED_ICLASS_SBB)
#undef IMMPROP

#undef IMM64R_OP
#undef FULL_OP
#undef REG_OP
#undef STORE_OP
#undef LOAD_OP
#undef BYTE_LOAD_OP
#undef BYTE_STORE_OP
#undef BYTE_REG_OP
#undef IMM64_STORE_OP
  // 64-bit immediates work with mov to a register.
  void movq(Immed64 imm, Reg64 r) { xedInstrIR(XED_ICLASS_MOV, imm, r,
                                               sz::qword | sz::dword); }

  // movzbx is a special snowflake. We don't have movzbq because it behaves
  // exactly the same as movzbl but takes an extra byte.
  void loadzbl(MemoryRef m, Reg32 r)  { xedInstrMR(XED_ICLASS_MOVZX,
                                                   m, r, sz::byte); }
  void loadzwl(MemoryRef m, Reg32 r)  { xedInstrMR(XED_ICLASS_MOVZX,
                                                   m, r, sz::word); }
  void movzbl(Reg8 src, Reg32 dest)   { xedInstrRR(XED_ICLASS_MOVZX,
                                                   src, dest); }
  void movsbl(Reg8 src, Reg32 dest)   { xedInstrRR(XED_ICLASS_MOVSX,
                                                   src, dest); }
  void movzwl(Reg16 src, Reg32 dest)  { xedInstrRR(XED_ICLASS_MOVZX,
                                                   src, dest); }

  void loadsbq(MemoryRef m, Reg64 r)  { xedInstrMR(XED_ICLASS_MOVSX,
                                                   m, r, sz::byte); }
  void movsbq(Reg8 src, Reg64 dest)   { xedInstrRR(XED_ICLASS_MOVSX,
                                                   src, dest); }
  void crc32q(Reg64 src, Reg64 dest)  { xedInstrRR(XED_ICLASS_CRC32,
                                                   src, dest); }

  void lea(MemoryRef p, Reg64 reg)      { xedInstrMR(XED_ICLASS_LEA, p, reg); }
  void lea(RIPRelativeRef p, Reg64 reg) { xedInstrMR(XED_ICLASS_LEA, p, reg); }

  void xchgq(Reg64 r1, Reg64 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
  void xchgl(Reg32 r1, Reg32 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
  void xchgb(Reg8 r1, Reg8 r2)   { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }

  void imul(Reg64 r1, Reg64 r2)  { xedInstrRR(XED_ICLASS_IMUL, r1, r2); }

  void push(Reg64 r)  { xedInstrR(XED_ICLASS_PUSH, r); }
  void pushl(Reg32 r) { xedInstrR(XED_ICLASS_PUSH, r); }
  void pop (Reg64 r)  { xedInstrR(XED_ICLASS_POP,  r); }
  void idiv(Reg64 r)  { xedInstrR(XED_ICLASS_IDIV, r); }
  void incq(Reg64 r)  { xedInstrR(XED_ICLASS_INC,  r); }
  void incl(Reg32 r)  { xedInstrR(XED_ICLASS_INC,  r); }
  void incw(Reg16 r)  { xedInstrR(XED_ICLASS_INC,  r); }
  void decq(Reg64 r)  { xedInstrR(XED_ICLASS_DEC,  r); }
  void decl(Reg32 r)  { xedInstrR(XED_ICLASS_DEC,  r); }
  void decw(Reg16 r)  { xedInstrR(XED_ICLASS_DEC,  r); }
  void notb(Reg8 r)   { xedInstrR(XED_ICLASS_NOT,  r); }
  // NOTE(review): `not` is an alternative operator token in standard
  // C++ -- presumably this builds with -fno-operator-names; confirm.
  void not(Reg64 r)   { xedInstrR(XED_ICLASS_NOT,  r); }
  void neg(Reg64 r)   { xedInstrR(XED_ICLASS_NEG,  r); }
  void negb(Reg8 r)   { xedInstrR(XED_ICLASS_NEG,  r); }
  void ret()          { xedInstr(XED_ICLASS_RET_NEAR); }
225 void ret(Immed i) { xedInstrI(XED_ICLASS_IRET, i,
226 sz::word); }
  void cqo()    { xedInstr(XED_ICLASS_CQO); }
  void nop()    { xedInstr(XED_ICLASS_NOP,   sz::byte); }
  void int3()   { xedInstr(XED_ICLASS_INT3,  sz::byte); }
  void ud2()    { xedInstr(XED_ICLASS_UD2,   sz::byte); }
  void pushf()  { xedInstr(XED_ICLASS_PUSHF, sz::word); }
  void popf()   { xedInstr(XED_ICLASS_POPF,  sz::word); }
  // A bare lock prefix is not supported; use the *lock iclasses
  // (e.g. XED_ICLASS_DEC_LOCK) instead.
  void lock()   { always_assert(false); }

  void push(MemoryRef m)     { xedInstrM(XED_ICLASS_PUSH, m); }
  void pop (MemoryRef m)     { xedInstrM(XED_ICLASS_POP,  m); }
  void prefetch(MemoryRef m) { xedInstrM(XED_ICLASS_PREFETCHT1, m); }
  void incq(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m); }
  void incl(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m, sz::dword); }
  void incw(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m, sz::word); }
  void decqlock(MemoryRef m) { xedInstrM(XED_ICLASS_DEC_LOCK, m); }
  void decq(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m); }
  void decl(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m, sz::dword); }
  void decw(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m, sz::word); }

  //special case for push(imm): pick a 16-bit effective operand size for
  //small immediates, 64-bit otherwise.
  void push(Immed64 i) {
    xed_encoder_operand_t op = toXedOperand(i, sz::byte | sz::word | sz::dword);
    xedEmit(XED_ICLASS_PUSH, op, op.width_bits < 32 ? 16 : 64);
  }
  // 128-bit SSE moves; movups uses an explicit 16-byte memory size.
  void movups(RegXMM x, MemoryRef m)      { xedInstrRM(XED_ICLASS_MOVUPS,
                                                       x, m, sz::qword * 2); }
  void movups(MemoryRef m, RegXMM x)      { xedInstrMR(XED_ICLASS_MOVUPS,
                                                       m, x, sz::qword * 2); }
  void movdqu(RegXMM x, MemoryRef m)      { xedInstrRM(XED_ICLASS_MOVDQU,
                                                       x, m); }
  void movdqu(MemoryRef m, RegXMM x)      { xedInstrMR(XED_ICLASS_MOVDQU,
                                                       m, x); }
  void movdqa(RegXMM x, RegXMM y)         { xedInstrRR(XED_ICLASS_MOVDQA,
                                                       y, x); }
  void movdqa(RegXMM x, MemoryRef m)      { xedInstrRM(XED_ICLASS_MOVDQA,
                                                       x, m); }
  void movdqa(MemoryRef m, RegXMM x)      { xedInstrMR(XED_ICLASS_MOVDQA,
                                                       m, x); }
  void movsd (RegXMM x, RegXMM y)         { xedInstrRR(XED_ICLASS_MOVSD_XMM,
                                                       y, x); }
  void movsd (RegXMM x, MemoryRef m)      { xedInstrRM(XED_ICLASS_MOVSD_XMM,
                                                       x, m); }
  void movsd (MemoryRef m, RegXMM x)      { xedInstrMR(XED_ICLASS_MOVSD_XMM,
                                                       m, x); }
  void movsd (RIPRelativeRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVSD_XMM,
                                                       m, x); }
  void lddqu (MemoryRef m, RegXMM x)      { xedInstrMR(XED_ICLASS_LDDQU,
                                                       m, x); }
  void unpcklpd(RegXMM s, RegXMM d)       { xedInstrRR(XED_ICLASS_UNPCKLPD,
                                                       d, s); }

  // Shifts/rotates by immediate; the shift count is always one byte.
  void rorq  (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_ROR, i, r, sz::byte); }
  void shlq  (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrq  (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
  void sarq  (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SAR, i, r, sz::byte); }
  void shll  (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrl  (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
  void shlw  (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrw  (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }

  // Shifts by the %cl register.
  void shlq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHL, r); }
  void shrq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHR, r); }
  void sarq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SAR, r); }

  void btrq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_BTR, i, r, sz::byte); }

  // Rounding mode is passed as the trailing immediate.
  void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
    Immed i((int)d);
    xedInstrIRR(XED_ICLASS_ROUNDSD, dst, src, i, sz::byte);
  }

  // Comparison predicate is passed as the trailing immediate.
  void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
    Immed i((int)pred);
    xedInstrIRR(XED_ICLASS_CMPSD_XMM, dst, src, i, sz::byte);
  }
  /*
   * Control-flow directives. Primitive labeling/patching facilities
   * are available, as well as slightly higher-level ones via the
   * Label class.
   */

  void jmp(Reg64 r)           { xedInstrR(XED_ICLASS_JMP,       r); }
  void jmp(MemoryRef m)       { xedInstrM(XED_ICLASS_JMP,       m); }
  void jmp(RIPRelativeRef m)  { xedInstrM(XED_ICLASS_JMP,       m); }
  void call(Reg64 r)          { xedInstrR(XED_ICLASS_CALL_NEAR, r); }
  void call(MemoryRef m)      { xedInstrM(XED_ICLASS_CALL_NEAR, m); }
  void call(RIPRelativeRef m) { xedInstrM(XED_ICLASS_CALL_NEAR, m); }

  // 8-bit-displacement jump; caller guarantees the target is in range.
  void jmp8(CodeAddress dest) { xedInstrRelBr(XED_ICLASS_JMP,
                                              dest, sz::byte); }

  void jmp(CodeAddress dest) {
    xedInstrRelBr(XED_ICLASS_JMP, dest, sz::dword);
  }

  void call(CodeAddress dest) {
    xedInstrRelBr(XED_ICLASS_CALL_NEAR, dest, sz::dword);
  }

  void jcc(ConditionCode cond, CodeAddress dest) {
    xedInstrRelBr(ccToXedJump(cond), dest, sz::dword);
  }

  void jcc8(ConditionCode cond, CodeAddress dest) {
    xedInstrRelBr(ccToXedJump(cond), dest, sz::byte);
  }

  using X64AssemblerBase::call;
  using X64AssemblerBase::jmp;
  using X64AssemblerBase::jmp8;
  using X64AssemblerBase::jcc;
  using X64AssemblerBase::jcc8;

  void setcc(int cc, Reg8 byteReg) {
    xedInstrR(ccToXedSetCC(cc), byteReg);
  }

  void psllq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSLLQ, i, r,
                                             sz::byte); }
  void psrlq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSRLQ, i, r,
                                             sz::byte); }

  void movq_rx(Reg64 rsrc, RegXMM rdest) {
    xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
  }
  void movq_xr(RegXMM rsrc, Reg64 rdest) {
    xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
  }

  // Scalar double arithmetic: note AT&T operand order (src, srcdest),
  // so the wrapper receives (srcdest, src).
  void addsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_ADDSD, srcdest, src);
  }
  void mulsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_MULSD, srcdest, src);
  }
  void subsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_SUBSD, srcdest, src);
  }
  void pxor(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_PXOR, srcdest, src);
  }
  void cvtsi2sd(Reg64 src, RegXMM dest) {
    xedInstrRR(XED_ICLASS_CVTSI2SD, src, dest);
  }
  void cvtsi2sd(MemoryRef m, RegXMM dest) {
    xedInstrMR(XED_ICLASS_CVTSI2SD, m, dest);
  }
  void ucomisd(RegXMM l, RegXMM r) {
    xedInstrRR(XED_ICLASS_UCOMISD, l, r);
  }
  void sqrtsd(RegXMM src, RegXMM dest) {
    xedInstrRR(XED_ICLASS_SQRTSD, dest, src);
  }

  void divsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_DIVSD, srcdest, src);
  }
  void cvttsd2siq(RegXMM src, Reg64 dest) {
    xedInstrRR(XED_ICLASS_CVTTSD2SI, src, dest);
  }
private:
  // XED conditional jump conversion functions
  // Builds an initializer mapping HPHP ConditionCode values (used as the
  // array index) to XED iclasses for a cc-parameterized instruction
  // family (J*, SET*, CMOV*).
#define CC_TO_XED_ARRAY(xed_instr) {                     \
    XED_ICLASS_##xed_instr##O,   /*CC_O  */              \
    XED_ICLASS_##xed_instr##NO,  /*CC_NO */              \
    XED_ICLASS_##xed_instr##B,   /*CC_B, CC_NAE */       \
    XED_ICLASS_##xed_instr##NB,  /*CC_AE, CC_NB, CC_NC */\
    XED_ICLASS_##xed_instr##Z,   /*CC_E, CC_Z */         \
    XED_ICLASS_##xed_instr##NZ,  /*CC_NE, CC_NZ */       \
    XED_ICLASS_##xed_instr##BE,  /*CC_BE, CC_NA */       \
    XED_ICLASS_##xed_instr##NBE, /*CC_A, CC_NBE */       \
    XED_ICLASS_##xed_instr##S,   /*CC_S  */              \
    XED_ICLASS_##xed_instr##NS,  /*CC_NS */              \
    XED_ICLASS_##xed_instr##P,   /*CC_P  */              \
    XED_ICLASS_##xed_instr##NP,  /*CC_NP */              \
    XED_ICLASS_##xed_instr##L,   /*CC_L, CC_NGE */       \
    XED_ICLASS_##xed_instr##NL,  /*CC_GE, CC_NL */       \
    XED_ICLASS_##xed_instr##LE,  /*CC_LE, CC_NG */       \
    XED_ICLASS_##xed_instr##NLE  /*CC_G, CC_NLE */       \
  }

  ALWAYS_INLINE
  xed_iclass_enum_t ccToXedJump(ConditionCode c) {
    assertx(c != CC_None);
    static const xed_iclass_enum_t jumps[] = CC_TO_XED_ARRAY(J);
    return jumps[(int)c];
  }

  ALWAYS_INLINE
  xed_iclass_enum_t ccToXedSetCC(int c) {
    assertx(c != -1);
    static const xed_iclass_enum_t setccs[] = CC_TO_XED_ARRAY(SET);
    return setccs[c];
  }

  ALWAYS_INLINE
  xed_iclass_enum_t ccToXedCMov(ConditionCode c) {
    assertx(c != CC_None);
    static const xed_iclass_enum_t cmovs[] = CC_TO_XED_ARRAY(CMOV);
    return cmovs[(int)c];
  }
  // XED emit functions

  /*
   * Core emitter: `xedFunc` fills in a xed_encoder_instruction_t, which
   * is then converted to an encoder request and encoded at
   * `destination`.  Returns the encoded length in bytes; asserts on any
   * conversion or encoding failure.
   */
  template<typename F> ALWAYS_INLINE
  uint32_t xedEmitImpl(xed_iclass_enum_t instr, CodeAddress destination,
                       F xedFunc) {
    xed_encoder_instruction_t instruction;
    xed_encoder_request_t request;
    uint32_t encodedSize = 0;
    xed_error_enum_t xedError;
    xed_bool_t convert_ok;

    xedFunc(&instruction);

    xed_encoder_request_zero(&request);
    convert_ok = xed_convert_to_encoder_request(&request, &instruction);
    always_assert(convert_ok && "Unable to convert instruction"
                  " to encoder request");
    xedError = xed_encode(&request, destination, XED_MAX_INSTRUCTION_BYTES,
                          &encodedSize);
    always_assert_flog(xedError == XED_ERROR_NONE,
                       "XED: Error when encoding {}(): {}",
                       xed_iclass_enum_t2str(instr),
                       xed_error_enum_t2str(xedError));
    return encodedSize;
  }
  /*
   * xedEmit overloads for 0..3 explicit operands.  When `destination`
   * is null, the instruction is emitted at the code block's frontier
   * and the frontier is advanced; otherwise it is encoded at
   * `destination` and the code block is left untouched (used for
   * length probing).  All return the encoded length in bytes.
   */
  ALWAYS_INLINE
  uint32_t xedEmit(xed_iclass_enum_t instr, xed_uint_t effOperandSizeBits,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst0(i, kXedState, instr,
                                        effOperandSizeBits);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }

  ALWAYS_INLINE
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst1(i, kXedState, instr,
                                        effOperandSizeBits, op);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }

  ALWAYS_INLINE
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op_1,
                   const xed_encoder_operand_t& op_2,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst2(i, kXedState, instr,
                                        effOperandSizeBits, op_1, op_2);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }

  ALWAYS_INLINE
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op_1,
                   const xed_encoder_operand_t& op_2,
                   const xed_encoder_operand_t& op_3,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst3(i, kXedState, instr,
                                        effOperandSizeBits,
                                        op_1, op_2, op_3);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }
public:
  static constexpr auto kInt3Size = sz::byte;
  static constexpr auto kUd2Size = sz::word;

  // Emit `n` int3 (breakpoint) instructions.  The single encoded byte
  // is produced once via XED and cached in a function-local static.
  void emitInt3s(int n) {
    if (n == 0) return;
    static auto const instr = [&]{
      uint8_t int3;
      xedEmit(XED_ICLASS_INT3, sz::byte,
              reinterpret_cast<CodeAddress>(&int3));
      return int3;
    }();
    for (auto i = 0; i < n; ++i) {
      byte(instr);
    }
  }
533 void emitNop(int n) {
534 if (n == 0) return;
535 static const xed_iclass_enum_t nops[] = {
536 XED_ICLASS_INVALID,
537 XED_ICLASS_NOP,
538 XED_ICLASS_NOP2,
539 XED_ICLASS_NOP3,
540 XED_ICLASS_NOP4,
541 XED_ICLASS_NOP5,
542 XED_ICLASS_NOP6,
543 XED_ICLASS_NOP7,
544 XED_ICLASS_NOP8,
545 XED_ICLASS_NOP9,
547 // While n >= 9, emit 9 byte NOPs
548 while (n >= 9) {
549 xedInstr(XED_ICLASS_NOP9, sz::nosize);
550 n -= 9;
552 // Emit remaining NOPs (if any)
553 if (n) {
554 xedInstr(nops[n], sz::nosize);
  // Fill all space remaining in the code block with ud2 (word-sized)
  // then int3 (byte-sized) filler.
  void pad() {
    auto remaining = available();
    if (remaining == 0) return;
    // Encode the two filler instructions once; cached across calls.
    static auto const instrs = [&]{
      struct {
        uint8_t  int3;
        uint16_t ud2;
      } data;
      xedEmit(XED_ICLASS_INT3, sz::nosize,
              reinterpret_cast<CodeAddress>(&data.int3));
      xedEmit(XED_ICLASS_UD2, sz::nosize,
              reinterpret_cast<CodeAddress>(&data.ud2));
      return data;
    }();
    while (remaining >= kUd2Size) {
      word(instrs.ud2);
      remaining -= kUd2Size;
    }
    while (remaining >= kInt3Size) {
      byte(instrs.int3);
      remaining -= kInt3Size;
    }
  }

  // No-op here: segment overrides are attached to the XED memory
  // operand itself (see toXedOperand(MemoryRef)), so no separate
  // prefix state is needed.
  ALWAYS_INLINE
  XedAssembler& prefix(const MemoryRef& mr) {
    return *this;
  }

public:
  /*
   * The following functions use a naming convention for an older API
   * to the assembler; conditional loads and moves haven't yet been
   * ported.
   */

  // CMOVcc [rbase + off], rdest
  inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
                                     int off, Reg64 rdest) {
    MemoryRef m(DispReg(rbase, off));
    xedInstrMR(ccToXedCMov(cc), m, rdest);
  }
  inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
                                     int off, Reg32 rdest) {
    MemoryRef m(DispReg(rbase, off));
    xedInstrMR(ccToXedCMov(cc), m, rdest);
  }
  inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
    xedInstrRR(ccToXedCMov(cc), rsrc, rdest);
  }
private:
  /*
   * The following section contains conversion methods that take a Reg8/32/64,
   * RegXMM, MemoryRef, RipRelative struct and convert it to a
   * xed_encoder_operand_t.
   */

  static constexpr int bytesToBits(int sz) {
    return (sz << 3);
  }

  static constexpr int bitsToBytes(int bits) {
    return (bits >> 3);
  }

  // Immediate payload viewed at the width XED will encode; the unused
  // high bytes are zeroed first.
  union XedImmValue {
    int8_t   b;
    uint8_t  ub;
    int16_t  w;
    int32_t  l;
    int64_t  q;
    uint64_t uq;

    template<typename immtype>
    XedImmValue(const immtype& imm, int immSize) {
      uq = 0;
      switch (immSize) {
        case sz::byte:
          b = imm.b(); break;
        case sz::word:
          w = imm.w(); break;
        case sz::dword:
          l = imm.l(); break;
        case sz::qword:
          q = imm.q(); break;
      }
    }
  };

  // Register conversions rely on the project register ids being
  // consecutive, matching XED's enum ordering from each base register.
  xed_reg_enum_t xedFromReg(const Reg64& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_RAX);
  }

  xed_reg_enum_t xedFromReg(const Reg32& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_EAX);
  }

  xed_reg_enum_t xedFromReg(const Reg16& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_AX);
  }

  xed_reg_enum_t xedFromReg(const Reg8& reg) {
    auto regid = int(reg);
    // High-byte registers (ah/ch/dh/bh) are flagged with bit 7 set.
    if ((regid & 0x80) == 0) {
      return xed_reg_enum_t(regid + XED_REG_AL);
    }
    return xed_reg_enum_t((regid - 0x84) + XED_REG_AH);
  }

  xed_reg_enum_t xedFromReg(const RegXMM& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_XMM0);
  }

  // Smallest displacement encoding for `value` (none, byte or dword).
  int getDisplSize(intptr_t value) {
    if (value == 0) return sz::nosize;
    return deltaFits(value, sz::byte) ? sz::byte : sz::dword;
  }

  xed_enc_displacement_t xedDispFromValue(intptr_t value, int size) {
    switch (size) {
      case sz::nosize: return {0, 0};
      case sz::byte:   return {(xed_uint64_t)safe_cast<int8_t>(value),
                               (xed_uint32_t)bytesToBits(size)};
      default:         return {(xed_uint64_t)safe_cast<int32_t>(value),
                               (xed_uint32_t)bytesToBits(size)};
    }
  }

  xed_enc_displacement_t xedDispFromValue(intptr_t value) {
    return xedDispFromValue(value, getDisplSize(value));
  }
  template<typename regtype>
  xed_encoder_operand_t toXedOperand(const regtype& reg) {
    return xed_reg(xedFromReg(reg));
  }

  xed_encoder_operand_t toXedOperand(xed_reg_enum_t reg) {
    return xed_reg(reg);
  }

  // Memory operand: base/index/scale/displacement plus an optional
  // segment override carried on the MemoryRef itself.
  xed_encoder_operand_t toXedOperand(const MemoryRef& m, int memSize) {
    static const xed_reg_enum_t segmentRegs[] = {
      XED_REG_INVALID,  //Segment::DS (no segment register override)
      XED_REG_FS,       //Segment::FS
      XED_REG_GS        //Segment::GS
    };
    xed_reg_enum_t base = int(m.r.base) != -1 ?
                          xedFromReg(m.r.base) : XED_REG_INVALID;
    xed_reg_enum_t index = int(m.r.index) != -1 ?
                           xedFromReg(m.r.index) : XED_REG_INVALID;
    return xed_mem_gbisd(segmentRegs[int(m.segment)],
                         base, index, m.r.scale,
                         xedDispFromValue(m.r.disp), bytesToBits(memSize));
  }

  // RIP-relative operand: always uses a dword displacement.
  xed_encoder_operand_t toXedOperand(const RIPRelativeRef& r, int memSize) {
    return xed_mem_bd(XED_REG_RIP, xedDispFromValue(r.r.disp, sz::dword),
                      bytesToBits(memSize));
  }

  // Relative-branch operand; `address` is already a displacement here.
  xed_encoder_operand_t toXedOperand(CodeAddress address, int size) {
    return xed_relbr(safe_cast<int32_t>((int64_t)address), bytesToBits(size));
  }

  // Immediate operand: when several sizes are allowed, shrink to the
  // smallest one that fits the value.
  template<typename immtype>
  xed_encoder_operand_t toXedOperand(const immtype& immed, int immSizes) {
    assert((immSizes != 0) &&
           (immSizes & ~(sz::byte | sz::word | sz::dword | sz::qword)) == 0);
    if ((immSizes & (immSizes - 1)) != 0) {
      immSizes = reduceImmSize(immed.q(), immSizes);
    }
    return xed_imm0(XedImmValue(immed, immSizes).uq, bytesToBits(immSizes));
  }

  // Pick the smallest allowed size that can represent `value`
  // (qword always fits).
  ALWAYS_INLINE
  int reduceImmSize(int64_t value, int allowedSizes) {
    while (allowedSizes) {
      int crtSize = (allowedSizes & -allowedSizes);  // lowest set bit
      if (crtSize == sz::qword || deltaFits(value, crtSize)) {
        return crtSize;
      }
      allowedSizes ^= crtSize;
    }
    assertx(false && "Could not find an optimal size for Immed");
    return sz::nosize;
  }
  /*
   * Cache sizes for instruction types in a certain xedInstr context.
   * This helps with emitting instructions where you need to know in advance
   * the length of the instruction being emitted (such as when one of
   * the operands is a RIPRelativeRef) by caching the size of the instruction
   * and removing the need to call xedEmit twice each time (once to get
   * the size, and once to actually emit the instruction).
   */
  typedef tbb::concurrent_unordered_map<int32_t, uint32_t> XedLenCache;

  // Look up `key` in the cache; on a miss, compute the length with
  // `xedFunc` and remember it.
  template<typename F> ALWAYS_INLINE
  uint32_t xedCacheLen(XedLenCache* lenCache, F xedFunc, uint32_t key) {
    auto res = lenCache->find(key);
    if (res != lenCache->end()) {
      return res->second;
    }
    auto instrLen = xedFunc();
    lenCache->insert({key, instrLen});
    return instrLen;
  }

  static constexpr uint32_t xedLenCacheKey(xed_iclass_enum_t instr,
                                           uint32_t size) {
    // 16 bits should fit a xed_iclass_enum_t value (there are currently ~1560
    // distinct values).
    return uint32_t(instr) | (size << 16);
  }
  // XEDInstr* wrappers

  // Instantiates a wrapper for each GP register width.
#define XED_WRAP_IMPL() \
  XED_WRAP_X(64)        \
  XED_WRAP_X(32)        \
  XED_WRAP_X(16)        \
  XED_WRAP_X(8)

  // instr(reg)

#define XED_INSTR_WRAPPER_IMPL(bitsize)                               \
  ALWAYS_INLINE                                                       \
  void xedInstrR(xed_iclass_enum_t instr, const Reg##bitsize& r) {    \
    xedEmit(instr, toXedOperand(r), bitsize);                         \
  }

#define XED_WRAP_X XED_INSTR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X

  // instr(imm, reg) -- note XED operand order is (dest, src).

#define XED_INSTIR_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrIR(xed_iclass_enum_t instr, const Immed& i,            \
                  const Reg##bitsize& r,                              \
                  int immSize = bitsToBytes(bitsize)) {               \
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize),         \
            bitsize);                                                 \
  }

#define XED_WRAP_X XED_INSTIR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X

  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, sz::qword),
            bytesToBits(sz::qword));
  }

  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r,
                  int immSize) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize),
            bytesToBits(sz::qword));
  }

  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed& i,
                  const RegXMM& r, int immSize) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize));
  }
  // instr(reg, reg) -- wrappers receive (src, dest) and pass XED
  // (dest, src).

#define XED_INSTRR_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrRR(xed_iclass_enum_t instr, const Reg##bitsize& r1,    \
                  const Reg##bitsize& r2) {                           \
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bitsize);      \
  }

#define XED_WRAP_X XED_INSTRR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X

  // Shift-by-%cl form.
  ALWAYS_INLINE
  void xedInstrRR_CL(xed_iclass_enum_t instr, const Reg64& r) {
    xedEmit(instr, toXedOperand(r), toXedOperand(XED_REG_CL),
            bytesToBits(sz::qword));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg32& r2,
                  int size = sz::dword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg16& r1, const Reg32& r2,
                  int size = sz::dword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg64& r2,
                  int size = sz::qword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg64& r1, const RegXMM& r2) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const Reg64& r2) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
  }

  // Most instr(xmm_1, xmm_2) instructions take operands in reverse order
  // compared to instr(reg_1, reg_2): source and destination are swapped

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2) {
    xedEmit(instr, toXedOperand(r1), toXedOperand(r2));
  }

  // instr(imm)

  ALWAYS_INLINE
  void xedInstrI(xed_iclass_enum_t instr, const Immed& i, int immSize) {
    xed_encoder_operand_t op = toXedOperand(i, immSize);
    xedEmit(instr, op, op.width_bits);
  }
  // instr(mem)

  ALWAYS_INLINE
  void xedInstrM(xed_iclass_enum_t instr, const MemoryRef& m,
                 int size = sz::qword) {
    xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrM(xed_iclass_enum_t instr, RIPRelativeRef m,
                 int size = sz::qword) {
    // RIP displacements are relative to the end of the instruction, so
    // probe (and cache) the encoded length before computing the real
    // displacement.
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&]() {
                        return xedEmit(instr, toXedOperand(nullrip, size),
                                       bytesToBits(size), dest());
                      }, xedLenCacheKey(instr, 0));
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
    xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
  }

  // instr(imm, mem)

  ALWAYS_INLINE
  void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
                  int size = sz::qword) {
    assert(size && (size & (size - 1)) == 0);  // power-of-two operand size
    xedEmit(instr, toXedOperand(m, size), toXedOperand(i, size),
            bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
                  int immSize, int memSize) {
    xedEmit(instr, toXedOperand(m, memSize), toXedOperand(i, immSize),
            bytesToBits(memSize));
  }
  // instr(mem, reg)

#define XED_INSTMR_WRAPPER_IMPL(bitsize)                                \
  ALWAYS_INLINE                                                         \
  void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m,          \
                  const Reg##bitsize& r,                                \
                  int memSize = bitsToBytes(bitsize)) {                 \
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize), bitsize); \
  }                                                                     \
                                                                        \
  /* RIP-relative form: probe the cached length first so the */         \
  /* displacement (relative to instruction end) can be fixed up. */     \
  ALWAYS_INLINE                                                         \
  void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m,            \
                  const Reg##bitsize& r) {                              \
    static XedLenCache lenCache;                                        \
    auto instrLen = xedCacheLen(                                        \
                      &lenCache,                                        \
                      [&] {                                             \
                        return xedEmit(                                 \
                          instr, toXedOperand(r),                       \
                          toXedOperand(nullrip,                         \
                                       bitsToBytes(bitsize)),           \
                          bitsize, dest());                             \
                      }, xedLenCacheKey(instr, 0));                     \
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);              \
    xedEmit(instr, toXedOperand(r),                                     \
            toXedOperand(m, bitsToBytes(bitsize)), bitsize);            \
  }

#define XED_WRAP_X XED_INSTMR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X

  ALWAYS_INLINE
  void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m,
                  const RegXMM& r, int memSize = sz::qword) {
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
  }

  ALWAYS_INLINE
  void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m,
                  const RegXMM& r, int memSize = sz::qword) {
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&]() {
                        return xedEmit(
                          instr, toXedOperand(r),
                          toXedOperand(nullrip, memSize),
                          0, dest());
                      }, xedLenCacheKey(instr, 0));
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
  }
  // instr(reg, mem)

#define XED_INSTRM_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrRM(xed_iclass_enum_t instr, const Reg##bitsize& r,     \
                  const MemoryRef& m) {                               \
    xedEmit(instr, toXedOperand(m, bitsToBytes(bitsize)),             \
            toXedOperand(r), bitsize);                                \
  }

#define XED_WRAP_X XED_INSTRM_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X

  ALWAYS_INLINE
  void xedInstrRM(xed_iclass_enum_t instr, const RegXMM& r,
                  const MemoryRef& m, int memSize = sz::qword) {
    xedEmit(instr, toXedOperand(m, memSize), toXedOperand(r));
  }

  // instr(xmm, xmm, imm)

  ALWAYS_INLINE
  void xedInstrIRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2,
                   const Immed& i, int immSize) {
    xedEmit(instr, toXedOperand(r1), toXedOperand(r2),
            toXedOperand(i, immSize));
  }
  // instr(relbr)

  void xedInstrRelBr(xed_iclass_enum_t instr,
                     CodeAddress destination, int size) {
    // Branch displacements are relative to the end of the instruction,
    // so probe (and cache) the encoded length first.  Unlike the other
    // wrappers, the cache key includes `size` because the displacement
    // width changes the instruction length.
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&]() {
                        return xedEmit(instr, toXedOperand((CodeAddress)0,
                                       size), 0, dest());
                      }, xedLenCacheKey(instr, size));
    auto target = destination - (frontier() + instrLen);
    xedEmit(instr, toXedOperand((CodeAddress)target, size));
  }

  // instr()

  ALWAYS_INLINE
  void xedInstr(xed_iclass_enum_t instr, int size = sz::qword) {
    xedEmit(instr, bytesToBits(size));
  }