/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   | Copyright (c) 2018 Intel Corporation                                 |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include <xed-interface.h>

#include <tbb/concurrent_unordered_map.h>
/*
 * A macro assembler for x64, based on the Intel XED library, that strives
 * for low coupling to the runtime environment and ease of extensibility.
 */

///////////////////////////////////////////////////////////////////////////////
struct XedAssembler final : public X64AssemblerBase {
  static constexpr xed_state_t kXedState = {
    XED_MACHINE_MODE_LONG_64,
    XED_ADDRESS_WIDTH_64b
  };

  CodeAddress dest() const {
    codeBlock.assertCanEmit(XED_MAX_INSTRUCTION_BYTES);
    return codeBlock.toDestAddress(codeBlock.frontier());
  }

  static constexpr auto nullrip = RIPRelativeRef(DispRIP(0));

  explicit XedAssembler(CodeBlock& cb) : X64AssemblerBase(cb) {}

  XedAssembler(const XedAssembler&) = delete;
  XedAssembler& operator=(const XedAssembler&) = delete;
  /*
   * The following section defines the main interface for emitting
   * x64 instructions. Simple examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   */
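
  /*
   * A minimal usage sketch (assumes a CodeBlock `cb` already set up by the
   * surrounding runtime; register names come from the enclosing assembler
   * headers):
   *
   *   XedAssembler a(cb);
   *   a.  movq   (rax, rbx);        // rbx = rax
   *   a.  addq   (0x10, rbx);       // rbx += 0x10
   *   a.  storeq (rbx, rbp[-8]);    // spill rbx to the stack
   *   a.  ret    ();
   *
   * Each call encodes the instruction with XED at the block's frontier and
   * advances the frontier by the encoded length.
   */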
#define BYTE_LOAD_OP(name, instr)                                     \
  void name##b(MemoryRef m, Reg8 r)  { xedInstrMR(instr, m, r); }

#define LOAD_OP(name, instr)                                          \
  void name##q(MemoryRef m, Reg64 r) { xedInstrMR(instr, m, r); }     \
  void name##l(MemoryRef m, Reg32 r) { xedInstrMR(instr, m, r); }     \
  void name##w(MemoryRef m, Reg16 r) { xedInstrMR(instr, m, r); }     \
  void name##q(RIPRelativeRef m, Reg64 r) { xedInstrMR(instr, m, r); }\
  BYTE_LOAD_OP(name, instr)

#define BYTE_STORE_OP(name, instr)                                    \
  void name##b(Reg8 r, MemoryRef m)  { xedInstrRM(instr, r, m); }     \
  void name##b(Immed i, MemoryRef m) { xedInstrIM(instr, i, m,        \
                                                  sz::byte); }

#define STORE_OP(name, instr)                                         \
  void name##w(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::word,                         \
               sz::word | sz::byte), sz::word);                       \
  }                                                                   \
  void name##l(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::dword,                        \
               sz::dword | sz::byte), sz::dword);                     \
  }                                                                   \
  void name##w(Reg16 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  void name##l(Reg32 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  void name##q(Reg64 r, MemoryRef m) { xedInstrRM(instr, r, m); }     \
  BYTE_STORE_OP(name, instr)

#define BYTE_REG_OP(name, instr)                                      \
  void name##b(Reg8 r1, Reg8 r2) { xedInstrRR(instr, r1, r2); }       \
  void name##b(Immed i, Reg8 r)  { xedInstrIR(instr, i, r); }

#define REG_OP(name, instr)                                           \
  void name##q(Reg64 r1, Reg64 r2) { xedInstrRR(instr, r1, r2); }     \
  void name##l(Reg32 r1, Reg32 r2) { xedInstrRR(instr, r1, r2); }     \
  void name##w(Reg16 r1, Reg16 r2) { xedInstrRR(instr, r1, r2); }     \
  void name##l(Immed i, Reg32 r) {                                    \
    xedInstrIR(instr, i, r, IMMPROP(sz::dword,                        \
               sz::dword | sz::byte));                                \
  }                                                                   \
  void name##w(Immed i, Reg16 r) {                                    \
    xedInstrIR(instr, i, r, IMMPROP(sz::word,                         \
               sz::word | sz::byte));                                 \
  }                                                                   \
  BYTE_REG_OP(name, instr)

#define IMM64_STORE_OP(name, instr)                                   \
  void name##q(Immed i, MemoryRef m) {                                \
    xedInstrIM(instr, i, m, IMMPROP(sz::dword,                        \
               sz::dword | sz::byte), sz::qword);                     \
  }

#define IMM64R_OP(name, instr)                                        \
  void name##q(Immed imm, Reg64 r) {                                  \
    always_assert(imm.fits(sz::dword));                               \
    xedInstrIR(instr, imm, r, IMMPROP(sz::dword,                      \
               sz::dword | sz::byte));                                \
  }

#define FULL_OP(name, instr)                                          \
  LOAD_OP(name, instr)                                                \
  STORE_OP(name, instr)                                               \
  REG_OP(name, instr)                                                 \
  IMM64_STORE_OP(name, instr)                                         \
  IMM64R_OP(name, instr)
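
  /*
   * For example, FULL_OP(add, XED_ICLASS_ADD) expands into the whole
   * addq/addl/addw/addb overload family: register-register,
   * immediate-register, load (memory source), and store (memory
   * destination) forms, all funneled through the xedInstr* wrappers
   * defined further below.
   */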
  // We rename x64's mov to store and load for improved code readability.
#define IMMPROP(size, allsizes) size
  LOAD_OP        (load,  XED_ICLASS_MOV)
  STORE_OP       (store, XED_ICLASS_MOV)
  IMM64_STORE_OP (store, XED_ICLASS_MOV)
  REG_OP         (mov,   XED_ICLASS_MOV)
  FULL_OP        (test,  XED_ICLASS_TEST)
#undef IMMPROP

#define IMMPROP(size, allsizes) allsizes
  FULL_OP(add, XED_ICLASS_ADD)
  FULL_OP(xor, XED_ICLASS_XOR)
  FULL_OP(sub, XED_ICLASS_SUB)
  FULL_OP(and, XED_ICLASS_AND)
  FULL_OP(or,  XED_ICLASS_OR)
  FULL_OP(cmp, XED_ICLASS_CMP)
  FULL_OP(sbb, XED_ICLASS_SBB)
#undef IMMPROP

#undef BYTE_LOAD_OP
#undef LOAD_OP
#undef BYTE_STORE_OP
#undef STORE_OP
#undef BYTE_REG_OP
#undef REG_OP
#undef FULL_OP
#undef IMM64R_OP
#undef IMM64_STORE_OP
  // 64-bit immediates work only with mov to a 64-bit register.
  void movq(Immed64 imm, Reg64 r) { xedInstrIR(XED_ICLASS_MOV, imm, r,
                                               sz::qword | sz::dword); }
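
  /*
   * Note: the immediate is emitted as 4 bytes (sign-extended by the CPU)
   * whenever it fits in a dword; reduceImmSize() below picks the smaller of
   * the two allowed widths. E.g. movq(Immed64(-1), rax) encodes with a
   * 4-byte immediate rather than an 8-byte one.
   */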
  // movzbx is a special snowflake. We don't have movzbq because it behaves
  // exactly the same as movzbl but takes an extra byte.
  void loadzbl(MemoryRef m, Reg32 r) { xedInstrMR(XED_ICLASS_MOVZX,
                                                  m, r, sz::byte); }
  void loadzwl(MemoryRef m, Reg32 r) { xedInstrMR(XED_ICLASS_MOVZX,
                                                  m, r, sz::word); }
  void movzbl(Reg8 src, Reg32 dest)  { xedInstrRR(XED_ICLASS_MOVZX,
                                                  src, dest); }
  void movsbl(Reg8 src, Reg32 dest)  { xedInstrRR(XED_ICLASS_MOVSX,
                                                  src, dest); }
  void movzwl(Reg16 src, Reg32 dest) { xedInstrRR(XED_ICLASS_MOVZX,
                                                  src, dest); }

  void loadsbq(MemoryRef m, Reg64 r) { xedInstrMR(XED_ICLASS_MOVSX,
                                                  m, r, sz::byte); }
  void movsbq(Reg8 src, Reg64 dest)  { xedInstrRR(XED_ICLASS_MOVSX,
                                                  src, dest); }
  void crc32q(Reg64 src, Reg64 dest) { xedInstrRR(XED_ICLASS_CRC32,
                                                  src, dest); }
  void lea(MemoryRef p, Reg64 reg)      { xedInstrMR(XED_ICLASS_LEA, p, reg); }
  void lea(RIPRelativeRef p, Reg64 reg) { xedInstrMR(XED_ICLASS_LEA, p, reg); }

  void xchgq(Reg64 r1, Reg64 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
  void xchgl(Reg32 r1, Reg32 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
  void xchgb(Reg8 r1, Reg8 r2)   { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }

  void imul(Reg64 r1, Reg64 r2)  { xedInstrRR(XED_ICLASS_IMUL, r1, r2); }

  void push(Reg64 r)  { xedInstrR(XED_ICLASS_PUSH, r); }
  void pushl(Reg32 r) { xedInstrR(XED_ICLASS_PUSH, r); }
  void pop (Reg64 r)  { xedInstrR(XED_ICLASS_POP, r); }
  void idiv(Reg64 r)  { xedInstrR(XED_ICLASS_IDIV, r); }
  void incq(Reg64 r)  { xedInstrR(XED_ICLASS_INC, r); }
  void incl(Reg32 r)  { xedInstrR(XED_ICLASS_INC, r); }
  void incw(Reg16 r)  { xedInstrR(XED_ICLASS_INC, r); }
  void decq(Reg64 r)  { xedInstrR(XED_ICLASS_DEC, r); }
  void decl(Reg32 r)  { xedInstrR(XED_ICLASS_DEC, r); }
  void decw(Reg16 r)  { xedInstrR(XED_ICLASS_DEC, r); }
  void notb(Reg8 r)   { xedInstrR(XED_ICLASS_NOT, r); }
  void not(Reg64 r)   { xedInstrR(XED_ICLASS_NOT, r); }
  void neg(Reg64 r)   { xedInstrR(XED_ICLASS_NEG, r); }
  void negb(Reg8 r)   { xedInstrR(XED_ICLASS_NEG, r); }
  void ret()          { xedInstr(XED_ICLASS_RET_NEAR); }
  void ret(Immed i)   { xedInstrI(XED_ICLASS_RET_NEAR, i,
                                  sz::word); }
  void cqo()          { xedInstr(XED_ICLASS_CQO); }
  void nop()          { xedInstr(XED_ICLASS_NOP, sz::byte); }
  void int3()         { xedInstr(XED_ICLASS_INT3, sz::byte); }
  void ud2()          { xedInstr(XED_ICLASS_UD2, sz::byte); }
  void pushf()        { xedInstr(XED_ICLASS_PUSHF, sz::word); }
  void popf()         { xedInstr(XED_ICLASS_POPF, sz::word); }
  void lock()         { always_assert(false); }
  void push(MemoryRef m)     { xedInstrM(XED_ICLASS_PUSH, m); }
  void pop (MemoryRef m)     { xedInstrM(XED_ICLASS_POP, m); }
  void prefetch(MemoryRef m) { xedInstrM(XED_ICLASS_PREFETCHT1, m); }
  void incq(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m); }
  void incl(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m, sz::dword); }
  void incw(MemoryRef m)     { xedInstrM(XED_ICLASS_INC, m, sz::word); }
  void decqlock(MemoryRef m) { xedInstrM(XED_ICLASS_DEC_LOCK, m); }
  void decq(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m); }
  void decl(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m, sz::dword); }
  void decw(MemoryRef m)     { xedInstrM(XED_ICLASS_DEC, m, sz::word); }
  // Special case for push(imm).
  void push(Immed64 i) {
    xed_encoder_operand_t op = toXedOperand(i, sz::byte | sz::word | sz::dword);
    xedEmit(XED_ICLASS_PUSH, op, op.width_bits < 32 ? 16 : 64);
  }
  void movups(RegXMM x, MemoryRef m)  { xedInstrRM(XED_ICLASS_MOVUPS,
                                                   x, m, sz::qword * 2); }
  void movups(MemoryRef m, RegXMM x)  { xedInstrMR(XED_ICLASS_MOVUPS,
                                                   m, x, sz::qword * 2); }
  void movdqu(RegXMM x, MemoryRef m)  { xedInstrRM(XED_ICLASS_MOVDQU,
                                                   x, m, sz::qword * 2); }
  void movdqu(MemoryRef m, RegXMM x)  { xedInstrMR(XED_ICLASS_MOVDQU,
                                                   m, x, sz::qword * 2); }
  void movdqa(RegXMM x, RegXMM y)     { xedInstrRR(XED_ICLASS_MOVDQA,
                                                   y, x); }
  void movdqa(RegXMM x, MemoryRef m)  { xedInstrRM(XED_ICLASS_MOVDQA,
                                                   x, m, sz::qword * 2); }
  void movdqa(MemoryRef m, RegXMM x)  { xedInstrMR(XED_ICLASS_MOVDQA,
                                                   m, x, sz::qword * 2); }
  void movsd (RegXMM x, RegXMM y)     { xedInstrRR(XED_ICLASS_MOVSD_XMM,
                                                   y, x); }
  void movsd (RegXMM x, MemoryRef m)  { xedInstrRM(XED_ICLASS_MOVSD_XMM,
                                                   x, m); }
  void movsd (MemoryRef m, RegXMM x)  { xedInstrMR(XED_ICLASS_MOVSD_XMM,
                                                   m, x); }
  void movsd (RIPRelativeRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVSD_XMM,
                                                       m, x); }
  void lddqu (MemoryRef m, RegXMM x)  { xedInstrMR(XED_ICLASS_LDDQU,
                                                   m, x, sz::qword * 2); }
  void unpcklpd(RegXMM s, RegXMM d)   { xedInstrRR(XED_ICLASS_UNPCKLPD,
                                                   d, s); }
  void rorq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_ROR, i, r, sz::byte); }
  void shlq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
  void sarq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SAR, i, r, sz::byte); }
  void shll (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrl (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
  void shlw (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
  void shrw (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }

  void shlq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHL, r); }
  void shrq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHR, r); }
  void sarq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SAR, r); }

  void btrq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_BTR, i, r, sz::byte); }
  void roundsd(RoundDirection d, RegXMM src, RegXMM dst) {
    Immed i((int)d);
    xedInstrIRR(XED_ICLASS_ROUNDSD, dst, src, i, sz::byte);
  }

  void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
    Immed i((int)pred);
    xedInstrIRR(XED_ICLASS_CMPSD_XMM, dst, src, i, sz::byte);
  }
  /*
   * Control-flow directives. Primitive labeling/patching facilities
   * are available, as well as slightly higher-level ones via the
   * Label class.
   */
  void jmp(Reg64 r)           { xedInstrR(XED_ICLASS_JMP, r); }
  void jmp(MemoryRef m)       { xedInstrM(XED_ICLASS_JMP, m); }
  void jmp(RIPRelativeRef m)  { xedInstrM(XED_ICLASS_JMP, m); }
  void call(Reg64 r)          { xedInstrR(XED_ICLASS_CALL_NEAR, r); }
  void call(MemoryRef m)      { xedInstrM(XED_ICLASS_CALL_NEAR, m); }
  void call(RIPRelativeRef m) { xedInstrM(XED_ICLASS_CALL_NEAR, m); }

  void jmp8(CodeAddress dest) { xedInstrRelBr(XED_ICLASS_JMP,
                                              dest, sz::byte); }

  void jmp(CodeAddress dest) {
    xedInstrRelBr(XED_ICLASS_JMP, dest, sz::dword);
  }

  void call(CodeAddress dest) {
    xedInstrRelBr(XED_ICLASS_CALL_NEAR, dest, sz::dword);
  }
  void jcc(ConditionCode cond, CodeAddress dest) {
    xedInstrRelBr(ccToXedJump(cond), dest, sz::dword);
  }

  void jcc8(ConditionCode cond, CodeAddress dest) {
    xedInstrRelBr(ccToXedJump(cond), dest, sz::byte);
  }
  using X64AssemblerBase::call;
  using X64AssemblerBase::jmp;
  using X64AssemblerBase::jmp8;
  using X64AssemblerBase::jcc;
  using X64AssemblerBase::jcc8;
  void setcc(int cc, Reg8 byteReg) {
    xedInstrR(ccToXedSetCC(cc), byteReg);
  }
  void psllq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSLLQ, i, r,
                                             sz::byte); }
  void psrlq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSRLQ, i, r,
                                             sz::byte); }
  void movq_rx(Reg64 rsrc, RegXMM rdest) {
    xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
  }
  void movq_xr(RegXMM rsrc, Reg64 rdest) {
    xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
  }

  void addsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_ADDSD, srcdest, src);
  }
  void mulsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_MULSD, srcdest, src);
  }
  void subsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_SUBSD, srcdest, src);
  }
  void pxor(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_PXOR, srcdest, src);
  }
  void cvtsi2sd(Reg64 src, RegXMM dest) {
    xedInstrRR(XED_ICLASS_CVTSI2SD, src, dest);
  }
  void cvtsi2sd(MemoryRef m, RegXMM dest) {
    xedInstrMR(XED_ICLASS_CVTSI2SD, m, dest);
  }
  void ucomisd(RegXMM l, RegXMM r) {
    xedInstrRR(XED_ICLASS_UCOMISD, l, r);
  }
  void sqrtsd(RegXMM src, RegXMM dest) {
    xedInstrRR(XED_ICLASS_SQRTSD, dest, src);
  }

  void divsd(RegXMM src, RegXMM srcdest) {
    xedInstrRR(XED_ICLASS_DIVSD, srcdest, src);
  }
  void cvttsd2siq(RegXMM src, Reg64 dest) {
    xedInstrRR(XED_ICLASS_CVTTSD2SI, src, dest);
  }
  // XED conditional jump conversion functions
#define CC_TO_XED_ARRAY(xed_instr) {                           \
    XED_ICLASS_##xed_instr##O,   /* CC_O                  */   \
    XED_ICLASS_##xed_instr##NO,  /* CC_NO                 */   \
    XED_ICLASS_##xed_instr##B,   /* CC_B, CC_NAE          */   \
    XED_ICLASS_##xed_instr##NB,  /* CC_AE, CC_NB, CC_NC   */   \
    XED_ICLASS_##xed_instr##Z,   /* CC_E, CC_Z            */   \
    XED_ICLASS_##xed_instr##NZ,  /* CC_NE, CC_NZ          */   \
    XED_ICLASS_##xed_instr##BE,  /* CC_BE, CC_NA          */   \
    XED_ICLASS_##xed_instr##NBE, /* CC_A, CC_NBE          */   \
    XED_ICLASS_##xed_instr##S,   /* CC_S                  */   \
    XED_ICLASS_##xed_instr##NS,  /* CC_NS                 */   \
    XED_ICLASS_##xed_instr##P,   /* CC_P                  */   \
    XED_ICLASS_##xed_instr##NP,  /* CC_NP                 */   \
    XED_ICLASS_##xed_instr##L,   /* CC_L, CC_NGE          */   \
    XED_ICLASS_##xed_instr##NL,  /* CC_GE, CC_NL          */   \
    XED_ICLASS_##xed_instr##LE,  /* CC_LE, CC_NG          */   \
    XED_ICLASS_##xed_instr##NLE  /* CC_G, CC_NLE          */   \
  }

  xed_iclass_enum_t ccToXedJump(ConditionCode c) {
    assertx(c != CC_None);
    static const xed_iclass_enum_t jumps[] = CC_TO_XED_ARRAY(J);
    return jumps[(int)c];
  }
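
  /*
   * The array is indexed by the ConditionCode value; e.g. ccToXedJump(CC_E)
   * yields XED_ICLASS_JZ, and CC_TO_XED_ARRAY(SET) / CC_TO_XED_ARRAY(CMOV)
   * build the analogous setcc/cmovcc tables below.
   */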
  xed_iclass_enum_t ccToXedSetCC(int c) {
    static const xed_iclass_enum_t setccs[] = CC_TO_XED_ARRAY(SET);
    return setccs[c];
  }
  xed_iclass_enum_t ccToXedCMov(ConditionCode c) {
    assertx(c != CC_None);
    static const xed_iclass_enum_t cmovs[] = CC_TO_XED_ARRAY(CMOV);
    return cmovs[(int)c];
  }
  // XED emit functions

  template<typename F> ALWAYS_INLINE
  uint32_t xedEmitImpl(xed_iclass_enum_t instr, CodeAddress destination,
                       F xedFunc) {
    xed_encoder_instruction_t instruction;
    xed_encoder_request_t request;
    uint32_t encodedSize = 0;
    xed_error_enum_t xedError;
    xed_bool_t convert_ok;

    xedFunc(&instruction);

    xed_encoder_request_zero(&request);
    convert_ok = xed_convert_to_encoder_request(&request, &instruction);
    always_assert(convert_ok && "Unable to convert instruction"
                                " to encoder request");
    xedError = xed_encode(&request, destination, XED_MAX_INSTRUCTION_BYTES,
                          &encodedSize);
    always_assert_flog(xedError == XED_ERROR_NONE,
                       "XED: Error when encoding {}(): {}",
                       xed_iclass_enum_t2str(instr),
                       xed_error_enum_t2str(xedError));
    return encodedSize;
  }
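
  /*
   * The emit pipeline above is plain XED usage: fill a
   * xed_encoder_instruction_t via xed_inst0..3, convert it to an encoder
   * request, then encode into the destination buffer. A minimal standalone
   * sketch of the same flow (assumes xed_tables_init() was called once at
   * startup; error handling elided):
   *
   *   xed_encoder_instruction_t inst;
   *   xed_encoder_request_t req;
   *   uint8_t buf[XED_MAX_INSTRUCTION_BYTES];
   *   uint32_t len = 0;
   *   xed_inst0(&inst, kXedState, XED_ICLASS_RET_NEAR, 0);
   *   xed_encoder_request_zero(&req);
   *   xed_convert_to_encoder_request(&req, &inst);
   *   xed_encode(&req, buf, sizeof(buf), &len);  // buf now holds 0xC3
   */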
  uint32_t xedEmit(xed_iclass_enum_t instr, xed_uint_t effOperandSizeBits,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst0(i, kXedState, instr,
                                        effOperandSizeBits);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst1(i, kXedState, instr,
                                        effOperandSizeBits, op);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op_1,
                   const xed_encoder_operand_t& op_2,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst2(i, kXedState, instr,
                                        effOperandSizeBits, op_1, op_2);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }
  uint32_t xedEmit(xed_iclass_enum_t instr,
                   const xed_encoder_operand_t& op_1,
                   const xed_encoder_operand_t& op_2,
                   const xed_encoder_operand_t& op_3,
                   xed_uint_t effOperandSizeBits = 0,
                   CodeAddress destination = nullptr) {
    auto size = xedEmitImpl(instr, destination ? destination : dest(),
                            [&](xed_encoder_instruction_t* i) {
                              xed_inst3(i, kXedState, instr,
                                        effOperandSizeBits,
                                        op_1, op_2, op_3);
                            });
    if (!destination) codeBlock.moveFrontier(size);
    return size;
  }
  static constexpr auto kInt3Size = sz::byte;
  static constexpr auto kUd2Size = sz::word;
  void emitInt3s(int n) {
    if (n == 0) return;
    static auto const instr = [&]{
      uint8_t int3 = 0;
      xedEmit(XED_ICLASS_INT3, sz::byte,
              reinterpret_cast<CodeAddress>(&int3));
      return int3;
    }();
    for (auto i = 0; i < n; ++i) {
      byte(instr);
    }
  }
  void emitNop(int n) {
    if (n == 0) return;
    static const xed_iclass_enum_t nops[] = {
      XED_ICLASS_INVALID,
      XED_ICLASS_NOP,  XED_ICLASS_NOP2, XED_ICLASS_NOP3,
      XED_ICLASS_NOP4, XED_ICLASS_NOP5, XED_ICLASS_NOP6,
      XED_ICLASS_NOP7, XED_ICLASS_NOP8, XED_ICLASS_NOP9
    };
    // While n >= 9, emit 9 byte NOPs
    while (n >= 9) {
      xedInstr(XED_ICLASS_NOP9, sz::nosize);
      n -= 9;
    }
    // Emit remaining NOPs (if any)
    if (n) {
      xedInstr(nops[n], sz::nosize);
    }
  }
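
  /*
   * E.g. emitNop(13) produces one 9-byte NOP followed by one 4-byte NOP, so
   * padding costs at most ceil(n/9) instructions to execute.
   */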
  void pad() {
    auto remaining = available();
    if (remaining == 0) return;
    static auto const instrs = [&]{
      struct { uint8_t int3; uint16_t ud2; } data;
      xedEmit(XED_ICLASS_INT3, sz::nosize,
              reinterpret_cast<CodeAddress>(&data.int3));
      xedEmit(XED_ICLASS_UD2, sz::nosize,
              reinterpret_cast<CodeAddress>(&data.ud2));
      return data;
    }();
    while (remaining >= kUd2Size) {
      word(instrs.ud2);
      remaining -= kUd2Size;
    }
    while (remaining >= kInt3Size) {
      byte(instrs.int3);
      remaining -= kInt3Size;
    }
  }
  XedAssembler& prefix(const MemoryRef& mr) {
    return *this;
  }
  /*
   * The following functions use a naming convention for an older API
   * to the assembler; conditional loads and moves haven't yet been
   * ported.
   */
  // CMOVcc [rbase + off], rdest
  inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
                                     int off, Reg64 rdest) {
    MemoryRef m(DispReg(rbase, off));
    xedInstrMR(ccToXedCMov(cc), m, rdest);
  }
  inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
                                     int off, Reg32 rdest) {
    MemoryRef m(DispReg(rbase, off));
    xedInstrMR(ccToXedCMov(cc), m, rdest);
  }
  inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
    xedInstrRR(ccToXedCMov(cc), rsrc, rdest);
  }
private:
  /*
   * The following section contains conversion methods that take a
   * Reg8/32/64, RegXMM, MemoryRef, or RIPRelativeRef struct and convert
   * it to a xed_encoder_operand_t.
   */

  static constexpr int bytesToBits(int sz) {
    return sz << 3;
  }

  static constexpr int bitsToBytes(int bits) {
    return bits >> 3;
  }
  union XedImmValue {
    int8_t   b;
    int16_t  w;
    int32_t  l;
    int64_t  q;
    uint64_t uq;

    template<typename immtype>
    XedImmValue(const immtype& imm, int immSize) {
      // immSize has already been reduced by the caller to a width the
      // value fits in, so the narrowing casts below cannot lose bits.
      uq = 0;
      switch (immSize) {
        case sz::byte:  b = safe_cast<int8_t>(imm.q());  break;
        case sz::word:  w = safe_cast<int16_t>(imm.q()); break;
        case sz::dword: l = safe_cast<int32_t>(imm.q()); break;
        case sz::qword: q = imm.q(); break;
      }
    }
  };
  xed_reg_enum_t xedFromReg(const Reg64& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_RAX);
  }

  xed_reg_enum_t xedFromReg(const Reg32& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_EAX);
  }

  xed_reg_enum_t xedFromReg(const Reg16& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_AX);
  }

  xed_reg_enum_t xedFromReg(const Reg8& reg) {
    auto regid = int(reg);
    if ((regid & 0x80) == 0) {
      return xed_reg_enum_t(regid + XED_REG_AL);
    }
    return xed_reg_enum_t((regid - 0x84) + XED_REG_AH);
  }

  xed_reg_enum_t xedFromReg(const RegXMM& reg) {
    return xed_reg_enum_t(int(reg) + XED_REG_XMM0);
  }
  int getDisplSize(intptr_t value) {
    if (value == 0) return sz::nosize;
    return deltaFits(value, sz::byte) ? sz::byte : sz::dword;
  }
  xed_enc_displacement_t xedDispFromValue(intptr_t value, int size) {
    switch (size) {
      case sz::nosize: return {0, 0};
      case sz::byte:   return {(xed_uint64_t)safe_cast<int8_t>(value),
                               (xed_uint32_t)bytesToBits(size)};
      default:         return {(xed_uint64_t)safe_cast<int32_t>(value),
                               (xed_uint32_t)bytesToBits(size)};
    }
  }

  xed_enc_displacement_t xedDispFromValue(intptr_t value) {
    return xedDispFromValue(value, getDisplSize(value));
  }
  template<typename regtype>
  xed_encoder_operand_t toXedOperand(const regtype& reg) {
    return xed_reg(xedFromReg(reg));
  }

  xed_encoder_operand_t toXedOperand(xed_reg_enum_t reg) {
    return xed_reg(reg);
  }
  xed_encoder_operand_t toXedOperand(const MemoryRef& m, int memSize) {
    static const xed_reg_enum_t segmentRegs[] = {
      XED_REG_INVALID, // Segment::DS (no segment register override)
      XED_REG_FS,      // Segment::FS
      XED_REG_GS       // Segment::GS
    };
    xed_reg_enum_t base = int(m.r.base) != -1 ?
                          xedFromReg(m.r.base) : XED_REG_INVALID;
    xed_reg_enum_t index = int(m.r.index) != -1 ?
                           xedFromReg(m.r.index) : XED_REG_INVALID;
    return xed_mem_gbisd(segmentRegs[int(m.segment)],
                         base, index, m.r.scale,
                         xedDispFromValue(m.r.disp), bytesToBits(memSize));
  }
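
  /*
   * For example, storeq(rbx, rax[0xc]) reaches this with base = rax, no
   * index, and disp = 0xc, so the call is effectively
   * xed_mem_gbisd(XED_REG_INVALID, XED_REG_RAX, XED_REG_INVALID, 1,
   * {0xc, 8}, 64): a byte displacement on a 64-bit memory operand.
   */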
  xed_encoder_operand_t toXedOperand(const RIPRelativeRef& r, int memSize) {
    return xed_mem_bd(XED_REG_RIP, xedDispFromValue(r.r.disp, sz::dword),
                      bytesToBits(memSize));
  }
  xed_encoder_operand_t toXedOperand(CodeAddress address, int size) {
    return xed_relbr(safe_cast<int32_t>((int64_t)address), bytesToBits(size));
  }
  template<typename immtype>
  xed_encoder_operand_t toXedOperand(const immtype& immed, int immSizes) {
    assert((immSizes != 0) &&
           (immSizes & ~(sz::byte | sz::word | sz::dword | sz::qword)) == 0);
    if ((immSizes & (immSizes - 1)) != 0) {
      immSizes = reduceImmSize(immed.q(), immSizes);
    }
    return xed_imm0(XedImmValue(immed, immSizes).uq, bytesToBits(immSizes));
  }
  int reduceImmSize(int64_t value, int allowedSizes) {
    while (allowedSizes) {
      int crtSize = (allowedSizes & -allowedSizes);
      if (crtSize == sz::qword || deltaFits(value, crtSize)) {
        return crtSize;
      }
      allowedSizes ^= crtSize;
    }
    assertx(false && "Could not find an optimal size for Immed");
    return sz::nosize;
  }
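
  /*
   * Worked example: reduceImmSize(0x12, sz::dword | sz::byte) tests the
   * smallest allowed width first (allowedSizes & -allowedSizes isolates the
   * lowest set bit), sees that 0x12 fits in a signed byte, and returns
   * sz::byte; reduceImmSize(0x1234, sz::dword | sz::byte) returns sz::dword.
   */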
  /*
   * Cache sizes for instruction types in a certain xedInstr context.
   * This helps with emitting instructions where you need to know in advance
   * the length of the instruction being emitted (such as when one of
   * the operands is a RIPRelativeRef) by caching the size of the instruction
   * and removing the need to call xedEmit twice each time (once to get
   * the size, and once to actually emit the instruction).
   */

  typedef tbb::concurrent_unordered_map<int32_t, uint32_t> XedLenCache;
  template<typename F> ALWAYS_INLINE
  uint32_t xedCacheLen(XedLenCache* lenCache, F xedFunc, uint32_t key) {
    auto res = lenCache->find(key);
    if (res != lenCache->end()) {
      return res->second;
    }
    auto instrLen = xedFunc();
    lenCache->insert({key, instrLen});
    return instrLen;
  }
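
  /*
   * The lambda passed as xedFunc encodes a throwaway copy of the
   * instruction (with a placeholder operand such as nullrip) just to learn
   * its length; the result is keyed by iclass (and size, where relevant) in
   * a concurrent map since multiple threads may be assembling at once.
   */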
  static constexpr uint32_t xedLenCacheKey(xed_iclass_enum_t instr,
                                           uint32_t size) {
    // 16 bits should fit a xed_iclass_enum_t value (there are currently ~1560
    // of them), leaving the upper bits free for the size.
    return uint32_t(instr) | (size << 16);
  }
  // XEDInstr* wrappers

#define XED_WRAP_IMPL() \
  XED_WRAP_X(64)        \
  XED_WRAP_X(32)        \
  XED_WRAP_X(16)        \
  XED_WRAP_X(8)

  // instr(reg)
#define XED_INSTR_WRAPPER_IMPL(bitsize)                               \
  ALWAYS_INLINE                                                       \
  void xedInstrR(xed_iclass_enum_t instr, const Reg##bitsize& r) {    \
    xedEmit(instr, toXedOperand(r), bitsize);                         \
  }

#define XED_WRAP_X XED_INSTR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X
  // instr(imm, reg)
#define XED_INSTIR_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrIR(xed_iclass_enum_t instr, const Immed& i,            \
                  const Reg##bitsize& r,                              \
                  int immSize = bitsToBytes(bitsize)) {               \
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize),         \
            bitsize);                                                 \
  }

#define XED_WRAP_X XED_INSTIR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X
  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, sz::qword),
            bytesToBits(sz::qword));
  }

  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r,
                  int immSize) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize),
            bytesToBits(sz::qword));
  }

  ALWAYS_INLINE
  void xedInstrIR(xed_iclass_enum_t instr, const Immed& i,
                  const RegXMM& r, int immSize) {
    xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize));
  }
  // instr(reg, reg)
#define XED_INSTRR_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrRR(xed_iclass_enum_t instr, const Reg##bitsize& r1,    \
                  const Reg##bitsize& r2) {                           \
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bitsize);      \
  }

#define XED_WRAP_X XED_INSTRR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X
  ALWAYS_INLINE
  void xedInstrRR_CL(xed_iclass_enum_t instr, const Reg64& r) {
    xedEmit(instr, toXedOperand(r), toXedOperand(XED_REG_CL),
            bytesToBits(sz::qword));
  }
  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg32& r2,
                  int size = sz::dword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg16& r1, const Reg32& r2,
                  int size = sz::dword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg64& r2,
                  int size = sz::qword) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const Reg64& r1, const RegXMM& r2) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
  }

  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const Reg64& r2) {
    xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
  }

  // Most instr(xmm_1, xmm_2) instructions take operands in reverse order
  // compared to instr(reg_1, reg_2): source and destination are swapped,
  // so this overload emits its operands in the order given.
  ALWAYS_INLINE
  void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2) {
    xedEmit(instr, toXedOperand(r1), toXedOperand(r2));
  }
  // instr(imm)
  ALWAYS_INLINE
  void xedInstrI(xed_iclass_enum_t instr, const Immed& i, int immSize) {
    xed_encoder_operand_t op = toXedOperand(i, immSize);
    xedEmit(instr, op, op.width_bits);
  }
  // instr(mem)
  ALWAYS_INLINE
  void xedInstrM(xed_iclass_enum_t instr, const MemoryRef& m,
                 int size = sz::qword) {
    xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
  }
  ALWAYS_INLINE
  void xedInstrM(xed_iclass_enum_t instr, RIPRelativeRef m,
                 int size = sz::qword) {
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&] {
                        return xedEmit(instr, toXedOperand(nullrip, size),
                                       bytesToBits(size), dest());
                      }, xedLenCacheKey(instr, 0));
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
    xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
  }
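
  /*
   * The displacement adjustment above is what makes RIP-relative operands
   * work: XED expects the displacement relative to the end of the
   * instruction, while callers pass an absolute target in m.r.disp, so we
   * subtract the current frontier plus the (cached) instruction length.
   */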
  // instr(imm, mem)
  ALWAYS_INLINE
  void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
                  int size = sz::qword) {
    assert(size && (size & (size - 1)) == 0);
    xedEmit(instr, toXedOperand(m, size), toXedOperand(i, size),
            bytesToBits(size));
  }
  ALWAYS_INLINE
  void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
                  int immSize, int memSize) {
    xedEmit(instr, toXedOperand(m, memSize), toXedOperand(i, immSize),
            bytesToBits(memSize));
  }
  // instr(mem, reg)
#define XED_INSTMR_WRAPPER_IMPL(bitsize)                                \
  ALWAYS_INLINE                                                         \
  void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m,          \
                  const Reg##bitsize& r,                                \
                  int memSize = bitsToBytes(bitsize)) {                 \
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize), bitsize); \
  }                                                                     \
                                                                        \
  ALWAYS_INLINE                                                         \
  void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m,            \
                  const Reg##bitsize& r) {                              \
    static XedLenCache lenCache;                                        \
    auto instrLen = xedCacheLen(                                        \
                      &lenCache,                                        \
                      [&] {                                             \
                        return xedEmit(                                 \
                          instr, toXedOperand(r),                       \
                          toXedOperand(nullrip,                         \
                                       bitsToBytes(bitsize)),           \
                          bitsize, dest());                             \
                      }, xedLenCacheKey(instr, 0));                     \
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);              \
    xedEmit(instr, toXedOperand(r),                                     \
            toXedOperand(m, bitsToBytes(bitsize)), bitsize);            \
  }

#define XED_WRAP_X XED_INSTMR_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X
  ALWAYS_INLINE
  void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m,
                  const RegXMM& r, int memSize = sz::qword) {
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
  }
  ALWAYS_INLINE
  void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m,
                  const RegXMM& r, int memSize = sz::qword) {
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&] {
                        return xedEmit(
                          instr, toXedOperand(r),
                          toXedOperand(nullrip, memSize),
                          0, dest());
                      }, xedLenCacheKey(instr, 0));
    m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
    xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
  }
  // instr(reg, mem)
#define XED_INSTRM_WRAPPER_IMPL(bitsize)                              \
  ALWAYS_INLINE                                                       \
  void xedInstrRM(xed_iclass_enum_t instr, const Reg##bitsize& r,     \
                  const MemoryRef& m) {                               \
    xedEmit(instr, toXedOperand(m, bitsToBytes(bitsize)),             \
            toXedOperand(r), bitsize);                                \
  }

#define XED_WRAP_X XED_INSTRM_WRAPPER_IMPL
  XED_WRAP_IMPL()
#undef XED_WRAP_X
  ALWAYS_INLINE
  void xedInstrRM(xed_iclass_enum_t instr, const RegXMM& r,
                  const MemoryRef& m, int memSize = sz::qword) {
    xedEmit(instr, toXedOperand(m, memSize), toXedOperand(r));
  }
  // instr(xmm, xmm, imm)
  ALWAYS_INLINE
  void xedInstrIRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2,
                   const Immed& i, int immSize) {
    xedEmit(instr, toXedOperand(r1), toXedOperand(r2),
            toXedOperand(i, immSize));
  }
  // instr(relbr)
  ALWAYS_INLINE
  void xedInstrRelBr(xed_iclass_enum_t instr,
                     CodeAddress destination, int size) {
    static XedLenCache lenCache;
    auto instrLen = xedCacheLen(
                      &lenCache,
                      [&] {
                        return xedEmit(instr, toXedOperand((CodeAddress)0,
                                                           size),
                                       0, dest());
                      }, xedLenCacheKey(instr, size));
    auto target = destination - (frontier() + instrLen);
    xedEmit(instr, toXedOperand((CodeAddress)target, size));
  }
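
  /*
   * Same length-then-patch trick as the RIP-relative forms: encode once
   * with a zero displacement to learn the branch's length (cached per
   * iclass and size), then compute the PC-relative target from the
   * frontier and encode for real. E.g. a jcc8 whose target equals the next
   * instruction's address encodes a relative offset of 0.
   */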
  // instr()
  ALWAYS_INLINE
  void xedInstr(xed_iclass_enum_t instr, int size = sz::qword) {
    xedEmit(instr, bytesToBits(size));
  }
};