/*
** Definitions for x86 and x64 CPUs.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/
6 #ifndef _LJ_TARGET_X86_H
7 #define _LJ_TARGET_X86_H
/* -- Register IDs -------------------------------------------------------- */
/*
** Register name lists in X-macro form. GPRDEF(_) and FPRDEF(_) apply the
** macro passed as `_` to every register name, in order, so the position of
** a name in the list fixes its numeric id when expanded into an enum.
** The 64 bit lists carry 16 registers of each kind, the 32 bit lists 8.
*/
#if LJ_64
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
  _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
  _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
#else
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif
/* Turn one register name into its RID_* enumerator (trailing comma included). */
#define RIDENUM(name)	RID_##name,
30 GPRDEF(RIDENUM
) /* General-purpose registers (GPRs). */
31 FPRDEF(RIDENUM
) /* Floating-point registers (FPRs). */
33 RID_MRM
= RID_MAX
, /* Pseudo-id for ModRM operand. */
34 RID_RIP
= RID_MAX
+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */
36 /* Calling conventions. */
45 /* These definitions must match with the *.dasc file(s): */
46 RID_BASE
= RID_EDX
, /* Interpreter BASE. */
47 #if LJ_64 && !LJ_ABI_WIN
48 RID_LPC
= RID_EBX
, /* Interpreter PC. */
49 RID_DISPATCH
= RID_R14D
, /* Interpreter DISPATCH table. */
51 RID_LPC
= RID_ESI
, /* Interpreter PC. */
52 RID_DISPATCH
= RID_EBX
, /* Interpreter DISPATCH table. */
55 /* Register ranges [min, max) and number of registers. */
56 RID_MIN_GPR
= RID_EAX
,
57 RID_MIN_FPR
= RID_XMM0
,
58 RID_MAX_GPR
= RID_MIN_FPR
,
59 RID_MAX_FPR
= RID_MAX
,
60 RID_NUM_GPR
= RID_MAX_GPR
- RID_MIN_GPR
,
61 RID_NUM_FPR
= RID_MAX_FPR
- RID_MIN_FPR
,
64 /* -- Register sets ------------------------------------------------------- */
/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
/* LJ_GC64 is used as a 0/1 factor: DISPATCH is only removed when it is 1. */
#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
			 - RID2RSET(RID_ESP) \
			 - LJ_GC64*RID2RSET(RID_DISPATCH))
/* Every floating-point register id in [RID_MIN_FPR, RID_MAX_FPR). */
#define RSET_FPR	(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
/* Union of the GPR and FPR sets above. */
#define RSET_ALL	(RSET_GPR|RSET_FPR)
/* Initial register set; currently identical to RSET_ALL. */
#define RSET_INIT	RSET_ALL
/* Registers usable as 8 bit operands. */
#if LJ_64
/* Note: this requires the use of FORCE_REX! */
#define RSET_GPR8	RSET_GPR
#else
/* Only the ids from RID_EAX up to and including RID_EBX. */
#define RSET_GPR8	(RSET_RANGE(RID_EAX, RID_EBX+1))
#endif
/* ABI-specific register sets. */
/* RSET_ACD: the three registers EAX, ECX and EDX as a set. */
#define RSET_ACD	(RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))
/*
** Per-ABI scratch register set and argument register description.
** REGARG_GPRS packs the argument GPR ids into 5 bit fields, starting from
** the low bits; REGARG_NUMGPR is the number of fields in use.
*/
#if LJ_64
#if LJ_ABI_WIN
/* Windows x64 ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#define REGARG_GPRS \
  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
#define REGARG_NUMGPR	4
#define REGARG_NUMFPR	4
#define REGARG_FIRSTFPR	RID_XMM0
#define REGARG_LASTFPR	RID_XMM3
#define STACKARG_OFS	(4*8)
#else
/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#define REGARG_GPRS \
  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
   <<5))<<5))<<5))<<5))<<5))
#define REGARG_NUMGPR	6
#define REGARG_NUMFPR	8
#define REGARG_FIRSTFPR	RID_XMM0
#define REGARG_LASTFPR	RID_XMM7
#define STACKARG_OFS	0
#endif
#else
/* Common x86 ABI. */
#define RSET_SCRATCH	(RSET_ACD|RSET_FPR)
#define REGARG_GPRS	(RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
#define REGARG_NUMGPR	2  /* Fastcall only. */
#define REGARG_NUMFPR	0
#define STACKARG_OFS	0
#endif
/* Prefer the low 8 regs of each type to reduce REX prefixes. */
/* NOTE(review): a line between this comment and the definition is missing
** from this copy of the file; confirm whether a guard or an #undef of an
** earlier rset_picktop_ belongs here. lj_fls and lj_bswap are defined
** elsewhere.
*/
#define rset_picktop_(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
123 /* -- Spill slots --------------------------------------------------------- */
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
/* NOTE(review): only this one pair of values is visible in this copy of the
** file; confirm whether other ABI-specific values are selected elsewhere.
*/
#define SPS_FIXED	(4*2)
#define SPS_FIRST	(4*2)	/* Don't use callee register save area. */

/* Slot number times 4 (the slot width in bytes), computed as int32_t. */
#define sps_scale(slot)		(4 * (int32_t)(slot))
/* Slots above SPS_FIXED, rounded up to the next multiple of 4. */
#define sps_align(slot)		(((slot) - SPS_FIXED + 3) & ~3)
154 /* -- Exit state ---------------------------------------------------------- */
156 /* This definition must match with the *.dasc file(s). */
158 lua_Number fpr
[RID_NUM_FPR
]; /* Floating-point registers. */
159 intptr_t gpr
[RID_NUM_GPR
]; /* General-purpose registers. */
160 int32_t spill
[256]; /* Spill slots. */
/* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
#define EXITSTUB_SPACING	(2+2)	/* Bytes from one exit stub to the next. */
#define EXITSTUBS_PER_GROUP	32	/* Exit stubs sharing one group. */

#define EXITTRACE_VMSTATE	1	/* g->vmstate has traceno on exit. */
169 /* -- x86 ModRM operand encoding ------------------------------------------ */
172 XM_OFS0
= 0x00, XM_OFS8
= 0x40, XM_OFS32
= 0x80, XM_REG
= 0xc0,
173 XM_SCALE1
= 0x00, XM_SCALE2
= 0x40, XM_SCALE4
= 0x80, XM_SCALE8
= 0xc0,
177 /* Structure to hold variable ModRM operand. */
179 int32_t ofs
; /* Offset. */
180 uint8_t base
; /* Base register or RID_NONE. */
181 uint8_t idx
; /* Index register or RID_NONE. */
182 uint8_t scale
; /* Index scale (XM_SCALE1 .. XM_SCALE8). */
185 /* -- Opcodes ------------------------------------------------------------- */
/* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */
/*
** The argument is a bare pair of hex digits; it is pasted onto `0x` and
** placed in the top byte. Prefix and escape bytes sit in between, so the
** low byte is 0xfe, 0xfd or 0xfc for 1, 2 or 3 opcode bytes.
** The shift is carried out in uint32_t: shifting an int with bit 7 set
** left by 24 would overflow a signed int.
*/
#define XO_(o)		((uint32_t)(0x0000fe + ((uint32_t)0x##o<<24)))
#define XO_FPU(a,b)	((uint32_t)(0x00fd + ((uint32_t)0x##a<<16)+((uint32_t)0x##b<<24)))
#define XO_0f(o)	((uint32_t)(0x0f00fd + ((uint32_t)0x##o<<24)))
#define XO_66(o)	((uint32_t)(0x6600fd + ((uint32_t)0x##o<<24)))
#define XO_660f(o)	((uint32_t)(0x0f66fc + ((uint32_t)0x##o<<24)))
#define XO_f20f(o)	((uint32_t)(0x0ff2fc + ((uint32_t)0x##o<<24)))
#define XO_f30f(o)	((uint32_t)(0x0ff3fc + ((uint32_t)0x##o<<24)))

/* Same scheme for the XV_* (VEX) opcodes: fixed low 24 bits, opcode on top. */
#define XV_660f38(o)	((uint32_t)(0x79e2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f38(o)	((uint32_t)(0x7be2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f3a(o)	((uint32_t)(0x7be3c4 + ((uint32_t)0x##o<<24)))
#define XV_f30f38(o)	((uint32_t)(0x7ae2c4 + ((uint32_t)0x##o<<24)))
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** complete set.
*/
206 /* Fixed length opcodes. XI_* prefix. */
213 XI_PUSH
= 0x50, /* Really 50+r. */
214 XI_JCCs
= 0x70, /* Really 7x. */
215 XI_JCCn
= 0x80, /* Really 0f8x. */
217 XI_MOVrib
= 0xb0, /* Really b0+r. */
218 XI_MOVri
= 0xb8, /* Really b8+r. */
229 /* Note: little-endian byte-order! */
232 XI_FDUP
= 0xc0d9, /* Really fld st0. */
233 XI_FPOP
= 0xd8dd, /* Really fstp st0. */
234 XI_FPOP1
= 0xd9dd, /* Really fstp st1. */
239 /* VEX-encoded instructions. XV_* prefix. */
240 XV_RORX
= XV_f20f3a(f0
),
241 XV_SARX
= XV_f30f38(f7
),
242 XV_SHLX
= XV_660f38(f7
),
243 XV_SHRX
= XV_f20f38(f7
),
245 /* Variable-length opcodes. XO_* prefix. */
249 XO_MOVtow
= XO_66(89),
254 XO_ARITHib
= XO_(80),
256 XO_ARITHi8
= XO_(83),
257 XO_ARITHiw8
= XO_66(83),
260 XO_SHIFTcl
= XO_(d3
),
267 XO_GROUP3b
= XO_(f6
),
269 XO_GROUP5b
= XO_(fe
),
271 XO_MOVZXb
= XO_0f(b6
),
272 XO_MOVZXw
= XO_0f(b7
),
273 XO_MOVSXb
= XO_0f(be
),
274 XO_MOVSXw
= XO_0f(bf
),
276 XO_BSWAP
= XO_0f(c8
),
279 XO_MOVSD
= XO_f20f(10),
280 XO_MOVSDto
= XO_f20f(11),
281 XO_MOVSS
= XO_f30f(10),
282 XO_MOVSSto
= XO_f30f(11),
283 XO_MOVLPD
= XO_660f(12),
284 XO_MOVAPS
= XO_0f(28),
285 XO_XORPS
= XO_0f(57),
286 XO_ANDPS
= XO_0f(54),
287 XO_ADDSD
= XO_f20f(58),
288 XO_SUBSD
= XO_f20f(5c
),
289 XO_MULSD
= XO_f20f(59),
290 XO_DIVSD
= XO_f20f(5e
),
291 XO_SQRTSD
= XO_f20f(51),
292 XO_MINSD
= XO_f20f(5d
),
293 XO_MAXSD
= XO_f20f(5f
),
294 XO_ROUNDSD
= 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
295 XO_UCOMISD
= XO_660f(2e
),
296 XO_CVTSI2SD
= XO_f20f(2a
),
297 XO_CVTTSD2SI
= XO_f20f(2c
),
298 XO_CVTSI2SS
= XO_f30f(2a
),
299 XO_CVTTSS2SI
= XO_f30f(2c
),
300 XO_CVTSS2SD
= XO_f30f(5a
),
301 XO_CVTSD2SS
= XO_f20f(5a
),
302 XO_ADDSS
= XO_f30f(58),
303 XO_MOVD
= XO_660f(6e
),
304 XO_MOVDto
= XO_660f(7e
),
306 XO_FLDd
= XO_(d9
), XOg_FLDd
= 0,
307 XO_FLDq
= XO_(dd
), XOg_FLDq
= 0,
308 XO_FILDd
= XO_(db
), XOg_FILDd
= 0,
309 XO_FILDq
= XO_(df
), XOg_FILDq
= 5,
310 XO_FSTPd
= XO_(d9
), XOg_FSTPd
= 3,
311 XO_FSTPq
= XO_(dd
), XOg_FSTPq
= 3,
312 XO_FISTPq
= XO_(df
), XOg_FISTPq
= 7,
313 XO_FISTTPq
= XO_(dd
), XOg_FISTTPq
= 1,
314 XO_FADDq
= XO_(dc
), XOg_FADDq
= 0,
315 XO_FLDCW
= XO_(d9
), XOg_FLDCW
= 5,
316 XO_FNSTCW
= XO_(d9
), XOg_FNSTCW
= 7
/* x86 opcode groups. */
/*
** An x86Group packs three values into one word: the group number `g` in
** the low bits, the opcode byte `i` from bit 8 and the opcode byte `i8`
** from bit 16.
*/
typedef uint32_t x86Group;

#define XG_(i8, i, g)	((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
#define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)
/* Move bits 8..15 (`i`) or bits 16..23 (`i8`) of a group into the top byte
** of a one-byte opcode word (low byte 0xfe).
*/
#define XG_TOXOi(xg)	((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
#define XG_TOXOi8(xg)	((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))

/* Opcode byte 0x03 + (a<<3); the `w` form also carries a 0x66 byte. */
#define XO_ARITH(a)	((x86Op)(0x030000fe + ((a)<<27)))
#define XO_ARITHw(a)	((x86Op)(0x036600fd + ((a)<<27)))
331 XOg_ADD
, XOg_OR
, XOg_ADC
, XOg_SBB
, XOg_AND
, XOg_SUB
, XOg_XOR
, XOg_CMP
,
336 XOg_ROL
, XOg_ROR
, XOg_RCL
, XOg_RCR
, XOg_SHL
, XOg_SHR
, XOg_SAL
, XOg_SAR
340 XOg_TEST
, XOg_TEST_
, XOg_NOT
, XOg_NEG
, XOg_MUL
, XOg_IMUL
, XOg_DIV
, XOg_IDIV
344 XOg_INC
, XOg_DEC
, XOg_CALL
, XOg_CALLfar
, XOg_JMP
, XOg_JMPfar
, XOg_PUSH
347 /* x86 condition codes. */
349 CC_O
, CC_NO
, CC_B
, CC_NB
, CC_E
, CC_NE
, CC_BE
, CC_NBE
,
350 CC_S
, CC_NS
, CC_P
, CC_NP
, CC_L
, CC_NL
, CC_LE
, CC_NLE
,
351 CC_C
= CC_B
, CC_NAE
= CC_C
, CC_NC
= CC_NB
, CC_AE
= CC_NB
,
352 CC_Z
= CC_E
, CC_NZ
= CC_NE
, CC_NA
= CC_BE
, CC_A
= CC_NBE
,
353 CC_PE
= CC_P
, CC_PO
= CC_NP
, CC_NGE
= CC_L
, CC_GE
= CC_NL
,
354 CC_NG
= CC_LE
, CC_G
= CC_NLE