/*
** x86/x64 instruction emitter.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Emit basic instructions --------------------------------------------- */

#define MODRM(mode, r1, r2)	((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))
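/* Note: the XM_* mode constants are pre-shifted into bits 7-6 of the ModRM
** byte, so the macro only adds the reg field (r1, bits 5-3) and the r/m
** field (r2, bits 2-0).
*/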
#if LJ_64
#define REXRB(p, rr, rb) \
    { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
      if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX	0x200
#define REX_64		(FORCE_REX|0x080000)
#define VEX_64		0x800000
#else
#define REXRB(p, rr, rb)	((void)0)
#define FORCE_REX	0
#define REX_64		0
#define VEX_64		0
#endif
#if LJ_GC64
#define REX_GC64	REX_64
#else
#define REX_GC64	0
#endif
#define emit_i8(as, i)		(*--as->mcp = (MCode)(i))
#define emit_i32(as, i)		(*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
#define emit_u32(as, u)		(*(uint32_t *)(as->mcp-4) = (u), as->mcp -= 4)

#define emit_x87op(as, xo) \
  (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)
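/* Note: the assembler generates machine code backwards, from as->mctop down
** towards as->mcbot; as->mcp always points at the first byte of the code
** emitted so far. That is why immediates are stored first and the opcode
** bytes (plus any REX prefix) are prepended in front of them afterwards.
*/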
/* op */
static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
				 MCode *p, int delta)
{
  int n = (int8_t)xo;
  if (n == -60) {  /* VEX-encoded instruction */
#if LJ_64
    xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
    return p+delta-5;
  }
#if defined(__GNUC__) || defined(__clang__)
  if (__builtin_constant_p(xo) && n == -2)
    p[delta-2] = (MCode)(xo >> 24);
  else if (__builtin_constant_p(xo) && n == -3)
    *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
  else
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
  p += n + delta;
#if LJ_64
  {
    uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
    if (rex != 0x40) {
      rex |= (rr >> 16);
      if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
      else if ((xo & 0xffffff) == 0x6600fd) { *p = (MCode)rex; rex = 0x66; }
      *--p = (MCode)rex;
    }
  }
#else
  UNUSED(rr); UNUSED(rb); UNUSED(rx);
#endif
  return p;
}
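/* Note: the low byte of an x86Op value is a negated length tag: -2, -3 and -4
** select one-, two- and three-byte opcodes stored in the upper bytes, and -60
** marks a VEX-encoded op. For three-byte opcodes the REX prefix is swapped in
** between the mandatory prefix byte (e.g. 0x66) and the escape/opcode bytes,
** as the x86-64 encoding requires.
*/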
/* op + modrm */
#define emit_opm(xo, mode, rr, rb, p, delta) \
  (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
   emit_op((xo), (rr), (rb), 0, (p), (delta)))

/* op + modrm + sib */
#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
  (p[-1] = MODRM((scale), (rx), (rb)), \
   p[-2] = MODRM((mode), (rr), RID_ESP), \
   emit_op((xo), (rr), (rb), (rx), (p), -1))
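/* Note: emit_opmx sets the ModRM r/m field to RID_ESP (100b), which signals
** that a SIB byte follows; the SIB byte reuses the same bit layout for
** scale, index and base.
*/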
/* op r1, r2 */
static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
{
  MCode *p = as->mcp;
  as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
}

#if LJ_64 && defined(LUA_USE_ASSERT)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
static int32_t ptr2addr(const void *p)
{
  lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
  return i32ptr(p);
}
#else
#define ptr2addr(p)	(i32ptr((p)))
#endif
/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ra_hasreg(rb)) {
    if (LJ_GC64 && rb == RID_RIP) {
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = ofs;
    } else if (ofs == 0 && (rb&7) != RID_EBP) {
      mode = XM_OFS0;
    } else if (checki8(ofs)) {
      *--p = (MCode)ofs;
      mode = XM_OFS8;
    } else {
      p -= 4;
      *(int32_t *)p = ofs;
      mode = XM_OFS32;
    }
    if ((rb&7) == RID_ESP)
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
  } else {
    *(int32_t *)(p-4) = ofs;
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    p -= 5;
    rb = RID_ESP;
#else
    p -= 4;
    rb = RID_EBP;
#endif
    mode = XM_OFS0;
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}
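/* Note: when rb is not a register, ofs is an absolute address. On x64 a bare
** mod=00, r/m=101 encoding would be RIP-relative, so a SIB byte with "no
** base, no index" is emitted to get a plain [disp32]; on x86 the disp32 form
** is already absolute.
*/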
/* op r, [base+idx*scale+ofs] */
static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
		       x86Mode scale, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ofs == 0 && (rb&7) != RID_EBP) {
    mode = XM_OFS0;
  } else if (checki8(ofs)) {
    mode = XM_OFS8;
    *--p = (MCode)ofs;
  } else {
    mode = XM_OFS32;
    p -= 4;
    *(int32_t *)p = ofs;
  }
  as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
}

/* op r, i */
static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  MCode *p = as->mcp;
  x86Op xo;
  if (checki8(i)) {
    *--p = (MCode)i;
    xo = XG_TOXOi8(xg);
  } else {
    p -= 4;
    *(int32_t *)p = i;
    xo = XG_TOXOi(xg);
  }
  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
}

/* op [base+ofs], i */
static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
		       int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
}
#define emit_shifti(as, xg, r, i) \
  (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))
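/* Note: group instructions carry the sub-opcode in the ModRM reg field, which
** is why (Reg)(xg) or (Reg)(xg & 7) takes the place of a register above.
** E.g. emit_shifti(as, XOg_SHL, r, 3) should emit "shl r, 3": the imm8 shift
** count first, then the shift-group opcode with reg field XOg_SHL.
*/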
/* op r, rm/mrm */
static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
{
  MCode *p = as->mcp;
  x86Mode mode = XM_REG;
  if (rb == RID_MRM) {
    rb = as->mrm.base;
    if (rb == RID_NONE) {
      rb = RID_EBP;
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
      if (as->mrm.idx != RID_NONE)
	goto mrmidx;
#if LJ_64
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
      rb = RID_ESP;
#endif
    } else if (LJ_GC64 && rb == RID_RIP) {
      lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
    } else {
      if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
	mode = XM_OFS0;
      } else if (checki8(as->mrm.ofs)) {
	*--p = (MCode)as->mrm.ofs;
	mode = XM_OFS8;
      } else {
	p -= 4;
	*(int32_t *)p = as->mrm.ofs;
	mode = XM_OFS32;
      }
      if (as->mrm.idx != RID_NONE) {
      mrmidx:
	as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
	return;
      }
      if ((rb&7) == RID_ESP)
	*--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    }
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}
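/* Note: RID_MRM is a pseudo-register. Passing it as rb selects the fused
** memory operand described by as->mrm (base, index, scale, offset), which the
** operand fusion code in lj_asm_x86.h fills in beforehand.
*/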
/* op rm/mrm, i */
static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
}

/* -- Emit loads/stores --------------------------------------------------- */

/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
  emit_i32(as, i);
  emit_rmro(as, XO_MOVmi, 0, base, ofs);
}
/* mov [base+ofs], r */
#define emit_movtomro(as, r, base, ofs) \
  emit_rmro(as, XO_MOVto, (r), (base), (ofs))

/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
  emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)

#define emit_setvmstate(as, i) \
  (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
/* mov r, i / xor r, r */
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */
  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
			    (as->curins+1 < as->T->nins &&
			     IR(as->curins+1)->o == IR_HIOP))) &&
      !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) ||
	(*as->mcp & 0xf0) == XI_JCCs)) {
    emit_rr(as, XO_ARITH(XOg_XOR), r, r);
  } else {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = i;
    p[-5] = (MCode)(XI_MOVri+(r&7));
    p -= 5;
    REXRB(p, 0, r);
    as->mcp = p;
  }
}
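/* Note: the explicit form is "mov r32, imm32", i.e. opcode XI_MOVri (0xb8)
** plus the low three register bits, followed by the 32 bit immediate; REXRB
** prepends a REX prefix when r is one of the extended registers r8-r15.
** The xor-zeroing shortcut is skipped when the following (already emitted)
** instruction is a conditional jump, since xor would clobber the flags.
*/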
#if LJ_GC64
#define dispofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), ptr2addr((addr)))
#endif
#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
  if (checku32(u64)) {  /* 32 bit load clears upper 32 bits. */
    emit_loadi(as, r, (int32_t)u64);
  } else if (checki32((int64_t)u64)) {  /* Sign-extended 32 bit load. */
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = (int32_t)u64;
    as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
  } else if (checki32(dispofs(as, u64))) {
    emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
  } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
    /* Since as->realign assumes the code size doesn't change, check
    ** RIP-relative addressing reachability for both as->mcp and as->mctop.
    */
    emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
  } else {  /* Full-size 64 bit load. */
    MCode *p = as->mcp;
    *(uint64_t *)(p-8) = u64;
    p[-9] = (MCode)(XI_MOVri+(r&7));
    p[-10] = 0x48 + ((r>>3)&1);
    p -= 10;
    as->mcp = p;
  }
}
#endif
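/* Note: the full-size form is the 10 byte "movabs": REX.W (0x48, plus REX.B
** for r8-r15), then 0xb8 + register, then the 64 bit immediate. The shorter
** mov/lea forms above are preferred whenever the value or offset fits.
*/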
/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
#if LJ_GC64
  if (checki32(dispofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
  } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
  } else if (!checki32((intptr_t)addr)) {
    Reg ra = (rr & 15);
    if (xo != XO_MOV) {
      /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
      uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
      uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
      ra = RID_DISPATCH;
      if (checku32(dispaddr)) {
	emit_loadi(as, ra, (int32_t)dispaddr);
      } else {  /* Full-size 64 bit load. */
	MCode *p = as->mcp;
	*(uint64_t *)(p-8) = dispaddr;
	p[-9] = (MCode)(XI_MOVri+(ra&7));
	p[-10] = 0x48 + ((ra>>3)&1);
	p -= 10;
	as->mcp = p;
      }
      if (xo == XO_GROUP3b) emit_i8(as, i8);
    }
    emit_rmro(as, xo, rr, ra, 0);
    emit_loadu64(as, ra, (uintptr_t)addr);
  } else
#endif
  {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
  }
}
/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  Reg r64;
  x86Op xo;
  const uint64_t *k = &ir_k64(ir)->u64;
  if (rset_test(RSET_FPR, r)) {
    r64 = r;
    xo = XO_MOVSD;
  } else {
    r64 = r | REX_64;
    xo = XO_MOV;
  }
  if (*k == 0) {
    emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
  } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
	     (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
    emit_rma(as, xo, r64, k);
  } else {
    if (ir->i) {
      lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
		 "bad interned 64 bit constant");
    } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
      emit_loadu64(as, r, *k);
      return;
    } else {
      /* If all else fails, add the FP constant at the MCode area bottom. */
      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
      *(uint64_t *)as->mcbot = *k;
      ir->i = (int32_t)(as->mctop - as->mcbot);
      as->mcbot += 8;
      as->mclim = as->mcbot + MCLIM_REDZONE;
      lj_mcode_commitbot(as->J, as->mcbot);
    }
    emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
  } else {
    emit_rma(as, xo, r64, k);
#endif
  }
}
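/* Note: ir->i caches the distance from as->mctop to the embedded constant,
** so later loads of the same interned 64 bit constant can address the
** existing slot RIP-relatively instead of embedding another copy.
*/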
/* -- Emit control-flow instructions -------------------------------------- */

/* Label for short jumps. */
typedef MCode *MCLabel;
#if LJ_32 && LJ_HASFFI
/* jmp short target */
static void emit_sjmp(ASMState *as, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lj_assertA(delta == (int8_t)delta, "short jump target out of range");
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = XI_JMPs;
  as->mcp = p - 2;
}
#endif
/* jcc short target */
static void emit_sjcc(ASMState *as, int cc, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lj_assertA(delta == (int8_t)delta, "short jump target out of range");
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
}

/* jcc short (pending target) */
static MCLabel emit_sjcc_label(ASMState *as, int cc)
{
  MCode *p = as->mcp;
  p[-1] = 0;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
  return p;
}

/* Fixup jcc short target. */
static void emit_sfixup(ASMState *as, MCLabel source)
{
  source[-1] = (MCode)(as->mcp-source);
}
/* Return label pointing to current PC. */
#define emit_label(as)		((as)->mcp)
/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
  ptrdiff_t delta = target - p;
  UNUSED(J);
  lj_assertJ(delta == (int32_t)delta, "jump target out of range");
  return (int32_t)delta;
}

/* jcc target */
static void emit_jcc(ASMState *as, int cc, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = (MCode)(XI_JCCn+(cc&15));
  p[-6] = 0x0f;
  as->mcp = p - 6;
}

/* jmp target */
static void emit_jmp(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = XI_JMP;
  as->mcp = p - 5;
}

/* call target */
static void emit_call_(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
#if LJ_64
  if (target-p != (int32_t)(target-p)) {
    /* Assumes RID_RET is never an argument to calls and always clobbered. */
    emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
    emit_loadu64(as, RID_RET, (uint64_t)target);
    return;
  }
#endif
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = XI_CALL;
  as->mcp = p - 5;
}
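/* Note: the direct call form (XI_CALL, i.e. E8 rel32) only reaches targets
** within a signed 32 bit displacement of the call site. Far targets are
** materialized in RID_RET with a 64 bit load and called indirectly instead.
*/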
#define emit_call(as, f)	emit_call_(as, (MCode *)(void *)(f))

/* -- Emit generic operations --------------------------------------------- */

/* Use 64 bit operations to handle 64 bit IR types. */
#if LJ_64
#define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
#define VEX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
#else
#define REX_64IR(ir, r)		(r)
#define VEX_64IR(ir, r)		(r)
#endif
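/* Note: REX_64IR/VEX_64IR simply tag the register operand with the REX.W or
** VEX.W request when the IR type is a 64 bit type, so the same emitter call
** handles both 32 and 64 bit operands.
*/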
/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  UNUSED(ir);
  if (dst < RID_MAX_GPR)
    emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
  else
    emit_rr(as, XO_MOVAPS, dst, src);
}

/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs) {
    emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
  }
}

#define emit_spsub(as, ofs)	emit_addptr(as, RID_ESP|REX_64, -(ofs))
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref)	((ref) <= REF_BASE)