OSX/iOS: Fix SDK incompatibility.
[luajit-2.0.git] / src / lj_emit_arm64.h
blob51d0c351eae6b7d68d6a8b27c329d086a4459e57
1 /*
2 ** ARM64 instruction emitter.
3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4 **
5 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6 ** Sponsored by Cisco Systems, Inc.
7 */
9 /* -- Constant encoding --------------------------------------------------- */
11 static uint64_t get_k64val(ASMState *as, IRRef ref)
13 IRIns *ir = IR(ref);
14 if (ir->o == IR_KINT64) {
15 return ir_kint64(ir)->u64;
16 } else if (ir->o == IR_KGC) {
17 return (uint64_t)ir_kgc(ir);
18 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
19 return (uint64_t)ir_kptr(ir);
20 } else {
21 lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
22 "bad 64 bit const IR op %d", ir->o);
23 return (uint32_t)ir->i; /* Zero-extended. */
27 /* Encode constant in K12 format for data processing instructions. */
28 static uint32_t emit_isk12(int64_t n)
30 uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
31 uint32_t m = n < 0 ? 0x40000000 : 0;
32 if (k < 0x1000) {
33 return (uint32_t)(A64I_K12|m|A64F_U12(k));
34 } else if ((k & 0xfff000) == k) {
35 return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));
37 return 0;
/* Count leading/trailing zeros of a non-zero 64 bit value. */
40 #define emit_clz64(n) (lj_fls64(n)^63)
41 #define emit_ctz64(n) lj_ffs64(n)
43 /* Encode constant in K13 format for logical data processing instructions. */
44 static uint32_t emit_isk13(uint64_t n, int is64)
46 /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */
47 int rot, ones, size, immr, imms;
/* Replicate a 32 bit constant into both halves of the 64 bit pattern. */
48 if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n;
49 if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */
/* Rotate so the value ends in a contiguous run of ones. */
50 rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64;
51 n = lj_ror(n, rot & 63);
/* Element size = leading zeros + trailing ones of the rotated value. */
52 ones = emit_ctz64(~n);
53 size = emit_clz64(n) + ones;
/* The element must repeat across the whole 64 bit value. */
54 if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */
/* Derive the immr (rotation) and imms (size/ones) fields. */
55 immr = -rot & (size - 1);
56 imms = (-(size << 1) | (ones - 1)) & 63;
57 return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms);
/* Check whether a 64 bit float bit pattern fits the FP8 immediate of FMOV.
** Returns the 8 bit encoding, or ~0u if not encodable.
*/
static uint32_t emit_isfpk64(uint64_t n)
{
  uint64_t exp9 = (n >> 54) & 0x1ff;  /* Sign-less top of the exponent. */
  /* Low 48 mantissa bits must be zero and exponent in the small range. */
  if ((n << 16) != 0 || (exp9 != 0x100 && exp9 != 0x0ff))
    return ~0u;
  return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
}
69 /* -- Emit basic instructions --------------------------------------------- */
71 static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
73 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
76 static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
78 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
81 static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
83 *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
86 static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
88 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
91 static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
93 *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
96 static void emit_d(ASMState *as, A64Ins ai, Reg rd)
98 *--as->mcp = ai | A64F_D(rd);
101 static void emit_n(ASMState *as, A64Ins ai, Reg rn)
103 *--as->mcp = ai | A64F_N(rn);
106 static int emit_checkofs(A64Ins ai, int64_t ofs)
108 int scale = (ai >> 30) & 3;
109 if (ofs < 0 || (ofs & ((1<<scale)-1))) {
110 return (ofs >= -256 && ofs <= 255) ? -1 : 0;
111 } else {
112 return (ofs < (4096<<scale)) ? 1 : 0;
/* Build the encoding an adjacent load/store would have, for LDP/STP fusion.
** Compared against the previously emitted instruction by emit_lso().
*/
116 static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
118 if (ofs >= 0) {
119 return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */
120 } else if (ofs >= -256) {
/* Negative offsets use the unscaled signed 9 bit form. */
121 return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff);
122 } else {
123 return A64F_D(31); /* Will mismatch prev. */
/* Emit a load/store with a scaled 12 bit or unscaled 9 bit offset. */
127 static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
129 int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
130 lj_assertA(ot, "load/store offset %d out of range", ofs);
131 /* Combine LDR/STR pairs to LDP/STP. */
/* Only 32/64 bit accesses, never across the loop boundary, and
** (presumably bit 22 is the load bit -- TODO confirm) never when a
** load would clobber its own base register.
*/
132 if ((sc == 2 || sc == 3) &&
133 (!(ai & 0x400000) || rd != rn) &&
134 as->mcp != as->mcloop) {
135 uint32_t prev = *as->mcp & ~A64F_D(31);
136 int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
137 A64Ins aip;
/* Does the previous instruction access the slot just below or above? */
138 if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) {
139 aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
140 } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) {
141 aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
142 ofsm = ofs;
143 } else {
144 goto nopair;
/* Check the scaled signed 7 bit offset range of LDP/STP. */
146 if (lj_ror((unsigned int)ofsm + (64u<<sc), sc) <= 127u) {
/* Rewrite the previous instruction in place as an LDP/STP. */
147 *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
148 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
149 return;
152 nopair:
/* Single load/store: pick the encoding emit_checkofs validated. */
153 if (ot == 1)
154 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
155 else
156 *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
159 /* -- Emit loads/stores --------------------------------------------------- */
161 /* Prefer rematerialization of BASE/L from global_State over spills. */
/* References at or below REF_BASE are cheap to recompute, so never spill. */
162 #define emit_canremat(ref) ((ref) <= REF_BASE)
164 /* Try to find a one-step delta relative to other consts. */
/* Returns 1 and emits a single MOV/ADD/SUB if some allocated register
** already holds a constant within K12 range of k. Returns 0 otherwise.
*/
165 static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
/* Scan all non-free GPRs, plus GL which always holds global_State. */
167 RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
168 while (work) {
169 Reg r = rset_picktop(work);
170 IRRef ref = regcost_ref(as->cost[r]);
171 lj_assertA(r != rd, "dest reg %d not free", rd);
172 if (ref < REF_TRUE) {
/* Fetch the constant currently held in register r. */
173 uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
174 get_k64val(as, ref);
175 int64_t delta = (int64_t)(k - kx);
176 if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */
177 if (delta == 0) {
/* Same constant: a plain register move suffices. */
178 emit_dm(as, is64|A64I_MOVw, rd, r);
179 return 1;
180 } else {
/* Otherwise try a single ADD/SUB of the absolute delta. */
181 uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
182 if (k12) {
183 emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
184 return 1;
186 /* Do other ops or multi-step deltas pay off? Probably not.
187 ** E.g. XOR rarely helps with pointer consts.
191 rset_clear(work, r);
193 return 0; /* Failed. */
/* Offset of address k from the global_State of the current jit_State. */
196 #define glofs(as, k) \
197 ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
/* Offset of address k from the next instruction to be emitted. */
198 #define mcpofs(as, k) \
199 ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
/* Is k reachable with a word-scaled signed 19 bit PC-relative offset? */
200 #define checkmcpofs(as, k) \
201 (A64F_S_OK(mcpofs(as, k)>>2, 19))
203 /* Try to form a const as ADR or ADRP or ADRP + ADD. */
/* Returns 1 on success (1 or 2 instructions emitted), 0 if out of range. */
204 static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
206 A64Ins ai = A64I_ADR;
207 int64_t ofs = mcpofs(as, k);
/* Plain ADR needs a signed 21 bit byte offset from PC. */
208 if (!A64F_S_OK((uint64_t)ofs, 21)) {
/* Fall back to ADRP (4KB page granularity), plus ADD for the low bits. */
209 uint64_t kpage = k & ~0xfffull;
/* Account for the extra ADD instruction when computing the ADRP's PC. */
210 MCode *adrp = as->mcp - 1 - (k != kpage);
211 ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;
212 if (!A64F_S_OK(ofs, 21))
213 return 0; /* Failed. */
214 if (k != kpage)
215 emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);
216 ai = A64I_ADRP;
/* ADR/ADRP split the 21 bit offset into low 2 bits and high 19 bits. */
218 emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd);
219 return 1;
/* Load a constant into a GPR with a near-minimal instruction sequence. */
222 static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
224 int zeros = 0, ones = 0, neg, lshift = 0;
225 int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
226 /* Count non-homogeneous 16 bit fragments. */
227 while (--i >= 0) {
228 uint32_t frag = (u64 >> i*16) & 0xffff;
229 zeros += (frag != 0);
230 ones += (frag != 0xffff);
232 neg = ones < zeros; /* Use MOVN if it pays off. */
233 if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */
/* 1. ORR of a logical (bitmask) immediate with the zero register. */
234 uint32_t k13 = emit_isk13(u64, is64);
235 if (k13) {
236 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
237 return;
/* 2. MOV/ADD/SUB relative to a constant already held in a register. */
239 if (emit_kdelta(as, rd, u64, is64)) {
240 return;
/* 3. PC-relative ADR or ADRP(+ADD). */
242 if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */
243 return;
/* Fallback: MOVN/MOVZ plus MOVK for each remaining fragment. */
246 if (neg) {
247 u64 = ~u64;
248 if (!is64) u64 = (uint32_t)u64;
250 if (u64) {
251 /* Find first/last fragment to be filled. */
252 int shift = (63-emit_clz64(u64)) & ~15;
253 lshift = emit_ctz64(u64) & ~15;
254 for (; shift > lshift; shift -= 16) {
255 uint32_t frag = (u64 >> shift) & 0xffff;
256 if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */
257 if (neg) frag ^= 0xffff; /* MOVK requires the original value. */
258 emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
261 /* But MOVN needs an inverted value. */
262 emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
263 A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
/* Both variants delegate to emit_loadk; the 32 bit one zero-extends. */
266 /* Load a 32 bit constant into a GPR. */
267 #define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i)
269 /* Load a 64 bit constant into a GPR. */
270 #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i)
272 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
274 /* Get/set from constant pointer. */
/* Emits load/store ai of register r at absolute address p, preferring
** GL-relative, then PC-relative, then a materialized base register.
*/
275 static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
277 Reg base = RID_GL;
278 int64_t ofs = glofs(as, p);
279 if (emit_checkofs(ai, ofs)) {
280 /* GL + offset, might subsequently fuse to LDP/STP. */
281 } else if (ai == A64I_LDRx && checkmcpofs(as, p)) {
282 /* IP + offset is cheaper than allock, but address must be in range. */
283 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
284 return;
285 } else { /* Split up into base reg + offset. */
/* Base holds p rounded down to 32KB; ofs keeps the low 15 bits. */
286 int64_t i64 = i64ptr(p);
287 base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
288 ofs = i64 & 0x7fffull;
290 emit_lso(as, ai, r, base, ofs);
293 /* Load 64 bit IR constant into register. */
294 static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
296 const uint64_t *k = &ir_k64(ir)->u64;
297 int64_t ofs;
298 if (r >= RID_MAX_GPR) {
/* FPR destination: try the 8 bit FMOV immediate form first. */
299 uint32_t fpk = emit_isfpk64(*k);
300 if (fpk != ~0u) {
301 emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
302 return;
/* Otherwise load the constant from memory: GL-relative if in range. */
305 ofs = glofs(as, k);
306 if (emit_checkofs(A64I_LDRx, ofs)) {
307 emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
308 (r & 31), RID_GL, ofs);
309 } else {
/* FPR destination takes a detour through RID_TMP. */
310 if (r >= RID_MAX_GPR) {
311 emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
312 r = RID_TMP;
/* PC-relative literal load, or full constant materialization. */
314 if (checkmcpofs(as, k))
315 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
316 else
317 emit_loadu64(as, r, *k);
321 /* Get/set global_State fields. */
322 #define emit_getgl(as, r, field) \
323 emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
324 #define emit_setgl(as, r, field) \
325 emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
327 /* Trace number is determined from pc of exit instruction. */
/* Hence this is a no-op on ARM64; i is only referenced to avoid warnings. */
328 #define emit_setvmstate(as, i) UNUSED(i)
330 /* -- Emit control-flow instructions -------------------------------------- */
332 /* Label for internal jumps. */
333 typedef MCode *MCLabel;
335 /* Return label pointing to current PC. */
/* Code is emitted backwards, so mcp is the most recently emitted ins. */
336 #define emit_label(as) ((as)->mcp)
338 static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
340 MCode *p = --as->mcp;
341 ptrdiff_t delta = target - p;
342 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
343 *p = A64I_BCC | A64F_S19(delta) | cond;
346 static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
348 MCode *p = --as->mcp;
349 ptrdiff_t delta = target - p;
350 lj_assertA(A64F_S_OK(delta, 26), "branch target out of range");
351 *p = ai | A64F_S26(delta);
/* Emit a test-bit-and-branch (TBZ/TBNZ) to target. */
354 static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
356 MCode *p = --as->mcp;
357 ptrdiff_t delta = target - p;
/* NOTE(review): assert allows bits 0..62 only, although TBZ/TBNZ can encode
** bit 63 -- presumably bit 63 is never requested here; confirm before relaxing.
*/
358 lj_assertA(bit < 63, "bit number out of range");
359 lj_assertA(A64F_S_OK(delta, 14), "branch target out of range");
/* Bits 32..62 need the 64 bit register variant. */
360 if (bit > 31) ai |= A64I_X;
361 *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
364 static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
366 MCode *p = --as->mcp;
367 ptrdiff_t delta = target - p;
368 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
369 *p = ai | A64F_S19(delta) | r;
/* Unconditional jump: plain B to target. */
372 #define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
/* Emit a call to an ASM function: direct BL if in range, else BLR. */
374 static void emit_call(ASMState *as, ASMFunction target)
376 MCode *p = --as->mcp;
377 #if LJ_ABI_PAUTH
/* Strip the pointer-authentication signature to get the raw address. */
378 char *targetp = ptrauth_auth_data((char *)target,
379 ptrauth_key_function_pointer, 0);
380 #else
381 char *targetp = (char *)target;
382 #endif
383 ptrdiff_t delta = targetp - (char *)p;
/* Direct BL takes a word-scaled signed 26 bit offset. */
384 if (A64F_S_OK(delta>>2, 26)) {
385 *p = A64I_BL | A64F_S26(delta>>2);
386 } else { /* Target out of range: need indirect call. But don't use R0-R7. */
387 Reg r = ra_allock(as, i64ptr(target),
388 RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
389 *p = A64I_BLR_AUTH | A64F_N(r);
393 /* -- Emit generic operations --------------------------------------------- */
395 /* Generic move between two regs. */
396 static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
398 if (dst >= RID_MAX_GPR) {
/* FPR move: pick double vs. single precision by IR type. */
399 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
400 (dst & 31), (src & 31));
401 return;
403 if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
404 MCode ins = *as->mcp, swp = (src^dst);
/* Mask/match pattern presumably selects 32/64 bit LDR/STR with
** unsigned-offset encoding -- TODO confirm against the A64 ARM.
*/
405 if ((ins & 0xbf800000) == 0xb9000000) {
406 if (!((ins ^ (dst << 5)) & 0x000003e0))
407 *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */
408 if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
409 *as->mcp = ins ^ swp; /* Swap D in store. */
412 emit_dm(as, A64I_MOVx, dst, src);
415 /* Generic load of register with base and (small) offset address. */
416 static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
418 if (r >= RID_MAX_GPR)
419 emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
420 else
421 emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
424 /* Generic store of register with base and (small) offset address. */
425 static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
427 if (r >= RID_MAX_GPR)
428 emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
429 else
430 emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
433 /* Emit an arithmetic operation with a constant operand. */
434 static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
435 int32_t i, RegSet allow)
437 uint32_t k = emit_isk12(i);
438 if (k)
439 emit_dn(as, ai^k, dest, src);
440 else
441 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
444 /* Add offset to pointer. */
445 static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
447 if (ofs)
448 emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
449 ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs,
450 rset_exclude(RSET_GPR, r));
/* Reserve ofs bytes of stack: subtract ofs from the stack pointer. */
453 #define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))