/*
** ARM64 IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc.
*/

/* -- Register allocator extensions --------------------------------------- */
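
/* Note: like LuaJIT's other backends, this assembler emits machine code
** backwards (as->mcp grows downward), so within each function below the
** last emit_*() call produces the instruction that executes first at
** runtime.
*/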

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}
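
/* The register pair is unpacked by callers as:
** right = (left >> 8); left &= 255; (see e.g. asm_fparith below).
*/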

/* -- Guard handling ------------------------------------------------------ */

/* Setup all needed exit stubs. */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
  ExitNo i;
  MCode *mxp = as->mctop;
  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
    asm_mclimit(as);
  /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
  for (i = nexits-1; (int32_t)i >= 0; i--)
    *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
  *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
  mxp--;
  *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
  *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
  as->mctop = mxp;
}

static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
  /* Keep this in-sync with exitstub_trace_addr(). */
  return as->mctop + exitno + 3;
}

/* Emit conditional branch to exit for guard. */
static void asm_guardcc(ASMState *as, A64CC cc)
{
  MCode *target = asm_exitstub_addr(as, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->loopinv = 1;
    *p = A64I_B | A64F_S26(target-p);
    emit_cond_branch(as, cc^1, p-1);
    as->mcp = p-1;
  } else {
    emit_cond_branch(as, cc, target);
  }
}
87 /* Emit test and branch instruction to exit for guard, if in range. */
88 static int asm_guardtnb(ASMState
*as
, A64Ins ai
, Reg r
, uint32_t bit
)
90 MCode
*target
= asm_exitstub_addr(as
, as
->snapno
);
92 ptrdiff_t delta
= target
- p
;
93 if (LJ_UNLIKELY(p
== as
->invmcp
)) {
94 if (as
->orignins
> 1023) return 0; /* Delta might end up too large. */
96 *p
= A64I_B
| A64F_S26(delta
);
99 } else if (LJ_UNLIKELY(delta
>= 0x1fff)) {
102 emit_tnb(as
, ai
, r
, bit
, target
);
106 /* Emit compare and branch instruction to exit for guard. */
107 static void asm_guardcnb(ASMState
*as
, A64Ins ai
, Reg r
)
109 MCode
*target
= asm_exitstub_addr(as
, as
->snapno
);
111 if (LJ_UNLIKELY(p
== as
->invmcp
)) {
113 *p
= A64I_B
| A64F_S26(target
-p
);
114 emit_cnb(as
, ai
^0x01000000u
, r
, p
-1);
117 emit_cnb(as
, ai
, r
, target
);

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
{
  if (irref_isk(ref)) {
    IRIns *ir = IR(ref);
    if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
      *k = ir->i;
      return 1;
    } else if (checki32((int64_t)ir_k64(ir)->u64)) {
      *k = (int32_t)ir_k64(ir)->u64;
      return 1;
    }
  }
  return 0;
}

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

#define FUSE_REG	0x40000000
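
/* asm_fuseahuref() returns either a plain offset in *ofsp, or FUSE_REG
** or'ed with an index register; callers test (ofs & FUSE_REG) to select
** the register-indexed addressing form.
*/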

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
                          A64Ins ins)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
          if (emit_checkofs(ins, ofs)) {
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        } else {
          Reg base = ra_alloc1(as, ir->op1, allow);
          *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
          return base;
        }
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (emit_checkofs(ins, ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
        int64_t ofs = glofs(as, &uv->tv);
        if (emit_checkofs(ins, ofs)) {
          *ofsp = (int32_t)ofs;
          return RID_GL;
        }
      }
    } else if (ir->o == IR_TMPREF) {
      *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
      return RID_GL;
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

/* Fuse m operand into arithmetic/logic instructions. */
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
  IRIns *ir = IR(ref);
  int logical = (ai & 0x1f000000) == 0x0a000000;
  if (ra_hasreg(ir->r)) {
    ra_noweak(as, ir->r);
    return A64F_M(ir->r);
  } else if (irref_isk(ref)) {
    int64_t k = get_k64val(as, ref);
    uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
                           emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
    if (m)
      return m;
  } else if (mayfuse(as, ref)) {
    if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
        (ir->o == IR_ADD && ir->op1 == ir->op2)) {
      A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
                    ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
      int shift = ir->o == IR_ADD ? 1 :
                    (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
      IRIns *irl = IR(ir->op1);
      if (sh == A64SH_LSL &&
          irl->o == IR_CONV && !logical &&
          irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
          shift <= 4 &&
          canfuse(as, irl)) {
        Reg m = ra_alloc1(as, irl->op1, allow);
        return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
      } else {
        Reg m = ra_alloc1(as, ir->op1, allow);
        return A64F_M(m) | A64F_SH(sh, shift);
      }
    } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
      Reg m = ra_alloc1(as, ir->op1, allow);
      int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
      return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
    } else if (ir->o == IR_CONV && !logical &&
               ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
      Reg m = ra_alloc1(as, ir->op1, allow);
      return A64F_M(m) | A64F_EX(A64EX_SXTW);
    }
  }
  return A64F_M(ra_allocref(as, ref, allow));
}

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
                         RegSet allow)
{
  IRIns *ir = IR(ref);
  Reg base;
  int32_t ofs = 0;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
        ref = ir->op1;
      } else {
        Reg rn, rm;
        IRRef lref = ir->op1, rref = ir->op2;
        IRIns *irl = IR(lref);
        if (mayfuse(as, irl->op1)) {
          unsigned int shift = 4;
          if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
            shift = (IR(irl->op2)->i & 63);
          } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
            shift = 1;
          }
          if ((ai >> 30) == shift) {
            lref = irl->op1;
            irl = IR(lref);
            ai |= A64I_LS_SH;
          }
        }
        if (irl->o == IR_CONV &&
            irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
            canfuse(as, irl)) {
          lref = irl->op1;
          ai |= A64I_LS_SXTWx;
        } else {
          ai |= A64I_LS_LSLx;
        }
        rm = ra_alloc1(as, lref, allow);
        rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
        emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
        return;
      }
    } else if (ir->o == IR_STRREF) {
      if (asm_isk32(as, ir->op2, &ofs)) {
        ref = ir->op1;
      } else if (asm_isk32(as, ir->op1, &ofs)) {
        ref = ir->op2;
      } else {
        Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
        Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
        Reg rn = ra_alloc1(as, refv, allow);
        IRIns *irr = IR(refk);
        uint32_t m;
        if (irr+1 == ir && !ra_used(irr) &&
            irr->o == IR_ADD && irref_isk(irr->op2)) {
          ofs = sizeof(GCstr) + IR(irr->op2)->i;
          if (emit_checkofs(ai, ofs)) {
            Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
            m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
            goto skipopm;
          }
        }
        m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
        ofs = sizeof(GCstr);
      skipopm:
        emit_lso(as, ai, rd, rd, ofs);
        emit_dn(as, A64I_ADDx^m, rd, rn);
        return;
      }
      ofs += sizeof(GCstr);
      if (!emit_checkofs(ai, ofs)) {
        Reg rn = ra_alloc1(as, ref, allow);
        Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
        emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
        return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_lso(as, ai, (rd & 31), base, ofs);
}

/* Fuse FP multiply-add/sub. */
static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irm;
  if ((as->flags & JIT_F_OPT_FMA) &&
      lref != rref &&
      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
       ra_noreg(irm->r)) ||
       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
       (rref = lref, ai = air, ra_noreg(irm->r))))) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
    Reg left = ra_alloc2(as, irm,
                         rset_exclude(rset_exclude(RSET_FPR, dest), add));
    Reg right = (left >> 8); left &= 255;
    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
    return 1;
  }
  return 0;
}

/* Fuse BAND + BSHL/BSHR into UBFM. */
static int asm_fuseandshift(ASMState *as, IRIns *ir)
{
  IRIns *irl = IR(ir->op1);
  lj_assertA(ir->o == IR_BAND, "bad usage");
  if (canfuse(as, irl) && irref_isk(ir->op2)) {
    uint64_t mask = get_k64val(as, ir->op2);
    if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
      int32_t shmask = irt_is64(irl->t) ? 63 : 31;
      int32_t shift = (IR(irl->op2)->i & shmask);
      int32_t imms = shift;
      if (irl->o == IR_BSHL) {
        mask >>= shift;
        shift = (shmask-shift+1) & shmask;
        imms = 0;
      }
      if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
        Reg dest = ra_dest(as, ir, RSET_GPR);
        Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
        A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
        imms += 63 - emit_clz64(mask);
        if (imms > shmask) imms = shmask;
        emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
        return 1;
      }
    }
  }
  return 0;
}

/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */
static int asm_fuseorshift(ASMState *as, IRIns *ir)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  lj_assertA(ir->o == IR_BOR, "bad usage");
  if (canfuse(as, irl) && canfuse(as, irr) &&
      ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
       (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
    if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
      IRRef lref = irl->op1, rref = irr->op1;
      uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
      if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
        uint32_t tmp2;
        IRRef tmp1 = lref; lref = rref; rref = tmp1;
        tmp2 = lshift; lshift = rshift; rshift = tmp2;
      }
      if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
        A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
        Reg dest = ra_dest(as, ir, RSET_GPR);
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
        emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
        return 1;
      }
    }
  }
  return 0;
}

/* -- Calls --------------------------------------------------------------- */
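
/* Note: on ARM64 the argument registers REGARG_FIRSTGPR..REGARG_LASTGPR
** map to x0-x7 and REGARG_FIRSTFPR..REGARG_LASTFPR to d0-d7; asm_gencall
** below fills registers first and falls back to stack slots (spofs), with
** tighter packing on OSX for varargs and small types.
*/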

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if (ci->func)
    emit_call(as, ci->func);
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
  if ((ci->flags & CCI_VARARG)) {
    fpr = REGARG_LASTFPR+1;
  }
#endif
  for (n = 0; n < nargs; n++) { /* Setup args. */
    IRRef ref = args[n];
    IRIns *ir = IR(ref);
    if (ref) {
      if (irt_isfp(ir->t)) {
        if (fpr <= REGARG_LASTFPR) {
          lj_assertA(rset_test(as->freeset, fpr),
                     "reg %d not free", fpr);  /* Must have been evicted. */
          ra_leftov(as, fpr, ref);
          fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
        } else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
          Reg rf = ra_alloc1(as, ref, RSET_FPR);
          emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
        } else {
          Reg r = ra_alloc1(as, ref, RSET_FPR);
          int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
          al |= irt_isnum(ir->t) ? 7 : 3;
#endif
          spofs = (spofs + al) & ~al;
          if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
          emit_spstore(as, ir, r, spofs);
          spofs += al + 1;
        }
      } else {
        if (gpr <= REGARG_LASTGPR) {
          lj_assertA(rset_test(as->freeset, gpr),
                     "reg %d not free", gpr);  /* Must have been evicted. */
          ra_leftov(as, gpr, ref);
          gpr++;
        } else {
          Reg r = ra_alloc1(as, ref, RSET_GPR);
          int32_t al = spalign;
#if LJ_HASFFI && LJ_TARGET_OSX
          al |= irt_size(ir->t) - 1;
#endif
          spofs = (spofs + al) & ~al;
          if (al >= 3) {
            if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
            emit_spstore(as, ir, r, spofs);
          } else {
            lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
            emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
          }
          spofs += al + 1;
        }
      }
#if LJ_HASFFI && LJ_TARGET_OSX
    } else {  /* Marker for start of varargs. */
      gpr = REGARG_LASTGPR+1;
      fpr = REGARG_LASTFPR+1;
      spalign = 7;
#endif
    }
  }
}

/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lj_assertA(!irt_ispri(ir->t), "PRI dest");
    if (irt_isfp(ir->t)) {
      if (ci->flags & CCI_CASTU64) {
        Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
        emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
                dest, RID_RET);
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
  UNUSED(ci);
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(ir_k64(irf)->u64);
  } else {  /* Need a non-argument register for indirect calls. */
    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    emit_n(as, A64I_BLR_AUTH, freg);
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_CMPx, RID_TMP,
          ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
}

/* -- Buffer operations --------------------------------------------------- */

#if LJ_HASBUFFER
static void asm_bufhdr_write(ASMState *as, Reg sb)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
  IRIns irgc;
  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
  emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
  emit_getgl(as, RID_TMP, cur_L);
  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
}
#endif

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
}

static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
}

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
              (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      A64Ins ai = irt_isfloat(ir->t) ?
        (((IRT_IS64 >> st) & 1) ?
         (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
         (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
        (((IRT_IS64 >> st) & 1) ?
         (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
         (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
      emit_dn(as, ai, (dest & 31), left);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
                 "bad type for checked CONV");
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg dest = ra_dest(as, ir, RSET_GPR);
      A64Ins ai = irt_is64(ir->t) ?
        (st == IRT_NUM ?
         (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
         (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
        (st == IRT_NUM ?
         (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
         (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
      emit_dn(as, ai, dest, (left & 31));
    }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, lref, RSET_GPR);
    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
                st == IRT_U8 ? A64I_UXTBw :
                st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
    lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
    emit_dn(as, ai, dest, left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irt_is64(ir->t)) {
      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
        /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      } else {  /* 32 to 64 bit sign extension. */
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        emit_dn(as, A64I_SXTW, dest, left);
      }
    } else {
      if (st64 && !(ir->op2 & IRCONV_NONE)) {
        /* This is either a 32 bit reg/reg mov which zeroes the hiword
        ** or a load of the loword from a 64 bit address.
        */
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        emit_dm(as, A64I_MOVw, dest, left);
      } else {  /* 32/32 bit no-op (cast). */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      }
    }
  }
}

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  Reg tmp;
  int32_t ofs = 0;
  ra_evictset(as, RSET_SCRATCH);
  if (ra_used(ir)) {
    if (ra_hasspill(ir->s)) {
      ofs = sps_scale(ir->s);
      if (ra_hasreg(ir->r)) {
        ra_free(as, ir->r);
        ra_modified(as, ir->r);
        emit_spload(as, ir, ir->r, ofs);
      }
    } else {
      Reg dest = ra_dest(as, ir, RSET_FPR);
      emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
    }
  }
  asm_guardcnb(as, A64I_CBZ, RID_RET);
  args[0] = ir->op1;  /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n */
  asm_gencall(as, ci, args);
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
}

/* -- Memory references --------------------------------------------------- */

/* Store tagged value for ref at base+ofs. */
static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
{
  RegSet allow = rset_exclude(RSET_GPR, base);
  IRIns *ir = IR(ref);
  lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
             "store of IR type %d", irt_type(ir->t));
  if (irref_isk(ref)) {
    TValue k;
    lj_ir_kvalue(as->J->L, &k, ir);
    emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
  } else {
    Reg src = ra_alloc1(as, ref, allow);
    rset_clear(allow, src);
    if (irt_isinteger(ir->t)) {
      Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
      emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
    } else {
      Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
    }
  }
}

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
{
  if ((mode & IRTMPREF_IN1)) {
    IRIns *ir = IR(ref);
    if (irt_isnum(ir->t)) {
      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
        /* Use the number constant itself as a TValue. */
        ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
        return;
      }
      emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
    } else {
      asm_tvstore64(as, dest, 0, ref);
    }
  }
  /* g->tmptv holds the TValue(s). */
  emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
}

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
    if (k) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_dn(as, A64I_ADDx^k, dest, base);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg tmp = RID_TMP, type = RID_NONE, key, tkey;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  int isk = irref_isk(refkey);
  IRType1 kt = irkey->t;
  uint32_t k = 0;
  uint32_t khash;
  MCLabel l_end, l_loop;
  rset_clear(allow, tab);

  /* Allocate register for tkey outside of the loop. */
  if (isk) {
    int64_t kk;
    if (irt_isaddr(kt)) {
      kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
    } else if (irt_isnum(kt)) {
      kk = (int64_t)ir_knum(irkey)->u64;
      /* Assumes -0.0 is already canonicalized to +0.0. */
    } else {
      lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
      kk = ~((int64_t)~irt_toitype(kt) << 47);
    }
    k = emit_isk12(kk);
    tkey = k ? 0 : ra_allock(as, kk, allow);
  } else {
    tkey = ra_scratch(as, allow);
  }

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE) {
    asm_guardcc(as, CC_AL);
  } else if (destused) {
    uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
    lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
    emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
  }

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;
  if (destused)
    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_cond_branch(as, CC_EQ, l_end);
  emit_nm(as, A64I_CMPx^k, tmp, tkey);
  if (!destused)
    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
  emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
  *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;

  /* Construct tkey as canonicalized or tagged key. */
  if (!isk) {
    if (irt_isnum(kt)) {
      key = ra_alloc1(as, refkey, RSET_FPR);
      emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
      /* A64I_FMOV_R_D from key to tkey done below. */
    } else {
      lj_assertA(irt_isaddr(kt), "bad HREF key type");
      key = ra_alloc1(as, refkey, allow);
      type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
    }
  }

  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(as, irkey) : 1;
  if (khash == 0) {
    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
  } else {
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
    if (isk) {
      Reg tmphash = ra_allock(as, khash, allow);
      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else if (irt_isstr(kt)) {
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
      emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
      if (irt_isnum(kt)) {
        emit_dnm(as, A64I_EORw, tmp, tkey, dest);
        emit_dnm(as, A64I_ADDw, dest, dest, dest);
        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
        emit_nm(as, A64I_FCMPZd, (key & 31), 0);
        emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
      } else {
        emit_dnm(as, A64I_EORw, tmp, key, dest);
        emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
      }
    }
  }
}

static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  int bigofs = !emit_checkofs(A64I_LDRx, kofs);
  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  uint64_t k;
  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
  if (bigofs) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
  }
  asm_guardcc(as, CC_NE);
  if (irt_ispri(irkey->t)) {
    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
  } else if (irt_isnum(irkey->t)) {
    k = ir_knum(irkey)->u64;
  } else {
    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
  }
  emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
  emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
  if (bigofs)
    emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, A64I_LDRx, dest, v);
  } else {
    if (ir->o == IR_UREFC) {
      asm_guardcnb(as, A64I_CBZ, RID_TMP);
      emit_opk(as, A64I_ADDx, dest, dest,
               (int32_t)offsetof(GCupval, tv), RSET_GPR);
      emit_lso(as, A64I_LDRB, RID_TMP, dest,
               (int32_t)offsetof(GCupval, closed));
    } else {
      emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
    }
    emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
             (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lj_assertA(!ra_used(ir), "unfused FREF");
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg dest = ra_dest(as, ir, allow);
  Reg base = ra_alloc1(as, ir->op1, allow);
  IRIns *irr = IR(ir->op2);
  int32_t ofs = sizeof(GCstr);
  uint32_t m;
  rset_clear(allow, base);
  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
    emit_dn(as, A64I_ADDx^m, dest, base);
  } else {
    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
  }
}

/* -- Loads and stores ---------------------------------------------------- */

static A64Ins asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return A64I_LDRB ^ A64I_LS_S;
  case IRT_U8: return A64I_LDRB;
  case IRT_I16: return A64I_LDRH ^ A64I_LS_S;
  case IRT_U16: return A64I_LDRH;
  case IRT_NUM: return A64I_LDRd;
  case IRT_FLOAT: return A64I_LDRs;
  default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
  }
}

static A64Ins asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return A64I_STRB;
  case IRT_I16: case IRT_U16: return A64I_STRH;
  case IRT_NUM: return A64I_STRd;
  case IRT_FLOAT: return A64I_STRs;
  default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
  }
}

static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx;
  A64Ins ai = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
    idx = RID_GL;
    ofs = (ir->op2 << 2) - GG_OFS(g);
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
        return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  emit_lso(as, ai, (dest & 31), idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
  }
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}

static void asm_xstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src));
  }
}

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  Reg idx, tmp;
  int32_t ofs = 0;
  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
  lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
             irt_isint(ir->t),
             "bad load type %d", irt_type(ir->t));
  if (ra_used(ir)) {
    Reg dest = ra_dest(as, ir, allow);
    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
    if (irt_isaddr(ir->t)) {
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if (irt_isnum(ir->t)) {
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    } else if (irt_isint(ir->t)) {
      emit_dm(as, A64I_MOVw, dest, dest);
    }
  } else {
    tmp = ra_scratch(as, gpr);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
  rset_clear(gpr, idx);
  if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
  /* Always do the type check, even if the load result is unused. */
  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
  if (irt_type(ir->t) >= IRT_NUM) {
    lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
               "bad load type %d", irt_type(ir->t));
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
            ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
  } else if (irt_isaddr(ir->t)) {
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
    emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
  } else if (irt_isnil(ir->t)) {
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
  } else {
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
            ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
  }
  if (ofs & FUSE_REG)
    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
  else
    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
}

static void asm_ahustore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    RegSet allow = RSET_GPR;
    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
    int32_t ofs = 0;
    if (irt_isnum(ir->t)) {
      src = ra_alloc1(as, ir->op2, RSET_FPR);
      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
      if (ofs & FUSE_REG)
        emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs & 31));
      else
        emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
    } else {
      if (!irt_ispri(ir->t)) {
        src = ra_alloc1(as, ir->op2, allow);
        rset_clear(allow, src);
        if (irt_isinteger(ir->t))
          type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
        else
          type = ra_allock(as, irt_toitype(ir->t), allow);
      } else {
        tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
      }
      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
                           A64I_STRx);
      if (ofs & FUSE_REG)
        emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
      else
        emit_lso(as, A64I_STRx, tmp, idx, ofs);
      if (ra_hasreg(src)) {
        if (irt_isinteger(ir->t)) {
          emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
        } else {
          emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
        }
      }
    }
  }
}

static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-2);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
             "bad parent SLOAD");  /* Handled by asm_head_side(). */
  lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
             "inconsistent SLOAD variant");
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    Reg tmp = RID_NONE;
    if ((ir->op2 & IRSLOAD_CONVERT))
      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
    lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
               "bad SLOAD type %d", irt_type(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
    if (irt_isaddr(t)) {
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
        /* If value is already loaded for type check, move it to FPR. */
        if ((ir->op2 & IRSLOAD_TYPECHECK))
          emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
        else
          dest = tmp;
        t.irt = IRT_NUM;  /* Check for original type. */
      } else {
        emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
        dest = tmp;
        t.irt = IRT_INT;  /* Check for original type. */
      }
    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
      emit_dm(as, A64I_MOVw, dest, dest);
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
dotypecheck:
  rset_clear(allow, base);
  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
    Reg tmp;
    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
      tmp = dest;
    } else {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
    if (ra_hasreg(dest) && tmp != dest)
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    /* Need type check, even if the load result is unused. */
    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
    if (irt_type(t) >= IRT_NUM) {
      lj_assertA(irt_isinteger(t) || irt_isnum(t),
                 "bad SLOAD type %d", irt_type(t));
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
              ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp);
    } else if (irt_isnil(t)) {
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
    } else if (irt_ispri(t)) {
      emit_nm(as, A64I_CMPx,
              ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
    } else {
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
      emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
    }
    emit_lso(as, A64I_LDRx, tmp, base, ofs);
    return;
  }
  if (ra_hasreg(dest)) {
    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
             (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
             ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
  }
}

/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
             "bad CNEW/CNEWI operands");

  as->gcsteps++;
  asm_setupresult(as, ir, ci);  /* GCcdata * */
  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    Reg r = ra_alloc1(as, ir->op2, allow);
    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  {
    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
  }
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg mark = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_setgl(as, tab, gc.grayagain);
  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
  emit_getgl(as, link, gc.grayagain);
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_lso(as, A64I_LDRB, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}

/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
}

static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_dn(as, ai, (dest & 31), (left & 31));
}

static void asm_fpmath(ASMState *as, IRIns *ir)
{
  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
  if (fpm == IRFPM_SQRT) {
    asm_fpunary(as, ir, A64I_FSQRTd);
  } else if (fpm <= IRFPM_TRUNC) {
    asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
                        fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
  } else {
    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
  }
}

static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
{
  IRIns *ir;
  if (irref_isk(rref))
    return 0;  /* Don't swap constants to the left. */
  if (irref_isk(lref))
    return 1;  /* But swap constants to the right. */
  ir = IR(rref);
  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
      (ir->o == IR_ADD && ir->op1 == ir->op2) ||
      (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
    return 0;  /* Don't swap fusable operands to the left. */
  ir = IR(lref);
  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
      (ir->o == IR_ADD && ir->op1 == ir->op2) ||
      (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
    return 1;  /* But swap fusable operands to the right. */
  return 0;  /* Otherwise don't swap. */
}

static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
{
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left, dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m;
  if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
    IRRef tmp = lref; lref = rref; rref = tmp;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;
  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
    asm_guardcc(as, CC_VS);
    ai |= A64I_S;
  }
  emit_dn(as, ai^m, dest, left);
}

static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
{
  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;
    ai |= A64I_S;
  }
  asm_intop(as, ir, ai);
}

static void asm_intneg(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
}

/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
static void asm_intmul(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* IR_MULOV */
    asm_guardcc(as, CC_NE);
    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
    emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
    emit_dnm(as, A64I_SMULL, dest, right, left);
  } else {
    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
  }
}

static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
      asm_fparith(as, ir, A64I_FADDd);
    return;
  }
  asm_intop_s(as, ir, A64I_ADDw);
}

static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
      asm_fparith(as, ir, A64I_FSUBd);
    return;
  }
  asm_intop_s(as, ir, A64I_SUBw);
}

static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, A64I_FMULd);
    return;
  }
  asm_intmul(as, ir);
}

#define asm_addov(as, ir)	asm_add(as, ir)
#define asm_subov(as, ir)	asm_sub(as, ir)
#define asm_mulov(as, ir)	asm_mul(as, ir)

#define asm_fpdiv(as, ir)	asm_fparith(as, ir, A64I_FDIVd)
#define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)

static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, A64I_FNEGd);
    return;
  }
  asm_intneg(as, ir);
}

static void asm_band(ASMState *as, IRIns *ir)
{
  A64Ins ai = A64I_ANDw;
  if (asm_fuseandshift(as, ir))
    return;  /* Fused BAND + BSHL/BSHR. */
  if (as->flagmcp == as->mcp) {
    /* Try to drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;
    ai = A64I_ANDSw;
  }
  asm_intop(as, ir, ai);
}

static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irl = IR(lref), *irr = IR(rref);
  if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
      (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
    /* Try to fuse BNOT into ORN/EON. */
    Reg left, dest = ra_dest(as, ir, RSET_GPR);
    uint32_t m;
    if (irl->o == IR_BNOT) {
      IRRef tmp = lref; lref = rref; rref = tmp;
    }
    left = ra_alloc1(as, lref, RSET_GPR);
    ai |= A64I_ON;  /* ORR -> ORN, EOR -> EON. */
    if (irt_is64(ir->t)) ai |= A64I_X;
    m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
    emit_dn(as, ai^m, dest, left);
  } else {
    asm_intop(as, ir, ai);
  }
}

static void asm_bor(ASMState *as, IRIns *ir)
{
  if (asm_fuseorshift(as, ir))
    return;  /* Fused BOR(BSHL, BSHR). */
  asm_borbxor(as, ir, A64I_ORRw);
}

#define asm_bxor(as, ir)	asm_borbxor(as, ir, A64I_EORw)

static void asm_bnot(ASMState *as, IRIns *ir)
{
  A64Ins ai = A64I_MVNw;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;
  emit_d(as, ai^m, dest);
}

static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
}

static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
{
  int32_t shmask = irt_is64(ir->t) ? 63 : 31;
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    Reg left, dest = ra_dest(as, ir, RSET_GPR);
    int32_t shift = (IR(ir->op2)->i & shmask);
    IRIns *irl = IR(ir->op1);
    if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;

    /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
    if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
      if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
        int32_t shift2 = (IR(irl->op2)->i & shmask);
        shift = ((shift - shift2) & shmask);
        shmask -= shift2;
        ir = irl;
      }
    }

    left = ra_alloc1(as, ir->op1, RSET_GPR);
    switch (sh) {
    case A64SH_LSL:
      emit_dn(as, ai | A64F_IMMS(shmask-shift) |
              A64F_IMMR((shmask-shift+1)&shmask), dest, left);
      break;
    case A64SH_LSR: case A64SH_ASR:
      emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
      break;
    case A64SH_ROR:
      emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
      break;
    }
  } else {  /* Variable-length shifts. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
  }
}

#define asm_bshl(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
#define asm_bror(as, ir)	asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
#define asm_brol(as, ir)	lj_assertA(0, "unexpected BROL")

static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
  emit_nm(as, A64I_CMPw, left, right);
}

static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
{
  Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = ((left >> 8) & 31); left &= 31;
  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
  emit_nm(as, A64I_FCMPd, left, right);
}

static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
{
  if (irt_isnum(ir->t))
    asm_fpmin_max(as, ir, fcc);
  else
    asm_intmin_max(as, ir, cc);
}

#define asm_min(as, ir)		asm_min_max(as, ir, CC_LT, CC_PL)
#define asm_max(as, ir)		asm_min_max(as, ir, CC_GT, CC_LE)

/* -- Comparisons --------------------------------------------------------- */

/* Map of comparisons to flags. ORDER IR. */
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op  FP swp  int cc   FP cc */
  /* LT       */ CC_GE + (CC_HS << 4),
  /* GE    x  */ CC_LT + (CC_HI << 4),
  /* LE       */ CC_GT + (CC_HI << 4),
  /* GT    x  */ CC_LE + (CC_HS << 4),
  /* ULT   x  */ CC_HS + (CC_LS << 4),
  /* UGE      */ CC_LO + (CC_LO << 4),
  /* ULE   x  */ CC_HI + (CC_LO << 4),
  /* UGT      */ CC_LS + (CC_LS << 4),
  /* EQ       */ CC_NE + (CC_NE << 4),
  /* NE       */ CC_EQ + (CC_EQ << 4),
  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
};
1616 static void asm_fpcomp(ASMState
*as
, IRIns
*ir
)
1620 int swp
= ((ir
->o
^ (ir
->o
>> 2)) & ~(ir
->o
>> 3) & 1);
1621 if (!swp
&& irref_isk(ir
->op2
) && ir_knum(IR(ir
->op2
))->u64
== 0) {
1622 left
= (ra_alloc1(as
, ir
->op1
, RSET_FPR
) & 31);
1626 left
= ra_alloc2(as
, ir
, RSET_FPR
);
1628 right
= (left
& 31); left
= ((left
>> 8) & 31);
1630 right
= ((left
>> 8) & 31); left
&= 31;
1634 asm_guardcc(as
, (asm_compmap
[ir
->o
] >> 4));
1635 emit_nm(as
, ai
, left
, right
);

/* Integer comparisons. */
static void asm_intcomp(ASMState *as, IRIns *ir)
{
  A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
  A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left;
  uint32_t m;
  int cmpprev0 = 0;
  lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
             irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
             "bad comparison data type %d", irt_type(ir->t));
  if (asm_swapops(as, lref, rref)) {
    IRRef tmp = lref; lref = rref; rref = tmp;
    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
  }
  oldcc = cc;
  if (irref_isk(rref) && get_k64val(as, rref) == 0) {
    IRIns *irl = IR(lref);
    if (cc == CC_GE) cc = CC_PL;
    else if (cc == CC_LT) cc = CC_MI;
    else if (cc > CC_NE) goto nocombine;  /* Other conds don't work with tst. */
    cmpprev0 = (irl+1 == ir);
    /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
      IRRef blref = irl->op1, brref = irl->op2;
      uint32_t m2 = 0;
      Reg bleft;
      if (asm_swapops(as, blref, brref)) {
        Reg tmp = blref; blref = brref; brref = tmp;
      }
      bleft = ra_alloc1(as, blref, RSET_GPR);
      if (irref_isk(brref)) {
        uint64_t k = get_k64val(as, brref);
        if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
            asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
                         emit_ctz64(k)))
          return;
        m2 = emit_isk13(k, irt_is64(irl->t));
      }
      ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
      if (!m2)
        m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
      asm_guardcc(as, cc);
      emit_n(as, ai^m2, bleft);
      return;
    }
    if (cc == CC_EQ || cc == CC_NE) {
      /* Combine cmp-bcc into cbz/cbnz. */
      ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
      if (irt_is64(ir->t)) ai |= A64I_X;
      asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
      return;
    }
  }
nocombine:
  left = ra_alloc1(as, lref, RSET_GPR);
  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  asm_guardcc(as, cc);
  emit_n(as, ai^m, left);
  /* Signed comparison with zero and referencing previous ins? */
  if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
}
*as
, IRIns
*ir
)
1706 if (irt_isnum(ir
->t
))
1709 asm_intcomp(as
, ir
);
1712 #define asm_equal(as, ir) asm_comp(as, ir)

/* -- Split register ops -------------------------------------------------- */

/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
  }
}
1737 static void asm_prof(ASMState
*as
, IRIns
*ir
)
1739 uint32_t k
= emit_isk13(HOOK_PROFILE
, 0);
1740 lj_assertA(k
!= 0, "HOOK_PROFILE does not fit in K13");
1742 asm_guardcc(as
, CC_NE
);
1743 emit_n(as
, A64I_TSTw
^k
, RID_TMP
);
1744 emit_lsptr(as
, A64I_LDRB
, RID_TMP
, (void *)&J2G(as
->J
)->hookmask
);
1747 /* -- Stack handling ------------------------------------------------------ */
1749 /* Check Lua stack size for overflow. Use exit handler as fallback. */
1750 static void asm_stack_check(ASMState
*as
, BCReg topslot
,
1751 IRIns
*irp
, RegSet allow
, ExitNo exitno
)
1754 Reg pbase
= RID_BASE
;
1757 if (!ra_hasreg(pbase
))
1758 pbase
= allow
? (0x40 | rset_pickbot(allow
)) : (0xC0 | RID_RET
);
1760 emit_cond_branch(as
, CC_LS
, asm_exitstub_addr(as
, exitno
));
1761 if (pbase
& 0x80) /* Restore temp. register. */
1762 emit_lso(as
, A64I_LDRx
, (pbase
& 31), RID_SP
, 0);
1763 k
= emit_isk12((8*topslot
));
1764 lj_assertA(k
, "slot offset %d does not fit in K12", 8*topslot
);
1765 emit_n(as
, A64I_CMPx
^k
, RID_TMP
);
1766 emit_dnm(as
, A64I_SUBx
, RID_TMP
, RID_TMP
, (pbase
& 31));
1767 emit_lso(as
, A64I_LDRx
, RID_TMP
, RID_TMP
,
1768 (int32_t)offsetof(lua_State
, maxstack
));
1770 emit_getgl(as
, (pbase
& 31), jit_base
);
1771 if (pbase
& 0x80) /* Save temp register. */
1772 emit_lso(as
, A64I_STRx
, (pbase
& 31), RID_SP
, 0);
1774 emit_getgl(as
, RID_TMP
, cur_L
);
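
/* The flags packed into pbase above: 0x40 means BASE is not live in a
** register and must be reloaded from g->jit_base; 0x80 means the picked
** register holds a live value and is saved to/restored from [sp] around
** its use; (pbase & 31) is the actual register number.
*/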
/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
#ifdef LUA_USE_ASSERT
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
#endif
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if ((sn & SNAP_KEYINDEX)) {
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
                               ra_alloc1(as, ref, allow);
      rset_clear(allow, r);
      emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
      emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
    } else if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
    } else {
      asm_tvstore64(as, RID_BASE, ofs, ref);
    }
    checkmclim(as);
  }
  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
/* -- GC handling --------------------------------------------------------- */

/* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \
  (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCode *l_end;
  Reg tmp2;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcnb(as, A64I_CBNZ, RID_RET);  /* Assumes asm_snap_prep() is done. */
  *--as->mcp = ARM64_NOPATCH_GC_CHECK;
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps */
  asm_gencall(as, ci, args);
  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp2, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_cond_branch(as, CC_LS, l_end);
  emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
  emit_getgl(as, tmp2, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}
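
/* Since code is emitted in reverse, the check above executes roughly as:
**   load gc.total and gc.threshold; cmp; b.ls around the call;
**   call lj_gc_step_jit(g, steps); cbnz on the return value -> trace exit
**   (taken if the GC is in GCSatomic or GCSfinalize).
** The ARM64_NOPATCH_GC_CHECK marker (an orr xzr,xzr,xzr no-op) sits right
** before the cbnz, which tells lj_asm_patchexit() to leave this exit alone.
*/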
/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
    ptrdiff_t delta = target - (p - 2);
    /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
    p[-2] |= ((uint32_t)delta & mask) << 5;
  } else {
    ptrdiff_t delta = target - (p - 1);
    p[-1] = A64I_B | A64F_S26(delta);
  }
}
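
/* The branch offset field starts at bit 5 in both cases; only its width
** differs: tbz/tbnz (0x36000000 under mask 0x7e000000) carry a 14-bit
** offset, while bcc/cbz/cbnz carry a 19-bit offset. The guard was emitted
** with a zero offset, so or-ing in the masked delta is sufficient.
*/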
/* Fixup the tail of the loop. */
static void asm_loop_tail_fixup(ASMState *as)
{
  UNUSED(as);  /* Nothing to do. */
}
/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_movrr(as, ir, r, RID_BASE);
  }
}
/* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      return r;  /* Same BASE register already coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      /* Move from coalesced parent reg. */
      emit_movrr(as, ir, r, irp->r);
      return irp->r;
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return RID_NONE;
}
/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
  int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
  if (spadj == 0) {
    *--p = A64I_LE(A64I_NOP);
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    uint32_t k = emit_isk12(spadj);
    lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
    p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = A64I_B | A64F_S26((target-p)+1);
}
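
/* The +1 compensates for the branch living at p[-1], not at p: the branch
** offset is relative to its own address, i.e. target - (p-1) words.
*/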
/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
  *p = 0;  /* Prevent load/store merging. */
}
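
/* This reserves p[-1] for the exit branch and, on non-looping traces,
** p[-2] for the stack pointer adjustment, which asm_tail_fixup() above
** later fills in or drops.
*/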
/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  uint32_t i, nargs = CCI_XNARGS(ci);
  if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
      (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
    IRRef args[CCI_NARGS_MAX*2];
    int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
    int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
    asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
    if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
    for (i = 0; i < nargs; i++) {
      int al = spalign;
      if (!args[i]) {
#if LJ_TARGET_OSX
        /* Marker for start of varargs. */
        nfpr = 0;
        ngpr = 0;
        spalign = 7;
#endif
      } else if (irt_isfp(IR(args[i])->t)) {
        if (nfpr > 0) { nfpr--; continue; }
#if LJ_ABI_WIN
        if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#endif
        al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
      } else {
        if (ngpr > 0) { ngpr--; continue; }
        al |= irt_size(IR(args[i])->t) - 1;
      }
      spofs = (spofs + 2*al+1) & ~al;  /* Align and bump stack pointer. */
    }
    nslots = (spofs + 3) >> 2;
    if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
      as->evenspill = nslots;
  }
  return REGSP_HINT(RID_RET);
}
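
/* Worked example of the stack bump (illustrative): for a double passed on
** the stack, al = 7, so spofs = (spofs + 2*7+1) & ~7 rounds spofs up to a
** multiple of 8 and then advances it by 8 bytes (al+1 in general). nslots
** then converts the byte offset to 4-byte spill slots, rounding up.
*/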
static void asm_setup_target(ASMState *as)
{
  /* May need extra exit for asm_stack_check on side traces. */
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}
#if LJ_BE
/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
  MCode *pe = (MCode *)((char *)mcode + size);
  while (mcode < pe) {
    MCode ins = *mcode;
    *mcode++ = lj_bswap(ins);
  }
}
#define LJ_TARGET_MCODE_FIXUP	1
#endif
/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  MCode *px = exitstub_trace_addr(T, exitno);
  int patchlong = 1;
  /* Note: this assumes a trace exit is only ever patched once. */
  for (; p < pe; p++) {
    /* Look for exitstub branch, replace with branch to target. */
    ptrdiff_t delta = target - p;
    MCode ins = A64I_LE(*p);
    if ((ins & 0xff000000u) == 0x54000000u &&
        ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
      /* Patch bcc, if within range. */
      if (A64F_S_OK(delta, 19)) {
        *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
        if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == 0x14000000u &&
               ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
      /* Patch b. */
      lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
      *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
      if (!cstart) cstart = p;
    } else if ((ins & 0x7e000000u) == 0x34000000u &&
               ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
      /* Patch cbz/cbnz, if within range. */
      if (p[-1] == ARM64_NOPATCH_GC_CHECK) {
        patchlong = 0;  /* Never patch the GC check exit. */
      } else if (A64F_S_OK(delta, 19)) {
        *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
        if (!cstart) cstart = p;
      }
    } else if ((ins & 0x7e000000u) == 0x36000000u &&
               ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
      /* Patch tbz/tbnz, if within range. */
      if (A64F_S_OK(delta, 14)) {
        *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
        if (!cstart) cstart = p;
      }
    }
  }
  /* Always patch long-range branch in exit stub itself. Except, if we can't. */
  if (patchlong) {
    ptrdiff_t delta = target - px;
    lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
    *px = A64I_B | A64F_S26(delta);
    if (!cstart) cstart = px;
  }
  if (cstart) lj_mcode_sync(cstart, px+1);
  lj_mcode_patch(J, mcarea, 1);
}
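
/* A64 branch encodings matched above (architectural, for reference):
**   b.cond    0x54000000, 19-bit word offset at bit 5 (+-1 MB)
**   b         0x14000000, 26-bit word offset at bit 0 (+-128 MB)
**   cbz/cbnz  0x34000000 under mask 0x7e000000, 19-bit offset at bit 5
**   tbz/tbnz  0x36000000 under mask 0x7e000000, 14-bit offset at bit 5 (+-32 KB)
** A short branch whose new delta is out of range is left pointing at the
** exit stub, and only the stub's long-range b is redirected instead.
*/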