2 ** ARM64 IR assembler (SSA IR -> machine code).
3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
5 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6 ** Sponsored by Cisco Systems, Inc.
9 /* -- Register allocator extensions --------------------------------------- */
11 /* Allocate a register with a hint. */
12 static Reg
ra_hintalloc(ASMState
*as
, IRRef ref
, Reg hint
, RegSet allow
)
16 if (!ra_hashint(r
) && !iscrossref(as
, ref
))
17 ra_sethint(IR(ref
)->r
, hint
); /* Propagate register hint. */
18 r
= ra_allocref(as
, ref
, allow
);
24 /* Allocate two source registers for three-operand instructions. */
25 static Reg
ra_alloc2(ASMState
*as
, IRIns
*ir
, RegSet allow
)
27 IRIns
*irl
= IR(ir
->op1
), *irr
= IR(ir
->op2
);
28 Reg left
= irl
->r
, right
= irr
->r
;
29 if (ra_hasreg(left
)) {
32 right
= ra_allocref(as
, ir
->op2
, rset_exclude(allow
, left
));
35 } else if (ra_hasreg(right
)) {
37 left
= ra_allocref(as
, ir
->op1
, rset_exclude(allow
, right
));
38 } else if (ra_hashint(right
)) {
39 right
= ra_allocref(as
, ir
->op2
, allow
);
40 left
= ra_alloc1(as
, ir
->op1
, rset_exclude(allow
, right
));
42 left
= ra_allocref(as
, ir
->op1
, allow
);
43 right
= ra_alloc1(as
, ir
->op2
, rset_exclude(allow
, left
));
45 return left
| (right
<< 8);
48 /* -- Guard handling ------------------------------------------------------ */
50 /* Setup all needed exit stubs. */
51 static void asm_exitstub_setup(ASMState
*as
, ExitNo nexits
)
54 MCode
*mxp
= as
->mctop
;
55 if (mxp
- (nexits
+ 3 + MCLIM_REDZONE
) < as
->mclim
)
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 for (i
= nexits
-1; (int32_t)i
>= 0; i
--)
59 *--mxp
= A64I_LE(A64I_BL
| A64F_S26(-3-i
));
60 *--mxp
= A64I_LE(A64I_MOVZw
| A64F_U16(as
->T
->traceno
));
62 *mxp
= A64I_LE(A64I_BL
| A64F_S26(((MCode
*)(void *)lj_vm_exit_handler
-mxp
)));
63 *--mxp
= A64I_LE(A64I_STRx
| A64F_D(RID_LR
) | A64F_N(RID_SP
));
67 static MCode
*asm_exitstub_addr(ASMState
*as
, ExitNo exitno
)
69 /* Keep this in-sync with exitstub_trace_addr(). */
70 return as
->mctop
+ exitno
+ 3;
73 /* Emit conditional branch to exit for guard. */
74 static void asm_guardcc(ASMState
*as
, A64CC cc
)
76 MCode
*target
= asm_exitstub_addr(as
, as
->snapno
);
78 if (LJ_UNLIKELY(p
== as
->invmcp
)) {
80 *p
= A64I_B
| A64F_S26(target
-p
);
81 emit_cond_branch(as
, cc
^1, p
-1);
84 emit_cond_branch(as
, cc
, target
);
87 /* Emit test and branch instruction to exit for guard, if in range. */
88 static int asm_guardtnb(ASMState
*as
, A64Ins ai
, Reg r
, uint32_t bit
)
90 MCode
*target
= asm_exitstub_addr(as
, as
->snapno
);
92 ptrdiff_t delta
= target
- p
;
93 if (LJ_UNLIKELY(p
== as
->invmcp
)) {
94 if (as
->orignins
> 1023) return 0; /* Delta might end up too large. */
96 *p
= A64I_B
| A64F_S26(delta
);
99 } else if (LJ_UNLIKELY(delta
>= 0x1fff)) {
102 emit_tnb(as
, ai
, r
, bit
, target
);
106 /* Emit compare and branch instruction to exit for guard. */
107 static void asm_guardcnb(ASMState
*as
, A64Ins ai
, Reg r
)
109 MCode
*target
= asm_exitstub_addr(as
, as
->snapno
);
111 if (LJ_UNLIKELY(p
== as
->invmcp
)) {
113 *p
= A64I_B
| A64F_S26(target
-p
);
114 emit_cnb(as
, ai
^0x01000000u
, r
, p
-1);
117 emit_cnb(as
, ai
, r
, target
);
120 /* -- Operand fusion ------------------------------------------------------ */
122 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
123 #define CONFLICT_SEARCH_LIM 31
125 static int asm_isk32(ASMState
*as
, IRRef ref
, int32_t *k
)
127 if (irref_isk(ref
)) {
129 if (ir
->o
== IR_KNULL
|| !irt_is64(ir
->t
)) {
132 } else if (checki32((int64_t)ir_k64(ir
)->u64
)) {
133 *k
= (int32_t)ir_k64(ir
)->u64
;
140 /* Check if there's no conflicting instruction between curins and ref. */
141 static int noconflict(ASMState
*as
, IRRef ref
, IROp conflict
)
144 IRRef i
= as
->curins
;
145 if (i
> ref
+ CONFLICT_SEARCH_LIM
)
146 return 0; /* Give up, ref is too far away. */
148 if (ir
[i
].o
== conflict
)
149 return 0; /* Conflict found. */
150 return 1; /* Ok, no conflict. */
153 /* Fuse the array base of colocated arrays. */
154 static int32_t asm_fuseabase(ASMState
*as
, IRRef ref
)
157 if (ir
->o
== IR_TNEW
&& ir
->op1
<= LJ_MAX_COLOSIZE
&&
158 !neverfuse(as
) && noconflict(as
, ref
, IR_NEWREF
))
159 return (int32_t)sizeof(GCtab
);
163 #define FUSE_REG 0x40000000
165 /* Fuse array/hash/upvalue reference into register+offset operand. */
166 static Reg
asm_fuseahuref(ASMState
*as
, IRRef ref
, int32_t *ofsp
, RegSet allow
,
170 if (ra_noreg(ir
->r
)) {
171 if (ir
->o
== IR_AREF
) {
172 if (mayfuse(as
, ref
)) {
173 if (irref_isk(ir
->op2
)) {
174 IRRef tab
= IR(ir
->op1
)->op1
;
175 int32_t ofs
= asm_fuseabase(as
, tab
);
176 IRRef refa
= ofs
? tab
: ir
->op1
;
177 ofs
+= 8*IR(ir
->op2
)->i
;
178 if (emit_checkofs(ins
, ofs
)) {
180 return ra_alloc1(as
, refa
, allow
);
183 Reg base
= ra_alloc1(as
, ir
->op1
, allow
);
184 *ofsp
= FUSE_REG
|ra_alloc1(as
, ir
->op2
, rset_exclude(allow
, base
));
188 } else if (ir
->o
== IR_HREFK
) {
189 if (mayfuse(as
, ref
)) {
190 int32_t ofs
= (int32_t)(IR(ir
->op2
)->op2
* sizeof(Node
));
191 if (emit_checkofs(ins
, ofs
)) {
193 return ra_alloc1(as
, ir
->op1
, allow
);
196 } else if (ir
->o
== IR_UREFC
) {
197 if (irref_isk(ir
->op1
)) {
198 GCfunc
*fn
= ir_kfunc(IR(ir
->op1
));
199 GCupval
*uv
= &gcref(fn
->l
.uvptr
[(ir
->op2
>> 8)])->uv
;
200 int64_t ofs
= glofs(as
, &uv
->tv
);
201 if (emit_checkofs(ins
, ofs
)) {
202 *ofsp
= (int32_t)ofs
;
206 } else if (ir
->o
== IR_TMPREF
) {
207 *ofsp
= (int32_t)glofs(as
, &J2G(as
->J
)->tmptv
);
212 return ra_alloc1(as
, ref
, allow
);
215 /* Fuse m operand into arithmetic/logic instructions. */
216 static uint32_t asm_fuseopm(ASMState
*as
, A64Ins ai
, IRRef ref
, RegSet allow
)
219 int logical
= (ai
& 0x1f000000) == 0x0a000000;
220 if (ra_hasreg(ir
->r
)) {
221 ra_noweak(as
, ir
->r
);
222 return A64F_M(ir
->r
);
223 } else if (irref_isk(ref
)) {
224 int64_t k
= get_k64val(as
, ref
);
225 uint32_t m
= logical
? emit_isk13(k
, irt_is64(ir
->t
)) :
226 emit_isk12(irt_is64(ir
->t
) ? k
: (int32_t)k
);
229 } else if (mayfuse(as
, ref
)) {
230 if ((ir
->o
>= IR_BSHL
&& ir
->o
<= IR_BSAR
&& irref_isk(ir
->op2
)) ||
231 (ir
->o
== IR_ADD
&& ir
->op1
== ir
->op2
)) {
232 A64Shift sh
= ir
->o
== IR_BSHR
? A64SH_LSR
:
233 ir
->o
== IR_BSAR
? A64SH_ASR
: A64SH_LSL
;
234 int shift
= ir
->o
== IR_ADD
? 1 :
235 (IR(ir
->op2
)->i
& (irt_is64(ir
->t
) ? 63 : 31));
236 IRIns
*irl
= IR(ir
->op1
);
237 if (sh
== A64SH_LSL
&&
238 irl
->o
== IR_CONV
&& !logical
&&
239 irl
->op2
== ((IRT_I64
<<IRCONV_DSH
)|IRT_INT
|IRCONV_SEXT
) &&
242 Reg m
= ra_alloc1(as
, irl
->op1
, allow
);
243 return A64F_M(m
) | A64F_EXSH(A64EX_SXTW
, shift
);
245 Reg m
= ra_alloc1(as
, ir
->op1
, allow
);
246 return A64F_M(m
) | A64F_SH(sh
, shift
);
248 } else if (ir
->o
== IR_BROR
&& logical
&& irref_isk(ir
->op2
)) {
249 Reg m
= ra_alloc1(as
, ir
->op1
, allow
);
250 int shift
= (IR(ir
->op2
)->i
& (irt_is64(ir
->t
) ? 63 : 31));
251 return A64F_M(m
) | A64F_SH(A64SH_ROR
, shift
);
252 } else if (ir
->o
== IR_CONV
&& !logical
&&
253 ir
->op2
== ((IRT_I64
<<IRCONV_DSH
)|IRT_INT
|IRCONV_SEXT
)) {
254 Reg m
= ra_alloc1(as
, ir
->op1
, allow
);
255 return A64F_M(m
) | A64F_EX(A64EX_SXTW
);
258 return A64F_M(ra_allocref(as
, ref
, allow
));
261 /* Fuse XLOAD/XSTORE reference into load/store operand. */
262 static void asm_fusexref(ASMState
*as
, A64Ins ai
, Reg rd
, IRRef ref
,
268 if (ra_noreg(ir
->r
) && canfuse(as
, ir
)) {
269 if (ir
->o
== IR_ADD
) {
270 if (asm_isk32(as
, ir
->op2
, &ofs
) && emit_checkofs(ai
, ofs
)) {
274 IRRef lref
= ir
->op1
, rref
= ir
->op2
;
275 IRIns
*irl
= IR(lref
);
276 if (mayfuse(as
, irl
->op1
)) {
277 unsigned int shift
= 4;
278 if (irl
->o
== IR_BSHL
&& irref_isk(irl
->op2
)) {
279 shift
= (IR(irl
->op2
)->i
& 63);
280 } else if (irl
->o
== IR_ADD
&& irl
->op1
== irl
->op2
) {
283 if ((ai
>> 30) == shift
) {
289 if (irl
->o
== IR_CONV
&&
290 irl
->op2
== ((IRT_I64
<<IRCONV_DSH
)|IRT_INT
|IRCONV_SEXT
) &&
297 rm
= ra_alloc1(as
, lref
, allow
);
298 rn
= ra_alloc1(as
, rref
, rset_exclude(allow
, rm
));
299 emit_dnm(as
, (ai
^A64I_LS_R
), (rd
& 31), rn
, rm
);
302 } else if (ir
->o
== IR_STRREF
) {
303 if (asm_isk32(as
, ir
->op2
, &ofs
)) {
305 } else if (asm_isk32(as
, ir
->op1
, &ofs
)) {
308 Reg refk
= irref_isk(ir
->op1
) ? ir
->op1
: ir
->op2
;
309 Reg refv
= irref_isk(ir
->op1
) ? ir
->op2
: ir
->op1
;
310 Reg rn
= ra_alloc1(as
, refv
, allow
);
311 IRIns
*irr
= IR(refk
);
313 if (irr
+1 == ir
&& !ra_used(irr
) &&
314 irr
->o
== IR_ADD
&& irref_isk(irr
->op2
)) {
315 ofs
= sizeof(GCstr
) + IR(irr
->op2
)->i
;
316 if (emit_checkofs(ai
, ofs
)) {
317 Reg rm
= ra_alloc1(as
, irr
->op1
, rset_exclude(allow
, rn
));
318 m
= A64F_M(rm
) | A64F_EX(A64EX_SXTW
);
322 m
= asm_fuseopm(as
, 0, refk
, rset_exclude(allow
, rn
));
325 emit_lso(as
, ai
, rd
, rd
, ofs
);
326 emit_dn(as
, A64I_ADDx
^m
, rd
, rn
);
329 ofs
+= sizeof(GCstr
);
330 if (!emit_checkofs(ai
, ofs
)) {
331 Reg rn
= ra_alloc1(as
, ref
, allow
);
332 Reg rm
= ra_allock(as
, ofs
, rset_exclude(allow
, rn
));
333 emit_dnm(as
, (ai
^A64I_LS_R
)|A64I_LS_UXTWx
, rd
, rn
, rm
);
338 base
= ra_alloc1(as
, ref
, allow
);
339 emit_lso(as
, ai
, (rd
& 31), base
, ofs
);
342 /* Fuse FP multiply-add/sub. */
343 static int asm_fusemadd(ASMState
*as
, IRIns
*ir
, A64Ins ai
, A64Ins air
)
345 IRRef lref
= ir
->op1
, rref
= ir
->op2
;
347 if ((as
->flags
& JIT_F_OPT_FMA
) &&
349 ((mayfuse(as
, lref
) && (irm
= IR(lref
), irm
->o
== IR_MUL
) &&
351 (mayfuse(as
, rref
) && (irm
= IR(rref
), irm
->o
== IR_MUL
) &&
352 (rref
= lref
, ai
= air
, ra_noreg(irm
->r
))))) {
353 Reg dest
= ra_dest(as
, ir
, RSET_FPR
);
354 Reg add
= ra_hintalloc(as
, rref
, dest
, RSET_FPR
);
355 Reg left
= ra_alloc2(as
, irm
,
356 rset_exclude(rset_exclude(RSET_FPR
, dest
), add
));
357 Reg right
= (left
>> 8); left
&= 255;
358 emit_dnma(as
, ai
, (dest
& 31), (left
& 31), (right
& 31), (add
& 31));
364 /* Fuse BAND + BSHL/BSHR into UBFM. */
365 static int asm_fuseandshift(ASMState
*as
, IRIns
*ir
)
367 IRIns
*irl
= IR(ir
->op1
);
368 lj_assertA(ir
->o
== IR_BAND
, "bad usage");
369 if (canfuse(as
, irl
) && irref_isk(ir
->op2
)) {
370 uint64_t mask
= get_k64val(as
, ir
->op2
);
371 if (irref_isk(irl
->op2
) && (irl
->o
== IR_BSHR
|| irl
->o
== IR_BSHL
)) {
372 int32_t shmask
= irt_is64(irl
->t
) ? 63 : 31;
373 int32_t shift
= (IR(irl
->op2
)->i
& shmask
);
374 int32_t imms
= shift
;
375 if (irl
->o
== IR_BSHL
) {
377 shift
= (shmask
-shift
+1) & shmask
;
380 if (mask
&& !((mask
+1) & mask
)) { /* Contiguous 1-bits at the bottom. */
381 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
382 Reg left
= ra_alloc1(as
, irl
->op1
, RSET_GPR
);
383 A64Ins ai
= shmask
== 63 ? A64I_UBFMx
: A64I_UBFMw
;
384 imms
+= 63 - emit_clz64(mask
);
385 if (imms
> shmask
) imms
= shmask
;
386 emit_dn(as
, ai
| A64F_IMMS(imms
) | A64F_IMMR(shift
), dest
, left
);
394 /* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */
395 static int asm_fuseorshift(ASMState
*as
, IRIns
*ir
)
397 IRIns
*irl
= IR(ir
->op1
), *irr
= IR(ir
->op2
);
398 lj_assertA(ir
->o
== IR_BOR
, "bad usage");
399 if (canfuse(as
, irl
) && canfuse(as
, irr
) &&
400 ((irl
->o
== IR_BSHR
&& irr
->o
== IR_BSHL
) ||
401 (irl
->o
== IR_BSHL
&& irr
->o
== IR_BSHR
))) {
402 if (irref_isk(irl
->op2
) && irref_isk(irr
->op2
)) {
403 IRRef lref
= irl
->op1
, rref
= irr
->op1
;
404 uint32_t lshift
= IR(irl
->op2
)->i
, rshift
= IR(irr
->op2
)->i
;
405 if (irl
->o
== IR_BSHR
) { /* BSHR needs to be the right operand. */
407 IRRef tmp1
= lref
; lref
= rref
; rref
= tmp1
;
408 tmp2
= lshift
; lshift
= rshift
; rshift
= tmp2
;
410 if (rshift
+ lshift
== (irt_is64(ir
->t
) ? 64 : 32)) {
411 A64Ins ai
= irt_is64(ir
->t
) ? A64I_EXTRx
: A64I_EXTRw
;
412 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
413 Reg left
= ra_alloc1(as
, lref
, RSET_GPR
);
414 Reg right
= ra_alloc1(as
, rref
, rset_exclude(RSET_GPR
, left
));
415 emit_dnm(as
, ai
| A64F_IMMS(rshift
), dest
, left
, right
);
423 /* -- Calls --------------------------------------------------------------- */
425 /* Generate a call to a C function. */
426 static void asm_gencall(ASMState
*as
, const CCallInfo
*ci
, IRRef
*args
)
428 uint32_t n
, nargs
= CCI_XNARGS(ci
);
429 int32_t spofs
= 0, spalign
= LJ_HASFFI
&& LJ_TARGET_OSX
? 0 : 7;
430 Reg gpr
, fpr
= REGARG_FIRSTFPR
;
432 emit_call(as
, ci
->func
);
433 for (gpr
= REGARG_FIRSTGPR
; gpr
<= REGARG_LASTGPR
; gpr
++)
434 as
->cost
[gpr
] = REGCOST(~0u, ASMREF_L
);
435 gpr
= REGARG_FIRSTGPR
;
436 #if LJ_HASFFI && LJ_ABI_WIN
437 if ((ci
->flags
& CCI_VARARG
)) {
438 fpr
= REGARG_LASTFPR
+1;
441 for (n
= 0; n
< nargs
; n
++) { /* Setup args. */
445 if (irt_isfp(ir
->t
)) {
446 if (fpr
<= REGARG_LASTFPR
) {
447 lj_assertA(rset_test(as
->freeset
, fpr
),
448 "reg %d not free", fpr
); /* Must have been evicted. */
449 ra_leftov(as
, fpr
, ref
);
451 #if LJ_HASFFI && LJ_ABI_WIN
452 } else if ((ci
->flags
& CCI_VARARG
) && (gpr
<= REGARG_LASTGPR
)) {
453 Reg rf
= ra_alloc1(as
, ref
, RSET_FPR
);
454 emit_dn(as
, A64I_FMOV_R_D
, gpr
++, rf
& 31);
457 Reg r
= ra_alloc1(as
, ref
, RSET_FPR
);
458 int32_t al
= spalign
;
459 #if LJ_HASFFI && LJ_TARGET_OSX
460 al
|= irt_isnum(ir
->t
) ? 7 : 3;
462 spofs
= (spofs
+ al
) & ~al
;
463 if (LJ_BE
&& al
>= 7 && !irt_isnum(ir
->t
)) spofs
+= 4, al
-= 4;
464 emit_spstore(as
, ir
, r
, spofs
);
468 if (gpr
<= REGARG_LASTGPR
) {
469 lj_assertA(rset_test(as
->freeset
, gpr
),
470 "reg %d not free", gpr
); /* Must have been evicted. */
471 ra_leftov(as
, gpr
, ref
);
474 Reg r
= ra_alloc1(as
, ref
, RSET_GPR
);
475 int32_t al
= spalign
;
476 #if LJ_HASFFI && LJ_TARGET_OSX
477 al
|= irt_size(ir
->t
) - 1;
479 spofs
= (spofs
+ al
) & ~al
;
481 if (LJ_BE
&& al
>= 7 && !irt_is64(ir
->t
)) spofs
+= 4, al
-= 4;
482 emit_spstore(as
, ir
, r
, spofs
);
484 lj_assertA(al
== 0 || al
== 1, "size %d unexpected", al
+ 1);
485 emit_lso(as
, al
? A64I_STRH
: A64I_STRB
, r
, RID_SP
, spofs
);
490 #if LJ_HASFFI && LJ_TARGET_OSX
491 } else { /* Marker for start of varargs. */
492 gpr
= REGARG_LASTGPR
+1;
493 fpr
= REGARG_LASTFPR
+1;
500 /* Setup result reg/sp for call. Evict scratch regs. */
501 static void asm_setupresult(ASMState
*as
, IRIns
*ir
, const CCallInfo
*ci
)
503 RegSet drop
= RSET_SCRATCH
;
504 int hiop
= ((ir
+1)->o
== IR_HIOP
&& !irt_isnil((ir
+1)->t
));
505 if (ra_hasreg(ir
->r
))
506 rset_clear(drop
, ir
->r
); /* Dest reg handled below. */
507 if (hiop
&& ra_hasreg((ir
+1)->r
))
508 rset_clear(drop
, (ir
+1)->r
); /* Dest reg handled below. */
509 ra_evictset(as
, drop
); /* Evictions must be performed first. */
511 lj_assertA(!irt_ispri(ir
->t
), "PRI dest");
512 if (irt_isfp(ir
->t
)) {
513 if (ci
->flags
& CCI_CASTU64
) {
514 Reg dest
= ra_dest(as
, ir
, RSET_FPR
) & 31;
515 emit_dn(as
, irt_isnum(ir
->t
) ? A64I_FMOV_D_R
: A64I_FMOV_S_R
,
518 ra_destreg(as
, ir
, RID_FPRET
);
523 ra_destreg(as
, ir
, RID_RET
);
529 static void asm_callx(ASMState
*as
, IRIns
*ir
)
531 IRRef args
[CCI_NARGS_MAX
*2];
535 ci
.flags
= asm_callx_flags(as
, ir
);
536 asm_collectargs(as
, ir
, &ci
, args
);
537 asm_setupresult(as
, ir
, &ci
);
538 func
= ir
->op2
; irf
= IR(func
);
539 if (irf
->o
== IR_CARG
) { func
= irf
->op1
; irf
= IR(func
); }
540 if (irref_isk(func
)) { /* Call to constant address. */
541 ci
.func
= (ASMFunction
)(ir_k64(irf
)->u64
);
542 } else { /* Need a non-argument register for indirect calls. */
543 Reg freg
= ra_alloc1(as
, func
, RSET_RANGE(RID_X8
, RID_MAX_GPR
)-RSET_FIXED
);
544 emit_n(as
, A64I_BLR_AUTH
, freg
);
545 ci
.func
= (ASMFunction
)(void *)0;
547 asm_gencall(as
, &ci
, args
);
550 /* -- Returns ------------------------------------------------------------- */
552 /* Return to lower frame. Guard that it goes to the right spot. */
553 static void asm_retf(ASMState
*as
, IRIns
*ir
)
555 Reg base
= ra_alloc1(as
, REF_BASE
, RSET_GPR
);
556 void *pc
= ir_kptr(IR(ir
->op2
));
557 int32_t delta
= 1+LJ_FR2
+bc_a(*((const BCIns
*)pc
- 1));
558 as
->topslot
-= (BCReg
)delta
;
559 if ((int32_t)as
->topslot
< 0) as
->topslot
= 0;
560 irt_setmark(IR(REF_BASE
)->t
); /* Children must not coalesce with BASE reg. */
561 emit_setgl(as
, base
, jit_base
);
562 emit_addptr(as
, base
, -8*delta
);
563 asm_guardcc(as
, CC_NE
);
564 emit_nm(as
, A64I_CMPx
, RID_TMP
,
565 ra_allock(as
, i64ptr(pc
), rset_exclude(RSET_GPR
, base
)));
566 emit_lso(as
, A64I_LDRx
, RID_TMP
, base
, -8);
569 /* -- Buffer operations --------------------------------------------------- */
572 static void asm_bufhdr_write(ASMState
*as
, Reg sb
)
574 Reg tmp
= ra_scratch(as
, rset_exclude(RSET_GPR
, sb
));
576 irgc
.ot
= IRT(0, IRT_PGC
); /* GC type. */
577 emit_storeofs(as
, &irgc
, RID_TMP
, sb
, offsetof(SBuf
, L
));
578 emit_dn(as
, A64I_BFMx
| A64F_IMMS(lj_fls(SBUF_MASK_FLAG
)) | A64F_IMMR(0), RID_TMP
, tmp
);
579 emit_getgl(as
, RID_TMP
, cur_L
);
580 emit_loadofs(as
, &irgc
, tmp
, sb
, offsetof(SBuf
, L
));
584 /* -- Type conversions ---------------------------------------------------- */
586 static void asm_tointg(ASMState
*as
, IRIns
*ir
, Reg left
)
588 Reg tmp
= ra_scratch(as
, rset_exclude(RSET_FPR
, left
));
589 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
590 asm_guardcc(as
, CC_NE
);
591 emit_nm(as
, A64I_FCMPd
, (tmp
& 31), (left
& 31));
592 emit_dn(as
, A64I_FCVT_F64_S32
, (tmp
& 31), dest
);
593 emit_dn(as
, A64I_FCVT_S32_F64
, dest
, (left
& 31));
596 static void asm_tobit(ASMState
*as
, IRIns
*ir
)
598 RegSet allow
= RSET_FPR
;
599 Reg left
= ra_alloc1(as
, ir
->op1
, allow
);
600 Reg right
= ra_alloc1(as
, ir
->op2
, rset_clear(allow
, left
));
601 Reg tmp
= ra_scratch(as
, rset_clear(allow
, right
));
602 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
603 emit_dn(as
, A64I_FMOV_R_S
, dest
, (tmp
& 31));
604 emit_dnm(as
, A64I_FADDd
, (tmp
& 31), (left
& 31), (right
& 31));
607 static void asm_conv(ASMState
*as
, IRIns
*ir
)
609 IRType st
= (IRType
)(ir
->op2
& IRCONV_SRCMASK
);
610 int st64
= (st
== IRT_I64
|| st
== IRT_U64
|| st
== IRT_P64
);
611 int stfp
= (st
== IRT_NUM
|| st
== IRT_FLOAT
);
612 IRRef lref
= ir
->op1
;
613 lj_assertA(irt_type(ir
->t
) != st
, "inconsistent types for CONV");
614 if (irt_isfp(ir
->t
)) {
615 Reg dest
= ra_dest(as
, ir
, RSET_FPR
);
616 if (stfp
) { /* FP to FP conversion. */
617 emit_dn(as
, st
== IRT_NUM
? A64I_FCVT_F32_F64
: A64I_FCVT_F64_F32
,
618 (dest
& 31), (ra_alloc1(as
, lref
, RSET_FPR
) & 31));
619 } else { /* Integer to FP conversion. */
620 Reg left
= ra_alloc1(as
, lref
, RSET_GPR
);
621 A64Ins ai
= irt_isfloat(ir
->t
) ?
622 (((IRT_IS64
>> st
) & 1) ?
623 (st
== IRT_I64
? A64I_FCVT_F32_S64
: A64I_FCVT_F32_U64
) :
624 (st
== IRT_INT
? A64I_FCVT_F32_S32
: A64I_FCVT_F32_U32
)) :
625 (((IRT_IS64
>> st
) & 1) ?
626 (st
== IRT_I64
? A64I_FCVT_F64_S64
: A64I_FCVT_F64_U64
) :
627 (st
== IRT_INT
? A64I_FCVT_F64_S32
: A64I_FCVT_F64_U32
));
628 emit_dn(as
, ai
, (dest
& 31), left
);
630 } else if (stfp
) { /* FP to integer conversion. */
631 if (irt_isguard(ir
->t
)) {
632 /* Checked conversions are only supported from number to int. */
633 lj_assertA(irt_isint(ir
->t
) && st
== IRT_NUM
,
634 "bad type for checked CONV");
635 asm_tointg(as
, ir
, ra_alloc1(as
, lref
, RSET_FPR
));
637 Reg left
= ra_alloc1(as
, lref
, RSET_FPR
);
638 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
639 A64Ins ai
= irt_is64(ir
->t
) ?
641 (irt_isi64(ir
->t
) ? A64I_FCVT_S64_F64
: A64I_FCVT_U64_F64
) :
642 (irt_isi64(ir
->t
) ? A64I_FCVT_S64_F32
: A64I_FCVT_U64_F32
)) :
644 (irt_isint(ir
->t
) ? A64I_FCVT_S32_F64
: A64I_FCVT_U32_F64
) :
645 (irt_isint(ir
->t
) ? A64I_FCVT_S32_F32
: A64I_FCVT_U32_F32
));
646 emit_dn(as
, ai
, dest
, (left
& 31));
648 } else if (st
>= IRT_I8
&& st
<= IRT_U16
) { /* Extend to 32 bit integer. */
649 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
650 Reg left
= ra_alloc1(as
, lref
, RSET_GPR
);
651 A64Ins ai
= st
== IRT_I8
? A64I_SXTBw
:
652 st
== IRT_U8
? A64I_UXTBw
:
653 st
== IRT_I16
? A64I_SXTHw
: A64I_UXTHw
;
654 lj_assertA(irt_isint(ir
->t
) || irt_isu32(ir
->t
), "bad type for CONV EXT");
655 emit_dn(as
, ai
, dest
, left
);
657 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
658 if (irt_is64(ir
->t
)) {
659 if (st64
|| !(ir
->op2
& IRCONV_SEXT
)) {
660 /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
661 ra_leftov(as
, dest
, lref
); /* Do nothing, but may need to move regs. */
662 } else { /* 32 to 64 bit sign extension. */
663 Reg left
= ra_alloc1(as
, lref
, RSET_GPR
);
664 emit_dn(as
, A64I_SXTW
, dest
, left
);
667 if (st64
&& !(ir
->op2
& IRCONV_NONE
)) {
668 /* This is either a 32 bit reg/reg mov which zeroes the hiword
669 ** or a load of the loword from a 64 bit address.
671 Reg left
= ra_alloc1(as
, lref
, RSET_GPR
);
672 emit_dm(as
, A64I_MOVw
, dest
, left
);
673 } else { /* 32/32 bit no-op (cast). */
674 ra_leftov(as
, dest
, lref
); /* Do nothing, but may need to move regs. */
680 static void asm_strto(ASMState
*as
, IRIns
*ir
)
682 const CCallInfo
*ci
= &lj_ir_callinfo
[IRCALL_lj_strscan_num
];
686 ra_evictset(as
, RSET_SCRATCH
);
688 if (ra_hasspill(ir
->s
)) {
689 ofs
= sps_scale(ir
->s
);
690 if (ra_hasreg(ir
->r
)) {
692 ra_modified(as
, ir
->r
);
693 emit_spload(as
, ir
, ir
->r
, ofs
);
696 Reg dest
= ra_dest(as
, ir
, RSET_FPR
);
697 emit_lso(as
, A64I_LDRd
, (dest
& 31), RID_SP
, 0);
700 asm_guardcnb(as
, A64I_CBZ
, RID_RET
);
701 args
[0] = ir
->op1
; /* GCstr *str */
702 args
[1] = ASMREF_TMP1
; /* TValue *n */
703 asm_gencall(as
, ci
, args
);
704 tmp
= ra_releasetmp(as
, ASMREF_TMP1
);
705 emit_opk(as
, A64I_ADDx
, tmp
, RID_SP
, ofs
, RSET_GPR
);
708 /* -- Memory references --------------------------------------------------- */
710 /* Store tagged value for ref at base+ofs. */
711 static void asm_tvstore64(ASMState
*as
, Reg base
, int32_t ofs
, IRRef ref
)
713 RegSet allow
= rset_exclude(RSET_GPR
, base
);
715 lj_assertA(irt_ispri(ir
->t
) || irt_isaddr(ir
->t
) || irt_isinteger(ir
->t
),
716 "store of IR type %d", irt_type(ir
->t
));
717 if (irref_isk(ref
)) {
719 lj_ir_kvalue(as
->J
->L
, &k
, ir
);
720 emit_lso(as
, A64I_STRx
, ra_allock(as
, k
.u64
, allow
), base
, ofs
);
722 Reg src
= ra_alloc1(as
, ref
, allow
);
723 rset_clear(allow
, src
);
724 if (irt_isinteger(ir
->t
)) {
725 Reg type
= ra_allock(as
, (int64_t)irt_toitype(ir
->t
) << 47, allow
);
726 emit_lso(as
, A64I_STRx
, RID_TMP
, base
, ofs
);
727 emit_dnm(as
, A64I_ADDx
| A64F_EX(A64EX_UXTW
), RID_TMP
, type
, src
);
729 Reg type
= ra_allock(as
, (int32_t)irt_toitype(ir
->t
), allow
);
730 emit_lso(as
, A64I_STRx
, RID_TMP
, base
, ofs
);
731 emit_dnm(as
, A64I_ADDx
| A64F_SH(A64SH_LSL
, 47), RID_TMP
, src
, type
);
736 /* Get pointer to TValue. */
737 static void asm_tvptr(ASMState
*as
, Reg dest
, IRRef ref
, MSize mode
)
739 if ((mode
& IRTMPREF_IN1
)) {
741 if (irt_isnum(ir
->t
)) {
742 if (irref_isk(ref
) && !(mode
& IRTMPREF_OUT1
)) {
743 /* Use the number constant itself as a TValue. */
744 ra_allockreg(as
, i64ptr(ir_knum(ir
)), dest
);
747 emit_lso(as
, A64I_STRd
, (ra_alloc1(as
, ref
, RSET_FPR
) & 31), dest
, 0);
749 asm_tvstore64(as
, dest
, 0, ref
);
752 /* g->tmptv holds the TValue(s). */
753 emit_dn(as
, A64I_ADDx
^emit_isk12(glofs(as
, &J2G(as
->J
)->tmptv
)), dest
, RID_GL
);
756 static void asm_aref(ASMState
*as
, IRIns
*ir
)
758 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
760 if (irref_isk(ir
->op2
)) {
761 IRRef tab
= IR(ir
->op1
)->op1
;
762 int32_t ofs
= asm_fuseabase(as
, tab
);
763 IRRef refa
= ofs
? tab
: ir
->op1
;
764 uint32_t k
= emit_isk12(ofs
+ 8*IR(ir
->op2
)->i
);
766 base
= ra_alloc1(as
, refa
, RSET_GPR
);
767 emit_dn(as
, A64I_ADDx
^k
, dest
, base
);
771 base
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
772 idx
= ra_alloc1(as
, ir
->op2
, rset_exclude(RSET_GPR
, base
));
773 emit_dnm(as
, A64I_ADDx
| A64F_EXSH(A64EX_UXTW
, 3), dest
, base
, idx
);
776 /* Inlined hash lookup. Specialized for key type and for const keys.
777 ** The equivalent C code is:
778 ** Node *n = hashkey(t, key);
780 ** if (lj_obj_equal(&n->key, key)) return &n->val;
781 ** } while ((n = nextnode(n)));
784 static void asm_href(ASMState
*as
, IRIns
*ir
, IROp merge
)
786 RegSet allow
= RSET_GPR
;
787 int destused
= ra_used(ir
);
788 Reg dest
= ra_dest(as
, ir
, allow
);
789 Reg tab
= ra_alloc1(as
, ir
->op1
, rset_clear(allow
, dest
));
790 Reg tmp
= RID_TMP
, type
= RID_NONE
, key
= RID_NONE
, tkey
;
791 IRRef refkey
= ir
->op2
;
792 IRIns
*irkey
= IR(refkey
);
793 int isk
= irref_isk(refkey
);
794 IRType1 kt
= irkey
->t
;
797 MCLabel l_end
, l_loop
;
798 rset_clear(allow
, tab
);
800 /* Allocate register for tkey outside of the loop. */
803 if (irt_isaddr(kt
)) {
804 kk
= ((int64_t)irt_toitype(kt
) << 47) | irkey
[1].tv
.u64
;
805 } else if (irt_isnum(kt
)) {
806 kk
= (int64_t)ir_knum(irkey
)->u64
;
807 /* Assumes -0.0 is already canonicalized to +0.0. */
809 lj_assertA(irt_ispri(kt
) && !irt_isnil(kt
), "bad HREF key type");
810 kk
= ~((int64_t)~irt_toitype(kt
) << 47);
813 tkey
= k
? 0 : ra_allock(as
, kk
, allow
);
815 tkey
= ra_scratch(as
, allow
);
818 /* Key not found in chain: jump to exit (if merged) or load niltv. */
819 l_end
= emit_label(as
);
821 if (merge
== IR_NE
) {
822 asm_guardcc(as
, CC_AL
);
823 } else if (destused
) {
824 uint32_t k12
= emit_isk12(offsetof(global_State
, nilnode
.val
));
825 lj_assertA(k12
!= 0, "Cannot k12 encode niltv(L)");
826 emit_dn(as
, A64I_ADDx
^k12
, dest
, RID_GL
);
829 /* Follow hash chain until the end. */
832 emit_lso(as
, A64I_LDRx
, dest
, dest
, offsetof(Node
, next
));
834 /* Type and value comparison. */
836 asm_guardcc(as
, CC_EQ
);
838 emit_cond_branch(as
, CC_EQ
, l_end
);
839 emit_nm(as
, A64I_CMPx
^k
, tmp
, tkey
);
841 emit_lso(as
, A64I_LDRx
, dest
, dest
, offsetof(Node
, next
));
842 emit_lso(as
, A64I_LDRx
, tmp
, dest
, offsetof(Node
, key
));
843 *l_loop
= A64I_X
| A64I_CBNZ
| A64F_S19(as
->mcp
- l_loop
) | dest
;
845 /* Construct tkey as canonicalized or tagged key. */
848 key
= ra_alloc1(as
, refkey
, RSET_FPR
);
849 emit_dnm(as
, A64I_CSELx
| A64F_CC(CC_EQ
), tkey
, RID_ZERO
, tkey
);
850 /* A64I_FMOV_R_D from key to tkey done below. */
852 lj_assertA(irt_isaddr(kt
), "bad HREF key type");
853 key
= ra_alloc1(as
, refkey
, allow
);
854 type
= ra_allock(as
, irt_toitype(kt
) << 15, rset_clear(allow
, key
));
855 emit_dnm(as
, A64I_ADDx
| A64F_SH(A64SH_LSL
, 32), tkey
, key
, type
);
859 /* Load main position relative to tab->node into dest. */
860 khash
= isk
? ir_khash(as
, irkey
) : 1;
862 emit_lso(as
, A64I_LDRx
, dest
, tab
, offsetof(GCtab
, node
));
864 emit_dnm(as
, A64I_ADDx
| A64F_SH(A64SH_LSL
, 3), dest
, tmp
, dest
);
865 emit_dnm(as
, A64I_ADDx
| A64F_SH(A64SH_LSL
, 1), dest
, dest
, dest
);
866 emit_lso(as
, A64I_LDRx
, tmp
, tab
, offsetof(GCtab
, node
));
868 Reg tmphash
= ra_allock(as
, khash
, allow
);
869 emit_dnm(as
, A64I_ANDw
, dest
, dest
, tmphash
);
870 emit_lso(as
, A64I_LDRw
, dest
, tab
, offsetof(GCtab
, hmask
));
871 } else if (irt_isstr(kt
)) {
872 emit_dnm(as
, A64I_ANDw
, dest
, dest
, tmp
);
873 emit_lso(as
, A64I_LDRw
, tmp
, key
, offsetof(GCstr
, sid
));
874 emit_lso(as
, A64I_LDRw
, dest
, tab
, offsetof(GCtab
, hmask
));
875 } else { /* Must match with hash*() in lj_tab.c. */
876 emit_dnm(as
, A64I_ANDw
, dest
, dest
, tmp
);
877 emit_lso(as
, A64I_LDRw
, tmp
, tab
, offsetof(GCtab
, hmask
));
878 emit_dnm(as
, A64I_SUBw
, dest
, dest
, tmp
);
879 emit_dnm(as
, A64I_EXTRw
| (A64F_IMMS(32-HASH_ROT3
)), tmp
, tmp
, tmp
);
880 emit_dnm(as
, A64I_EORw
| A64F_SH(A64SH_ROR
, 32-HASH_ROT2
), dest
, tmp
, dest
);
881 emit_dnm(as
, A64I_SUBw
, tmp
, tmp
, dest
);
882 emit_dnm(as
, A64I_EXTRw
| (A64F_IMMS(32-HASH_ROT1
)), dest
, dest
, dest
);
884 emit_dnm(as
, A64I_EORw
, tmp
, tkey
, dest
);
885 emit_dnm(as
, A64I_ADDw
, dest
, dest
, dest
);
886 emit_dn(as
, A64I_LSRx
| A64F_IMMR(32)|A64F_IMMS(32), dest
, tkey
);
887 emit_nm(as
, A64I_FCMPZd
, (key
& 31), 0);
888 emit_dn(as
, A64I_FMOV_R_D
, tkey
, (key
& 31));
890 emit_dnm(as
, A64I_EORw
, tmp
, key
, dest
);
891 emit_dnm(as
, A64I_EORx
| A64F_SH(A64SH_LSR
, 32), dest
, type
, key
);
897 static void asm_hrefk(ASMState
*as
, IRIns
*ir
)
899 IRIns
*kslot
= IR(ir
->op2
);
900 IRIns
*irkey
= IR(kslot
->op1
);
901 int32_t ofs
= (int32_t)(kslot
->op2
* sizeof(Node
));
902 int32_t kofs
= ofs
+ (int32_t)offsetof(Node
, key
);
903 int bigofs
= !emit_checkofs(A64I_LDRx
, kofs
);
904 Reg dest
= (ra_used(ir
) || bigofs
) ? ra_dest(as
, ir
, RSET_GPR
) : RID_NONE
;
905 Reg node
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
907 RegSet allow
= rset_exclude(RSET_GPR
, node
);
909 lj_assertA(ofs
% sizeof(Node
) == 0, "unaligned HREFK slot");
912 rset_clear(allow
, dest
);
913 kofs
= (int32_t)offsetof(Node
, key
);
914 } else if (ra_hasreg(dest
)) {
915 emit_opk(as
, A64I_ADDx
, dest
, node
, ofs
, allow
);
917 asm_guardcc(as
, CC_NE
);
918 if (irt_ispri(irkey
->t
)) {
919 k
= ~((int64_t)~irt_toitype(irkey
->t
) << 47);
920 } else if (irt_isnum(irkey
->t
)) {
921 k
= ir_knum(irkey
)->u64
;
923 k
= ((uint64_t)irt_toitype(irkey
->t
) << 47) | (uint64_t)ir_kgc(irkey
);
925 emit_nm(as
, A64I_CMPx
, RID_TMP
, ra_allock(as
, k
, allow
));
926 emit_lso(as
, A64I_LDRx
, RID_TMP
, idx
, kofs
);
928 emit_opk(as
, A64I_ADDx
, dest
, node
, ofs
, rset_exclude(RSET_GPR
, node
));
931 static void asm_uref(ASMState
*as
, IRIns
*ir
)
933 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
934 int guarded
= (irt_t(ir
->t
) & (IRT_GUARD
|IRT_TYPE
)) == (IRT_GUARD
|IRT_PGC
);
935 if (irref_isk(ir
->op1
) && !guarded
) {
936 GCfunc
*fn
= ir_kfunc(IR(ir
->op1
));
937 MRef
*v
= &gcref(fn
->l
.uvptr
[(ir
->op2
>> 8)])->uv
.v
;
938 emit_lsptr(as
, A64I_LDRx
, dest
, v
);
941 asm_guardcnb(as
, ir
->o
== IR_UREFC
? A64I_CBZ
: A64I_CBNZ
, RID_TMP
);
942 if (ir
->o
== IR_UREFC
)
943 emit_opk(as
, A64I_ADDx
, dest
, dest
,
944 (int32_t)offsetof(GCupval
, tv
), RSET_GPR
);
946 emit_lso(as
, A64I_LDRx
, dest
, dest
, (int32_t)offsetof(GCupval
, v
));
948 emit_lso(as
, A64I_LDRB
, RID_TMP
, dest
,
949 (int32_t)offsetof(GCupval
, closed
));
950 if (irref_isk(ir
->op1
)) {
951 GCfunc
*fn
= ir_kfunc(IR(ir
->op1
));
952 uint64_t k
= gcrefu(fn
->l
.uvptr
[(ir
->op2
>> 8)]);
953 emit_loadu64(as
, dest
, k
);
955 emit_lso(as
, A64I_LDRx
, dest
, ra_alloc1(as
, ir
->op1
, RSET_GPR
),
956 (int32_t)offsetof(GCfuncL
, uvptr
) + 8*(int32_t)(ir
->op2
>> 8));
961 static void asm_fref(ASMState
*as
, IRIns
*ir
)
963 UNUSED(as
); UNUSED(ir
);
964 lj_assertA(!ra_used(ir
), "unfused FREF");
967 static void asm_strref(ASMState
*as
, IRIns
*ir
)
969 RegSet allow
= RSET_GPR
;
970 Reg dest
= ra_dest(as
, ir
, allow
);
971 Reg base
= ra_alloc1(as
, ir
->op1
, allow
);
972 IRIns
*irr
= IR(ir
->op2
);
973 int32_t ofs
= sizeof(GCstr
);
975 rset_clear(allow
, base
);
976 if (irref_isk(ir
->op2
) && (m
= emit_isk12(ofs
+ irr
->i
))) {
977 emit_dn(as
, A64I_ADDx
^m
, dest
, base
);
979 emit_dn(as
, (A64I_ADDx
^A64I_K12
) | A64F_U12(ofs
), dest
, dest
);
980 emit_dnm(as
, A64I_ADDx
, dest
, base
, ra_alloc1(as
, ir
->op2
, allow
));
984 /* -- Loads and stores ---------------------------------------------------- */
986 static A64Ins
asm_fxloadins(IRIns
*ir
)
988 switch (irt_type(ir
->t
)) {
989 case IRT_I8
: return A64I_LDRB
^ A64I_LS_S
;
990 case IRT_U8
: return A64I_LDRB
;
991 case IRT_I16
: return A64I_LDRH
^ A64I_LS_S
;
992 case IRT_U16
: return A64I_LDRH
;
993 case IRT_NUM
: return A64I_LDRd
;
994 case IRT_FLOAT
: return A64I_LDRs
;
995 default: return irt_is64(ir
->t
) ? A64I_LDRx
: A64I_LDRw
;
999 static A64Ins
asm_fxstoreins(IRIns
*ir
)
1001 switch (irt_type(ir
->t
)) {
1002 case IRT_I8
: case IRT_U8
: return A64I_STRB
;
1003 case IRT_I16
: case IRT_U16
: return A64I_STRH
;
1004 case IRT_NUM
: return A64I_STRd
;
1005 case IRT_FLOAT
: return A64I_STRs
;
1006 default: return irt_is64(ir
->t
) ? A64I_STRx
: A64I_STRw
;
1010 static void asm_fload(ASMState
*as
, IRIns
*ir
)
1012 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
1014 A64Ins ai
= asm_fxloadins(ir
);
1016 if (ir
->op1
== REF_NIL
) { /* FLOAD from GG_State with offset. */
1018 ofs
= (ir
->op2
<< 2) - GG_OFS(g
);
1020 idx
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
1021 if (ir
->op2
== IRFL_TAB_ARRAY
) {
1022 ofs
= asm_fuseabase(as
, ir
->op1
);
1023 if (ofs
) { /* Turn the t->array load into an add for colocated arrays. */
1024 emit_dn(as
, (A64I_ADDx
^A64I_K12
) | A64F_U12(ofs
), dest
, idx
);
1028 ofs
= field_ofs
[ir
->op2
];
1030 emit_lso(as
, ai
, (dest
& 31), idx
, ofs
);
/* Store to a C struct field (FSTORE). Sunk stores (RID_SINK) emit nothing. */
static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);  /* The fused FREF. */
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
  }
}
/* Load from an arbitrary C address (XLOAD); address expression is fused. */
static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
/* Store to an arbitrary C address (XSTORE); address expression is fused.
** Sunk stores (RID_SINK) emit nothing.
*/
static void asm_xstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
		 rset_exclude(RSET_GPR, src));
  }
}
1060 static void asm_ahuvload(ASMState
*as
, IRIns
*ir
)
1064 RegSet gpr
= RSET_GPR
, allow
= irt_isnum(ir
->t
) ? RSET_FPR
: RSET_GPR
;
1065 lj_assertA(irt_isnum(ir
->t
) || irt_ispri(ir
->t
) || irt_isaddr(ir
->t
) ||
1067 "bad load type %d", irt_type(ir
->t
));
1069 Reg dest
= ra_dest(as
, ir
, allow
);
1070 tmp
= irt_isnum(ir
->t
) ? ra_scratch(as
, rset_clear(gpr
, dest
)) : dest
;
1071 if (irt_isaddr(ir
->t
)) {
1072 emit_dn(as
, A64I_ANDx
^emit_isk13(LJ_GCVMASK
, 1), dest
, dest
);
1073 } else if (irt_isnum(ir
->t
)) {
1074 emit_dn(as
, A64I_FMOV_D_R
, (dest
& 31), tmp
);
1075 } else if (irt_isint(ir
->t
)) {
1076 emit_dm(as
, A64I_MOVw
, dest
, dest
);
1079 tmp
= ra_scratch(as
, gpr
);
1081 idx
= asm_fuseahuref(as
, ir
->op1
, &ofs
, rset_clear(gpr
, tmp
), A64I_LDRx
);
1082 rset_clear(gpr
, idx
);
1083 if (ofs
& FUSE_REG
) rset_clear(gpr
, ofs
& 31);
1084 if (ir
->o
== IR_VLOAD
) ofs
+= 8 * ir
->op2
;
1085 /* Always do the type check, even if the load result is unused. */
1086 asm_guardcc(as
, irt_isnum(ir
->t
) ? CC_LS
: CC_NE
);
1087 if (irt_type(ir
->t
) >= IRT_NUM
) {
1088 lj_assertA(irt_isinteger(ir
->t
) || irt_isnum(ir
->t
),
1089 "bad load type %d", irt_type(ir
->t
));
1090 emit_nm(as
, A64I_CMPx
| A64F_SH(A64SH_LSR
, 32),
1091 ra_allock(as
, LJ_TISNUM
<< 15, gpr
), tmp
);
1092 } else if (irt_isaddr(ir
->t
)) {
1093 emit_n(as
, (A64I_CMNx
^A64I_K12
) | A64F_U12(-irt_toitype(ir
->t
)), RID_TMP
);
1094 emit_dn(as
, A64I_ASRx
| A64F_IMMR(47), RID_TMP
, tmp
);
1095 } else if (irt_isnil(ir
->t
)) {
1096 emit_n(as
, (A64I_CMNx
^A64I_K12
) | A64F_U12(1), tmp
);
1098 emit_nm(as
, A64I_CMPx
| A64F_SH(A64SH_LSR
, 32),
1099 ra_allock(as
, (irt_toitype(ir
->t
) << 15) | 0x7fff, gpr
), tmp
);
1102 emit_dnm(as
, (A64I_LDRx
^A64I_LS_R
)|A64I_LS_UXTWx
|A64I_LS_SH
, tmp
, idx
, (ofs
& 31));
1104 emit_lso(as
, A64I_LDRx
, tmp
, idx
, ofs
);
1107 static void asm_ahustore(ASMState
*as
, IRIns
*ir
)
1109 if (ir
->r
!= RID_SINK
) {
1110 RegSet allow
= RSET_GPR
;
1111 Reg idx
, src
= RID_NONE
, tmp
= RID_TMP
, type
= RID_NONE
;
1113 if (irt_isnum(ir
->t
)) {
1114 src
= ra_alloc1(as
, ir
->op2
, RSET_FPR
);
1115 idx
= asm_fuseahuref(as
, ir
->op1
, &ofs
, allow
, A64I_STRd
);
1117 emit_dnm(as
, (A64I_STRd
^A64I_LS_R
)|A64I_LS_UXTWx
|A64I_LS_SH
, (src
& 31), idx
, (ofs
&31));
1119 emit_lso(as
, A64I_STRd
, (src
& 31), idx
, ofs
);
1121 if (!irt_ispri(ir
->t
)) {
1122 src
= ra_alloc1(as
, ir
->op2
, allow
);
1123 rset_clear(allow
, src
);
1124 if (irt_isinteger(ir
->t
))
1125 type
= ra_allock(as
, (uint64_t)(int32_t)LJ_TISNUM
<< 47, allow
);
1127 type
= ra_allock(as
, irt_toitype(ir
->t
), allow
);
1129 tmp
= type
= ra_allock(as
, ~((int64_t)~irt_toitype(ir
->t
)<<47), allow
);
1131 idx
= asm_fuseahuref(as
, ir
->op1
, &ofs
, rset_exclude(allow
, type
),
1134 emit_dnm(as
, (A64I_STRx
^A64I_LS_R
)|A64I_LS_UXTWx
|A64I_LS_SH
, tmp
, idx
, (ofs
& 31));
1136 emit_lso(as
, A64I_STRx
, tmp
, idx
, ofs
);
1137 if (ra_hasreg(src
)) {
1138 if (irt_isinteger(ir
->t
)) {
1139 emit_dnm(as
, A64I_ADDx
| A64F_EX(A64EX_UXTW
), tmp
, type
, src
);
1141 emit_dnm(as
, A64I_ADDx
| A64F_SH(A64SH_LSL
, 47), tmp
, src
, type
);
1148 static void asm_sload(ASMState
*as
, IRIns
*ir
)
1150 int32_t ofs
= 8*((int32_t)ir
->op1
-2);
1152 Reg dest
= RID_NONE
, base
;
1153 RegSet allow
= RSET_GPR
;
1154 lj_assertA(!(ir
->op2
& IRSLOAD_PARENT
),
1155 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1156 lj_assertA(irt_isguard(t
) || !(ir
->op2
& IRSLOAD_TYPECHECK
),
1157 "inconsistent SLOAD variant");
1158 if ((ir
->op2
& IRSLOAD_CONVERT
) && irt_isguard(t
) && irt_isint(t
)) {
1159 dest
= ra_scratch(as
, RSET_FPR
);
1160 asm_tointg(as
, ir
, dest
);
1161 t
.irt
= IRT_NUM
; /* Continue with a regular number type check. */
1162 } else if (ra_used(ir
)) {
1164 if ((ir
->op2
& IRSLOAD_CONVERT
))
1165 tmp
= ra_scratch(as
, irt_isint(t
) ? RSET_FPR
: RSET_GPR
);
1166 lj_assertA((irt_isnum(t
)) || irt_isint(t
) || irt_isaddr(t
),
1167 "bad SLOAD type %d", irt_type(t
));
1168 dest
= ra_dest(as
, ir
, irt_isnum(t
) ? RSET_FPR
: allow
);
1169 base
= ra_alloc1(as
, REF_BASE
, rset_clear(allow
, dest
));
1170 if (irt_isaddr(t
)) {
1171 emit_dn(as
, A64I_ANDx
^emit_isk13(LJ_GCVMASK
, 1), dest
, dest
);
1172 } else if ((ir
->op2
& IRSLOAD_CONVERT
)) {
1174 emit_dn(as
, A64I_FCVT_S32_F64
, dest
, (tmp
& 31));
1175 /* If value is already loaded for type check, move it to FPR. */
1176 if ((ir
->op2
& IRSLOAD_TYPECHECK
))
1177 emit_dn(as
, A64I_FMOV_D_R
, (tmp
& 31), dest
);
1180 t
.irt
= IRT_NUM
; /* Check for original type. */
1182 emit_dn(as
, A64I_FCVT_F64_S32
, (dest
& 31), tmp
);
1184 t
.irt
= IRT_INT
; /* Check for original type. */
1186 } else if (irt_isint(t
) && (ir
->op2
& IRSLOAD_TYPECHECK
)) {
1187 emit_dm(as
, A64I_MOVw
, dest
, dest
);
1191 base
= ra_alloc1(as
, REF_BASE
, allow
);
1193 rset_clear(allow
, base
);
1194 if ((ir
->op2
& IRSLOAD_TYPECHECK
)) {
1196 if (ra_hasreg(dest
) && rset_test(RSET_GPR
, dest
)) {
1199 tmp
= ra_scratch(as
, allow
);
1200 rset_clear(allow
, tmp
);
1202 if (ra_hasreg(dest
) && tmp
!= dest
)
1203 emit_dn(as
, A64I_FMOV_D_R
, (dest
& 31), tmp
);
1204 /* Need type check, even if the load result is unused. */
1205 asm_guardcc(as
, irt_isnum(t
) ? CC_LS
: CC_NE
);
1206 if (irt_type(t
) >= IRT_NUM
) {
1207 lj_assertA(irt_isinteger(t
) || irt_isnum(t
),
1208 "bad SLOAD type %d", irt_type(t
));
1209 emit_nm(as
, A64I_CMPx
| A64F_SH(A64SH_LSR
, 32),
1210 ra_allock(as
, (ir
->op2
& IRSLOAD_KEYINDEX
) ? LJ_KEYINDEX
: (LJ_TISNUM
<< 15), allow
), tmp
);
1211 } else if (irt_isnil(t
)) {
1212 emit_n(as
, (A64I_CMNx
^A64I_K12
) | A64F_U12(1), tmp
);
1213 } else if (irt_ispri(t
)) {
1214 emit_nm(as
, A64I_CMPx
,
1215 ra_allock(as
, ~((int64_t)~irt_toitype(t
) << 47) , allow
), tmp
);
1217 emit_n(as
, (A64I_CMNx
^A64I_K12
) | A64F_U12(-irt_toitype(t
)), RID_TMP
);
1218 emit_dn(as
, A64I_ASRx
| A64F_IMMR(47), RID_TMP
, tmp
);
1220 emit_lso(as
, A64I_LDRx
, tmp
, base
, ofs
);
1223 if (ra_hasreg(dest
)) {
1224 emit_lso(as
, irt_isnum(t
) ? A64I_LDRd
:
1225 (irt_isint(t
) ? A64I_LDRw
: A64I_LDRx
), (dest
& 31), base
,
1226 ofs
^ ((LJ_BE
&& irt_isint(t
) ? 4 : 0)));
1230 /* -- Allocations --------------------------------------------------------- */
1233 static void asm_cnew(ASMState
*as
, IRIns
*ir
)
1235 CTState
*cts
= ctype_ctsG(J2G(as
->J
));
1236 CTypeID id
= (CTypeID
)IR(ir
->op1
)->i
;
1238 CTInfo info
= lj_ctype_info(cts
, id
, &sz
);
1239 const CCallInfo
*ci
= &lj_ir_callinfo
[IRCALL_lj_mem_newgco
];
1241 RegSet allow
= (RSET_GPR
& ~RSET_SCRATCH
);
1242 lj_assertA(sz
!= CTSIZE_INVALID
|| (ir
->o
== IR_CNEW
&& ir
->op2
!= REF_NIL
),
1243 "bad CNEW/CNEWI operands");
1246 asm_setupresult(as
, ir
, ci
); /* GCcdata * */
1247 /* Initialize immutable cdata object. */
1248 if (ir
->o
== IR_CNEWI
) {
1249 int32_t ofs
= sizeof(GCcdata
);
1250 Reg r
= ra_alloc1(as
, ir
->op2
, allow
);
1251 lj_assertA(sz
== 4 || sz
== 8, "bad CNEWI size %d", sz
);
1252 emit_lso(as
, sz
== 8 ? A64I_STRx
: A64I_STRw
, r
, RID_RET
, ofs
);
1253 } else if (ir
->op2
!= REF_NIL
) { /* Create VLA/VLS/aligned cdata. */
1254 ci
= &lj_ir_callinfo
[IRCALL_lj_cdata_newv
];
1255 args
[0] = ASMREF_L
; /* lua_State *L */
1256 args
[1] = ir
->op1
; /* CTypeID id */
1257 args
[2] = ir
->op2
; /* CTSize sz */
1258 args
[3] = ASMREF_TMP1
; /* CTSize align */
1259 asm_gencall(as
, ci
, args
);
1260 emit_loadi(as
, ra_releasetmp(as
, ASMREF_TMP1
), (int32_t)ctype_align(info
));
1264 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1266 Reg r
= (id
< 65536) ? RID_X1
: ra_allock(as
, id
, allow
);
1267 emit_lso(as
, A64I_STRB
, RID_TMP
, RID_RET
, offsetof(GCcdata
, gct
));
1268 emit_lso(as
, A64I_STRH
, r
, RID_RET
, offsetof(GCcdata
, ctypeid
));
1269 emit_d(as
, A64I_MOVZw
| A64F_U16(~LJ_TCDATA
), RID_TMP
);
1270 if (id
< 65536) emit_d(as
, A64I_MOVZw
| A64F_U16(id
), RID_X1
);
1272 args
[0] = ASMREF_L
; /* lua_State *L */
1273 args
[1] = ASMREF_TMP1
; /* MSize size */
1274 asm_gencall(as
, ci
, args
);
1275 ra_allockreg(as
, (int32_t)(sz
+sizeof(GCcdata
)),
1276 ra_releasetmp(as
, ASMREF_TMP1
));
1280 /* -- Write barriers ------------------------------------------------------ */
1282 static void asm_tbar(ASMState
*as
, IRIns
*ir
)
1284 Reg tab
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
1285 Reg link
= ra_scratch(as
, rset_exclude(RSET_GPR
, tab
));
1287 MCLabel l_end
= emit_label(as
);
1288 emit_lso(as
, A64I_STRB
, mark
, tab
, (int32_t)offsetof(GCtab
, marked
));
1289 /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
1290 emit_lso(as
, A64I_STRx
, link
, tab
, (int32_t)offsetof(GCtab
, gclist
));
1291 emit_setgl(as
, tab
, gc
.grayagain
);
1292 emit_dn(as
, A64I_ANDw
^emit_isk13(~LJ_GC_BLACK
, 0), mark
, mark
);
1293 emit_getgl(as
, link
, gc
.grayagain
);
1294 emit_cond_branch(as
, CC_EQ
, l_end
);
1295 emit_n(as
, A64I_TSTw
^emit_isk13(LJ_GC_BLACK
, 0), mark
);
1296 emit_lso(as
, A64I_LDRB
, mark
, tab
, (int32_t)offsetof(GCtab
, marked
));
1299 static void asm_obar(ASMState
*as
, IRIns
*ir
)
1301 const CCallInfo
*ci
= &lj_ir_callinfo
[IRCALL_lj_gc_barrieruv
];
1305 /* No need for other object barriers (yet). */
1306 lj_assertA(IR(ir
->op1
)->o
== IR_UREFC
, "bad OBAR type");
1307 ra_evictset(as
, RSET_SCRATCH
);
1308 l_end
= emit_label(as
);
1309 args
[0] = ASMREF_TMP1
; /* global_State *g */
1310 args
[1] = ir
->op1
; /* TValue *tv */
1311 asm_gencall(as
, ci
, args
);
1312 emit_dm(as
, A64I_MOVx
, ra_releasetmp(as
, ASMREF_TMP1
), RID_GL
);
1313 obj
= IR(ir
->op1
)->r
;
1314 tmp
= ra_scratch(as
, rset_exclude(RSET_GPR
, obj
));
1315 emit_tnb(as
, A64I_TBZ
, tmp
, lj_ffs(LJ_GC_BLACK
), l_end
);
1316 emit_cond_branch(as
, CC_EQ
, l_end
);
1317 emit_n(as
, A64I_TSTw
^emit_isk13(LJ_GC_WHITES
, 0), RID_TMP
);
1318 val
= ra_alloc1(as
, ir
->op2
, rset_exclude(RSET_GPR
, obj
));
1319 emit_lso(as
, A64I_LDRB
, tmp
, obj
,
1320 (int32_t)offsetof(GCupval
, marked
)-(int32_t)offsetof(GCupval
, tv
));
1321 emit_lso(as
, A64I_LDRB
, RID_TMP
, val
, (int32_t)offsetof(GChead
, marked
));
1324 /* -- Arithmetic and logic operations ------------------------------------- */
/* Emit a three-operand FP arithmetic instruction (dest = left ai right).
** ra_alloc2() packs both source registers into one value: left in the low
** byte, right in bits 8-15.
*/
static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
}
/* Emit a two-operand FP instruction (dest = ai left), hinting the source
** into the destination register to avoid a move.
*/
static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_dn(as, ai, (dest & 31), (left & 31));
}
1341 static void asm_fpmath(ASMState
*as
, IRIns
*ir
)
1343 IRFPMathOp fpm
= (IRFPMathOp
)ir
->op2
;
1344 if (fpm
== IRFPM_SQRT
) {
1345 asm_fpunary(as
, ir
, A64I_FSQRTd
);
1346 } else if (fpm
<= IRFPM_TRUNC
) {
1347 asm_fpunary(as
, ir
, fpm
== IRFPM_FLOOR
? A64I_FRINTMd
:
1348 fpm
== IRFPM_CEIL
? A64I_FRINTPd
: A64I_FRINTZd
);
1350 asm_callid(as
, ir
, IRCALL_lj_vm_floor
+ fpm
);
1354 static int asm_swapops(ASMState
*as
, IRRef lref
, IRRef rref
)
1357 if (irref_isk(rref
))
1358 return 0; /* Don't swap constants to the left. */
1359 if (irref_isk(lref
))
1360 return 1; /* But swap constants to the right. */
1362 if ((ir
->o
>= IR_BSHL
&& ir
->o
<= IR_BROR
) ||
1363 (ir
->o
== IR_ADD
&& ir
->op1
== ir
->op2
) ||
1364 (ir
->o
== IR_CONV
&& ir
->op2
== ((IRT_I64
<<IRCONV_DSH
)|IRT_INT
|IRCONV_SEXT
)))
1365 return 0; /* Don't swap fusable operands to the left. */
1367 if ((ir
->o
>= IR_BSHL
&& ir
->o
<= IR_BROR
) ||
1368 (ir
->o
== IR_ADD
&& ir
->op1
== ir
->op2
) ||
1369 (ir
->o
== IR_CONV
&& ir
->op2
== ((IRT_I64
<<IRCONV_DSH
)|IRT_INT
|IRCONV_SEXT
)))
1370 return 1; /* But swap fusable operands to the right. */
1371 return 0; /* Otherwise don't swap. */
1374 static void asm_intop(ASMState
*as
, IRIns
*ir
, A64Ins ai
)
1376 IRRef lref
= ir
->op1
, rref
= ir
->op2
;
1377 Reg left
, dest
= ra_dest(as
, ir
, RSET_GPR
);
1379 if ((ai
& ~A64I_S
) != A64I_SUBw
&& asm_swapops(as
, lref
, rref
)) {
1380 IRRef tmp
= lref
; lref
= rref
; rref
= tmp
;
1382 left
= ra_hintalloc(as
, lref
, dest
, RSET_GPR
);
1383 if (irt_is64(ir
->t
)) ai
|= A64I_X
;
1384 m
= asm_fuseopm(as
, ai
, rref
, rset_exclude(RSET_GPR
, left
));
1385 if (irt_isguard(ir
->t
)) { /* For IR_ADDOV etc. */
1386 asm_guardcc(as
, CC_VS
);
1389 emit_dn(as
, ai
^m
, dest
, left
);
1392 static void asm_intop_s(ASMState
*as
, IRIns
*ir
, A64Ins ai
)
1394 if (as
->flagmcp
== as
->mcp
) { /* Drop cmp r, #0. */
1399 asm_intop(as
, ir
, ai
);
/* Integer negation: NEG (width chosen by the IR result type). */
static void asm_intneg(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
}
1409 /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
1410 static void asm_intmul(ASMState
*as
, IRIns
*ir
)
1412 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
1413 Reg left
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
1414 Reg right
= ra_alloc1(as
, ir
->op2
, rset_exclude(RSET_GPR
, left
));
1415 if (irt_isguard(ir
->t
)) { /* IR_MULOV */
1416 asm_guardcc(as
, CC_NE
);
1417 emit_dm(as
, A64I_MOVw
, dest
, dest
); /* Zero-extend. */
1418 emit_nm(as
, A64I_CMPx
| A64F_EX(A64EX_SXTW
), dest
, dest
);
1419 emit_dnm(as
, A64I_SMULL
, dest
, right
, left
);
1421 emit_dnm(as
, irt_is64(ir
->t
) ? A64I_MULx
: A64I_MULw
, dest
, left
, right
);
1425 static void asm_add(ASMState
*as
, IRIns
*ir
)
1427 if (irt_isnum(ir
->t
)) {
1428 if (!asm_fusemadd(as
, ir
, A64I_FMADDd
, A64I_FMADDd
))
1429 asm_fparith(as
, ir
, A64I_FADDd
);
1432 asm_intop_s(as
, ir
, A64I_ADDw
);
1435 static void asm_sub(ASMState
*as
, IRIns
*ir
)
1437 if (irt_isnum(ir
->t
)) {
1438 if (!asm_fusemadd(as
, ir
, A64I_FNMSUBd
, A64I_FMSUBd
))
1439 asm_fparith(as
, ir
, A64I_FSUBd
);
1442 asm_intop_s(as
, ir
, A64I_SUBw
);
1445 static void asm_mul(ASMState
*as
, IRIns
*ir
)
1447 if (irt_isnum(ir
->t
)) {
1448 asm_fparith(as
, ir
, A64I_FMULd
);
1454 #define asm_addov(as, ir) asm_add(as, ir)
1455 #define asm_subov(as, ir) asm_sub(as, ir)
1456 #define asm_mulov(as, ir) asm_mul(as, ir)
1458 #define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
1459 #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
1461 static void asm_neg(ASMState
*as
, IRIns
*ir
)
1463 if (irt_isnum(ir
->t
)) {
1464 asm_fpunary(as
, ir
, A64I_FNEGd
);
1470 static void asm_band(ASMState
*as
, IRIns
*ir
)
1472 A64Ins ai
= A64I_ANDw
;
1473 if (asm_fuseandshift(as
, ir
))
1475 if (as
->flagmcp
== as
->mcp
) {
1476 /* Try to drop cmp r, #0. */
1481 asm_intop(as
, ir
, ai
);
1484 static void asm_borbxor(ASMState
*as
, IRIns
*ir
, A64Ins ai
)
1486 IRRef lref
= ir
->op1
, rref
= ir
->op2
;
1487 IRIns
*irl
= IR(lref
), *irr
= IR(rref
);
1488 if ((canfuse(as
, irl
) && irl
->o
== IR_BNOT
&& !irref_isk(rref
)) ||
1489 (canfuse(as
, irr
) && irr
->o
== IR_BNOT
&& !irref_isk(lref
))) {
1490 Reg left
, dest
= ra_dest(as
, ir
, RSET_GPR
);
1492 if (irl
->o
== IR_BNOT
) {
1493 IRRef tmp
= lref
; lref
= rref
; rref
= tmp
;
1495 left
= ra_alloc1(as
, lref
, RSET_GPR
);
1497 if (irt_is64(ir
->t
)) ai
|= A64I_X
;
1498 m
= asm_fuseopm(as
, ai
, IR(rref
)->op1
, rset_exclude(RSET_GPR
, left
));
1499 emit_dn(as
, ai
^m
, dest
, left
);
1501 asm_intop(as
, ir
, ai
);
1505 static void asm_bor(ASMState
*as
, IRIns
*ir
)
1507 if (asm_fuseorshift(as
, ir
))
1509 asm_borbxor(as
, ir
, A64I_ORRw
);
1512 #define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
/* BNOT: MVN with a fusable (shifted) operand. Note: asm_fuseopm() is called
** before the A64I_X widening, matching the 32-bit base opcode it expects.
*/
static void asm_bnot(ASMState *as, IRIns *ir)
{
  A64Ins ai = A64I_MVNw;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;
  emit_d(as, ai^m, dest);
}
/* BSWAP: byte-reverse with REV (width chosen by the IR result type). */
static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
}
1530 static void asm_bitshift(ASMState
*as
, IRIns
*ir
, A64Ins ai
, A64Shift sh
)
1532 int32_t shmask
= irt_is64(ir
->t
) ? 63 : 31;
1533 if (irref_isk(ir
->op2
)) { /* Constant shifts. */
1534 Reg left
, dest
= ra_dest(as
, ir
, RSET_GPR
);
1535 int32_t shift
= (IR(ir
->op2
)->i
& shmask
);
1536 IRIns
*irl
= IR(ir
->op1
);
1537 if (shmask
== 63) ai
+= A64I_UBFMx
- A64I_UBFMw
;
1539 /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1540 if ((sh
== A64SH_LSR
|| sh
== A64SH_ASR
) && canfuse(as
, irl
)) {
1541 if (irl
->o
== IR_BSHL
&& irref_isk(irl
->op2
)) {
1542 int32_t shift2
= (IR(irl
->op2
)->i
& shmask
);
1543 shift
= ((shift
- shift2
) & shmask
);
1549 left
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
1552 emit_dn(as
, ai
| A64F_IMMS(shmask
-shift
) |
1553 A64F_IMMR((shmask
-shift
+1)&shmask
), dest
, left
);
1555 case A64SH_LSR
: case A64SH_ASR
:
1556 emit_dn(as
, ai
| A64F_IMMS(shmask
) | A64F_IMMR(shift
), dest
, left
);
1559 emit_dnm(as
, ai
| A64F_IMMS(shift
), dest
, left
, left
);
1562 } else { /* Variable-length shifts. */
1563 Reg dest
= ra_dest(as
, ir
, RSET_GPR
);
1564 Reg left
= ra_alloc1(as
, ir
->op1
, RSET_GPR
);
1565 Reg right
= ra_alloc1(as
, ir
->op2
, rset_exclude(RSET_GPR
, left
));
1566 emit_dnm(as
, (shmask
== 63 ? A64I_SHRx
: A64I_SHRw
) | A64F_BSH(sh
), dest
, left
, right
);
1570 #define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
1571 #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
1572 #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
1573 #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
1574 #define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
/* Integer min/max: compare, then conditional select (emitted in reverse,
** so the CSEL precedes the CMP in source order).
*/
static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
  emit_nm(as, A64I_CMPw, left, right);
}
/* FP min/max via FCMP + FCSEL; the condition codes chosen by the callers
** give the required NaN semantics.
*/
static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
{
  Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = ((left >> 8) & 31); left &= 31;
  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
  emit_nm(as, A64I_FCMPd, left, right);
}
1594 static void asm_min_max(ASMState
*as
, IRIns
*ir
, A64CC cc
, A64CC fcc
)
1596 if (irt_isnum(ir
->t
))
1597 asm_fpmin_max(as
, ir
, fcc
);
1599 asm_intmin_max(as
, ir
, cc
);
1602 #define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL)
1603 #define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE)
1605 /* -- Comparisons --------------------------------------------------------- */
/* Map of comparisons to flags. ORDER IR. */
/* Low nibble: inverted integer condition to guard on; high nibble: inverted
** FP condition. 'swp' marks entries whose FP compare swaps its operands.
*/
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op  FP swp  int cc   FP cc */
  /* LT       */ CC_GE + (CC_HS << 4),
  /* GE    x  */ CC_LT + (CC_HI << 4),
  /* LE       */ CC_GT + (CC_HI << 4),
  /* GT    x  */ CC_LE + (CC_HS << 4),
  /* ULT   x  */ CC_HS + (CC_LS << 4),
  /* UGE      */ CC_LO + (CC_LO << 4),
  /* ULE   x  */ CC_HI + (CC_LO << 4),
  /* UGT      */ CC_LS + (CC_LS << 4),
  /* EQ       */ CC_NE + (CC_NE << 4),
  /* NE       */ CC_EQ + (CC_EQ << 4),
  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
};
1623 /* FP comparisons. */
1624 static void asm_fpcomp(ASMState
*as
, IRIns
*ir
)
1628 int swp
= ((ir
->o
^ (ir
->o
>> 2)) & ~(ir
->o
>> 3) & 1);
1629 if (!swp
&& irref_isk(ir
->op2
) && ir_knum(IR(ir
->op2
))->u64
== 0) {
1630 left
= (ra_alloc1(as
, ir
->op1
, RSET_FPR
) & 31);
1634 left
= ra_alloc2(as
, ir
, RSET_FPR
);
1636 right
= (left
& 31); left
= ((left
>> 8) & 31);
1638 right
= ((left
>> 8) & 31); left
&= 31;
1642 asm_guardcc(as
, (asm_compmap
[ir
->o
] >> 4));
1643 emit_nm(as
, ai
, left
, right
);
1646 /* Integer comparisons. */
1647 static void asm_intcomp(ASMState
*as
, IRIns
*ir
)
1649 A64CC oldcc
, cc
= (asm_compmap
[ir
->o
] & 15);
1650 A64Ins ai
= irt_is64(ir
->t
) ? A64I_CMPx
: A64I_CMPw
;
1651 IRRef lref
= ir
->op1
, rref
= ir
->op2
;
1655 lj_assertA(irt_is64(ir
->t
) || irt_isint(ir
->t
) ||
1656 irt_isu32(ir
->t
) || irt_isaddr(ir
->t
) || irt_isu8(ir
->t
),
1657 "bad comparison data type %d", irt_type(ir
->t
));
1658 if (asm_swapops(as
, lref
, rref
)) {
1659 IRRef tmp
= lref
; lref
= rref
; rref
= tmp
;
1660 if (cc
>= CC_GE
) cc
^= 7; /* LT <-> GT, LE <-> GE */
1661 else if (cc
> CC_NE
) cc
^= 11; /* LO <-> HI, LS <-> HS */
1664 if (irref_isk(rref
) && get_k64val(as
, rref
) == 0) {
1665 IRIns
*irl
= IR(lref
);
1666 if (cc
== CC_GE
) cc
= CC_PL
;
1667 else if (cc
== CC_LT
) cc
= CC_MI
;
1668 else if (cc
> CC_NE
) goto nocombine
; /* Other conds don't work with tst. */
1669 cmpprev0
= (irl
+1 == ir
);
1670 /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1671 if (cmpprev0
&& irl
->o
== IR_BAND
&& !ra_used(irl
)) {
1672 IRRef blref
= irl
->op1
, brref
= irl
->op2
;
1675 if (asm_swapops(as
, blref
, brref
)) {
1676 Reg tmp
= blref
; blref
= brref
; brref
= tmp
;
1678 bleft
= ra_alloc1(as
, blref
, RSET_GPR
);
1679 if (irref_isk(brref
)) {
1680 uint64_t k
= get_k64val(as
, brref
);
1681 if (k
&& !(k
& (k
-1)) && (cc
== CC_EQ
|| cc
== CC_NE
) &&
1682 asm_guardtnb(as
, cc
== CC_EQ
? A64I_TBZ
: A64I_TBNZ
, bleft
,
1685 m2
= emit_isk13(k
, irt_is64(irl
->t
));
1687 ai
= (irt_is64(irl
->t
) ? A64I_TSTx
: A64I_TSTw
);
1689 m2
= asm_fuseopm(as
, ai
, brref
, rset_exclude(RSET_GPR
, bleft
));
1690 asm_guardcc(as
, cc
);
1691 emit_n(as
, ai
^m2
, bleft
);
1694 if (cc
== CC_EQ
|| cc
== CC_NE
) {
1695 /* Combine cmp-bcc into cbz/cbnz. */
1696 ai
= cc
== CC_EQ
? A64I_CBZ
: A64I_CBNZ
;
1697 if (irt_is64(ir
->t
)) ai
|= A64I_X
;
1698 asm_guardcnb(as
, ai
, ra_alloc1(as
, lref
, RSET_GPR
));
1703 left
= ra_alloc1(as
, lref
, RSET_GPR
);
1704 m
= asm_fuseopm(as
, ai
, rref
, rset_exclude(RSET_GPR
, left
));
1705 asm_guardcc(as
, cc
);
1706 emit_n(as
, ai
^m
, left
);
1707 /* Signed comparison with zero and referencing previous ins? */
1708 if (cmpprev0
&& (oldcc
<= CC_NE
|| oldcc
>= CC_GE
))
1709 as
->flagmcp
= as
->mcp
; /* Allow elimination of the compare. */
1712 static void asm_comp(ASMState
*as
, IRIns
*ir
)
1714 if (irt_isnum(ir
->t
))
1717 asm_intcomp(as
, ir
);
1720 #define asm_equal(as, ir) asm_comp(as, ir)
1722 /* -- Split register ops -------------------------------------------------- */
1724 /* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
1725 static void asm_hiop(ASMState
*as
, IRIns
*ir
)
1727 /* HIOP is marked as a store because it needs its own DCE logic. */
1728 int uselo
= ra_used(ir
-1), usehi
= ra_used(ir
); /* Loword/hiword used? */
1729 if (LJ_UNLIKELY(!(as
->flags
& JIT_F_OPT_DCE
))) uselo
= usehi
= 1;
1730 if (!usehi
) return; /* Skip unused hiword op for all remaining ops. */
1731 switch ((ir
-1)->o
) {
1737 ra_allocref(as
, ir
->op1
, RID2RSET(RID_RETLO
)); /* Mark lo op as used. */
1739 default: lj_assertA(0, "bad HIOP for op %d", (ir
-1)->o
); break;
1743 /* -- Profiling ----------------------------------------------------------- */
1745 static void asm_prof(ASMState
*as
, IRIns
*ir
)
1747 uint32_t k
= emit_isk13(HOOK_PROFILE
, 0);
1748 lj_assertA(k
!= 0, "HOOK_PROFILE does not fit in K13");
1750 asm_guardcc(as
, CC_NE
);
1751 emit_n(as
, A64I_TSTw
^k
, RID_TMP
);
1752 emit_lsptr(as
, A64I_LDRB
, RID_TMP
, (void *)&J2G(as
->J
)->hookmask
);
1755 /* -- Stack handling ------------------------------------------------------ */
1757 /* Check Lua stack size for overflow. Use exit handler as fallback. */
1758 static void asm_stack_check(ASMState
*as
, BCReg topslot
,
1759 IRIns
*irp
, RegSet allow
, ExitNo exitno
)
1762 Reg pbase
= RID_BASE
;
1765 if (!ra_hasreg(pbase
))
1766 pbase
= allow
? (0x40 | rset_pickbot(allow
)) : (0xC0 | RID_RET
);
1768 emit_cond_branch(as
, CC_LS
, asm_exitstub_addr(as
, exitno
));
1769 if (pbase
& 0x80) /* Restore temp. register. */
1770 emit_lso(as
, A64I_LDRx
, (pbase
& 31), RID_SP
, 0);
1771 k
= emit_isk12((8*topslot
));
1772 lj_assertA(k
, "slot offset %d does not fit in K12", 8*topslot
);
1773 emit_n(as
, A64I_CMPx
^k
, RID_TMP
);
1774 emit_dnm(as
, A64I_SUBx
, RID_TMP
, RID_TMP
, (pbase
& 31));
1775 emit_lso(as
, A64I_LDRx
, RID_TMP
, RID_TMP
,
1776 (int32_t)offsetof(lua_State
, maxstack
));
1778 emit_getgl(as
, (pbase
& 31), jit_base
);
1779 if (pbase
& 0x80) /* Save temp register. */
1780 emit_lso(as
, A64I_STRx
, (pbase
& 31), RID_SP
, 0);
1782 emit_getgl(as
, RID_TMP
, cur_L
);
1785 /* Restore Lua stack from on-trace state. */
1786 static void asm_stack_restore(ASMState
*as
, SnapShot
*snap
)
1788 SnapEntry
*map
= &as
->T
->snapmap
[snap
->mapofs
];
1789 #ifdef LUA_USE_ASSERT
1790 SnapEntry
*flinks
= &as
->T
->snapmap
[snap_nextofs(as
->T
, snap
)-1-LJ_FR2
];
1792 MSize n
, nent
= snap
->nent
;
1793 /* Store the value of all modified slots to the Lua stack. */
1794 for (n
= 0; n
< nent
; n
++) {
1795 SnapEntry sn
= map
[n
];
1796 BCReg s
= snap_slot(sn
);
1797 int32_t ofs
= 8*((int32_t)s
-1-LJ_FR2
);
1798 IRRef ref
= snap_ref(sn
);
1799 IRIns
*ir
= IR(ref
);
1800 if ((sn
& SNAP_NORESTORE
))
1802 if ((sn
& SNAP_KEYINDEX
)) {
1803 RegSet allow
= rset_exclude(RSET_GPR
, RID_BASE
);
1804 Reg r
= irref_isk(ref
) ? ra_allock(as
, ir
->i
, allow
) :
1805 ra_alloc1(as
, ref
, allow
);
1806 rset_clear(allow
, r
);
1807 emit_lso(as
, A64I_STRw
, r
, RID_BASE
, ofs
);
1808 emit_lso(as
, A64I_STRw
, ra_allock(as
, LJ_KEYINDEX
, allow
), RID_BASE
, ofs
+4);
1809 } else if (irt_isnum(ir
->t
)) {
1810 Reg src
= ra_alloc1(as
, ref
, RSET_FPR
);
1811 emit_lso(as
, A64I_STRd
, (src
& 31), RID_BASE
, ofs
);
1813 asm_tvstore64(as
, RID_BASE
, ofs
, ref
);
1817 lj_assertA(map
+ nent
== flinks
, "inconsistent frames in snapshot");
1820 /* -- GC handling --------------------------------------------------------- */
1822 /* Marker to prevent patching the GC check exit. */
1823 #define ARM64_NOPATCH_GC_CHECK \
1824 (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
1826 /* Check GC threshold and do one or more GC steps. */
1827 static void asm_gc_check(ASMState
*as
)
1829 const CCallInfo
*ci
= &lj_ir_callinfo
[IRCALL_lj_gc_step_jit
];
1833 ra_evictset(as
, RSET_SCRATCH
);
1834 l_end
= emit_label(as
);
1835 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1836 asm_guardcnb(as
, A64I_CBNZ
, RID_RET
); /* Assumes asm_snap_prep() is done. */
1837 *--as
->mcp
= ARM64_NOPATCH_GC_CHECK
;
1838 args
[0] = ASMREF_TMP1
; /* global_State *g */
1839 args
[1] = ASMREF_TMP2
; /* MSize steps */
1840 asm_gencall(as
, ci
, args
);
1841 emit_dm(as
, A64I_MOVx
, ra_releasetmp(as
, ASMREF_TMP1
), RID_GL
);
1842 tmp2
= ra_releasetmp(as
, ASMREF_TMP2
);
1843 emit_loadi(as
, tmp2
, as
->gcsteps
);
1844 /* Jump around GC step if GC total < GC threshold. */
1845 emit_cond_branch(as
, CC_LS
, l_end
);
1846 emit_nm(as
, A64I_CMPx
, RID_TMP
, tmp2
);
1847 emit_getgl(as
, tmp2
, gc
.threshold
);
1848 emit_getgl(as
, RID_TMP
, gc
.total
);
1853 /* -- Loop handling ------------------------------------------------------- */
1855 /* Fixup the loop branch. */
1856 static void asm_loop_fixup(ASMState
*as
)
1858 MCode
*p
= as
->mctop
;
1859 MCode
*target
= as
->mcp
;
1860 if (as
->loopinv
) { /* Inverted loop branch? */
1861 uint32_t mask
= (p
[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu
: 0x7ffffu
;
1862 ptrdiff_t delta
= target
- (p
- 2);
1863 /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
1864 p
[-2] |= ((uint32_t)delta
& mask
) << 5;
1866 ptrdiff_t delta
= target
- (p
- 1);
1867 p
[-1] = A64I_B
| A64F_S26(delta
);
/* Fixup the tail of the loop. */
static void asm_loop_tail_fixup(ASMState *as)
{
  UNUSED(as);  /* Nothing to do. */
}
1877 /* -- Head of trace ------------------------------------------------------- */
1879 /* Coalesce BASE register for a root trace. */
1880 static void asm_head_root_base(ASMState
*as
)
1882 IRIns
*ir
= IR(REF_BASE
);
1886 if (rset_test(as
->modset
, r
) || irt_ismarked(ir
->t
))
1887 ir
->r
= RID_INIT
; /* No inheritance for modified BASE register. */
1889 emit_movrr(as
, ir
, r
, RID_BASE
);
1893 /* Coalesce BASE register for a side trace. */
1894 static Reg
asm_head_side_base(ASMState
*as
, IRIns
*irp
)
1896 IRIns
*ir
= IR(REF_BASE
);
1900 if (rset_test(as
->modset
, r
) || irt_ismarked(ir
->t
))
1901 ir
->r
= RID_INIT
; /* No inheritance for modified BASE register. */
1903 return r
; /* Same BASE register already coalesced. */
1904 } else if (ra_hasreg(irp
->r
) && rset_test(as
->freeset
, irp
->r
)) {
1905 /* Move from coalesced parent reg. */
1906 emit_movrr(as
, ir
, r
, irp
->r
);
1909 emit_getgl(as
, r
, jit_base
); /* Otherwise reload BASE. */
1915 /* -- Tail of trace ------------------------------------------------------- */
1917 /* Fixup the tail code. */
1918 static void asm_tail_fixup(ASMState
*as
, TraceNo lnk
)
1920 MCode
*p
= as
->mctop
;
1922 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1923 int32_t spadj
= as
->T
->spadjust
+ (lnk
? 0 : sps_scale(SPS_FIXED
));
1925 *--p
= A64I_LE(A64I_NOP
);
1928 /* Patch stack adjustment. */
1929 uint32_t k
= emit_isk12(spadj
);
1930 lj_assertA(k
, "stack adjustment %d does not fit in K12", spadj
);
1931 p
[-2] = (A64I_ADDx
^k
) | A64F_D(RID_SP
) | A64F_N(RID_SP
);
1933 /* Patch exit branch. */
1934 target
= lnk
? traceref(as
->J
, lnk
)->mcode
: (MCode
*)lj_vm_exit_interp
;
1935 p
[-1] = A64I_B
| A64F_S26((target
-p
)+1);
1938 /* Prepare tail of code. */
1939 static void asm_tail_prep(ASMState
*as
)
1941 MCode
*p
= as
->mctop
- 1; /* Leave room for exit branch. */
1943 as
->invmcp
= as
->mcp
= p
;
1945 as
->mcp
= p
-1; /* Leave room for stack pointer adjustment. */
1948 *p
= 0; /* Prevent load/store merging. */
1951 /* -- Trace setup --------------------------------------------------------- */
1953 /* Ensure there are enough stack slots for call arguments. */
1954 static Reg
asm_setup_call_slots(ASMState
*as
, IRIns
*ir
, const CCallInfo
*ci
)
1957 uint32_t i
, nargs
= CCI_XNARGS(ci
);
1958 if (nargs
> (REGARG_NUMGPR
< REGARG_NUMFPR
? REGARG_NUMGPR
: REGARG_NUMFPR
) ||
1959 (LJ_TARGET_OSX
&& (ci
->flags
& CCI_VARARG
))) {
1960 IRRef args
[CCI_NARGS_MAX
*2];
1961 int ngpr
= REGARG_NUMGPR
, nfpr
= REGARG_NUMFPR
;
1962 int spofs
= 0, spalign
= LJ_TARGET_OSX
? 0 : 7, nslots
;
1963 asm_collectargs(as
, ir
, ci
, args
);
1965 if ((ci
->flags
& CCI_VARARG
)) nfpr
= 0;
1967 for (i
= 0; i
< nargs
; i
++) {
1971 /* Marker for start of varaargs. */
1976 } else if (irt_isfp(IR(args
[i
])->t
)) {
1977 if (nfpr
> 0) { nfpr
--; continue; }
1979 if ((ci
->flags
& CCI_VARARG
) && ngpr
> 0) { ngpr
--; continue; }
1981 al
|= irt_isnum(IR(args
[i
])->t
) ? 7 : 3;
1984 if (ngpr
> 0) { ngpr
--; continue; }
1986 al
|= irt_size(IR(args
[i
])->t
) - 1;
1989 spofs
= (spofs
+ 2*al
+1) & ~al
; /* Align and bump stack pointer. */
1991 nslots
= (spofs
+ 3) >> 2;
1992 if (nslots
> as
->evenspill
) /* Leave room for args in stack slots. */
1993 as
->evenspill
= nslots
;
1996 return REGSP_HINT(irt_isfp(ir
->t
) ? RID_FPRET
: RID_RET
);
1999 static void asm_setup_target(ASMState
*as
)
2001 /* May need extra exit for asm_stack_check on side traces. */
2002 asm_exitstub_setup(as
, as
->T
->nsnap
+ (as
->parent
? 1 : 0));
#if LJ_BE
/* ARM64 instructions are always little-endian. Swap for ARM64BE.
** NOTE(review): the #if LJ_BE guard is reconstructed — without it the
** swap would corrupt machine code on little-endian hosts; confirm against
** the canonical backend source.
*/
static void asm_mcode_fixup(MCode *mcode, MSize size)
{
  MCode *pe = (MCode *)((char *)mcode + size);
  while (mcode < pe) {
    MCode ins = *mcode;
    *mcode++ = lj_bswap(ins);
  }
}
#define LJ_TARGET_MCODE_FIXUP	1
#endif
2018 /* -- Trace patching ------------------------------------------------------ */
2020 /* Patch exit jumps of existing machine code to a new target. */
2021 void lj_asm_patchexit(jit_State
*J
, GCtrace
*T
, ExitNo exitno
, MCode
*target
)
2023 MCode
*p
= T
->mcode
;
2024 MCode
*pe
= (MCode
*)((char *)p
+ T
->szmcode
);
2025 MCode
*cstart
= NULL
;
2026 MCode
*mcarea
= lj_mcode_patch(J
, p
, 0);
2027 MCode
*px
= exitstub_trace_addr(T
, exitno
);
2029 /* Note: this assumes a trace exit is only ever patched once. */
2030 for (; p
< pe
; p
++) {
2031 /* Look for exitstub branch, replace with branch to target. */
2032 ptrdiff_t delta
= target
- p
;
2033 MCode ins
= A64I_LE(*p
);
2034 if ((ins
& 0xff000000u
) == 0x54000000u
&&
2035 ((ins
^ ((px
-p
)<<5)) & 0x00ffffe0u
) == 0) {
2036 /* Patch bcc, if within range. */
2037 if (A64F_S_OK(delta
, 19)) {
2038 *p
= A64I_LE((ins
& 0xff00001fu
) | A64F_S19(delta
));
2039 if (!cstart
) cstart
= p
;
2041 } else if ((ins
& 0xfc000000u
) == 0x14000000u
&&
2042 ((ins
^ (px
-p
)) & 0x03ffffffu
) == 0) {
2044 lj_assertJ(A64F_S_OK(delta
, 26), "branch target out of range");
2045 *p
= A64I_LE((ins
& 0xfc000000u
) | A64F_S26(delta
));
2046 if (!cstart
) cstart
= p
;
2047 } else if ((ins
& 0x7e000000u
) == 0x34000000u
&&
2048 ((ins
^ ((px
-p
)<<5)) & 0x00ffffe0u
) == 0) {
2049 /* Patch cbz/cbnz, if within range. */
2050 if (p
[-1] == ARM64_NOPATCH_GC_CHECK
) {
2052 } else if (A64F_S_OK(delta
, 19)) {
2053 *p
= A64I_LE((ins
& 0xff00001fu
) | A64F_S19(delta
));
2054 if (!cstart
) cstart
= p
;
2056 } else if ((ins
& 0x7e000000u
) == 0x36000000u
&&
2057 ((ins
^ ((px
-p
)<<5)) & 0x0007ffe0u
) == 0) {
2058 /* Patch tbz/tbnz, if within range. */
2059 if (A64F_S_OK(delta
, 14)) {
2060 *p
= A64I_LE((ins
& 0xfff8001fu
) | A64F_S14(delta
));
2061 if (!cstart
) cstart
= p
;
2065 /* Always patch long-range branch in exit stub itself. Except, if we can't. */
2067 ptrdiff_t delta
= target
- px
;
2068 lj_assertJ(A64F_S_OK(delta
, 26), "branch target out of range");
2069 *px
= A64I_B
| A64F_S26(delta
);
2070 if (!cstart
) cstart
= px
;
2072 if (cstart
) lj_mcode_sync(cstart
, px
+1);
2073 lj_mcode_patch(J
, mcarea
, 1);