1 |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
6 |.section code_op, code_sub
8 |.actionlist build_actionlist
10 |.globalnames globnames
11 |.externnames extnames
13 |//-----------------------------------------------------------------------
16 |.define X64WIN, 1 // Windows/x64 calling conventions.
19 |// Fixed register assignments for the interpreter.
20 |// This is very fragile and has many dependencies. Caveat emptor.
21 |.define BASE, rdx // Not C callee-save, refetched anyway.
23 |.define KBASE, rdi // Must be C callee-save (rdi is callee-save only in the Windows x64 ABI).
24 |.define PC, rsi // Must be C callee-save (rsi is callee-save only in the Windows x64 ABI).
25 |.define DISPATCH, rbx // Must be C callee-save.
28 |.define DISPATCHd, ebx // 32 bit view of DISPATCH (rbx).
30 |.define KBASE, r15 // Must be C callee-save. Alternative assignment; NOTE(review): the selecting .if/.else is not visible in this excerpt.
31 |.define PC, rbx // Must be C callee-save.
32 |.define DISPATCH, r14 // Must be C callee-save.
35 |.define DISPATCHd, r14d // 32 bit view of DISPATCH (r14).
42 |.define RB, rbp // Must be rbp (C callee-save).
44 |.define RC, rax // Must be rax.
60 |.define CARG1, rcx // x64/WIN64 C call arguments.
69 |.define CARG1, rdi // x64/POSIX C call arguments.
83 |// Type definitions. Some of these are only used for documentation.
85 |.type GL, global_State
99 |// Stack layout while in interpreter. Must match with lj_frame.h.
100 |//-----------------------------------------------------------------------
101 |.if X64WIN // x64/Windows stack layout
103 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
105 | push rdi; push rsi; push rbx
106 | sub rsp, CFRAME_SPACE
109 | push rbp; saveregs_
112 | add rsp, CFRAME_SPACE
113 | pop rbx; pop rsi; pop rdi; pop rbp
116 |.define SAVE_CFRAME, aword [rsp+aword*13]
117 |.define SAVE_PC, aword [rsp+aword*12]
118 |.define SAVE_L, aword [rsp+aword*11]
119 |.define SAVE_ERRF, dword [rsp+dword*21]
120 |.define SAVE_NRES, dword [rsp+dword*20]
121 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123 |.define SAVE_R4, aword [rsp+aword*8]
124 |.define SAVE_R3, aword [rsp+aword*7]
125 |.define SAVE_R2, aword [rsp+aword*6]
126 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127 |.define ARG5, aword [rsp+aword*4]
128 |.define CSAVE_4, aword [rsp+aword*3]
129 |.define CSAVE_3, aword [rsp+aword*2]
130 |.define CSAVE_2, aword [rsp+aword*1]
131 |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
134 |.define ARG5d, dword [rsp+dword*8]
135 |.define TMP1, ARG5 // TMP1 overlaps ARG5
136 |.define TMP1d, ARG5d
137 |.define TMP1hi, dword [rsp+dword*9]
138 |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
140 |//-----------------------------------------------------------------------
141 |.else // x64/POSIX stack layout
143 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
145 | push rbx; push r15; push r14
149 | sub rsp, CFRAME_SPACE
152 | push rbp; saveregs_
155 | add rsp, CFRAME_SPACE
159 | pop r14; pop r15; pop rbx; pop rbp
162 |//----- 16 byte aligned,
164 |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165 |.define SAVE_R4, aword [rsp+aword*10]
166 |.define SAVE_R3, aword [rsp+aword*9]
167 |.define SAVE_R2, aword [rsp+aword*8]
168 |.define SAVE_R1, aword [rsp+aword*7]
169 |.define SAVE_RU2, aword [rsp+aword*6]
170 |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
172 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173 |.define SAVE_R4, aword [rsp+aword*8]
174 |.define SAVE_R3, aword [rsp+aword*7]
175 |.define SAVE_R2, aword [rsp+aword*6]
176 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
178 |.define SAVE_CFRAME, aword [rsp+aword*4]
179 |.define SAVE_PC, aword [rsp+aword*3]
180 |.define SAVE_L, aword [rsp+aword*2]
181 |.define SAVE_ERRF, dword [rsp+dword*3]
182 |.define SAVE_NRES, dword [rsp+dword*2]
183 |.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184 |//----- 16 byte aligned
186 |.define TMP1d, dword [rsp]
187 |.define TMP1hi, dword [rsp+dword*1]
188 |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
192 |//-----------------------------------------------------------------------
194 |// Instruction headers.
195 |.macro ins_A; .endmacro // Empty header: emits no code.
196 |.macro ins_AD; .endmacro // Empty header: emits no code.
197 |.macro ins_AJ; .endmacro // Empty header: emits no code.
198 |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // RBd = zero-extended RCH, then RCd = zero-extended RCL. NOTE(review): RCH/RCL are presumably the high/low byte of RC -- confirm; if so the order matters.
199 |.macro ins_AB_; movzx RBd, RCH; .endmacro // RBd = zero-extended RCH only.
200 |.macro ins_A_C; movzx RCd, RCL; .endmacro // RCd = zero-extended RCL only.
201 |.macro ins_AND; not RD; .endmacro // RD = ~RD (bitwise complement).
203 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
210 | jmp aword [DISPATCH+OP*8]
213 |// Instruction footer.
215 | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
216 | .define ins_next, ins_NEXT
217 | .define ins_next_, ins_NEXT
219 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220 | // Affects only certain kinds of benchmarks (and only with -j off).
221 | // Around 10%-30% slower on Core2, a lot slower on P4.
231 |// Call decode and dispatch.
233 | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234 | mov PC, LFUNC:RB->pc
239 | jmp aword [DISPATCH+OP*8]
243 | // BASE = new base, RB = LFUNC, RD = nargs+1
248 |//-----------------------------------------------------------------------
250 |// Macros to clear or set tags.
251 |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zero the top 17 bits of reg, keeping the low 47 bits (the tag is placed at bit 47 and up, see settp).
252 |.macro settp, reg, tp
253 | mov64 ITYPE, ((int64_t)tp<<47) // ITYPE = tp shifted into the tag position (bit 47 and up).
256 |.macro settp, dst, reg, tp
257 | mov64 dst, ((int64_t)tp<<47) // dst = tp shifted into the tag position (bit 47 and up).
261 | settp reg, LJ_TISNUM // Tag reg with LJ_TISNUM.
263 |.macro setint, dst, reg
264 | settp dst, reg, LJ_TISNUM // Three-operand form: tag with LJ_TISNUM, result goes through dst.
267 |// Macros to test operand types.
268 |.macro checktp_nc, reg, tp, target
274 |.macro checktp, reg, tp, target
281 |.macro checktptp, src, tp, target
287 |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288 |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289 |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
291 |.macro checknumx, reg, target, jump
294 | cmp ITYPEd, LJ_TISNUM
297 |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
298 |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
299 |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
300 |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
301 |.macro checknumber, src, target; checknumx src, target, ja; .endmacro
303 |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = ~(1<<47): every bit set except bit 47.
304 |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = ~(2<<47): every bit set except bit 48.
306 |// These operands must be used with movzx.
307 |.define PC_OP, byte [PC-4] // Byte 0 of the 4 byte instruction word that ends at PC.
308 |.define PC_RA, byte [PC-3] // Byte 1 of that instruction word.
309 |.define PC_RB, byte [PC-1] // Byte 3 (highest) of that instruction word.
310 |.define PC_RC, byte [PC-2] // Byte 2 of that instruction word.
311 |.define PC_RD, word [PC-2] // 16 bit operand covering bytes 2..3, i.e. the same bytes as PC_RC (low) and PC_RB (high).
313 |.macro branchPC, reg
314 | lea PC, [PC+reg*4-BCBIAS_J*4]
317 |// Assumes DISPATCH is relative to GL.
318 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Displacement of a global_State field: GG_DISP2G plus the field offset. */
319 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Displacement of a jit_State field: GG_DISP2J plus the field offset. */
321 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Offset of a GCproto field measured from the end of the GCproto struct (always negative). */
323 |// Decrement hashed hotcount and trigger trace recorder if zero.
327 | and reg, HOTCOUNT_PCMASK
328 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
335 | and reg, HOTCOUNT_PCMASK
336 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
340 |// Set current VM state.
341 |.macro set_vmstate, st
342 | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
345 |.macro fpop1; fstp st1; .endmacro // Store st0 into st1 and pop: drops one x87 stack entry while keeping the old top value.
347 |// Synthesize SSE FP constants.
348 |.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349 | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
352 |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353 | mov64 tmp, U64x(val,00000000); movd reg, tmp
356 |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357 | sseconst_hi reg, tmp, 80000000
359 |.macro sseconst_1, reg, tmp // Synthesize 1.0.
360 | sseconst_hi reg, tmp, 3ff00000
362 |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363 | sseconst_hi reg, tmp, bff00000
365 |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366 | sseconst_hi reg, tmp, 43300000
368 |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369 | sseconst_hi reg, tmp, 43380000
372 |// Move table write barrier back. Overwrites reg.
373 |.macro barrierback, tab, reg
374 | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375 | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = previous value of gc.grayagain.
376 | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // gc.grayagain = tab.
377 | mov tab->gclist, reg // tab->gclist = previous gc.grayagain, so tab is linked in front of it.
380 |//-----------------------------------------------------------------------
382 /* Generate subroutines used by opcodes and other parts of the VM. */
383 /* The .code_sub section should be last to help static branch prediction. */
384 static void build_subroutines(BuildCtx *ctx)
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
396 | // Return from pcall or xpcall fast func.
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
403 | mov aword [BASE+RA], ITYPE // Prepend true to results.
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua.
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
415 | test PCd, FRAME_TYPE
422 | neg PC // Previous base = BASE - delta.
426 |1: // Move results down.
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
440 | jne >6 // More/less results wanted?
443 | mov L:RB->top, BASE
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
455 | jb >7 // Less results wanted?
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
459 | mov aword [BASE-16], LJ_TNIL
464 |7: // Less results wanted.
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
486 | jmp ->vm_unwind_c_eh
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
494 | mov GL:RB, L:RB->glref
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C
498 |->vm_unwind_rethrow:
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
508 | and CARG1, CFRAME_RAWMASK
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
519 | mov [BASE-16], RA // Prepend false to error message.
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8]
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
548 | // RB = L, L->base = new base, L->top = top
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base
553 | mov LFUNC:RB, [BASE-16]
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
573 | lea KBASE, [rsp+CFRAME_RESUME] // KBASE = C stack pointer + CFRAME_RESUME. Use the full 64 bit rsp as base (as vm_call/vm_cpcall do), never the 32 bit esp.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP // DISPATCH = L->glref + GG_G2DISP.
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD // Saved previous C frame slot gets the same RD value.
580 | mov L:RB->cframe, KBASE // L->cframe = flagged frame pointer computed above.
581 | cmp byte L:RB->status, RDL // Compare L->status against the low byte of RD.
582 | je >2 // Initial resume (like a call).
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB // Record L as the current thread in the global state.
587 | mov byte L:RB->status, RDL // Overwrite L->status with the low byte of RD.
588 | mov BASE, L:RB->base // BASE = L->base.
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
596 | test PCd, FRAME_TYPE // Check the frame type bits of PC; the branch that consumes the flags is not visible in this excerpt.
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
604 | mov SAVE_ERRF, CARG4d
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
630 | sub PC, BASE // PC = frame delta + frame type
635 | add NARGS:RDd, 1 // RD = nargs+1
638 | mov LFUNC:RB, [RA-16]
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
641 |->vm_call_dispatch_f:
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
667 | // TValue * (new base) or NULL returned in rax (RC).
669 | jz ->vm_leave_cp // No base? Just remove C frame.
672 | jmp <2 // Else continue with the call.
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
678 |//-- Continuation dispatch ----------------------------------------------
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
694 | mov LFUNC:KBASE, [BASE-16]
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)]
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
714 | lea RA, [BASE+RA*8]
721 | mov L:CARG1, SAVE_L
722 | mov L:CARG1->base, BASE
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE
736 |//-- Table indexing metamethods -----------------------------------------
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
776 | mov BASE, L:RB->base
779 |->cont_ra: // BASE = base, RC = result
782 | mov [BASE+RA*8], RB
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
794 | jmp ->vm_call_dispatch_f
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in rax (RC).
803 | mov BASE, RB // Restore BASE.
809 |//-----------------------------------------------------------------------
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
849 | mov BASE, L:RB->base
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
854 | mov RB, [BASE+RA*8]
856 |->cont_nop: // BASE = base, (RC = result)
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
862 | mov [RA-24], PC // [cont|PC]
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8]
867 | lea PC, [RA+FRAME_CONT]
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
872 | jmp ->vm_call_dispatch_f
876 | mov L:CARG1, SAVE_L
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
881 | mov L:CARG1, SAVE_L
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in rax (RC).
891 | mov BASE, RB // Restore BASE.
894 |//-- Comparison metamethods ---------------------------------------------
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
902 | lea CARG3, [BASE+RD*8]
903 | lea CARG2, [BASE+RA*8]
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8]
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in rax (RC).
914 | mov BASE, L:RB->base
926 |->cont_condt: // BASE = base, RC = result
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
934 |->cont_condf: // BASE = base, RC = result
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in rax (RC).
967 | mov L:RB->base, BASE
969 | mov CARG2d, dword [PC-4]
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in rax (RC).
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base
987 |//-- Arithmetic metamethods ---------------------------------------------
995 | lea RC, [KBASE+RC*8]
1004 | lea TMPR, [KBASE+RC*8]
1005 | lea RC, [BASE+RB*8]
1010 | lea RC, [BASE+RD*8]
1020 | lea RC, [BASE+RC*8]
1022 | lea RB, [BASE+RB*8]
1024 | lea RA, [BASE+RA*8]
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1035 | movzx CARG5d, PC_OP
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in rax (RC).
1046 | mov BASE, L:RB->base
1050 | // Call metamethod for binary op.
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT]
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in rax (RC).
1069 | mov BASE, L:RB->base
1072 | jne ->vmeta_binop // Binop call for compatibility.
1074 | mov TAB:CARG1, [BASE+RD*8]
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1081 |//-- Call metamethod ----------------------------------------------------
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16]
1093 | lea CARG3, [RA+NARGS:RD*8-8]
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16]
1099 | lea CARG3, [RA+NARGS:RD*8-8]
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d
1107 | mov LFUNC:RB, [RA-16]
1110 | // This is fragile. L->base must not move, KBASE must always be defined.
1111 | cmp KBASE, BASE // Continue with CALLT if flag set.
1114 | ins_call // Otherwise call resolved metamethod.
1116 |//-- Argument coercion for 'for' statement ------------------------------
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1136 |.macro .ffunc, name
1140 |.macro .ffunc_1, name
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1145 |.macro .ffunc_2, name
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1150 |.macro .ffunc_n, name, op
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE]
1156 |.macro .ffunc_n, name
1157 | .ffunc_n name, movsd
1160 |.macro .ffunc_nn, name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE]
1165 | movsd xmm1, qword [BASE+8]
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1177 |//-- Base library: checks -----------------------------------------------
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1204 | mov RBd, LJ_TISNUM
1209 | mov CFUNC:RB, [BASE-16]
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1213 | settp STR:RC, LJ_TSTR
1214 | mov [BASE-16], STR:RC
1217 |//-- Base library: getters and setters ---------------------------------
1219 |.ffunc_1 getmetatable
1220 | mov TAB:RB, [BASE]
1222 | checktab TAB:RB, >6
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->hash
1234 | settp STR:RC, LJ_TSTR
1236 | add NODE:RA, TAB:RB->node
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1241 | mov NODE:RA, NODE:RA->next
1242 | test NODE:RA, NODE:RA
1244 | jmp ->fff_res1 // Not found, keep default result.
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1252 | cmp ITYPEd, LJ_TUDATA; je <1
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1260 |.ffunc_2 setmetatable
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback
1268 | mov TAB:RB->metatable, TAB:RA
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8]
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in rax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1302 |//-- Base library: conversions ------------------------------------------
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1308 | checknumber RB, ->fff_fallback
1314 | // Only handles the string or number case inline.
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3
1318 | // A __tostring method in the string base metatable is ignored.
1320 | mov [BASE-16], STR:RB
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1339 | // GCstr * returned in rax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR
1344 |//-- Base library: iterators -------------------------------------------
1347 | je >2 // Missing 2nd arg?
1351 | checktab RA, ->fff_fallback
1354 | checktab CARG2, ->fff_fallback
1357 | mov L:RB->base, BASE // Add frame since C call can throw.
1358 | mov L:RB->top, BASE // Dummy frame length is ok.
1361 | lea CARG3, [BASE+8]
1362 | mov CARG2, RA // Caveat: CARG2 == BASE.
1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1368 | mov SAVE_PC, PC // Needed for ITERN fallback.
1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370 | // Flag returned in eax (RD).
1371 | mov BASE, L:RB->base
1372 | test RDd, RDd; jz >3 // End of traversal?
1373 | // Copy key and value to results.
1381 |2: // Set missing 2nd arg to nil.
1382 | mov aword [BASE+8], LJ_TNIL
1384 |3: // End of traversal: return nil.
1385 | mov aword [BASE-16], LJ_TNIL
1389 | mov TAB:RB, [BASE]
1391 | checktab TAB:RB, ->fff_fallback
1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1395 | mov CFUNC:RD, [BASE-16]
1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1398 | settp CFUNC:RD, LJ_TFUNC
1400 | mov [BASE-16], CFUNC:RD
1401 | mov [BASE-8], TMPR
1402 | mov aword [BASE], LJ_TNIL
1406 |.ffunc_2 ipairs_aux
1407 | mov TAB:RB, [BASE]
1408 | checktab TAB:RB, ->fff_fallback
1411 | checkint RA, ->fff_fallback
1413 | checknumtp [BASE+8], ->fff_fallback
1414 | movsd xmm0, qword [BASE+8]
1420 | mov [BASE-16], ITYPE
1422 | sseconst_1 xmm1, TMPR
1424 | cvttsd2si RAd, xmm0
1425 | movsd qword [BASE-16], xmm0
1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428 | mov RD, TAB:RB->array
1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1432 | // Copy array slot.
1436 |2: // Check for empty hash part first. Otherwise call C function.
1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1445 | mov RB, BASE // Save BASE.
1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449 | // cTValue * or NULL returned in rax (RD).
1458 | mov TAB:RB, [BASE]
1460 | checktab TAB:RB, ->fff_fallback
1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1464 | mov CFUNC:RD, [BASE-16]
1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1467 | settp CFUNC:RD, LJ_TFUNC
1469 | mov [BASE-16], CFUNC:RD
1470 | mov [BASE-8], TMPR
1472 | mov64 RD, ((int64_t)LJ_TISNUM<<47)
1475 | mov qword [BASE], 0
1480 |//-- Base library: catch errors ----------------------------------------
1485 | mov PCd, 16+FRAME_PCALL
1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488 | shr RB, HOOK_ACTIVE_SHIFT
1490 | add PC, RB // Remember active hook before pcall.
1491 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1494 | mov RB, [RA+KBASE*8-24]
1495 | mov [RA+KBASE*8-16], RB
1498 | jmp ->vm_call_dispatch
1501 | mov LFUNC:RA, [BASE+8]
1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1503 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504 | mov [BASE], LFUNC:RA
1505 | mov [BASE+8], LFUNC:RB
1508 | mov PCd, 24+FRAME_PCALL
1511 |//-- Coroutine library --------------------------------------------------
1513 |.macro coroutine_resume_wrap, resume
1515 |.ffunc_1 coroutine_resume
1519 |.ffunc coroutine_wrap_aux
1520 | mov CFUNC:RB, [BASE-16]
1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1534 | je >1 // Status != LUA_YIELD (i.e. 0)?
1535 | cmp RA, L:RB->base // Check for presence of initial func.
1537 | mov PC, [RA-8] // Move initial function up.
1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1550 | mov L:RB->base, BASE
1552 | add BASE, 8 // Keep resumed thread in stack for GC.
1554 | mov L:RB->top, BASE
1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1560 | sub RB, PC // Relative to PC.
1564 |2: // Move args to coroutine.
1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1577 | mov BASE, L:RB->base
1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1579 | set_vmstate INTERP
1581 | cmp eax, LUA_YIELD
1584 | mov RA, L:PC->base
1585 | mov KBASE, L:PC->top
1586 | mov L:PC->top, RA // Clear coroutine stack.
1589 | je >6 // No results?
1592 | cmp RD, L:RB->maxstack
1593 | ja >9 // Need to grow stack?
1597 |5: // Move results from coroutine.
1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606 | mov_true ITYPE // Prepend true to results.
1607 | mov [BASE-8], ITYPE
1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1619 | test PCd, FRAME_TYPE
1623 |8: // Coroutine returned with error (at co->top-1).
1625 | mov_false ITYPE // Prepend false to results.
1626 | mov [BASE-8], ITYPE
1629 | mov L:PC->top, RA // Clear error from coroutine stack.
1630 | // Copy error message.
1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639 | // Error function does not return.
1642 |9: // Handle stack expansion on return from yield.
1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1647 | call extern lj_state_growstack // (lua_State *L, int n)
1649 | mov BASE, L:RB->base
1650 | jmp <4 // Retry the stack move.
1653 | coroutine_resume_wrap 1 // coroutine.resume
1654 | coroutine_resume_wrap 0 // coroutine.wrap
1656 |.ffunc coroutine_yield
1658 | test aword L:RB->cframe, CFRAME_RESUME
1660 | mov L:RB->base, BASE
1661 | lea RD, [BASE+NARGS:RD*8-8]
1664 | mov aword L:RB->cframe, RD
1666 | mov byte L:RB->status, al
1667 | jmp ->vm_leave_unw
1669 |//-- Math library -------------------------------------------------------
1675 | cmp RBd, 0; jns ->fff_resi
1685 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1690 | checknum RB, ->fff_fallback
1698 |.ffunc_n math_sqrt, sqrtsd
1701 | movsd qword [BASE-16], xmm0
1709 | test PCd, FRAME_TYPE
1712 | cmp PC_RB, RDL // More results expected?
1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1720 |6: // Fill up results with nil.
1721 | mov aword [BASE+RD*8-24], LJ_TNIL
1725 |7: // Non-standard return case.
1726 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1729 |.macro math_round, func
1730 | .ffunc math_ .. func
1733 | checknumx RB, ->fff_resRB, je
1736 | checknumtp [BASE], ->fff_fallback
1738 | movsd xmm0, qword [BASE]
1739 | call ->vm_ .. func .. _sse
1741 | cvttsd2si RBd, xmm0
1742 | cmp RBd, 0x80000000
1744 | cvtsi2sd xmm1, RBd
1745 | ucomisd xmm0, xmm1
1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757 | checknumtp [BASE], ->fff_fallback
1758 | movsd xmm0, qword [BASE]
1764 |.macro math_extern, func
1765 | .ffunc_n math_ .. func
1772 |.macro math_extern2, func
1773 | .ffunc_nn math_ .. func
1792 | math_extern2 atan2
1795 |.ffunc_2 math_ldexp
1796 | checknumtp [BASE], ->fff_fallback
1797 | checknumtp [BASE+8], ->fff_fallback
1798 | fld qword [BASE+8]
1803 | fstp qword [BASE-16]
1806 |.ffunc_n math_frexp
1813 | movsd qword [BASE-16], xmm0
1818 | cvtsi2sd xmm1, RBd
1819 | movsd qword [BASE-8], xmm1
1825 | lea CARG1, [BASE-16]
1831 | movsd qword [BASE-8], xmm0
1835 |.macro math_minmax, name, cmovop, sseop
1841 |1: // Handle integers.
1842 | cmp RAd, RDd; jae ->fff_resRB
1843 | mov TMPR, [BASE+RA*8-8]
1851 | // Convert intermediate result to number and continue below.
1852 | cvtsi2sd xmm0, RBd
1857 | checknumtp [BASE], ->fff_fallback
1860 | movsd xmm0, qword [BASE]
1861 |5: // Handle numbers or integers.
1862 | cmp RAd, RDd; jae ->fff_resxmm0
1864 | mov RB, [BASE+RA*8-8]
1865 | checknumx RB, >6, jb
1867 | cvtsi2sd xmm1, RBd
1870 | checknumtp [BASE+RA*8-8], ->fff_fallback
1873 | movsd xmm1, qword [BASE+RA*8-8]
1880 | math_minmax math_min, cmovg, minsd
1881 | math_minmax math_max, cmovl, maxsd
1883 |//-- String library -----------------------------------------------------
1885 |.ffunc string_byte // Only handle the 1-arg case here.
1886 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1887 | mov STR:RB, [BASE]
1888 | checkstr STR:RB, ->fff_fallback
1890 | cmp dword STR:RB->len, 1
1891 | jb ->fff_res0 // Return no results for empty string.
1892 | movzx RBd, byte STR:RB[1]
1896 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1899 |.ffunc string_char // Only handle the 1-arg case here.
1901 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1904 | checkint RB, ->fff_fallback
1906 | checknumtp [BASE], ->fff_fallback
1907 | cvttsd2si RBd, qword [BASE]
1909 | cmp RBd, 255; ja ->fff_fallback
1912 | lea RD, TMP1 // Points to stack. Little-endian.
1915 | mov L:RB->base, BASE
1916 | mov CARG3d, TMPRd // Zero-extended to size_t.
1920 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1922 | // GCstr * returned in rax (RD).
1923 | mov BASE, L:RB->base
1925 | settp STR:RD, LJ_TSTR
1926 | mov [BASE-16], STR:RD
1932 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1935 | mov TMPR, [BASE+16]
1936 | checkint TMPR, ->fff_fallback
1938 | checknumtp [BASE+16], ->fff_fallback
1939 | cvttsd2si TMPRd, qword [BASE+16]
1942 | mov STR:RB, [BASE]
1943 | checkstr STR:RB, ->fff_fallback
1945 | mov ITYPE, [BASE+8]
1946 | mov RAd, ITYPEd // Must clear hiword for lea below.
1948 | cmp ITYPEd, LJ_TISNUM
1949 | jne ->fff_fallback
1951 | checknumtp [BASE+8], ->fff_fallback
1952 | cvttsd2si RAd, qword [BASE+8]
1954 | mov RCd, STR:RB->len
1955 | cmp RCd, TMPRd // len < end? (unsigned compare)
1958 | test RAd, RAd // start <= 0?
1961 | sub TMPRd, RAd // start > end?
1963 | lea RD, [STR:RB+RAd+#STR-1]
1968 |5: // Negative end or overflow.
1970 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1973 | mov TMPRd, RCd // end = len
1976 |7: // Negative start or underflow.
1978 | add RAd, RCd // start = start+(len+1)
1980 | jg <3 // start > 0?
1982 | mov RAd, 1 // start = 1
1985 |->fff_emptystr: // Range underflow.
1986 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1989 |.macro ffstring_op, name
1990 | .ffunc_1 string_ .. name
1993 | mov STR:TMPR, [BASE]
1994 | checkstr STR:TMPR, ->fff_fallback
1996 | mov STR:CARG2, [BASE]
1997 | checkstr STR:CARG2, ->fff_fallback
2000 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2001 | mov L:RB->base, BASE
2003 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2005 | mov RC, SBUF:CARG1->b
2006 | mov SBUF:CARG1->L, L:RB
2007 | mov SBUF:CARG1->p, RC
2009 | call extern lj_buf_putstr_ .. name
2011 | call extern lj_buf_tostr
2015 |ffstring_op reverse
2019 |//-- Bit library --------------------------------------------------------
2021 |.macro .ffunc_bit, name, kind, fdef
2024 | sseconst_tobit xmm1, RB
2038 | checknumtp [BASE], ->fff_fallback
2039 | movsd xmm0, qword [BASE]
2042 | sseconst_tobit xmm1, RB
2049 |.macro .ffunc_bit, name, kind
2050 | .ffunc_bit name, kind, .ffunc_1
2053 |.ffunc_bit bit_tobit, 0
2056 |.macro .ffunc_bit_op, name, ins
2057 | .ffunc_bit name, 2
2058 | mov TMPRd, NARGS:RDd // Save for fallback.
2059 | lea RD, [BASE+NARGS:RD*8-16]
2070 | ja ->fff_fallback_bit_op
2073 | checknumtp [RD], ->fff_fallback_bit_op
2074 | movsd xmm0, qword [RD]
2083 |.ffunc_bit_op bit_band, and
2084 |.ffunc_bit_op bit_bor, or
2085 |.ffunc_bit_op bit_bxor, xor
2087 |.ffunc_bit bit_bswap, 1
2091 |.ffunc_bit bit_bnot, 1
2097 | cvtsi2sd xmm0, RBd
2101 |->fff_fallback_bit_op:
2102 | mov NARGS:RDd, TMPRd // Restore for fallback
2103 | jmp ->fff_fallback
2105 |.macro .ffunc_bit_sh, name, ins
2107 | .ffunc_bit name, 1, .ffunc_2
2108 | // Note: no inline conversion from number for 2nd argument!
2110 | checkint RA, ->fff_fallback
2113 | sseconst_tobit xmm2, RB
2119 | ins RBd, cl // Assumes RA is ecx.
2123 |.ffunc_bit_sh bit_lshift, shl
2124 |.ffunc_bit_sh bit_rshift, shr
2125 |.ffunc_bit_sh bit_arshift, sar
2126 |.ffunc_bit_sh bit_rol, rol
2127 |.ffunc_bit_sh bit_ror, ror
2129 |//-----------------------------------------------------------------------
2132 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2133 | jmp ->fff_fallback
2135 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2136 |->fff_fallback: // Call fast function fallback handler.
2137 | // BASE = new base, RD = nargs+1
2139 | mov PC, [BASE-8] // Fallback may overwrite PC.
2140 | mov SAVE_PC, PC // Redundant (but a defined value).
2141 | mov L:RB->base, BASE // Publish BASE to L before calling into C.
2142 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8 (RD held nargs+1).
2143 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2145 | mov CFUNC:RD, [BASE-16] // Function object from the slot at BASE-16.
2147 | cmp RA, L:RB->maxstack
2148 | ja >5 // Need to grow stack.
2150 | call aword CFUNC:RD->f // (lua_State *L)
2151 | mov BASE, L:RB->base // BASE is not C callee-save: refetch it after the call.
2152 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2153 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2159 | lea NARGS:RDd, [RAd+1]
2160 | mov LFUNC:RB, [BASE-16]
2161 | jne ->vm_call_tail // Returned -1?
2163 | ins_callt // Returned 0: retry fast path.
2165 |// Reconstruct previous base for vmeta_call during tailcall.
2168 | test PCd, FRAME_TYPE
2172 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2173 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2178 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2180 |5: // Grow stack for fallback handler.
2181 | mov CARG2d, LUA_MINSTACK
2183 | call extern lj_state_growstack // (lua_State *L, int n)
2184 | mov BASE, L:RB->base
2185 | xor RDd, RDd // Simulate a return 0.
2186 | jmp <1 // Dumb retry (goes through ff first).
2188 |->fff_gcstep: // Call GC step function.
2189 | // BASE = new base, RD = nargs+1
2190 | pop RB // Must keep stack at same level.
2191 | mov TMP1, RB // Save return address
2193 | mov SAVE_PC, PC // Redundant (but a defined value).
2194 | mov L:RB->base, BASE
2195 | lea RD, [BASE+NARGS:RD*8-8]
2198 | call extern lj_gc_step // (lua_State *L)
2199 | mov BASE, L:RB->base
2205 | push RB // Restore return address.
2208 |//-----------------------------------------------------------------------
2209 |//-- Special dispatch targets -------------------------------------------
2210 |//-----------------------------------------------------------------------
2212 |->vm_record: // Dispatch target for recording phase.
2214 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2215 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2217 | // Decrement the hookcount for consistency, but always do the call.
2218 | test RDL, HOOK_ACTIVE
2220 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2222 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2226 |->vm_rethook: // Dispatch target for return hooks.
2227 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2228 | test RDL, HOOK_ACTIVE // Hook already active?
2232 |->vm_inshook: // Dispatch target for instr/line hooks.
2233 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2234 | test RDL, HOOK_ACTIVE // Hook already active?
2237 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2239 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2241 | test RDL, LUA_MASKLINE
2245 | mov L:RB->base, BASE
2246 | mov CARG2, PC // Caveat: CARG2 == BASE
2248 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2249 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2251 | mov BASE, L:RB->base
2257 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2259 |->cont_hook: // Continue from hook yield.
2262 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2265 |->vm_hotloop: // Hot loop counter underflow.
2267 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2269 | mov RB, LFUNC:RB->pc
2270 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2271 | lea RD, [BASE+RD*8]
2273 | mov L:RB->base, BASE
2276 | lea CARG1, [DISPATCH+GG_DISP2J]
2277 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2279 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2283 |->vm_callhook: // Dispatch target for call hooks.
2289 |->vm_hotcall: // Hot call counter underflow.
2292 | or PC, 1 // Marker for hot call.
2295 | lea RD, [BASE+NARGS:RD*8-8]
2297 | mov L:RB->base, BASE
2301 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2302 | // ASMFunction returned in eax/rax (RD).
2303 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2307 | mov BASE, L:RB->base
2317 |->cont_stitch: // Trace stitching.
2319 | // BASE = base, RC = result, RB = mbase
2320 | mov ITYPEd, [RB-24] // Save previous trace number.
2321 | mov TMPRd, MULTRES
2323 | lea RA, [BASE+RA*8] // Call base.
2326 |1: // Move results down.
2337 | lea RC, [BASE+RC*8-8]
2340 | ja >9 // More results wanted?
2342 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2343 | mov TRACE:RD, [RA+ITYPE*8]
2344 | test TRACE:RD, TRACE:RD
2346 | movzx RDd, word TRACE:RD->link
2348 | je ->cont_nop // Blacklisted.
2350 | jne =>BC_JLOOP // Jump to stitched trace.
2352 | // Stitch a new trace to the previous trace.
2353 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2355 | mov L:RB->base, BASE
2357 | lea CARG1, [DISPATCH+GG_DISP2J]
2358 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2359 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2360 | mov BASE, L:RB->base
2363 |9: // Fill up results with nil.
2364 | mov aword [RA], LJ_TNIL
2369 |->vm_profhook: // Dispatch target for profiler hook.
2372 | mov L:RB->base, BASE
2373 | mov CARG2, PC // Caveat: CARG2 == BASE
2375 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2376 | mov BASE, L:RB->base
2377 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2382 |//-----------------------------------------------------------------------
2383 |//-- Trace exit handler -------------------------------------------------
2384 |//-----------------------------------------------------------------------
2386 |// Called from an exit stub with the exit number on the stack.
2387 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2390 | push r13; push r12
2391 | push r11; push r10; push r9; push r8
2392 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // rbp = rsp on entry + 16 (nine 8-byte pushes precede the lea).
2393 | push rbx; push rdx; push rcx; push rax
2394 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2395 | mov RCH, byte [rbp-16] // Other byte of the 16 bit exit number.
2396 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots for r15/r14.
2397 | // Caveat: DISPATCH is rbx or r14 (see register assignments). Both are saved above.
2398 | mov DISPATCH, [rbp] // Explicit 64 bit base, like the rbp-relative accesses above.
2399 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2401 | mov [DISPATCH+DISPATCH_J(exitno)], RC // NOTE(review): 64 bit accesses to vmstate/exitno/parent -- confirm the field widths.
2402 | mov [DISPATCH+DISPATCH_J(parent)], RA
2404 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2406 | sub rsp, 16*8 // Room for SSE regs.
2409 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 // Store the low 64 bits of xmm15..xmm0, one 8-byte slot each.
2410 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2411 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2412 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2413 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2414 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2415 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2416 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2417 | // Caveat: RB is rbp. The next load ends rbp-relative access to the save area.
2418 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2419 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] // Full 64 bit pointer load.
2420 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2421 | mov L:RB->base, BASE
2423 | lea CARG2, [rsp+4*8]
2427 | lea CARG1, [DISPATCH+GG_DISP2J]
2428 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear all 64 bits: the field is read as a full pointer above.
2429 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2430 | // MULTRES or negated error code returned in eax (RD).
2431 | mov RA, L:RB->cframe
2432 | and RA, CFRAME_RAWMASK
2433 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2434 | mov BASE, L:RB->base // BASE is not C callee-save: refetch it after the call.
2435 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2439 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2441 | // Restore additional callee-save registers only used in compiled code.
2443 | lea RA, [rsp+10*16+4*8] // RA = address just above ten 16-byte xmm slots that start at rsp+4*8.
2445 | movdqa xmm15, [RA-10*16] // movdqa needs 16-byte aligned slots. xmm6-xmm15 are callee-saved on Windows/x64.
2446 | movdqa xmm14, [RA-9*16]
2447 | movdqa xmm13, [RA-8*16]
2448 | movdqa xmm12, [RA-7*16]
2449 | movdqa xmm11, [RA-6*16]
2450 | movdqa xmm10, [RA-5*16]
2451 | movdqa xmm9, [RA-4*16]
2452 | movdqa xmm8, [RA-3*16]
2453 | movdqa xmm7, [RA-2*16]
2454 | mov rsp, RA // Reposition stack to C frame.
2455 | movdqa xmm6, [RA-1*16] // Addressed via RA, so it is unaffected by the rsp update above.
2465 | mov rsp, RA // Reposition stack to C frame.
2467 | test RDd, RDd; js >9 // Check for error from exit.
2470 | mov LFUNC:KBASE, [BASE-16] // Function object from the slot at BASE-16.
2471 | cleartp LFUNC:KBASE
2472 | mov KBASE, LFUNC:KBASE->pc
2473 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of the prototype, reached via the function's pc.
2474 | mov L:RB->base, BASE
2475 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear all 64 bits: the exit handler loads this field as a full pointer.
2476 | set_vmstate INTERP
2477 | // Modified copy of ins_next which handles function header dispatch, too.
2483 | cmp OP, BC_FUNCF // Function header?
2485 | cmp OP, BC_FUNCC+2 // Fast function?
2488 | mov RCd, MULTRES // RC/RD holds nres+1.
2490 | jmp aword [DISPATCH+OP*8]
2492 |4: // Check frame below fast function.
2494 | test RCd, FRAME_TYPE
2495 | jnz <2 // Trace stitching continuation?
2496 | // Otherwise set KBASE for Lua function below fast function.
2497 | movzx RCd, byte [RC-3]
2499 | mov LFUNC:KBASE, [BASE+RC*8-24]
2500 | cleartp LFUNC:KBASE
2501 | mov KBASE, LFUNC:KBASE->pc
2502 | mov KBASE, [KBASE+PC2PROTO(k)]
2505 |9: // Rethrow error from the right C frame.
2509 | call extern lj_err_throw // (lua_State *L, int errcode)
2512 |//-----------------------------------------------------------------------
2513 |//-- Math helper functions ----------------------------------------------
2514 |//-----------------------------------------------------------------------
2516 |// FP value rounding. Called by math.floor/math.ceil fast functions
2517 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2518 |.macro vm_round, name, mode, cond
2521 | sseconst_abs xmm2, RD
2522 | sseconst_2p52 xmm3, RD
2524 | andpd xmm1, xmm2 // |x|
2525 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2527 | andnpd xmm2, xmm0 // Isolate sign bit.
2528 |.if mode == 2 // trunc(x)?
2530 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2532 | sseconst_1 xmm3, RD // Constant for the correction step below (RD is scratch).
2533 | cmpsd xmm0, xmm1, 1 // |x| < result?
2535 | subsd xmm1, xmm0 // If yes, subtract 1 (sseconst_1 was loaded above, not sseconst_m1).
2536 | orpd xmm1, xmm2 // Merge sign bit back in.
2538 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2540 | orpd xmm1, xmm2 // Merge sign bit back in.
2541 | .if mode == 1 // ceil(x)?
2542 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2543 | cmpsd xmm0, xmm1, 6 // x > result?
2544 | .else // floor(x)?
2545 | sseconst_1 xmm2, RD
2546 | cmpsd xmm0, xmm1, 1 // x < result?
2549 | subsd xmm1, xmm0 // If yes, subtract +-1.
2556 | vm_round vm_floor, 0, 1
2557 | vm_round vm_ceil, 1, JIT
2558 | vm_round vm_trunc, 2, JIT
2560 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2562 |// Args in xmm0/xmm1, return value in xmm0.
2563 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2566 | sseconst_abs xmm2, RD
2567 | sseconst_2p52 xmm3, RD
2569 | andpd xmm4, xmm2 // |x/y|
2570 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2572 | andnpd xmm2, xmm0 // Isolate sign bit.
2573 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2575 | orpd xmm4, xmm2 // Merge sign bit back in.
2576 | sseconst_1 xmm2, RD
2577 | cmpsd xmm0, xmm4, 1 // x/y < result?
2579 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2590 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2592 | cmp eax, 1; jle >6 // i<=1?
2593 | // Now 1 < (unsigned)i <= 0x80000000.
2594 |1: // Handle leading zeros.
2595 | test eax, 1; jnz >2
2602 |3: // Handle trailing bits.
2613 | je <5 // x^1 ==> x
2614 | jb >7 // x^0 ==> 1
2617 | sseconst_1 xmm1, RD
2622 | sseconst_1 xmm0, RD
2625 |//-----------------------------------------------------------------------
2626 |//-- Miscellaneous functions --------------------------------------------
2627 |//-----------------------------------------------------------------------
2629 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2632 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
2640 | .if X64WIN; pop rsi; .endif
2643 |//-----------------------------------------------------------------------
2644 |//-- Assertions ---------------------------------------------------------
2645 |//-----------------------------------------------------------------------
2647 |->assert_bad_for_arg_type:
2648 #ifdef LUA_USE_ASSERT
2653 |//-----------------------------------------------------------------------
2654 |//-- FFI helper functions -----------------------------------------------
2655 |//-----------------------------------------------------------------------
2657 |// Handler for callback functions. Callback slot number in ah/al.
2660 |.type CTSTATE, CTState, PC
2661 | saveregs_ // rbp already saved. rbp now holds global_State *.
2662 | lea DISPATCH, [rbp+GG_G2DISP] // Explicit 64 bit base register for the global_State pointer.
2663 | mov CTSTATE, GL:rbp->ctype_state // CTSTATE is mapped onto the PC register.
2665 | mov CTSTATE->cb.slot, eax
2666 | mov CTSTATE->cb.gpr[0], CARG1 // Spill the incoming C argument registers into the callback state.
2667 | mov CTSTATE->cb.gpr[1], CARG2
2668 | mov CTSTATE->cb.gpr[2], CARG3
2669 | mov CTSTATE->cb.gpr[3], CARG4
2670 | movsd qword CTSTATE->cb.fpr[0], xmm0 // Only the low 64 bits of each xmm register are stored.
2671 | movsd qword CTSTATE->cb.fpr[1], xmm1
2672 | movsd qword CTSTATE->cb.fpr[2], xmm2
2673 | movsd qword CTSTATE->cb.fpr[3], xmm3
2675 | lea rax, [rsp+CFRAME_SIZE+4*8]
2677 | lea rax, [rsp+CFRAME_SIZE]
2678 | mov CTSTATE->cb.gpr[4], CARG5
2679 | mov CTSTATE->cb.gpr[5], CARG6
2680 | movsd qword CTSTATE->cb.fpr[4], xmm4
2681 | movsd qword CTSTATE->cb.fpr[5], xmm5
2682 | movsd qword CTSTATE->cb.fpr[6], xmm6
2683 | movsd qword CTSTATE->cb.fpr[7], xmm7
2685 | mov CTSTATE->cb.stack, rax
2687 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2688 | mov CARG1, CTSTATE
2689 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2690 | // lua_State * returned in rax (RD).
2691 | set_vmstate INTERP
2692 | mov BASE, L:RD->base
2695 | mov LFUNC:RB, [BASE-16]
2702 |->cont_ffi_callback: // Return from FFI callback.
2705 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2706 | mov aword CTSTATE->L, L:RA
2707 | mov L:RA->base, BASE
2709 | mov CARG1, CTSTATE
2711 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2712 | mov rax, CTSTATE->cb.gpr[0]
2713 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2714 | jmp ->vm_leave_unw
2717 |->vm_ffi_call: // Call C function via FFI.
2718 | // Caveat: needs special frame unwinding, see below.
2720 | .type CCSTATE, CCallState, rbx // CCSTATE lives in rbx (C callee-save), so it survives the call below.
2721 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame: saved rbp at [rbp], saved rbx at [rbp-8].
2723 | // Readjust stack.
2724 | mov eax, CCSTATE->spadj
2727 | // Copy stack slots.
2728 | movzx ecx, byte CCSTATE->nsp // ecx = nsp, zero-extended from a byte.
2732 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] // Load slot rcx from the call state ...
2733 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax // ... and store it to the outgoing stack area.
2738 | movzx eax, byte CCSTATE->nfpr // eax = nfpr. Tested below to decide which xmm registers to load.
2739 | mov CARG1, CCSTATE->gpr[0] // Load the integer argument registers from the call state.
2740 | mov CARG2, CCSTATE->gpr[1]
2741 | mov CARG3, CCSTATE->gpr[2]
2742 | mov CARG4, CCSTATE->gpr[3]
2744 | mov CARG5, CCSTATE->gpr[4]
2745 | mov CARG6, CCSTATE->gpr[5]
2747 | test eax, eax; jz >5 // nfpr == 0: skip all xmm loads.
2748 | movaps xmm0, CCSTATE->fpr[0] // movaps: the fpr slots must be 16-byte aligned.
2749 | movaps xmm1, CCSTATE->fpr[1]
2750 | movaps xmm2, CCSTATE->fpr[2]
2751 | movaps xmm3, CCSTATE->fpr[3]
2753 | cmp eax, 4; jbe >5 // nfpr <= 4: xmm4-xmm7 are not loaded.
2754 | movaps xmm4, CCSTATE->fpr[4]
2755 | movaps xmm5, CCSTATE->fpr[5]
2756 | movaps xmm6, CCSTATE->fpr[6]
2757 | movaps xmm7, CCSTATE->fpr[7]
2761 | call aword CCSTATE->func // eax still holds nfpr here -- presumably for the SysV variadic convention (TODO confirm).
2763 | mov CCSTATE->gpr[0], rax // Write the integer and FP return registers back into the call state.
2764 | movaps CCSTATE->fpr[0], xmm0
2766 | mov CCSTATE->gpr[1], rdx
2767 | movaps CCSTATE->fpr[1], xmm1
2770 | mov rbx, [rbp-8]; leave; ret // Restore rbx from the frame, tear down the frame and return.
2772 |// Note: vm_ffi_call must be the last function in this object file!
2774 |//-----------------------------------------------------------------------
2777 /* Generate the code for a single instruction. */
2778 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2781 |// Note: aligning all instructions does not pay off.
2786 /* -- Comparison ops ---------------------------------------------------- */
2788 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2790 |.macro jmp_comp, lt, ge, le, gt, target
2804 ||default: break; /* Shut up GCC. */
2808 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2809 | // RA = src1, RD = src2, JMP with RD = target
2811 | mov ITYPE, [BASE+RA*8]
2812 | mov RB, [BASE+RD*8]
2818 | cmp ITYPEd, LJ_TISNUM; jne >7
2819 | cmp RBd, LJ_TISNUM; jne >8
2822 | jmp_comp jge, jl, jg, jle, >9
2829 |7: // RA is not an integer.
2831 | // RA is a number.
2832 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2833 | // RA is a number, RD is an integer.
2834 | cvtsi2sd xmm0, RDd
2837 |8: // RA is an integer, RD is not an integer.
2839 | // RA is an integer, RD is a number.
2840 | cvtsi2sd xmm1, RAd
2844 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2845 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2853 | ucomisd xmm0, xmm1
2854 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2855 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2857 | jmp_comp jbe, ja, jb, jae, <9
2860 | jmp_comp jbe, ja, jb, jae, >1
2868 case BC_ISEQV: case BC_ISNEV:
2869 vk = op == BC_ISEQV;
2870 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2871 | mov RB, [BASE+RD*8]
2872 | mov ITYPE, [BASE+RA*8]
2879 | cmp RBd, LJ_TISNUM; jne >7
2880 | cmp ITYPEd, LJ_TISNUM; jne >8
2892 |7: // RD is not an integer.
2894 | // RD is a number.
2896 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2897 | // RD is a number, RA is an integer.
2898 | cvtsi2sd xmm0, RAd
2901 |8: // RD is an integer, RA is not an integer.
2903 | // RD is an integer, RA is a number.
2904 | cvtsi2sd xmm1, RDd
2908 | cmp RBd, LJ_TISNUM; jae >5
2909 | cmp ITYPEd, LJ_TISNUM; jae >5
2915 | ucomisd xmm0, xmm1
2919 | jp >2 // Unordered means not equal.
2922 | jp >2 // Unordered means not equal.
2927 |1: // EQ: Branch to the target.
2930 |2: // NE: Fallthrough to next instruction.
2938 |2: // NE: Branch to the target.
2941 |1: // EQ: Fallthrough to next instruction.
2943 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2944 op == BC_ISEQN || op == BC_ISNEN)) {
2950 if (op == BC_ISEQV || op == BC_ISNEV) {
2951 |5: // Either or both types are not numbers.
2953 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2954 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2957 | je <1 // Same GCobjs or pvalues?
2959 | jne <2 // Not the same type?
2960 | cmp RBd, LJ_TISTABUD
2961 | ja <2 // Different objects and not table/ud?
2963 | // Different tables or userdatas. Need to check __eq metamethod.
2964 | // Field metatable must be at same offset for GCtab and GCudata!
2966 | mov TAB:RB, TAB:RA->metatable
2967 | test TAB:RB, TAB:RB
2968 | jz <2 // No metatable?
2969 | test byte TAB:RB->nomm, 1<<MM_eq
2970 | jnz <2 // Or 'no __eq' flag set?
2972 | xor RBd, RBd // ne = 0
2974 | mov RBd, 1 // ne = 1
2976 | jmp ->vmeta_equal // Handle __eq metamethod.
2980 | cmp ITYPEd, LJ_TCDATA
2981 if (LJ_DUALNUM && vk) {
2986 | jmp ->vmeta_equal_cd
2990 case BC_ISEQS: case BC_ISNES:
2991 vk = op == BC_ISEQS;
2992 | ins_AND // RA = src, RD = str const, JMP with RD = target
2993 | mov RB, [BASE+RA*8]
2996 | cmp RB, [KBASE+RD*8]
3004 case BC_ISEQN: case BC_ISNEN:
3005 vk = op == BC_ISEQN;
3006 | ins_AD // RA = src, RD = num const, JMP with RD = target
3007 | mov RB, [BASE+RA*8]
3011 | mov RD, [KBASE+RD*8]
3024 |7: // RA is not an integer.
3026 | // RA is a number.
3027 | mov RD, [KBASE+RD*8]
3029 | // RA is a number, RD is an integer.
3030 | cvtsi2sd xmm0, RDd
3033 |8: // RA is an integer, RD is a number.
3034 | cvtsi2sd xmm0, RBd
3036 | ucomisd xmm0, xmm1
3043 | movsd xmm0, qword [KBASE+RD*8]
3046 | ucomisd xmm0, qword [BASE+RA*8]
3049 case BC_ISEQP: case BC_ISNEP:
3050 vk = op == BC_ISEQP;
3051 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3052 | mov RB, [BASE+RA*8]
3056 if (!LJ_HASFFI) goto iseqne_test;
3064 | cmp RBd, LJ_TCDATA; jne <2
3065 | jmp ->vmeta_equal_cd
3068 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3076 /* -- Unary test and copy ops ------------------------------------------- */
3078 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3079 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3080 | mov ITYPE, [BASE+RD*8]
3082 if (op == BC_ISTC || op == BC_ISFC) {
3086 | cmp ITYPEd, LJ_TISTRUECOND
3087 if (op == BC_IST || op == BC_ISTC) {
3092 if (op == BC_ISTC || op == BC_ISFC) {
3093 | mov [BASE+RA*8], RB
3097 |1: // Fallthrough to the next instruction.
3102 | ins_AD // RA = src, RD = -type
3103 | mov RB, [BASE+RA*8]
3106 | jne ->vmeta_istype
3110 | ins_AD // RA = src, RD = -(TISNUM-1)
3111 | checknumtp [BASE+RA*8], ->vmeta_istype
3115 /* -- Unary ops --------------------------------------------------------- */
3118 | ins_AD // RA = dst, RD = src
3119 | mov RB, [BASE+RD*8]
3120 | mov [BASE+RA*8], RB
3124 | ins_AD // RA = dst, RD = src
3125 | mov RB, [BASE+RD*8]
3128 | cmp RB, LJ_TISTRUECOND
3132 | mov [BASE+RA*8], RC
3136 | ins_AD // RA = dst, RD = src
3137 | mov RB, [BASE+RD*8]
3144 | mov [BASE+RA*8], RB
3147 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3152 | checknum RB, ->vmeta_unm
3154 | mov64 RD, U64x(80000000,00000000)
3159 | mov [BASE+RA*8], RB
3164 | ins_AD // RA = dst, RD = src
3165 | mov RD, [BASE+RD*8]
3168 | mov RDd, dword STR:RD->len
3171 | mov [BASE+RA*8], RD
3174 | cvtsi2sd xmm0, dword STR:RD->len
3176 | movsd qword [BASE+RA*8], xmm0
3180 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3181 | mov TAB:CARG1, TAB:RD
3183 | mov TAB:RB, TAB:RD->metatable
3189 | mov RB, BASE // Save BASE.
3190 | call extern lj_tab_len // (GCtab *t)
3191 | // Length of table returned in eax (RD).
3195 | cvtsi2sd xmm0, RDd
3197 | mov BASE, RB // Restore BASE.
3201 |9: // Check for __len.
3202 | test byte TAB:RB->nomm, 1<<MM_len
3204 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3208 /* -- Binary ops -------------------------------------------------------- */
3210 |.macro ins_arithpre, sseins, ssereg
3212 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3215 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3217 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3219 | movsd xmm0, qword [BASE+RB*8]
3220 | sseins ssereg, qword [KBASE+RC*8]
3223 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3225 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3227 | movsd xmm0, qword [KBASE+RC*8]
3228 | sseins ssereg, qword [BASE+RB*8]
3231 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3232 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3233 | movsd xmm0, qword [BASE+RB*8]
3234 | sseins ssereg, qword [BASE+RC*8]
3239 |.macro ins_arithdn, intins
3241 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3244 | mov RB, [BASE+RB*8]
3245 | mov RC, [KBASE+RC*8]
3246 | checkint RB, ->vmeta_arith_vno
3247 | checkint RC, ->vmeta_arith_vno
3248 | intins RBd, RCd; jo ->vmeta_arith_vno
3251 | mov RB, [BASE+RB*8]
3252 | mov RC, [KBASE+RC*8]
3253 | checkint RB, ->vmeta_arith_nvo
3254 | checkint RC, ->vmeta_arith_nvo
3255 | intins RCd, RBd; jo ->vmeta_arith_nvo
3258 | mov RB, [BASE+RB*8]
3259 | mov RC, [BASE+RC*8]
3260 | checkint RB, ->vmeta_arith_vvo
3261 | checkint RC, ->vmeta_arith_vvo
3262 | intins RBd, RCd; jo ->vmeta_arith_vvo
3267 | mov [BASE+RA*8], RC
3270 | mov [BASE+RA*8], RB
3275 |.macro ins_arithpost
3276 | movsd qword [BASE+RA*8], xmm0
3279 |.macro ins_arith, sseins
3280 | ins_arithpre sseins, xmm0
3285 |.macro ins_arith, intins, sseins
3287 | ins_arithdn intins
3293 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3294 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3295 | ins_arith add, addsd
3297 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3298 | ins_arith sub, subsd
3300 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3301 | ins_arith imul, mulsd
3303 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3307 | ins_arithpre movsd, xmm1
3313 case BC_MODNV: case BC_MODVV:
3314 | ins_arithpre movsd, xmm1
3315 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3318 | ins_arithpre movsd, xmm1
3328 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3329 | mov L:CARG1, SAVE_L
3330 | mov L:CARG1->base, BASE
3331 | lea CARG2, [BASE+RC*8]
3337 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3338 | // NULL (finished) or TValue * (metamethod) returned in rax (RC).
3339 | mov BASE, L:RB->base
3342 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3344 | mov RC, [BASE+RB*8]
3345 | mov [BASE+RA*8], RC
3349 /* -- Constant ops ------------------------------------------------------ */
3352 | ins_AND // RA = dst, RD = str const (~)
3353 | mov RD, [KBASE+RD*8]
3355 | mov [BASE+RA*8], RD
3360 | ins_AND // RA = dst, RD = cdata const (~)
3361 | mov RD, [KBASE+RD*8]
3362 | settp RD, LJ_TCDATA
3363 | mov [BASE+RA*8], RD
3368 | ins_AD // RA = dst, RD = signed int16 literal
3372 | mov [BASE+RA*8], RD
3374 | movsx RDd, RDW // Sign-extend literal.
3375 | cvtsi2sd xmm0, RDd
3376 | movsd qword [BASE+RA*8], xmm0
3381 | ins_AD // RA = dst, RD = num const
3382 | movsd xmm0, qword [KBASE+RD*8]
3383 | movsd qword [BASE+RA*8], xmm0
3387 | ins_AD // RA = dst, RD = primitive type (~)
3390 | mov [BASE+RA*8], RD
3394 | ins_AD // RA = dst_start, RD = dst_end
3395 | lea RA, [BASE+RA*8+8]
3396 | lea RD, [BASE+RD*8]
3398 | mov [RA-8], RB // Sets minimum 2 slots.
3407 /* -- Upvalue and function ops ------------------------------------------ */
3410 | ins_AD // RA = dst, RD = upvalue #
3411 | mov LFUNC:RB, [BASE-16]
3413 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3414 | mov RB, UPVAL:RB->v
3416 | mov [BASE+RA*8], RD
3420 #define TV2MARKOFS \
3421 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3422 | ins_AD // RA = upvalue #, RD = src
3423 | mov LFUNC:RB, [BASE-16]
3425 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3426 | cmp byte UPVAL:RB->closed, 0
3427 | mov RB, UPVAL:RB->v
3428 | mov RA, [BASE+RD*8]
3431 | // Check barrier for closed upvalue.
3432 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3437 |2: // Upvalue is black. Check if new value is collectable and white.
3440 | sub RDd, LJ_TISGCV
3441 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3444 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3446 | // Crossed a write barrier. Move the barrier forward.
3449 | mov RB, BASE // Save BASE.
3451 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3453 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3454 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3455 | mov BASE, RB // Restore BASE.
3460 | ins_AND // RA = upvalue #, RD = str const (~)
3461 | mov LFUNC:RB, [BASE-16]
3463 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3464 | mov STR:RA, [KBASE+RD*8]
3465 | mov RD, UPVAL:RB->v
3466 | settp STR:ITYPE, STR:RA, LJ_TSTR
3467 | mov [RD], STR:ITYPE
3468 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3473 |2: // Check if string is white and ensure upvalue is closed.
3474 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3476 | cmp byte UPVAL:RB->closed, 0
3478 | // Crossed a write barrier. Move the barrier forward.
3479 | mov RB, BASE // Save BASE (CARG2 == BASE).
3481 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3482 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3483 | mov BASE, RB // Restore BASE.
3487 | ins_AD // RA = upvalue #, RD = num const
3488 | mov LFUNC:RB, [BASE-16]
3490 | movsd xmm0, qword [KBASE+RD*8]
3491 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3492 | mov RA, UPVAL:RB->v
3493 | movsd qword [RA], xmm0
3497 | ins_AD // RA = upvalue #, RD = primitive type (~)
3498 | mov LFUNC:RB, [BASE-16]
3500 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3503 | mov RA, UPVAL:RB->v
3508 | ins_AD // RA = level, RD = target
3509 | branchPC RD // Do this first to free RD.
3511 | cmp dword L:RB->openupval, 0
3513 | mov L:RB->base, BASE
3514 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3515 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3516 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3517 | mov BASE, L:RB->base
3523 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3525 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3526 | mov CARG3, [BASE-16]
3528 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3531 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3532 | call extern lj_func_newL_gc
3533 | // GCfuncL * returned in rax (RC).
3534 | mov BASE, L:RB->base
3536 | settp LFUNC:RC, LJ_TFUNC
3537 | mov [BASE+RA*8], LFUNC:RC
3541 /* -- Table ops --------------------------------------------------------- */
3544 | ins_AD // RA = dst, RD = hbits|asize
3546 | mov L:RB->base, BASE
3547 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3548 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3560 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3561 | // Table * returned in rax (RC).
3562 | mov BASE, L:RB->base
3564 | settp TAB:RC, LJ_TTAB
3565 | mov [BASE+RA*8], TAB:RC
3567 |3: // Turn 0x7ff into 0x801.
3572 | call extern lj_gc_step_fixtop // (lua_State *L)
3577 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3579 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3581 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3582 | mov L:RB->base, BASE
3585 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3586 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3587 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3588 | // Table * returned in rax (RC).
3589 | mov BASE, L:RB->base
3591 | settp TAB:RC, LJ_TTAB
3592 | mov [BASE+RA*8], TAB:RC
3596 | call extern lj_gc_step_fixtop // (lua_State *L)
3597 | movzx RDd, PC_RD // Need to reload RD.
3603 | ins_AND // RA = dst, RD = str const (~)
3604 | mov LFUNC:RB, [BASE-16]
3606 | mov TAB:RB, LFUNC:RB->env
3607 | mov STR:RC, [KBASE+RD*8]
3611 | ins_AND // RA = src, RD = str const (~)
3612 | mov LFUNC:RB, [BASE-16]
3614 | mov TAB:RB, LFUNC:RB->env
3615 | mov STR:RC, [KBASE+RD*8]
3620 | ins_ABC // RA = dst, RB = table, RC = key
3621 | mov TAB:RB, [BASE+RB*8]
3622 | mov RC, [BASE+RC*8]
3623 | checktab TAB:RB, ->vmeta_tgetv
3629 | // Convert number to int and back and compare.
3632 | cvttsd2si RCd, xmm0
3633 | cvtsi2sd xmm1, RCd
3634 | ucomisd xmm0, xmm1
3635 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3637 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3638 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3640 | add RC, TAB:RB->array
3641 | // Get array slot.
3643 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3646 | mov [BASE+RA*8], ITYPE
3649 |2: // Check for __index if table value is nil.
3650 | mov TAB:TMPR, TAB:RB->metatable
3651 | test TAB:TMPR, TAB:TMPR
3653 | test byte TAB:TMPR->nomm, 1<<MM_index
3654 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3658 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3663 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3664 | mov TAB:RB, [BASE+RB*8]
3666 | mov STR:RC, [KBASE+RC*8]
3667 | checktab TAB:RB, ->vmeta_tgets
3668 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3669 | mov TMPRd, TAB:RB->hmask
3670 | and TMPRd, STR:RC->hash
3672 | add NODE:TMPR, TAB:RB->node
3673 | settp ITYPE, STR:RC, LJ_TSTR
3675 | cmp NODE:TMPR->key, ITYPE
3677 | // Get node value.
3678 | mov ITYPE, NODE:TMPR->val
3679 | cmp ITYPE, LJ_TNIL
3680 | je >5 // Key found, but nil value?
3682 | mov [BASE+RA*8], ITYPE
3685 |4: // Follow hash chain.
3686 | mov NODE:TMPR, NODE:TMPR->next
3687 | test NODE:TMPR, NODE:TMPR
3689 | // End of hash chain: key not found, nil result.
3690 | mov ITYPE, LJ_TNIL
3692 |5: // Check for __index if table value is nil.
3693 | mov TAB:TMPR, TAB:RB->metatable
3694 | test TAB:TMPR, TAB:TMPR
3695 | jz <2 // No metatable: done.
3696 | test byte TAB:TMPR->nomm, 1<<MM_index
3697 | jnz <2 // 'no __index' flag set: done.
3698 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3701 | ins_ABC // RA = dst, RB = table, RC = byte literal
3702 | mov TAB:RB, [BASE+RB*8]
3703 | checktab TAB:RB, ->vmeta_tgetb
3704 | cmp RCd, TAB:RB->asize
3707 | add RC, TAB:RB->array
3708 | // Get array slot.
3710 | cmp ITYPE, LJ_TNIL
3713 | mov [BASE+RA*8], ITYPE
3716 |2: // Check for __index if table value is nil.
3717 | mov TAB:TMPR, TAB:RB->metatable
3718 | test TAB:TMPR, TAB:TMPR
3720 | test byte TAB:TMPR->nomm, 1<<MM_index
3721 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3725 | ins_ABC // RA = dst, RB = table, RC = key
3726 | mov TAB:RB, [BASE+RB*8]
3729 | mov RCd, dword [BASE+RC*8]
3731 | cvttsd2si RCd, qword [BASE+RC*8]
3733 | cmp RCd, TAB:RB->asize
3734 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3736 | add RC, TAB:RB->array
3737 | // Get array slot.
3741 | mov [BASE+RA*8], ITYPE
3746 | ins_ABC // RA = src, RB = table, RC = key
3747 | mov TAB:RB, [BASE+RB*8]
3748 | mov RC, [BASE+RC*8]
3749 | checktab TAB:RB, ->vmeta_tsetv
3755 | // Convert number to int and back and compare.
3758 | cvttsd2si RCd, xmm0
3759 | cvtsi2sd xmm1, RCd
3760 | ucomisd xmm0, xmm1
3761 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3763 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3766 | add RC, TAB:RB->array
3767 | cmp aword [RC], LJ_TNIL
3768 | je >3 // Previous value is nil?
3770 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3772 |2: // Set array slot.
3773 | mov RB, [BASE+RA*8]
3777 |3: // Check for __newindex if previous value is nil.
3778 | mov TAB:TMPR, TAB:RB->metatable
3779 | test TAB:TMPR, TAB:TMPR
3781 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3782 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3786 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3790 |7: // Possible table write barrier for the value. Skip valiswhite check.
3791 | barrierback TAB:RB, TMPR
3795 | ins_ABC // RA = src, RB = table, RC = str const (~)
3796 | mov TAB:RB, [BASE+RB*8]
3798 | mov STR:RC, [KBASE+RC*8]
3799 | checktab TAB:RB, ->vmeta_tsets
3800 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3801 | mov TMPRd, TAB:RB->hmask
3802 | and TMPRd, STR:RC->hash
3804 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3805 | add NODE:TMPR, TAB:RB->node
3806 | settp ITYPE, STR:RC, LJ_TSTR
3808 | cmp NODE:TMPR->key, ITYPE
3810 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3811 | cmp aword [TMPR], LJ_TNIL
3812 | je >4 // Previous value is nil?
3814 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3816 |3: // Set node value.
3817 | mov ITYPE, [BASE+RA*8]
3821 |4: // Check for __newindex if previous value is nil.
3822 | mov TAB:ITYPE, TAB:RB->metatable
3823 | test TAB:ITYPE, TAB:ITYPE
3825 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3826 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3829 |5: // Follow hash chain.
3830 | mov NODE:TMPR, NODE:TMPR->next
3831 | test NODE:TMPR, NODE:TMPR
3833 | // End of hash chain: key not found, add a new one.
3835 | // But check for __newindex first.
3836 | mov TAB:TMPR, TAB:RB->metatable
3837 | test TAB:TMPR, TAB:TMPR
3838 | jz >6 // No metatable: continue.
3839 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3840 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3843 | mov L:CARG1, SAVE_L
3844 | mov L:CARG1->base, BASE
3848 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3849 | // Handles write barrier for the new key. TValue * returned in rax (RC).
3850 | mov L:CARG1, SAVE_L
3851 | mov BASE, L:CARG1->base
3854 | jmp <2 // Must check write barrier for value.
3856 |7: // Possible table write barrier for the value. Skip valiswhite check.
3857 | barrierback TAB:RB, ITYPE
3861 | ins_ABC // RA = src, RB = table, RC = byte literal
3862 | mov TAB:RB, [BASE+RB*8]
3863 | checktab TAB:RB, ->vmeta_tsetb
3864 | cmp RCd, TAB:RB->asize
3867 | add RC, TAB:RB->array
3868 | cmp aword [RC], LJ_TNIL
3869 | je >3 // Previous value is nil?
3871 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3873 |2: // Set array slot.
3874 | mov ITYPE, [BASE+RA*8]
3878 |3: // Check for __newindex if previous value is nil.
3879 | mov TAB:TMPR, TAB:RB->metatable
3880 | test TAB:TMPR, TAB:TMPR
3882 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3883 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3886 |7: // Possible table write barrier for the value. Skip valiswhite check.
3887 | barrierback TAB:RB, TMPR
3891 | ins_ABC // RA = src, RB = table, RC = key
3892 | mov TAB:RB, [BASE+RB*8]
3895 | mov RC, [BASE+RC*8]
3897 | cvttsd2si RCd, qword [BASE+RC*8]
3899 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3902 | cmp RCd, TAB:RB->asize
3905 | add RC, TAB:RB->array
3906 | // Set array slot.
3908 | mov ITYPE, [BASE+RA*8]
3912 |7: // Possible table write barrier for the value. Skip valiswhite check.
3913 | barrierback TAB:RB, TMPR
3918 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3920 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3921 | lea RA, [BASE+RA*8]
3922 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3924 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3929 | jz >4 // Nothing to copy?
3930 | add RDd, TMPRd // Compute needed size.
3931 | cmp RDd, TAB:RB->asize
3932 | ja >5 // Doesn't fit into array part?
3935 | add TMPR, TAB:RB->array
3936 |3: // Copy result slots to table.
3946 |5: // Need to resize array part.
3947 | mov L:CARG1, SAVE_L
3948 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3953 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3954 | mov BASE, L:RB->base
3955 | movzx RAd, PC_RA // Restore RA.
3956 | movzx RDd, PC_RD // Restore RD.
3959 |7: // Possible table write barrier for any value. Skip valiswhite check.
3960 | barrierback TAB:RB, RD
3964 /* -- Calls and vararg handling ----------------------------------------- */
3966 case BC_CALL: case BC_CALLM:
3967 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3968 if (op == BC_CALLM) {
3969 | add NARGS:RDd, MULTRES
3971 | mov LFUNC:RB, [BASE+RA*8]
3972 | checkfunc LFUNC:RB, ->vmeta_call_ra
3973 | lea BASE, [BASE+RA*8+16]
3978 | ins_AD // RA = base, RD = extra_nargs
3979 | add NARGS:RDd, MULTRES
3980 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3983 | ins_AD // RA = base, RD = nargs+1
3984 | lea RA, [BASE+RA*8+16]
3985 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3986 | mov LFUNC:RB, [RA-16]
3987 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3990 | test PCd, FRAME_TYPE
3993 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
3994 | mov MULTRES, NARGS:RDd
3997 |2: // Move args down.
4005 | mov LFUNC:RB, [BASE-16]
4008 | mov NARGS:RDd, MULTRES
4009 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4014 |5: // Tailcall to a fast function.
4015 | test PCd, FRAME_TYPE // Lua frame below?
4019 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4020 | cleartp LFUNC:KBASE
4021 | mov KBASE, LFUNC:KBASE->pc
4022 | mov KBASE, [KBASE+PC2PROTO(k)]
4025 |7: // Tailcall from a vararg function.
4026 | sub PC, FRAME_VARG
4027 | test PCd, FRAME_TYPEP
4028 | jnz >8 // Vararg frame below?
4029 | sub BASE, PC // Need to relocate BASE/KBASE down.
4034 | add PCd, FRAME_VARG
4039 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4040 | lea RA, [BASE+RA*8+16] // fb = base+2
4041 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4042 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4045 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-1] = fb[-5]
4046 | mov [RA-16], LFUNC:RB
4047 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4048 | checkfunc LFUNC:RB, ->vmeta_call
4054 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4056 | // NYI: add hotloop, record BC_ITERN.
4058 | mov TAB:RB, [BASE+RA*8-16]
4060 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4061 | mov TMPRd, TAB:RB->asize
4063 | mov ITYPE, TAB:RB->array
4064 |1: // Traverse array part.
4065 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4066 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4068 | cvtsi2sd xmm0, RCd
4070 | // Copy array slot to returned value.
4071 | mov RB, [ITYPE+RC*8]
4072 | mov [BASE+RA*8+8], RB
4073 | // Return array index as a numeric key.
4076 | mov [BASE+RA*8], ITYPE
4078 | movsd qword [BASE+RA*8], xmm0
4081 | mov [BASE+RA*8-8], RCd // Update control var.
4083 | movzx RDd, PC_RD // Get target from ITERL.
4088 |4: // Skip holes in array part.
4092 |5: // Traverse hash part.
4095 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4096 | imul ITYPEd, RCd, #NODE
4097 | add NODE:ITYPE, TAB:RB->node
4098 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4099 | lea TMPRd, [RCd+TMPRd+1]
4100 | // Copy key and value from hash slot.
4101 | mov RB, NODE:ITYPE->key
4102 | mov RC, NODE:ITYPE->val
4103 | mov [BASE+RA*8], RB
4104 | mov [BASE+RA*8+8], RC
4105 | mov [BASE+RA*8-8], TMPRd
4108 |7: // Skip holes in hash part.
4114 | ins_AD // RA = base, RD = target (points to ITERN)
4115 | mov CFUNC:RB, [BASE+RA*8-24]
4116 | checkfunc CFUNC:RB, >5
4117 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4118 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4119 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4121 | mov64 TMPR, U64x(fffe7fff, 00000000)
4122 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4125 |5: // Despecialize bytecode if any of the checks fail.
4128 | mov byte [PC], BC_ITERC
4133 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4134 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4135 | lea RA, [BASE+RA*8]
4136 | sub TMPR, [BASE-8]
4137 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4139 | jz >5 // Copy all varargs?
4140 | lea RB, [RA+RB*8-8]
4141 | cmp TMPR, BASE // No vararg slots?
4143 |1: // Copy vararg slots to destination slots.
4148 | cmp RA, RB // All destination slots filled?
4150 | cmp TMPR, BASE // No more vararg slots?
4152 |2: // Fill up remainder with nil.
4153 | mov aword [RA], LJ_TNIL
4160 |5: // Copy all varargs.
4161 | mov MULTRES, 1 // MULTRES = 0+1
4164 | jbe <3 // No vararg slots?
4168 | mov MULTRES, RBd // MULTRES = #varargs+1
4171 | cmp RC, L:RB->maxstack
4172 | ja >7 // Need to grow stack?
4173 |6: // Copy all vararg slots.
4178 | cmp TMPR, BASE // No more vararg slots?
4182 |7: // Grow stack for varargs.
4183 | mov L:RB->base, BASE
4186 | sub TMPR, BASE // Need delta, because BASE may change.
4188 | mov CARG2d, MULTRES
4191 | call extern lj_state_growstack // (lua_State *L, int n)
4192 | mov BASE, L:RB->base
4193 | movsxd TMPR, TMP1hi
4199 /* -- Returns ----------------------------------------------------------- */
4202 | ins_AD // RA = results, RD = extra_nresults
4203 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4204 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4207 case BC_RET: case BC_RET0: case BC_RET1:
4208 | ins_AD // RA = results, RD = nresults+1
4209 if (op != BC_RET0) {
4214 | mov MULTRES, RDd // Save nresults+1.
4215 | test PCd, FRAME_TYPE // Check frame type marker.
4216 | jnz >7 // Not returning to a fixarg Lua func?
4220 | mov KBASE, BASE // Use KBASE for result move.
4223 |2: // Move results down.
4224 | mov RB, [KBASE+RA]
4225 | mov [KBASE-16], RB
4230 | mov RDd, MULTRES // Note: MULTRES may be >255.
4231 | movzx RBd, PC_RB // So cannot compare with RDL!
4233 | cmp RBd, RDd // More results expected?
4242 | cmp PC_RB, RDL // More results expected?
4249 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4250 | mov LFUNC:KBASE, [BASE-16]
4251 | cleartp LFUNC:KBASE
4252 | mov KBASE, LFUNC:KBASE->pc
4253 | mov KBASE, [KBASE+PC2PROTO(k)]
4256 |6: // Fill up results with nil.
4258 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4261 | mov aword [BASE+RD*8-24], LJ_TNIL
4266 |7: // Non-standard return case.
4267 | lea RB, [PC-FRAME_VARG]
4268 | test RBd, FRAME_TYPEP
4270 | // Return from vararg function: relocate BASE down and RA up.
4272 if (op != BC_RET0) {
4278 /* -- Loops and branches ------------------------------------------------ */
4280 |.define FOR_IDX, [RA]
4281 |.define FOR_STOP, [RA+8]
4282 |.define FOR_STEP, [RA+16]
4283 |.define FOR_EXT, [RA+24]
4289 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4299 vk = (op == BC_IFORL || op == BC_JFORL);
4300 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4301 | lea RA, [BASE+RA*8]
4305 | mov TMPR, FOR_STOP
4307 | checkint TMPR, ->vmeta_for
4308 | mov ITYPE, FOR_STEP
4309 | test ITYPEd, ITYPEd; js >5
4311 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4313 #ifdef LUA_USE_ASSERT
4314 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4315 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4317 | mov ITYPE, FOR_STEP
4318 | test ITYPEd, ITYPEd; js >5
4319 | add RBd, ITYPEd; jo >1
4325 if (op == BC_FORI) {
4330 } else if (op == BC_JFORI) {
4336 } else if (op == BC_IFORL) {
4349 |5: // Invert check for negative step.
4352 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4354 | add RBd, ITYPEd; jo <1
4360 if (op == BC_FORI) {
4362 } else if (op == BC_JFORI) {
4366 } else if (op == BC_IFORL) {
4372 |9: // Fallback to FP variant.
4377 | checknumtp FOR_IDX, ->vmeta_for
4380 | checknumtp FOR_STOP, ->vmeta_for
4382 #ifdef LUA_USE_ASSERT
4383 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4384 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4389 | checknum RB, ->vmeta_for
4391 | movsd xmm0, qword FOR_IDX
4392 | movsd xmm1, qword FOR_STOP
4394 | addsd xmm0, qword FOR_STEP
4395 | movsd qword FOR_IDX, xmm0
4396 | test RB, RB; js >3
4400 | ucomisd xmm1, xmm0
4402 | movsd qword FOR_EXT, xmm0
4403 if (op == BC_FORI) {
4410 } else if (op == BC_JFORI) {
4414 } else if (op == BC_IFORL) {
4431 |3: // Invert comparison if step is negative.
4432 | ucomisd xmm0, xmm1
4440 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4448 | ins_AJ // RA = base, RD = target
4449 | lea RA, [BASE+RA*8]
4451 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4452 if (op == BC_JITERL) {
4456 | branchPC RD // Otherwise save control var + branch.
4464 | ins_A // RA = base, RD = target (loop extent)
4465 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4466 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4470 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4474 | ins_A // RA = base, RD = target (loop extent)
4480 | ins_AD // RA = base (ignored), RD = traceno
4481 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4482 | mov TRACE:RD, [RA+RD*8]
4483 | mov RD, TRACE:RD->mcode
4485 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4486 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4487 | // Save additional callee-save registers only used in compiled code.
4494 | sub rsp, 10*16+4*8
4495 | movdqa [RA-1*16], xmm6
4496 | movdqa [RA-2*16], xmm7
4497 | movdqa [RA-3*16], xmm8
4498 | movdqa [RA-4*16], xmm9
4499 | movdqa [RA-5*16], xmm10
4500 | movdqa [RA-6*16], xmm11
4501 | movdqa [RA-7*16], xmm12
4502 | movdqa [RA-8*16], xmm13
4503 | movdqa [RA-9*16], xmm14
4504 | movdqa [RA-10*16], xmm15
4515 | ins_AJ // RA = unused, RD = target
4520 /* -- Function headers -------------------------------------------------- */
4523 ** Reminder: A function may be called with func/args above L->maxstack,
4524 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4525 ** too. This means all FUNC* ops (including fast functions) must check
4526 ** for stack overflow _before_ adding more slots!
4533 case BC_FUNCV: /* NYI: compiled vararg functions. */
4534 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4542 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4543 | mov KBASE, [PC-4+PC2PROTO(k)]
4545 | lea RA, [BASE+RA*8] // Top of frame.
4546 | cmp RA, L:RB->maxstack
4547 | ja ->vm_growstack_f
4548 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4549 | cmp NARGS:RDd, RAd // Check for missing parameters.
4552 if (op == BC_JFUNCF) {
4559 |3: // Clear missing parameters.
4560 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4562 | cmp NARGS:RDd, RAd
4571 | int3 // NYI: compiled vararg functions
4572 break; /* NYI: compiled vararg functions. */
4575 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4576 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4577 | lea RD, [BASE+NARGS:RD*8+8]
4578 | mov LFUNC:KBASE, [BASE-16]
4579 | mov [RD-8], RB // Store delta + FRAME_VARG.
4580 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4583 | cmp RA, L:RB->maxstack
4584 | ja ->vm_growstack_v // Need to grow stack.
4587 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4591 |1: // Copy fixarg slots up to new frame.
4594 | jnb >3 // Less args than parameters?
4595 | mov KBASE, [RA-16]
4598 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4602 if (op == BC_JFUNCV) {
4606 | mov KBASE, [PC-4+PC2PROTO(k)]
4610 |3: // Clear missing parameters.
4611 | mov aword [RD], LJ_TNIL
4620 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4621 | mov CFUNC:RB, [BASE-16]
4623 | mov KBASE, CFUNC:RB->f
4625 | lea RD, [BASE+NARGS:RD*8-8]
4626 | mov L:RB->base, BASE
4627 | lea RA, [RD+8*LUA_MINSTACK]
4628 | cmp RA, L:RB->maxstack
4630 if (op == BC_FUNCC) {
4631 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4634 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4636 | ja ->vm_growstack_c // Need to grow stack.
4638 if (op == BC_FUNCC) {
4639 | call KBASE // (lua_State *L)
4641 | // (lua_State *L, lua_CFunction f)
4642 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4644 | // nresults returned in eax (RD).
4645 | mov BASE, L:RB->base
4646 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4647 | set_vmstate INTERP
4648 | lea RA, [BASE+RD*8]
4650 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4651 | mov PC, [BASE-8] // Fetch PC of caller.
4655 /* ---------------------------------------------------------------------- */
4658 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4664 static int build_backend(BuildCtx *ctx) /* Drives code generation: calls build_subroutines() once, then build_ins() for every opcode number. */
4667 dasm_growpc(Dst, BC__MAX); /* NOTE(review): presumably reserves one DynASM pc label per bytecode op -- confirm against the DynASM API. */
4668 build_subroutines(ctx); /* NOTE(review): body not visible here; by name, emits the shared helper code before the per-opcode code. */
4670 for (op = 0; op < BC__MAX; op++) /* Visit every opcode number from 0 up to (not including) BC__MAX, in ascending order. */
4671 build_ins(ctx, (BCOp)op, op); /* The same value is passed twice: once cast to BCOp, once as a plain integer. */
4675 /* Emit pseudo frame-info for all assembler functions. */
4676 static void emit_asm_debug(BuildCtx *ctx)
4678 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4679 switch (ctx->mode) {
4681 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4684 "\t.long .LECIE0-.LSCIE0\n"
4686 "\t.long 0xffffffff\n"
4692 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4693 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4698 "\t.long .LEFDE0-.LASFDE0\n"
4700 "\t.long .Lframe0\n"
4703 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4704 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4705 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4706 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4707 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4709 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4710 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4713 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4717 "\t.long .LEFDE1-.LASFDE1\n"
4719 "\t.long .Lframe0\n"
4720 "\t.quad lj_vm_ffi_call\n"
4722 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4723 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4724 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4725 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4727 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4730 #if (defined(__sun__) && defined(__svr4__))
4731 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4733 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4737 "\t.long .LECIE1-.LSCIE1\n"
4741 "\t.string \"zPR\"\n"
4745 "\t.uleb128 6\n" /* augmentation length */
4746 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4747 "\t.long lj_err_unwind_dwarf-.\n"
4748 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4749 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4750 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4755 "\t.long .LEFDE2-.LASFDE2\n"
4757 "\t.long .LASFDE2-.Lframe1\n"
4758 "\t.long .Lbegin-.\n"
4760 "\t.uleb128 0\n" /* augmentation length */
4761 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4762 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4763 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4764 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4765 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4767 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4771 "\t.long .LECIE2-.LSCIE2\n"
4775 "\t.string \"zR\"\n"
4779 "\t.uleb128 1\n" /* augmentation length */
4780 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4781 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4782 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4787 "\t.long .LEFDE3-.LASFDE3\n"
4789 "\t.long .LASFDE3-.Lframe2\n"
4790 "\t.long lj_vm_ffi_call-.\n"
4792 "\t.uleb128 0\n" /* augmentation length */
4793 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4794 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4795 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4796 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4798 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4803 /* Mental note: never let Apple design an assembler.
4804 ** Or a linker. Or a plastic case. But I digress.
4806 case BUILD_machasm: {
4811 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4814 "\t.set L$set$x,LECIEX-LSCIEX\n"
4819 "\t.ascii \"zPR\\0\"\n"
4823 "\t.byte 6\n" /* augmentation length */
4824 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4825 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4826 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4827 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4828 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4831 for (i = 0; i < ctx->nsym; i++) {
4832 const char *name = ctx->sym[i].name;
4833 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4834 if (size == 0) continue;
4836 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4841 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4842 "\t.long L$set$%d\n"
4844 "\t.long LASFDE%d-EH_frame1\n"
4847 "\t.byte 0\n" /* augmentation length */
4848 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4849 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4850 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4851 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4852 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4855 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4861 "\t.set L$set$y,LECIEY-LSCIEY\n"
4866 "\t.ascii \"zR\\0\"\n"
4870 "\t.byte 1\n" /* augmentation length */
4871 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4872 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4873 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4877 "_lj_vm_ffi_call.eh:\n"
4879 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4880 "\t.long L$set$yy\n"
4882 "\t.long LASFDEY-EH_frame2\n"
4883 "\t.long _lj_vm_ffi_call-.\n"
4885 "\t.byte 0\n" /* augmentation length */
4886 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4887 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4888 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4889 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4891 "LEFDEY:\n\n", fcsize);
4894 fprintf(ctx->fp, ".subsections_via_symbols\n");
4898 default: /* Difficult for other modes. */