1 |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
6 |.section code_op, code_sub
8 |.actionlist build_actionlist
10 |.globalnames globnames
11 |.externnames extnames
13 |//-----------------------------------------------------------------------
16 |.define X64WIN, 1 // Windows/x64 calling conventions.
19 |// Fixed register assignments for the interpreter.
20 |// This is very fragile and has many dependencies. Caveat emptor.
21 |.define BASE, rdx // Not C callee-save, refetched anyway.
23 |.define KBASE, rdi // Must be C callee-save.
24 |.define PC, rsi // Must be C callee-save.
25 |.define DISPATCH, rbx // Must be C callee-save.
28 |.define DISPATCHd, ebx
30 |.define KBASE, r15 // Must be C callee-save.
31 |.define PC, rbx // Must be C callee-save.
32 |.define DISPATCH, r14 // Must be C callee-save.
35 |.define DISPATCHd, r14d
42 |.define RB, rbp // Must be rbp (C callee-save).
44 |.define RC, rax // Must be rax.
60 |.define CARG1, rcx // x64/WIN64 C call arguments.
69 |.define CARG1, rdi // x64/POSIX C call arguments.
83 |// Type definitions. Some of these are only used for documentation.
85 |.type GL, global_State
99 |// Stack layout while in interpreter. Must match with lj_frame.h.
100 |//-----------------------------------------------------------------------
101 |.if X64WIN // x64/Windows stack layout
103 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
105 | push rdi; push rsi; push rbx
106 | sub rsp, CFRAME_SPACE
109 | push rbp; saveregs_
112 | add rsp, CFRAME_SPACE
113 | pop rbx; pop rsi; pop rdi; pop rbp
116 |.define SAVE_CFRAME, aword [rsp+aword*13]
117 |.define SAVE_PC, aword [rsp+aword*12]
118 |.define SAVE_L, aword [rsp+aword*11]
119 |.define SAVE_ERRF, dword [rsp+dword*21]
120 |.define SAVE_NRES, dword [rsp+dword*20]
121 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123 |.define SAVE_R4, aword [rsp+aword*8]
124 |.define SAVE_R3, aword [rsp+aword*7]
125 |.define SAVE_R2, aword [rsp+aword*6]
126 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127 |.define ARG5, aword [rsp+aword*4]
128 |.define CSAVE_4, aword [rsp+aword*3]
129 |.define CSAVE_3, aword [rsp+aword*2]
130 |.define CSAVE_2, aword [rsp+aword*1]
131 |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
134 |.define ARG5d, dword [rsp+dword*8]
135 |.define TMP1, ARG5 // TMP1 overlaps ARG5
136 |.define TMP1d, ARG5d
137 |.define TMP1hi, dword [rsp+dword*9]
138 |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
140 |//-----------------------------------------------------------------------
141 |.else // x64/POSIX stack layout
143 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
145 | push rbx; push r15; push r14
149 | sub rsp, CFRAME_SPACE
152 | push rbp; saveregs_
155 | add rsp, CFRAME_SPACE
159 | pop r14; pop r15; pop rbx; pop rbp
162 |//----- 16 byte aligned,
164 |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165 |.define SAVE_R4, aword [rsp+aword*10]
166 |.define SAVE_R3, aword [rsp+aword*9]
167 |.define SAVE_R2, aword [rsp+aword*8]
168 |.define SAVE_R1, aword [rsp+aword*7]
169 |.define SAVE_RU2, aword [rsp+aword*6]
170 |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
172 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173 |.define SAVE_R4, aword [rsp+aword*8]
174 |.define SAVE_R3, aword [rsp+aword*7]
175 |.define SAVE_R2, aword [rsp+aword*6]
176 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
178 |.define SAVE_CFRAME, aword [rsp+aword*4]
179 |.define SAVE_PC, aword [rsp+aword*3]
180 |.define SAVE_L, aword [rsp+aword*2]
181 |.define SAVE_ERRF, dword [rsp+dword*3]
182 |.define SAVE_NRES, dword [rsp+dword*2]
183 |.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184 |//----- 16 byte aligned
186 |.define TMP1d, dword [rsp]
187 |.define TMP1hi, dword [rsp+dword*1]
188 |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
192 |//-----------------------------------------------------------------------
194 |// Instruction headers.
195 |.macro ins_A; .endmacro // Empty body: emits no operand decoding.
196 |.macro ins_AD; .endmacro // Empty body: emits no operand decoding.
197 |.macro ins_AJ; .endmacro // Empty body: emits no operand decoding.
198 |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // RB = zero-extended RCH first, then RC = zero-extended RCL (presumably RCH/RCL are byte parts of RC, so RCH must be read before RCd is written -- confirm).
199 |.macro ins_AB_; movzx RBd, RCH; .endmacro // RB = zero-extended RCH; RC is not written.
200 |.macro ins_A_C; movzx RCd, RCL; .endmacro // RC = zero-extended RCL; RB is not written.
201 |.macro ins_AND; not RD; .endmacro // Bitwise complement of RD.
203 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
210 | jmp aword [DISPATCH+OP*8]
213 |// Instruction footer.
215 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
216 | .define ins_next, ins_NEXT
217 | .define ins_next_, ins_NEXT
219 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220 | // Affects only certain kinds of benchmarks (and only with -j off).
221 | // Around 10%-30% slower on Core2, a lot slower on P4.
231 |// Call decode and dispatch.
233 | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234 | mov PC, LFUNC:RB->pc
239 | jmp aword [DISPATCH+OP*8]
243 | // BASE = new base, RB = LFUNC, RD = nargs+1
248 |//-----------------------------------------------------------------------
250 |// Macros to clear or set tags.
251 |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zeroes the top 17 bits of reg (bits 47..63) and keeps the low 47 bits; clobbers flags.
252 |.macro settp, reg, tp
253 | mov64 ITYPE, ((int64_t)tp<<47)
256 |.macro settp, dst, reg, tp
257 | mov64 dst, ((int64_t)tp<<47)
261 | settp reg, LJ_TISNUM
263 |.macro setint, dst, reg
264 | settp dst, reg, LJ_TISNUM
267 |// Macros to test operand types.
268 |.macro checktp_nc, reg, tp, target
274 |.macro checktp, reg, tp, target
281 |.macro checktptp, src, tp, target
287 |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288 |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289 |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
291 |.macro checknumx, reg, target, jump
294 | cmp ITYPEd, LJ_TISNUM
297 |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
298 |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
299 |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
300 |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
301 |.macro checknumber, src, target; checknumx src, target, ja; .endmacro
303 |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = all bits set except bit 47.
304 |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = all bits set except bit 48 (2<<47 == 1<<48).
306 |// These operands must be used with movzx. All are relative to PC and lie in the 4 bytes just below it.
307 |.define PC_OP, byte [PC-4] // Lowest-addressed byte of the 4 bytes below PC.
308 |.define PC_RA, byte [PC-3] // Second byte.
309 |.define PC_RB, byte [PC-1] // Highest-addressed byte.
310 |.define PC_RC, byte [PC-2] // Third byte.
311 |.define PC_RD, word [PC-2] // 16-bit view of the same bytes as PC_RC (low byte) and PC_RB (high byte).
313 |.macro branchPC, reg
314 | lea PC, [PC+reg*4-BCBIAS_J*4]
317 |// Assumes DISPATCH is relative to GL.
318 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Displacement added to DISPATCH to address a global_State field. */
319 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Same scheme for a jit_State field (presumably also added to DISPATCH -- confirm at use sites). */
321 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Field offset minus the struct size, i.e. a negative displacement measured from the end of a GCproto; used as [PC-4+PC2PROTO(..)] and [KBASE+PC2PROTO(k)]. */
323 |// Decrement hashed hotcount and trigger trace recorder if zero.
327 | and reg, HOTCOUNT_PCMASK
328 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
335 | and reg, HOTCOUNT_PCMASK
336 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
340 |// Set current VM state.
341 |.macro set_vmstate, st
342 | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
345 |.macro fpop1; fstp st1; .endmacro // Copies st0 into st1 and pops the x87 stack: the old st1 is dropped and the old st0 stays on top.
347 |// Synthesize SSE FP constants.
348 |.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349 | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
352 |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353 | mov64 tmp, U64x(val,00000000); movd reg, tmp
356 |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357 | sseconst_hi reg, tmp, 80000000
359 |.macro sseconst_1, reg, tmp // Synthesize 1.0.
360 | sseconst_hi reg, tmp, 3ff00000
362 |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363 | sseconst_hi reg, tmp, bff00000
365 |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366 | sseconst_hi reg, tmp, 43300000
368 |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369 | sseconst_hi reg, tmp, 43380000
372 |// Move table write barrier back. Overwrites reg.
373 |.macro barrierback, tab, reg
374 | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375 | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of the gc.grayagain list.
376 | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
377 | mov tab->gclist, reg // Link the previous head behind tab.
380 |//-----------------------------------------------------------------------
382 /* Generate subroutines used by opcodes and other parts of the VM. */
383 /* The .code_sub section should be last to help static branch prediction. */
384 static void build_subroutines(BuildCtx *ctx)
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
396 | // Return from pcall or xpcall fast func.
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
403 | mov aword [BASE+RA], ITYPE // Prepend true to results.
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua.
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
415 | test PCd, FRAME_TYPE
422 | neg PC // Previous base = BASE - delta.
426 |1: // Move results down.
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
440 | jne >6 // More/less results wanted?
443 | mov L:RB->top, BASE
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
455 | jb >7 // Less results wanted?
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
459 | mov aword [BASE-16], LJ_TNIL
464 |7: // Less results wanted.
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
486 | jmp ->vm_unwind_c_eh
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
494 | mov GL:RB, L:RB->glref
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C
498 |->vm_unwind_rethrow:
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
508 | and CARG1, CFRAME_RAWMASK
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
519 | mov [BASE-16], RA // Prepend false to error message.
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
534 | sub RD, 16 // LJ_FR2
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8]
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
548 | // RB = L, L->base = new base, L->top = top
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base
553 | mov LFUNC:RB, [BASE-16]
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
573 | lea KBASE, [esp+CFRAME_RESUME] // KBASE = stack pointer plus CFRAME_RESUME; stored to L->cframe below. NOTE(review): base is spelled esp here but rsp at the other cframe setups -- confirm the assembler emits a 64-bit address.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD
580 | mov L:RB->cframe, KBASE // L->cframe = address computed by the lea above.
581 | cmp byte L:RB->status, RDL
582 | je >2 // Initial resume (like a call).
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB // Store L in the cur_L field of the global state.
587 | mov byte L:RB->status, RDL
588 | mov BASE, L:RB->base
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
596 | test PCd, FRAME_TYPE
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
604 | mov SAVE_ERRF, CARG4d
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
630 | sub PC, BASE // PC = frame delta + frame type
635 | add NARGS:RDd, 1 // RD = nargs+1
638 | mov LFUNC:RB, [RA-16]
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
641 |->vm_call_dispatch_f:
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
667 | // TValue * (new base) or NULL returned in rax (RC).
669 | jz ->vm_leave_cp // No base? Just remove C frame.
672 | jmp <2 // Else continue with the call.
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
678 |//-- Continuation dispatch ----------------------------------------------
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
694 | mov LFUNC:KBASE, [BASE-16]
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)]
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
714 | lea RA, [BASE+RA*8]
721 | mov L:CARG1, SAVE_L
722 | mov L:CARG1->base, BASE
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE
736 |//-- Table indexing metamethods -----------------------------------------
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
776 | mov BASE, L:RB->base
779 |->cont_ra: // BASE = base, RC = result
782 | mov [BASE+RA*8], RB
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
794 | jmp ->vm_call_dispatch_f
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in rax (RC).
803 | mov BASE, RB // Restore BASE.
809 |//-----------------------------------------------------------------------
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
849 | mov BASE, L:RB->base
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
854 | mov RB, [BASE+RA*8]
856 |->cont_nop: // BASE = base, (RC = result)
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
862 | mov [RA-24], PC // [cont|PC]
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8]
867 | lea PC, [RA+FRAME_CONT]
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
872 | jmp ->vm_call_dispatch_f
876 | mov L:CARG1, SAVE_L
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
881 | mov L:CARG1, SAVE_L
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in rax (RC).
891 | mov BASE, RB // Restore BASE.
894 |//-- Comparison metamethods ---------------------------------------------
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
902 | lea CARG3, [BASE+RD*8]
903 | lea CARG2, [BASE+RA*8]
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8]
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in rax (RC).
914 | mov BASE, L:RB->base
926 |->cont_condt: // BASE = base, RC = result
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
934 |->cont_condf: // BASE = base, RC = result
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in rax (RC).
967 | mov L:RB->base, BASE
969 | mov CARG2d, dword [PC-4]
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in rax (RC).
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base
987 |//-- Arithmetic metamethods ---------------------------------------------
995 | lea RC, [KBASE+RC*8]
1004 | lea TMPR, [KBASE+RC*8]
1005 | lea RC, [BASE+RB*8]
1010 | lea RC, [BASE+RD*8]
1020 | lea RC, [BASE+RC*8]
1022 | lea RB, [BASE+RB*8]
1024 | lea RA, [BASE+RA*8]
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1035 | movzx CARG5d, PC_OP
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in rax (RC).
1046 | mov BASE, L:RB->base
1050 | // Call metamethod for binary op.
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT]
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in rax (RC).
1069 | mov BASE, L:RB->base
1072 | jne ->vmeta_binop // Binop call for compatibility.
1074 | mov TAB:CARG1, [BASE+RD*8]
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1081 |//-- Call metamethod ----------------------------------------------------
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16]
1093 | lea CARG3, [RA+NARGS:RD*8-8]
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16]
1099 | lea CARG3, [RA+NARGS:RD*8-8]
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d
1107 | mov LFUNC:RB, [RA-16]
1110 | // This is fragile. L->base must not move, KBASE must always be defined.
1111 | cmp KBASE, BASE // Continue with CALLT if flag set.
1114 | ins_call // Otherwise call resolved metamethod.
1116 |//-- Argument coercion for 'for' statement ------------------------------
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1136 |.macro .ffunc, name
1140 |.macro .ffunc_1, name
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1145 |.macro .ffunc_2, name
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1150 |.macro .ffunc_n, name, op
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE]
1156 |.macro .ffunc_n, name
1157 | .ffunc_n name, movsd
1160 |.macro .ffunc_nn, name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE]
1165 | movsd xmm1, qword [BASE+8]
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1177 |//-- Base library: checks -----------------------------------------------
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1204 | mov RBd, LJ_TISNUM
1209 | mov CFUNC:RB, [BASE-16]
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1213 | settp STR:RC, LJ_TSTR
1214 | mov [BASE-16], STR:RC
1217 |//-- Base library: getters and setters ---------------------------------
1219 |.ffunc_1 getmetatable
1220 | mov TAB:RB, [BASE]
1222 | checktab TAB:RB, >6
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->hash
1234 | settp STR:RC, LJ_TSTR
1236 | add NODE:RA, TAB:RB->node
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1241 | mov NODE:RA, NODE:RA->next
1242 | test NODE:RA, NODE:RA
1244 | jmp ->fff_res1 // Not found, keep default result.
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1252 | cmp ITYPEd, LJ_TUDATA; je <1
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1260 |.ffunc_2 setmetatable
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback
1268 | mov TAB:RB->metatable, TAB:RA
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8]
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in rax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1302 |//-- Base library: conversions ------------------------------------------
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1308 | checknumber RB, ->fff_fallback
1314 | // Only handles the string or number case inline.
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3
1318 | // A __tostring method in the string base metatable is ignored.
1320 | mov [BASE-16], STR:RB
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1339 | // GCstr * returned in rax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR
1344 |//-- Base library: iterators -------------------------------------------
1347 | je >2 // Missing 2nd arg?
1351 | checktab RA, ->fff_fallback
1354 | checktab CARG2, ->fff_fallback
1357 | mov L:RB->base, BASE // Add frame since C call can throw.
1358 | mov L:RB->top, BASE // Dummy frame length is ok.
1361 | lea CARG3, [BASE+8]
1362 | mov CARG2, RA // Caveat: CARG2 == BASE.
1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1368 | mov SAVE_PC, PC // Needed for ITERN fallback.
1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370 | // Flag returned in eax (RD).
1371 | mov BASE, L:RB->base
1372 | test RDd, RDd; jz >3 // End of traversal?
1373 | // Copy key and value to results.
1381 |2: // Set missing 2nd arg to nil.
1382 | mov aword [BASE+8], LJ_TNIL
1384 |3: // End of traversal: return nil.
1385 | mov aword [BASE-16], LJ_TNIL
1389 | mov TAB:RB, [BASE]
1391 | checktab TAB:RB, ->fff_fallback
1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1395 | mov CFUNC:RD, [BASE-16]
1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1398 | settp CFUNC:RD, LJ_TFUNC
1400 | mov [BASE-16], CFUNC:RD
1401 | mov [BASE-8], TMPR
1402 | mov aword [BASE], LJ_TNIL
1406 |.ffunc_2 ipairs_aux
1407 | mov TAB:RB, [BASE]
1408 | checktab TAB:RB, ->fff_fallback
1411 | checkint RA, ->fff_fallback
1413 | checknumtp [BASE+8], ->fff_fallback
1414 | movsd xmm0, qword [BASE+8]
1420 | mov [BASE-16], ITYPE
1422 | sseconst_1 xmm1, TMPR
1424 | cvttsd2si RAd, xmm0
1425 | movsd qword [BASE-16], xmm0
1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428 | mov RD, TAB:RB->array
1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1432 | // Copy array slot.
1436 |2: // Check for empty hash part first. Otherwise call C function.
1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1445 | mov RB, BASE // Save BASE.
1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449 | // cTValue * or NULL returned in rax (RD).
1458 | mov TAB:RB, [BASE]
1460 | checktab TAB:RB, ->fff_fallback
1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1464 | mov CFUNC:RD, [BASE-16]
1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1467 | settp CFUNC:RD, LJ_TFUNC
1469 | mov [BASE-16], CFUNC:RD
1470 | mov [BASE-8], TMPR
1472 | mov64 RD, ((int64_t)LJ_TISNUM<<47)
1475 | mov qword [BASE], 0
1480 |//-- Base library: catch errors ----------------------------------------
1485 | mov PCd, 16+FRAME_PCALL
1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488 | shr RB, HOOK_ACTIVE_SHIFT
1490 | add PC, RB // Remember active hook before pcall.
1491 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1494 | mov RB, [RA+KBASE*8-24]
1495 | mov [RA+KBASE*8-16], RB
1498 | jmp ->vm_call_dispatch
1501 | mov LFUNC:RA, [BASE+8]
1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1503 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504 | mov [BASE], LFUNC:RA
1505 | mov [BASE+8], LFUNC:RB
1508 | mov PCd, 24+FRAME_PCALL
1511 |//-- Coroutine library --------------------------------------------------
1513 |.macro coroutine_resume_wrap, resume
1515 |.ffunc_1 coroutine_resume
1519 |.ffunc coroutine_wrap_aux
1520 | mov CFUNC:RB, [BASE-16]
1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1534 | je >1 // Status != LUA_YIELD (i.e. 0)?
1535 | cmp RA, L:RB->base // Check for presence of initial func.
1537 | mov PC, [RA-8] // Move initial function up.
1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1550 | mov L:RB->base, BASE
1552 | add BASE, 8 // Keep resumed thread in stack for GC.
1554 | mov L:RB->top, BASE
1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1560 | sub RB, PC // Relative to PC.
1564 |2: // Move args to coroutine.
1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1577 | mov BASE, L:RB->base
1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1579 | set_vmstate INTERP
1581 | cmp eax, LUA_YIELD
1584 | mov RA, L:PC->base
1585 | mov KBASE, L:PC->top
1586 | mov L:PC->top, RA // Clear coroutine stack.
1589 | je >6 // No results?
1592 | cmp RD, L:RB->maxstack
1593 | ja >9 // Need to grow stack?
1597 |5: // Move results from coroutine.
1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606 | mov_true ITYPE // Prepend true to results.
1607 | mov [BASE-8], ITYPE
1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1619 | test PCd, FRAME_TYPE
1623 |8: // Coroutine returned with error (at co->top-1).
1625 | mov_false ITYPE // Prepend false to results.
1626 | mov [BASE-8], ITYPE
1629 | mov L:PC->top, RA // Clear error from coroutine stack.
1630 | // Copy error message.
1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639 | // Error function does not return.
1642 |9: // Handle stack expansion on return from yield.
1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1647 | call extern lj_state_growstack // (lua_State *L, int n)
1649 | mov BASE, L:RB->base
1650 | jmp <4 // Retry the stack move.
1653 | coroutine_resume_wrap 1 // coroutine.resume
1654 | coroutine_resume_wrap 0 // coroutine.wrap
1656 |.ffunc coroutine_yield // Fast function coroutine_yield.
1658 | test aword L:RB->cframe, CFRAME_RESUME // Is the CFRAME_RESUME bit set in L->cframe?
1660 | mov L:RB->base, BASE // Publish the current BASE to L->base.
1661 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + NARGS*8 - 8.
1664 | mov aword L:RB->cframe, RD // Store RD to L->cframe (RD is presumably re-set on a line not shown here).
1666 | mov byte L:RB->status, al // L->status = al, the low byte of rax.
1667 | jmp ->vm_leave_unw // Leave through vm_leave_unw.
1669 |//-- Math library -------------------------------------------------------
1675 | cmp RBd, 0; jns ->fff_resi
1685 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1690 | checknum RB, ->fff_fallback
1698 |.ffunc_n math_sqrt, sqrtsd
1701 | movsd qword [BASE-16], xmm0
1709 | test PCd, FRAME_TYPE
1712 | cmp PC_RB, RDL // More results expected?
1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1720 |6: // Fill up results with nil.
1721 | mov aword [BASE+RD*8-24], LJ_TNIL
1725 |7: // Non-standard return case.
1726 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1729 |.macro math_round, func
1730 | .ffunc math_ .. func
1733 | checknumx RB, ->fff_resRB, je
1736 | checknumtp [BASE], ->fff_fallback
1738 | movsd xmm0, qword [BASE]
1739 | call ->vm_ .. func .. _sse
1741 | cvttsd2si RBd, xmm0
1742 | cmp RBd, 0x80000000
1744 | cvtsi2sd xmm1, RBd
1745 | ucomisd xmm0, xmm1
1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757 | checknumtp [BASE], ->fff_fallback
1758 | movsd xmm0, qword [BASE]
1764 |.macro math_extern, func
1765 | .ffunc_n math_ .. func
1772 |.macro math_extern2, func
1773 | .ffunc_nn math_ .. func
1792 | math_extern2 atan2
1795 |.ffunc_2 math_ldexp
1796 | checknumtp [BASE], ->fff_fallback
1797 | checknumtp [BASE+8], ->fff_fallback
1798 | fld qword [BASE+8]
1803 | fstp qword [BASE-16]
1806 |.ffunc_n math_frexp
1817 | movsd qword [BASE-16], xmm0
1822 | cvtsi2sd xmm1, RBd
1823 | movsd qword [BASE-8], xmm1
1830 | lea CARG2, [BASE-16]
1832 | lea CARG1, [BASE-16]
1839 | movsd qword [BASE-8], xmm0
1843 |.macro math_minmax, name, cmovop, sseop
1849 |1: // Handle integers.
1850 | cmp RAd, RDd; jae ->fff_resRB
1851 | mov TMPR, [BASE+RA*8-8]
1859 | // Convert intermediate result to number and continue below.
1860 | cvtsi2sd xmm0, RBd
1865 | checknumtp [BASE], ->fff_fallback
1868 | movsd xmm0, qword [BASE]
1869 |5: // Handle numbers or integers.
1870 | cmp RAd, RDd; jae ->fff_resxmm0
1872 | mov RB, [BASE+RA*8-8]
1873 | checknumx RB, >6, jb
1875 | cvtsi2sd xmm1, RBd
1878 | checknumtp [BASE+RA*8-8], ->fff_fallback
1881 | movsd xmm1, qword [BASE+RA*8-8]
1888 | math_minmax math_min, cmovg, minsd
1889 | math_minmax math_max, cmovl, maxsd
1891 |//-- String library -----------------------------------------------------
1893 |.ffunc string_byte // Only handle the 1-arg case here.
1894 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Anything but exactly one argument goes to the fallback.
1895 | mov STR:RB, [BASE] // Load the first argument slot.
1896 | checkstr STR:RB, ->fff_fallback // Not a string: use the fallback handler.
1898 | cmp dword STR:RB->len, 1 // Unsigned compare of the length against 1.
1899 | jb ->fff_res0 // Return no results for empty string.
1900 | movzx RBd, byte STR:RB[1] // Zero-extend the byte at element [1] of the STR-typed pointer.
1904 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 // Convert the byte value to a double and return it in xmm0.
1907 |.ffunc string_char // Only handle the 1-arg case here.
1909 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1912 | checkint RB, ->fff_fallback // Integer variant: RB (loaded on a line not shown) must be an integer.
1914 | checknumtp [BASE], ->fff_fallback // Number variant: the argument must be a number.
1915 | cvttsd2si RBd, qword [BASE] // RBd = argument truncated to int32.
1917 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: rejects values above 255 and negative values.
1920 | lea RD, TMP1 // Points to stack. Little-endian.
1923 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
1924 | mov CARG3d, TMPRd // Zero-extended to size_t.
1928 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1930 | // GCstr * returned in eax (RD).
1931 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
1933 | settp STR:RD, LJ_TSTR // Tag the returned pointer with the string type.
1934 | mov [BASE-16], STR:RD // Store the result at BASE-16, where results start.
1940 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1943 | mov TMPR, [BASE+16]
1944 | checkint TMPR, ->fff_fallback
1946 | checknumtp [BASE+16], ->fff_fallback
1947 | cvttsd2si TMPRd, qword [BASE+16]
1950 | mov STR:RB, [BASE]
1951 | checkstr STR:RB, ->fff_fallback
1953 | mov ITYPE, [BASE+8]
1954 | mov RAd, ITYPEd // Must clear hiword for lea below.
1956 | cmp ITYPEd, LJ_TISNUM
1957 | jne ->fff_fallback
1959 | checknumtp [BASE+8], ->fff_fallback
1960 | cvttsd2si RAd, qword [BASE+8]
1962 | mov RCd, STR:RB->len
1963 | cmp RCd, TMPRd // len < end? (unsigned compare)
1966 | test RAd, RAd // start <= 0?
1969 | sub TMPRd, RAd // start > end?
1971 | lea RD, [STR:RB+RAd+#STR-1]
1976 |5: // Negative end or overflow.
1978 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1981 | mov TMPRd, RCd // end = len
1984 |7: // Negative start or underflow.
1986 | add RAd, RCd // start = start+(len+1)
1988 | jg <3 // start > 0?
1990 | mov RAd, 1 // start = 1
1993 |->fff_emptystr: // Range underflow.
1994 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1997 |.macro ffstring_op, name
1998 | .ffunc_1 string_ .. name
2001 | mov STR:TMPR, [BASE]
2002 | checkstr STR:TMPR, ->fff_fallback
2004 | mov STR:CARG2, [BASE]
2005 | checkstr STR:CARG2, ->fff_fallback
2008 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2009 | mov L:RB->base, BASE
2011 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2013 | mov RC, SBUF:CARG1->b
2014 | mov SBUF:CARG1->L, L:RB
2015 | mov SBUF:CARG1->p, RC
2017 | call extern lj_buf_putstr_ .. name
2019 | call extern lj_buf_tostr
2023 |ffstring_op reverse
2027 |//-- Bit library --------------------------------------------------------
2029 |.macro .ffunc_bit, name, kind, fdef
2032 | sseconst_tobit xmm1, RB
2046 | checknumtp [BASE], ->fff_fallback
2047 | movsd xmm0, qword [BASE]
2050 | sseconst_tobit xmm1, RB
2057 |.macro .ffunc_bit, name, kind
2058 | .ffunc_bit name, kind, .ffunc_1
2061 |.ffunc_bit bit_tobit, 0
2064 |.macro .ffunc_bit_op, name, ins
2065 | .ffunc_bit name, 2
2066 | mov TMPRd, NARGS:RDd // Save for fallback.
2067 | lea RD, [BASE+NARGS:RD*8-16]
2078 | ja ->fff_fallback_bit_op
2081 | checknumtp [RD], ->fff_fallback_bit_op
2082 | movsd xmm0, qword [RD]
2091 |.ffunc_bit_op bit_band, and
2092 |.ffunc_bit_op bit_bor, or
2093 |.ffunc_bit_op bit_bxor, xor
2095 |.ffunc_bit bit_bswap, 1
2099 |.ffunc_bit bit_bnot, 1
2105 | cvtsi2sd xmm0, RBd
2109 |->fff_fallback_bit_op:
2110 | mov NARGS:RDd, TMPRd // Restore for fallback
2111 | jmp ->fff_fallback
2113 |.macro .ffunc_bit_sh, name, ins
2115 | .ffunc_bit name, 1, .ffunc_2
2116 | // Note: no inline conversion from number for 2nd argument!
2118 | checkint RA, ->fff_fallback
2121 | sseconst_tobit xmm2, RB
2127 | ins RBd, cl // Assumes RA is ecx.
2131 |.ffunc_bit_sh bit_lshift, shl
2132 |.ffunc_bit_sh bit_rshift, shr
2133 |.ffunc_bit_sh bit_arshift, sar
2134 |.ffunc_bit_sh bit_rol, rol
2135 |.ffunc_bit_sh bit_ror, ror
2137 |//-----------------------------------------------------------------------
2140 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2141 | jmp ->fff_fallback
2143 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2144 |->fff_fallback: // Call fast function fallback handler.
2145 | // BASE = new base, RD = nargs+1
2147 | mov PC, [BASE-8] // Fallback may overwrite PC.
2148 | mov SAVE_PC, PC // Redundant (but a defined value).
2149 | mov L:RB->base, BASE // Publish BASE to L->base for the C handler.
2150 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + (nargs+1)*8 - 8 = BASE + nargs*8.
2151 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2153 | mov CFUNC:RD, [BASE-16] // Load the function object from the slot at BASE-16.
2155 | cmp RA, L:RB->maxstack // Would the handler's minimum stack exceed maxstack?
2156 | ja >5 // Need to grow stack.
2158 | call aword CFUNC:RD->f // (lua_State *L)
2159 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2160 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2161 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2167 | lea NARGS:RDd, [RAd+1] // NARGS = RA + 1 (RA is computed on lines not shown here).
2168 | mov LFUNC:RB, [BASE-16] // Reload the function object from BASE-16.
2169 | jne ->vm_call_tail // Returned -1?
2171 | ins_callt // Returned 0: retry fast path.
2173 |// Reconstruct previous base for vmeta_call during tailcall.
2176 | test PCd, FRAME_TYPE // Check the frame type bits in PC.
2180 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2181 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2186 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2188 |5: // Grow stack for fallback handler.
2189 | mov CARG2d, LUA_MINSTACK // n = LUA_MINSTACK.
2191 | call extern lj_state_growstack // (lua_State *L, int n)
2192 | mov BASE, L:RB->base // The stack may have moved: reload BASE from L->base.
2193 | xor RDd, RDd // Simulate a return 0.
2194 | jmp <1 // Dumb retry (goes through ff first).
2196 |->fff_gcstep: // Call GC step function.
2197 | // BASE = new base, RD = nargs+1
2198 | pop RB // Must keep stack at same level.
2199 | mov TMP1, RB // Save return address.
2201 | mov SAVE_PC, PC // Redundant (but a defined value).
2202 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
2203 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + (nargs+1)*8 - 8 = BASE + nargs*8.
2206 | call extern lj_gc_step // (lua_State *L)
2207 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
2213 | push RB // Restore return address.
2216 |//-----------------------------------------------------------------------
2217 |//-- Special dispatch targets -------------------------------------------
2218 |//-----------------------------------------------------------------------
2220 |->vm_record: // Dispatch target for recording phase.
2222 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2223 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2225 | // Decrement the hookcount for consistency, but always do the call.
2226 | test RDL, HOOK_ACTIVE
2228 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2230 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2234 |->vm_rethook: // Dispatch target for return hooks.
2235 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2236 | test RDL, HOOK_ACTIVE // Hook already active?
2240 |->vm_inshook: // Dispatch target for instr/line hooks.
2241 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] // RDd = current hook mask byte.
2242 | test RDL, HOOK_ACTIVE // Hook already active?
2245 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT // Is a line hook or a count hook enabled?
2247 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] // Count down the hook counter.
2249 | test RDL, LUA_MASKLINE // Is the line hook enabled?
2253 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
2254 | mov CARG2, PC // Caveat: CARG2 == BASE
2256 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2257 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2259 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
2265 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2267 |->cont_hook: // Continue from hook yield.
2270 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2273 |->vm_hotloop: // Hot loop counter underflow.
2275 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2277 | mov RB, LFUNC:RB->pc
2278 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2279 | lea RD, [BASE+RD*8]
2281 | mov L:RB->base, BASE
2284 | lea CARG1, [DISPATCH+GG_DISP2J]
2285 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2287 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2291 |->vm_callhook: // Dispatch target for call hooks.
2297 |->vm_hotcall: // Hot call counter underflow.
2300 | or PC, 1 // Marker for hot call.
2303 | lea RD, [BASE+NARGS:RD*8-8]
2305 | mov L:RB->base, BASE
2309 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2310 | // ASMFunction returned in eax/rax (RD).
2311 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2315 | mov BASE, L:RB->base
2325 |->cont_stitch: // Trace stitching.
2327 | // BASE = base, RC = result, RB = mbase
2328 | mov ITYPEd, [RB-24] // Save previous trace number.
2329 | mov TMPRd, MULTRES
2331 | lea RA, [BASE+RA*8] // Call base.
2334 |1: // Move results down.
2345 | lea RC, [BASE+RC*8-8]
2348 | ja >9 // More results wanted?
2350 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2351 | mov TRACE:RD, [RA+ITYPE*8]
2352 | test TRACE:RD, TRACE:RD
2354 | movzx RDd, word TRACE:RD->link
2356 | je ->cont_nop // Blacklisted.
2358 | jne =>BC_JLOOP // Jump to stitched trace.
2360 | // Stitch a new trace to the previous trace.
2361 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2363 | mov L:RB->base, BASE
2365 | lea CARG1, [DISPATCH+GG_DISP2J]
2366 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2367 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2368 | mov BASE, L:RB->base
2371 |9: // Fill up results with nil.
2372 | mov aword [RA], LJ_TNIL
2377 |->vm_profhook: // Dispatch target for profiler hook.
2380 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
2381 | mov CARG2, PC // Caveat: CARG2 == BASE
2383 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2384 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
2385 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2390 |//-----------------------------------------------------------------------
2391 |//-- Trace exit handler -------------------------------------------------
2392 |//-----------------------------------------------------------------------
2394 |// Called from an exit stub with the exit number on the stack.
2395 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2398 | push r13; push r12 // Save the general purpose registers below the two exit number slots.
2399 | push r11; push r10; push r9; push r8
2400 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // rbp = address just above the two exit number slots; pushed as the rsp slot.
2401 | push rbx; push rdx; push rcx; push rax
2402 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2403 | mov RCH, byte [rbp-16] // Second byte of the exit number goes into bits 8-15.
2404 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to save r15 and r14.
2405 | // Caveat: DISPATCH is rbx.
2406 | mov DISPATCH, [ebp] // NOTE(review): 32 bit address register in 64 bit code; confirm the assembler does not truncate the address.
2407 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. 32 bit load: do not pull in the bytes after vmstate.
2409 | mov [DISPATCH+DISPATCH_J(exitno)], RCd // 32 bit store: the exit number is at most 16 bits wide, an 8 byte store would clobber the next field.
2410 | mov [DISPATCH+DISPATCH_J(parent)], RAd // 32 bit store of the trace number, for the same reason.
2412 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2414 | sub rsp, 16*8 // Room for SSE regs.
2417 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2418 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2419 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2420 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2421 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2422 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2423 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2424 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2425 | // Caveat: RB is rbp.
2426 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] // L = currently running Lua state.
2427 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] // BASE = full-width base pointer recorded in jit_base.
2428 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // Pass L to the JIT state.
2429 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
2431 | lea CARG2, [rsp+4*8] // 2nd C argument = rsp + 32, matching the "+ save area" variant above.
2435 | lea CARG1, [DISPATCH+GG_DISP2J] // 1st C argument = jit_State pointer.
2436 | mov aword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear the whole pointer: it is read full-width above, a dword store would leave the upper half set.
2437 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2438 | // MULTRES or negated error code returned in eax (RD).
2439 | mov RA, L:RB->cframe // RA = L->cframe.
2440 | and RA, CFRAME_RAWMASK // Mask with CFRAME_RAWMASK to get the raw C frame pointer.
2441 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2442 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
2443 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2447 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2449 | // Restore additional callee-save registers only used in compiled code.
2451 | lea RA, [rsp+10*16+4*8] // RA = address just above the ten 16 byte xmm slots and a 32 byte area.
2453 | movdqa xmm15, [RA-10*16]
2454 | movdqa xmm14, [RA-9*16]
2455 | movdqa xmm13, [RA-8*16]
2456 | movdqa xmm12, [RA-7*16]
2457 | movdqa xmm11, [RA-6*16]
2458 | movdqa xmm10, [RA-5*16]
2459 | movdqa xmm9, [RA-4*16]
2460 | movdqa xmm8, [RA-3*16]
2461 | movdqa xmm7, [RA-2*16]
2462 | mov rsp, RA // Reposition stack to C frame.
2463 | movdqa xmm6, [RA-1*16]
2473 | mov rsp, RA // Reposition stack to C frame.
2475 | test RDd, RDd; js >9 // Check for error from exit.
2478 | mov LFUNC:KBASE, [BASE-16] // Load the function object of the current frame.
2479 | cleartp LFUNC:KBASE // Strip the type tag to get the raw pointer.
2480 | mov KBASE, LFUNC:KBASE->pc // KBASE = function's bytecode start ...
2481 | mov KBASE, [KBASE+PC2PROTO(k)] // ... then the prototype's constant array.
2482 | mov L:RB->base, BASE // Publish BASE to L->base.
2483 | mov aword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear the whole pointer: jit_base is read full-width elsewhere in this file, a dword store would leave the upper half set.
2484 | set_vmstate INTERP
2485 | // Modified copy of ins_next which handles function header dispatch, too.
2491 | cmp OP, BC_FUNCF // Function header?
2493 | cmp OP, BC_FUNCC+2 // Fast function?
2496 | mov RCd, MULTRES // RC/RD holds nres+1.
2498 | jmp aword [DISPATCH+OP*8] // Dispatch through the opcode table.
2500 |4: // Check frame below fast function.
2502 | test RCd, FRAME_TYPE
2503 | jnz <2 // Trace stitching continuation?
2504 | // Otherwise set KBASE for Lua function below fast function.
2505 | movzx RCd, byte [RC-3] // Fetch one operand byte of the instruction before the address in RC.
2507 | mov LFUNC:KBASE, [BASE+RC*8-24] // Function object of the frame below.
2508 | cleartp LFUNC:KBASE
2509 | mov KBASE, LFUNC:KBASE->pc
2510 | mov KBASE, [KBASE+PC2PROTO(k)]
2513 |9: // Rethrow error from the right C frame.
2517 | call extern lj_err_throw // (lua_State *L, int errcode)
2520 |//-----------------------------------------------------------------------
2521 |//-- Math helper functions ----------------------------------------------
2522 |//-----------------------------------------------------------------------
2524 |// FP value rounding. Called by math.floor/math.ceil fast functions
2525 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2526 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond is presumably an assembly-time condition.
2529 | sseconst_abs xmm2, RD // xmm2 = mask that clears the sign bit.
2530 | sseconst_2p52 xmm3, RD // xmm3 = 2^52.
2532 | andpd xmm1, xmm2 // |x|
2533 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2535 | andnpd xmm2, xmm0 // Isolate sign bit.
2536 |.if mode == 2 // trunc(x)?
2538 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2540 | sseconst_1 xmm3, RD // xmm3 = constant 1.
2541 | cmpsd xmm0, xmm1, 1 // |x| < result?
2543 | subsd xmm1, xmm0 // If yes, subtract 1 (the constant loaded by sseconst_1 above).
2544 | orpd xmm1, xmm2 // Merge sign bit back in.
2546 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2548 | orpd xmm1, xmm2 // Merge sign bit back in.
2549 | .if mode == 1 // ceil(x)?
2550 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2551 | cmpsd xmm0, xmm1, 6 // x > result?
2552 | .else // floor(x)?
2553 | sseconst_1 xmm2, RD // xmm2 = constant 1.
2554 | cmpsd xmm0, xmm1, 1 // x < result?
2557 | subsd xmm1, xmm0 // If yes, subtract +-1.
2564 | vm_round vm_floor, 0, 1 // Instantiate vm_floor (mode 0).
2565 | vm_round vm_ceil, 1, JIT // Instantiate vm_ceil (mode 1).
2566 | vm_round vm_trunc, 2, JIT // Instantiate vm_trunc (mode 2).
2568 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2570 |// Args in xmm0/xmm1, return value in xmm0.
2571 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2574 | sseconst_abs xmm2, RD
2575 | sseconst_2p52 xmm3, RD
2577 | andpd xmm4, xmm2 // |x/y|
2578 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2580 | andnpd xmm2, xmm0 // Isolate sign bit.
2581 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2583 | orpd xmm4, xmm2 // Merge sign bit back in.
2584 | sseconst_1 xmm2, RD
2585 | cmpsd xmm0, xmm4, 1 // x/y < result?
2587 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2598 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2600 | cmp eax, 1; jle >6 // i<=1?
2601 | // Now 1 < (unsigned)i <= 0x80000000.
2602 |1: // Handle leading zeros.
2603 | test eax, 1; jnz >2
2610 |3: // Handle trailing bits.
2621 | je <5 // x^1 ==> x
2622 | jb >7 // x^0 ==> 1
2625 | sseconst_1 xmm1, RD
2630 | sseconst_1 xmm0, RD
2633 |//-----------------------------------------------------------------------
2634 |//-- Miscellaneous functions --------------------------------------------
2635 |//-----------------------------------------------------------------------
2637 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2640 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
2648 | .if X64WIN; pop rsi; .endif
2651 |//-----------------------------------------------------------------------
2652 |//-- Assertions ---------------------------------------------------------
2653 |//-----------------------------------------------------------------------
2655 |->assert_bad_for_arg_type:
2656 #ifdef LUA_USE_ASSERT
2661 |//-----------------------------------------------------------------------
2662 |//-- FFI helper functions -----------------------------------------------
2663 |//-----------------------------------------------------------------------
2665 |// Handler for callback functions. Callback slot number in ah/al.
2668 |.type CTSTATE, CTState, PC // CTSTATE is a typed alias for the PC register.
2669 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2670 | lea DISPATCH, [ebp+GG_G2DISP] // DISPATCH = global_State pointer + GG_G2DISP.
2671 | mov CTSTATE, GL:ebp->ctype_state // CTSTATE = g->ctype_state.
2673 | mov CTSTATE->cb.slot, eax // Record the callback slot number.
2674 | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming integer argument registers ...
2675 | mov CTSTATE->cb.gpr[1], CARG2
2676 | mov CTSTATE->cb.gpr[2], CARG3
2677 | mov CTSTATE->cb.gpr[3], CARG4
2678 | movsd qword CTSTATE->cb.fpr[0], xmm0 // ... and the incoming FP argument registers.
2679 | movsd qword CTSTATE->cb.fpr[1], xmm1
2680 | movsd qword CTSTATE->cb.fpr[2], xmm2
2681 | movsd qword CTSTATE->cb.fpr[3], xmm3
2683 | lea rax, [rsp+CFRAME_SIZE+4*8] // Variant with an extra 32 bytes above the C frame.
2685 | lea rax, [rsp+CFRAME_SIZE] // Variant without the extra area.
2686 | mov CTSTATE->cb.gpr[4], CARG5 // Two more integer argument registers.
2687 | mov CTSTATE->cb.gpr[5], CARG6
2688 | movsd qword CTSTATE->cb.fpr[4], xmm4 // Four more FP argument registers.
2689 | movsd qword CTSTATE->cb.fpr[5], xmm5
2690 | movsd qword CTSTATE->cb.fpr[6], xmm6
2691 | movsd qword CTSTATE->cb.fpr[7], xmm7
2693 | mov CTSTATE->cb.stack, rax // Record the address computed into rax above as cb.stack.
2695 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2696 | mov CARG1, CTSTATE // 1st C argument = CTState pointer.
2697 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2698 | // lua_State * returned in eax (RD).
2699 | set_vmstate INTERP
2700 | mov BASE, L:RD->base // BASE = base of the returned lua_State.
2703 | mov LFUNC:RB, [BASE-16] // Load the function object from the slot at BASE-16.
2710 |->cont_ffi_callback: // Return from FFI callback.
2713 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] // CTSTATE = g->ctype_state.
2714 | mov aword CTSTATE->L, L:RA // Record the Lua state in the CTState.
2715 | mov L:RA->base, BASE // Publish BASE to L->base before calling into C.
2717 | mov CARG1, CTSTATE // 1st C argument = CTState pointer.
2719 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2720 | mov rax, CTSTATE->cb.gpr[0] // Integer return value from cb.gpr[0].
2721 | movsd xmm0, qword CTSTATE->cb.fpr[0] // FP return value from cb.fpr[0].
2722 | jmp ->vm_leave_unw // Leave through vm_leave_unw.
2725 |->vm_ffi_call: // Call C function via FFI.
2726 | // Caveat: needs special frame unwinding, see below.
2728 | .type CCSTATE, CCallState, rbx // CCSTATE is a typed alias for rbx.
2729 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Set up an rbp frame, save rbx, keep the 1st C argument in rbx.
2731 | // Readjust stack.
2732 | mov eax, CCSTATE->spadj // eax = stack adjustment from the call state.
2735 | // Copy stack slots.
2736 | movzx ecx, byte CCSTATE->nsp // ecx = nsp field (presumably the stack slot count).
2740 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] // Load slot rcx of the call state's stack array ...
2741 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax // ... and store it to the outgoing stack area.
2746 | movzx eax, byte CCSTATE->nfpr // eax = nfpr field, tested below to decide which xmm registers to load.
2747 | mov CARG1, CCSTATE->gpr[0] // Load the integer argument registers.
2748 | mov CARG2, CCSTATE->gpr[1]
2749 | mov CARG3, CCSTATE->gpr[2]
2750 | mov CARG4, CCSTATE->gpr[3]
2752 | mov CARG5, CCSTATE->gpr[4]
2753 | mov CARG6, CCSTATE->gpr[5]
2755 | test eax, eax; jz >5 // nfpr == 0: skip all FP register loads.
2756 | movaps xmm0, CCSTATE->fpr[0]
2757 | movaps xmm1, CCSTATE->fpr[1]
2758 | movaps xmm2, CCSTATE->fpr[2]
2759 | movaps xmm3, CCSTATE->fpr[3]
2761 | cmp eax, 4; jbe >5 // nfpr <= 4 (unsigned): skip xmm4-xmm7.
2762 | movaps xmm4, CCSTATE->fpr[4]
2763 | movaps xmm5, CCSTATE->fpr[5]
2764 | movaps xmm6, CCSTATE->fpr[6]
2765 | movaps xmm7, CCSTATE->fpr[7]
2769 | call aword CCSTATE->func // Call the target C function.
2771 | mov CCSTATE->gpr[0], rax // Store the integer return value.
2772 | movaps CCSTATE->fpr[0], xmm0 // Store the FP return value.
2774 | mov CCSTATE->gpr[1], rdx // Store the second integer return register.
2775 | movaps CCSTATE->fpr[1], xmm1 // Store the second FP return register.
2778 | mov rbx, [rbp-8]; leave; ret // Restore rbx from its save slot, tear down the frame and return.
2780 |// Note: vm_ffi_call must be the last function in this object file!
2782 |//-----------------------------------------------------------------------
2785 /* Generate the code for a single instruction. */
2786 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2789 |// Note: aligning all instructions does not pay off.
2794 /* -- Comparison ops ---------------------------------------------------- */
2796 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2798 |.macro jmp_comp, lt, ge, le, gt, target
2812 ||default: break; /* Shut up GCC. */
2816 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: /* Ordered comparisons share one code template. */
2817 | // RA = src1, RD = src2, JMP with RD = target
2819 | mov ITYPE, [BASE+RA*8] // Load the first operand slot.
2820 | mov RB, [BASE+RD*8] // Load the second operand slot.
2826 | cmp ITYPEd, LJ_TISNUM; jne >7 // First operand not an integer: go to 7.
2827 | cmp RBd, LJ_TISNUM; jne >8 // Second operand not an integer: go to 8.
2830 | jmp_comp jge, jl, jg, jle, >9 // Integer compare: each condition is the negation of its opcode's test.
2837 |7: // RA is not an integer.
2839 | // RA is a number.
2840 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp // Below: RD is a number too. Not equal: metamethod path.
2841 | // RA is a number, RD is an integer.
2842 | cvtsi2sd xmm0, RDd // Convert the integer operand to a double.
2845 |8: // RA is an integer, RD is not an integer.
2847 | // RA is an integer, RD is a number.
2848 | cvtsi2sd xmm1, RAd // Convert the integer operand to a double.
2852 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp // First operand not a number: metamethod path.
2853 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp // Second operand not a number: metamethod path.
2861 | ucomisd xmm0, xmm1 // Floating-point compare; sets ZF/CF/PF.
2862 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2863 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2865 | jmp_comp jbe, ja, jb, jae, <9 // Variant that jumps backward to label 9.
2868 | jmp_comp jbe, ja, jb, jae, >1 // Variant that jumps forward to label 1.
2876 case BC_ISEQV: case BC_ISNEV:
2877 vk = op == BC_ISEQV;
2878 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2879 | mov RB, [BASE+RD*8]
2880 | mov ITYPE, [BASE+RA*8]
2887 | cmp RBd, LJ_TISNUM; jne >7
2888 | cmp ITYPEd, LJ_TISNUM; jne >8
2900 |7: // RD is not an integer.
2902 | // RD is a number.
2904 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2905 | // RD is a number, RA is an integer.
2906 | cvtsi2sd xmm0, RAd
2909 |8: // RD is an integer, RA is not an integer.
2911 | // RD is an integer, RA is a number.
2912 | cvtsi2sd xmm1, RDd
2916 | cmp RBd, LJ_TISNUM; jae >5
2917 | cmp ITYPEd, LJ_TISNUM; jae >5
2923 | ucomisd xmm0, xmm1
2927 | jp >2 // Unordered means not equal.
2930 | jp >2 // Unordered means not equal.
2935 |1: // EQ: Branch to the target.
2938 |2: // NE: Fallthrough to next instruction.
2946 |2: // NE: Branch to the target.
2949 |1: // EQ: Fallthrough to next instruction.
2951 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2952 op == BC_ISEQN || op == BC_ISNEN)) {
2958 if (op == BC_ISEQV || op == BC_ISNEV) {
2959 |5: // Either or both types are not numbers.
2961 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2962 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2965 | je <1 // Same GCobjs or pvalues?
2967 | jne <2 // Not the same type?
2968 | cmp RBd, LJ_TISTABUD
2969 | ja <2 // Different objects and not table/ud?
2971 | // Different tables or userdatas. Need to check __eq metamethod.
2972 | // Field metatable must be at same offset for GCtab and GCudata!
2974 | mov TAB:RB, TAB:RA->metatable
2975 | test TAB:RB, TAB:RB
2976 | jz <2 // No metatable?
2977 | test byte TAB:RB->nomm, 1<<MM_eq
2978 | jnz <2 // Or 'no __eq' flag set?
2980 | xor RBd, RBd // ne = 0
2982 | mov RBd, 1 // ne = 1
2984 | jmp ->vmeta_equal // Handle __eq metamethod.
2988 | cmp ITYPEd, LJ_TCDATA
2989 if (LJ_DUALNUM && vk) {
2994 | jmp ->vmeta_equal_cd
2998 case BC_ISEQS: case BC_ISNES:
2999 vk = op == BC_ISEQS;
3000 | ins_AND // RA = src, RD = str const, JMP with RD = target
3001 | mov RB, [BASE+RA*8]
3004 | cmp RB, [KBASE+RD*8]
3012 case BC_ISEQN: case BC_ISNEN:
3013 vk = op == BC_ISEQN;
3014 | ins_AD // RA = src, RD = num const, JMP with RD = target
3015 | mov RB, [BASE+RA*8]
3019 | mov RD, [KBASE+RD*8]
3032 |7: // RA is not an integer.
3034 | // RA is a number.
3035 | mov RD, [KBASE+RD*8]
3037 | // RA is a number, RD is an integer.
3038 | cvtsi2sd xmm0, RDd
3041 |8: // RA is an integer, RD is a number.
3042 | cvtsi2sd xmm0, RBd
3044 | ucomisd xmm0, xmm1
3051 | movsd xmm0, qword [KBASE+RD*8]
3054 | ucomisd xmm0, qword [BASE+RA*8]
3057 case BC_ISEQP: case BC_ISNEP:
3058 vk = op == BC_ISEQP;
3059 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3060 | mov RB, [BASE+RA*8]
3064 if (!LJ_HASFFI) goto iseqne_test;
3072 | cmp RBd, LJ_TCDATA; jne <2
3073 | jmp ->vmeta_equal_cd
3076 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3084 /* -- Unary test and copy ops ------------------------------------------- */
3086 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3087 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3088 | mov ITYPE, [BASE+RD*8]
3090 if (op == BC_ISTC || op == BC_ISFC) {
3094 | cmp ITYPEd, LJ_TISTRUECOND
3095 if (op == BC_IST || op == BC_ISTC) {
3100 if (op == BC_ISTC || op == BC_ISFC) {
3101 | mov [BASE+RA*8], RB
3105 |1: // Fallthrough to the next instruction.
3110 | ins_AD // RA = src, RD = -type
3111 | mov RB, [BASE+RA*8]
3114 | jne ->vmeta_istype
3118 | ins_AD // RA = src, RD = -(TISNUM-1)
3119 | checknumtp [BASE+RA*8], ->vmeta_istype
3123 /* -- Unary ops --------------------------------------------------------- */
3126 | ins_AD // RA = dst, RD = src
3127 | mov RB, [BASE+RD*8]
3128 | mov [BASE+RA*8], RB
3132 | ins_AD // RA = dst, RD = src
3133 | mov RB, [BASE+RD*8]
3136 | cmp RB, LJ_TISTRUECOND
3140 | mov [BASE+RA*8], RC
3144 | ins_AD // RA = dst, RD = src
3145 | mov RB, [BASE+RD*8]
3152 | mov [BASE+RA*8], RB
3155 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3160 | checknum RB, ->vmeta_unm
3162 | mov64 RD, U64x(80000000,00000000)
3167 | mov [BASE+RA*8], RB
3172 | ins_AD // RA = dst, RD = src
3173 | mov RD, [BASE+RD*8]
3176 | mov RDd, dword STR:RD->len
3179 | mov [BASE+RA*8], RD
3182 | cvtsi2sd xmm0, dword STR:RD->len
3184 | movsd qword [BASE+RA*8], xmm0
3188 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3189 | mov TAB:CARG1, TAB:RD
3191 | mov TAB:RB, TAB:RD->metatable
3197 | mov RB, BASE // Save BASE.
3198 | call extern lj_tab_len // (GCtab *t)
3199 | // Length of table returned in eax (RD).
3203 | cvtsi2sd xmm0, RDd
3205 | mov BASE, RB // Restore BASE.
3209 |9: // Check for __len.
3210 | test byte TAB:RB->nomm, 1<<MM_len
3212 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3216 /* -- Binary ops -------------------------------------------------------- */
3218 |.macro ins_arithpre, sseins, ssereg
3220 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3223 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3225 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3227 | movsd xmm0, qword [BASE+RB*8]
3228 | sseins ssereg, qword [KBASE+RC*8]
3231 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3233 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3235 | movsd xmm0, qword [KBASE+RC*8]
3236 | sseins ssereg, qword [BASE+RB*8]
3239 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3240 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3241 | movsd xmm0, qword [BASE+RB*8]
3242 | sseins ssereg, qword [BASE+RC*8]
3247 |.macro ins_arithdn, intins
3249 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3252 | mov RB, [BASE+RB*8]
3253 | mov RC, [KBASE+RC*8]
3254 | checkint RB, ->vmeta_arith_vno
3255 | checkint RC, ->vmeta_arith_vno
3256 | intins RBd, RCd; jo ->vmeta_arith_vno
3259 | mov RB, [BASE+RB*8]
3260 | mov RC, [KBASE+RC*8]
3261 | checkint RB, ->vmeta_arith_nvo
3262 | checkint RC, ->vmeta_arith_nvo
3263 | intins RCd, RBd; jo ->vmeta_arith_nvo
3266 | mov RB, [BASE+RB*8]
3267 | mov RC, [BASE+RC*8]
3268 | checkint RB, ->vmeta_arith_vvo
3269 | checkint RC, ->vmeta_arith_vvo
3270 | intins RBd, RCd; jo ->vmeta_arith_vvo
3275 | mov [BASE+RA*8], RC
3278 | mov [BASE+RA*8], RB
3283 |.macro ins_arithpost
3284 | movsd qword [BASE+RA*8], xmm0
3287 |.macro ins_arith, sseins
3288 | ins_arithpre sseins, xmm0
3293 |.macro ins_arith, intins, sseins
3295 | ins_arithdn intins
3301 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3302 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3303 | ins_arith add, addsd
3305 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3306 | ins_arith sub, subsd
3308 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3309 | ins_arith imul, mulsd
3311 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3315 | ins_arithpre movsd, xmm1
3321 case BC_MODNV: case BC_MODVV:
3322 | ins_arithpre movsd, xmm1
3323 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3326 | ins_arithpre movsd, xmm1
3336 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3337 | mov L:CARG1, SAVE_L
3338 | mov L:CARG1->base, BASE
3339 | lea CARG2, [BASE+RC*8]
3345 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3346 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3347 | mov BASE, L:RB->base
3350 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3352 | mov RC, [BASE+RB*8]
3353 | mov [BASE+RA*8], RC
3357 /* -- Constant ops ------------------------------------------------------ */
3360 | ins_AND // RA = dst, RD = str const (~)
3361 | mov RD, [KBASE+RD*8]
3363 | mov [BASE+RA*8], RD
3368 | ins_AND // RA = dst, RD = cdata const (~)
3369 | mov RD, [KBASE+RD*8]
3370 | settp RD, LJ_TCDATA
3371 | mov [BASE+RA*8], RD
3376 | ins_AD // RA = dst, RD = signed int16 literal
3380 | mov [BASE+RA*8], RD
3382 | movsx RDd, RDW // Sign-extend literal.
3383 | cvtsi2sd xmm0, RDd
3384 | movsd qword [BASE+RA*8], xmm0
3389 | ins_AD // RA = dst, RD = num const
3390 | movsd xmm0, qword [KBASE+RD*8]
3391 | movsd qword [BASE+RA*8], xmm0
3395 | ins_AD // RA = dst, RD = primitive type (~)
3398 | mov [BASE+RA*8], RD
3402 | ins_AD // RA = dst_start, RD = dst_end
3403 | lea RA, [BASE+RA*8+8]
3404 | lea RD, [BASE+RD*8]
3406 | mov [RA-8], RB // Sets minimum 2 slots.
3415 /* -- Upvalue and function ops ------------------------------------------ */
3418 | ins_AD // RA = dst, RD = upvalue #
3419 | mov LFUNC:RB, [BASE-16]
3421 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3422 | mov RB, UPVAL:RB->v
3424 | mov [BASE+RA*8], RD
3428 #define TV2MARKOFS \
3429 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3430 | ins_AD // RA = upvalue #, RD = src
3431 | mov LFUNC:RB, [BASE-16]
3433 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3434 | cmp byte UPVAL:RB->closed, 0
3435 | mov RB, UPVAL:RB->v
3436 | mov RA, [BASE+RD*8]
3439 | // Check barrier for closed upvalue.
3440 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3445 |2: // Upvalue is black. Check if new value is collectable and white.
3448 | sub RDd, LJ_TISGCV
3449 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3452 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3454 | // Crossed a write barrier. Move the barrier forward.
3457 | mov RB, BASE // Save BASE.
3459 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3461 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3462 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3463 | mov BASE, RB // Restore BASE.
3468 | ins_AND // RA = upvalue #, RD = str const (~)
3469 | mov LFUNC:RB, [BASE-16]
3471 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3472 | mov STR:RA, [KBASE+RD*8]
3473 | mov RD, UPVAL:RB->v
3474 | settp STR:ITYPE, STR:RA, LJ_TSTR
3475 | mov [RD], STR:ITYPE
3476 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3481 |2: // Check if string is white and ensure upvalue is closed.
3482 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3484 | cmp byte UPVAL:RB->closed, 0
3486 | // Crossed a write barrier. Move the barrier forward.
3487 | mov RB, BASE // Save BASE (CARG2 == BASE).
3489 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3490 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3491 | mov BASE, RB // Restore BASE.
3495 | ins_AD // RA = upvalue #, RD = num const
3496 | mov LFUNC:RB, [BASE-16]
3498 | movsd xmm0, qword [KBASE+RD*8]
3499 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3500 | mov RA, UPVAL:RB->v
3501 | movsd qword [RA], xmm0
3505 | ins_AD // RA = upvalue #, RD = primitive type (~)
3506 | mov LFUNC:RB, [BASE-16]
3508 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3511 | mov RA, UPVAL:RB->v
3516 | ins_AD // RA = level, RD = target
3517 | branchPC RD // Do this first to free RD.
3519 | cmp aword L:RB->openupval, 0 // Pointer-sized test: in LJ_GC64 mode a dword compare would only see the low half of the reference.
3521 | mov L:RB->base, BASE // Store BASE into L->base before the C call.
3522 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3523 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3524 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3525 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3531 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3533 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3534 | mov CARG3, [BASE-16]
3536 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3539 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3540 | call extern lj_func_newL_gc
3541 | // GCfuncL * returned in eax (RC).
3542 | mov BASE, L:RB->base
3544 | settp LFUNC:RC, LJ_TFUNC
3545 | mov [BASE+RA*8], LFUNC:RC
3549 /* -- Table ops --------------------------------------------------------- */
3552 | ins_AD // RA = dst, RD = hbits|asize
3554 | mov L:RB->base, BASE
3555 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3556 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3568 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3569 | // Table * returned in eax (RC).
3570 | mov BASE, L:RB->base
3572 | settp TAB:RC, LJ_TTAB
3573 | mov [BASE+RA*8], TAB:RC
3575 |3: // Turn 0x7ff into 0x801.
3580 | call extern lj_gc_step_fixtop // (lua_State *L)
3585 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3587 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3589 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3590 | mov L:RB->base, BASE
3593 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3594 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3595 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3596 | // Table * returned in eax (RC).
3597 | mov BASE, L:RB->base
3599 | settp TAB:RC, LJ_TTAB
3600 | mov [BASE+RA*8], TAB:RC
3604 | call extern lj_gc_step_fixtop // (lua_State *L)
3605 | movzx RDd, PC_RD // Need to reload RD.
3611 | ins_AND // RA = dst, RD = str const (~)
3612 | mov LFUNC:RB, [BASE-16]
3614 | mov TAB:RB, LFUNC:RB->env
3615 | mov STR:RC, [KBASE+RD*8]
3619 | ins_AND // RA = src, RD = str const (~)
3620 | mov LFUNC:RB, [BASE-16]
3622 | mov TAB:RB, LFUNC:RB->env
3623 | mov STR:RC, [KBASE+RD*8]
3628 | ins_ABC // RA = dst, RB = table, RC = key
3629 | mov TAB:RB, [BASE+RB*8]
3630 | mov RC, [BASE+RC*8]
3631 | checktab TAB:RB, ->vmeta_tgetv
3637 | // Convert number to int and back and compare.
3640 | cvttsd2si RCd, xmm0
3641 | cvtsi2sd xmm1, RCd
3642 | ucomisd xmm0, xmm1
3643 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3645 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3646 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3648 | add RC, TAB:RB->array
3649 | // Get array slot.
3651 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3654 | mov [BASE+RA*8], ITYPE
3657 |2: // Check for __index if table value is nil.
3658 | mov TAB:TMPR, TAB:RB->metatable
3659 | test TAB:TMPR, TAB:TMPR
3661 | test byte TAB:TMPR->nomm, 1<<MM_index
3662 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3666 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3671 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3672 | mov TAB:RB, [BASE+RB*8]
3674 | mov STR:RC, [KBASE+RC*8]
3675 | checktab TAB:RB, ->vmeta_tgets
3676 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3677 | mov TMPRd, TAB:RB->hmask
3678 | and TMPRd, STR:RC->hash
3680 | add NODE:TMPR, TAB:RB->node
3681 | settp ITYPE, STR:RC, LJ_TSTR
3683 | cmp NODE:TMPR->key, ITYPE
3685 | // Get node value.
3686 | mov ITYPE, NODE:TMPR->val
3687 | cmp ITYPE, LJ_TNIL
3688 | je >5 // Key found, but nil value?
3690 | mov [BASE+RA*8], ITYPE
3693 |4: // Follow hash chain.
3694 | mov NODE:TMPR, NODE:TMPR->next
3695 | test NODE:TMPR, NODE:TMPR
3697 | // End of hash chain: key not found, nil result.
3698 | mov ITYPE, LJ_TNIL
3700 |5: // Check for __index if table value is nil.
3701 | mov TAB:TMPR, TAB:RB->metatable
3702 | test TAB:TMPR, TAB:TMPR
3703 | jz <2 // No metatable: done.
3704 | test byte TAB:TMPR->nomm, 1<<MM_index
3705 | jnz <2 // 'no __index' flag set: done.
3706 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3709 | ins_ABC // RA = dst, RB = table, RC = byte literal
3710 | mov TAB:RB, [BASE+RB*8]
3711 | checktab TAB:RB, ->vmeta_tgetb
3712 | cmp RCd, TAB:RB->asize
3715 | add RC, TAB:RB->array
3716 | // Get array slot.
3718 | cmp ITYPE, LJ_TNIL
3721 | mov [BASE+RA*8], ITYPE
3724 |2: // Check for __index if table value is nil.
3725 | mov TAB:TMPR, TAB:RB->metatable
3726 | test TAB:TMPR, TAB:TMPR
3728 | test byte TAB:TMPR->nomm, 1<<MM_index
3729 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3733 | ins_ABC // RA = dst, RB = table, RC = key
3734 | mov TAB:RB, [BASE+RB*8]
3737 | mov RCd, dword [BASE+RC*8]
3739 | cvttsd2si RCd, qword [BASE+RC*8]
3741 | cmp RCd, TAB:RB->asize
3742 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3744 | add RC, TAB:RB->array
3745 | // Get array slot.
3749 | mov [BASE+RA*8], ITYPE
3754 | ins_ABC // RA = src, RB = table, RC = key
3755 | mov TAB:RB, [BASE+RB*8]
3756 | mov RC, [BASE+RC*8]
3757 | checktab TAB:RB, ->vmeta_tsetv
3763 | // Convert number to int and back and compare.
3766 | cvttsd2si RCd, xmm0
3767 | cvtsi2sd xmm1, RCd
3768 | ucomisd xmm0, xmm1
3769 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3771 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3774 | add RC, TAB:RB->array
3775 | cmp aword [RC], LJ_TNIL
3776 | je >3 // Previous value is nil?
3778 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3780 |2: // Set array slot.
3781 | mov RB, [BASE+RA*8]
3785 |3: // Check for __newindex if previous value is nil.
3786 | mov TAB:TMPR, TAB:RB->metatable
3787 | test TAB:TMPR, TAB:TMPR
3789 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3790 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3794 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3798 |7: // Possible table write barrier for the value. Skip valiswhite check.
3799 | barrierback TAB:RB, TMPR
3803 | ins_ABC // RA = src, RB = table, RC = str const (~)
3804 | mov TAB:RB, [BASE+RB*8]
3806 | mov STR:RC, [KBASE+RC*8]
3807 | checktab TAB:RB, ->vmeta_tsets
3808 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3809 | mov TMPRd, TAB:RB->hmask
3810 | and TMPRd, STR:RC->hash
3812 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3813 | add NODE:TMPR, TAB:RB->node
3814 | settp ITYPE, STR:RC, LJ_TSTR
3816 | cmp NODE:TMPR->key, ITYPE
3818 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3819 | cmp aword [TMPR], LJ_TNIL
3820 | je >4 // Previous value is nil?
3822 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3824 |3: // Set node value.
3825 | mov ITYPE, [BASE+RA*8]
3829 |4: // Check for __newindex if previous value is nil.
3830 | mov TAB:ITYPE, TAB:RB->metatable
3831 | test TAB:ITYPE, TAB:ITYPE
3833 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3834 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3837 |5: // Follow hash chain.
3838 | mov NODE:TMPR, NODE:TMPR->next
3839 | test NODE:TMPR, NODE:TMPR
3841 | // End of hash chain: key not found, add a new one.
3843 | // But check for __newindex first.
3844 | mov TAB:TMPR, TAB:RB->metatable
3845 | test TAB:TMPR, TAB:TMPR
3846 | jz >6 // No metatable: continue.
3847 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3848 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3851 | mov L:CARG1, SAVE_L
3852 | mov L:CARG1->base, BASE
3856 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3857 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3858 | mov L:CARG1, SAVE_L
3859 | mov BASE, L:CARG1->base
3862 | jmp <2 // Must check write barrier for value.
3864 |7: // Possible table write barrier for the value. Skip valiswhite check.
3865 | barrierback TAB:RB, ITYPE
3869 | ins_ABC // RA = src, RB = table, RC = byte literal
3870 | mov TAB:RB, [BASE+RB*8]
3871 | checktab TAB:RB, ->vmeta_tsetb
3872 | cmp RCd, TAB:RB->asize
3875 | add RC, TAB:RB->array
3876 | cmp aword [RC], LJ_TNIL
3877 | je >3 // Previous value is nil?
3879 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3881 |2: // Set array slot.
3882 | mov ITYPE, [BASE+RA*8]
3886 |3: // Check for __newindex if previous value is nil.
3887 | mov TAB:TMPR, TAB:RB->metatable
3888 | test TAB:TMPR, TAB:TMPR
3890 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3891 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3894 |7: // Possible table write barrier for the value. Skip valiswhite check.
3895 | barrierback TAB:RB, TMPR
3899 | ins_ABC // RA = src, RB = table, RC = key
3900 | mov TAB:RB, [BASE+RB*8]
3903 | mov RC, [BASE+RC*8]
3905 | cvttsd2si RCd, qword [BASE+RC*8]
3907 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3910 | cmp RCd, TAB:RB->asize
3913 | add RC, TAB:RB->array
3914 | // Set array slot.
3916 | mov ITYPE, [BASE+RA*8]
3920 |7: // Possible table write barrier for the value. Skip valiswhite check.
3921 | barrierback TAB:RB, TMPR
3926 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3928 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3929 | lea RA, [BASE+RA*8]
3930 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3932 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3937 | jz >4 // Nothing to copy?
3938 | add RDd, TMPRd // Compute needed size.
3939 | cmp RDd, TAB:RB->asize
3940 | ja >5 // Doesn't fit into array part?
3943 | add TMPR, TAB:RB->array
3944 |3: // Copy result slots to table.
3954 |5: // Need to resize array part.
3955 | mov L:CARG1, SAVE_L
3956 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3961 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3962 | mov BASE, L:RB->base
3963 | movzx RAd, PC_RA // Restore RA.
3964 | movzx RDd, PC_RD // Restore RD.
3967 |7: // Possible table write barrier for any value. Skip valiswhite check.
3968 | barrierback TAB:RB, RD
3972 /* -- Calls and vararg handling ----------------------------------------- */
3974 case BC_CALL: case BC_CALLM:
3975 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3976 if (op == BC_CALLM) {
3977 | add NARGS:RDd, MULTRES
3979 | mov LFUNC:RB, [BASE+RA*8]
3980 | checkfunc LFUNC:RB, ->vmeta_call_ra
3981 | lea BASE, [BASE+RA*8+16]
3986 | ins_AD // RA = base, RD = extra_nargs
3987 | add NARGS:RDd, MULTRES
3988 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3991 | ins_AD // RA = base, RD = nargs+1
3992 | lea RA, [BASE+RA*8+16]
3993 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3994 | mov LFUNC:RB, [RA-16]
3995 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3998 | test PCd, FRAME_TYPE
4001 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4002 | mov MULTRES, NARGS:RDd
4005 |2: // Move args down.
4013 | mov LFUNC:RB, [BASE-16]
4016 | mov NARGS:RDd, MULTRES
4017 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4022 |5: // Tailcall to a fast function.
4023 | test PCd, FRAME_TYPE // Lua frame below?
4027 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4028 | cleartp LFUNC:KBASE
4029 | mov KBASE, LFUNC:KBASE->pc
4030 | mov KBASE, [KBASE+PC2PROTO(k)]
4033 |7: // Tailcall from a vararg function.
4034 | sub PC, FRAME_VARG
4035 | test PCd, FRAME_TYPEP
4036 | jnz >8 // Vararg frame below?
4037 | sub BASE, PC // Need to relocate BASE/KBASE down.
4042 | add PCd, FRAME_VARG
4047 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4048 | lea RA, [BASE+RA*8+16] // fb = base+2
4049 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4050 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4053 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-1] = fb[-5]
4054 | mov [RA-16], LFUNC:RB
4055 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4056 | checkfunc LFUNC:RB, ->vmeta_call
4062 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4064 | // NYI: add hotloop, record BC_ITERN.
4066 | mov TAB:RB, [BASE+RA*8-16]
4068 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4069 | mov TMPRd, TAB:RB->asize
4071 | mov ITYPE, TAB:RB->array
4072 |1: // Traverse array part.
4073 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4074 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4076 | cvtsi2sd xmm0, RCd
4078 | // Copy array slot to returned value.
4079 | mov RB, [ITYPE+RC*8]
4080 | mov [BASE+RA*8+8], RB
4081 | // Return array index as a numeric key.
4084 | mov [BASE+RA*8], ITYPE
4086 | movsd qword [BASE+RA*8], xmm0
4089 | mov [BASE+RA*8-8], RCd // Update control var.
4091 | movzx RDd, PC_RD // Get target from ITERL.
4096 |4: // Skip holes in array part.
4100 |5: // Traverse hash part.
4103 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4104 | imul ITYPEd, RCd, #NODE
4105 | add NODE:ITYPE, TAB:RB->node
4106 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4107 | lea TMPRd, [RCd+TMPRd+1]
4108 | // Copy key and value from hash slot.
4109 | mov RB, NODE:ITYPE->key
4110 | mov RC, NODE:ITYPE->val
4111 | mov [BASE+RA*8], RB
4112 | mov [BASE+RA*8+8], RC
4113 | mov [BASE+RA*8-8], TMPRd
4116 |7: // Skip holes in hash part.
4122 | ins_AD // RA = base, RD = target (points to ITERN)
4123 | mov CFUNC:RB, [BASE+RA*8-24]
4124 | checkfunc CFUNC:RB, >5
4125 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4126 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4127 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4129 | mov64 TMPR, U64x(fffe7fff, 00000000)
4130 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4133 |5: // Despecialize bytecode if any of the checks fail.
4136 | mov byte [PC], BC_ITERC
4141 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4142 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4143 | lea RA, [BASE+RA*8]
4144 | sub TMPR, [BASE-8]
4145 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4147 | jz >5 // Copy all varargs?
4148 | lea RB, [RA+RB*8-8]
4149 | cmp TMPR, BASE // No vararg slots?
4151 |1: // Copy vararg slots to destination slots.
4156 | cmp RA, RB // All destination slots filled?
4158 | cmp TMPR, BASE // No more vararg slots?
4160 |2: // Fill up remainder with nil.
4161 | mov aword [RA], LJ_TNIL
4168 |5: // Copy all varargs.
4169 | mov MULTRES, 1 // MULTRES = 0+1
4172 | jbe <3 // No vararg slots?
4176 | mov MULTRES, RBd // MULTRES = #varargs+1
4179 | cmp RC, L:RB->maxstack
4180 | ja >7 // Need to grow stack?
4181 |6: // Copy all vararg slots.
4186 | cmp TMPR, BASE // No more vararg slots?
4190 |7: // Grow stack for varargs.
4191 | mov L:RB->base, BASE
4194 | sub TMPR, BASE // Need delta, because BASE may change.
4196 | mov CARG2d, MULTRES
4199 | call extern lj_state_growstack // (lua_State *L, int n)
4200 | mov BASE, L:RB->base
4201 | movsxd TMPR, TMP1hi
4207 /* -- Returns ----------------------------------------------------------- */
4210 | ins_AD // RA = results, RD = extra_nresults
4211 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4212 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4215 case BC_RET: case BC_RET0: case BC_RET1:
4216 | ins_AD // RA = results, RD = nresults+1
4217 if (op != BC_RET0) {
4222 | mov MULTRES, RDd // Save nresults+1.
4223 | test PCd, FRAME_TYPE // Check frame type marker.
4224 | jnz >7 // Not returning to a fixarg Lua func?
4228 | mov KBASE, BASE // Use KBASE for result move.
4231 |2: // Move results down.
4232 | mov RB, [KBASE+RA]
4233 | mov [KBASE-16], RB
4238 | mov RDd, MULTRES // Note: MULTRES may be >255.
4239 | movzx RBd, PC_RB // So cannot compare with RDL!
4241 | cmp RBd, RDd // More results expected?
4250 | cmp PC_RB, RDL // More results expected?
4257 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4258 | mov LFUNC:KBASE, [BASE-16]
4259 | cleartp LFUNC:KBASE
4260 | mov KBASE, LFUNC:KBASE->pc
4261 | mov KBASE, [KBASE+PC2PROTO(k)]
4264 |6: // Fill up results with nil.
4266 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4269 | mov aword [BASE+RD*8-24], LJ_TNIL
4274 |7: // Non-standard return case.
4275 | lea RB, [PC-FRAME_VARG]
4276 | test RBd, FRAME_TYPEP
4278 | // Return from vararg function: relocate BASE down and RA up.
4280 if (op != BC_RET0) {
4286 /* -- Loops and branches ------------------------------------------------ */
4288 |.define FOR_IDX, [RA]
4289 |.define FOR_STOP, [RA+8]
4290 |.define FOR_STEP, [RA+16]
4291 |.define FOR_EXT, [RA+24]
4297 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4307 vk = (op == BC_IFORL || op == BC_JFORL);
4308 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4309 | lea RA, [BASE+RA*8]
4313 | mov TMPR, FOR_STOP
4315 | checkint TMPR, ->vmeta_for
4316 | mov ITYPE, FOR_STEP
4317 | test ITYPEd, ITYPEd; js >5
4319 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4321 #ifdef LUA_USE_ASSERT
4322 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4323 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4325 | mov ITYPE, FOR_STEP
4326 | test ITYPEd, ITYPEd; js >5
4327 | add RBd, ITYPEd; jo >1
4333 if (op == BC_FORI) {
4338 } else if (op == BC_JFORI) {
4344 } else if (op == BC_IFORL) {
4357 |5: // Invert check for negative step.
4360 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4362 | add RBd, ITYPEd; jo <1
4368 if (op == BC_FORI) {
4370 } else if (op == BC_JFORI) {
4374 } else if (op == BC_IFORL) {
4380 |9: // Fallback to FP variant.
4385 | checknumtp FOR_IDX, ->vmeta_for
4388 | checknumtp FOR_STOP, ->vmeta_for
4390 #ifdef LUA_USE_ASSERT
4391 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4392 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4397 | checknum RB, ->vmeta_for
4399 | movsd xmm0, qword FOR_IDX
4400 | movsd xmm1, qword FOR_STOP
4402 | addsd xmm0, qword FOR_STEP
4403 | movsd qword FOR_IDX, xmm0
4404 | test RB, RB; js >3
4408 | ucomisd xmm1, xmm0
4410 | movsd qword FOR_EXT, xmm0
4411 if (op == BC_FORI) {
4418 } else if (op == BC_JFORI) {
4422 } else if (op == BC_IFORL) {
4439 |3: // Invert comparison if step is negative.
4440 | ucomisd xmm0, xmm1
4448 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4456 | ins_AJ // RA = base, RD = target
4457 | lea RA, [BASE+RA*8]
4459 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4460 if (op == BC_JITERL) {
4464 | branchPC RD // Otherwise save control var + branch.
4472 | ins_A // RA = base, RD = target (loop extent)
4473 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4474 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4478 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4482 | ins_A // RA = base, RD = target (loop extent)
4488 | ins_AD // RA = base (ignored), RD = traceno
4489 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4490 | mov TRACE:RD, [RA+RD*8]
4491 | mov RD, TRACE:RD->mcode
4493 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4494 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4495 | // Save additional callee-save registers only used in compiled code.
4502 | sub rsp, 10*16+4*8
4503 | movdqa [RA-1*16], xmm6
4504 | movdqa [RA-2*16], xmm7
4505 | movdqa [RA-3*16], xmm8
4506 | movdqa [RA-4*16], xmm9
4507 | movdqa [RA-5*16], xmm10
4508 | movdqa [RA-6*16], xmm11
4509 | movdqa [RA-7*16], xmm12
4510 | movdqa [RA-8*16], xmm13
4511 | movdqa [RA-9*16], xmm14
4512 | movdqa [RA-10*16], xmm15
4523 | ins_AJ // RA = unused, RD = target
4528 /* -- Function headers -------------------------------------------------- */
4531 ** Reminder: A function may be called with func/args above L->maxstack,
4532 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4533 ** too. This means all FUNC* ops (including fast functions) must check
4534 ** for stack overflow _before_ adding more slots!
4541 case BC_FUNCV: /* NYI: compiled vararg functions. */
4542 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4550 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4551 | mov KBASE, [PC-4+PC2PROTO(k)]
4553 | lea RA, [BASE+RA*8] // Top of frame.
4554 | cmp RA, L:RB->maxstack
4555 | ja ->vm_growstack_f
4556 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4557 | cmp NARGS:RDd, RAd // Check for missing parameters.
4560 if (op == BC_JFUNCF) {
4567 |3: // Clear missing parameters.
4568 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4570 | cmp NARGS:RDd, RAd
4579 | int3 // NYI: compiled vararg functions
4580 break; /* NYI: compiled vararg functions. */
4583 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4584 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4585 | lea RD, [BASE+NARGS:RD*8+8]
4586 | mov LFUNC:KBASE, [BASE-16]
4587 | mov [RD-8], RB // Store delta + FRAME_VARG.
4588 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4591 | cmp RA, L:RB->maxstack
4592 | ja ->vm_growstack_v // Need to grow stack.
4595 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4599 |1: // Copy fixarg slots up to new frame.
4602 | jnb >3 // Less args than parameters?
4603 | mov KBASE, [RA-16]
4606 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4610 if (op == BC_JFUNCV) {
4614 | mov KBASE, [PC-4+PC2PROTO(k)]
4618 |3: // Clear missing parameters.
4619 | mov aword [RD], LJ_TNIL
4628 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4629 | mov CFUNC:RB, [BASE-16]
4631 | mov KBASE, CFUNC:RB->f
4633 | lea RD, [BASE+NARGS:RD*8-8]
4634 | mov L:RB->base, BASE
4635 | lea RA, [RD+8*LUA_MINSTACK]
4636 | cmp RA, L:RB->maxstack
4638 if (op == BC_FUNCC) {
4639 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4642 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4644 | ja ->vm_growstack_c // Need to grow stack.
4646 if (op == BC_FUNCC) {
4647 | call KBASE // (lua_State *L)
4649 | // (lua_State *L, lua_CFunction f)
4650 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4652 | // nresults returned in eax (RD).
4653 | mov BASE, L:RB->base
4654 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4655 | set_vmstate INTERP
4656 | lea RA, [BASE+RD*8]
4658 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4659 | mov PC, [BASE-8] // Fetch PC of caller.
4663 /* ---------------------------------------------------------------------- */
4666 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4672 static int build_backend(BuildCtx *ctx) /* Drives code generation: subroutines first, then one build_ins() call per opcode value. */
4675 dasm_growpc(Dst, BC__MAX); /* Passes BC__MAX as the PC label count; presumably one label per opcode -- confirm against the DynASM API. */
4676 build_subroutines(ctx);
4678 for (op = 0; op < BC__MAX; op++) /* Visit every opcode value in [0, BC__MAX). */
4679 build_ins(ctx, (BCOp)op, op); /* op is passed twice: cast to BCOp and as the plain integer third argument. */
4683 /* Emit pseudo frame-info for all assembler functions. */
4684 static void emit_asm_debug(BuildCtx *ctx)
4686 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4687 switch (ctx->mode) {
4689 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4692 "\t.long .LECIE0-.LSCIE0\n"
4694 "\t.long 0xffffffff\n"
4700 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4701 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4706 "\t.long .LEFDE0-.LASFDE0\n"
4708 "\t.long .Lframe0\n"
4711 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4712 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4713 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4714 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4715 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4717 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4718 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4721 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4725 "\t.long .LEFDE1-.LASFDE1\n"
4727 "\t.long .Lframe0\n"
4728 "\t.quad lj_vm_ffi_call\n"
4730 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4731 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4732 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4733 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4735 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4738 #if (defined(__sun__) && defined(__svr4__))
4739 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4741 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4745 "\t.long .LECIE1-.LSCIE1\n"
4749 "\t.string \"zPR\"\n"
4753 "\t.uleb128 6\n" /* augmentation length */
4754 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4755 "\t.long lj_err_unwind_dwarf-.\n"
4756 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4757 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4758 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4763 "\t.long .LEFDE2-.LASFDE2\n"
4765 "\t.long .LASFDE2-.Lframe1\n"
4766 "\t.long .Lbegin-.\n"
4768 "\t.uleb128 0\n" /* augmentation length */
4769 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4770 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4771 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4772 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4773 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4775 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4779 "\t.long .LECIE2-.LSCIE2\n"
4783 "\t.string \"zR\"\n"
4787 "\t.uleb128 1\n" /* augmentation length */
4788 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4789 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4790 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4795 "\t.long .LEFDE3-.LASFDE3\n"
4797 "\t.long .LASFDE3-.Lframe2\n"
4798 "\t.long lj_vm_ffi_call-.\n"
4800 "\t.uleb128 0\n" /* augmentation length */
4801 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4802 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4803 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4804 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4806 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4811 /* Mental note: never let Apple design an assembler.
4812 ** Or a linker. Or a plastic case. But I digress.
4814 case BUILD_machasm: {
4819 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4822 "\t.set L$set$x,LECIEX-LSCIEX\n"
4827 "\t.ascii \"zPR\\0\"\n"
4831 "\t.byte 6\n" /* augmentation length */
4832 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4833 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4834 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4835 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4836 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4839 for (i = 0; i < ctx->nsym; i++) {
4840 const char *name = ctx->sym[i].name;
4841 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4842 if (size == 0) continue;
4844 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4849 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4850 "\t.long L$set$%d\n"
4852 "\t.long LASFDE%d-EH_frame1\n"
4855 "\t.byte 0\n" /* augmentation length */
4856 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4857 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4858 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4859 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4860 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4863 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4869 "\t.set L$set$y,LECIEY-LSCIEY\n"
4874 "\t.ascii \"zR\\0\"\n"
4878 "\t.byte 1\n" /* augmentation length */
4879 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4880 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4881 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4885 "_lj_vm_ffi_call.eh:\n"
4887 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4888 "\t.long L$set$yy\n"
4890 "\t.long LASFDEY-EH_frame2\n"
4891 "\t.long _lj_vm_ffi_call-.\n"
4893 "\t.byte 0\n" /* augmentation length */
4894 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4895 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4896 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4897 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4899 "LEFDEY:\n\n", fcsize);
4902 fprintf(ctx->fp, ".subsections_via_symbols\n");
4906 default: /* Difficult for other modes. */