1 |// Low-level VM code for x86 CPUs.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h
10 |.section code_op, code_sub
12 |.actionlist build_actionlist
14 |.globalnames globnames
15 |.externnames extnames
17 |//-----------------------------------------------------------------------
19 |// Fixed register assignments for the interpreter.
20 |// This is very fragile and has many dependencies. Caveat emptor.
21 |.define BASE, edx // Not C callee-save, refetched anyway.
23 |.define KBASE, edi // Must be C callee-save.
24 |.define KBASEa, KBASE // Same register as KBASE in this variant.
25 |.define PC, esi // Must be C callee-save.
27 |.define DISPATCH, ebx // Must be C callee-save.
29 |.define KBASE, edi // Must be C callee-save. NOTE(review): repeated KBASE/PC/DISPATCH groups look like per-target alternatives; the selecting conditionals are not visible here.
31 |.define PC, esi // Must be C callee-save.
33 |.define DISPATCH, ebx // Must be C callee-save.
35 |.define KBASE, r15d // Must be C callee-save (r15 is pushed by the register save sequence that also pushes r14).
37 |.define PC, ebx // Must be C callee-save (rbx is pushed by the same save sequence).
39 |.define DISPATCH, r14d // Must be C callee-save (r14 is pushed by the same save sequence).
45 |.define RB, ebp // Must be ebp (C callee-save).
46 |.define RC, eax // Must be eax (fcomparepp uses fnstsw ax, and others).
67 |.define FCARG1, ecx // x86 fastcall arguments.
70 |.define CARG1, rcx // x64/WIN64 C call arguments.
78 |.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
79 |.define FCARG2, CARG2d // Second fastcall-style argument maps to the second C argument register.
81 |.define CARG1, rdi // x64/POSIX C call arguments.
93 |.define FCARG1, CARG1d // Simulate x86 fastcall.
94 |.define FCARG2, CARG2d // Second fastcall-style argument maps to the second C argument register.
97 |// Type definitions. Some of these are only used for documentation.
99 |.type GL, global_State // Enables GL:reg->field operands, e.g. GL:RB->vmstate.
100 |.type TVALUE, TValue // Typed access to TValue fields.
104 |.type LFUNC, GCfuncL // e.g. LFUNC:RB->pc.
105 |.type CFUNC, GCfuncC // e.g. CFUNC:RB->upvalue[0].
106 |.type PROTO, GCproto // Typed access to GCproto fields.
107 |.type UPVAL, GCupval // Typed access to GCupval fields.
111 |.type EXITINFO, ExitInfo // Typed access to ExitInfo fields.
113 |// Stack layout while in interpreter. Must match with lj_frame.h.
114 |//-----------------------------------------------------------------------
115 |.if not X64 // x86 stack layout.
117 |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--): the 7 slots ARG1..SAVE_PC.
119 | push ebp; push edi; push esi; push ebx // Land in SAVE_R4, SAVE_R3, SAVE_R2, SAVE_R1 respectively.
120 | sub esp, CFRAME_SPACE // Reserve the interpreter's local slots.
123 | add esp, CFRAME_SPACE // Drop the interpreter's local slots.
124 | pop ebx; pop esi; pop edi; pop ebp // Reverse order of the pushes.
127 |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
128 |.define SAVE_NRES, aword [esp+aword*14] // Wanted nresults+1 (vm_cpcall stores a negative stack delta here).
129 |.define SAVE_CFRAME, aword [esp+aword*13] // Link to the previous C frame.
130 |.define SAVE_L, aword [esp+aword*12] // lua_State *L (first C argument).
131 |//----- 16 byte aligned, ^^^ arguments from C caller
132 |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
133 |.define SAVE_R4, aword [esp+aword*10] // Saved ebp.
134 |.define SAVE_R3, aword [esp+aword*9] // Saved edi.
135 |.define SAVE_R2, aword [esp+aword*8] // Saved esi.
136 |//----- 16 byte aligned
137 |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves (saved ebx).
138 |.define SAVE_PC, aword [esp+aword*6]
139 |.define TMP2, aword [esp+aword*5] // Scratch slot; also used as MULTRES and the upper half of TMPQ.
140 |.define TMP1, aword [esp+aword*4] // Scratch slot; also the lower half of TMPQ.
141 |//----- 16 byte aligned
142 |.define ARG4, aword [esp+aword*3] // Outgoing C call argument 4.
143 |.define ARG3, aword [esp+aword*2] // Outgoing C call argument 3.
144 |.define ARG2, aword [esp+aword*1] // Outgoing C call argument 2.
145 |.define ARG1, aword [esp] //<-- esp while in interpreter.
146 |//----- 16 byte aligned, ^^^ arguments for C callee
148 |// FPARGx overlaps ARGx and ARG(x+1) on x86.
149 |.define FPARG3, qword [esp+qword*1] // Same bytes as ARG3/ARG4.
150 |.define FPARG1, qword [esp] // Same bytes as ARG1/ARG2.
151 |// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
152 |.define TMPQ, qword [esp+aword*4] // Starts at TMP1 and extends over TMP2.
156 |.define MULTRES, TMP2
158 |// Arguments for vm_call and vm_pcall.
159 |.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
161 |// Arguments for vm_cpcall.
162 |.define INARG_CP_CALL, SAVE_ERRF // Fourth incoming C argument slot (cp).
163 |.define INARG_CP_UD, SAVE_NRES // Third incoming C argument slot (ud).
164 |.define INARG_CP_FUNC, SAVE_CFRAME // Second incoming C argument slot (func).
166 |//-----------------------------------------------------------------------
167 |.elif X64WIN // x64/Windows stack layout
169 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--): the 5 slots CSAVE_1..ARG5.
171 | push rbp; push rdi; push rsi; push rbx // Land in SAVE_R4, SAVE_R3, SAVE_R2, SAVE_R1 respectively.
172 | sub rsp, CFRAME_SPACE // Reserve the callee save area plus ARG5.
175 | add rsp, CFRAME_SPACE // Drop the callee save area plus ARG5.
176 | pop rbx; pop rsi; pop rdi; pop rbp // Reverse order of the pushes.
179 |.define SAVE_CFRAME, aword [rsp+aword*13] // Full aword slot 13.
180 |.define SAVE_PC, dword [rsp+dword*25] // Upper dword of aword slot 12.
181 |.define SAVE_L, dword [rsp+dword*24] // Lower dword of aword slot 12.
182 |.define SAVE_ERRF, dword [rsp+dword*23] // Upper dword of aword slot 11.
183 |.define SAVE_NRES, dword [rsp+dword*22] // Lower dword of aword slot 11.
184 |.define TMP2, dword [rsp+dword*21] // Upper dword of aword slot 10.
185 |.define TMP1, dword [rsp+dword*20] // Lower dword of aword slot 10.
186 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
187 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
188 |.define SAVE_R4, aword [rsp+aword*8] // Saved rbp.
189 |.define SAVE_R3, aword [rsp+aword*7] // Saved rdi.
190 |.define SAVE_R2, aword [rsp+aword*6] // Saved rsi.
191 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves (saved rbx).
192 |.define ARG5, aword [rsp+aword*4] // Slot directly above the callee-owned 32 byte area.
193 |.define CSAVE_4, aword [rsp+aword*3]
194 |.define CSAVE_3, aword [rsp+aword*2]
195 |.define CSAVE_2, aword [rsp+aword*1]
196 |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
197 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
199 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
200 |.define TMPQ, qword [rsp+aword*10] // Same address as TMP1, extends over TMP2.
201 |.define MULTRES, TMP2
203 |.define ARG5d, dword [rsp+aword*4] // Dword view of the ARG5 slot.
206 |//-----------------------------------------------------------------------
207 |.else // x64/POSIX stack layout
209 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--): the 5 aword slots below SAVE_R1.
211 | push rbp; push rbx; push r15; push r14 // Land in SAVE_R4, SAVE_R3, SAVE_R2, SAVE_R1 respectively.
212 | sub rsp, CFRAME_SPACE // Reserve the interpreter's local slots.
215 | add rsp, CFRAME_SPACE // Drop the interpreter's local slots.
216 | pop r14; pop r15; pop rbx; pop rbp // Reverse order of the pushes.
219 |//----- 16 byte aligned
220 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
221 |.define SAVE_R4, aword [rsp+aword*8] // Saved rbp.
222 |.define SAVE_R3, aword [rsp+aword*7] // Saved rbx.
223 |.define SAVE_R2, aword [rsp+aword*6] // Saved r15.
224 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves (saved r14).
225 |.define SAVE_CFRAME, aword [rsp+aword*4] // Link to the previous C frame.
226 |.define TMPa, aword [rsp+aword*3] // Address-sized scratch slot.
227 |//----- ^^^ awords above, vvv dwords below
228 |.define SAVE_PC, dword [rsp+dword*5] // Upper dword of aword slot 2.
229 |.define SAVE_L, dword [rsp+dword*4] // Lower dword of aword slot 2.
230 |.define SAVE_ERRF, dword [rsp+dword*3] // Upper dword of aword slot 1.
231 |.define SAVE_NRES, dword [rsp+dword*2] // Lower dword of aword slot 1.
232 |.define TMP2, dword [rsp+dword*1] // Upper dword of aword slot 0.
233 |.define TMP1, dword [rsp] //<-- rsp while in interpreter.
234 |//----- 16 byte aligned
236 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
237 |.define TMPQ, qword [rsp] // Same address as TMP1, extends over TMP2.
238 |.define TMP3, dword [rsp+aword*3] // Low dword of the TMPa slot.
239 |.define MULTRES, TMP2
243 |//-----------------------------------------------------------------------
245 |// Instruction headers.
246 |.macro ins_A; .endmacro // Empty: no extra operand decoding.
247 |.macro ins_AD; .endmacro // Empty: no extra operand decoding.
248 |.macro ins_AJ; .endmacro // Empty: no extra operand decoding.
249 |.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro // RB = RCH, then RC = RCL, both zero-extended. Presumably RCH/RCL are the high/low bytes of RC, so the order matters -- confirm.
250 |.macro ins_AB_; movzx RB, RCH; .endmacro // Only RB = RCH (zero-extended); RC is left as is.
251 |.macro ins_A_C; movzx RC, RCL; .endmacro // Only RC = RCL (zero-extended).
252 |.macro ins_AND; not RDa; .endmacro // Bitwise complement of RDa.
254 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
262 | jmp aword [DISPATCH+OP*8]
264 | jmp aword [DISPATCH+OP*4]
268 |// Instruction footer.
270 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
271 | .define ins_next, ins_NEXT
272 | .define ins_next_, ins_NEXT
274 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
275 | // Affects only certain kinds of benchmarks (and only with -j off).
276 | // Around 10%-30% slower on Core2, much slower on P4.
286 |// Call decode and dispatch.
288 | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
289 | mov PC, LFUNC:RB->pc
295 | jmp aword [DISPATCH+OP*8]
297 | jmp aword [DISPATCH+OP*4]
302 | // BASE = new base, RB = LFUNC, RD = nargs+1
307 |//-----------------------------------------------------------------------
309 |// Macros to test operand types.
310 |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro // Compare the dword at offset +4 of 8-byte stack slot 'reg' with tp; only sets flags.
311 |.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro // Jump to target if the compared dword is unsigned-above LJ_TISNUM (not a number).
312 |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro // Jump to target unless the compared dword equals LJ_TSTR.
313 |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro // Jump to target unless the compared dword equals LJ_TTAB.
315 |// These operands must be used with movzx.
316 |.define PC_OP, byte [PC-4] // Opcode byte of the 4-byte instruction that ends at PC.
317 |.define PC_RA, byte [PC-3] // Operand A byte.
318 |.define PC_RB, byte [PC-1] // Operand B byte (high byte of the PC_RD word).
319 |.define PC_RC, byte [PC-2] // Operand C byte (low byte of the PC_RD word).
320 |.define PC_RD, word [PC-2] // 16 bit operand D; occupies the same two bytes as PC_RC and PC_RB.
322 |.macro branchPC, reg
323 | lea PC, [PC+reg*4-BCBIAS_J*4]
326 |// Assumes DISPATCH is relative to GL.
327 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Displacement of a global_State field, for use as [DISPATCH+DISPATCH_GL(field)]. */
328 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Displacement of a jit_State field, built the same way from GG_DISP2J. */
330 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Negative offset of a GCproto field measured from the end of the struct; used as [PC-4+PC2PROTO(field)]. */
332 |// Decrement hashed hotcount and trigger trace recorder if zero.
336 | and reg, HOTCOUNT_PCMASK
337 | sub word [DISPATCH+reg+GG_DISP2HOT], 1
344 | and reg, HOTCOUNT_PCMASK
345 | sub word [DISPATCH+reg+GG_DISP2HOT], 1
349 |// Set current VM state.
350 |.macro set_vmstate, st
351 | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
354 |// Annoying x87 stuff: support for two compare variants.
355 |.macro fcomparepp // Compare and pop st0 >< st1.
361 | fnstsw ax // eax modified!
366 |.macro fdup; fld st0; .endmacro // Push a copy of st0 onto the x87 stack.
367 |.macro fpop1; fstp st1; .endmacro // Store st0 into st1 and pop: drops the old st1, keeps the top value.
369 |// Synthesize SSE FP constants.
370 |.macro sseconst_abs, reg, tmp // Synthesize abs mask.
372 | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
374 | pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
378 |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
380 | mov64 tmp, U64x(val,00000000); movd reg, tmp
382 | mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
386 |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
387 | sseconst_hi reg, tmp, 80000000
389 |.macro sseconst_1, reg, tmp // Synthesize 1.0.
390 | sseconst_hi reg, tmp, 3ff00000
392 |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
393 | sseconst_hi reg, tmp, bff00000
395 |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
396 | sseconst_hi reg, tmp, 43300000
398 |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
399 | sseconst_hi reg, tmp, 43380000
402 |// Move table write barrier back. Overwrites reg.
403 |.macro barrierback, tab, reg // Clears the black bit of tab and pushes tab onto the gc.grayagain list.
404 | and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab)
405 | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current list head.
406 | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new list head.
407 | mov tab->gclist, reg // Link the old head behind tab.
410 |//-----------------------------------------------------------------------
412 /* Generate subroutines used by opcodes and other parts of the VM. */
413 /* The .code_sub section should be last to help static branch prediction. */
414 static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
418 |//-----------------------------------------------------------------------
419 |//-- Return handling ----------------------------------------------------
420 |//-----------------------------------------------------------------------
426 | // Return from pcall or xpcall fast func.
428 | sub BASE, PC // Restore caller base.
429 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
430 | mov PC, [BASE-4] // Fetch PC of previous frame.
431 | // Prepending may overwrite the pcall frame, so do it at the end.
432 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
435 | add RD, 1 // RD = nresults+1
437 | test PC, FRAME_TYPE
438 | jz ->BC_RET_Z // Handle regular return to Lua.
441 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
449 | neg PC // Previous base = BASE - delta.
454 | mov RB, [BASE+RA] // Move results down.
456 | mov RB, [BASE+RA+4]
466 | mov RA, SAVE_NRES // RA = wanted nresults+1
469 | jne >6 // More/less results wanted?
472 | mov L:RB->top, BASE
475 | mov RAa, SAVE_CFRAME // Restore previous C frame.
476 | mov L:RB->cframe, RAa
477 | xor eax, eax // Ok return status for vm_pcall.
484 | jb >7 // Less results wanted?
485 | // More results wanted. Check stack size and fill up results with nil.
486 | cmp BASE, L:RB->maxstack
488 | mov dword [BASE-4], LJ_TNIL
493 |7: // Less results wanted.
495 | jz <5 // But check for LUA_MULTRET+1.
496 | sub RA, RD // Negative result!
497 | lea BASE, [BASE+RA*8] // Correct top.
500 |8: // Corner case: need to grow stack for filling up results.
501 | // This can happen if:
502 | // - A C function grows the stack (a lot).
503 | // - The GC shrinks the stack in between.
504 | // - A return back from a lua_call() with (high) nresults adjustment.
505 | mov L:RB->top, BASE // Save current top held in BASE (yes).
506 | mov MULTRES, RD // Need to fill only remainder with nil.
509 | call extern lj_state_growstack@8 // (lua_State *L, int n)
510 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
513 |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
514 | // (void *cframe, int errcode)
516 | mov eax, CARG2d // Error return status for vm_pcall.
519 | mov eax, FCARG2 // Error return status for vm_pcall.
522 |->vm_unwind_c_eh: // Landing pad for external unwinder.
524 | mov GL:RB, L:RB->glref
525 | mov dword GL:RB->vmstate, ~LJ_VMST_C
528 |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
531 | and CARG1, CFRAME_RAWMASK
534 | and FCARG1, CFRAME_RAWMASK
537 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
539 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
540 | mov RD, 1+1 // Really 1+2 results, incr. later.
541 | mov BASE, L:RB->base
542 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
543 | add DISPATCH, GG_G2DISP
544 | mov PC, [BASE-4] // Fetch PC of previous frame.
545 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
547 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
549 |//-----------------------------------------------------------------------
550 |//-- Grow stack for calls -----------------------------------------------
551 |//-----------------------------------------------------------------------
553 |->vm_growstack_c: // Grow stack for C function.
554 | mov FCARG2, LUA_MINSTACK
557 |->vm_growstack_v: // Grow stack for vararg Lua function.
561 |->vm_growstack_f: // Grow stack for fixarg Lua function.
562 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
563 | lea RD, [BASE+NARGS:RD*8-8]
565 | movzx RA, byte [PC-4+PC2PROTO(framesize)]
566 | add PC, 4 // Must point after first instruction.
567 | mov L:RB->base, BASE
572 | // RB = L, L->base = new base, L->top = top
574 | call extern lj_state_growstack@8 // (lua_State *L, int n)
575 | mov BASE, L:RB->base
577 | mov LFUNC:RB, [BASE-8]
582 | // BASE = new base, RB = LFUNC, RD = nargs+1, PC restored.
583 | ins_callt // Just retry the call.
585 |//-----------------------------------------------------------------------
586 |//-- Entry points into the assembler VM ---------------------------------
587 |//-----------------------------------------------------------------------
589 |->vm_resume: // Setup C frame and resume thread.
590 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
593 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
598 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
602 | lea KBASEa, [esp+CFRAME_RESUME]
603 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
604 | add DISPATCH, GG_G2DISP
605 | mov L:RB->cframe, KBASEa
606 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
607 | mov SAVE_CFRAME, RDa
612 | cmp byte L:RB->status, RDL
613 | je >3 // Initial resume (like a call).
615 | // Resume after yield (like a return).
617 | mov byte L:RB->status, RDL
618 | mov BASE, L:RB->base
622 | add RD, 1 // RD = nresults+1
623 | sub RA, BASE // RA = resultofs
626 | test PC, FRAME_TYPE
630 |->vm_pcall: // Setup protected C frame and enter VM.
631 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
635 | mov SAVE_ERRF, CARG4d
639 |->vm_call: // Setup C frame and enter VM.
640 | // (lua_State *L, TValue *base, int nres1)
644 |1: // Entry point for vm_pcall above (PC = ftype).
646 | mov SAVE_NRES, CARG3d
647 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
652 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
655 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
656 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
657 | mov SAVE_CFRAME, KBASEa
658 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
660 | mov L:RB->cframe, rsp
662 | mov L:RB->cframe, esp
665 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
666 | add DISPATCH, GG_G2DISP
668 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
670 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
672 | sub PC, BASE // PC = frame delta + frame type
677 | add NARGS:RD, 1 // RD = nargs+1
680 | mov LFUNC:RB, [RA-8]
681 | cmp dword [RA-4], LJ_TFUNC
682 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
684 |->vm_call_dispatch_f:
687 | // BASE = new base, RD = nargs+1
689 |->vm_cpcall: // Setup protected C frame, call C.
690 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
693 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
697 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
698 | mov RC, INARG_CP_UD // Get args before they are overwritten.
699 | mov RA, INARG_CP_FUNC
700 | mov BASE, INARG_CP_CALL
702 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
704 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
705 | sub KBASE, L:RB->top
706 | mov SAVE_ERRF, 0 // No error function.
707 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
708 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
711 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
712 | mov SAVE_CFRAME, KBASEa
713 | mov L:RB->cframe, rsp
715 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
717 | mov ARG3, RC // Have to copy args downwards.
721 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
722 | mov SAVE_CFRAME, KBASE
723 | mov L:RB->cframe, esp
725 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
727 | // TValue * (new base) or NULL returned in eax (RC).
729 | jz ->vm_leave_cp // No base? Just remove C frame.
732 | jmp <2 // Else continue with the call.
734 |//-----------------------------------------------------------------------
735 |//-- Metamethod handling ------------------------------------------------
736 |//-----------------------------------------------------------------------
738 |//-- Continuation dispatch ----------------------------------------------
741 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
745 | sub BASE, PC // Restore caller BASE.
746 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
747 | mov RC, RA // ... in [RC]
748 | mov PC, [RB-12] // Restore PC from [cont|PC].
750 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
751 | lea KBASEa, qword [=>0]
754 | mov RA, dword [RB-16]
756 | mov LFUNC:KBASE, [BASE-8]
757 | mov KBASE, LFUNC:KBASE->pc
758 | mov KBASE, [KBASE+PC2PROTO(k)]
759 | // BASE = base, RC = result, RB = meta base
760 | jmp RAa // Jump to continuation.
762 |->cont_cat: // BASE = base, RC = result, RB = mbase
765 | lea RA, [BASE+RA*8]
772 | mov L:CARG1d, SAVE_L
773 | mov L:CARG1d->base, BASE
780 | mov L:CARG1d, SAVE_L
781 | mov L:CARG1d->base, BASE
798 |//-- Table indexing metamethods -----------------------------------------
801 | mov TMP1, RC // RC = GCstr *
803 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
806 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
807 | mov [RA], TAB:RB // RB = GCtab *
808 | mov dword [RA+4], LJ_TTAB
824 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
828 | movzx RC, PC_RC // Reload TValue *k from RC.
829 | lea RC, [BASE+RC*8]
831 | movzx RB, PC_RB // Reload TValue *t from RB.
832 | lea RB, [BASE+RB*8]
835 | mov L:CARG1d, SAVE_L
836 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
838 | mov CARG3, RCa // May be 64 bit ptr to stack.
845 | mov L:RB->base, BASE
848 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
849 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
850 | mov BASE, L:RB->base
853 |->cont_ra: // BASE = base, RC = result
857 | mov [BASE+RA*8+4], RB
858 | mov [BASE+RA*8], RC
861 |3: // Call __index metamethod.
862 | // BASE = base, L->top = new base, stack = cont/func/t/k
864 | mov [RA-12], PC // [cont|PC]
865 | lea PC, [RA+FRAME_CONT]
867 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
868 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
869 | jmp ->vm_call_dispatch_f
871 |//-----------------------------------------------------------------------
874 | mov TMP1, RC // RC = GCstr *
876 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
879 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
880 | mov [RA], TAB:RB // RB = GCtab *
881 | mov dword [RA+4], LJ_TTAB
897 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
901 | movzx RC, PC_RC // Reload TValue *k from RC.
902 | lea RC, [BASE+RC*8]
904 | movzx RB, PC_RB // Reload TValue *t from RB.
905 | lea RB, [BASE+RB*8]
908 | mov L:CARG1d, SAVE_L
909 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
911 | mov CARG3, RCa // May be 64 bit ptr to stack.
918 | mov L:RB->base, BASE
921 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
922 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
923 | mov BASE, L:RB->base
926 | // NOBARRIER: lj_meta_tset ensures the table is not black.
928 | mov RB, [BASE+RA*8+4]
929 | mov RA, [BASE+RA*8]
932 |->cont_nop: // BASE = base, (RC = result)
935 |3: // Call __newindex metamethod.
936 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
938 | mov [RA-12], PC // [cont|PC]
940 | mov RB, [BASE+RC*8+4] // Copy value to third argument.
941 | mov RC, [BASE+RC*8]
944 | lea PC, [RA+FRAME_CONT]
946 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
947 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
948 | jmp ->vm_call_dispatch_f
950 |//-- Comparison metamethods ---------------------------------------------
955 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
957 | lea CARG3d, [BASE+RD*8]
958 | lea CARG2d, [BASE+RA*8]
960 | lea CARG2d, [BASE+RA*8]
961 | lea CARG3d, [BASE+RD*8]
963 | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
964 | movzx CARG4d, PC_OP
967 | lea RD, [BASE+RD*8]
968 | lea RA, [BASE+RA*8]
974 | mov L:RB->base, BASE
977 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
978 | // 0/1 or TValue * (metamethod) returned in eax (RC).
980 | mov BASE, L:RB->base
992 |->cont_condt: // BASE = base, RC = result
994 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
998 |->cont_condf: // BASE = base, RC = result
999 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
1008 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1010 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1013 | mov CARG4d, RB // Caveat: CARG4d == RA.
1015 | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
1024 | mov L:RB->base, BASE
1027 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
1028 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1031 |//-- Arithmetic metamethods ---------------------------------------------
1034 | lea RC, [KBASE+RC*8]
1038 | lea RC, [KBASE+RC*8]
1039 | lea RB, [BASE+RB*8]
1044 | lea RC, [BASE+RD*8]
1049 | lea RC, [BASE+RC*8]
1051 | lea RB, [BASE+RB*8]
1053 | lea RA, [BASE+RA*8]
1060 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1062 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1064 | movzx CARG5d, PC_OP
1066 | mov CARG4d, RC // Caveat: CARG4d == RA.
1067 | mov L:CARG1d, SAVE_L
1068 | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
1070 | mov L:RB, L:CARG1d
1079 | mov L:RB->base, BASE
1082 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1083 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1084 | mov BASE, L:RB->base
1088 | // Call metamethod for binary op.
1090 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1093 | mov [RA-12], PC // [cont|PC]
1094 | lea PC, [RC+FRAME_CONT]
1095 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
1096 | jmp ->vm_call_dispatch
1100 | mov L:RB->base, BASE
1101 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
1102 | mov L:FCARG1, L:RB
1104 | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
1105 | // TValue * (metamethod) returned in eax (RC).
1106 | mov BASE, L:RB->base
1107 | jmp ->vmeta_binop // Binop call for compatibility.
1109 |//-- Call metamethod ----------------------------------------------------
1112 | lea RA, [BASE+RA*8+8]
1113 |->vmeta_call: // Resolve and call __call metamethod.
1114 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1115 | mov TMP2, RA // Save RA, RC for us.
1116 | mov TMP1, NARGS:RD // nargs+1 goes to TMP1.
1120 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1122 | lea CARG3d, [RA+NARGS:RD*8] // Third argument (top) = new base + (nargs+1)*8.
1123 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
1125 | lea RC, [RA+NARGS:RD*8] // Same top computation for the other calling variant.
1130 | mov L:RB->base, BASE // This is the caller's base!
1133 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1134 | mov BASE, L:RB->base // Reload BASE from L after the C call.
1136 | mov NARGS:RD, TMP1 // Restore nargs+1 saved in TMP1.
1137 | mov LFUNC:RB, [RA-8] // Load the function object from the slot below the new base.
1139 | // This is fragile. L->base must not move, KBASE must always be defined.
1140 | cmp KBASE, BASE // Continue with CALLT if flag set.
1143 | ins_call // Otherwise call resolved metamethod.
1145 |//-- Argument coercion for 'for' statement ------------------------------
1149 | mov L:RB->base, BASE
1150 | mov FCARG2, RA // Caveat: FCARG2 == BASE
1151 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
1153 | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
1154 | mov BASE, L:RB->base
1160 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1162 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
1165 |//-----------------------------------------------------------------------
1166 |//-- Fast functions -----------------------------------------------------
1167 |//-----------------------------------------------------------------------
1169 |.macro .ffunc, name
1173 |.macro .ffunc_1, name
1175 | cmp NARGS:RD, 1+1; jb ->fff_fallback
1178 |.macro .ffunc_2, name
1180 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1183 |.macro .ffunc_n, name
1185 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1189 |.macro .ffunc_n, name, op
1191 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1196 |.macro .ffunc_nsse, name, op
1198 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1199 | op xmm0, qword [BASE]
1202 |.macro .ffunc_nsse, name
1203 | .ffunc_nsse name, movsd
1206 |.macro .ffunc_nn, name
1208 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1209 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1211 | fld qword [BASE+8]
1214 |.macro .ffunc_nnsse, name
1216 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1217 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1218 | movsd xmm0, qword [BASE]
1219 | movsd xmm1, qword [BASE+8]
1222 |.macro .ffunc_nnr, name
1224 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1225 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1226 | fld qword [BASE+8]
1230 |// Inlined GC threshold check. Caveat: uses label 1.
1232 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1233 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1239 |//-- Base library: checks -----------------------------------------------
1243 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
1272 | jbe >1; mov RC, RB; 1:
1274 | mov CFUNC:RB, [BASE-8]
1275 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1277 | mov dword [BASE-4], LJ_TSTR
1278 | mov [BASE-8], STR:RC
1281 |//-- Base library: getters and setters ---------------------------------
1283 |.ffunc_1 getmetatable
1286 | cmp RB, LJ_TTAB; jne >6
1287 |1: // Field metatable must be at same offset for GCtab and GCudata!
1288 | mov TAB:RB, [BASE]
1289 | mov TAB:RB, TAB:RB->metatable
1291 | test TAB:RB, TAB:RB
1292 | mov dword [BASE-4], LJ_TNIL
1294 | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable]
1295 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1296 | mov [BASE-8], TAB:RB
1297 | mov RA, TAB:RB->hmask
1298 | and RA, STR:RC->hash
1300 | add NODE:RA, TAB:RB->node
1301 |3: // Rearranged logic, because we expect _not_ to find the key.
1302 | cmp dword NODE:RA->key.it, LJ_TSTR
1304 | cmp dword NODE:RA->key.gcr, STR:RC
1307 | mov NODE:RA, NODE:RA->next
1308 | test NODE:RA, NODE:RA
1310 | jmp ->fff_res1 // Not found, keep default result.
1313 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1315 | mov [BASE-4], RB // Return value of mt.__metatable.
1320 | cmp RB, LJ_TUDATA; je <1
1321 | cmp RB, LJ_TISNUM; ja >7
1325 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1328 |.ffunc_2 setmetatable
1329 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1330 | // Fast path: no mt for table yet and not clearing the mt.
1331 | mov TAB:RB, [BASE]
1332 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1333 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
1334 | mov TAB:RC, [BASE+8]
1335 | mov TAB:RB->metatable, TAB:RC
1337 | mov dword [BASE-4], LJ_TTAB // Return original table.
1338 | mov [BASE-8], TAB:RB
1339 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1341 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1342 | barrierback TAB:RB, RC
1347 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1349 | mov RB, BASE // Save BASE.
1350 | lea CARG3d, [BASE+8]
1351 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1352 | mov CARG1d, SAVE_L
1354 | mov RB, BASE // Save BASE.
1355 | mov CARG2d, [BASE]
1356 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1357 | mov CARG1d, SAVE_L
1359 | mov TAB:RD, [BASE]
1363 | mov RB, BASE // Save BASE.
1367 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1368 | // cTValue * returned in eax (RD).
1369 | mov BASE, RB // Restore BASE.
1370 | mov RB, [RD] // Copy table slot.
1377 |//-- Base library: conversions ------------------------------------------
1380 | // Only handles the number case inline (without a base argument).
1381 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1382 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1384 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1386 | fld qword [BASE]; jmp ->fff_resn
1390 | // Only handles the string or number case inline.
1392 | cmp dword [BASE+4], LJ_TSTR; jne >3
1393 | // A __tostring method in the string base metatable is ignored.
1394 | mov STR:RD, [BASE]
1396 | mov dword [BASE-4], LJ_TSTR
1397 | mov [BASE-8], STR:RD
1399 |3: // Handle numbers inline, unless a number base metatable is present.
1400 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1401 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1402 | jne ->fff_fallback
1403 | ffgccheck // Caveat: uses label 1.
1405 | mov L:RB->base, BASE // Add frame since C call can throw.
1406 | mov SAVE_PC, PC // Redundant (but a defined value).
1407 |.if X64 and not X64WIN
1408 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1410 | mov L:FCARG1, L:RB
1411 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1412 | // GCstr returned in eax (RD).
1413 | mov BASE, L:RB->base
1416 |//-- Base library: iterators -------------------------------------------
1419 | je >2 // Missing 2nd arg?
1421 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1423 | mov L:RB->base, BASE // Add frame since C call can throw.
1426 | lea CARG3d, [BASE+8]
1427 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1430 | mov CARG2d, [BASE]
1431 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1434 | mov TAB:RD, [BASE]
1440 | mov SAVE_PC, PC // Redundant (but a defined value).
1441 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1442 | // Flag returned in eax (RD).
1443 | mov BASE, L:RB->base
1444 | test RD, RD; jz >3 // End of traversal?
1445 | mov RB, [BASE+8] // Copy key and value to results.
1456 |2: // Set missing 2nd arg to nil.
1457 | mov dword [BASE+12], LJ_TNIL
1459 |3: // End of traversal: return nil.
1460 | mov dword [BASE-4], LJ_TNIL
1464 | mov CFUNC:RB, [BASE-8]
1465 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1466 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1468 | mov dword [BASE-4], LJ_TFUNC
1469 | mov [BASE-8], CFUNC:RD
1470 | mov dword [BASE+12], LJ_TNIL
1474 |.ffunc_1 ipairs_aux // Iterator step: args are a table and a numeric index.
1475 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Arg 1 must be a table.
1476 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback // Arg 2 must be a number.
1479 | movsd xmm0, qword [BASE+8] // Load the index from arg 2 (SSE register).
1480 | sseconst_1 xmm1, RBa // xmm1 = 1.0.
1483 | movsd qword [BASE-8], xmm0 // Store xmm0 to the first result slot (results start at BASE-8).
1486 | fld qword [BASE+8] // Same load of arg 2, using the x87 stack instead.
1490 | fstp qword [BASE-8] // x87 counterpart of the result store above.
1494 | mov TAB:RB, [BASE] // RB = table object from arg 1.
1495 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
1497 | add RD, TAB:RB->array // Add the array base; RD is dereferenced as a slot address below.
1499 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 // Nil slot: return no results.
1500 | mov RB, [RD] // Copy array slot.
1505 |2: // Check for empty hash part first. Otherwise call C function.
1506 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 // hmask == 0: return no results without calling C.
1507 | mov FCARG1, TAB:RB // Arg 1: the table.
1508 | mov RB, BASE // Save BASE.
1509 | mov FCARG2, RD // Caveat: FCARG2 == BASE
1510 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1511 | // cTValue * or NULL returned in eax (RD).
1520 | mov CFUNC:RB, [BASE-8]
1521 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1522 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1524 | mov dword [BASE-4], LJ_TFUNC
1525 | mov [BASE-8], CFUNC:RD
1528 | movsd qword [BASE+8], xmm0
1531 | fstp qword [BASE+8]
1536 |//-- Base library: catch errors ----------------------------------------
1541 | mov PC, 8+FRAME_PCALL
1543 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1544 | shr RB, HOOK_ACTIVE_SHIFT
1546 | add PC, RB // Remember active hook before pcall.
1547 | jmp ->vm_call_dispatch
1550 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
1551 | mov RB, [BASE+4] // Swap function and traceback.
1553 | mov dword [BASE+4], LJ_TFUNC
1554 | mov LFUNC:RB, [BASE]
1556 | mov [BASE+8], LFUNC:RB
1560 | mov PC, 16+FRAME_PCALL
1563 |//-- Coroutine library --------------------------------------------------
1565 |.macro coroutine_resume_wrap, resume
1567 |.ffunc_1 coroutine_resume
1570 |.ffunc coroutine_wrap_aux
1571 | mov CFUNC:RB, [BASE-8]
1572 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1582 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
1584 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1585 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1587 | je >1 // Status != LUA_YIELD (i.e. 0)?
1588 | cmp RA, L:RB->base // Check for presence of initial func.
1592 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1594 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1596 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1600 | mov L:RB->base, BASE
1602 | add BASE, 8 // Keep resumed thread in stack for GC.
1604 | mov L:RB->top, BASE
1606 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1608 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1610 | sub RBa, PCa // Relative to PC.
1614 |2: // Move args to coroutine.
1632 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1633 | set_vmstate INTERP
1639 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1641 | mov BASE, L:RB->base
1642 | cmp eax, LUA_YIELD
1645 | mov RA, L:PC->base
1646 | mov KBASE, L:PC->top
1647 | mov L:PC->top, RA // Clear coroutine stack.
1650 | je >6 // No results?
1653 | cmp RD, L:RB->maxstack
1654 | ja >9 // Need to grow stack?
1658 |5: // Move results from coroutine.
1668 | lea RD, [PC+2] // nresults+1 = 1 + true + results.
1669 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
1671 | lea RD, [PC+1] // nresults+1 = 1 + results.
1681 | test PC, FRAME_TYPE
1685 |8: // Coroutine returned with error (at co->top-1).
1687 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
1690 | mov L:PC->top, RA // Clear error from coroutine stack.
1691 | mov RD, [RA] // Copy error message.
1695 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1700 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
1701 | // Error function does not return.
1704 |9: // Handle stack expansion on return from yield.
1708 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1710 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1713 | call extern lj_state_growstack@8 // (lua_State *L, int n)
1714 | mov BASE, L:RB->base
1715 | jmp <4 // Retry the stack move.
1718 | coroutine_resume_wrap 1 // coroutine.resume
1719 | coroutine_resume_wrap 0 // coroutine.wrap
1721 |.ffunc coroutine_yield
1723 | test aword L:RB->cframe, CFRAME_RESUME
1725 | mov L:RB->base, BASE
1726 | lea RD, [BASE+NARGS:RD*8-8]
1729 | mov aword L:RB->cframe, RDa
1731 | mov byte L:RB->status, al
1732 | jmp ->vm_leave_unw
1734 |//-- Math library -------------------------------------------------------
1739 | fstp qword [BASE-8]
1742 |.ffunc_nsse math_abs
1743 | sseconst_abs xmm1, RDa
1747 | movsd qword [BASE-8], xmm0
1753 |->fff_resxmm0: // Dummy.
1756 | fstp qword [BASE-8]
1763 | test PC, FRAME_TYPE
1766 | cmp PC_RB, RDL // More results expected?
1768 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1770 | not RAa // Note: ~RA = -(RA+1)
1771 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
1774 |6: // Fill up results with nil.
1775 | mov dword [BASE+RD*8-12], LJ_TNIL
1779 |7: // Non-standard return case.
1780 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
1784 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
1785 |.ffunc_nsse math_floor; call ->vm_floor; jmp ->fff_resxmm0
1786 |.ffunc_nsse math_ceil; call ->vm_ceil; jmp ->fff_resxmm0
1788 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
1789 |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
1790 |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
1793 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
1794 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
1795 |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
1797 |.ffunc_n math_sin; fsin; jmp ->fff_resn
1798 |.ffunc_n math_cos; fcos; jmp ->fff_resn
1799 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
1802 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
1805 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
1807 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
1809 |.macro math_extern, func
1811 | .ffunc_nsse math_ .. func
1813 | movsd FPARG1, xmm0
1817 | .ffunc_n math_ .. func
1822 | call extern lj_wrapper_ .. func
1837 |.ffunc_nsse math_rad
1838 | mov CFUNC:RB, [BASE-8]
1839 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
1843 | mov CFUNC:RB, [BASE-8]
1844 | fmul qword CFUNC:RB->upvalue[0]
1848 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
1849 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
1851 |.ffunc_1 math_frexp // Split a number into a mantissa in [0.5,1) and a binary exponent.
1853 | cmp RB, LJ_TISNUM; ja ->fff_fallback // RB is checked as a type tag: non-numbers fall back.
1856 | mov [BASE-4], RB; mov [BASE-8], RC // Store RB/RC as the hi/lo words of the first result slot.
1857 | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Shift out the sign bit; all-ones exponent field goes to 3.
1860 | cmp RB, 0x00200000; jb >4 // Exponent field of zero: denormal, handled at 4.
1862 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
1866 | mov TMP1, RB; fild TMP1 // Convert the integer exponent to FP (x87 load via TMP1).
1869 | and RB, 0x800fffff // Mask off exponent.
1870 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
1874 | movsd qword [BASE], xmm0 // Second result slot (BASE) receives xmm0.
1880 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
1882 | xorps xmm0, xmm0; jmp <2 // Exponent result = 0.0, then rejoin at 2.
1886 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
1888 | movsd xmm0, qword [BASE] // Reload the argument.
1889 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
1891 | movsd qword [BASE-8], xmm0 // Store xmm0 back to the first result slot.
1894 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
1895 | fstp qword [BASE-8] // x87 counterpart of the store above.
1897 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // Reload the high word, use bias 1076 and rejoin at 1.
1900 |.ffunc_nsse math_modf
1906 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
1912 | movsd qword [BASE-8], xmm0
1913 | movsd qword [BASE], xmm4
1919 | fstp qword [BASE-8]
1922 | mov RC, [BASE-4]; mov RB, [BASE+4]
1923 | xor RC, RB; js >3 // Need to adjust sign?
1928 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
1932 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
1934 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
1937 |.ffunc_nnr math_fmod
1938 |1: ; fprem; fnstsw ax; sahf; jp <1
1943 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
1945 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
1948 |.macro math_minmax, name, cmovop, nocmovop, sseop
1955 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja ->fff_fallback
1956 | movsd xmm1, qword [BASE+RB*8-8]
1967 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja >5
1968 | fld qword [BASE+RB*8-8]
1970 | fucomi st1; cmovop st1; fpop1
1973 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
1982 | math_minmax math_min, fcmovnbe, jz, minsd
1983 | math_minmax math_max, fcmovbe, jnz, maxsd
1986 | fpop; jmp ->fff_fallback
1989 |//-- String library -----------------------------------------------------
1991 |.ffunc_1 string_len // Return the length of a string argument as a number.
1992 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
1993 | mov STR:RB, [BASE] // RB = string object.
1995 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 // SSE: convert len to double, return via xmm0.
1997 | fild dword STR:RB->len; jmp ->fff_resn // x87: load len as FP, return via the x87 stack.
2000 |.ffunc string_byte // Only handle the 1-arg case here.
2001 | cmp NARGS:RD, 1+1; jne ->fff_fallback // RD = nargs+1, so this requires exactly one arg.
2002 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
2003 | mov STR:RB, [BASE] // RB = string object.
2005 | cmp dword STR:RB->len, 1 // Length below 1 means the string is empty.
2006 | jb ->fff_res0 // Return no results for empty string.
2007 | movzx RB, byte STR:RB[1] // Zero-extend the byte right after the string header (presumably the first character).
2009 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 // SSE: return the byte value as a double.
2011 | mov TMP1, RB; fild TMP1; jmp ->fff_resn // x87: same conversion, going through TMP1.
2014 |.ffunc string_char // Only handle the 1-arg case here.
2016 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2017 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback // Arg 1 must be a number.
2019 | cvtsd2si RC, qword [BASE] // Convert the arg to an integer in RC.
2020 | cmp RC, 255; ja ->fff_fallback // Only 0..255 handled; unsigned compare also rejects negatives.
2025 | cmp TMP2, 255; ja ->fff_fallback // Same range check on the value in TMP2 (presumably the x87 path).
2032 | lea RDa, TMP2 // Points to stack. Little-endian.
2035 | mov L:RB->base, BASE // Publish the base before the C call.
2037 | mov CARG3d, TMP3 // Zero-extended to size_t.
2038 | mov CARG2, RDa // May be 64 bit ptr to stack.
2045 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2046 | // GCstr * returned in eax (RD).
2047 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2049 | mov dword [BASE-4], LJ_TSTR // Result type tag: string.
2050 | mov [BASE-8], STR:RD // Result value: the GCstr returned by lj_str_new.
2056 | cmp NARGS:RD, 1+2; jb ->fff_fallback
2058 | cmp dword [BASE+20], LJ_TISNUM; ja ->fff_fallback
2060 | cvtsd2si RB, qword [BASE+16]
2063 | fld qword [BASE+16]
2067 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2068 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
2069 | mov STR:RB, [BASE]
2071 | mov RB, STR:RB->len
2073 | cvtsd2si RA, qword [BASE+8]
2076 | fld qword [BASE+8]
2082 | cmp RB, RC // len < end? (unsigned compare)
2085 | test RA, RA // start <= 0?
2089 | sub RC, RA // start > end?
2091 | lea RB, [STR:RB+RA+#STR-1]
2102 |5: // Negative end or overflow.
2104 | lea RC, [RC+RB+1] // end = end+(len+1)
2107 | mov RC, RB // end = len
2110 |7: // Negative start or underflow.
2112 | add RA, RB // start = start+(len+1)
2114 | jg <3 // start > 0?
2116 | mov RA, 1 // start = 1
2119 |->fff_emptystr: // Range underflow.
2120 | xor RC, RC // Zero length. Any ptr in RB is ok.
2123 |.ffunc_2 string_rep // Only handle the 1-char case inline.
2125 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
2126 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback // Arg 2 must be a number.
2127 | mov STR:RB, [BASE] // RB = string object.
2129 | cvtsd2si RC, qword [BASE+8] // RC = repeat count as an integer.
2131 | fld qword [BASE+8] // x87 load of the repeat count.
2136 | jle ->fff_emptystr // Count <= 0? (or non-int)
2137 | cmp dword STR:RB->len, 1 // Both branches below use the flags from this compare.
2138 | jb ->fff_emptystr // Zero length string?
2139 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2140 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 // Temp buffer smaller than count: fall back.
2141 | movzx RA, byte STR:RB[1] // RA = the byte right after the string header (the single char).
2142 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] // RB = temp buffer pointer.
2148 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2153 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] // RD = start of the temp buffer.
2156 |.ffunc_1 string_reverse
2158 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2159 | mov STR:RB, [BASE]
2160 | mov RC, STR:RB->len
2162 | jz ->fff_emptystr // Zero length string?
2163 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2165 | mov TMP2, PC // Need another temp register.
2171 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2173 | movzx RA, byte [RB]
2182 |.macro ffstring_case, name, lo, hi
2185 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2186 | mov STR:RB, [BASE]
2187 | mov RC, STR:RB->len
2188 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2190 | mov TMP2, PC // Need another temp register.
2196 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2198 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2199 | movzx RA, byte [RB+RC]
2215 |ffstring_case string_lower, 0x41, 0x5a
2216 |ffstring_case string_upper, 0x61, 0x7a
2218 |//-- Table library ------------------------------------------------------
2220 |.ffunc_1 table_getn // Return the length of a table argument as a number.
2221 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Arg 1 must be a table.
2222 | mov RB, BASE // Save BASE.
2223 | mov TAB:FCARG1, [BASE] // Arg 1 of the fastcall: the table object.
2224 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2225 | // Length of table returned in eax (RD).
2226 | mov BASE, RB // Restore BASE.
2228 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 // SSE: convert the length to double, return via xmm0.
2231 | mov ARG1, RD; fild ARG1; jmp ->fff_resn // x87: same conversion, going through ARG1.
2235 |//-- Bit library --------------------------------------------------------
2237 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2240 |.ffunc_nsse bit_tobit
2241 | sseconst_tobit xmm1, RBa
2249 | mov TMP1, TOBIT_BIAS
2251 | fstp FPARG1 // 64 bit FP store.
2252 | fild ARG1 // 32 bit integer load (s2lfwd ok).
2257 |.macro .ffunc_bit, name
2260 | sseconst_tobit xmm1, RBa
2266 | mov TMP1, TOBIT_BIAS
2274 |.macro .ffunc_bit_op, name, ins
2276 | mov TMP2, NARGS:RD // Save for fallback.
2277 | lea RD, [BASE+NARGS:RD*8-16]
2281 | cmp dword [RD+4], LJ_TISNUM; ja ->fff_fallback_bit_op
2283 | movsd xmm0, qword [RD]
2299 |.ffunc_bit_op bit_band, and
2300 |.ffunc_bit_op bit_bor, or
2301 |.ffunc_bit_op bit_bxor, xor
2303 |.ffunc_bit bit_bswap
2307 |.ffunc_bit bit_bnot
2322 |->fff_fallback_bit_op:
2323 | mov NARGS:RD, TMP2 // Restore for fallback
2324 | jmp ->fff_fallback
2326 |.macro .ffunc_bit_sh, name, ins
2329 | sseconst_tobit xmm2, RBa
2332 | mov RC, RA // Assumes RA is ecx.
2338 | mov TMP1, TOBIT_BIAS
2343 | mov RC, RA // Assumes RA is ecx.
2353 |.ffunc_bit_sh bit_lshift, shl
2354 |.ffunc_bit_sh bit_rshift, shr
2355 |.ffunc_bit_sh bit_arshift, sar
2356 |.ffunc_bit_sh bit_rol, rol
2357 |.ffunc_bit_sh bit_ror, ror
2359 |//-----------------------------------------------------------------------
2362 | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
2363 | jmp ->fff_fallback
2365 | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
2366 |->fff_fallback: // Call fast function fallback handler.
2367 | // BASE = new base, RD = nargs+1
2369 | mov PC, [BASE-4] // Fallback may overwrite PC.
2370 | mov SAVE_PC, PC // Redundant (but a defined value).
2371 | mov L:RB->base, BASE // Publish the base before calling the handler.
2372 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the address just past the last argument.
2373 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2375 | mov CFUNC:RD, [BASE-8] // RD = function object from the frame slot at BASE-8.
2376 | cmp RA, L:RB->maxstack // Compare the required top against L->maxstack.
2377 | ja >5 // Need to grow stack.
2383 | call aword CFUNC:RD->f // (lua_State *L)
2384 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2385 | // Either throws an error or recovers and returns 0 or MULTRES (+1).
2386 | test RD, RD; jnz ->fff_res // Returned MULTRES (already in RD).
2387 |1: // Returned 0: retry fast path.
2392 | mov LFUNC:RB, [BASE-8] // RB = function object in the frame slot.
2393 | cmp dword [BASE-4], PC // Does the word at BASE-4 still match PC?
2394 | jne >2 // Tailcalled?
2395 | ins_callt // Retry the call.
2397 |2: // Reconstruct previous base for vmeta_call.
2399 | test PC, FRAME_TYPE // Check the frame type bits in PC.
2402 | not RBa // Note: ~RB = -(RB+1)
2403 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
2404 | jmp ->vm_call_dispatch // Resolve again.
2409 | jmp ->vm_call_dispatch // Resolve again.
2411 |5: // Grow stack for fallback handler.
2412 | mov FCARG2, LUA_MINSTACK // Arg 2: number of slots to grow by.
2414 | call extern lj_state_growstack@8 // (lua_State *L, int n)
2415 | mov BASE, L:RB->base // Reload BASE from L->base (presumably the stack may have moved).
2416 | jmp <1 // Dumb retry (goes through ff first).
2418 |->fff_gcstep: // Call GC step function.
2419 | // BASE = new base, RD = nargs+1
2420 | pop RBa // Must keep stack at same level.
2421 | mov TMPa, RBa // Save return address
2423 | mov SAVE_PC, PC // Redundant (but a defined value).
2424 | mov L:RB->base, BASE // Publish the base before the C call.
2425 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the address just past the last argument.
2428 | call extern lj_gc_step@4 // (lua_State *L)
2429 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2435 | push RBa // Restore return address.
2438 |//-----------------------------------------------------------------------
2439 |//-- Special dispatch targets -------------------------------------------
2440 |//-----------------------------------------------------------------------
2442 |->vm_record: // Dispatch target for recording phase.
2444 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2445 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2447 | // Decrement the hookcount for consistency, but always do the call.
2448 | test RDL, HOOK_ACTIVE
2450 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2452 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2456 |->vm_rethook: // Dispatch target for return hooks.
2457 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2458 | test RDL, HOOK_ACTIVE // Hook already active?
2462 |->vm_inshook: // Dispatch target for instr/line hooks.
2463 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2464 | test RDL, HOOK_ACTIVE // Hook already active?
2467 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2469 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2471 | test RDL, LUA_MASKLINE
2475 | mov L:RB->base, BASE
2476 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2478 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2479 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc)
2481 | mov BASE, L:RB->base
2488 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2490 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
2493 |->cont_hook: // Continue from hook yield.
2496 | mov MULTRES, RA // Restore MULTRES for *M ins.
2499 |->vm_hotloop: // Hot loop counter underflow.
2504 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
2505 | mov RB, LFUNC:RB->pc
2506 | movzx RD, byte [RB+PC2PROTO(framesize)]
2507 | lea RD, [BASE+RD*8]
2509 | mov L:RB->base, BASE
2512 | lea FCARG1, [DISPATCH+GG_DISP2J]
2513 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2515 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2520 |->vm_callhook: // Dispatch target for call hooks.
2526 |->vm_hotcall: // Hot call counter underflow.
2529 | or PC, 1 // Marker for hot call.
2532 | lea RD, [BASE+NARGS:RD*8-8]
2534 | mov L:RB->base, BASE
2538 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
2539 | // ASMFunction returned in eax/rax (RDa).
2540 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2544 | mov BASE, L:RB->base
2554 |//-----------------------------------------------------------------------
2555 |//-- Trace exit handler -------------------------------------------------
2556 |//-----------------------------------------------------------------------
2558 |// Called from an exit stub with the exit number on the stack.
2559 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2565 | push ebp; lea ebp, [esp+12]; push ebp
2566 | push ebx; push edx; push ecx; push eax
2567 | movzx RC, byte [ebp-4] // Reconstruct exit number.
2568 | mov RCH, byte [ebp-8]
2569 | mov [ebp-4], edi; mov [ebp-8], esi
2570 | // Caveat: DISPATCH is ebx.
2571 | mov DISPATCH, [ebp]
2572 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2574 | mov [DISPATCH+DISPATCH_J(exitno)], RC
2575 | mov [DISPATCH+DISPATCH_J(parent)], RA
2576 | sub esp, 8*8+16 // Room for SSE regs + args.
2578 | // Must not access SSE regs if SSE2 is not present.
2579 | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2
2581 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2582 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2583 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2584 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2586 | // Caveat: RB is ebp.
2587 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
2588 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2589 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2590 | mov L:RB->base, BASE
2591 | lea FCARG2, [esp+16]
2592 | lea FCARG1, [DISPATCH+GG_DISP2J]
2593 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2594 | // Interpreter C frame returned in eax.
2595 | mov esp, eax // Reposition stack to C frame.
2596 | mov BASE, L:RB->base
2598 | mov SAVE_L, L:RB // Needed for on-trace resume/yield.
2603 | mov LFUNC:KBASE, [BASE-8]
2604 | mov KBASE, LFUNC:KBASE->pc
2605 | mov KBASE, [KBASE+PC2PROTO(k)]
2606 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2607 | set_vmstate INTERP
2611 |//-----------------------------------------------------------------------
2612 |//-- Math helper functions ----------------------------------------------
2613 |//-----------------------------------------------------------------------
2615 |// FP value rounding. Called by math.floor/math.ceil fast functions
2616 |// and from JIT code.
2618 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
2619 |.macro vm_round_x87, mode1, mode2
2620 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
2624 |.if mode2 ~= 0xffff
2628 | fldcw word [esp+6]
2630 | fldcw word [esp+4]
2635 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2636 |.macro vm_round_sse, mode // mode: 0 = floor, 1 = ceil, 2 = trunc.
2637 | sseconst_abs xmm2, RDa // xmm2 = mask that clears the sign bit.
2638 | sseconst_2p52 xmm3, RDa // xmm3 = 2^52.
2640 | andpd xmm1, xmm2 // |x|
2641 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2643 | andnpd xmm2, xmm0 // Isolate sign bit.
2644 |.if mode == 2 // trunc(x)?
2646 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2648 | sseconst_1 xmm3, RDa // xmm3 = 1.0 (replaces the 2^52 constant).
2649 | cmpsd xmm0, xmm1, 1 // |x| < result?
2651 | subsd xmm1, xmm0 // If yes, subtract 1.0 (the constant loaded above is +1).
2652 | orpd xmm1, xmm2 // Merge sign bit back in.
2654 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2656 | orpd xmm1, xmm2 // Merge sign bit back in.
2657 | .if mode == 1 // ceil(x)?
2658 | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
2659 | cmpsd xmm0, xmm1, 6 // x > result?
2660 | .else // floor(x)?
2661 | sseconst_1 xmm2, RDa // xmm2 = 1.0.
2662 | cmpsd xmm0, xmm1, 1 // x < result?
2665 | subsd xmm1, xmm0 // If yes, subtract +-1.
2672 |.macro vm_round, name, ssemode, mode1, mode2
2675 | vm_round_x87 mode1, mode2
2678 | vm_round_sse ssemode
2681 | vm_round vm_floor, 0, 0x0400, 0xf7ff
2682 | vm_round vm_ceil, 1, 0x0800, 0xfbff
2683 | vm_round vm_trunc, 2, 0x0c00, 0xffff
2685 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2688 |// Args in xmm0/xmm1, return value in xmm0.
2689 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2692 | sseconst_abs xmm2, RDa
2693 | sseconst_2p52 xmm3, RDa
2695 | andpd xmm4, xmm2 // |x/y|
2696 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2698 | andnpd xmm2, xmm0 // Isolate sign bit.
2699 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2701 | orpd xmm4, xmm2 // Merge sign bit back in.
2702 | sseconst_1 xmm2, RDa
2703 | cmpsd xmm0, xmm4, 1 // x/y < result?
2705 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2716 |// Args/ret on x87 stack (y on top). No xmm registers modified.
2717 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
2720 | fnstcw word [esp+4]
2725 | fldcw word [esp+6]
2727 | fldcw word [esp+4]
2733 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
2734 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
2735 |// Caveat: needs 3 slots on x87 stack!
2737 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
2740 | .define expscratch, dword [rsp+8] // Use scratch area.
2742 | .define expscratch, dword [rsp-8] // Use red zone.
2744 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
2746 | fst expscratch // Caveat: overwrites ARG1.
2747 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
2748 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
2749 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
2750 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
2751 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
2757 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
2761 |// Args/ret on x87 stack (y on top). RC (eax) modified.
2762 |// Caveat: needs 3 slots on x87 stack!
2764 | fist dword [esp+4] // Store/reload int before comparison.
2765 | fild dword [esp+4] // Integral exponent used in vm_powi.
2769 | fucomp st1; fnstsw ax; sahf
2771 | jnz >8 // Branch for FP exponents.
2772 | jp >9 // Branch for NaN exponent.
2773 | fpop // Pop y and fallthrough to vm_powi.
2775 |// FP/int power function x^i. Arg1/ret on x87 stack.
2776 |// Arg2 (int) on C stack. RC (eax) modified.
2777 |// Caveat: needs 2 slots on x87 stack!
2779 | cmp eax, 1; jle >6 // i<=1?
2780 | // Now 1 < (unsigned)i <= 0x80000000.
2781 |1: // Handle leading zeros.
2782 | test eax, 1; jnz >2
2789 |3: // Handle trailing bits.
2800 | je <5 // x^1 ==> x
2804 | cmp eax, 1; je <5 // x^-1 ==> 1/x
2805 | jmp <1 // x^-i ==> (1/x)^i
2807 | fpop; fld1 // x^0 ==> 1
2810 |8: // FP/FP power function x^y.
2814 | mov eax, [esp+4]; shl eax, 1
2815 | cmp eax, 0xff000000; je >2 // x^+-Inf?
2816 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
2817 | cmp eax, 0xff000000; je >4 // +-Inf^y?
2821 |9: // Handle x^NaN.
2826 | fucomp st2; fnstsw ax; sahf
2828 | je >1 // 1^NaN ==> 1
2829 | fxch // x^NaN ==> NaN
2834 |2: // Handle x^+-Inf.
2840 | fucomp st1; fnstsw ax; sahf
2842 | je >3 // +-1^+-Inf ==> 1
2843 | fpop; fabs; fldz; mov eax, 0; setc al
2844 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
2850 |4: // Handle +-0^y or +-Inf^y.
2851 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
2853 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
2854 | fldz // y < 0, +-Inf^y ==> 0
2857 | mov dword [esp+4], 0x7f800000 // Return +Inf.
2865 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
2866 |// Needs 16 byte scratch area for x86. Also called from JIT code.
2868 | cvtsd2si eax, xmm1
2869 | cvtsi2sd xmm2, eax
2870 | ucomisd xmm1, xmm2
2871 | jnz >8 // Branch for FP exponents.
2872 | jp >9 // Branch for NaN exponent.
2873 | // Fallthrough to vm_powi_sse.
2875 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2877 | cmp eax, 1; jle >6 // i<=1?
2878 | // Now 1 < (unsigned)i <= 0x80000000.
2879 |1: // Handle leading zeros.
2880 | test eax, 1; jnz >2
2887 |3: // Handle trailing bits.
2898 | je <5 // x^1 ==> x
2901 | sseconst_1 xmm1, RDa
2906 | cmp eax, 1; je <5 // x^-1 ==> 1/x
2907 | jmp <1 // x^-i ==> (1/x)^i
2909 | sseconst_1 xmm0, RDa
2912 |8: // FP/FP power function x^y.
2914 | movd rax, xmm1; shl rax, 1
2915 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
2916 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
2917 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
2919 | movsd qword [rsp+16], xmm1 // Use scratch area.
2920 | movsd qword [rsp+8], xmm0
2921 | fld qword [rsp+16]
2924 | movsd qword [rsp-16], xmm1 // Use red zone.
2925 | movsd qword [rsp-8], xmm0
2926 | fld qword [rsp-16]
2930 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
2931 | movsd qword [esp+4], xmm0
2932 | cmp dword [esp+12], 0; jne >1
2933 | mov eax, [esp+16]; shl eax, 1
2934 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
2936 | cmp dword [esp+4], 0; jne >1
2937 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
2938 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
2940 | fld qword [esp+12]
2943 | fyl2x // y*log2(x)
2944 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
2945 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
2947 | fstp qword [rsp+8] // Use scratch area.
2948 | movsd xmm0, qword [rsp+8]
2950 | fstp qword [rsp-8] // Use red zone.
2951 | movsd xmm0, qword [rsp-8]
2953 | fstp qword [esp+4] // Needs 8 byte scratch area.
2954 | movsd xmm0, qword [esp+4]
2958 |9: // Handle x^NaN.
2959 | sseconst_1 xmm2, RDa
2960 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
2961 | movaps xmm0, xmm1 // x^NaN ==> NaN
2965 |2: // Handle x^+-Inf.
2966 | sseconst_abs xmm2, RDa
2967 | andpd xmm0, xmm2 // |x|
2968 | sseconst_1 xmm2, RDa
2969 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
2970 | movmskpd eax, xmm1
2972 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
2974 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
2977 |4: // Handle +-0^y.
2978 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
2979 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
2982 |5: // Handle +-Inf^y.
2983 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
2984 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
2987 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
2988 |// Computes fpm(x) for extended math functions. ORDER FPM.
2994 | .define fpmop, CARG2d
2996 | .define fpmop, CARG1d
2998 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
2999 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3000 | sqrtsd xmm0, xmm0; ret
3003 | movsd qword [rsp+8], xmm0 // Use scratch area.
3006 | movsd qword [rsp-8], xmm0 // Use red zone.
3009 | cmp fpmop, 5; ja >2
3010 | .if X64WIN; pop rax; .endif
3013 | .if X64WIN; push rax; .endif
3017 | .if X64WIN; push rax; .endif
3019 |2: ; cmp fpmop, 7; je >1; ja >2
3020 | fldln2; fxch; fyl2x; jmp >7
3021 |1: ; fld1; fxch; fyl2x; jmp >7
3022 |2: ; cmp fpmop, 9; je >1; ja >2
3023 | fldlg2; fxch; fyl2x; jmp >7
3025 |2: ; cmp fpmop, 11; je >1; ja >9
3030 | fstp qword [rsp+8] // Use scratch area.
3031 | movsd xmm0, qword [rsp+8]
3033 | fstp qword [rsp-8] // Use red zone.
3034 | movsd xmm0, qword [rsp-8]
3038 |.else // x86 calling convention.
3040 | .define fpmop, eax
3041 | mov fpmop, [esp+12]
3042 | movsd xmm0, qword [esp+4]
3043 | cmp fpmop, 1; je >1; ja >2
3044 | call ->vm_floor; jmp >7
3045 |1: ; call ->vm_ceil; jmp >7
3046 |2: ; cmp fpmop, 3; je >1; ja >2
3047 | call ->vm_trunc; jmp >7
3051 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3054 |2: ; fld qword [esp+4]
3055 | cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
3056 |2: ; cmp fpmop, 7; je >1; ja >2
3057 | fldln2; fxch; fyl2x; ret
3058 |1: ; fld1; fxch; fyl2x; ret
3059 |2: ; cmp fpmop, 9; je >1; ja >2
3060 | fldlg2; fxch; fyl2x; ret
3062 |2: ; cmp fpmop, 11; je >1; ja >9
3064 |1: ; fptan; fpop; ret
3068 | mov fpmop, [esp+12]
3070 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3071 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3073 |2: ; cmp fpmop, 5; jb ->vm_exp; je ->vm_exp2
3074 | cmp fpmop, 7; je >1; ja >2
3075 | fldln2; fxch; fyl2x; ret
3076 |1: ; fld1; fxch; fyl2x; ret
3077 |2: ; cmp fpmop, 9; je >1; ja >2
3078 | fldlg2; fxch; fyl2x; ret
3080 |2: ; cmp fpmop, 11; je >1; ja >9
3082 |1: ; fptan; fpop; ret
3084 |9: ; int3 // Bad fpm.
3086 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3087 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3088 |// and basic math functions. ORDER ARITH
3094 | .define foldop, CARG3d
3096 | .define foldop, CARG1d
3098 | cmp foldop, 1; je >1; ja >2
3099 | addsd xmm0, xmm1; ret
3100 |1: ; subsd xmm0, xmm1; ret
3101 |2: ; cmp foldop, 3; je >1; ja >2
3102 | mulsd xmm0, xmm1; ret
3103 |1: ; divsd xmm0, xmm1; ret
3104 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3105 | cmp foldop, 7; je >1; ja >2
3106 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3107 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3108 |2: ; cmp foldop, 9; ja >2
3110 | movsd qword [rsp+8], xmm0 // Use scratch area.
3111 | movsd qword [rsp+16], xmm1
3113 | fld qword [rsp+16]
3115 | movsd qword [rsp-8], xmm0 // Use red zone.
3116 | movsd qword [rsp-16], xmm1
3118 | fld qword [rsp-16]
3124 | fstp qword [rsp+8] // Use scratch area.
3125 | movsd xmm0, qword [rsp+8]
3127 | fstp qword [rsp-8] // Use red zone.
3128 | movsd xmm0, qword [rsp-8]
3131 |1: ; fxch; fscale; fpop1; jmp <7
3132 |2: ; cmp foldop, 11; je >1; ja >9
3133 | minsd xmm0, xmm1; ret
3134 |1: ; maxsd xmm0, xmm1; ret
3135 |9: ; int3 // Bad op.
3137 |.else // x86 calling convention.
3139 | .define foldop, eax
3140 | mov foldop, [esp+20]
3141 | movsd xmm0, qword [esp+4]
3142 | movsd xmm1, qword [esp+12]
3143 | cmp foldop, 1; je >1; ja >2
3146 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3149 |1: ; subsd xmm0, xmm1; jmp <7
3150 |2: ; cmp foldop, 3; je >1; ja >2
3151 | mulsd xmm0, xmm1; jmp <7
3152 |1: ; divsd xmm0, xmm1; jmp <7
3155 | call ->vm_mod; jmp <7
3156 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3157 |2: ; cmp foldop, 7; je >1; ja >2
3158 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3159 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3160 |2: ; cmp foldop, 9; ja >2
3161 | fld qword [esp+4] // Reload from stack
3162 | fld qword [esp+12]
3165 |1: ; fxch; fscale; fpop1; ret
3166 |2: ; cmp foldop, 11; je >1; ja >9
3167 | minsd xmm0, xmm1; jmp <7
3168 |1: ; maxsd xmm0, xmm1; jmp <7
3169 |9: ; int3 // Bad op.
3175 | fld qword [esp+12]
3176 | cmp eax, 1; je >1; ja >2
3178 |1: ; fsubp st1; ret
3179 |2: ; cmp eax, 3; je >1; ja >2
3181 |1: ; fdivp st1; ret
3182 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3183 | cmp eax, 7; je >1; ja >2
3185 |1: ; fpop; fabs; ret
3186 |2: ; cmp eax, 9; je >1; ja >2
3188 |1: ; fxch; fscale; fpop1; ret
3189 |2: ; cmp eax, 11; je >1; ja >9
3191 | fucomi st1; fcmovnbe st1; fpop1; ret
3192 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3194 | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
3195 |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
3197 |9: ; int3 // Bad op.
3200 |//-----------------------------------------------------------------------
3201 |//-- Miscellaneous functions --------------------------------------------
3202 |//-----------------------------------------------------------------------
3204 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
3208 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3216 | .if X64WIN; pop rsi; .endif
3222 | xor edx, 0x00200000 // Toggle ID bit in flags.
3227 | xor eax, eax // Zero means no features supported.
3229 | jz >1 // No ID toggle means no CPUID support.
3230 | mov eax, [esp+4] // Argument 1 is function number.
3234 | mov edi, [esp+16] // Argument 2 is result area.
3245 |//-----------------------------------------------------------------------
3248 /* Generate the code for a single instruction. */
3249 static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3252 |// Note: aligning all instructions does not pay off.
3257 /* -- Comparison ops ---------------------------------------------------- */
3259 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3261 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3262 | // RA = src1, RD = src2, JMP with RD = target
3264 | checknum RA, ->vmeta_comp
3265 | checknum RD, ->vmeta_comp
3267 | movsd xmm0, qword [BASE+RD*8] // SSE2 form: load src2 ...
3269 | ucomisd xmm0, qword [BASE+RA*8] // ... and compare it with src1 (same reversed order as the x87 form).
3271 | fld qword [BASE+RA*8] // x87 form. Reverse order, i.e. like cmp D, A.
3272 | fld qword [BASE+RD*8]
3274 | fcomparepp // eax (RD) modified!
3276 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3277 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3291 default: break; /* Shut up GCC. */
3300 case BC_ISEQV: case BC_ISNEV:
3301 vk = op == BC_ISEQV; /* vk is 1 for ISEQV and 0 for ISNEV. */
3302 | ins_AD // RA = src1, RD = src2, JMP with RD = target
3303 | mov RB, [BASE+RD*8+4] // RB = type word of src2 (second dword of the slot).
3305 | cmp RB, LJ_TISNUM; ja >5 // Above LJ_TISNUM: not a number, continue at 5.
3308 | movsd xmm0, qword [BASE+RD*8] // SSE2 form: load src2 ...
3309 | ucomisd xmm0, qword [BASE+RA*8] // ... and compare it with src1.
3311 | fld qword [BASE+RA*8] // x87 form: push src1, then src2, and compare.
3312 | fld qword [BASE+RD*8]
3313 | fcomparepp // eax (RD) modified!
3317 | jp >2 // Unordered means not equal.
3320 | jp >2 // Unordered means not equal.
3325 |1: // EQ: Branch to the target.
3328 |2: // NE: Fallthrough to next instruction.
3330 |2: // NE: Branch to the target.
3333 |1: // EQ: Fallthrough to next instruction.
3337 if (op == BC_ISEQV || op == BC_ISNEV) {
3338 |5: // Either or both types are not numbers.
3339 | checktp RA, RB // Compare types.
3340 | jne <2 // Not the same type?
3342 | jae <1 // Same type and primitive type?
3344 | // Same types and not a primitive type. Compare GCobj or pvalue.
3345 | mov RA, [BASE+RA*8] // RA = payload word of src1.
3346 | mov RD, [BASE+RD*8] // RD = payload word of src2.
3348 | je <1 // Same GCobjs or pvalues?
3349 | cmp RB, LJ_TISTABUD // RB still holds the (shared) type word.
3350 | ja <2 // Different objects and not table/ud?
3352 | // Different tables or userdatas. Need to check __eq metamethod.
3353 | // Field metatable must be at same offset for GCtab and GCudata!
3354 | mov TAB:RB, TAB:RA->metatable // Metatable is read from src1's object.
3355 | test TAB:RB, TAB:RB
3356 | jz <2 // No metatable?
3357 | test byte TAB:RB->nomm, 1<<MM_eq
3358 | jnz <2 // Or 'no __eq' flag set?
3360 | xor RB, RB // ne = 0
3362 | mov RB, 1 // ne = 1
3364 | jmp ->vmeta_equal // Handle __eq metamethod.
3367 case BC_ISEQS: case BC_ISNES:
3368 vk = op == BC_ISEQS;
3369 | ins_AND // RA = src, RD = str const, JMP with RD = target
3372 | mov RA, [BASE+RA*8]
3373 | cmp RA, [KBASE+RD*4]
3381 case BC_ISEQN: case BC_ISNEN:
3382 vk = op == BC_ISEQN;
3383 | ins_AD // RA = src, RD = num const, JMP with RD = target
3387 | movsd xmm0, qword [KBASE+RD*8]
3388 | ucomisd xmm0, qword [BASE+RA*8]
3390 | fld qword [BASE+RA*8]
3391 | fld qword [KBASE+RD*8]
3392 | fcomparepp // eax (RD) modified!
3395 case BC_ISEQP: case BC_ISNEP:
3396 vk = op == BC_ISEQP;
3397 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3402 /* -- Unary test and copy ops ------------------------------------------- */
3404 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3405 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3406 | mov RB, [BASE+RD*8+4] // RB = type word of src.
3408 | cmp RB, LJ_TISTRUECOND // The condition is decided from the type word alone.
3409 if (op == BC_IST || op == BC_ISTC) {
3414 if (op == BC_ISTC || op == BC_ISFC) { /* Copying variants: also store src into dst. */
3415 | mov [BASE+RA*8+4], RB // dst type word = src type word (still in RB).
3416 | mov RB, [BASE+RD*8] // RB is reused for the payload word of src.
3417 | mov [BASE+RA*8], RB // dst payload word = src payload word.
3421 |1: // Fallthrough to the next instruction.
3425 /* -- Unary ops --------------------------------------------------------- */
3428 | ins_AD // RA = dst, RD = src
3429 | mov RB, [BASE+RD*8+4]
3430 | mov RD, [BASE+RD*8] // Overwrites RD.
3431 | mov [BASE+RA*8+4], RB
3432 | mov [BASE+RA*8], RD
3436 | ins_AD // RA = dst, RD = src
3438 | checktp RD, LJ_TISTRUECOND
3440 | mov [BASE+RA*8+4], RB
3444 | ins_AD // RA = dst, RD = src
3445 | checknum RD, ->vmeta_unm
3447 | movsd xmm0, qword [BASE+RD*8]
3448 | sseconst_sign xmm1, RDa
3450 | movsd qword [BASE+RA*8], xmm0
3452 | fld qword [BASE+RD*8]
3454 | fstp qword [BASE+RA*8]
3459 | ins_AD // RA = dst, RD = src
3461 | mov STR:RD, [BASE+RD*8]
3464 | cvtsi2sd xmm0, dword STR:RD->len
3466 | movsd qword [BASE+RA*8], xmm0
3468 | fild dword STR:RD->len
3470 | fstp qword [BASE+RA*8]
3474 | checktab RD, ->vmeta_len
3475 | mov TAB:FCARG1, [BASE+RD*8]
3476 | mov RB, BASE // Save BASE.
3477 | call extern lj_tab_len@4 // (GCtab *t)
3478 | // Length of table returned in eax (RC).
3481 | mov BASE, RB // Restore BASE.
3485 | mov BASE, RB // Restore BASE.
3493 /* -- Binary ops -------------------------------------------------------- */
3495 |.macro ins_arithpre, ins, sseins, ssereg
3497 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3500 | checknum RB, ->vmeta_arith_vn
3502 | movsd xmm0, qword [BASE+RB*8]
3503 | sseins ssereg, qword [KBASE+RC*8]
3505 | fld qword [BASE+RB*8]
3506 | ins qword [KBASE+RC*8]
3510 | checknum RB, ->vmeta_arith_nv
3512 | movsd xmm0, qword [KBASE+RC*8]
3513 | sseins ssereg, qword [BASE+RB*8]
3515 | fld qword [KBASE+RC*8]
3516 | ins qword [BASE+RB*8]
3520 | checknum RB, ->vmeta_arith_vv
3521 | checknum RC, ->vmeta_arith_vv
3523 | movsd xmm0, qword [BASE+RB*8]
3524 | sseins ssereg, qword [BASE+RC*8]
3526 | fld qword [BASE+RB*8]
3527 | ins qword [BASE+RC*8]
3533 |.macro ins_arithpost
3535 | movsd qword [BASE+RA*8], xmm0
3537 | fstp qword [BASE+RA*8]
3541 |.macro ins_arith, ins, sseins
3542 | ins_arithpre ins, sseins, xmm0
3547 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3548 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3549 | ins_arith fadd, addsd
3551 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3552 | ins_arith fsub, subsd
3554 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3555 | ins_arith fmul, mulsd
3557 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3558 | ins_arith fdiv, divsd
3561 | ins_arithpre fld, movsd, xmm1
3567 case BC_MODNV: case BC_MODVV:
3568 | ins_arithpre fld, movsd, xmm1
3569 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3572 | ins_arithpre fld, movsd, xmm1
3579 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3581 | mov L:CARG1d, SAVE_L
3582 | mov L:CARG1d->base, BASE
3583 | lea CARG2d, [BASE+RC*8]
3587 | mov L:RB, L:CARG1d
3589 | lea RA, [BASE+RC*8]
3596 | mov L:RB->base, BASE
3599 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3600 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3601 | mov BASE, L:RB->base
3604 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
3606 | mov RC, [BASE+RB*8+4]
3607 | mov RB, [BASE+RB*8]
3608 | mov [BASE+RA*8+4], RC
3609 | mov [BASE+RA*8], RB
3613 /* -- Constant ops ------------------------------------------------------ */
3616 | ins_AND // RA = dst, RD = str const (~)
3617 | mov RD, [KBASE+RD*4]
3618 | mov dword [BASE+RA*8+4], LJ_TSTR
3619 | mov [BASE+RA*8], RD
3623 | ins_AD // RA = dst, RD = signed int16 literal
3625 | movsx RD, RDW // Sign-extend literal.
3627 | movsd qword [BASE+RA*8], xmm0
3629 | fild PC_RD // Refetch signed RD from instruction.
3630 | fstp qword [BASE+RA*8]
3635 | ins_AD // RA = dst, RD = num const
3637 | movsd xmm0, qword [KBASE+RD*8]
3638 | movsd qword [BASE+RA*8], xmm0
3640 | fld qword [KBASE+RD*8]
3641 | fstp qword [BASE+RA*8]
3646 | ins_AND // RA = dst, RD = primitive type (~)
3647 | mov [BASE+RA*8+4], RD
3651 | ins_AD // RA = dst_start, RD = dst_end
3652 | lea RA, [BASE+RA*8+12]
3653 | lea RD, [BASE+RD*8+4]
3655 | mov [RA-8], RB // Sets minimum 2 slots.
3664 /* -- Upvalue and function ops ------------------------------------------ */
3667 | ins_AD // RA = dst, RD = upvalue #
3668 | mov LFUNC:RB, [BASE-8]
3669 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
3670 | mov RB, UPVAL:RB->v
3673 | mov [BASE+RA*8+4], RD
3674 | mov [BASE+RA*8], RB
3678 #define TV2MARKOFS \
3679 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3680 | ins_AD // RA = upvalue #, RD = src
3681 | mov LFUNC:RB, [BASE-8] // RB = function object stored in the frame slot at BASE-8.
3682 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // RB = func->uvptr[RA].
3683 | cmp byte UPVAL:RB->closed, 0 // Sets flags on uv->closed; the mov loads below do not change flags.
3684 | mov RB, UPVAL:RB->v // RB = uv->v.
3685 | mov RA, [BASE+RD*8] // RA = payload word of src.
3686 | mov RD, [BASE+RD*8+4] // RD = type word of src.
3690 | // Check barrier for closed upvalue.
3691 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv). RB is uv->v; TV2MARKOFS is the tv-to-marked offset, so this presumably relies on v pointing at uv->tv -- TODO confirm.
3696 |2: // Upvalue is black. Check if new value is collectable and white.
3698 | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
3700 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3702 | // Crossed a write barrier. Move the barrier forward.
3703 |.if X64 and not X64WIN
3705 | mov RB, BASE // Save BASE.
3707 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
3709 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] // First argument: global_State pointer derived from DISPATCH.
3710 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
3711 | mov BASE, RB // Restore BASE.
3716 | ins_AND // RA = upvalue #, RD = str const (~)
3717 | mov LFUNC:RB, [BASE-8]
3718 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
3719 | mov GCOBJ:RA, [KBASE+RD*4]
3720 | mov RD, UPVAL:RB->v
3721 | mov [RD], GCOBJ:RA
3722 | mov dword [RD+4], LJ_TSTR
3723 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3728 |2: // Check if string is white and ensure upvalue is closed.
3729 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3731 | cmp byte UPVAL:RB->closed, 0
3733 | // Crossed a write barrier. Move the barrier forward.
3734 | mov RB, BASE // Save BASE (FCARG2 == BASE).
3736 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
3737 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
3738 | mov BASE, RB // Restore BASE.
3742 | ins_AD // RA = upvalue #, RD = num const
3743 | mov LFUNC:RB, [BASE-8]
3745 | movsd xmm0, qword [KBASE+RD*8]
3747 | fld qword [KBASE+RD*8]
3749 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
3750 | mov RA, UPVAL:RB->v
3752 | movsd qword [RA], xmm0
3759 | ins_AND // RA = upvalue #, RD = primitive type (~)
3760 | mov LFUNC:RB, [BASE-8]
3761 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
3762 | mov RA, UPVAL:RB->v
3767 | ins_AD // RA = level, RD = target
3768 | branchPC RD // Do this first to free RD.
3770 | cmp dword L:RB->openupval, 0
3772 | mov L:RB->base, BASE
3773 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
3774 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
3775 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
3776 | mov BASE, L:RB->base
3782 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3785 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
3786 | mov CARG3d, [BASE-8]
3787 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
3790 | mov LFUNC:RA, [BASE-8]
3791 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
3793 | mov ARG3, LFUNC:RA
3794 | mov ARG2, PROTO:RD
3796 | mov L:RB->base, BASE
3799 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3800 | call extern lj_func_newL_gc
3801 | // GCfuncL * returned in eax (RC).
3802 | mov BASE, L:RB->base
3804 | mov [BASE+RA*8], LFUNC:RC
3805 | mov dword [BASE+RA*8+4], LJ_TFUNC
3809 /* -- Table ops --------------------------------------------------------- */
3812 | ins_AD // RA = dst, RD = hbits|asize
3814 | mov L:CARG1d, SAVE_L
3815 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
3824 | mov RD, [DISPATCH+DISPATCH_GL(gc.total)]
3825 | mov L:RB, L:CARG1d
3826 | cmp RD, [DISPATCH+DISPATCH_GL(gc.threshold)]
3840 | mov RD, [DISPATCH+DISPATCH_GL(gc.total)]
3842 | cmp RD, [DISPATCH+DISPATCH_GL(gc.threshold)]
3843 | mov L:RB->base, BASE
3847 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3848 | // Table * returned in eax (RC).
3849 | mov BASE, L:RB->base
3851 | mov [BASE+RA*8], TAB:RC
3852 | mov dword [BASE+RA*8+4], LJ_TTAB
3854 |3: // Turn 0x7ff into 0x801.
3859 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
3861 | mov L:CARG1d, L:RB
3864 | mov L:FCARG1, L:RB
3865 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
3870 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3872 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3874 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3875 | mov L:RB->base, BASE
3878 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
3879 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
3880 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
3881 | // Table * returned in eax (RC).
3882 | mov BASE, L:RB->base
3884 | mov [BASE+RA*8], TAB:RC
3885 | mov dword [BASE+RA*8+4], LJ_TTAB
3888 | mov L:FCARG1, L:RB
3889 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
3890 | movzx RD, PC_RD // Need to reload RD.
3896 | ins_AND // RA = dst, RD = str const (~)
3897 | mov LFUNC:RB, [BASE-8]
3898 | mov TAB:RB, LFUNC:RB->env
3899 | mov STR:RC, [KBASE+RD*4]
3903 | ins_AND // RA = src, RD = str const (~)
3904 | mov LFUNC:RB, [BASE-8]
3905 | mov TAB:RB, LFUNC:RB->env
3906 | mov STR:RC, [KBASE+RD*4]
3911 | ins_ABC // RA = dst, RB = table, RC = key
3912 | checktab RB, ->vmeta_tgetv
3913 | mov TAB:RB, [BASE+RB*8]
3915 | // Integer key? Convert number to int and back and compare.
3918 | movsd xmm0, qword [BASE+RC*8]
3921 | ucomisd xmm0, xmm1
3924 | fld qword [BASE+RC*8]
3927 | fcomparepp // eax (RC) modified!
3931 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3932 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
3933 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3935 | add RC, TAB:RB->array
3936 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
3939 | mov RB, [RC] // Get array slot.
3941 | mov [BASE+RA*8], RB
3942 | mov [BASE+RA*8+4], RC
3945 |2: // Check for __index if table value is nil.
3946 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
3948 | mov TAB:RA, TAB:RB->metatable
3949 | test byte TAB:RA->nomm, 1<<MM_index
3950 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3951 | movzx RA, PC_RA // Restore RA.
3955 | checkstr RC, ->vmeta_tgetv
3956 | mov STR:RC, [BASE+RC*8]
3960 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3962 | mov STR:RC, [KBASE+RC*4]
3963 | checktab RB, ->vmeta_tgets
3964 | mov TAB:RB, [BASE+RB*8]
3965 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
3966 | mov RA, TAB:RB->hmask
3967 | and RA, STR:RC->hash // RA = t->hmask & str->hash.
3969 | add NODE:RA, TAB:RB->node // Add the base of the table's node array.
3971 | cmp dword NODE:RA->key.it, LJ_TSTR // Is the node's key a string ...
3973 | cmp dword NODE:RA->key.gcr, STR:RC // ... and the very same string object?
3975 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3976 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
3977 | je >5 // Key found, but nil value?
3979 | mov RB, [RA] // Get node value.
3981 | mov [BASE+RC*8], RB
3983 | mov [BASE+RC*8+4], RA
3991 |4: // Follow hash chain.
3992 | mov NODE:RA, NODE:RA->next
3993 | test NODE:RA, NODE:RA // NULL next pointer ends the chain.
3995 | // End of hash chain: key not found, nil result.
3997 |5: // Check for __index if table value is nil.
3998 | mov TAB:RA, TAB:RB->metatable
3999 | test TAB:RA, TAB:RA
4000 | jz <3 // No metatable: done.
4001 | test byte TAB:RA->nomm, 1<<MM_index
4002 | jnz <3 // 'no __index' flag set: done.
4003 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
4006 | ins_ABC // RA = dst, RB = table, RC = byte literal
4007 | checktab RB, ->vmeta_tgetb
4008 | mov TAB:RB, [BASE+RB*8]
4009 | cmp RC, TAB:RB->asize
4012 | add RC, TAB:RB->array
4013 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4016 | mov RB, [RC] // Get array slot.
4018 | mov [BASE+RA*8], RB
4019 | mov [BASE+RA*8+4], RC
4022 |2: // Check for __index if table value is nil.
4023 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4025 | mov TAB:RA, TAB:RB->metatable
4026 | test byte TAB:RA->nomm, 1<<MM_index
4027 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
4028 | movzx RA, PC_RA // Restore RA.
4033 | ins_ABC // RA = src, RB = table, RC = key
4034 | checktab RB, ->vmeta_tsetv
4035 | mov TAB:RB, [BASE+RB*8]
4037 | // Integer key? Convert number to int and back and compare.
4040 | movsd xmm0, qword [BASE+RC*8]
4043 | ucomisd xmm0, xmm1
4046 | fld qword [BASE+RC*8]
4049 | fcomparepp // eax (RC) modified!
4053 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
4054 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4057 | add RC, TAB:RB->array
4058 | cmp dword [RC+4], LJ_TNIL
4059 | je >3 // Previous value is nil?
4061 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4064 | mov RB, [BASE+RA*8+4] // Set array slot.
4065 | mov RA, [BASE+RA*8]
4070 |3: // Check for __newindex if previous value is nil.
4071 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4073 | mov TAB:RA, TAB:RB->metatable
4074 | test byte TAB:RA->nomm, 1<<MM_newindex
4075 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
4076 | movzx RA, PC_RA // Restore RA.
4080 | checkstr RC, ->vmeta_tsetv
4081 | mov STR:RC, [BASE+RC*8]
4084 |7: // Possible table write barrier for the value. Skip valiswhite check.
4085 | barrierback TAB:RB, RA
4086 | movzx RA, PC_RA // Restore RA.
4090 | ins_ABC // RA = src, RB = table, RC = str const (~)
4092 | mov STR:RC, [KBASE+RC*4]
4093 | checktab RB, ->vmeta_tsets
4094 | mov TAB:RB, [BASE+RB*8]
4095 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4096 | mov RA, TAB:RB->hmask
4097 | and RA, STR:RC->hash
4099 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
4100 | add NODE:RA, TAB:RB->node
4102 | cmp dword NODE:RA->key.it, LJ_TSTR
4104 | cmp dword NODE:RA->key.gcr, STR:RC
4106 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4107 | cmp dword [RA+4], LJ_TNIL
4108 | je >4 // Previous value is nil?
4110 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4114 | mov RB, [BASE+RC*8+4] // Set node value.
4115 | mov RC, [BASE+RC*8]
4120 |4: // Check for __newindex if previous value is nil.
4121 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4123 | mov TMP1, RA // Save RA.
4124 | mov TAB:RA, TAB:RB->metatable
4125 | test byte TAB:RA->nomm, 1<<MM_newindex
4126 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4127 | mov RA, TMP1 // Restore RA.
4130 |5: // Follow hash chain.
4131 | mov NODE:RA, NODE:RA->next
4132 | test NODE:RA, NODE:RA
4134 | // End of hash chain: key not found, add a new one.
4136 | // But check for __newindex first.
4137 | mov TAB:RA, TAB:RB->metatable
4138 | test TAB:RA, TAB:RA
4139 | jz >6 // No metatable: continue.
4140 | test byte TAB:RA->nomm, 1<<MM_newindex
4141 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4145 | mov TMP3, TAB:RB // Save TAB:RB for us.
4147 | mov L:CARG1d, SAVE_L
4148 | mov L:CARG1d->base, BASE
4150 | mov CARG2d, TAB:RB
4151 | mov L:RB, L:CARG1d
4153 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
4158 | mov L:RB->base, BASE
4161 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4162 | // Handles write barrier for the new key. TValue * returned in eax (RC).
4163 | mov BASE, L:RB->base
4164 | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
4166 | jmp <2 // Must check write barrier for value.
4168 |7: // Possible table write barrier for the value. Skip valiswhite check.
4169 | barrierback TAB:RB, RC // Destroys STR:RC.
4173 | ins_ABC // RA = src, RB = table, RC = byte literal
4174 | checktab RB, ->vmeta_tsetb
4175 | mov TAB:RB, [BASE+RB*8]
4176 | cmp RC, TAB:RB->asize
4179 | add RC, TAB:RB->array
4180 | cmp dword [RC+4], LJ_TNIL
4181 | je >3 // Previous value is nil?
4183 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4186 | mov RB, [BASE+RA*8+4] // Set array slot.
4187 | mov RA, [BASE+RA*8]
4192 |3: // Check for __newindex if previous value is nil.
4193 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4195 | mov TAB:RA, TAB:RB->metatable
4196 | test byte TAB:RA->nomm, 1<<MM_newindex
4197 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
4198 | movzx RA, PC_RA // Restore RA.
4201 |7: // Possible table write barrier for the value. Skip valiswhite check.
4202 | barrierback TAB:RB, RA
4203 | movzx RA, PC_RA // Restore RA.
4208 | ins_AD // RA = base (table at base-1), RD = num const (start index)
4209 | mov TMP1, KBASE // Need one more free register.
4211 | movsd xmm0, qword [KBASE+RD*8]
4214 | fld qword [KBASE+RD*8]
4215 | fistp ARG4 // Const is guaranteed to be an int.
4219 | lea RA, [BASE+RA*8]
4220 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
4221 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4226 | cvtsd2si KBASE, xmm0 // Const is guaranteed to be an int.
4233 | jz >4 // Nothing to copy?
4234 | add RD, KBASE // Compute needed size.
4235 | cmp RD, TAB:RB->asize
4236 | jae >5 // Does not fit into array part?
4239 | add KBASE, TAB:RB->array
4240 |3: // Copy result slots to table.
4253 |5: // Need to resize array part.
4255 | mov L:CARG1d, SAVE_L
4256 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
4257 | mov CARG2d, TAB:RB
4259 | mov L:RB, L:CARG1d
4263 | mov L:RB->base, BASE
4268 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4269 | mov BASE, L:RB->base
4270 | movzx RA, PC_RA // Restore RA.
4273 |7: // Possible table write barrier for any value. Skip valiswhite check.
4274 | barrierback TAB:RB, RD
4278 /* -- Calls and vararg handling ----------------------------------------- */
4280 case BC_CALL: case BC_CALLM:
4281 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4282 if (op == BC_CALLM) {
4283 | add NARGS:RD, MULTRES // CALLM only: add MULTRES to the encoded extra_nargs.
4285 | cmp dword [BASE+RA*8+4], LJ_TFUNC // Is the type word of the callee slot LJ_TFUNC?
4286 | mov LFUNC:RB, [BASE+RA*8] // Load the callee; mov leaves the flags of the cmp intact.
4287 | jne ->vmeta_call_ra // Not a function: continue at ->vmeta_call_ra.
4288 | lea BASE, [BASE+RA*8+8] // New BASE = slot right after the callee slot.
4293 | ins_AD // RA = base, RD = extra_nargs
4294 | add NARGS:RD, MULTRES
4295 | // Fall through. Assumes BC_CALLMT follows and ins_AD is a no-op.
4298 | ins_AD // RA = base, RD = nargs+1
4299 | lea RA, [BASE+RA*8+8]
4300 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
4301 | mov LFUNC:RB, [RA-8]
4302 | cmp dword [RA-4], LJ_TFUNC
4306 | test PC, FRAME_TYPE
4309 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
4310 | mov MULTRES, NARGS:RD
4314 | mov RB, [RA] // Move args down.
4323 | mov LFUNC:RB, [BASE-8]
4325 | mov NARGS:RD, MULTRES
4326 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4331 |5: // Tailcall to a fast function.
4332 | test PC, FRAME_TYPE // Lua frame below?
4336 | lea RA, [BASE+RA*8]
4337 | mov LFUNC:KBASE, [RA-8] // Need to prepare KBASE.
4338 | mov KBASE, LFUNC:KBASE->pc
4339 | mov KBASE, [KBASE+PC2PROTO(k)]
4342 |7: // Tailcall from a vararg function.
4343 | jnp <1 // Vararg frame below?
4345 | sub BASE, PC // Need to relocate BASE/KBASE down.
4352 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4353 | lea RA, [BASE+RA*8+8] // fb = base+1
4354 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
4358 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
4362 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
4364 | mov [RA-8], LFUNC:RB
4366 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
4374 | ins_AB_ // RA = base, RB = nresults+1, (RC = 1)
4375 | mov LFUNC:RC, [BASE-8]
4376 | lea RA, [BASE+RA*8]
4377 | mov RC, LFUNC:RC->pc
4378 | movzx RC, byte [RC+PC2PROTO(numparams)]
4379 | mov TMP1, KBASE // Need one more free register.
4380 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
4381 | sub KBASE, [BASE-4]
4382 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
4384 | jz >5 // Copy all varargs?
4385 | lea RB, [RA+RB*8-8]
4386 | cmp KBASE, BASE // No vararg slots?
4388 |1: // Copy vararg slots to destination slots.
4395 | cmp RA, RB // All destination slots filled?
4397 | cmp KBASE, BASE // No more vararg slots?
4399 |2: // Fill up remainder with nil.
4400 | mov dword [RA+4], LJ_TNIL
4408 |5: // Copy all varargs.
4409 | mov MULTRES, 1 // MULTRES = 0+1
4412 | jbe <3 // No vararg slots?
4416 | mov MULTRES, RB // MULTRES = #varargs+1
4419 | cmp RC, L:RB->maxstack
4420 | ja >7 // Need to grow stack?
4421 |6: // Copy all vararg slots.
4428 | cmp KBASE, BASE // No more vararg slots?
4432 |7: // Grow stack for varargs.
4433 | mov L:RB->base, BASE
4436 | sub KBASE, BASE // Need delta, because BASE may change.
4437 | mov FCARG2, MULTRES
4440 | call extern lj_state_growstack@8 // (lua_State *L, int n)
4441 | mov BASE, L:RB->base
4447 /* -- Returns ----------------------------------------------------------- */
4450 | ins_AD // RA = results, RD = extra_nresults
4451 | add RD, MULTRES // MULTRES >=1, so RD >=1.
4452 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4455 case BC_RET: case BC_RET0: case BC_RET1:
4456 | ins_AD // RA = results, RD = nresults+1
4457 if (op != BC_RET0) {
4462 | mov MULTRES, RD // Save nresults+1.
4463 | test PC, FRAME_TYPE // Check frame type marker.
4464 | jnz >7 // Not returning to a fixarg Lua func?
4468 | mov KBASE, BASE // Use KBASE for result move.
4472 | mov RB, [KBASE+RA] // Move results down.
4474 | mov RB, [KBASE+RA+4]
4480 | mov RD, MULTRES // Note: MULTRES may be >255.
4481 | movzx RB, PC_RB // So cannot compare with RDL!
4483 | cmp RB, RD // More results expected?
4487 | mov RB, [BASE+RA+4]
4494 | cmp PC_RB, RDL // More results expected?
4500 | not RAa // Note: ~RA = -(RA+1)
4501 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
4502 | mov LFUNC:KBASE, [BASE-8]
4503 | mov KBASE, LFUNC:KBASE->pc
4504 | mov KBASE, [KBASE+PC2PROTO(k)]
4507 |6: // Fill up results with nil.
4509 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
4512 | mov dword [BASE+RD*8-12], LJ_TNIL
4517 |7: // Non-standard return case.
4519 | // Return from vararg function: relocate BASE down and RA up.
4522 if (op != BC_RET0) {
4528 /* -- Loops and branches ------------------------------------------------ */
4530 |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
4531 |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
4532 |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
4533 |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
4539 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4549 vk = (op == BC_IFORL || op == BC_JFORL); /* vk is nonzero for the IFORL/JFORL opcodes. */
4550 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4551 | lea RA, [BASE+RA*8] // RA = address of the first loop slot; the FOR_* defines are relative to it.
4553 | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
4554 | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
4556 | mov RB, FOR_TSTEP // Load type/hiword of for step.
4558 | cmp RB, LJ_TISNUM; ja ->vmeta_for
4561 | movsd xmm0, FOR_IDX // SSE2 form: xmm0 = idx.
4562 | movsd xmm1, FOR_STOP // xmm1 = stop.
4564 | addsd xmm0, FOR_STEP // xmm0 = idx + step.
4565 | movsd FOR_IDX, xmm0 // Write the updated idx back to its slot.
4566 | test RB, RB; js >3 // Sign bit of the step's hiword set: use the inverted compare at 3.
4570 | ucomisd xmm1, xmm0 // Compare stop with idx.
4572 | movsd FOR_EXT, xmm0 // Copy idx to the fourth loop slot (FOR_EXT).
4577 | fadd FOR_STEP // nidx = idx + step
4580 | test RB, RB; js >1 // x87 form: same sign test on the step's hiword.
4585 | fxch // Swap lim/(n)idx if step non-negative.
4587 | fcomparepp // eax (RD) modified if !cmov.
4589 | movzx RD, PC_RD // Need to reload RD.
4592 if (op == BC_FORI) {
4595 } else if (op == BC_JFORI) {
4599 } else if (op == BC_IFORL) {
4608 |3: // Invert comparison if step is negative.
4609 | ucomisd xmm0, xmm1
4618 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4626 | ins_AJ // RA = base, RD = target
4627 | lea RA, [BASE+RA*8]
4629 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4630 if (op == BC_JITERL) {
4636 | branchPC RD // Otherwise save control var + branch.
4646 | ins_A // RA = base, RD = target (loop extent)
4647 | // Note: RA/RD is only used by trace recorder to determine scope/extent.
4648 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
4652 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4656 | ins_A // RA = base, RD = target (loop extent)
4662 | ins_AD // RA = base (ignored), RD = traceno
4663 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4664 | mov TRACE:RD, [RA+RD*4]
4665 | mov RDa, TRACE:RD->mcode
4667 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4668 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
4674 | ins_AJ // RA = unused, RD = target
4679 /* -- Function headers -------------------------------------------------- */
4682 ** Reminder: A function may be called with func/args above L->maxstack,
4683 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4684 ** too. This means all FUNC* ops (including fast functions) must check
4685 ** for stack overflow _before_ adding more slots!
4690 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
4693 case BC_FUNCV: /* NYI: compiled vararg functions. */
4694 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4702 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4703 | mov KBASE, [PC-4+PC2PROTO(k)]
4705 | lea RA, [BASE+RA*8] // Top of frame.
4706 | cmp RA, L:RB->maxstack
4707 | ja ->vm_growstack_f
4708 | movzx RA, byte [PC-4+PC2PROTO(numparams)]
4709 | cmp NARGS:RD, RA // Check for missing parameters.
4712 if (op == BC_JFUNCF) {
4719 |3: // Clear missing parameters.
4720 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
4731 | int3 // NYI: compiled vararg functions
4732 break; /* NYI: compiled vararg functions. */
4735 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4736 | lea RB, [NARGS:RD*8+FRAME_VARG]
4737 | lea RD, [BASE+NARGS:RD*8]
4738 | mov LFUNC:KBASE, [BASE-8]
4739 | mov [RD-4], RB // Store delta + FRAME_VARG.
4740 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
4743 | cmp RA, L:RB->maxstack
4744 | ja ->vm_growstack_v // Need to grow stack.
4747 | movzx RB, byte [PC-4+PC2PROTO(numparams)]
4750 |1: // Copy fixarg slots up to new frame.
4753 | jnb >3 // Less args than parameters?
4759 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
4763 if (op == BC_JFUNCV) {
4767 | mov KBASE, [PC-4+PC2PROTO(k)]
4771 |3: // Clear missing parameters.
4772 | mov dword [RD+4], LJ_TNIL
4781 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4782 | mov CFUNC:RB, [BASE-8]
4783 | mov KBASEa, CFUNC:RB->f
4785 | lea RD, [BASE+NARGS:RD*8-8]
4786 | mov L:RB->base, BASE
4787 | lea RA, [RD+8*LUA_MINSTACK]
4788 | cmp RA, L:RB->maxstack
4790 if (op == BC_FUNCC) {
4792 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
4799 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
4805 | ja ->vm_growstack_c // Need to grow stack.
4807 if (op == BC_FUNCC) {
4808 | call KBASEa // (lua_State *L)
4810 | // (lua_State *L, lua_CFunction f)
4811 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4813 | set_vmstate INTERP
4814 | // nresults returned in eax (RD).
4815 | mov BASE, L:RB->base
4816 | lea RA, [BASE+RD*8]
4818 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4819 | mov PC, [BASE-4] // Fetch PC of caller.
4823 /* ---------------------------------------------------------------------- */
4826 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
/*
** Build the complete VM backend for this target.
**
** Emits the helper subroutines first (build_subroutines) and then one
** instruction handler per bytecode opcode (build_ins), iterating over
** every opcode in the range [0, BC__MAX).
**
** ctx  Build context handed through to the subroutine and instruction
**      builders.
**
** NOTE(review): the local declarations, the lines guarded by the
** preprocessor conditionals and the return statement are not visible in
** this excerpt; the notes on those lines below are assumptions to confirm.
*/
4832 static int build_backend(BuildCtx *ctx)
4837 #ifdef LUAJIT_CPU_NOCMOV /* Build-time override; presumably adjusts the cmov flag passed below -- confirm. */
4840 #if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64) /* Presumably adjusts the sse flag passed below -- confirm. */
4844 dasm_growpc(Dst, BC__MAX); /* Size the DynASM pc-label table to BC__MAX entries (presumably one label per opcode). */
4846 build_subroutines(ctx, cmov, sse); /* Subroutines are generated before any bytecode handler. */
4849 for (op = 0; op < BC__MAX; op++) /* Visit every bytecode opcode in ascending order. */
4850 build_ins(ctx, (BCOp)op, op, cmov, sse); /* The opcode is passed twice: typed as BCOp and as a plain int. */
4855 /* Emit pseudo frame-info for all assembler functions. */
4856 static void emit_asm_debug(BuildCtx *ctx)
4861 #define REG_SP "0x7"
4862 #define REG_RA "0x10"
4866 #define REG_SP "0x4"
4867 #define REG_RA "0x8"
4869 switch (ctx->mode) {
4871 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4874 "\t.long .LECIE0-.LSCIE0\n"
4876 "\t.long 0xffffffff\n"
4880 "\t.sleb128 -" SZPTR "\n"
4881 "\t.byte " REG_RA "\n"
4882 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
4883 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
4884 "\t.align " SZPTR "\n"
4888 "\t.long .LEFDE0-.LASFDE0\n"
4890 "\t.long .Lframe0\n"
4893 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4895 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4896 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4897 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4898 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4900 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
4901 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
4902 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
4903 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
4905 "\t.align " SZPTR "\n"
4906 ".LEFDE0:\n\n", (int)ctx->codesz, CFRAME_SIZE);
4907 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4910 "\t.long .LECIE1-.LSCIE1\n"
4914 "\t.string \"zPR\"\n"
4916 "\t.sleb128 -" SZPTR "\n"
4917 "\t.byte " REG_RA "\n"
4918 "\t.uleb128 6\n" /* augmentation length */
4919 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4920 "\t.long lj_err_unwind_dwarf-.\n"
4921 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4922 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
4923 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
4924 "\t.align " SZPTR "\n"
4928 "\t.long .LEFDE1-.LASFDE1\n"
4930 "\t.long .LASFDE1-.Lframe1\n"
4931 "\t.long .Lbegin-.\n"
4933 "\t.uleb128 0\n" /* augmentation length */
4934 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4936 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4937 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4938 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4939 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4941 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
4942 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
4943 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
4944 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
4946 "\t.align " SZPTR "\n"
4947 ".LEFDE1:\n\n", (int)ctx->codesz, CFRAME_SIZE);
4950 fprintf(ctx->fp, "\t.section .eh_frame,\"dr\"\n");
4952 "\t.def %slj_err_unwind_dwarf; .scl 2; .type 32; .endef\n",
4956 "\t.long LECIE1-LSCIE1\n"
4960 "\t.string \"zP\"\n"
4962 "\t.sleb128 -" SZPTR "\n"
4963 "\t.byte " REG_RA "\n"
4964 "\t.uleb128 5\n" /* augmentation length */
4965 "\t.byte 0x00\n" /* absptr */
4966 "\t.long %slj_err_unwind_dwarf\n"
4967 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
4968 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
4969 "\t.align " SZPTR "\n"
4970 "LECIE1:\n\n", LJ_32 ? "_" : "");
4973 "\t.long LEFDE1-LASFDE1\n"
4975 "\t.long LASFDE1-Lframe1\n"
4976 "\t.long %slj_vm_asm_begin\n"
4978 "\t.uleb128 0\n" /* augmentation length */
4979 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4981 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4982 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4983 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4984 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4986 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
4987 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
4988 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
4989 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
4991 "\t.align " SZPTR "\n"
4992 "LEFDE1:\n\n", LJ_32 ? "_" : "", (int)ctx->codesz, CFRAME_SIZE);
4995 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4998 "\t.set L$set$0,LECIE1-LSCIE1\n"
5003 "\t.ascii \"zPR\\0\"\n"
5005 "\t.byte 128-" SZPTR "\n"
5006 "\t.byte " REG_RA "\n"
5007 "\t.byte 6\n" /* augmentation length */
5008 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
5009 "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
5010 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5012 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
5014 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
5016 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
5017 "\t.align " BSZPTR "\n"
5020 "_lj_vm_asm_begin.eh:\n"
5022 "\t.set L$set$1,LEFDE1-LASFDE1\n"
5025 "\t.long LASFDE1-EH_frame1\n"
5026 "\t.long _lj_vm_asm_begin-.\n"
5028 "\t.byte 0\n" /* augmentation length */
5029 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
5031 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
5032 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
5033 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
5034 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
5036 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
5037 "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
5038 "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
5039 "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
5041 "\t.align " BSZPTR "\n"
5042 "LEFDE1:\n\n", (int)ctx->codesz, CFRAME_SIZE);
5044 "\t.non_lazy_symbol_pointer\n"
5045 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
5046 ".indirect_symbol _lj_err_unwind_dwarf\n"
5049 default: /* Difficult for other modes. */