1 |// Low-level VM code for ARM CPUs.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
6 |.section code_op, code_sub
8 |.actionlist build_actionlist
10 |.globalnames globnames
11 |.externnames extnames
13 |// Note: The ragged indentation of the instructions is intentional.
14 |// The starting columns indicate data dependencies.
16 |//-----------------------------------------------------------------------
18 |// Fixed register assignments for the interpreter.
20 |// The following must be C callee-save (but BASE is often refetched).
21 |.define BASE, r4 // Base of current Lua stack frame.
22 |.define KBASE, r5 // Constants of current Lua function.
23 |.define PC, r6 // Next PC.
24 |.define DISPATCH, r7 // Opcode dispatch table.
25 |.define LREG, r8 // Register holding lua_State (also in SAVE_L).
26 |.define MASKR8, r9 // 255*8 constant for fast bytecode decoding.
28 |// The following temporaries are not saved across C calls, except for RA/RC.
29 |.define RA, r10 // Callee-save.
30 |.define RC, r11 // Callee-save.
32 |.define OP, r12 // Overlaps RB, must not be lr.
35 |// Calling conventions. Also used as temporaries.
40 |.define CARG12, r0 // For 1st soft-fp double.
41 |.define CARG34, r2 // For 2nd soft-fp double.
46 |// Stack layout while in interpreter. Must match with lj_frame.h.
47 |.define CFRAME_SPACE, #28
48 |.define SAVE_ERRF, [sp, #24]
49 |.define SAVE_NRES, [sp, #20]
50 |.define SAVE_CFRAME, [sp, #16]
51 |.define SAVE_L, [sp, #12]
52 |.define SAVE_PC, [sp, #8]
53 |.define SAVE_MULTRES, [sp, #4]
57 | push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
58 | sub sp, sp, CFRAME_SPACE
60 |.macro restoreregs_ret
61 | add sp, sp, CFRAME_SPACE
62 | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
65 |// Type definitions. Some of these are only used for documentation.
66 |.type L, lua_State, LREG
67 |.type GL, global_State
80 |//-----------------------------------------------------------------------
82 |// Trap for not-yet-implemented parts.
83 |.macro NYI; ud; .endmacro
85 |//-----------------------------------------------------------------------
87 |// Access to frame relative to BASE.
88 |.define FRAME_FUNC, #-8
89 |.define FRAME_PC, #-4
91 |.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro // dst = (bits 8-15 of ins)*8; relies on MASKR8 = 255*8.
92 |.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro // dst = (bits 24-31 of ins)*8; relies on MASKR8 = 255*8.
93 |.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro // dst = (bits 16-23 of ins)*8; relies on MASKR8 = 255*8.
94 |.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro // dst = bits 16-31 of ins, unscaled.
96 |// Instruction fetch.
103 |// Instruction decode+dispatch.
105 | ldr OP, [DISPATCH, OP, lsl #2]
116 |// Instruction footer.
118 | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
119 | .define ins_next, ins_NEXT
120 | .define ins_next_, ins_NEXT
121 | .define ins_next1, ins_NEXT1
122 | .define ins_next2, ins_NEXT2
123 | .define ins_next3, ins_NEXT3
125 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
126 | // Affects only certain kinds of benchmarks (and only with -j off).
143 |// Avoid register name substitution for field name.
146 |// Call decode and dispatch.
148 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
149 | ldr PC, LFUNC:CARG3->field_pc
152 | ldr OP, [DISPATCH, OP, lsl #2]
159 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
160 | str PC, [BASE, FRAME_PC]
164 |//-----------------------------------------------------------------------
166 |// Macros to test operand types.
167 |.macro checktp, reg, tp; cmn reg, #-tp; .endmacro // Sets flags for reg + (-tp): Z is set iff reg == tp.
168 |.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro // Branch to target unless reg == LJ_TSTR. Clobbers flags.
169 |.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro // Branch to target unless reg == LJ_TTAB. Clobbers flags.
170 |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro // Branch to target unless reg == LJ_TFUNC. Clobbers flags.
172 |// Assumes DISPATCH is relative to GL.
173 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Offset of a global_State field, for use as a displacement off DISPATCH. */
174 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Offset of a jit_State field, for use as a displacement off DISPATCH. */
176 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Offset of a GCproto field measured from the end of the GCproto struct (always negative); used as a displacement off PC. */
186 |// Set current VM state.
187 |.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro // reg = ~LJ_VMST_<st> (mvn loads the bitwise complement of the immediate).
188 |.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro // Store reg into the vmstate field of global_State, addressed off DISPATCH.
190 |//-----------------------------------------------------------------------
192 /* Generate subroutines used by opcodes and other parts of the VM. */
193 /* The .code_sub section should be last to help static branch prediction. */
194 static void build_subroutines(BuildCtx *ctx)
198 |//-----------------------------------------------------------------------
199 |//-- Return handling ----------------------------------------------------
200 |//-----------------------------------------------------------------------
203 | // See vm_return. Also: RB = previous base.
205 | beq ->cont_dispatch
207 | // Return from pcall or xpcall fast func.
208 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
209 | mvn CARG2, #~LJ_TTRUE
211 | // Prepending may overwrite the pcall frame, so do it at the end.
212 | str CARG2, [RA, FRAME_PC] // Prepend true to results.
216 | add RC, RC, #8 // RC = (nresults+1)*8.
217 | ands CARG1, PC, #FRAME_TYPE
218 | str RC, SAVE_MULTRES
219 | beq ->BC_RET_Z // Handle regular return to Lua.
222 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
223 | // CARG1 = PC & FRAME_TYPE
224 | bic RB, PC, #FRAME_TYPEP
225 | cmp CARG1, #FRAME_C
226 | sub RB, BASE, RB // RB = previous base.
230 | ldr KBASE, SAVE_NRES
231 | mv_vmstate CARG4, C
234 | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8
238 | subs CARG3, CARG3, #8
239 | ldrd CARG12, [RA], #8
240 | strd CARG12, [BASE], #8
243 | cmp KBASE, RC // More/less results wanted?
246 | str BASE, L->top // Store new top.
249 | ldr RC, SAVE_CFRAME // Restore previous C frame.
250 | mov CRET1, #0 // Ok return status for vm_pcall.
257 | blt >7 // Less results wanted?
258 | // More results wanted. Check stack size and fill up results with nil.
259 | ldr CARG3, L->maxstack
260 | mvn CARG2, #~LJ_TNIL
263 | str CARG2, [BASE, #4]
268 |7: // Less results wanted.
269 | sub CARG1, RC, KBASE
270 | cmp KBASE, #0 // LUA_MULTRET+1 case?
271 | subne BASE, BASE, CARG1 // Either keep top or shrink it.
274 |8: // Corner case: need to grow stack for filling up results.
275 | // This can happen if:
276 | // - A C function grows the stack (a lot).
277 | // - The GC shrinks the stack in between.
278 | // - A return back from a lua_call() with (high) nresults adjustment.
279 | str BASE, L->top // Save current top held in BASE (yes).
282 | bl extern lj_state_growstack // (lua_State *L, int n)
283 | ldr BASE, L->top // Need the (realloced) L->top in BASE.
286 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
288 |->vm_unwind_c_eh: // Landing pad for external unwinder.
291 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
293 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
296 |//-----------------------------------------------------------------------
297 |//-- Grow stack for calls -----------------------------------------------
298 |//-----------------------------------------------------------------------
300 |->vm_growstack_c: // Grow stack for C function.
303 |->vm_growstack_l: // Grow stack for Lua function.
306 |//-----------------------------------------------------------------------
307 |//-- Entry points into the assembler VM ---------------------------------
308 |//-----------------------------------------------------------------------
310 |->vm_resume: // Setup C frame and resume thread.
313 |->vm_pcall: // Setup protected C frame and enter VM.
314 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
317 | str CARG4, SAVE_ERRF
320 |->vm_call: // Setup C frame and enter VM.
321 | // (lua_State *L, TValue *base, int nres1)
325 |1: // Entry point for vm_pcall above (PC = ftype).
326 | ldr RC, L:CARG1->cframe
327 | str CARG3, SAVE_NRES
331 | str sp, L->cframe // Add our C frame to cframe chain.
332 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
333 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
334 | str RC, SAVE_CFRAME
335 | add DISPATCH, DISPATCH, #GG_G2DISP
337 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
338 | ldr RB, L->base // RB = old base (for vmeta_call).
342 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
343 | sub PC, PC, RB // PC = frame delta + frame type
344 | mv_vmstate CARG2, INTERP
345 | sub NARGS8:RC, CARG1, BASE
349 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
350 | ldrd CARG34, [BASE, FRAME_FUNC]
351 | checkfunc CARG4, ->vmeta_call
353 |->vm_call_dispatch_f:
355 | // BASE = new base, RC = nargs*8
357 |->vm_cpcall: // Setup protected C frame, call C.
358 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
361 | ldr RA, L:CARG1->stack
364 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
366 | sub RA, RA, RB // Compute -savestack(L, L->top).
367 | str sp, L->cframe // Add our C frame to cframe chain.
369 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
370 | str RB, SAVE_ERRF // No error function.
371 | str RC, SAVE_CFRAME
372 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
373 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
376 | add DISPATCH, DISPATCH, #GG_G2DISP
377 | bne <3 // Else continue with the call.
378 | b ->vm_leave_cp // No base? Just remove C frame.
380 |//-----------------------------------------------------------------------
381 |//-- Metamethod handling ------------------------------------------------
382 |//-----------------------------------------------------------------------
384 |//-- Continuation dispatch ----------------------------------------------
392 |//-- Table indexing metamethods -----------------------------------------
403 |//-----------------------------------------------------------------------
414 |//-- Comparison metamethods ---------------------------------------------
422 |->cont_ra: // RA = resultptr
425 |->cont_condt: // RA = resultptr
428 |->cont_condf: // RA = resultptr
434 |//-- Arithmetic metamethods ---------------------------------------------
454 |//-- Call metamethod ----------------------------------------------------
456 |->vmeta_call: // Resolve and call __call metamethod.
459 |->vmeta_callt: // Resolve __call for BC_CALLT.
462 |//-- Argument coercion for 'for' statement ------------------------------
467 |//-----------------------------------------------------------------------
468 |//-- Fast functions -----------------------------------------------------
469 |//-----------------------------------------------------------------------
475 |.macro .ffunc_1, name
480 |.macro .ffunc_2, name
485 |.macro .ffunc_n, name
490 |.macro .ffunc_nn, name
499 |//-- Base library: checks -----------------------------------------------
507 |//-- Base library: getters and setters ---------------------------------
509 |.ffunc_1 getmetatable
512 |.ffunc_2 setmetatable
518 |//-- Base library: conversions ------------------------------------------
526 |//-- Base library: iterators -------------------------------------------
540 |//-- Base library: catch errors ----------------------------------------
548 |//-- Coroutine library --------------------------------------------------
550 |.macro coroutine_resume_wrap, resume
552 |.ffunc_1 coroutine_resume
554 |.ffunc coroutine_wrap_aux
559 | coroutine_resume_wrap 1 // coroutine.resume
560 | coroutine_resume_wrap 0 // coroutine.wrap
562 |.ffunc coroutine_yield
565 |//-- Math library -------------------------------------------------------
579 |.macro math_extern, func
580 | .ffunc math_ .. func
584 |.macro math_extern2, func
585 | .ffunc math_ .. func
589 |.macro math_round, func
590 | .ffunc math_ .. func
627 |.macro math_minmax, name, cmpop
632 | math_minmax math_min, NYI
633 | math_minmax math_max, NYI
635 |//-- String library -----------------------------------------------------
640 |.ffunc string_byte // Only handle the 1-arg case here.
643 |.ffunc string_char // Only handle the 1-arg case here.
649 |.ffunc string_rep // Only handle the 1-char case inline.
652 |.ffunc string_reverse
655 |.macro ffstring_case, name, lo
660 |ffstring_case string_lower, 65
661 |ffstring_case string_upper, 97
663 |//-- Table library ------------------------------------------------------
668 |//-- Bit library --------------------------------------------------------
670 |.macro .ffunc_bit, name
671 | .ffunc_n bit_..name
680 |.macro .ffunc_bit_op, name, ins
685 |.ffunc_bit_op band, and // bit.band: ARM bitwise AND.
686 |.ffunc_bit_op bor, orr // bit.bor: ARM bitwise OR is 'orr' ('or' is the x86 mnemonic).
687 |.ffunc_bit_op bxor, eor // bit.bxor: ARM bitwise XOR is 'eor' ('xor' is the x86 mnemonic).
695 |.macro .ffunc_bit_sh, name, ins, shmod
696 | .ffunc_nn bit_..name
700 |.ffunc_bit_sh lshift, NYI, 1
701 |.ffunc_bit_sh rshift, NYI, 1
702 |.ffunc_bit_sh arshift, NYI, 1
703 |.ffunc_bit_sh rol, NYI, 2
704 |.ffunc_bit_sh ror, NYI, 0
706 |//-----------------------------------------------------------------------
708 |->fff_fallback: // Call fast function fallback handler.
711 |->fff_gcstep: // Call GC step function.
714 |//-----------------------------------------------------------------------
715 |//-- Special dispatch targets -------------------------------------------
716 |//-----------------------------------------------------------------------
718 |->vm_record: // Dispatch target for recording phase.
723 |->vm_rethook: // Dispatch target for return hooks.
726 |->vm_inshook: // Dispatch target for instr/line hooks.
729 |->cont_hook: // Continue from hook yield.
732 |->vm_hotloop: // Hot loop counter underflow.
737 |->vm_callhook: // Dispatch target for call hooks.
740 |->vm_hotcall: // Hot call counter underflow.
743 |//-----------------------------------------------------------------------
744 |//-- Trace exit handler -------------------------------------------------
745 |//-----------------------------------------------------------------------
756 |//-----------------------------------------------------------------------
757 |//-- Math helper functions ----------------------------------------------
758 |//-----------------------------------------------------------------------
760 |// FP value rounding. Called by math.floor/math.ceil fast functions
761 |// and from JIT code.
763 |.macro vm_round, name, mode
768 | vm_round vm_floor, 0
769 | vm_round vm_ceil, 1
771 | vm_round vm_trunc, 2
786 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
787 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
788 |// and basic math functions. ORDER ARITH
792 |//-----------------------------------------------------------------------
793 |//-- Miscellaneous functions --------------------------------------------
794 |//-----------------------------------------------------------------------
796 |//-----------------------------------------------------------------------
797 |//-- FFI helper functions -----------------------------------------------
798 |//-----------------------------------------------------------------------
805 |//-----------------------------------------------------------------------
808 /* Generate the code for a single instruction. */
809 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
816 /* -- Comparison ops ---------------------------------------------------- */
818 /* Remember: all ops branch for a true comparison, fall through otherwise. */
820 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
824 case BC_ISEQV: case BC_ISNEV:
829 case BC_ISEQS: case BC_ISNES:
834 case BC_ISEQN: case BC_ISNEN:
839 case BC_ISEQP: case BC_ISNEP:
844 /* -- Unary test and copy ops ------------------------------------------- */
846 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
850 /* -- Unary ops --------------------------------------------------------- */
865 /* -- Binary ops -------------------------------------------------------- */
867 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
870 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
873 case BC_MULVN: case BC_MULNV: case BC_MULVV:
876 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
882 case BC_MODNV: case BC_MODVV:
893 /* -- Constant ops ------------------------------------------------------ */
916 /* -- Upvalue and function ops ------------------------------------------ */
942 /* -- Table ops --------------------------------------------------------- */
978 /* -- Calls and vararg handling ----------------------------------------- */
1010 /* -- Returns ----------------------------------------------------------- */
1017 | // RA = results*8, RC = nresults+1
1018 | ldr PC, [BASE, FRAME_PC]
1021 | str RC, SAVE_MULTRES
1023 | ands CARG1, PC, #FRAME_TYPE
1024 | eor CARG2, PC, #FRAME_VARG
1025 | ldreq INS, [PC, #-4]
1029 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
1032 |->BC_RETV1_Z: // Non-standard return case.
1035 | tst CARG2, #FRAME_TYPEP
1037 | // Return from vararg function: relocate BASE down.
1038 | sub BASE, BASE, CARG2
1039 | ldr PC, [BASE, FRAME_PC]
1043 case BC_RET0: case BC_RET1:
1044 | // RA = results*8, RC = nresults+1
1045 | ldr PC, [BASE, FRAME_PC]
1047 | str RC, SAVE_MULTRES
1048 | ands CARG1, PC, #FRAME_TYPE
1049 | eor CARG2, PC, #FRAME_VARG
1050 | ldreq INS, [PC, #-4]
1052 if (op == BC_RET1) {
1053 | ldrd CARG12, [BASE, RA]
1055 | sub CARG4, BASE, #8
1056 | decode_RA8 RA, INS
1057 if (op == BC_RET1) {
1058 | strd CARG12, [CARG4]
1060 | sub BASE, CARG4, RA
1061 | decode_RB8 RB, INS
1062 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
1066 | ldr CARG2, LFUNC:CARG1->field_pc
1069 | ldr KBASE, [CARG2, #PC2PROTO(k)]
1072 |6: // Fill up results with nil.
1073 | sub CARG2, CARG4, #4
1074 | mvn CARG3, #~LJ_TNIL
1075 | str CARG3, [CARG2, RC]
1080 /* -- Loops and branches ------------------------------------------------ */
1086 | // Fall through. Assumes BC_IFORL follows.
1096 vk = (op == BC_IFORL || op == BC_JFORL);
1104 | // Fall through. Assumes BC_IITERL follows.
1133 /* -- Function headers -------------------------------------------------- */
1139 case BC_FUNCV: /* NYI: compiled vararg functions. */
1140 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
1148 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
1149 | ldr CARG1, L->maxstack
1150 | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)]
1151 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
1153 | bhi ->vm_growstack_l
1157 | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters.
1159 if (op == BC_JFUNCF) {
1165 |3: // Clear missing parameters.
1166 | mvn CARG1, #~LJ_TNIL
1167 | str CARG1, [BASE, NARGS8:RC]
1168 | add NARGS8:RC, NARGS8:RC, #8
1176 | NYI // NYI: compiled vararg functions
1177 break; /* NYI: compiled vararg functions. */
1180 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
1181 | ldr CARG1, L->maxstack
1182 | add CARG4, BASE, RC
1184 | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC.
1185 | add CARG2, RC, #8+FRAME_VARG
1186 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
1188 | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG.
1189 | bhs ->vm_growstack_l
1190 | ldrb RB, [PC, #-4+PC2PROTO(numparams)]
1194 | add BASE, CARG4, #8
1196 | mvn CARG3, #~LJ_TNIL
1198 | cmp RA, RC // Less args than parameters?
1199 | ldrdlo CARG12, [RA], #8
1200 | mvnhs CARG2, CARG3
1201 | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC).
1204 | strd CARG12, [CARG4, #8]!
1212 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
1213 if (op == BC_FUNCC) {
1214 | ldr CARG4, CFUNC:CARG3->f
1216 | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)]
1218 | add CARG2, RA, NARGS8:RC
1219 | ldr CARG1, L->maxstack
1220 | add RC, BASE, NARGS8:RC
1224 if (op == BC_FUNCCW) {
1225 | ldr CARG2, CFUNC:CARG3->f
1227 | mv_vmstate CARG3, C
1229 | bhi ->vm_growstack_c // Need to grow stack.
1231 | blx CARG4 // (lua_State *L [, lua_CFunction f])
1232 | // Returns nresults.
1234 | mv_vmstate CARG3, INTERP
1238 | ldr PC, [BASE, FRAME_PC]
1239 | sub RA, CRET2, RC // RA = L->top - nresults*8
1243 /* ---------------------------------------------------------------------- */
1246 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
1252 static int build_backend(BuildCtx *ctx)
1256 dasm_growpc(Dst, BC__MAX);
1258 build_subroutines(ctx);
1261 for (op = 0; op < BC__MAX; op++)
1262 build_ins(ctx, (BCOp)op, op);
1267 /* Emit pseudo frame-info for all assembler functions. */
1268 static void emit_asm_debug(BuildCtx *ctx)
1271 switch (ctx->mode) {
1273 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
1276 "\t.long .LECIE0-.LSCIE0\n"
1278 "\t.long 0xffffffff\n"
1283 "\t.byte 0xe\n" /* Return address is in lr. */
1284 "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */
1289 "\t.long .LEFDE0-.LASFDE0\n"
1291 "\t.long .Lframe0\n"
1294 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
1295 "\t.byte 0x8e\n\t.uleb128 1\n", /* Restore lr. */
1296 (int)ctx->codesz, CFRAME_SIZE);
1297 for (i = 11; i >= 4; i--) /* Restore r4-r11. */
1298 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
1302 /* NYI: emit ARM.exidx. */