1 |// Low-level VM code for ARM CPUs.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
6 |.section code_op, code_sub
8 |.actionlist build_actionlist
10 |.globalnames globnames
11 |.externnames extnames
13 |// Note: The ragged indentation of the instructions is intentional.
14 |// The starting columns indicate data dependencies.
16 |//-----------------------------------------------------------------------
18 |// Fixed register assignments for the interpreter.
20 |// The following must be C callee-save (but BASE is often refetched).
21 |.define BASE, r4 // Base of current Lua stack frame.
22 |.define KBASE, r5 // Constants of current Lua function.
23 |.define PC, r6 // Next PC.
24 |.define DISPATCH, r7 // Opcode dispatch table.
25 |.define LREG, r8 // Register holding lua_State (also in SAVE_L).
26 |.define MASKR8, r9 // 255*8 constant for fast bytecode decoding.
28 |// The following temporaries are not saved across C calls, except for RA/RC.
29 |.define RA, r10 // Callee-save.
30 |.define RC, r11 // Callee-save.
32 |.define OP, r12 // Overlaps RB, must not be lr.
35 |// Calling conventions. Also used as temporaries.
40 |.define CARG12, r0 // For 1st soft-fp double.
41 |.define CARG34, r2 // For 2nd soft-fp double.
46 |// Stack layout while in interpreter. Must match with lj_frame.h.
47 |.define CFRAME_SPACE, #28
48 |.define SAVE_ERRF, [sp, #24]
49 |.define SAVE_NRES, [sp, #20]
50 |.define SAVE_CFRAME, [sp, #16]
51 |.define SAVE_L, [sp, #12]
52 |.define SAVE_PC, [sp, #8]
53 |.define SAVE_MULTRES, [sp, #4]
57 | push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
58 | sub sp, sp, CFRAME_SPACE
60 |.macro restoreregs_ret
61 | add sp, sp, CFRAME_SPACE
62 | pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
65 |// Type definitions. Some of these are only used for documentation.
66 |.type L, lua_State, LREG
67 |.type GL, global_State
80 |//-----------------------------------------------------------------------
82 |// Trap for not-yet-implemented parts.
83 |.macro NYI; ud; .endmacro // Expands to a single `ud` (presumably an undefined-instruction trap -- confirm in the DynASM ARM backend).
85 |//-----------------------------------------------------------------------
87 |// Access to frame relative to BASE.
88 |.define FRAME_FUNC, #-8
89 |.define FRAME_PC, #-4
91 |.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro // dst = (ins bits 8-15) * 8; relies on MASKR8 == 255*8.
92 |.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro // dst = (ins bits 24-31) * 8; relies on MASKR8 == 255*8.
93 |.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro // dst = (ins bits 16-23) * 8; relies on MASKR8 == 255*8.
94 |.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro // dst = upper 16 bits of ins, unscaled.
95 |.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro // dst = low 8 bits of ins (the opcode), unscaled.
97 |// Instruction fetch.
104 |// Instruction decode+dispatch.
106 | ldr OP, [DISPATCH, OP, lsl #2]
117 |// Instruction footer.
119 | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
120 | .define ins_next, ins_NEXT
121 | .define ins_next_, ins_NEXT
122 | .define ins_next1, ins_NEXT1
123 | .define ins_next2, ins_NEXT2
124 | .define ins_next3, ins_NEXT3
126 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
127 | // Affects only certain kinds of benchmarks (and only with -j off).
144 |// Avoid register name substitution for field name.
147 |// Call decode and dispatch.
149 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
150 | ldr PC, LFUNC:CARG3->field_pc
151 | ldrb OP, [PC] // STALL: load PC. early PC.
153 | ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP.
160 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
161 | str PC, [BASE, FRAME_PC]
162 | ins_callt // STALL: locked PC.
165 |//-----------------------------------------------------------------------
167 |// Macros to test operand types.
168 |.macro checktp, reg, tp; cmn reg, #-tp; .endmacro // Sets flags from reg + (-tp): Z is set iff reg == tp. Clobbers flags only.
169 |.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro // Same test, but only executed when Z is already set; lets several type checks be chained.
170 |.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro // Branch to target unless reg == LJ_TSTR. Clobbers flags.
171 |.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro // Branch to target unless reg == LJ_TTAB. Clobbers flags.
172 |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro // Branch to target unless reg == LJ_TFUNC. Clobbers flags.
174 |// Assumes DISPATCH is relative to GL.
175 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) /* Offset of a global_State field, to be added to DISPATCH. */
176 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) /* Offset of a jit_State field, to be added to DISPATCH. */
178 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Negative offset of a GCproto field measured from the end of the GCproto struct; applied to bytecode pointers (presumably the bytecode directly follows the struct -- confirm). */
188 |// Set current VM state.
189 |.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro // reg = ~LJ_VMST_<st>; the state name is pasted onto the LJ_VMST_ prefix.
190 |.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro // Store reg into the vmstate field of global_State, addressed via DISPATCH.
192 |//-----------------------------------------------------------------------
195 #error "Only dual-number mode supported for ARM target"
198 /* Generate subroutines used by opcodes and other parts of the VM. */
199 /* The .code_sub section should be last to help static branch prediction. */
200 static void build_subroutines(BuildCtx *ctx)
204 |//-----------------------------------------------------------------------
205 |//-- Return handling ----------------------------------------------------
206 |//-----------------------------------------------------------------------
209 | // See vm_return. Also: RB = previous base.
211 | beq ->cont_dispatch
213 | // Return from pcall or xpcall fast func.
214 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
215 | mvn CARG2, #~LJ_TTRUE
217 | // Prepending may overwrite the pcall frame, so do it at the end.
218 | str CARG2, [RA, FRAME_PC] // Prepend true to results.
222 | add RC, RC, #8 // RC = (nresults+1)*8.
223 | ands CARG1, PC, #FRAME_TYPE
224 | str RC, SAVE_MULTRES
225 | beq ->BC_RET_Z // Handle regular return to Lua.
228 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
229 | // CARG1 = PC & FRAME_TYPE
230 | bic RB, PC, #FRAME_TYPEP
231 | cmp CARG1, #FRAME_C
232 | sub RB, BASE, RB // RB = previous base.
236 | ldr KBASE, SAVE_NRES
237 | mv_vmstate CARG4, C
240 | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8
244 | subs CARG3, CARG3, #8
245 | ldrd CARG12, [RA], #8
246 | strd CARG12, [BASE], #8
249 | cmp KBASE, RC // More/less results wanted?
252 | str BASE, L->top // Store new top.
255 | ldr RC, SAVE_CFRAME // Restore previous C frame.
256 | mov CRET1, #0 // Ok return status for vm_pcall.
263 | blt >7 // Less results wanted?
264 | // More results wanted. Check stack size and fill up results with nil.
265 | ldr CARG3, L->maxstack
266 | mvn CARG2, #~LJ_TNIL
269 | str CARG2, [BASE, #4]
274 |7: // Less results wanted.
275 | sub CARG1, RC, KBASE
276 | cmp KBASE, #0 // LUA_MULTRET+1 case?
277 | subne BASE, BASE, CARG1 // Either keep top or shrink it.
280 |8: // Corner case: need to grow stack for filling up results.
281 | // This can happen if:
282 | // - A C function grows the stack (a lot).
283 | // - The GC shrinks the stack in between.
284 | // - A return back from a lua_call() with (high) nresults adjustment.
285 | str BASE, L->top // Save current top held in BASE (yes).
288 | bl extern lj_state_growstack // (lua_State *L, int n)
289 | ldr BASE, L->top // Need the (realloced) L->top in BASE.
292 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
294 |->vm_unwind_c_eh: // Landing pad for external unwinder.
297 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
299 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
302 |//-----------------------------------------------------------------------
303 |//-- Grow stack for calls -----------------------------------------------
304 |//-----------------------------------------------------------------------
306 |->vm_growstack_c: // Grow stack for C function.
309 |->vm_growstack_l: // Grow stack for Lua function.
312 |//-----------------------------------------------------------------------
313 |//-- Entry points into the assembler VM ---------------------------------
314 |//-----------------------------------------------------------------------
316 |->vm_resume: // Setup C frame and resume thread.
319 |->vm_pcall: // Setup protected C frame and enter VM.
320 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
323 | str CARG4, SAVE_ERRF
326 |->vm_call: // Setup C frame and enter VM.
327 | // (lua_State *L, TValue *base, int nres1)
331 |1: // Entry point for vm_pcall above (PC = ftype).
332 | ldr RC, L:CARG1->cframe
333 | str CARG3, SAVE_NRES
337 | str sp, L->cframe // Add our C frame to cframe chain.
338 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
339 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
340 | str RC, SAVE_CFRAME
341 | add DISPATCH, DISPATCH, #GG_G2DISP
343 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
344 | ldr RB, L->base // RB = old base (for vmeta_call).
348 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
349 | sub PC, PC, RB // PC = frame delta + frame type
350 | mv_vmstate CARG2, INTERP
351 | sub NARGS8:RC, CARG1, BASE
355 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
356 | ldrd CARG34, [BASE, FRAME_FUNC]
357 | checkfunc CARG4, ->vmeta_call
359 |->vm_call_dispatch_f:
361 | // BASE = new base, RC = nargs*8
363 |->vm_cpcall: // Setup protected C frame, call C.
364 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
367 | ldr RA, L:CARG1->stack
370 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
372 | sub RA, RA, RB // Compute -savestack(L, L->top).
373 | str sp, L->cframe // Add our C frame to cframe chain.
375 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
376 | str RB, SAVE_ERRF // No error function.
377 | str RC, SAVE_CFRAME
378 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
379 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
382 | add DISPATCH, DISPATCH, #GG_G2DISP
383 | bne <3 // Else continue with the call.
384 | b ->vm_leave_cp // No base? Just remove C frame.
386 |//-----------------------------------------------------------------------
387 |//-- Metamethod handling ------------------------------------------------
388 |//-----------------------------------------------------------------------
390 |//-- Continuation dispatch ----------------------------------------------
398 |//-- Table indexing metamethods -----------------------------------------
409 |//-----------------------------------------------------------------------
420 |//-- Comparison metamethods ---------------------------------------------
428 |->cont_ra: // RA = resultptr
431 |->cont_condt: // RA = resultptr
434 |->cont_condf: // RA = resultptr
440 |//-- Arithmetic metamethods ---------------------------------------------
445 | add CARG3, BASE, RB
446 | add CARG4, KBASE, RC
452 | add CARG4, BASE, RB
453 | add CARG3, KBASE, RC
457 | add CARG3, BASE, RC
458 | add CARG4, BASE, RC
464 | add CARG3, BASE, RB
465 | add CARG4, BASE, RC
468 | add CARG2, BASE, RA
473 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
474 | // Returns NULL (finished) or TValue * (metamethod).
478 | // Call metamethod for binary op.
480 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
486 |//-- Call metamethod ----------------------------------------------------
488 |->vmeta_call: // Resolve and call __call metamethod.
491 |->vmeta_callt: // Resolve __call for BC_CALLT.
494 |//-- Argument coercion for 'for' statement ------------------------------
501 | bl extern lj_meta_for // (lua_State *L, TValue *base)
516 |//-----------------------------------------------------------------------
517 |//-- Fast functions -----------------------------------------------------
518 |//-----------------------------------------------------------------------
524 |.macro .ffunc_1, name
526 | ldrd CARG12, [BASE]
531 |.macro .ffunc_2, name
533 | ldrd CARG12, [BASE]
534 | ldrd CARG34, [BASE, #8]
539 |.macro .ffunc_n, name
544 |.macro .ffunc_nn, name
553 |//-- Base library: checks -----------------------------------------------
561 |//-- Base library: getters and setters ---------------------------------
563 |.ffunc_1 getmetatable
566 |.ffunc_2 setmetatable
572 |//-- Base library: conversions ------------------------------------------
578 | // Only handles the string or number case inline.
579 | checktp CARG2, LJ_TSTR
580 | // A __tostring method in the string base metatable is ignored.
582 | // Handle numbers inline, unless a number base metatable is present.
583 | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])]
585 | checktp CARG2, LJ_TISNUM
591 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o)
592 | // Returns GCstr *.
594 | mvn CARG2, #~LJ_TSTR
597 |//-- Base library: iterators -------------------------------------------
611 |//-- Base library: catch errors ----------------------------------------
619 |//-- Coroutine library --------------------------------------------------
621 |.macro coroutine_resume_wrap, resume
623 |.ffunc_1 coroutine_resume
625 |.ffunc coroutine_wrap_aux
630 | coroutine_resume_wrap 1 // coroutine.resume
631 | coroutine_resume_wrap 0 // coroutine.wrap
633 |.ffunc coroutine_yield
636 |//-- Math library -------------------------------------------------------
642 | // CARG12 = TValue result.
643 | ldr PC, [BASE, FRAME_PC]
644 | strd CARG12, [BASE, #-8]
649 | // RC = (nresults+1)*8, PC = return.
650 | ands CARG1, PC, #FRAME_TYPE
651 | ldreq INS, [PC, #-4]
652 | str RC, SAVE_MULTRES
657 | cmp RB, RC // More results expected?
659 | decode_RA8 CARG1, INS
662 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
663 | sub BASE, RA, CARG1
666 |6: // Fill up results with nil.
668 | mvn CARG1, #~LJ_TNIL
670 | str CARG1, [CARG2, #-4]
673 |.macro math_extern, func
674 | .ffunc math_ .. func
678 |.macro math_extern2, func
679 | .ffunc math_ .. func
683 |.macro math_round, func
684 | .ffunc math_ .. func
721 |.macro math_minmax, name, cmpop
726 | math_minmax math_min, NYI
727 | math_minmax math_max, NYI
729 |//-- String library -----------------------------------------------------
734 |.ffunc string_byte // Only handle the 1-arg case here.
737 |.ffunc string_char // Only handle the 1-arg case here.
743 |.ffunc string_rep // Only handle the 1-char case inline.
746 |.ffunc string_reverse
749 |.macro ffstring_case, name, lo
754 |ffstring_case string_lower, 65
755 |ffstring_case string_upper, 97
757 |//-- Table library ------------------------------------------------------
762 |//-- Bit library --------------------------------------------------------
764 |.macro .ffunc_bit, name
765 | .ffunc_n bit_..name
774 |.macro .ffunc_bit_op, name, ins
779 |.ffunc_bit_op band, and // Second argument is the ARM mnemonic handed to the macro as `ins`.
780 |.ffunc_bit_op bor, orr // ARM spells bitwise OR as `orr`; there is no `or` mnemonic.
781 |.ffunc_bit_op bxor, eor // ARM spells bitwise XOR as `eor`.
789 |.macro .ffunc_bit_sh, name, ins, shmod
790 | .ffunc_nn bit_..name
794 |.ffunc_bit_sh lshift, NYI, 1
795 |.ffunc_bit_sh rshift, NYI, 1
796 |.ffunc_bit_sh arshift, NYI, 1
797 |.ffunc_bit_sh rol, NYI, 2
798 |.ffunc_bit_sh ror, NYI, 0
800 |//-----------------------------------------------------------------------
802 |->fff_fallback: // Call fast function fallback handler.
803 | // BASE = new base, RC = nargs*8
804 | ldr CARG3, [BASE, FRAME_FUNC]
805 | ldr CARG2, L->maxstack
806 | add CARG1, BASE, NARGS8:RC
807 | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC.
809 | ldr CARG3, CFUNC:CARG3->f
811 | add CARG1, CARG1, #8*LUA_MINSTACK
812 | str PC, SAVE_PC // Redundant (but a defined value).
815 | bhi >5 // Need to grow stack.
816 | blx CARG3 // (lua_State *L)
817 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
821 | bgt ->fff_res // Returned nresults+1?
822 |1: // Returned 0 or -1: retry fast path.
824 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
825 | sub NARGS8:RC, CARG1, BASE
826 | bne >2 // Returned -1?
827 | ins_callt // Returned 0: retry fast path.
829 |2: // Reconstruct previous base for vmeta_call during tailcall.
830 | ands CARG1, PC, #FRAME_TYPE
831 | bic CARG2, PC, #FRAME_TYPEP
832 | ldreq INS, [PC, #-4]
833 | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8.
834 | sub RB, BASE, CARG2
835 | b ->vm_call_dispatch // Resolve again for tailcall.
837 |5: // Grow stack for fallback handler.
838 | mov CARG2, #LUA_MINSTACK
839 | bl extern lj_state_growstack // (lua_State *L, int n)
841 | cmp CARG1, CARG1 // Set zero-flag to force retry.
844 |->fff_gcstep: // Call GC step function.
847 |//-----------------------------------------------------------------------
848 |//-- Special dispatch targets -------------------------------------------
849 |//-----------------------------------------------------------------------
851 |->vm_record: // Dispatch target for recording phase.
856 |->vm_rethook: // Dispatch target for return hooks.
859 |->vm_inshook: // Dispatch target for instr/line hooks.
862 |->cont_hook: // Continue from hook yield.
865 |->vm_hotloop: // Hot loop counter underflow.
870 |->vm_callhook: // Dispatch target for call hooks.
873 |->vm_hotcall: // Hot call counter underflow.
876 |//-----------------------------------------------------------------------
877 |//-- Trace exit handler -------------------------------------------------
878 |//-----------------------------------------------------------------------
889 |//-----------------------------------------------------------------------
890 |//-- Math helper functions ----------------------------------------------
891 |//-----------------------------------------------------------------------
893 |// FP value rounding. Called by math.floor/math.ceil fast functions
894 |// and from JIT code.
896 |.macro vm_round, name, mode
901 | vm_round vm_floor, 0
902 | vm_round vm_ceil, 1
904 | vm_round vm_trunc, 2
922 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
923 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
924 |// and basic math functions. ORDER ARITH
928 | blo extern __aeabi_dadd
929 | beq extern __aeabi_dsub
931 | blo extern __aeabi_dmul
932 | beq extern __aeabi_ddiv
937 | eorlo CARG2, CARG2, #0x80000000
938 | biceq CARG2, CARG2, #0x80000000
940 | NYI // Other operations only needed by JIT compiler.
942 |//-----------------------------------------------------------------------
943 |//-- Miscellaneous functions --------------------------------------------
944 |//-----------------------------------------------------------------------
946 |//-----------------------------------------------------------------------
947 |//-- FFI helper functions -----------------------------------------------
948 |//-----------------------------------------------------------------------
955 |//-----------------------------------------------------------------------
958 /* Generate the code for a single instruction. */
959 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
966 /* -- Comparison ops ---------------------------------------------------- */
968 /* Remember: all ops branch for a true comparison, fall through otherwise. */
970 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
974 case BC_ISEQV: case BC_ISNEV:
979 case BC_ISEQS: case BC_ISNES:
984 case BC_ISEQN: case BC_ISNEN:
989 case BC_ISEQP: case BC_ISNEP:
994 /* -- Unary test and copy ops ------------------------------------------- */
996 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
1000 /* -- Unary ops --------------------------------------------------------- */
1003 | // RA = dst*8, RC = src
1006 | ldrd CARG12, [BASE, RC]
1008 | strd CARG12, [BASE, RA]
1012 | // RA = dst*8, RC = src
1013 | add RC, BASE, RC, lsl #3
1015 | ldr CARG1, [RC, #4]
1018 | checktp CARG1, LJ_TTRUE
1019 | mvnls CARG2, #~LJ_TFALSE
1020 | mvnhi CARG2, #~LJ_TTRUE
1021 | str CARG2, [RA, #4]
1025 | // RA = dst*8, RC = src
1027 | ldrd CARG12, [BASE, RC]
1030 | checktp CARG2, LJ_TISNUM
1032 | rsbs CARG1, CARG1, #0
1035 | strd CARG12, [BASE, RA]
1038 | mov CARG2, #0x01e00000 // 2^31.
1040 | orr CARG2, CARG2, #0x40000000
1044 | add CARG2, CARG2, #0x80000000
1048 | // RA = dst*8, RC = src
1050 | ldrd CARG12, [BASE, RC]
1051 | checkstr CARG2, >2
1052 | ldr CARG1, STR:CARG1->len
1054 | mvn CARG2, #~LJ_TISNUM
1057 | strd CARG12, [BASE, RA]
1060 | checktab CARG2, ->vmeta_len
1061 | bl extern lj_tab_len // (GCtab *t)
1062 | // Returns uint32_t (but less than 2^31).
1066 /* -- Binary ops -------------------------------------------------------- */
1068 |.macro ins_arithcheck, cond, ncond, target
1070 | cmn CARG4, #-LJ_TISNUM
1071 | cmn..cond CARG2, #-LJ_TISNUM
1073 | cmn CARG2, #-LJ_TISNUM
1074 | cmn..cond CARG4, #-LJ_TISNUM
1078 |.macro ins_arithcheck_int, target
1079 | ins_arithcheck eq, ne, target
1081 |.macro ins_arithcheck_num, target
1082 | ins_arithcheck lo, hs, target
1085 |.macro ins_arithpre
1086 | decode_RB8 RB, INS
1087 | decode_RC8 RC, INS
1088 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
1089 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
1092 | ldrd CARG12, [BASE, RB]
1093 | ldrd CARG34, [KBASE, RC]
1096 | ldrd CARG34, [BASE, RB]
1097 | ldrd CARG12, [KBASE, RC]
1100 | ldrd CARG12, [BASE, RB]
1101 | ldrd CARG34, [BASE, RC]
1106 |.macro ins_arithfallback, ins
1109 | ins ->vmeta_arith_vn
1112 | ins ->vmeta_arith_nv
1115 | ins ->vmeta_arith_vv
1120 |.macro ins_arithdn, intins, fpcall
1123 | ins_arithcheck_int >5
1124 |.if "intins" == "smull"
1125 | smull CARG1, RC, CARG3, CARG1
1126 | cmp RC, CARG1, asr #31
1127 | ins_arithfallback bne
1129 | intins CARG1, CARG1, CARG3
1130 | ins_arithfallback bvs
1134 | strd CARG12, [BASE, RA]
1137 | ins_arithfallback ins_arithcheck_num
1143 |.macro ins_arithfp, fpcall
1145 ||if (op == BC_MODVN) {
1148 | ins_arithfallback ins_arithcheck_num
1152 | strd CARG12, [BASE, RA]
1156 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
1157 | ins_arithdn adds, extern __aeabi_dadd
1159 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
1160 | ins_arithdn subs, extern __aeabi_dsub
1162 case BC_MULVN: case BC_MULNV: case BC_MULVV:
1163 | ins_arithdn smull, extern __aeabi_dmul
1165 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
1166 | ins_arithfp extern __aeabi_ddiv
1169 | // NYI: integer arithmetic.
1170 | // Note: __aeabi_idivmod is unsuitable. It uses trunc, not floor.
1171 | ins_arithfp ->vm_mod
1173 case BC_MODNV: case BC_MODVV:
1178 | // NYI: (partial) integer arithmetic.
1179 | ins_arithfp extern pow
1186 /* -- Constant ops ------------------------------------------------------ */
1189 | // RA = dst*8, RC = str_const (~)
1192 | ldr CARG1, [KBASE, RC, lsl #2]
1194 | mvn CARG2, #~LJ_TSTR
1195 | strd CARG12, [BASE, RA]
1204 | // RA = dst*8, (RC = int16_literal)
1205 | mov CARG1, INS, asr #16 // Refetch sign-extended reg.
1206 | mvn CARG2, #~LJ_TISNUM
1209 | strd CARG12, [BASE, RA]
1213 | // RA = dst*8, RC = num_const
1216 | ldrd CARG12, [KBASE, RC]
1218 | strd CARG12, [BASE, RA]
1222 | // RA = dst*8, RC = primitive_type (~)
1231 | // RA = base*8, RC = end
1233 | add RC, BASE, RC, lsl #3
1234 | mvn CARG1, #~LJ_TNIL
1235 | str CARG1, [RA, #4]
1238 | str CARG1, [RA, #4]
1245 /* -- Upvalue and function ops ------------------------------------------ */
1271 /* -- Table ops --------------------------------------------------------- */
1275 | // RA = dst*8, RC = (hbits|asize) | tab_const (~)
1276 if (op == BC_TDUP) {
1279 | ldr CARG3, [DISPATCH, #DISPATCH_GL(gc.total)]
1280 | ldr CARG4, [DISPATCH, #DISPATCH_GL(gc.threshold)]
1287 if (op == BC_TNEW) {
1288 | lsl CARG2, RC, #21
1289 | lsr CARG3, RC, #11
1290 | asr RC, CARG2, #21
1291 | lsr CARG2, CARG2, #21
1293 | addeq CARG2, CARG2, #2
1294 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
1295 | // Returns GCtab *.
1297 | ldr CARG2, [KBASE, RC, lsl #2]
1298 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
1299 | // Returns GCtab *.
1302 | mvn CARG2, #~LJ_TTAB
1305 | strd CARG12, [BASE, RA]
1308 | bl extern lj_gc_step_fixtop // (lua_State *L)
1313 | // RA = dst*8, RC = str_const (~)
1315 | // RA = dst*8, RC = str_const (~)
1316 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
1318 | ldr TAB:CARG1, LFUNC:CARG2->env
1319 | ldr STR:RC, [KBASE, RC, lsl #2]
1320 if (op == BC_GGET) {
1331 | decode_RB8 RB, INS
1333 | // RA = dst*8, RB = table*8, RC = str_const (~)
1334 | ldrd CARG12, [BASE, RB]
1336 | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
1337 | checktab CARG2, ->vmeta_tgets
1339 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
1340 | ldr CARG3, TAB:CARG1->hmask
1341 | ldr CARG4, STR:RC->hash
1342 | ldr NODE:INS, TAB:CARG1->node
1343 | mov TAB:RB, TAB:CARG1
1344 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
1345 | add CARG3, CARG3, CARG3, lsl #1
1346 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
1348 | ldrd CARG12, NODE:INS->key // STALL: early NODE:INS.
1349 | ldrd CARG34, NODE:INS->val
1350 | ldr NODE:INS, NODE:INS->next
1352 | checktpeq CARG2, LJ_TSTR
1354 | checktp CARG4, LJ_TNIL
1359 | strd CARG34, [BASE, RA]
1362 |4: // Follow hash chain.
1365 | // End of hash chain: key not found, nil result.
1367 |5: // Check for __index if table value is nil.
1368 | ldr TAB:CARG1, TAB:RB->metatable
1369 | mov CARG3, #0 // Optional clear of undef. value (during load stall).
1370 | mvn CARG4, #~LJ_TNIL
1372 | beq <3 // No metatable: done.
1373 | ldrb CARG2, TAB:CARG1->nomm
1374 | tst CARG2, #1<<MM_index
1375 | bne <3 // 'no __index' flag set: done.
1397 /* -- Calls and vararg handling ----------------------------------------- */
1400 | // RA = base*8, (RB = nresults+1,) RC = extra_nargs
1401 | ldr CARG1, SAVE_MULTRES
1402 | decode_RC8 NARGS8:RC, INS
1403 | add NARGS8:RC, NARGS8:RC, CARG1
1407 | // RA = base*8, (RB = nresults+1,) RC = nargs+1
1408 | decode_RC8 NARGS8:RC, INS
1410 | mov RB, BASE // Save old BASE for vmeta_call.
1411 | ldrd CARG34, [BASE, RA]!
1412 | sub NARGS8:RC, NARGS8:RC, #8
1413 | add BASE, BASE, #8
1414 | checkfunc CARG4, ->vmeta_call
1426 | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
1428 | mov RB, BASE // Save old BASE for vmeta_call.
1429 | ldrd CARG34, [RA, #-16]
1430 | ldrd CARG12, [RA, #-8]
1432 | strd CARG34, [RA, #8] // Copy state.
1433 | strd CARG12, [RA, #16] // Copy control var.
1434 | // STALL: locked CARG34.
1435 | ldrd LFUNC:CARG34, [RA, #-24]
1436 | mov NARGS8:RC, #16 // Iterators get 2 arguments.
1437 | // STALL: load CARG34.
1438 | strd LFUNC:CARG34, [RA] // Copy callable.
1439 | checkfunc CARG4, ->vmeta_call
1444 | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
1446 | // NYI: add hotloop, record BC_ITERN.
1449 | ldr TAB:RB, [RA, #-16]
1450 | ldr CARG1, [RA, #-8] // Get index from control var.
1451 | ldr INS, TAB:RB->asize
1452 | ldr CARG2, TAB:RB->array
1454 |1: // Traverse array part.
1455 | subs RC, CARG1, INS
1456 | add CARG3, CARG2, CARG1, lsl #3
1457 | bhs >5 // Index points after array part?
1458 | ldrd CARG34, [CARG3]
1459 | checktp CARG4, LJ_TNIL
1460 | addeq CARG1, CARG1, #1 // Skip holes in array part.
1462 | ldrh RC, [PC, #-2]
1463 | mvn CARG2, #~LJ_TISNUM
1464 | strd CARG34, [RA, #8]
1465 | add RC, PC, RC, lsl #2
1468 | sub PC, RC, #0x20000
1469 | str RB, [RA, #-8] // Update control var.
1473 |5: // Traverse hash part.
1474 | ldr CARG4, TAB:RB->hmask
1475 | ldr NODE:RB, TAB:RB->node
1477 | add CARG1, RC, RC, lsl #1
1478 | cmp RC, CARG4 // End of iteration? Branch to ITERL+1.
1479 | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
1481 | ldrd CARG12, NODE:CARG3->val
1482 | checktp CARG2, LJ_TNIL
1484 | beq <6 // Skip holes in hash part.
1485 | ldrh RB, [PC, #-2]
1487 | ldrd CARG34, NODE:CARG3->key
1488 | str RC, [RA, #-8] // Update control var.
1489 | strd CARG12, [RA, #8]
1490 | add RC, PC, RB, lsl #2
1491 | sub PC, RC, #0x20000
1497 | // RA = base*8, RD = target (points to ITERN)
1499 | add RC, PC, RC, lsl #2
1500 | ldrd CFUNC:CARG12, [RA, #-24]
1501 | ldr CARG3, [RA, #-12]
1502 | ldr CARG4, [RA, #-4]
1503 | checktp CARG2, LJ_TFUNC
1504 | ldrbeq CARG1, CFUNC:CARG1->ffid
1505 | checktpeq CARG3, LJ_TTAB
1506 | checktpeq CARG4, LJ_TNIL
1507 | cmpeq CARG1, #FF_next_N
1508 | subeq PC, RC, #0x20000
1513 | str CARG1, [RA, #-8] // Initialize control var.
1516 |5: // Despecialize bytecode if any of the checks fail.
1517 | mov CARG1, #BC_JMP
1519 | strb CARG1, [PC, #-4]
1520 | sub PC, RC, #0x20000
1521 | strb OP, [PC] // Subsumes ins_next1.
1530 /* -- Returns ----------------------------------------------------------- */
1533 | // RA = results*8, RC = extra results
1534 | ldr CARG1, SAVE_MULTRES
1535 | ldr PC, [BASE, FRAME_PC]
1537 | add RC, CARG1, RC, lsl #3
1542 | // RA = results*8, RC = nresults+1
1543 | ldr PC, [BASE, FRAME_PC]
1547 | str RC, SAVE_MULTRES
1549 | ands CARG1, PC, #FRAME_TYPE
1550 | eor CARG2, PC, #FRAME_VARG
1554 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
1555 | ldr INS, [PC, #-4]
1556 | subs CARG4, RC, #8
1557 | sub CARG3, BASE, #8
1560 | ldrd CARG12, [RA], #8
1561 | add BASE, BASE, #8
1562 | subs CARG4, CARG4, #8
1563 | strd CARG12, [BASE, #-16]
1566 | decode_RA8 RA, INS
1567 | sub BASE, CARG3, RA
1568 | decode_RB8 RB, INS
1569 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
1571 | cmp RB, RC // More results expected?
1573 | ldr CARG2, LFUNC:CARG1->field_pc
1576 | ldr KBASE, [CARG2, #PC2PROTO(k)]
1579 |6: // Fill up results with nil.
1580 | mvn CARG2, #~LJ_TNIL
1581 | sub BASE, BASE, #8
1583 | str CARG2, [BASE, #-12]
1586 |->BC_RETV1_Z: // Non-standard return case.
1589 | tst CARG2, #FRAME_TYPEP
1591 | // Return from vararg function: relocate BASE down.
1592 | sub BASE, BASE, CARG2
1593 | ldr PC, [BASE, FRAME_PC]
1597 case BC_RET0: case BC_RET1:
1598 | // RA = results*8, RC = nresults+1
1599 | ldr PC, [BASE, FRAME_PC]
1601 | str RC, SAVE_MULTRES
1602 | ands CARG1, PC, #FRAME_TYPE
1603 | eor CARG2, PC, #FRAME_VARG
1604 | ldreq INS, [PC, #-4]
1606 if (op == BC_RET1) {
1607 | ldrd CARG12, [BASE, RA]
1609 | sub CARG4, BASE, #8
1610 | decode_RA8 RA, INS
1611 if (op == BC_RET1) {
1612 | strd CARG12, [CARG4]
1614 | sub BASE, CARG4, RA
1615 | decode_RB8 RB, INS
1616 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
1620 | ldr CARG2, LFUNC:CARG1->field_pc
1623 | ldr KBASE, [CARG2, #PC2PROTO(k)]
1626 |6: // Fill up results with nil.
1627 | sub CARG2, CARG4, #4
1628 | mvn CARG3, #~LJ_TNIL
1629 | str CARG3, [CARG2, RC]
1634 /* -- Loops and branches ------------------------------------------------ */
1636 |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] // Slot at RA+0: first word, and the word at +4 (the FOR_T* names; presumably the type tag -- confirm).
1637 |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] // Slot at RA+8.
1638 |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] // Slot at RA+16.
1639 |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] // Slot at RA+24; the loop code stores the same value here as in FOR_IDX.
1645 | // Fall through. Assumes BC_IFORL follows.
1655 | // RA = base*8, RC = target (after end of loop or start of loop)
1656 vk = (op == BC_IFORL || op == BC_JFORL);
1657 | ldrd CARG12, [RA, BASE]!
1658 | add RC, PC, RC, lsl #2
1660 | ldrd CARG34, FOR_STOP
1661 | checktp CARG2, LJ_TISNUM
1664 | checktp CARG4, LJ_TISNUM
1665 | ldr CARG4, FOR_STEP
1666 | checktpeq RB, LJ_TISNUM
1672 | ldrd CARG34, FOR_STEP
1673 | checktp CARG2, LJ_TISNUM
1675 | adds CARG1, CARG1, CARG3
1676 | ldr CARG4, FOR_STOP
1677 if (op == BC_IFORL) {
1678 | addvs RC, PC, #0x20000 // Overflow: prevent branch.
1687 if (op == BC_FORI) {
1688 | subgt PC, RC, #0x20000
1689 } else if (op == BC_JFORI) {
1691 } else if (op == BC_IFORL) {
1692 | suble PC, RC, #0x20000
1697 | strd CARG12, FOR_IDX
1701 | strd CARG12, FOR_EXT
1705 |4: // Invert check for negative step.
1715 | cmnlo CARG4, #-LJ_TISNUM
1716 | cmnlo RB, #-LJ_TISNUM
1719 | strd CARG12, FOR_IDX
1724 | bl extern __aeabi_dadd
1725 | strd CARG12, FOR_IDX
1726 | ldrd CARG34, FOR_STOP
1727 | strd CARG12, FOR_EXT
1730 | bl extern __aeabi_cdcmple
1731 if (op == BC_FORI) {
1732 | subhi PC, RC, #0x20000
1733 } else if (op == BC_JFORI) {
1735 } else if (op == BC_IFORL) {
1736 | subls PC, RC, #0x20000
1744 |8: // Invert check for negative step.
1746 | bl extern __aeabi_dadd
1747 | strd CARG12, FOR_IDX
1748 | strd CARG12, FOR_EXT
1752 | ldrd CARG12, FOR_STOP
1760 | // Fall through. Assumes BC_IITERL follows.
1768 | // RA = base*8, RC = target
1769 | ldrd CARG12, [RA, BASE]!
1770 if (op == BC_JITERL) {
1773 | add RC, PC, RC, lsl #2
1774 | // STALL: load CARG12.
1775 | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil.
1776 | subne PC, RC, #0x20000 // Otherwise save control var + branch.
1777 | strdne CARG12, [RA, #-8]
1783 | // RA = base*8, RC = target (loop extent)
1784 | // Note: RA/RC is only used by trace recorder to determine scope/extent
1785 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
1789 | // Fall through. Assumes BC_ILOOP follows.
1793 | // RA = base*8, RC = target (loop extent)
1804 | // RA = base*8 (only used by trace recorder), RC = target
1805 | add RC, PC, RC, lsl #2
1806 | sub PC, RC, #0x20000
1810 /* -- Function headers -------------------------------------------------- */
1816 case BC_FUNCV: /* NYI: compiled vararg functions. */
1817 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
1825 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
1826 | ldr CARG1, L->maxstack
1827 | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)]
1828 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
1830 | bhi ->vm_growstack_l
1834 | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters.
1836 if (op == BC_JFUNCF) {
1842 |3: // Clear missing parameters.
1843 | mvn CARG1, #~LJ_TNIL
1844 | str CARG1, [BASE, NARGS8:RC]
1845 | add NARGS8:RC, NARGS8:RC, #8
1853 | NYI // NYI: compiled vararg functions
1854 break; /* NYI: compiled vararg functions. */
1857 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
1858 | ldr CARG1, L->maxstack
1859 | add CARG4, BASE, RC
1861 | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC.
1862 | add CARG2, RC, #8+FRAME_VARG
1863 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
1865 | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG.
1866 | bhs ->vm_growstack_l
1867 | ldrb RB, [PC, #-4+PC2PROTO(numparams)]
1871 | add BASE, CARG4, #8
1873 | mvn CARG3, #~LJ_TNIL
1875 | cmp RA, RC // Less args than parameters?
1876 | ldrdlo CARG12, [RA], #8
1877 | movhs CARG2, CARG3
1878 | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC).
1881 | strd CARG12, [CARG4, #8]!
1889 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
1890 if (op == BC_FUNCC) {
1891 | ldr CARG4, CFUNC:CARG3->f
1893 | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)]
1895 | add CARG2, RA, NARGS8:RC
1896 | ldr CARG1, L->maxstack
1897 | add RC, BASE, NARGS8:RC
1901 if (op == BC_FUNCCW) {
1902 | ldr CARG2, CFUNC:CARG3->f
1904 | mv_vmstate CARG3, C
1906 | bhi ->vm_growstack_c // Need to grow stack.
1908 | blx CARG4 // (lua_State *L [, lua_CFunction f])
1909 | // Returns nresults.
1911 | mv_vmstate CARG3, INTERP
1915 | ldr PC, [BASE, FRAME_PC]
1916 | sub RA, CRET2, RC // RA = L->top - nresults*8
1920 /* ---------------------------------------------------------------------- */
1923 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
1929 static int build_backend(BuildCtx *ctx)
1933 dasm_growpc(Dst, BC__MAX);
1935 build_subroutines(ctx);
1938 for (op = 0; op < BC__MAX; op++)
1939 build_ins(ctx, (BCOp)op, op);
1944 /* Emit pseudo frame-info for all assembler functions. */
1945 static void emit_asm_debug(BuildCtx *ctx)
1948 switch (ctx->mode) {
1950 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
1953 "\t.long .LECIE0-.LSCIE0\n"
1955 "\t.long 0xffffffff\n"
1960 "\t.byte 0xe\n" /* Return address is in lr. */
1961 "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */
1966 "\t.long .LEFDE0-.LASFDE0\n"
1968 "\t.long .Lframe0\n"
1971 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
1972 "\t.byte 0x8e\n\t.uleb128 1\n", /* Restore lr. */
1973 (int)ctx->codesz, CFRAME_SIZE);
1974 for (i = 11; i >= 4; i--) /* Restore r4-r11. */
1975 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
1979 /* NYI: emit ARM.exidx. */