2 ** Trace recorder (bytecode -> SSA IR).
3 ** Copyright (C) 2005-2010 Mike Pall. See Copyright Notice in luajit.h
25 #include "lj_record.h"
28 #include "lj_dispatch.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)			(&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
40 /* Context for recording an indexed load/store. */
41 typedef struct RecordIndex
{
42 TValue tabv
; /* Runtime value of table (or indexed object). */
43 TValue keyv
; /* Runtime value of key. */
44 TValue valv
; /* Runtime value of stored value. */
45 TValue mobjv
; /* Runtime value of metamethod object. */
46 GCtab
*mtv
; /* Runtime value of metatable object. */
47 cTValue
*oldv
; /* Runtime value of previously stored value. */
48 TRef tab
; /* Table (or indexed object) reference. */
49 TRef key
; /* Key reference. */
50 TRef val
; /* Value reference for a store or 0 for a load. */
51 TRef mt
; /* Metatable reference. */
52 TRef mobj
; /* Metamethod object reference. */
53 int idxchain
; /* Index indirections left or 0 for raw lookup. */
/* Requested results from rec_call(). */
enum {
  /* Non-negative numbers are number of requested results. */
  CALLRES_MULTI = -1,		/* Return multiple results. */
  CALLRES_TAILCALL = -2,	/* Tail call. */
  CALLRES_PENDING = -3,		/* Call is pending, no results yet. */
  CALLRES_CONT = -4		/* Continuation call. */
};
65 /* Forward declarations. */
66 static TRef
rec_idx(jit_State
*J
, RecordIndex
*ix
);
67 static int rec_call(jit_State
*J
, BCReg func
, ptrdiff_t cres
, ptrdiff_t nargs
);
69 /* -- Sanity checks ------------------------------------------------------- */
72 /* Sanity check the whole IR -- sloooow. */
73 static void rec_check_ir(jit_State
*J
)
75 IRRef i
, nins
= J
->cur
.nins
, nk
= J
->cur
.nk
;
76 lua_assert(nk
<= REF_BIAS
&& nins
>= REF_BIAS
&& nins
< 65536);
77 for (i
= nins
-1; i
>= nk
; i
--) {
79 uint32_t mode
= lj_ir_mode
[ir
->o
];
82 switch (irm_op1(mode
)) {
83 case IRMnone
: lua_assert(op1
== 0); break;
84 case IRMref
: lua_assert(op1
>= nk
);
85 lua_assert(i
>= REF_BIAS
? op1
< i
: op1
> i
); break;
87 case IRMcst
: lua_assert(i
< REF_BIAS
); continue;
89 switch (irm_op2(mode
)) {
90 case IRMnone
: lua_assert(op2
== 0); break;
91 case IRMref
: lua_assert(op2
>= nk
);
92 lua_assert(i
>= REF_BIAS
? op2
< i
: op2
> i
); break;
94 case IRMcst
: lua_assert(0); break;
97 lua_assert(ir
->prev
>= nk
);
98 lua_assert(i
>= REF_BIAS
? ir
->prev
< i
: ir
->prev
> i
);
99 lua_assert(IR(ir
->prev
)->o
== ir
->o
);
104 /* Compare frame stack of the recorder and the VM. */
105 static void rec_check_frames(jit_State
*J
)
107 cTValue
*frame
= J
->L
->base
- 1;
108 cTValue
*lim
= J
->L
->base
- J
->baseslot
;
109 int32_t depth
= J
->framedepth
;
110 while (frame
> lim
) {
112 lua_assert(depth
>= 0);
113 lua_assert((SnapEntry
)frame_ftsz(frame
) == J
->frame
[depth
]);
114 if (frame_iscont(frame
)) {
116 lua_assert(depth
>= 0);
117 lua_assert((SnapEntry
)frame_ftsz(frame
-1) == J
->frame
[depth
]);
119 frame
= frame_prev(frame
);
121 lua_assert(depth
== 0);
124 /* Sanity check the slots. */
125 static void rec_check_slots(jit_State
*J
)
127 BCReg s
, nslots
= J
->baseslot
+ J
->maxslot
;
129 lua_assert(J
->baseslot
>= 1 && J
->baseslot
< LJ_MAX_JSLOTS
);
130 lua_assert(nslots
< LJ_MAX_JSLOTS
);
131 for (s
= 0; s
< nslots
; s
++) {
132 TRef tr
= J
->slot
[s
];
133 if (s
!= 0 && (tr
& (TREF_CONT
|TREF_FRAME
)))
136 IRRef ref
= tref_ref(tr
);
137 lua_assert(ref
>= J
->cur
.nk
&& ref
< J
->cur
.nins
);
138 lua_assert(irt_t(IR(ref
)->t
) == tref_t(tr
));
141 lua_assert(J
->framedepth
== depth
);
146 /* -- Type handling and specialization ------------------------------------ */
148 /* Note: these functions return tagged references (TRef). */
150 /* Specialize a slot to a specific type. Note: slot can be negative! */
151 static TRef
sloadt(jit_State
*J
, int32_t slot
, IRType t
, int mode
)
153 /* Caller may set IRT_GUARD in t. */
154 TRef ref
= emitir_raw(IRT(IR_SLOAD
, t
), (int32_t)J
->baseslot
+slot
, mode
);
159 /* Specialize a slot to the runtime type. Note: slot can be negative! */
160 static TRef
sload(jit_State
*J
, int32_t slot
)
162 IRType t
= itype2irt(&J
->L
->base
[slot
]);
163 TRef ref
= emitir_raw(IRTG(IR_SLOAD
, t
), (int32_t)J
->baseslot
+slot
,
165 if (irtype_ispri(t
)) ref
= TREF_PRI(t
); /* Canonicalize primitive refs. */
/* Get TRef from slot. Load slot and specialize if not done already. */
#define getslot(J, s)	(J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s)))
173 /* Get TRef for current function. */
174 static TRef
getcurrf(jit_State
*J
)
178 lua_assert(J
->baseslot
== 1);
179 return sloadt(J
, -1, IRT_FUNC
, IRSLOAD_READONLY
);
182 /* Compare for raw object equality.
183 ** Returns 0 if the objects are the same.
184 ** Returns 1 if they are different, but the same type.
185 ** Returns 2 for two different types.
186 ** Comparisons between primitives always return 1 -- no caller cares about it.
188 static int rec_objcmp(jit_State
*J
, TRef a
, TRef b
, cTValue
*av
, cTValue
*bv
)
190 int diff
= !lj_obj_equal(av
, bv
);
191 if (!tref_isk2(a
, b
)) { /* Shortcut, also handles primitives. */
192 IRType ta
= tref_isinteger(a
) ? IRT_INT
: tref_type(a
);
193 IRType tb
= tref_isinteger(b
) ? IRT_INT
: tref_type(b
);
195 /* Widen mixed number/int comparisons to number/number comparison. */
196 if (ta
== IRT_INT
&& tb
== IRT_NUM
) {
197 a
= emitir(IRTN(IR_TONUM
), a
, 0);
199 } else if (ta
== IRT_NUM
&& tb
== IRT_INT
) {
200 b
= emitir(IRTN(IR_TONUM
), b
, 0);
202 return 2; /* Two different types are never equal. */
205 emitir(IRTG(diff
? IR_NE
: IR_EQ
, ta
), a
, b
);
210 /* -- Record loop ops ----------------------------------------------------- */
/* Loop event: outcome of simulating one loop iteration. */
typedef enum {
  LOOPEV_LEAVE,		/* Loop is left or not entered. */
  LOOPEV_ENTER		/* Loop is entered. */
} LoopEvent;
218 /* Canonicalize slots: convert integers to numbers. */
219 static void canonicalize_slots(jit_State
*J
)
222 for (s
= J
->baseslot
+J
->maxslot
-1; s
>= 1; s
--) {
223 TRef tr
= J
->slot
[s
];
224 if (tref_isinteger(tr
)) {
225 IRIns
*ir
= IR(tref_ref(tr
));
226 if (!(ir
->o
== IR_SLOAD
&& (ir
->op2
& IRSLOAD_READONLY
)))
227 J
->slot
[s
] = emitir(IRTN(IR_TONUM
), tr
, 0);
232 /* Stop recording. */
233 static void rec_stop(jit_State
*J
, TraceNo lnk
)
236 J
->cur
.link
= (uint16_t)lnk
;
237 /* Looping back at the same stack level? */
238 if (lnk
== J
->curtrace
&& J
->framedepth
+ J
->retdepth
== 0) {
239 if ((J
->flags
& JIT_F_OPT_LOOP
)) /* Shall we try to create a loop? */
240 goto nocanon
; /* Do not canonicalize or we lose the narrowing. */
241 if (J
->cur
.root
) /* Otherwise ensure we always link to the root trace. */
242 J
->cur
.link
= J
->cur
.root
;
244 canonicalize_slots(J
);
246 /* Note: all loop ops must set J->pc to the following instruction! */
247 lj_snap_add(J
); /* Add loop snapshot. */
249 J
->mergesnap
= 1; /* In case recording continues. */
252 /* Search bytecode backwards for a int/num constant slot initializer. */
253 static TRef
find_kinit(jit_State
*J
, const BCIns
*endpc
, BCReg slot
, IRType t
)
255 /* This algorithm is rather simplistic and assumes quite a bit about
256 ** how the bytecode is generated. It works fine for FORI initializers,
257 ** but it won't necessarily work in other cases (e.g. iterator arguments).
258 ** It doesn't do anything fancy, either (like backpropagating MOVs).
260 const BCIns
*pc
, *startpc
= proto_bc(J
->pt
);
261 for (pc
= endpc
-1; pc
> startpc
; pc
--) {
263 BCOp op
= bc_op(ins
);
264 /* First try to find the last instruction that stores to this slot. */
265 if (bcmode_a(op
) == BCMbase
&& bc_a(ins
) <= slot
) {
266 return 0; /* Multiple results, e.g. from a CALL or KNIL. */
267 } else if (bcmode_a(op
) == BCMdst
&& bc_a(ins
) == slot
) {
268 if (op
== BC_KSHORT
|| op
== BC_KNUM
) { /* Found const. initializer. */
269 /* Now try to verify there's no forward jump across it. */
270 const BCIns
*kpc
= pc
;
271 for ( ; pc
> startpc
; pc
--)
272 if (bc_op(*pc
) == BC_JMP
) {
273 const BCIns
*target
= pc
+bc_j(*pc
)+1;
274 if (target
> kpc
&& target
<= endpc
)
275 return 0; /* Conditional assignment. */
277 if (op
== BC_KSHORT
) {
278 int32_t k
= (int32_t)(int16_t)bc_d(ins
);
279 return t
== IRT_INT
? lj_ir_kint(J
, k
) : lj_ir_knum(J
, cast_num(k
));
281 lua_Number n
= proto_knum(J
->pt
, bc_d(ins
));
283 int32_t k
= lj_num2int(n
);
284 if (n
== cast_num(k
)) /* -0 is ok here. */
285 return lj_ir_kint(J
, k
);
286 return 0; /* Type mismatch. */
288 return lj_ir_knum(J
, n
);
292 return 0; /* Non-constant initializer. */
295 return 0; /* No assignment to this slot found? */
298 /* Peek before FORI to find a const initializer. Otherwise load from slot. */
299 static TRef
fori_arg(jit_State
*J
, const BCIns
*fori
, BCReg slot
, IRType t
)
301 TRef tr
= find_kinit(J
, fori
, slot
, t
);
307 tr
= sloadt(J
, (int32_t)slot
, t
, IRSLOAD_READONLY
|IRSLOAD_INHERIT
);
313 /* In-place coercion of FORI arguments. */
314 static lua_Number
for_coerce(jit_State
*J
, TValue
*o
)
316 if (!tvisnum(o
) && !(tvisstr(o
) && lj_str_tonum(strV(o
), o
)))
317 lj_trace_err(J
, LJ_TRERR_BADTYPE
);
321 /* Simulate the runtime behavior of the FOR loop iterator.
322 ** It's important to exactly reproduce the semantics of the interpreter.
324 static LoopEvent
for_iter(jit_State
*J
, IROp
*op
, BCReg ra
, int isforl
)
326 TValue
*forbase
= &J
->L
->base
[ra
];
327 lua_Number stopv
= for_coerce(J
, &forbase
[FORL_STOP
]);
328 lua_Number idxv
= for_coerce(J
, &forbase
[FORL_IDX
]);
329 lua_Number stepv
= for_coerce(J
, &forbase
[FORL_STEP
]);
332 if ((int32_t)forbase
[FORL_STEP
].u32
.hi
>= 0) {
333 if (idxv
<= stopv
) { *op
= IR_LE
; return LOOPEV_ENTER
; }
334 *op
= IR_GT
; return LOOPEV_LEAVE
;
336 if (stopv
<= idxv
) { *op
= IR_GE
; return LOOPEV_ENTER
; }
337 *op
= IR_LT
; return LOOPEV_LEAVE
;
341 /* Record FORL/JFORL or FORI/JFORI. */
342 static LoopEvent
rec_for(jit_State
*J
, const BCIns
*fori
, int isforl
)
344 BCReg ra
= bc_a(*fori
);
346 LoopEvent ev
= for_iter(J
, &op
, ra
, isforl
);
347 TRef
*tr
= &J
->base
[ra
];
350 if (isforl
) { /* Handle FORL/JFORL opcodes. */
353 if (!idx
) idx
= sloadt(J
, (int32_t)(ra
+FORL_IDX
), IRT_NUM
, 0);
355 stop
= fori_arg(J
, fori
, ra
+FORL_STOP
, t
);
356 step
= fori_arg(J
, fori
, ra
+FORL_STEP
, t
);
357 tr
[FORL_IDX
] = idx
= emitir(IRT(IR_ADD
, t
), idx
, step
);
358 } else { /* Handle FORI/JFORI opcodes. */
361 for (i
= FORL_IDX
; i
<= FORL_STEP
; i
++) {
362 lua_assert(J
->base
[ra
+i
] != 0); /* Assumes the slots are already set. */
363 tr
[i
] = lj_ir_tonum(J
, J
->base
[ra
+i
]);
366 stop
= tr
[FORL_STOP
];
367 if (!tref_isk(tr
[FORL_STEP
])) /* Non-const step: need direction guard. */
368 emitir(IRTG(((op
-IR_LT
)>>1)+IR_LT
, IRT_NUM
),
369 tr
[FORL_STEP
], lj_ir_knum_zero(J
));
373 if (ev
== LOOPEV_LEAVE
) {
374 J
->maxslot
= ra
+FORL_EXT
+1;
378 J
->pc
= fori
+bc_j(*fori
)+1;
382 emitir(IRTG(op
, t
), idx
, stop
);
384 if (ev
== LOOPEV_LEAVE
) {
386 J
->pc
= fori
+bc_j(*fori
)+1;
388 J
->maxslot
= ra
+FORL_EXT
+1;
395 /* Record ITERL/JITERL. */
396 static LoopEvent
rec_iterl(jit_State
*J
, const BCIns iterins
)
398 BCReg ra
= bc_a(iterins
);
399 lua_assert(J
->base
[ra
] != 0);
400 if (!tref_isnil(J
->base
[ra
])) { /* Looping back? */
401 J
->base
[ra
-1] = J
->base
[ra
]; /* Copy result of ITERC to control var. */
402 J
->maxslot
= ra
-1+bc_b(J
->pc
[-1]);
403 J
->pc
+= bc_j(iterins
)+1;
412 /* Record LOOP/JLOOP. Now, that was easy. */
413 static LoopEvent
rec_loop(jit_State
*J
, BCReg ra
)
420 /* Check if a loop repeatedly failed to trace because it didn't loop back. */
421 static int innerloopleft(jit_State
*J
, const BCIns
*pc
)
424 for (i
= 0; i
< PENALTY_SLOTS
; i
++)
425 if (J
->penalty
[i
].pc
== pc
) {
426 if (J
->penalty
[i
].reason
== LJ_TRERR_LLEAVE
&&
427 J
->penalty
[i
].val
>= 2*HOTCOUNT_MIN_PENALTY
)
434 /* Handle the case when an interpreted loop op is hit. */
435 static void rec_loop_interp(jit_State
*J
, const BCIns
*pc
, LoopEvent ev
)
437 if (J
->parent
== 0) {
438 if (pc
== J
->startpc
&& J
->framedepth
+ J
->retdepth
== 0) {
440 if (ev
== LOOPEV_LEAVE
) /* Must loop back to form a root trace. */
441 lj_trace_err(J
, LJ_TRERR_LLEAVE
);
442 rec_stop(J
, J
->curtrace
); /* Root trace forms a loop. */
443 } else if (ev
!= LOOPEV_LEAVE
) { /* Entering inner loop? */
444 /* It's usually better to abort here and wait until the inner loop
445 ** is traced. But if the inner loop repeatedly didn't loop back,
446 ** this indicates a low trip count. In this case try unrolling
447 ** an inner loop even in a root trace. But it's better to be a bit
448 ** more conservative here and only do it for very short loops.
450 if (!innerloopleft(J
, pc
))
451 lj_trace_err(J
, LJ_TRERR_LINNER
); /* Root trace hit an inner loop. */
452 if ((J
->loopref
&& J
->cur
.nins
- J
->loopref
> 8) || --J
->loopunroll
< 0)
453 lj_trace_err(J
, LJ_TRERR_LUNROLL
); /* Limit loop unrolling. */
454 J
->loopref
= J
->cur
.nins
;
456 } else if (ev
!= LOOPEV_LEAVE
) { /* Side trace enters an inner loop. */
457 J
->loopref
= J
->cur
.nins
;
458 if (--J
->loopunroll
< 0)
459 lj_trace_err(J
, LJ_TRERR_LUNROLL
); /* Limit loop unrolling. */
460 } /* Side trace continues across a loop that's left or not entered. */
463 /* Handle the case when an already compiled loop op is hit. */
464 static void rec_loop_jit(jit_State
*J
, TraceNo lnk
, LoopEvent ev
)
466 if (J
->parent
== 0) { /* Root trace hit an inner loop. */
467 /* Better let the inner loop spawn a side trace back here. */
468 lj_trace_err(J
, LJ_TRERR_LINNER
);
469 } else if (ev
!= LOOPEV_LEAVE
) { /* Side trace enters a compiled loop. */
470 J
->instunroll
= 0; /* Cannot continue across a compiled loop op. */
471 if (J
->pc
== J
->startpc
&& J
->framedepth
+ J
->retdepth
== 0)
472 lnk
= J
->curtrace
; /* Can form an extra loop. */
473 rec_stop(J
, lnk
); /* Link to the loop. */
474 } /* Side trace continues across a loop that's left or not entered. */
477 /* -- Metamethod handling ------------------------------------------------- */
479 /* Prepare to record call to metamethod. */
480 static BCReg
rec_mm_prep(jit_State
*J
, ASMFunction cont
)
482 BCReg s
, top
= curr_proto(J
->L
)->framesize
;
484 setcont(&J
->L
->base
[top
], cont
);
486 trcont
= lj_ir_kptr(J
, (void *)((int64_t)cont
- (int64_t)lj_vm_asm_begin
));
488 trcont
= lj_ir_kptr(J
, (void *)cont
);
490 J
->base
[top
] = trcont
| TREF_CONT
;
491 for (s
= J
->maxslot
; s
< top
; s
++)
492 J
->base
[s
] = TREF_NIL
;
496 /* Record metamethod lookup. */
497 static int rec_mm_lookup(jit_State
*J
, RecordIndex
*ix
, MMS mm
)
501 if (tref_istab(ix
->tab
)) {
502 mt
= tabref(tabV(&ix
->tabv
)->metatable
);
503 mix
.tab
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->tab
, IRFL_TAB_META
);
504 } else if (tref_isudata(ix
->tab
)) {
505 mt
= tabref(udataV(&ix
->tabv
)->metatable
);
506 mix
.tab
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->tab
, IRFL_UDATA_META
);
508 /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
509 mt
= tabref(basemt_obj(J2G(J
), &ix
->tabv
));
511 return 0; /* No metamethod. */
512 mix
.tab
= lj_ir_ktab(J
, mt
);
515 ix
->mt
= mt
? mix
.tab
: TREF_NIL
;
516 emitir(IRTG(mt
? IR_NE
: IR_EQ
, IRT_TAB
), mix
.tab
, lj_ir_knull(J
, IRT_TAB
));
519 GCstr
*mmstr
= strref(J2G(J
)->mmname
[mm
]);
520 cTValue
*mo
= lj_tab_getstr(mt
, mmstr
);
521 if (mo
&& !tvisnil(mo
))
522 copyTV(J
->L
, &ix
->mobjv
, mo
);
524 settabV(J
->L
, &mix
.tabv
, mt
);
525 if (isdead(J2G(J
), obj2gco(mmstr
)))
526 flipwhite(obj2gco(mmstr
)); /* Need same logic as lj_str_new(). */
527 setstrV(J
->L
, &mix
.keyv
, mmstr
);
528 mix
.key
= lj_ir_kstr(J
, mmstr
);
531 ix
->mobj
= rec_idx(J
, &mix
);
532 return !tref_isnil(ix
->mobj
); /* 1 if metamethod found, 0 if not. */
534 return 0; /* No metamethod. */
537 /* Record call to arithmetic metamethod (and MM_len). */
538 static TRef
rec_mm_arith(jit_State
*J
, RecordIndex
*ix
, MMS mm
)
540 /* Set up metamethod call first to save ix->tab and ix->tabv. */
541 BCReg func
= rec_mm_prep(J
, lj_cont_ra
);
542 TRef
*base
= J
->base
+ func
;
543 TValue
*basev
= J
->L
->base
+ func
;
544 base
[1] = ix
->tab
; base
[2] = ix
->key
;
545 copyTV(J
->L
, basev
+1, &ix
->tabv
);
546 copyTV(J
->L
, basev
+2, &ix
->keyv
);
547 if (!rec_mm_lookup(J
, ix
, mm
)) { /* Lookup metamethod on 1st operand. */
550 copyTV(J
->L
, &ix
->tabv
, &ix
->keyv
);
551 if (rec_mm_lookup(J
, ix
, mm
)) /* Lookup metamethod on 2nd operand. */
554 lj_trace_err(J
, LJ_TRERR_NOMM
);
558 copyTV(J
->L
, basev
+0, &ix
->mobjv
);
559 return rec_call(J
, func
, CALLRES_CONT
, 2) ? J
->base
[func
] : 0;
562 /* Call a comparison metamethod. */
563 static void rec_mm_callcomp(jit_State
*J
, RecordIndex
*ix
, int op
)
565 BCReg func
= rec_mm_prep(J
, (op
&1) ? lj_cont_condf
: lj_cont_condt
);
566 TRef
*base
= J
->base
+ func
;
567 TValue
*tv
= J
->L
->base
+ func
;
568 base
[0] = ix
->mobj
; base
[1] = ix
->val
; base
[2] = ix
->key
;
569 copyTV(J
->L
, tv
+0, &ix
->mobjv
);
570 copyTV(J
->L
, tv
+1, &ix
->valv
);
571 copyTV(J
->L
, tv
+2, &ix
->keyv
);
572 rec_call(J
, func
, CALLRES_CONT
, 2);
573 /* It doesn't matter whether this is immediately resolved or not.
574 ** Type specialization of the return type suffices to specialize
579 /* Record call to equality comparison metamethod (for tab and udata only). */
580 static void rec_mm_equal(jit_State
*J
, RecordIndex
*ix
, int op
)
583 copyTV(J
->L
, &ix
->tabv
, &ix
->valv
);
584 if (rec_mm_lookup(J
, ix
, MM_eq
)) { /* Lookup metamethod on 1st operand. */
588 copyTV(J
->L
, &mo1v
, &ix
->mobjv
);
589 /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
591 if (tvistab(bv
) && tabref(tabV(bv
)->metatable
) == ix
->mtv
) {
592 TRef mt2
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->key
, IRFL_TAB_META
);
593 emitir(IRTG(IR_EQ
, IRT_TAB
), mt2
, ix
->mt
);
594 } else if (tvisudata(bv
) && tabref(udataV(bv
)->metatable
) == ix
->mtv
) {
595 TRef mt2
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->key
, IRFL_UDATA_META
);
596 emitir(IRTG(IR_EQ
, IRT_TAB
), mt2
, ix
->mt
);
597 } else { /* Lookup metamethod on 2nd operand and compare both. */
599 copyTV(J
->L
, &ix
->tabv
, bv
);
600 if (!rec_mm_lookup(J
, ix
, MM_eq
) ||
601 rec_objcmp(J
, mo1
, ix
->mobj
, &mo1v
, &ix
->mobjv
))
604 rec_mm_callcomp(J
, ix
, op
);
608 /* Record call to ordered comparison metamethods (for arbitrary objects). */
609 static void rec_mm_comp(jit_State
*J
, RecordIndex
*ix
, int op
)
612 copyTV(J
->L
, &ix
->tabv
, &ix
->valv
);
614 MMS mm
= (op
& 2) ? MM_le
: MM_lt
; /* Try __le + __lt or only __lt. */
615 if (rec_mm_lookup(J
, ix
, mm
)) { /* Lookup metamethod on 1st operand. */
619 copyTV(J
->L
, &mo1v
, &ix
->mobjv
);
620 /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
622 if (tvistab(bv
) && tabref(tabV(bv
)->metatable
) == ix
->mtv
) {
623 TRef mt2
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->key
, IRFL_TAB_META
);
624 emitir(IRTG(IR_EQ
, IRT_TAB
), mt2
, ix
->mt
);
625 } else if (tvisudata(bv
) && tabref(udataV(bv
)->metatable
) == ix
->mtv
) {
626 TRef mt2
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->key
, IRFL_UDATA_META
);
627 emitir(IRTG(IR_EQ
, IRT_TAB
), mt2
, ix
->mt
);
628 } else { /* Lookup metamethod on 2nd operand and compare both. */
630 copyTV(J
->L
, &ix
->tabv
, bv
);
631 if (!rec_mm_lookup(J
, ix
, mm
) ||
632 rec_objcmp(J
, mo1
, ix
->mobj
, &mo1v
, &ix
->mobjv
))
635 rec_mm_callcomp(J
, ix
, op
);
639 /* First lookup failed. Retry with __lt and swapped operands. */
640 if (!(op
& 2)) break; /* Already at __lt. Interpreter will throw. */
641 ix
->tab
= ix
->key
; ix
->key
= ix
->val
; ix
->val
= ix
->tab
;
642 copyTV(J
->L
, &ix
->tabv
, &ix
->keyv
);
643 copyTV(J
->L
, &ix
->keyv
, &ix
->valv
);
644 copyTV(J
->L
, &ix
->valv
, &ix
->tabv
);
649 /* -- Indexed access ------------------------------------------------------ */
651 /* Record indexed key lookup. */
652 static TRef
rec_idx_key(jit_State
*J
, RecordIndex
*ix
)
655 GCtab
*t
= tabV(&ix
->tabv
);
656 ix
->oldv
= lj_tab_get(J
->L
, t
, &ix
->keyv
); /* Lookup previous value. */
658 /* Integer keys are looked up in the array part first. */
660 if (tref_isnumber(key
)) {
661 lua_Number n
= numV(&ix
->keyv
);
662 int32_t k
= lj_num2int(n
);
663 lua_assert(tvisnum(&ix
->keyv
));
664 /* Potential array key? */
665 if ((MSize
)k
< LJ_MAX_ASIZE
&& n
== cast_num(k
)) {
666 TRef asizeref
, ikey
= key
;
667 if (!tref_isinteger(ikey
))
668 ikey
= emitir(IRTGI(IR_TOINT
), ikey
, IRTOINT_INDEX
);
669 asizeref
= emitir(IRTI(IR_FLOAD
), ix
->tab
, IRFL_TAB_ASIZE
);
670 if ((MSize
)k
< t
->asize
) { /* Currently an array key? */
672 emitir(IRTGI(IR_ABC
), asizeref
, ikey
); /* Bounds check. */
673 arrayref
= emitir(IRT(IR_FLOAD
, IRT_PTR
), ix
->tab
, IRFL_TAB_ARRAY
);
674 return emitir(IRT(IR_AREF
, IRT_PTR
), arrayref
, ikey
);
675 } else { /* Currently not in array (may be an array extension)? */
676 emitir(IRTGI(IR_ULE
), asizeref
, ikey
); /* Inv. bounds check. */
677 if (k
== 0 && tref_isk(key
))
678 key
= lj_ir_knum_zero(J
); /* Canonicalize 0 or +-0.0 to +0.0. */
679 /* And continue with the hash lookup. */
681 } else if (!tref_isk(key
)) {
682 /* We can rule out const numbers which failed the integerness test
683 ** above. But all other numbers are potential array keys.
685 if (t
->asize
== 0) { /* True sparse tables have an empty array part. */
686 /* Guard that the array part stays empty. */
687 TRef tmp
= emitir(IRTI(IR_FLOAD
), ix
->tab
, IRFL_TAB_ASIZE
);
688 emitir(IRTGI(IR_EQ
), tmp
, lj_ir_kint(J
, 0));
690 lj_trace_err(J
, LJ_TRERR_NYITMIX
);
695 /* Otherwise the key is located in the hash part. */
696 if (tref_isinteger(key
)) /* Hash keys are based on numbers, not ints. */
697 ix
->key
= key
= emitir(IRTN(IR_TONUM
), key
, 0);
699 /* Optimize lookup of constant hash keys. */
700 MSize hslot
= (MSize
)((char *)ix
->oldv
- (char *)&noderef(t
->node
)[0].val
);
701 if (t
->hmask
> 0 && hslot
<= t
->hmask
*(MSize
)sizeof(Node
) &&
702 hslot
<= 65535*(MSize
)sizeof(Node
)) {
704 TRef hm
= emitir(IRTI(IR_FLOAD
), ix
->tab
, IRFL_TAB_HMASK
);
705 emitir(IRTGI(IR_EQ
), hm
, lj_ir_kint(J
, (int32_t)t
->hmask
));
706 node
= emitir(IRT(IR_FLOAD
, IRT_PTR
), ix
->tab
, IRFL_TAB_NODE
);
707 kslot
= lj_ir_kslot(J
, key
, hslot
/ sizeof(Node
));
708 return emitir(IRTG(IR_HREFK
, IRT_PTR
), node
, kslot
);
711 /* Fall back to a regular hash lookup. */
712 return emitir(IRT(IR_HREF
, IRT_PTR
), ix
->tab
, key
);
715 /* Determine whether a key is NOT one of the fast metamethod names. */
716 static int nommstr(jit_State
*J
, TRef key
)
718 if (tref_isstr(key
)) {
720 GCstr
*str
= ir_kstr(IR(tref_ref(key
)));
722 for (i
= 0; i
<= MM_FAST
; i
++)
723 if (strref(J2G(J
)->mmname
[i
]) == str
)
724 return 0; /* MUST be one the fast metamethod names. */
726 return 0; /* Variable string key MAY be a metamethod name. */
729 return 1; /* CANNOT be a metamethod name. */
732 /* Record indexed load/store. */
733 static TRef
rec_idx(jit_State
*J
, RecordIndex
*ix
)
739 while (!tref_istab(ix
->tab
)) { /* Handle non-table lookup. */
740 lua_assert(ix
->idxchain
!= 0); /* Never call raw rec_idx() on non-table. */
741 if (!rec_mm_lookup(J
, ix
, ix
->val
? MM_newindex
: MM_index
))
742 lj_trace_err(J
, LJ_TRERR_NOMM
);
744 if (tref_isfunc(ix
->mobj
)) { /* Handle metamethod call. */
745 BCReg func
= rec_mm_prep(J
, ix
->val
? lj_cont_nop
: lj_cont_ra
);
746 TRef
*base
= J
->base
+ func
;
747 TValue
*tv
= J
->L
->base
+ func
;
748 base
[0] = ix
->mobj
; base
[1] = ix
->tab
; base
[2] = ix
->key
;
749 setfuncV(J
->L
, tv
+0, funcV(&ix
->mobjv
));
750 copyTV(J
->L
, tv
+1, &ix
->tabv
);
751 copyTV(J
->L
, tv
+2, &ix
->keyv
);
754 copyTV(J
->L
, tv
+3, &ix
->valv
);
755 rec_call(J
, func
, CALLRES_CONT
, 3); /* mobj(tab, key, val) */
758 /* res = mobj(tab, key) */
759 return rec_call(J
, func
, CALLRES_CONT
, 2) ? J
->base
[func
] : 0;
762 /* Otherwise retry lookup with metaobject. */
764 copyTV(J
->L
, &ix
->tabv
, &ix
->mobjv
);
765 if (--ix
->idxchain
== 0)
766 lj_trace_err(J
, LJ_TRERR_IDXLOOP
);
769 /* First catch nil and NaN keys for tables. */
770 if (tvisnil(&ix
->keyv
) || (tvisnum(&ix
->keyv
) && tvisnan(&ix
->keyv
))) {
771 if (ix
->val
) /* Better fail early. */
772 lj_trace_err(J
, LJ_TRERR_STORENN
);
773 if (tref_isk(ix
->key
)) {
774 if (ix
->idxchain
&& rec_mm_lookup(J
, ix
, MM_index
))
780 /* Record the key lookup. */
781 xref
= rec_idx_key(J
, ix
);
782 xrefop
= IR(tref_ref(xref
))->o
;
783 loadop
= xrefop
== IR_AREF
? IR_ALOAD
: IR_HLOAD
;
786 if (ix
->val
== 0) { /* Indexed load */
787 IRType t
= itype2irt(oldv
);
788 TRef res
= emitir(IRTG(loadop
, t
), xref
, 0);
789 if (t
== IRT_NIL
&& ix
->idxchain
&& rec_mm_lookup(J
, ix
, MM_index
))
791 if (irtype_ispri(t
)) res
= TREF_PRI(t
); /* Canonicalize primitives. */
793 } else { /* Indexed store. */
794 GCtab
*mt
= tabref(tabV(&ix
->tabv
)->metatable
);
795 if (tvisnil(oldv
)) { /* Previous value was nil? */
796 /* Need to duplicate the hasmm check for the early guards. */
798 if (ix
->idxchain
&& mt
) {
799 cTValue
*mo
= lj_tab_getstr(mt
, strref(J2G(J
)->mmname
[MM_newindex
]));
800 hasmm
= mo
&& !tvisnil(mo
);
802 if (hasmm
|| oldv
== niltvg(J2G(J
)))
803 emitir(IRTG(loadop
, IRT_NIL
), xref
, 0); /* Guard for nil value. */
804 else if (xrefop
== IR_HREF
)
805 emitir(IRTG(IR_NE
, IRT_PTR
), xref
, lj_ir_kptr(J
, niltvg(J2G(J
))));
806 if (ix
->idxchain
&& rec_mm_lookup(J
, ix
, MM_newindex
)) { /* Metamethod? */
811 if (oldv
== niltvg(J2G(J
))) { /* Need to insert a new key. */
813 if (tref_isinteger(key
)) /* NEWREF needs a TValue as a key. */
814 key
= emitir(IRTN(IR_TONUM
), key
, 0);
815 xref
= emitir(IRT(IR_NEWREF
, IRT_PTR
), ix
->tab
, key
);
817 } else if (!lj_opt_fwd_wasnonnil(J
, loadop
, tref_ref(xref
))) {
818 /* Cannot derive that the previous value was non-nil, must do checks. */
819 if (xrefop
== IR_HREF
) /* Guard against store to niltv. */
820 emitir(IRTG(IR_NE
, IRT_PTR
), xref
, lj_ir_kptr(J
, niltvg(J2G(J
))));
821 if (ix
->idxchain
) { /* Metamethod lookup required? */
822 /* A check for NULL metatable is cheaper (hoistable) than a load. */
824 TRef mtref
= emitir(IRT(IR_FLOAD
, IRT_TAB
), ix
->tab
, IRFL_TAB_META
);
825 emitir(IRTG(IR_EQ
, IRT_TAB
), mtref
, lj_ir_knull(J
, IRT_TAB
));
827 IRType t
= itype2irt(oldv
);
828 emitir(IRTG(loadop
, t
), xref
, 0); /* Guard for non-nil value. */
832 if (tref_isinteger(ix
->val
)) /* Convert int to number before storing. */
833 ix
->val
= emitir(IRTN(IR_TONUM
), ix
->val
, 0);
834 emitir(IRT(loadop
+IRDELTA_L2S
, tref_type(ix
->val
)), xref
, ix
->val
);
835 if (tref_isgcv(ix
->val
))
836 emitir(IRT(IR_TBAR
, IRT_NIL
), ix
->tab
, 0);
837 /* Invalidate neg. metamethod cache for stores with certain string keys. */
838 if (!nommstr(J
, ix
->key
)) {
839 TRef fref
= emitir(IRT(IR_FREF
, IRT_PTR
), ix
->tab
, IRFL_TAB_NOMM
);
840 emitir(IRT(IR_FSTORE
, IRT_U8
), fref
, lj_ir_kint(J
, 0));
847 /* -- Upvalue access ------------------------------------------------------ */
/* Shrink disambiguation hash into an 8 bit value. */
/* NOTE(review): the final mixing step and return were lost in extraction;
** reconstructed — verify against upstream lj_record.c. */
static uint32_t shrink_dhash(uint32_t lo, uint32_t hi)
{
  lo ^= hi; hi = lj_rol(hi, 14);
  lo -= hi; hi = lj_rol(hi, 5);
  hi ^= lo; hi -= lj_rol(lo, 27);
  lo ^= hi; lo -= lj_rol(hi, 31);
  return lo & 0xff;
}
858 /* Record upvalue load/store. */
859 static TRef
rec_upvalue(jit_State
*J
, uint32_t uv
, TRef val
)
861 GCupval
*uvp
= &gcref(J
->fn
->l
.uvptr
[uv
])->uv
;
862 TRef fn
= getcurrf(J
);
865 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
866 uv
= (uv
<< 8) | shrink_dhash(uvp
->dhash
, uvp
->dhash
-0x04c11db7);
868 /* In current stack? */
869 if (uvval(uvp
) >= J
->L
->stack
&& uvval(uvp
) < J
->L
->maxstack
) {
870 int32_t slot
= (int32_t)(uvval(uvp
) - (J
->L
->base
- J
->baseslot
));
871 if (slot
>= 0) { /* Aliases an SSA slot? */
872 slot
-= (int32_t)J
->baseslot
; /* Note: slot number may be negative! */
873 /* NYI: add IR to guard that it's still aliasing the same slot. */
875 return getslot(J
, slot
);
878 if (slot
>= (int32_t)J
->maxslot
) J
->maxslot
= (BCReg
)(slot
+1);
883 uref
= tref_ref(emitir(IRTG(IR_UREFO
, IRT_PTR
), fn
, uv
));
886 uref
= tref_ref(emitir(IRTG(IR_UREFC
, IRT_PTR
), fn
, uv
));
888 if (val
== 0) { /* Upvalue load */
889 IRType t
= itype2irt(uvval(uvp
));
890 TRef res
= emitir(IRTG(IR_ULOAD
, t
), uref
, 0);
891 if (irtype_ispri(t
)) res
= TREF_PRI(t
); /* Canonicalize primitive refs. */
893 } else { /* Upvalue store. */
894 if (tref_isinteger(val
)) /* Convert int to number before storing. */
895 val
= emitir(IRTN(IR_TONUM
), val
, 0);
896 emitir(IRT(IR_USTORE
, tref_type(val
)), uref
, val
);
897 if (needbarrier
&& tref_isgcv(val
))
898 emitir(IRT(IR_OBAR
, IRT_NIL
), uref
, val
);
904 /* -- Record calls to fast functions -------------------------------------- */
906 /* Note: The function and the arguments for the bytecode CALL instructions
907 ** always occupy _new_ stack slots (above the highest active variable).
908 ** This means they must have been stored there by previous instructions
909 ** (MOV, K*, ADD etc.) which must be part of the same trace. This in turn
910 ** means their reference slots are already valid and their types have
911 ** already been specialized (i.e. getslot() would be redundant).
912 ** The 1st slot beyond the arguments is set to 0 before calling recff_*.
915 /* Data used by handlers to record a fast function. */
916 typedef struct RecordFFData
{
917 TValue
*argv
; /* Runtime argument values. */
918 GCfunc
*fn
; /* The currently recorded function. */
919 ptrdiff_t nargs
; /* Number of passed arguments. */
920 ptrdiff_t nres
; /* Number of returned results (defaults to 1). */
921 ptrdiff_t cres
; /* Wanted number of call results. */
922 uint32_t data
; /* Per-ffid auxiliary data (opcode, literal etc.). */
923 int metacall
; /* True if function was resolved via __call. */
926 /* Type of handler to record a fast function. */
927 typedef void (*RecordFunc
)(jit_State
*J
, TRef
*res
, RecordFFData
*rd
);
929 /* Avoid carrying two pointers around. */
932 /* Get runtime value of int argument. */
933 static int32_t argv2int(jit_State
*J
, TValue
*o
)
935 if (tvisstr(o
) && !lj_str_tonum(strV(o
), o
))
936 lj_trace_err(J
, LJ_TRERR_BADTYPE
);
937 return lj_num2bit(numV(o
));
940 /* Get runtime value of string argument. */
941 static GCstr
*argv2str(jit_State
*J
, TValue
*o
)
943 if (LJ_LIKELY(tvisstr(o
))) {
947 lua_assert(tvisnum(o
));
948 s
= lj_str_fromnum(J
->L
, &o
->n
);
954 /* Fallback handler for all fast functions that are not recorded (yet). */
955 static void recff_nyi(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
958 setfuncV(J
->L
, &J
->errinfo
, rd
->fn
);
959 lj_trace_err_info(J
, LJ_TRERR_NYIFF
);
/* Abort recording for an unsupported case of an otherwise recorded
** fast function. Never returns (throws via the trace error mechanism).
*/
LJ_NORET static void recff_err_nyi(jit_State *J, RecordFFData *rd)
{
  setfuncV(J->L, &J->errinfo, rd->fn);  /* For the error message. */
  lj_trace_err_info(J, LJ_TRERR_NYIFFU);
}
/* C functions can have arbitrary side-effects and are not recorded (yet). */
static void recff_c(jit_State *J, TRef *res, RecordFFData *rd)
{
  /* Store the raw C function pointer for the error message. */
  setlightudV(&J->errinfo, (void *)rd->fn->c.f);
  lj_trace_err_info(J, LJ_TRERR_NYICF);  /* Throws: recording is aborted. */
}
976 /* -- Base library fast functions ----------------------------------------- */
978 static void recff_assert(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
980 /* Arguments already specialized. The interpreter throws for nil/false. */
982 for (i
= 0; arg
[i
]; i
++) /* Need to pass through all arguments. */
/* Record type(). The result is a constant string per argument type. */
static void recff_type(jit_State *J, TRef *res, RecordFFData *rd)
{
  /* Arguments already specialized. Result is a constant string. Neat, huh? */
  IRType t = tref_isinteger(arg[0]) ? IRT_NUM : tref_type(arg[0]);
  /* Type names are stored in the closure upvalues, indexed by IRType. */
  res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[t]));
}
995 static void recff_getmetatable(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
998 if (tref_istab(tr
)) {
1001 copyTV(J
->L
, &ix
.tabv
, &rd
->argv
[0]);
1002 if (rec_mm_lookup(J
, &ix
, MM_metatable
))
1006 } /* else: Interpreter will throw. */
1009 static void recff_setmetatable(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1013 if (tref_istab(tr
) && (tref_istab(mt
) || (mt
&& tref_isnil(mt
)))) {
1017 copyTV(J
->L
, &ix
.tabv
, &rd
->argv
[0]);
1018 rec_mm_lookup(J
, &ix
, MM_metatable
); /* Guard for no __metatable field. */
1019 fref
= emitir(IRT(IR_FREF
, IRT_PTR
), tr
, IRFL_TAB_META
);
1020 mtref
= tref_isnil(mt
) ? lj_ir_knull(J
, IRT_TAB
) : mt
;
1021 emitir(IRT(IR_FSTORE
, IRT_TAB
), fref
, mtref
);
1022 if (!tref_isnil(mt
))
1023 emitir(IRT(IR_TBAR
, IRT_TAB
), tr
, 0);
1026 } /* else: Interpreter will throw. */
1029 static void recff_rawget(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1031 if (tref_istab(arg
[0]) && arg
[1]) {
1033 ix
.tab
= arg
[0]; ix
.key
= arg
[1]; ix
.val
= 0; ix
.idxchain
= 0;
1034 settabV(J
->L
, &ix
.tabv
, tabV(&rd
->argv
[0]));
1035 copyTV(J
->L
, &ix
.keyv
, &rd
->argv
[1]);
1036 res
[0] = rec_idx(J
, &ix
);
1037 } /* else: Interpreter will throw. */
1040 static void recff_rawset(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1042 if (tref_istab(arg
[0]) && arg
[1] && arg
[2]) {
1044 ix
.tab
= arg
[0]; ix
.key
= arg
[1]; ix
.val
= arg
[2]; ix
.idxchain
= 0;
1045 settabV(J
->L
, &ix
.tabv
, tabV(&rd
->argv
[0]));
1046 copyTV(J
->L
, &ix
.keyv
, &rd
->argv
[1]);
1047 copyTV(J
->L
, &ix
.valv
, &rd
->argv
[2]);
1049 res
[0] = arg
[0]; /* Returns table. */
1050 } /* else: Interpreter will throw. */
/* Record rawequal(). The comparison result is known at record time;
** rec_objcmp presumably emits the guards needed to specialize it --
** NOTE(review): confirm against rec_objcmp (defined earlier in this file).
*/
static void recff_rawequal(jit_State *J, TRef *res, RecordFFData *rd)
{
  if (arg[0] && arg[1]) {
    int diff = rec_objcmp(J, arg[0], arg[1], &rd->argv[0], &rd->argv[1]);
    res[0] = diff ? TREF_FALSE : TREF_TRUE;
  }  /* else: Interpreter will throw. */
}
1061 static void recff_tonumber(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1064 if (tref_isnumber_str(tr
)) {
1066 TRef base
= lj_ir_toint(J
, arg
[1]);
1067 if (!tref_isk(base
) || IR(tref_ref(base
))->i
!= 10)
1068 recff_err_nyi(J
, rd
);
1071 tr
= emitir(IRTG(IR_STRTO
, IRT_NUM
), tr
, 0);
1079 static void recff_tostring(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1082 if (tref_isstr(tr
)) {
1083 /* Ignore __tostring in the string base metatable. */
1088 copyTV(J
->L
, &ix
.tabv
, &rd
->argv
[0]);
1089 if (rec_mm_lookup(J
, &ix
, MM_tostring
)) { /* Has __tostring metamethod? */
1090 if (rd
->metacall
) /* Must not use kludge. */
1091 recff_err_nyi(J
, rd
);
1093 copyTV(J
->L
, rd
->argv
- 1, &ix
.mobjv
); /* Kludge. */
1095 if (rec_call(J
, (BCReg
)(res
- J
->base
), 1, 1))
1098 rd
->cres
= CALLRES_PENDING
;
1099 /* Otherwise res[0] already contains the result. */
1100 } else if (tref_isnumber(tr
)) {
1101 res
[0] = emitir(IRT(IR_TOSTR
, IRT_STR
), tr
, 0);
1102 } else if (tref_ispri(tr
)) {
1103 res
[0] = lj_ir_kstr(J
, strV(&rd
->fn
->c
.upvalue
[tref_type(tr
)]));
1105 recff_err_nyi(J
, rd
);
1110 static void recff_ipairs_aux(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1114 if (tref_istab(ix
.tab
)) {
1115 if (!tvisnum(&rd
->argv
[1])) /* No support for string coercion. */
1116 lj_trace_err(J
, LJ_TRERR_BADTYPE
);
1117 setnumV(&ix
.keyv
, numV(&rd
->argv
[1])+(lua_Number
)1);
1118 settabV(J
->L
, &ix
.tabv
, tabV(&rd
->argv
[0]));
1119 ix
.val
= 0; ix
.idxchain
= 0;
1120 ix
.key
= lj_ir_toint(J
, arg
[1]);
1121 res
[0] = ix
.key
= emitir(IRTI(IR_ADD
), ix
.key
, lj_ir_kint(J
, 1));
1122 res
[1] = rec_idx(J
, &ix
);
1123 rd
->nres
= tref_isnil(res
[1]) ? 0 : 2;
1124 } /* else: Interpreter will throw. */
1127 static void recff_ipairs(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1130 if (tref_istab(tab
)) {
1131 res
[0] = lj_ir_kfunc(J
, funcV(&rd
->fn
->c
.upvalue
[0]));
1133 res
[2] = lj_ir_kint(J
, 0);
1135 } /* else: Interpreter will throw. */
1138 static void recff_pcall(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1140 if (rd
->nargs
>= 1) {
1141 BCReg parg
= (BCReg
)(arg
- J
->base
);
1142 J
->pc
= (const BCIns
*)(sizeof(TValue
) - 4 +
1143 (hook_active(J2G(J
)) ? FRAME_PCALLH
: FRAME_PCALL
));
1144 if (rec_call(J
, parg
, CALLRES_MULTI
, rd
->nargs
- 1)) { /* Resolved call. */
1145 res
[0] = TREF_TRUE
; /* Prepend true result. No need to move results. */
1146 rd
->nres
= (ptrdiff_t)J
->maxslot
- (ptrdiff_t)parg
+ 1;
1147 } else { /* Propagate pending call. */
1148 rd
->cres
= CALLRES_PENDING
;
1150 } /* else: Interpreter will throw. */
1153 /* Struct to pass context across lj_vm_cpcall. */
1154 typedef struct RecordXpcall
{
1160 static TValue
*recff_xpcall_cp(lua_State
*L
, lua_CFunction dummy
, void *ud
)
1162 jit_State
*J
= L2J(L
);
1163 RecordXpcall
*rx
= (RecordXpcall
*)ud
;
1165 rx
->resolved
= rec_call(J
, rx
->parg
, CALLRES_MULTI
, rx
->nargs
);
1169 static void recff_xpcall(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1171 if (rd
->nargs
>= 2) {
1173 BCReg parg
= (BCReg
)(arg
- J
->base
) + 1;
1175 TValue argv0
, argv1
;
1178 /* Swap function and traceback. */
1179 tmp
= arg
[0]; arg
[0] = arg
[1]; arg
[1] = tmp
;
1180 copyTV(J
->L
, &argv0
, &rd
->argv
[0]);
1181 copyTV(J
->L
, &argv1
, &rd
->argv
[1]);
1182 copyTV(J
->L
, &rd
->argv
[0], &argv1
);
1183 copyTV(J
->L
, &rd
->argv
[1], &argv0
);
1184 oargv
= savestack(J
->L
, rd
->argv
);
1185 J
->pc
= (const BCIns
*)(2*sizeof(TValue
) - 4 +
1186 (hook_active(J2G(J
)) ? FRAME_PCALLH
: FRAME_PCALL
));
1187 /* Need to protect rec_call because the recorder may throw. */
1189 rx
.nargs
= rd
->nargs
- 2;
1190 errcode
= lj_vm_cpcall(J
->L
, NULL
, &rx
, recff_xpcall_cp
);
1191 /* Always undo Lua stack swap to avoid confusing the interpreter. */
1192 rd
->argv
= restorestack(J
->L
, oargv
); /* Stack may have been resized. */
1193 copyTV(J
->L
, &rd
->argv
[0], &argv0
);
1194 copyTV(J
->L
, &rd
->argv
[1], &argv1
);
1196 lj_err_throw(J
->L
, errcode
); /* Propagate errors. */
1197 if (rx
.resolved
) { /* Resolved call. */
1198 ptrdiff_t i
, nres
= (ptrdiff_t)J
->maxslot
- (ptrdiff_t)parg
;
1199 rd
->nres
= nres
+ 1;
1200 res
[0] = TREF_TRUE
; /* Prepend true result. */
1201 for (i
= 1; i
<= nres
; i
++) /* Move results down. */
1203 } else { /* Propagate pending call. */
1204 rd
->cres
= CALLRES_PENDING
;
1206 } /* else: Interpreter will throw. */
1209 /* -- Math library fast functions ----------------------------------------- */
/* Record math.abs(). */
static void recff_math_abs(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tonum(J, arg[0]);
  /* IR_ABS takes the sign-bit mask constant as its second operand. */
  res[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
}
1218 /* Record rounding functions math.floor and math.ceil. */
1219 static void recff_math_round(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1221 if (tref_isinteger(arg
[0]))
1224 res
[0] = emitir(IRTN(IR_FPMATH
), lj_ir_tonum(J
, arg
[0]), rd
->data
);
1225 /* Note: result is integral (or NaN/Inf), but may not fit into an integer. */
/* Record unary math.* functions, mapped to IR_FPMATH opcode. */
static void recff_math_unary(jit_State *J, TRef *res, RecordFFData *rd)
{
  /* rd->data selects the IRFPM_* mode of the FPMATH instruction. */
  res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data);
}
/* Record binary math.* functions math.atan2 and math.ldexp. */
static void recff_math_binary(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tonum(J, arg[0]);
  /* rd->data holds the two-operand IR opcode for this ffid. */
  res[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, arg[1]));
}
/* Record math.asin, math.acos, math.atan.
** All three are lowered to a single IR_ATAN2 using the identities
** asin(y) = atan2(y, sqrt(1-y^2)) and acos(y) = atan2(sqrt(1-y^2), y).
*/
static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef y = lj_ir_tonum(J, arg[0]);
  TRef x = lj_ir_knum_one(J);
  uint32_t ffid = rd->data;  /* Per-ffid data distinguishes the variants. */
  if (ffid != FF_math_atan) {
    TRef tmp = emitir(IRTN(IR_MUL), y, y);
    tmp = emitir(IRTN(IR_SUB), x, tmp);      /* 1 - y*y */
    tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
    /* asin keeps y as numerator; acos swaps numerator and denominator. */
    if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
  }
  res[0] = emitir(IRTN(IR_ATAN2), y, x);
}
/* Record hyperbolic trig math.* functions via C library calls.
** rd->data is the IRCALL id of the helper to invoke.
*/
static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tonum(J, arg[0]);
  res[0] = lj_ir_call(J, rd->data, tr);
}
1262 static void recff_math_modf(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1265 if (tref_isinteger(tr
)) {
1267 res
[1] = lj_ir_kint(J
, 0);
1269 tr
= lj_ir_tonum(J
, tr
);
1270 res
[0] = emitir(IRTN(IR_FPMATH
), tr
, IRFPM_TRUNC
);
1271 res
[1] = emitir(IRTN(IR_SUB
), tr
, res
[0]);
/* Record math.deg() and math.rad(): multiply by a constant factor
** stored in upvalue[0] (presumably 180/pi resp. pi/180 -- set up by
** the library; TODO confirm against lib_math.c).
*/
static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tonum(J, arg[0]);
  res[0] = emitir(IRTN(IR_MUL), tr, lj_ir_knum(J, numV(&rd->fn->c.upvalue[0])));
}
/* Record math.pow(). */
static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tonum(J, arg[0]);
  if (!tref_isnumber_str(arg[1]))
    lj_trace_err(J, LJ_TRERR_BADTYPE);  /* No numeric coercion possible. */
  /* Exponent narrowing needs the runtime value of the 2nd argument. */
  res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]);
}
1291 static void recff_math_minmax(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1293 TRef tr
= lj_ir_tonum(J
, arg
[0]);
1294 uint32_t op
= rd
->data
;
1296 for (i
= 1; arg
[i
]; i
++)
1297 tr
= emitir(IRTN(op
), tr
, lj_ir_tonum(J
, arg
[i
]));
1301 static void recff_math_random(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1303 GCudata
*ud
= udataV(&rd
->fn
->c
.upvalue
[0]);
1305 lj_ir_kgc(J
, obj2gco(ud
), IRT_UDATA
); /* Prevent collection. */
1306 tr
= lj_ir_call(J
, IRCALL_lj_math_random_step
, lj_ir_kptr(J
, uddata(ud
)));
1307 one
= lj_ir_knum_one(J
);
1308 tr
= emitir(IRTN(IR_SUB
), tr
, one
);
1310 TRef tr1
= lj_ir_tonum(J
, arg
[0]);
1311 if (arg
[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */
1312 TRef tr2
= lj_ir_tonum(J
, arg
[1]);
1313 tr2
= emitir(IRTN(IR_SUB
), tr2
, tr1
);
1314 tr2
= emitir(IRTN(IR_ADD
), tr2
, one
);
1315 tr
= emitir(IRTN(IR_MUL
), tr
, tr2
);
1316 tr
= emitir(IRTN(IR_FPMATH
), tr
, IRFPM_FLOOR
);
1317 tr
= emitir(IRTN(IR_ADD
), tr
, tr1
);
1318 } else { /* d = floor(d*r1) + 1.0 */
1319 tr
= emitir(IRTN(IR_MUL
), tr
, tr1
);
1320 tr
= emitir(IRTN(IR_FPMATH
), tr
, IRFPM_FLOOR
);
1321 tr
= emitir(IRTN(IR_ADD
), tr
, one
);
1327 /* -- Bit library fast functions ------------------------------------------ */
/* Record unary bit.tobit, bit.bnot, bit.bswap.
** rd->data holds the IR opcode; IR_TOBIT needs no extra instruction
** because lj_ir_tobit already performs the conversion.
*/
static void recff_bit_unary(jit_State *J, TRef *res, RecordFFData *rd)
{
  TRef tr = lj_ir_tobit(J, arg[0]);
  res[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
}
1336 /* Record N-ary bit.band, bit.bor, bit.bxor. */
1337 static void recff_bit_nary(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1339 TRef tr
= lj_ir_tobit(J
, arg
[0]);
1340 uint32_t op
= rd
->data
;
1342 for (i
= 1; arg
[i
]; i
++)
1343 tr
= emitir(IRTI(op
), tr
, lj_ir_tobit(J
, arg
[i
]));
1347 /* Record bit shifts. */
1348 static void recff_bit_shift(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1350 TRef tr
= lj_ir_tobit(J
, arg
[0]);
1351 TRef tsh
= lj_ir_tobit(J
, arg
[1]);
1352 #if !LJ_TARGET_MASKEDSHIFT
1354 tsh
= emitir(IRTI(IR_BAND
), tsh
, lj_ir_kint(J
, 31));
1356 res
[0] = emitir(IRTI(rd
->data
), tr
, tsh
);
1359 /* -- String library fast functions --------------------------------------- */
/* Record string.len(): load the length field of the interned string. */
static void recff_string_len(jit_State *J, TRef *res, RecordFFData *rd)
{
  res[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, arg[0]), IRFL_STR_LEN);
}
1367 /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
1368 static void recff_string_range(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1370 TRef trstr
= lj_ir_tostr(J
, arg
[0]);
1371 TRef trlen
= emitir(IRTI(IR_FLOAD
), trstr
, IRFL_STR_LEN
);
1372 TRef tr0
= lj_ir_kint(J
, 0);
1373 TRef trstart
, trend
;
1374 GCstr
*str
= argv2str(J
, &rd
->argv
[0]);
1376 if (rd
->data
) { /* string.sub(str, start [,end]) */
1377 trstart
= lj_ir_toint(J
, arg
[1]);
1378 trend
= tref_isnil(arg
[2]) ? lj_ir_kint(J
, -1) : lj_ir_toint(J
, arg
[2]);
1379 start
= argv2int(J
, &rd
->argv
[1]);
1380 end
= tref_isnil(arg
[2]) ? -1 : argv2int(J
, &rd
->argv
[2]);
1381 } else { /* string.byte(str, [,start [,end]]) */
1383 trstart
= lj_ir_toint(J
, arg
[1]);
1384 trend
= tref_isnil(arg
[2]) ? trstart
: lj_ir_toint(J
, arg
[2]);
1385 start
= argv2int(J
, &rd
->argv
[1]);
1386 end
= tref_isnil(arg
[2]) ? start
: argv2int(J
, &rd
->argv
[2]);
1388 trend
= trstart
= lj_ir_kint(J
, 1);
1393 emitir(IRTGI(IR_LT
), trend
, tr0
);
1394 trend
= emitir(IRTI(IR_ADD
), emitir(IRTI(IR_ADD
), trlen
, trend
),
1396 end
= end
+(int32_t)str
->len
+1;
1397 } else if ((MSize
)end
<= str
->len
) {
1398 emitir(IRTGI(IR_ULE
), trend
, trlen
);
1400 emitir(IRTGI(IR_GT
), trend
, trlen
);
1401 end
= (int32_t)str
->len
;
1405 emitir(IRTGI(IR_LT
), trstart
, tr0
);
1406 trstart
= emitir(IRTI(IR_ADD
), trlen
, trstart
);
1407 start
= start
+(int32_t)str
->len
;
1408 emitir(start
< 0 ? IRTGI(IR_LT
) : IRTGI(IR_GE
), trstart
, tr0
);
1415 emitir(IRTGI(IR_EQ
), trstart
, tr0
);
1418 trstart
= emitir(IRTI(IR_ADD
), trstart
, lj_ir_kint(J
, -1));
1419 emitir(IRTGI(IR_GE
), trstart
, tr0
);
1423 if (rd
->data
) { /* Return string.sub result. */
1424 if (end
- start
>= 0) {
1425 /* Also handle empty range here, to avoid extra traces. */
1426 TRef trptr
, trslen
= emitir(IRTI(IR_SUB
), trend
, trstart
);
1427 emitir(IRTGI(IR_GE
), trslen
, tr0
);
1428 trptr
= emitir(IRT(IR_STRREF
, IRT_PTR
), trstr
, trstart
);
1429 res
[0] = emitir(IRT(IR_SNEW
, IRT_STR
), trptr
, trslen
);
1430 } else { /* Range underflow: return empty string. */
1431 emitir(IRTGI(IR_LT
), trend
, trstart
);
1432 res
[0] = lj_ir_kstr(J
, lj_str_new(J
->L
, strdata(str
), 0));
1434 } else { /* Return string.byte result(s). */
1435 ptrdiff_t i
, len
= end
- start
;
1437 TRef trslen
= emitir(IRTI(IR_SUB
), trend
, trstart
);
1438 emitir(IRTGI(IR_EQ
), trslen
, lj_ir_kint(J
, len
));
1439 if (res
+ len
> J
->slot
+ LJ_MAX_JSLOTS
)
1440 lj_trace_err(J
, LJ_TRERR_STACKOV
);
1442 for (i
= 0; i
< len
; i
++) {
1443 TRef tmp
= emitir(IRTI(IR_ADD
), trstart
, lj_ir_kint(J
, i
));
1444 tmp
= emitir(IRT(IR_STRREF
, IRT_PTR
), trstr
, tmp
);
1445 res
[i
] = emitir(IRT(IR_XLOAD
, IRT_U8
), tmp
, IRXLOAD_READONLY
);
1447 } else { /* Empty range or range underflow: return no results. */
1448 emitir(IRTGI(IR_LE
), trend
, trstart
);
1454 /* -- Table library fast functions ---------------------------------------- */
/* Record table.getn(): call the table-length helper. */
static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
{
  if (tref_istab(arg[0])) {
    res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
  }  /* else: Interpreter will throw. */
}
1464 static void recff_table_remove(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1466 if (tref_istab(arg
[0])) {
1467 if (!arg
[1] || tref_isnil(arg
[1])) { /* Simple pop: t[#t] = nil */
1468 TRef trlen
= lj_ir_call(J
, IRCALL_lj_tab_len
, arg
[0]);
1469 GCtab
*t
= tabV(&rd
->argv
[0]);
1470 MSize len
= lj_tab_len(t
);
1471 emitir(IRTGI(len
? IR_NE
: IR_EQ
), trlen
, lj_ir_kint(J
, 0));
1476 settabV(J
->L
, &ix
.tabv
, t
);
1477 setintV(&ix
.keyv
, len
);
1479 if (rd
->cres
!= 0) { /* Specialize load only if result needed. */
1481 res
[0] = rec_idx(J
, &ix
); /* Load previous value. */
1482 /* Assumes ix.key/ix.tab is not modified for raw rec_idx(). */
1485 rec_idx(J
, &ix
); /* Remove value. */
1489 } else { /* Complex case: remove in the middle. */
1490 recff_err_nyi(J
, rd
);
1492 } /* else: Interpreter will throw. */
1495 static void recff_table_insert(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1498 if (tref_istab(arg
[0]) && arg
[1]) {
1499 if (!arg
[2]) { /* Simple push: t[#t+1] = v */
1500 TRef trlen
= lj_ir_call(J
, IRCALL_lj_tab_len
, arg
[0]);
1501 GCtab
*t
= tabV(&rd
->argv
[0]);
1505 ix
.key
= emitir(IRTI(IR_ADD
), trlen
, lj_ir_kint(J
, 1));
1506 settabV(J
->L
, &ix
.tabv
, t
);
1507 setintV(&ix
.keyv
, lj_tab_len(t
) + 1);
1509 rec_idx(J
, &ix
); /* Set new value. */
1510 } else { /* Complex case: insert in the middle. */
1511 recff_err_nyi(J
, rd
);
1513 } /* else: Interpreter will throw. */
1516 /* -- I/O library fast functions ------------------------------------------ */
1518 /* Get FILE* for I/O function. Any I/O error aborts recording, so there's
1519 ** no need to encode the alternate cases for any of the guards.
1521 static TRef
recff_io_fp(jit_State
*J
, TRef
*res
, uint32_t id
)
1524 if (id
) { /* io.func() */
1525 tr
= lj_ir_kptr(J
, &J2G(J
)->gcroot
[id
]);
1526 ud
= emitir(IRT(IR_XLOAD
, IRT_UDATA
), tr
, 0);
1527 } else { /* fp:method() */
1529 if (!tref_isudata(ud
))
1530 lj_trace_err(J
, LJ_TRERR_BADTYPE
);
1531 tr
= emitir(IRT(IR_FLOAD
, IRT_U8
), ud
, IRFL_UDATA_UDTYPE
);
1532 emitir(IRTGI(IR_EQ
), tr
, lj_ir_kint(J
, UDTYPE_IO_FILE
));
1534 fp
= emitir(IRT(IR_FLOAD
, IRT_LIGHTUD
), ud
, IRFL_UDATA_FILE
);
1535 emitir(IRTG(IR_NE
, IRT_LIGHTUD
), fp
, lj_ir_knull(J
, IRT_LIGHTUD
));
1539 static void recff_io_write(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1541 TRef fp
= recff_io_fp(J
, res
, rd
->data
);
1542 TRef zero
= lj_ir_kint(J
, 0);
1543 TRef one
= lj_ir_kint(J
, 1);
1544 ptrdiff_t i
= rd
->data
== 0 ? 1 : 0;
1545 for (; arg
[i
]; i
++) {
1546 TRef str
= lj_ir_tostr(J
, arg
[i
]);
1547 TRef buf
= emitir(IRT(IR_STRREF
, IRT_PTR
), str
, zero
);
1548 TRef len
= emitir(IRTI(IR_FLOAD
), str
, IRFL_STR_LEN
);
1549 if (tref_isk(len
) && IR(tref_ref(len
))->i
== 1) {
1550 TRef tr
= emitir(IRT(IR_XLOAD
, IRT_U8
), buf
, IRXLOAD_READONLY
);
1551 tr
= lj_ir_call(J
, IRCALL_fputc
, tr
, fp
);
1552 if (rd
->cres
!= 0) /* Check result only if requested. */
1553 emitir(IRTGI(IR_NE
), tr
, lj_ir_kint(J
, -1));
1555 TRef tr
= lj_ir_call(J
, IRCALL_fwrite
, buf
, one
, len
, fp
);
1556 if (rd
->cres
!= 0) /* Check result only if requested. */
1557 emitir(IRTGI(IR_EQ
), tr
, len
);
1563 static void recff_io_flush(jit_State
*J
, TRef
*res
, RecordFFData
*rd
)
1565 TRef fp
= recff_io_fp(J
, res
, rd
->data
);
1566 TRef tr
= lj_ir_call(J
, IRCALL_fflush
, fp
);
1567 if (rd
->cres
!= 0) /* Check result only if requested. */
1568 emitir(IRTGI(IR_EQ
), tr
, lj_ir_kint(J
, 0));
1572 /* -- Record calls and returns -------------------------------------------- */
1576 #include "lj_recdef.h"
1578 /* Record return. */
1579 static void rec_ret(jit_State
*J
, BCReg rbase
, ptrdiff_t gotresults
)
1581 TValue
*frame
= J
->L
->base
- 1;
1583 for (i
= 0; i
< gotresults
; i
++)
1584 getslot(J
, rbase
+i
); /* Ensure all results have a reference. */
1586 while (frame_ispcall(frame
)) { /* Immediately resolve pcall() returns. */
1587 BCReg cbase
= (BCReg
)frame_delta(frame
);
1588 if (J
->framedepth
-- <= 0)
1589 lj_trace_err(J
, LJ_TRERR_NYIRETL
);
1590 lua_assert(J
->baseslot
> 1);
1591 J
->base
[--rbase
] = TREF_TRUE
; /* Prepend true to results. */
1594 J
->baseslot
-= (BCReg
)cbase
;
1596 frame
= frame_prevd(frame
);
1598 if (frame_islua(frame
)) { /* Return to Lua frame. */
1599 BCIns callins
= *(frame_pc(frame
)-1);
1600 ptrdiff_t nresults
= bc_b(callins
) ? (ptrdiff_t)bc_b(callins
)-1 :gotresults
;
1601 BCReg cbase
= bc_a(callins
);
1602 for (i
= 0; i
< nresults
; i
++) /* Adjust results. */
1603 J
->base
[i
-1] = i
< gotresults
? J
->base
[rbase
+i
] : TREF_NIL
;
1604 J
->maxslot
= cbase
+(BCReg
)nresults
;
1605 if (J
->framedepth
> 0) { /* Return to a frame that is part of the trace. */
1607 lua_assert(J
->baseslot
> cbase
+1);
1608 J
->baseslot
-= cbase
+1;
1610 } else if (J
->parent
== 0) {
1611 /* Return to lower frame would leave the loop in a root trace. */
1612 lj_trace_err(J
, LJ_TRERR_LLEAVE
);
1613 } else { /* Return to lower frame. Guard for the target we return to. */
1614 GCproto
*pt
= funcproto(frame_func(frame
- (cbase
+1)));
1615 TRef trpt
= lj_ir_kgc(J
, obj2gco(pt
), IRT_PROTO
);
1616 TRef trpc
= lj_ir_kptr(J
, (void *)frame_pc(frame
));
1617 emitir(IRTG(IR_RETF
, IRT_PTR
), trpt
, trpc
);
1620 lua_assert(J
->baseslot
== 1);
1621 /* Shift result slots up and clear the slots of the new frame below. */
1622 memmove(J
->base
+ cbase
, J
->base
-1, sizeof(TRef
)*nresults
);
1623 memset(J
->base
-1, 0, sizeof(TRef
)*(cbase
+1));
1625 } else if (frame_iscont(frame
)) { /* Return to continuation frame. */
1626 ASMFunction cont
= frame_contf(frame
);
1627 BCReg cbase
= (BCReg
)frame_delta(frame
);
1628 if ((J
->framedepth
-= 2) <= 0)
1629 lj_trace_err(J
, LJ_TRERR_NYIRETL
);
1630 J
->baseslot
-= (BCReg
)cbase
;
1632 J
->maxslot
= cbase
-2;
1633 if (cont
== lj_cont_ra
) {
1634 /* Copy result to destination slot. */
1635 BCReg dst
= bc_a(*(frame_contpc(frame
)-1));
1636 J
->base
[dst
] = gotresults
? J
->base
[cbase
+rbase
] : TREF_NIL
;
1637 if (dst
> J
->maxslot
) J
->maxslot
= dst
+1;
1638 } else if (cont
== lj_cont_nop
) {
1639 /* Nothing to do here. */
1640 } else if (cont
== lj_cont_cat
) {
1643 /* Result type already specialized. */
1644 lua_assert(cont
== lj_cont_condf
|| cont
== lj_cont_condt
);
1647 lj_trace_err(J
, LJ_TRERR_NYIRETL
); /* NYI: handle return to C frame. */
1649 lua_assert(J
->baseslot
>= 1);
1652 /* Check unroll limits for calls. */
1653 static void check_call_unroll(jit_State
*J
, GCfunc
*fn
)
1655 IRRef fref
= tref_ref(J
->base
[-1]);
1658 for (s
= (ptrdiff_t)J
->baseslot
- 1; s
> 0; s
--)
1659 if ((J
->slot
[s
] & TREF_FRAME
) && tref_ref(J
->slot
[s
]) == fref
)
1661 if (bc_op(J
->cur
.startins
) == BC_CALL
&&
1662 funcproto(fn
) == &gcref(J
->cur
.startpt
)->pt
) {
1663 if (count
+ J
->tailcalled
> J
->param
[JIT_P_recunroll
])
1664 lj_trace_err(J
, LJ_TRERR_NYIRECU
);
1666 if (count
> J
->param
[JIT_P_callunroll
])
1667 lj_trace_err(J
, LJ_TRERR_CUNROLL
);
1671 /* Record call. Returns 0 for pending calls and 1 for resolved calls. */
1672 static int rec_call(jit_State
*J
, BCReg func
, ptrdiff_t cres
, ptrdiff_t nargs
)
1675 TRef trfunc
, *res
= &J
->base
[func
];
1676 TValue
*tv
= &J
->L
->base
[func
];
1678 if (tref_isfunc(res
[0])) { /* Regular function call. */
1682 } else { /* Otherwise resolve __call metamethod for called object. */
1686 copyTV(J
->L
, &ix
.tabv
, tv
);
1687 if (!rec_mm_lookup(J
, &ix
, MM_call
) || !tref_isfunc(ix
.mobj
))
1688 lj_trace_err(J
, LJ_TRERR_NOMM
);
1689 /* Update the recorder state, but not the Lua stack. */
1690 for (i
= ++nargs
; i
> 0; i
--)
1693 rd
.fn
= funcV(&ix
.mobjv
);
1694 rd
.argv
= tv
; /* The called object is the 1st arg. */
1698 /* Specialize to the runtime value of the called function. */
1699 trfunc
= lj_ir_kfunc(J
, rd
.fn
);
1700 emitir(IRTG(IR_EQ
, IRT_FUNC
), res
[0], trfunc
);
1701 res
[0] = trfunc
| TREF_FRAME
;
1703 /* Add frame links. */
1704 J
->frame
[J
->framedepth
++] = SNAP_MKPC(J
->pc
+1);
1705 if (cres
== CALLRES_CONT
) /* Continuations need an extra frame stack slot. */
1706 J
->frame
[J
->framedepth
++] = SNAP_MKFTSZ((func
+1)*sizeof(TValue
)+FRAME_CONT
);
1707 /* NYI: func is wrong if any fast function ever sets up a continuation. */
1708 if (J
->framedepth
> LJ_MAX_JFRAME
)
1709 lj_trace_err(J
, LJ_TRERR_STACKOV
);
1711 if (isluafunc(rd
.fn
)) { /* Record call to Lua function. */
1712 GCproto
*pt
= funcproto(rd
.fn
);
1713 if ((pt
->flags
& PROTO_NO_JIT
))
1714 lj_trace_err(J
, LJ_TRERR_CJITOFF
);
1715 if ((pt
->flags
& PROTO_IS_VARARG
))
1716 lj_trace_err(J
, LJ_TRERR_NYIVF
);
1717 if (cres
== CALLRES_TAILCALL
) {
1719 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
1720 if (++J
->tailcalled
> J
->loopunroll
)
1721 lj_trace_err(J
, LJ_TRERR_LUNROLL
);
1722 for (i
= 0; i
<= nargs
; i
++) /* Move func + args down. */
1723 J
->base
[i
-1] = res
[i
];
1725 /* Note: the new FRAME is now at J->base[-1] (even for slot #0). */
1726 } else { /* Regular call. */
1728 J
->baseslot
+= func
+1;
1730 if (J
->baseslot
+ pt
->framesize
>= LJ_MAX_JSLOTS
)
1731 lj_trace_err(J
, LJ_TRERR_STACKOV
);
1732 /* Fill up missing args with nil. */
1733 while (nargs
< pt
->numparams
)
1734 J
->base
[nargs
++] = TREF_NIL
;
1735 /* The remaining slots should never be read before they are written. */
1736 J
->maxslot
= pt
->numparams
;
1737 check_call_unroll(J
, rd
.fn
);
1738 return 0; /* No result yet. */
1739 } else { /* Record call to C function or fast function. */
1741 BCReg oldmaxslot
= J
->maxslot
;
1744 if (rd
.fn
->c
.ffid
< sizeof(recff_idmap
)/sizeof(recff_idmap
[0]))
1745 m
= recff_idmap
[rd
.fn
->c
.ffid
];
1748 rd
.nres
= 1; /* Default is one result. */
1749 (recff_func
[m
>> 8])(J
, res
, &rd
); /* Call recff_* handler. */
1753 /* Caller takes fixed number of results: local a,b = f() */
1754 J
->maxslot
= func
+ (BCReg
)cres
;
1755 while (rd
.nres
< cres
) /* Fill up missing results with nil. */
1756 res
[rd
.nres
++] = TREF_NIL
;
1757 } else if (cres
== CALLRES_MULTI
) {
1758 /* Caller takes any number of results: return 1,f() */
1759 J
->maxslot
= func
+ (BCReg
)rd
.nres
;
1760 } else if (cres
== CALLRES_TAILCALL
) {
1761 /* Tail call: return f() */
1762 rec_ret(J
, func
, rd
.nres
);
1763 } else if (cres
== CALLRES_CONT
) {
1764 /* Note: immediately resolved continuations must not change J->maxslot. */
1765 J
->maxslot
= oldmaxslot
;
1767 res
[rd
.nres
] = TREF_NIL
; /* Turn 0 results into nil result. */
1769 lua_assert(cres
== CALLRES_PENDING
);
1771 return 0; /* Pending call, no result yet. */
1773 return 1; /* Result resolved immediately. */
1777 /* -- Record allocations -------------------------------------------------- */
/* Record a table allocation (BC_TNEW operand ah = asize | hbits<<11). */
static TRef rec_tnew(jit_State *J, uint32_t ah)
{
  uint32_t asize = ah & 0x7ff;  /* Low 11 bits: array part size hint. */
  uint32_t hbits = ah >> 11;    /* Remaining bits: hash part size hint. */
  /* 0x7ff is the bytecode escape value; it stands for 0x801 (2049) slots. */
  if (asize == 0x7ff) asize = 0x801;
  return emitir(IRT(IR_TNEW, IRT_TAB), asize, hbits);
}
1787 /* -- Record bytecode ops ------------------------------------------------- */
1789 /* Optimize state after comparison. */
1790 static void optstate_comp(jit_State
*J
, int cond
)
1792 BCIns jmpins
= J
->pc
[1];
1793 const BCIns
*npc
= J
->pc
+ 2 + (cond
? bc_j(jmpins
) : 0);
1794 SnapShot
*snap
= &J
->cur
.snap
[J
->cur
.nsnap
-1];
1795 /* Avoid re-recording the comparison in side traces. */
1796 J
->cur
.snapmap
[snap
->mapofs
+ snap
->nent
] = SNAP_MKPC(npc
);
1798 /* Shrink last snapshot if possible. */
1799 if (bc_a(jmpins
) < J
->maxslot
) {
1800 J
->maxslot
= bc_a(jmpins
);
1805 /* Record the next bytecode instruction (_before_ it's executed). */
1806 void lj_record_ins(jit_State
*J
)
1815 /* Need snapshot before recording next bytecode (e.g. after a store). */
1822 /* Record only closed loops for root traces. */
1824 if (J
->framedepth
== 0 &&
1825 (MSize
)((char *)pc
- (char *)J
->bc_min
) >= J
->bc_extent
)
1826 lj_trace_err(J
, LJ_TRERR_LLEAVE
);
1828 #ifdef LUA_USE_ASSERT
1833 /* Keep a copy of the runtime values of var/num/str operands. */
1834 #define rav (&ix.valv)
1835 #define rbv (&ix.tabv)
1836 #define rcv (&ix.keyv)
1843 switch (bcmode_a(op
)) {
1845 copyTV(J
->L
, rav
, &lbase
[ra
]); ix
.val
= ra
= getslot(J
, ra
); break;
1846 default: break; /* Handled later. */
1850 switch (bcmode_b(op
)) {
1851 case BCMnone
: rb
= 0; rc
= bc_d(ins
); break; /* Upgrade rc to 'rd'. */
1853 copyTV(J
->L
, rbv
, &lbase
[rb
]); ix
.tab
= rb
= getslot(J
, rb
); break;
1854 case BCMnum
: { lua_Number n
= proto_knum(J
->pt
, rb
);
1855 setnumV(rbv
, n
); ix
.tab
= rb
= lj_ir_knumint(J
, n
); } break;
1856 default: break; /* Handled later. */
1858 switch (bcmode_c(op
)) {
1860 copyTV(J
->L
, rcv
, &lbase
[rc
]); ix
.key
= rc
= getslot(J
, rc
); break;
1861 case BCMpri
: setitype(rcv
, (int32_t)~rc
); rc
= TREF_PRI(IRT_NIL
+rc
); break;
1862 case BCMnum
: { lua_Number n
= proto_knum(J
->pt
, rc
);
1863 setnumV(rcv
, n
); ix
.key
= rc
= lj_ir_knumint(J
, n
); } break;
1864 case BCMstr
: { GCstr
*s
= gco2str(proto_kgc(J
->pt
, ~(ptrdiff_t)rc
));
1865 setstrV(J
->L
, rcv
, s
); ix
.key
= rc
= lj_ir_kstr(J
, s
); } break;
1866 default: break; /* Handled later. */
1871 /* -- Comparison ops ---------------------------------------------------- */
1873 case BC_ISLT
: case BC_ISGE
: case BC_ISLE
: case BC_ISGT
:
1874 /* Emit nothing for two numeric or string consts. */
1875 if (!(tref_isk2(ra
,rc
) && tref_isnumber_str(ra
) && tref_isnumber_str(rc
))) {
1876 IRType ta
= tref_isinteger(ra
) ? IRT_INT
: tref_type(ra
);
1877 IRType tc
= tref_isinteger(rc
) ? IRT_INT
: tref_type(rc
);
1880 /* Widen mixed number/int comparisons to number/number comparison. */
1881 if (ta
== IRT_INT
&& tc
== IRT_NUM
) {
1882 ra
= emitir(IRTN(IR_TONUM
), ra
, 0);
1884 } else if (ta
== IRT_NUM
&& tc
== IRT_INT
) {
1885 rc
= emitir(IRTN(IR_TONUM
), rc
, 0);
1886 } else if (!((ta
== IRT_FALSE
|| ta
== IRT_TRUE
) &&
1887 (tc
== IRT_FALSE
|| tc
== IRT_TRUE
))) {
1888 break; /* Interpreter will throw for two different types. */
1892 irop
= (int)op
- (int)BC_ISLT
+ (int)IR_LT
;
1893 if (ta
== IRT_NUM
) {
1894 if ((irop
& 1)) irop
^= 4; /* ISGE/ISGT are unordered. */
1895 if (!lj_ir_numcmp(numV(rav
), numV(rcv
), (IROp
)irop
)) irop
^= 5;
1896 } else if (ta
== IRT_INT
) {
1897 if (!lj_ir_numcmp(numV(rav
), numV(rcv
), (IROp
)irop
)) irop
^= 1;
1898 } else if (ta
== IRT_STR
) {
1899 if (!lj_ir_strcmp(strV(rav
), strV(rcv
), (IROp
)irop
)) irop
^= 1;
1900 ra
= lj_ir_call(J
, IRCALL_lj_str_cmp
, ra
, rc
);
1901 rc
= lj_ir_kint(J
, 0);
1904 rec_mm_comp(J
, &ix
, (int)op
);
1907 emitir(IRTG(irop
, ta
), ra
, rc
);
1908 optstate_comp(J
, ((int)op
^ irop
) & 1);
1912 case BC_ISEQV
: case BC_ISNEV
:
1913 case BC_ISEQS
: case BC_ISNES
:
1914 case BC_ISEQN
: case BC_ISNEN
:
1915 case BC_ISEQP
: case BC_ISNEP
:
1916 /* Emit nothing for two non-table, non-udata consts. */
1917 if (!(tref_isk2(ra
, rc
) && !(tref_istab(ra
) || tref_isudata(ra
)))) {
1920 diff
= rec_objcmp(J
, ra
, rc
, rav
, rcv
);
1921 if (diff
== 1 && (tref_istab(ra
) || tref_isudata(ra
))) {
1922 /* Only check __eq if different, but the same type (table or udata). */
1923 rec_mm_equal(J
, &ix
, (int)op
);
1926 optstate_comp(J
, ((int)op
& 1) == !diff
);
1930 /* -- Unary test and copy ops ------------------------------------------- */
1932 case BC_ISTC
: case BC_ISFC
:
1933 if ((op
& 1) == tref_istruecond(rc
))
1934 rc
= 0; /* Don't store if condition is not true. */
1936 case BC_IST
: case BC_ISF
: /* Type specialization suffices. */
1937 if (bc_a(pc
[1]) < J
->maxslot
)
1938 J
->maxslot
= bc_a(pc
[1]); /* Shrink used slots. */
1941 /* -- Unary ops --------------------------------------------------------- */
1944 /* Type specialization already forces const result. */
1945 rc
= tref_istruecond(rc
) ? TREF_FALSE
: TREF_TRUE
;
1949 if (tref_isstr(rc
)) {
1950 rc
= emitir(IRTI(IR_FLOAD
), rc
, IRFL_STR_LEN
);
1951 } else if (tref_istab(rc
)) {
1952 rc
= lj_ir_call(J
, IRCALL_lj_tab_len
, rc
);
1955 copyTV(J
->L
, &ix
.tabv
, &ix
.keyv
);
1958 rc
= rec_mm_arith(J
, &ix
, MM_len
);
1962 /* -- Arithmetic ops ---------------------------------------------------- */
1965 if (tref_isnumber_str(rc
)) {
1966 rc
= lj_ir_tonum(J
, rc
);
1967 rc
= emitir(IRTN(IR_NEG
), rc
, lj_ir_knum_neg(J
));
1970 copyTV(J
->L
, &ix
.tabv
, &ix
.keyv
);
1971 rc
= rec_mm_arith(J
, &ix
, MM_unm
);
1975 case BC_ADDNV
: case BC_SUBNV
: case BC_MULNV
: case BC_DIVNV
: case BC_MODNV
:
1976 ix
.tab
= rc
; ix
.key
= rc
= rb
; rb
= ix
.tab
;
1977 copyTV(J
->L
, &ix
.valv
, &ix
.tabv
);
1978 copyTV(J
->L
, &ix
.tabv
, &ix
.keyv
);
1979 copyTV(J
->L
, &ix
.keyv
, &ix
.valv
);
1983 case BC_ADDVN
: case BC_SUBVN
: case BC_MULVN
: case BC_DIVVN
:
1984 case BC_ADDVV
: case BC_SUBVV
: case BC_MULVV
: case BC_DIVVV
: {
1985 MMS mm
= bcmode_mm(op
);
1986 if (tref_isnumber_str(rb
) && tref_isnumber_str(rc
)) {
1987 rb
= lj_ir_tonum(J
, rb
);
1988 rc
= lj_ir_tonum(J
, rc
);
1989 rc
= emitir(IRTN((int)mm
- (int)MM_add
+ (int)IR_ADD
), rb
, rc
);
1991 rc
= rec_mm_arith(J
, &ix
, mm
);
1996 case BC_MODVN
: case BC_MODVV
:
1998 if (tref_isnumber_str(rb
) && tref_isnumber_str(rc
))
1999 rc
= lj_opt_narrow_mod(J
, rb
, rc
);
2001 rc
= rec_mm_arith(J
, &ix
, MM_mod
);
2005 if (tref_isnumber_str(rb
) && tref_isnumber_str(rc
))
2006 rc
= lj_opt_narrow_pow(J
, lj_ir_tonum(J
, rb
), rc
, rcv
);
2008 rc
= rec_mm_arith(J
, &ix
, MM_pow
);
2011 /* -- Constant and move ops --------------------------------------------- */
2013 case BC_KSTR
: case BC_KNUM
: case BC_KPRI
: case BC_MOV
:
2016 rc
= lj_ir_kint(J
, (int32_t)(int16_t)rc
);
2020 J
->base
[ra
++] = TREF_NIL
;
2021 if (rc
>= J
->maxslot
) J
->maxslot
= rc
+1;
2024 /* -- Upvalue and function ops ------------------------------------------ */
2027 rc
= rec_upvalue(J
, rc
, 0);
2029 case BC_USETV
: case BC_USETS
: case BC_USETN
: case BC_USETP
:
2030 rec_upvalue(J
, ra
, rc
);
2033 /* -- Table ops --------------------------------------------------------- */
2035 case BC_GGET
: case BC_GSET
:
2036 settabV(J
->L
, &ix
.tabv
, tabref(J
->fn
->l
.env
));
2037 ix
.tab
= emitir(IRT(IR_FLOAD
, IRT_TAB
), getcurrf(J
), IRFL_FUNC_ENV
);
2038 ix
.idxchain
= LJ_MAX_IDXCHAIN
;
2039 rc
= rec_idx(J
, &ix
);
2042 case BC_TGETB
: case BC_TSETB
:
2043 setintV(&ix
.keyv
, (int32_t)rc
);
2044 ix
.key
= lj_ir_kint(J
, (int32_t)rc
);
2046 case BC_TGETV
: case BC_TGETS
: case BC_TSETV
: case BC_TSETS
:
2047 ix
.idxchain
= LJ_MAX_IDXCHAIN
;
2048 rc
= rec_idx(J
, &ix
);
2052 rc
= rec_tnew(J
, rc
);
2055 rc
= emitir(IRT(IR_TDUP
, IRT_TAB
),
2056 lj_ir_ktab(J
, gco2tab(proto_kgc(J
->pt
, ~(ptrdiff_t)rc
))), 0);
2059 /* -- Calls and vararg handling ----------------------------------------- */
2062 J
->base
[ra
] = getslot(J
, ra
-3);
2063 J
->base
[ra
+1] = getslot(J
, ra
-2);
2064 J
->base
[ra
+2] = getslot(J
, ra
-1);
2065 { /* Have to do the actual copy now because rec_call needs the values. */
2066 TValue
*b
= &J
->L
->base
[ra
];
2067 copyTV(J
->L
, b
, b
-3);
2068 copyTV(J
->L
, b
+1, b
-2);
2069 copyTV(J
->L
, b
+2, b
-1);
2074 rb
= (TRef
)(CALLRES_TAILCALL
+1);
2077 /* L->top is set to L->base+ra+rc+NRESULTS-1+1, see lj_dispatch_ins(). */
2078 rc
= (BCReg
)(J
->L
->top
- J
->L
->base
) - ra
;
2082 rb
= (TRef
)(CALLRES_TAILCALL
+1);
2086 rec_call(J
, ra
, (ptrdiff_t)rb
-1, (ptrdiff_t)rc
-1);
2089 /* -- Returns ----------------------------------------------------------- */
2092 /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */
2093 rc
= (BCReg
)(J
->L
->top
- J
->L
->base
) - ra
+ 1;
2095 case BC_RET
: case BC_RET0
: case BC_RET1
:
2096 rec_ret(J
, ra
, (ptrdiff_t)rc
-1);
2099 /* -- Loops and branches ------------------------------------------------ */
2102 if (rec_for(J
, pc
, 0) != LOOPEV_LEAVE
)
2103 J
->loopref
= J
->cur
.nins
;
2106 lua_assert(bc_op(pc
[(ptrdiff_t)rc
-BCBIAS_J
]) == BC_JFORL
);
2107 if (rec_for(J
, pc
, 0) != LOOPEV_LEAVE
) /* Link to existing loop. */
2108 rec_stop(J
, bc_d(pc
[(ptrdiff_t)rc
-BCBIAS_J
]));
2109 /* Continue tracing if the loop is not entered. */
2113 rec_loop_interp(J
, pc
, rec_for(J
, pc
+((ptrdiff_t)rc
-BCBIAS_J
), 1));
2116 rec_loop_interp(J
, pc
, rec_iterl(J
, *pc
));
2119 rec_loop_interp(J
, pc
, rec_loop(J
, ra
));
2123 rec_loop_jit(J
, rc
, rec_for(J
, pc
+bc_j(J
->trace
[rc
]->startins
), 1));
2126 rec_loop_jit(J
, rc
, rec_iterl(J
, J
->trace
[rc
]->startins
));
2129 rec_loop_jit(J
, rc
, rec_loop(J
, ra
));
2135 lj_trace_err(J
, LJ_TRERR_LBLACKL
);
2139 if (ra
< J
->maxslot
)
2140 J
->maxslot
= ra
; /* Shrink used slots. */
2149 setintV(&J
->errinfo
, (int32_t)op
);
2150 lj_trace_err_info(J
, LJ_TRERR_NYIBC
);
2154 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2155 if (bcmode_a(op
) == BCMdst
&& rc
) {
2157 if (ra
>= J
->maxslot
) J
->maxslot
= ra
+1;
2164 /* Limit the number of recorded IR instructions. */
2165 if (J
->cur
.nins
> REF_FIRST
+(IRRef
)J
->param
[JIT_P_maxrecord
])
2166 lj_trace_err(J
, LJ_TRERR_TRACEOV
);
2169 /* -- Recording setup ----------------------------------------------------- */
2171 /* Setup recording for a FORL loop. */
2172 static void rec_setup_forl(jit_State
*J
, const BCIns
*fori
)
2174 BCReg ra
= bc_a(*fori
);
2175 cTValue
*forbase
= &J
->L
->base
[ra
];
2176 IRType t
= (J
->flags
& JIT_F_OPT_NARROW
) ? lj_opt_narrow_forl(forbase
)
2178 TRef stop
= fori_arg(J
, fori
, ra
+FORL_STOP
, t
);
2179 TRef step
= fori_arg(J
, fori
, ra
+FORL_STEP
, t
);
2180 int dir
= (0 <= numV(&forbase
[FORL_STEP
]));
2181 lua_assert(bc_op(*fori
) == BC_FORI
|| bc_op(*fori
) == BC_JFORI
);
2182 if (!tref_isk(step
)) {
2183 /* Non-constant step: need a guard for the direction. */
2184 TRef zero
= (t
== IRT_INT
) ? lj_ir_kint(J
, 0) : lj_ir_knum_zero(J
);
2185 emitir(IRTG(dir
? IR_GE
: IR_LT
, t
), step
, zero
);
2186 /* Add hoistable overflow checks for a narrowed FORL index. */
2188 if (tref_isk(stop
)) {
2189 /* Constant stop: optimize check away or to a range check for step. */
2190 int32_t k
= IR(tref_ref(stop
))->i
;
2193 emitir(IRTGI(IR_LE
), step
, lj_ir_kint(J
, (int32_t)0x7fffffff-k
));
2196 emitir(IRTGI(IR_GE
), step
, lj_ir_kint(J
, (int32_t)0x80000000-k
));
2199 /* Stop+step variable: need full overflow check (with dead result). */
2200 emitir(IRTGI(IR_ADDOV
), step
, stop
);
2203 } else if (t
== IRT_INT
&& !tref_isk(stop
)) {
2204 /* Constant step: optimize overflow check to a range check for stop. */
2205 int32_t k
= IR(tref_ref(step
))->i
;
2206 k
= (int32_t)(dir
? 0x7fffffff : 0x80000000) - k
;
2207 emitir(IRTGI(dir
? IR_LE
: IR_GE
), stop
, lj_ir_kint(J
, k
));
2209 if (t
== IRT_INT
&& !find_kinit(J
, fori
, ra
+FORL_IDX
, IRT_INT
))
2211 J
->base
[ra
+FORL_EXT
] = sloadt(J
, (int32_t)(ra
+FORL_IDX
), t
, IRSLOAD_INHERIT
);
2212 J
->maxslot
= ra
+FORL_EXT
+1;
2215 /* Setup recording for a root trace started by a hot loop. */
2216 static const BCIns
*rec_setup_root(jit_State
*J
)
2218 /* Determine the next PC and the bytecode range for the loop. */
2219 const BCIns
*pcj
, *pc
= J
->pc
;
2221 BCReg ra
= bc_a(ins
);
2222 switch (bc_op(ins
)) {
2224 J
->bc_extent
= (MSize
)(-bc_j(ins
))*sizeof(BCIns
);
2229 lua_assert(bc_op(pc
[-1]) == BC_ITERC
);
2230 J
->maxslot
= ra
+ bc_b(pc
[-1]) - 1;
2231 J
->bc_extent
= (MSize
)(-bc_j(ins
))*sizeof(BCIns
);
2233 lua_assert(bc_op(pc
[-1]) == BC_JMP
);
2237 /* Only check BC range for real loops, but not for "repeat until true". */
2238 pcj
= pc
+ bc_j(ins
);
2240 if (bc_op(ins
) == BC_JMP
&& bc_j(ins
) < 0) {
2241 J
->bc_min
= pcj
+1 + bc_j(ins
);
2242 J
->bc_extent
= (MSize
)(-bc_j(ins
))*sizeof(BCIns
);
2254 /* Setup recording for a side trace. */
2255 static void rec_setup_side(jit_State
*J
, Trace
*T
)
2257 SnapShot
*snap
= &T
->snap
[J
->exitno
];
2258 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
2259 MSize n
, nent
= snap
->nent
;
2260 BloomFilter seen
= 0;
2261 /* Emit IR for slots inherited from parent snapshot. */
2262 for (n
= 0; n
< nent
; n
++) {
2263 SnapEntry sn
= map
[n
];
2264 IRRef ref
= snap_ref(sn
);
2265 BCReg s
= snap_slot(sn
);
2266 IRIns
*ir
= &T
->ir
[ref
];
2268 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
2269 if (bloomtest(seen
, ref
)) {
2271 for (j
= 0; j
< n
; j
++)
2272 if (snap_ref(map
[j
]) == ref
) {
2273 tr
= J
->slot
[snap_slot(map
[j
])];
2277 bloomset(seen
, ref
);
2278 switch ((IROp
)ir
->o
) {
2279 /* Only have to deal with constants that can occur in stack slots. */
2280 case IR_KPRI
: tr
= TREF_PRI(irt_type(ir
->t
)); break;
2281 case IR_KINT
: tr
= lj_ir_kint(J
, ir
->i
); break;
2282 case IR_KGC
: tr
= lj_ir_kgc(J
, ir_kgc(ir
), irt_t(ir
->t
)); break;
2283 case IR_KNUM
: tr
= lj_ir_knum_addr(J
, ir_knum(ir
)); break;
2284 case IR_KPTR
: tr
= lj_ir_kptr(J
, ir_kptr(ir
)); break; /* Continuation. */
2285 /* Inherited SLOADs don't need a guard or type check. */
2287 tr
= emitir_raw(ir
->ot
& ~IRT_GUARD
, s
,
2288 (ir
->op2
&IRSLOAD_READONLY
) | IRSLOAD_INHERIT
|IRSLOAD_PARENT
);
2290 /* Parent refs are already typed and don't need a guard. */
2292 tr
= emitir_raw(IRT(IR_SLOAD
, irt_type(ir
->t
)), s
,
2293 IRSLOAD_INHERIT
|IRSLOAD_PARENT
);
2297 J
->slot
[s
] = tr
| (sn
&(SNAP_CONT
|SNAP_FRAME
)); /* Same as TREF_* flags. */
2298 if ((sn
& SNAP_FRAME
) && s
!= 0)
2301 J
->base
= J
->slot
+ J
->baseslot
;
2302 J
->maxslot
= snap
->nslots
- J
->baseslot
;
2303 J
->framedepth
= snap
->depth
; /* Copy frames from snapshot. */
2304 memcpy(J
->frame
, &map
[nent
+1], sizeof(SnapEntry
)*(size_t)snap
->depth
);
2308 /* Setup for recording a new trace. */
2309 void lj_record_setup(jit_State
*J
)
2313 /* Initialize state related to current trace. */
2314 memset(J
->slot
, 0, sizeof(J
->slot
));
2315 memset(J
->chain
, 0, sizeof(J
->chain
));
2316 memset(J
->bpropcache
, 0, sizeof(J
->bpropcache
));
2318 J
->baseslot
= 1; /* Invoking function is at base[-1]. */
2319 J
->base
= J
->slot
+ J
->baseslot
;
2324 J
->instunroll
= J
->param
[JIT_P_instunroll
];
2325 J
->loopunroll
= J
->param
[JIT_P_loopunroll
];
2329 J
->bc_min
= NULL
; /* Means no limit. */
2330 J
->bc_extent
= ~(MSize
)0;
2332 /* Emit instructions for fixed references. Also triggers initial IR alloc. */
2333 emitir_raw(IRT(IR_BASE
, IRT_PTR
), J
->parent
, J
->exitno
);
2334 for (i
= 0; i
<= 2; i
++) {
2335 IRIns
*ir
= IR(REF_NIL
-i
);
2337 ir
->t
.irt
= (uint8_t)(IRT_NIL
+i
);
2341 J
->cur
.nk
= REF_TRUE
;
2343 setgcref(J
->cur
.startpt
, obj2gco(J
->pt
));
2345 if (J
->parent
) { /* Side trace. */
2346 Trace
*T
= J
->trace
[J
->parent
];
2347 TraceNo root
= T
->root
? T
->root
: J
->parent
;
2348 J
->cur
.root
= (uint16_t)root
;
2349 J
->cur
.startins
= BCINS_AD(BC_JMP
, 0, 0);
2350 /* Check whether we could at least potentially form an extra loop. */
2351 if (J
->exitno
== 0 && T
->snap
[0].nent
== 0) {
2352 /* We can narrow a FORL for some side traces, too. */
2353 if (J
->pc
> proto_bc(J
->pt
) && bc_op(J
->pc
[-1]) == BC_JFORI
&&
2354 bc_d(J
->pc
[bc_j(J
->pc
[-1])-1]) == root
) {
2356 rec_setup_forl(J
, J
->pc
-1);
2360 J
->startpc
= NULL
; /* Prevent forming an extra loop. */
2362 rec_setup_side(J
, T
);
2364 if (J
->trace
[J
->cur
.root
]->nchild
>= J
->param
[JIT_P_maxside
] ||
2365 T
->snap
[J
->exitno
].count
>= J
->param
[JIT_P_hotexit
] +
2366 J
->param
[JIT_P_tryside
])
2367 rec_stop(J
, TRACE_INTERP
);
2368 } else { /* Root trace. */
2370 if (J
->pc
>= proto_bc(J
->pt
)) { /* Not a hot CALL? */
2371 J
->cur
.startins
= *J
->pc
;
2372 J
->pc
= rec_setup_root(J
);
2373 /* Note: the loop instruction itself is recorded at the end and not
2374 ** at the start! So snapshot #0 needs to point to the *next* instruction.
2377 J
->cur
.startins
= BCINS_ABC(BC_CALL
, 0, 0, 0);
2380 if (bc_op(J
->cur
.startins
) == BC_FORL
)
2381 rec_setup_forl(J
, J
->pc
-1);
2382 if (1 + J
->pt
->framesize
>= LJ_MAX_JSLOTS
)
2383 lj_trace_err(J
, LJ_TRERR_STACKOV
);