Initialize uv->immutable for upvalues of loaded chunks.
[luajit-2.0.git] / src / lj_snap.c
blob788500afdff4ab668ec3bee694e4ec4d41e25286
1 /*
2 ** Snapshot handling.
3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_snap_c
7 #define LUA_CORE
9 #include "lj_obj.h"
11 #if LJ_HASJIT
13 #include "lj_gc.h"
14 #include "lj_tab.h"
15 #include "lj_state.h"
16 #include "lj_frame.h"
17 #include "lj_bc.h"
18 #include "lj_ir.h"
19 #include "lj_jit.h"
20 #include "lj_iropt.h"
21 #include "lj_trace.h"
22 #include "lj_snap.h"
23 #include "lj_target.h"
24 #if LJ_HASFFI
25 #include "lj_ctype.h"
26 #include "lj_cdata.h"
27 #endif
29 /* Pass IR on to next optimization in chain (FOLD). */
/* Expands to lj_ir_set() followed by lj_opt_fold(); needs a jit_State
** pointer named J in the enclosing scope.
*/
30 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
32 /* Emit raw IR without passing through optimizations. */
/* Same as emitir(), but ends in lj_ir_emit() instead of lj_opt_fold(). */
33 #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
35 /* -- Snapshot buffer allocation ------------------------------------------ */
37 /* Grow snapshot buffer. */
38 void lj_snap_grow_buf_(jit_State *J, MSize need)
40 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
41 if (need > maxsnap)
42 lj_trace_err(J, LJ_TRERR_SNAPOV);
43 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
44 J->cur.snap = J->snapbuf;
47 /* Grow snapshot map buffer. */
48 void lj_snap_grow_map_(jit_State *J, MSize need)
50 if (need < 2*J->sizesnapmap)
51 need = 2*J->sizesnapmap;
52 else if (need < 64)
53 need = 64;
54 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
55 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
56 J->cur.snapmap = J->snapmapbuf;
57 J->sizesnapmap = need;
60 /* -- Snapshot generation ------------------------------------------------- */
62 /* Add all modified slots to the snapshot. */
63 static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
65 IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */
66 BCReg s;
67 MSize n = 0;
68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr);
71 if (ref) {
72 SnapEntry sn = SNAP_TR(s, tr);
73 IRIns *ir = &J->cur.ir[ref];
74 if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
75 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
76 /* No need to snapshot unmodified non-inherited slots. */
77 if (!(ir->op2 & IRSLOAD_INHERIT))
78 continue;
79 /* No need to restore readonly slots and unmodified non-parent slots. */
80 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
81 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
82 sn |= SNAP_NORESTORE;
84 if (LJ_SOFTFP && irt_isnum(ir->t))
85 sn |= SNAP_SOFTFPNUM;
86 map[n++] = sn;
89 return n;
92 /* Add frame links at the end of the snapshot. */
93 static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
95 cTValue *frame = J->L->base - 1;
96 cTValue *lim = J->L->base - J->baseslot;
97 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
98 MSize f = 0;
99 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
100 while (frame > lim) { /* Backwards traversal of all frames above base. */
101 if (frame_islua(frame)) {
102 map[f++] = SNAP_MKPC(frame_pc(frame));
103 frame = frame_prevl(frame);
104 } else if (frame_iscont(frame)) {
105 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
106 map[f++] = SNAP_MKPC(frame_contpc(frame));
107 frame = frame_prevd(frame);
108 } else {
109 lua_assert(!frame_isc(frame));
110 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
111 frame = frame_prevd(frame);
112 continue;
114 if (frame + funcproto(frame_func(frame))->framesize > ftop)
115 ftop = frame + funcproto(frame_func(frame))->framesize;
117 lua_assert(f == (MSize)(1 + J->framedepth));
118 return (BCReg)(ftop - lim);
121 /* Take a snapshot of the current stack. */
122 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
124 BCReg nslots = J->baseslot + J->maxslot;
125 MSize nent;
126 SnapEntry *p;
127 /* Conservative estimate. */
128 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
129 p = &J->cur.snapmap[nsnapmap];
130 nent = snapshot_slots(J, p, nslots);
131 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
132 snap->mapofs = (uint16_t)nsnapmap;
133 snap->ref = (IRRef1)J->cur.nins;
134 snap->nent = (uint8_t)nent;
135 snap->nslots = (uint8_t)nslots;
136 snap->count = 0;
137 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
140 /* Add or merge a snapshot. */
141 void lj_snap_add(jit_State *J)
143 MSize nsnap = J->cur.nsnap;
144 MSize nsnapmap = J->cur.nsnapmap;
145 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
146 if (J->mergesnap ? !irt_isguard(J->guardemit) :
147 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
148 if (nsnap == 1) { /* But preserve snap #0 PC. */
149 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
150 goto nomerge;
152 nsnapmap = J->cur.snap[--nsnap].mapofs;
153 } else {
154 nomerge:
155 lj_snap_grow_buf(J, nsnap+1);
156 J->cur.nsnap = (uint16_t)(nsnap+1);
158 J->mergesnap = 0;
159 J->guardemit.irt = 0;
160 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
163 /* -- Snapshot modification ----------------------------------------------- */
165 #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
167 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
168 static BCReg snap_usedef(jit_State *J, uint8_t *udf,
169 const BCIns *pc, BCReg maxslot)
171 BCReg s;
172 GCobj *o;
174 if (maxslot == 0) return 0;
175 #ifdef LUAJIT_USE_VALGRIND
176 /* Avoid errors for harmless reads beyond maxslot. */
177 memset(udf, 1, SNAP_USEDEF_SLOTS);
178 #else
179 memset(udf, 1, maxslot);
180 #endif
182 /* Treat open upvalues as used. */
183 o = gcref(J->L->openupval);
184 while (o) {
185 if (uvval(gco2uv(o)) < J->L->base) break;
186 udf[uvval(gco2uv(o)) - J->L->base] = 0;
187 o = gcref(o->gch.nextgc);
190 #define USE_SLOT(s) udf[(s)] &= ~1
191 #define DEF_SLOT(s) udf[(s)] *= 3
193 /* Scan through following bytecode and check for uses/defs. */
194 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
195 for (;;) {
196 BCIns ins = *pc++;
197 BCOp op = bc_op(ins);
198 switch (bcmode_b(op)) {
199 case BCMvar: USE_SLOT(bc_b(ins)); break;
200 default: break;
202 switch (bcmode_c(op)) {
203 case BCMvar: USE_SLOT(bc_c(ins)); break;
204 case BCMrbase:
205 lua_assert(op == BC_CAT);
206 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
207 for (; s < maxslot; s++) DEF_SLOT(s);
208 break;
209 case BCMjump:
210 handle_jump: {
211 BCReg minslot = bc_a(ins);
212 if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
213 else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
214 else if (op == BC_UCLO) { pc += bc_j(ins); break; }
215 for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
216 return minslot < maxslot ? minslot : maxslot;
218 case BCMlit:
219 if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
220 goto handle_jump;
221 } else if (bc_isret(op)) {
222 BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
223 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
224 for (; s < top; s++) USE_SLOT(s);
225 for (; s < maxslot; s++) DEF_SLOT(s);
226 return 0;
228 break;
229 case BCMfunc: return maxslot; /* NYI: will abort, anyway. */
230 default: break;
232 switch (bcmode_a(op)) {
233 case BCMvar: USE_SLOT(bc_a(ins)); break;
234 case BCMdst:
235 if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
236 break;
237 case BCMbase:
238 if (op >= BC_CALLM && op <= BC_VARG) {
239 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
240 maxslot : (bc_a(ins) + bc_c(ins));
241 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
242 for (; s < top; s++) USE_SLOT(s);
243 for (; s < maxslot; s++) DEF_SLOT(s);
244 if (op == BC_CALLT || op == BC_CALLMT) {
245 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
246 return 0;
248 } else if (op == BC_KNIL) {
249 for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
250 } else if (op == BC_TSETM) {
251 for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
253 break;
254 default: break;
256 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
259 #undef USE_SLOT
260 #undef DEF_SLOT
262 return 0; /* unreachable */
265 /* Purge dead slots before the next snapshot. */
266 void lj_snap_purge(jit_State *J)
268 uint8_t udf[SNAP_USEDEF_SLOTS];
269 BCReg maxslot = J->maxslot;
270 BCReg s = snap_usedef(J, udf, J->pc, maxslot);
271 for (; s < maxslot; s++)
272 if (udf[s] != 0)
273 J->base[s] = 0; /* Purge dead slots. */
276 /* Shrink last snapshot. */
277 void lj_snap_shrink(jit_State *J)
279 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
280 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
281 MSize n, m, nlim, nent = snap->nent;
282 uint8_t udf[SNAP_USEDEF_SLOTS];
283 BCReg maxslot = J->maxslot;
284 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
285 BCReg baseslot = J->baseslot;
286 maxslot += baseslot;
287 minslot += baseslot;
288 snap->nslots = (uint8_t)maxslot;
289 for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
290 BCReg s = snap_slot(map[n]);
291 if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
292 map[m++] = map[n]; /* Only copy used slots. */
294 snap->nent = (uint8_t)m;
295 nlim = J->cur.nsnapmap - snap->mapofs - 1;
296 while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
297 J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
300 /* -- Snapshot access ----------------------------------------------------- */
302 /* Initialize a Bloom Filter with all renamed refs.
303 ** There are very few renames (often none), so the filter has
304 ** very few bits set. This makes it suitable for negative filtering.
306 static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
308 BloomFilter rfilt = 0;
309 IRIns *ir;
310 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
311 if (ir->op2 <= lim)
312 bloomset(rfilt, ir->op1);
313 return rfilt;
316 /* Process matching renames to find the original RegSP. */
317 static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
319 IRIns *ir;
320 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
321 if (ir->op1 == ref && ir->op2 <= lim)
322 rs = ir->prev;
323 return rs;
326 /* Copy RegSP from parent snapshot to the parent links of the IR. */
/* Walks the instructions starting at ir and stores, in each one's 'prev'
** field, the RegSP of the matching ref in parent trace T (after applying
** renames up to snapno). Stops at the first instruction that is not a
** parent SLOAD, a (soft-float) HIOP or a PVAL, and returns a pointer to it.
*/
327 IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
329 SnapShot *snap = &T->snap[snapno];
330 SnapEntry *map = &T->snapmap[snap->mapofs];
331 BloomFilter rfilt = snap_renamefilter(T, snapno);
/* n is a cursor into the snapshot map that only moves forward. */
332 MSize n = 0;
333 IRRef ref = 0;
334 for ( ; ; ir++) {
335 uint32_t rs;
336 if (ir->o == IR_SLOAD) {
337 if (!(ir->op2 & IRSLOAD_PARENT)) break;
/* Find the snapshot entry for the slot loaded by this SLOAD. */
338 for ( ; ; n++) {
339 lua_assert(n < snap->nent);
340 if (snap_slot(map[n]) == ir->op1) {
341 ref = snap_ref(map[n++]);
342 break;
345 } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
/* Takes the ref right after the one used for the previous instruction. */
346 ref++;
347 } else if (ir->o == IR_PVAL) {
348 ref = ir->op1 + REF_BIAS;
349 } else {
350 break;
352 rs = T->ir[ref].prev;
/* The bloom filter is only a quick negative check for renames. */
353 if (bloomtest(rfilt, ref))
354 rs = snap_renameref(T, snapno, ref, rs);
355 ir->prev = (uint16_t)rs;
356 lua_assert(regsp_used(rs));
358 return ir;
361 /* -- Snapshot replay ----------------------------------------------------- */
363 /* Replay constant from parent trace. */
364 static TRef snap_replay_const(jit_State *J, IRIns *ir)
366 /* Only have to deal with constants that can occur in stack slots. */
367 switch ((IROp)ir->o) {
368 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
369 case IR_KINT: return lj_ir_kint(J, ir->i);
370 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
371 case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
372 case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
373 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
374 default: lua_assert(0); return TREF_NIL; break;
378 /* De-duplicate parent reference. */
379 static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
381 MSize j;
382 for (j = 0; j < nmax; j++)
383 if (snap_ref(map[j]) == ref)
384 return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
385 return 0;
388 /* Emit parent reference with de-duplication. */
389 static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
390 BloomFilter seen, IRRef ref)
392 IRIns *ir = &T->ir[ref];
393 TRef tr;
394 if (irref_isk(ref))
395 tr = snap_replay_const(J, ir);
396 else if (!regsp_used(ir->prev))
397 tr = 0;
398 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
399 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
400 return tr;
403 /* Check whether a sunk store corresponds to an allocation. Slow path. */
404 static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
406 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
407 irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
408 IRIns *irk = &T->ir[irs->op1];
409 if (irk->o == IR_AREF || irk->o == IR_HREFK)
410 irk = &T->ir[irk->op1];
411 return (&T->ir[irk->op1] == ira);
413 return 0;
416 /* Check whether a sunk store corresponds to an allocation. Fast path. */
417 static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
419 if (irs->s != 255)
420 return (ira + irs->s == irs); /* Fast check. */
421 return snap_sunk_store2(T, ira, irs);
424 /* Replay snapshot state to setup side trace. */
/* Rebuilds J->slot[], J->framedepth, J->baseslot, J->base and J->maxslot
** from snapshot J->exitno of parent trace T, emitting the IR needed to
** inherit each value, and then takes the initial snapshot of the new trace.
**
** Pass 1 handles every snapshot entry. Passes 2 and 3 only run if some
** entry refers to a parent instruction without register or spill slot:
** pass 2 emits the PVALs such instructions depend on, pass 3 re-emits the
** sunk allocations and their sunk stores.
*/
425 void lj_snap_replay(jit_State *J, GCtrace *T)
427 SnapShot *snap = &T->snap[J->exitno];
428 SnapEntry *map = &T->snapmap[snap->mapofs];
429 MSize n, nent = snap->nent;
430 BloomFilter seen = 0;
431 int pass23 = 0;
432 J->framedepth = 0;
433 /* Emit IR for slots inherited from parent snapshot. */
434 for (n = 0; n < nent; n++) {
435 SnapEntry sn = map[n];
436 BCReg s = snap_slot(sn);
437 IRRef ref = snap_ref(sn);
438 IRIns *ir = &T->ir[ref];
439 TRef tr;
440 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
441 if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
442 goto setslot;
443 bloomset(seen, ref);
444 if (irref_isk(ref)) {
445 tr = snap_replay_const(J, ir);
446 } else if (!regsp_used(ir->prev)) {
/* No register or spill slot in the parent: defer to passes 2 and 3.
** The slot number itself is stored as a placeholder, which the later
** passes test for with J->slot[s] != s.
*/
447 pass23 = 1;
448 lua_assert(s != 0);
449 tr = s;
450 } else {
451 IRType t = irt_type(ir->t);
452 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
453 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
454 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
455 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
457 setslot:
458 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
/* Count frame/continuation entries, except the one in slot 0. */
459 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
460 if ((sn & SNAP_FRAME))
461 J->baseslot = s+1;
463 if (pass23) {
/* Sunk stores are searched for between an allocation and this ref. */
464 IRIns *irlast = &T->ir[snap->ref];
/* Pass 3 only runs if pass 2 finds a sunk allocation still pending. */
465 pass23 = 0;
466 /* Emit dependent PVALs. */
467 for (n = 0; n < nent; n++) {
468 SnapEntry sn = map[n];
469 IRRef refp = snap_ref(sn);
470 IRIns *ir = &T->ir[refp];
471 if (regsp_reg(ir->r) == RID_SUNK) {
/* Skip slots that were de-duplicated in pass 1 (no placeholder). */
472 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
473 pass23 = 1;
474 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
475 ir->o == IR_CNEW || ir->o == IR_CNEWI);
/* Operands at or above T->nk are passed through snap_pref(). */
476 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
477 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
478 if (LJ_HASFFI && ir->o == IR_CNEWI) {
479 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
480 snap_pref(J, T, map, nent, seen, (ir+1)->op2);
481 } else {
482 IRIns *irs;
483 for (irs = ir+1; irs < irlast; irs++)
484 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
/* snap_pref() returns 0 when the stored value has no register or spill
** slot; then the operand of that value is requested instead.
*/
485 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
486 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
487 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
488 irs+1 < irlast && (irs+1)->o == IR_HIOP)
489 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
492 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
/* Not sunk, but without register/spill: expected to be an int-to-number
** CONV (see assertion); the slot receives the reference to its operand.
*/
493 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
494 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
497 /* Replay sunk instructions. */
498 for (n = 0; pass23 && n < nent; n++) {
499 SnapEntry sn = map[n];
500 IRRef refp = snap_ref(sn);
501 IRIns *ir = &T->ir[refp];
502 if (regsp_reg(ir->r) == RID_SUNK) {
503 TRef op1, op2;
504 if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
505 J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
506 continue;
508 op1 = ir->op1;
509 if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
510 op2 = ir->op2;
511 if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
512 if (LJ_HASFFI && ir->o == IR_CNEWI) {
513 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
514 lj_needsplit(J); /* Emit joining HIOP. */
515 op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
516 snap_pref(J, T, map, nent, seen, (ir+1)->op2));
518 J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
519 } else {
520 IRIns *irs;
/* Re-emit the allocation, then every sunk store that belongs to it. */
521 TRef tr = emitir(ir->ot, op1, op2);
522 J->slot[snap_slot(sn)] = tr;
523 for (irs = ir+1; irs < irlast; irs++)
524 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
525 IRIns *irr = &T->ir[irs->op1];
526 TRef val, key = irr->op2, tmp = tr;
/* For anything but FREF the key is a constant that must be replayed;
** for FREF, op2 is used unchanged as the second operand.
*/
527 if (irr->o != IR_FREF) {
528 IRIns *irk = &T->ir[key];
529 if (irr->o == IR_HREFK)
530 key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
531 irk->op2);
532 else
533 key = snap_replay_const(J, irk);
534 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
/* Re-emit the instruction the reference is based on (irr->op1). */
535 IRIns *irf = &T->ir[irr->op1];
536 tmp = emitir(irf->ot, tmp, irf->op2);
539 tmp = emitir(irr->ot, tmp, key);
540 val = snap_pref(J, T, map, nent, seen, irs->op2);
541 if (val == 0) {
/* Value without register/spill: re-emit the int-to-number conversion. */
542 IRIns *irc = &T->ir[irs->op2];
543 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
544 val = snap_pref(J, T, map, nent, seen, irc->op1);
545 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
546 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
547 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
/* Split 64 bit store: join the two halves into one value again. */
548 IRType t = IRT_I64;
549 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
550 t = IRT_NUM;
551 lj_needsplit(J);
552 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
/* Both halves constant: build the 64 bit constant (lo | hi << 32). */
553 uint64_t k = (uint32_t)T->ir[irs->op2].i +
554 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
555 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
556 lj_ir_k64_find(J, k));
557 } else {
558 val = emitir_raw(IRT(IR_HIOP, t), val,
559 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
561 tmp = emitir(IRT(irs->o, t), tmp, val);
562 continue;
564 tmp = emitir(irs->ot, tmp, val);
565 } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
566 emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
572 J->base = J->slot + J->baseslot;
573 J->maxslot = snap->nslots - J->baseslot;
574 lj_snap_add(J);
575 if (pass23) /* Need explicit GC step _after_ initial snapshot. */
576 emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
579 /* -- Snapshot restore ---------------------------------------------------- */
581 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
582 SnapNo snapno, BloomFilter rfilt,
583 IRIns *ir, TValue *o);
585 /* Restore a value from the trace exit state. */
/* Writes the value of parent ref 'ref' at exit 'snapno' into *o.
** Constants are materialized with lj_ir_kvalue(). Everything else is read
** from the spill slot or register recorded in the instruction's RegSP,
** after applying any renames indicated by the bloom filter rfilt.
*/
586 static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
587 SnapNo snapno, BloomFilter rfilt,
588 IRRef ref, TValue *o)
590 IRIns *ir = &T->ir[ref];
591 IRType1 t = ir->t;
592 RegSP rs = ir->prev;
593 if (irref_isk(ref)) { /* Restore constant slot. */
594 lj_ir_kvalue(J->L, o, ir);
595 return;
/* The filter is only a quick negative check; snap_renameref() decides. */
597 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
598 rs = snap_renameref(T, snapno, ref, rs);
599 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
600 int32_t *sps = &ex->spill[regsp_spill(rs)];
601 if (irt_isinteger(t)) {
602 setintV(o, *sps);
603 #if !LJ_SOFTFP
604 } else if (irt_isnum(t)) {
/* A number is read as 64 bits starting at this spill slot. */
605 o->u64 = *(uint64_t *)sps;
606 #endif
607 } else if (LJ_64 && irt_islightud(t)) {
608 /* 64 bit lightuserdata which may escape already has the tag bits. */
609 o->u64 = *(uint64_t *)sps;
610 } else {
611 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
612 setgcrefi(o->gcr, *sps);
613 setitype(o, irt_toitype(t));
615 } else { /* Restore from register. */
616 Reg r = regsp_reg(rs);
617 if (ra_noreg(r)) {
/* Neither spill slot nor register: expected to be an int-to-number CONV
** (see assertion). Restore its operand, then convert for LJ_DUALNUM.
*/
618 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
619 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
620 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
621 return;
622 } else if (irt_isinteger(t)) {
623 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
624 #if !LJ_SOFTFP
625 } else if (irt_isnum(t)) {
626 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
627 #endif
628 } else if (LJ_64 && irt_islightud(t)) {
629 /* 64 bit lightuserdata which may escape already has the tag bits. */
630 o->u64 = ex->gpr[r-RID_MIN_GPR];
631 } else {
/* Primitive types only need their type tag; others also get the GCref. */
632 if (!irt_ispri(t))
633 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
634 setitype(o, irt_toitype(t));
639 #if LJ_HASFFI
640 /* Restore raw data from the trace exit state. */
/* Copies sz bytes (1, 2, 4 or 8, see assertion) of the value of parent
** ref 'ref' to dst. The source is the constant itself, a spill slot or a
** register from the exit state ex, after applying renames.
*/
641 static void snap_restoredata(GCtrace *T, ExitState *ex,
642 SnapNo snapno, BloomFilter rfilt,
643 IRRef ref, void *dst, CTSize sz)
645 IRIns *ir = &T->ir[ref];
646 RegSP rs = ir->prev;
647 int32_t *src;
/* Scratch space for widening a 32 bit source to 64 bits. */
648 uint64_t tmp;
649 if (irref_isk(ref)) {
650 if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
651 src = mref(ir->ptr, int32_t);
652 } else if (sz == 8) {
/* 32 bit constant requested as 8 bytes: zero-extend. */
653 tmp = (uint64_t)(uint32_t)ir->i;
654 src = (int32_t *)&tmp;
655 } else {
656 src = &ir->i;
658 } else {
659 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
660 rs = snap_renameref(T, snapno, ref, rs);
661 if (ra_hasspill(regsp_spill(rs))) {
662 src = &ex->spill[regsp_spill(rs)];
663 if (sz == 8 && !irt_is64(ir->t)) {
/* Non-64 bit value requested as 8 bytes: zero-extend the spill slot. */
664 tmp = (uint64_t)(uint32_t)*src;
665 src = (int32_t *)&tmp;
667 } else {
668 Reg r = regsp_reg(rs);
669 if (ra_noreg(r)) {
670 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
671 lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
/* Restore the 32 bit integer operand into dst, then convert it in place. */
672 snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
673 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
674 return;
676 src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
677 #if !LJ_SOFTFP
/* Register numbers at or above RID_MAX_GPR are read from ex->fpr[]. */
678 if (r >= RID_MAX_GPR) {
679 src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
680 #if LJ_TARGET_PPC
681 if (sz == 4) { /* PPC FPRs are always doubles. */
682 *(float *)dst = (float)*(double *)src;
683 return;
685 #else
/* On big-endian targets a 4 byte value is read from the second word. */
686 if (LJ_BE && sz == 4) src++;
687 #endif
689 #endif
692 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
693 if (sz == 4) *(int32_t *)dst = *src;
694 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
695 else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
696 else *(int16_t *)dst = (int16_t)*src;
698 #endif
700 /* Unsink allocation from the trace exit state. Unsink sunk stores. */
/* Creates the object described by the sunk allocation 'ir' (TNEW, TDUP,
** CNEW or CNEWI, see assertion), stores it in *o and then replays every
** sunk store between the allocation and the snapshot's ref that belongs
** to it, taking the stored values from the exit state ex.
*/
701 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
702 SnapNo snapno, BloomFilter rfilt,
703 IRIns *ir, TValue *o)
705 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
706 ir->o == IR_CNEW || ir->o == IR_CNEWI);
707 #if LJ_HASFFI
708 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
709 CTState *cts = ctype_cts(J->L);
/* The ctype id is the integer constant referenced by op1. */
710 CTypeID id = (CTypeID)T->ir[ir->op1].i;
711 CTSize sz = lj_ctype_size(cts, id);
712 GCcdata *cd = lj_cdata_new(cts, id, sz);
713 setcdataV(J->L, o, cd);
714 if (ir->o == IR_CNEWI) {
/* CNEWI: the initializer is op2, plus the op2 of a following HIOP for a
** split 8 byte value on 32 bit targets.
*/
715 uint8_t *p = (uint8_t *)cdataptr(cd);
716 lua_assert(sz == 4 || sz == 8);
717 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
718 snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
719 if (LJ_BE) p += 4;
720 sz = 4;
722 snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
723 } else {
724 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
725 for (irs = ir+1; irs < irlast; irs++)
726 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
/* The store address is an ADD whose op2 is a constant byte offset. */
727 IRIns *iro = &T->ir[T->ir[irs->op1].op2];
/* The offset is applied to the cdata object pointer itself. */
728 uint8_t *p = (uint8_t *)cd;
729 CTSize szs;
730 lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
731 lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
/* Derive the store size from the IR type of the store. */
732 if (irt_is64(irs->t)) szs = 8;
733 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
734 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
735 else szs = 4;
736 if (LJ_64 && iro->o == IR_KINT64)
737 p += (int64_t)ir_k64(iro)->u64;
738 else
739 p += iro->i;
740 lua_assert(p >= (uint8_t *)cdataptr(cd) &&
741 p + szs <= (uint8_t *)cdataptr(cd) + sz);
742 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
743 lua_assert(szs == 4);
744 snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
745 if (LJ_BE) p += 4;
747 snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
750 } else
751 #endif
753 IRIns *irs, *irlast;
/* TNEW creates a new table from op1/op2; TDUP duplicates a constant table. */
754 GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
755 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
756 settabV(J->L, o, t);
757 irlast = &T->ir[T->snap[snapno].ref];
758 for (irs = ir+1; irs < irlast; irs++)
759 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
760 IRIns *irk = &T->ir[irs->op1];
761 TValue tmp, *val;
762 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
763 irs->o == IR_FSTORE);
764 if (irk->o == IR_FREF) {
/* The only field store expected here sets the metatable (see assertion). */
765 lua_assert(irk->op2 == IRFL_TAB_META);
766 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
767 /* NOBARRIER: The table is new (marked white). */
768 setgcref(t->metatable, obj2gco(tabV(&tmp)));
769 } else {
/* The key is a constant, possibly wrapped in a KSLOT. */
770 irk = &T->ir[irk->op2];
771 if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
772 lj_ir_kvalue(J->L, &tmp, irk);
773 val = lj_tab_set(J->L, t, &tmp);
774 /* NOBARRIER: The table is new (marked white). */
775 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
776 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
/* Soft-float: the following HIOP supplies the high word of the number. */
777 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
778 val->u32.hi = tmp.u32.lo;
785 /* Restore interpreter state from exit state with the help of a snapshot. */
/* Uses snapshot J->exitno of trace J->parent to write the stack slots,
** frame links, L->base and L->top, reading values from the exit state
** passed in exptr. Returns the bytecode PC recorded in the snapshot.
*/
786 const BCIns *lj_snap_restore(jit_State *J, void *exptr)
788 ExitState *ex = (ExitState *)exptr;
789 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
790 GCtrace *T = traceref(J, J->parent);
791 SnapShot *snap = &T->snap[snapno];
792 MSize n, nent = snap->nent;
793 SnapEntry *map = &T->snapmap[snap->mapofs];
/* Frame links are consumed backwards, starting at the snapshot's last
** map entry.
*/
794 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
795 int32_t ftsz0;
796 TValue *frame;
797 BloomFilter rfilt = snap_renamefilter(T, snapno);
/* The PC is stored right after the nent slot entries. */
798 const BCIns *pc = snap_pc(map[nent]);
799 lua_State *L = J->L;
801 /* Set interpreter PC to the next PC to get correct error messages. */
802 setcframe_pc(cframe_raw(L->cframe), pc+1);
804 /* Make sure the stack is big enough for the slots from the snapshot. */
805 if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
806 L->top = curr_topL(L);
807 lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
810 /* Fill stack slots with data from the registers and spill slots. */
/* Slot numbers in the snapshot are relative to this pointer. */
811 frame = L->base-1;
812 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
813 for (n = 0; n < nent; n++) {
814 SnapEntry sn = map[n];
815 if (!(sn & SNAP_NORESTORE)) {
816 TValue *o = &frame[snap_slot(sn)];
817 IRRef ref = snap_ref(sn);
818 IRIns *ir = &T->ir[ref];
819 if (ir->r == RID_SUNK) {
/* Sunk allocation: reuse the object from an earlier slot with the same
** ref, otherwise create it now with snap_unsink().
*/
820 MSize j;
821 for (j = 0; j < n; j++)
822 if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
823 copyTV(L, o, &frame[snap_slot(map[j])]);
824 goto dupslot;
826 snap_unsink(J, T, ex, snapno, rfilt, ir, o);
827 dupslot:
828 continue;
830 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
831 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
/* Soft-float number: the high word comes from the next ref. */
832 TValue tmp;
833 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
834 o->u32.hi = tmp.u32.lo;
835 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
836 /* Overwrite tag with frame link. */
837 o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
/* The last frame entry processed determines the final L->base. */
838 L->base = o+1;
/* All frame links must have been consumed, leaving flinks at the PC entry. */
842 lua_assert(map + nent == flinks);
844 /* Compute current stack top. */
/* The four opcodes listed below, and any opcode at or above BC_FUNCF,
** set the top from the snapshot's slot count; everything else uses
** curr_topL().
*/
845 switch (bc_op(*pc)) {
846 default:
847 if (bc_op(*pc) < BC_FUNCF) {
848 L->top = curr_topL(L);
849 break;
851 /* fallthrough */
852 case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
853 L->top = frame + snap->nslots;
854 break;
856 return pc;
859 #undef emitir_raw
860 #undef emitir
862 #endif