[luajit-2.0.git] / src / lj_snap.c
/*
** Snapshot handling.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)  (&J->cur.ir[(ref)])

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}
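
/* The policy above prefers doubling over exact sizing: doubling amortizes
** reallocation cost, and the floor of 64 entries avoids a series of tiny
** early growth steps. A standalone sketch of the same policy (illustrative
** only, hypothetical name, not part of the build):
*/
#if 0
static MSize snapmap_grow_size(MSize cur, MSize need)
{
  if (need < 2*cur) return 2*cur;  /* Doubling covers the request. */
  if (need < 64) return 64;        /* Never allocate fewer than 64 entries. */
  return need;                     /* Large requests are taken verbatim. */
}
#endif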

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = IR(ref);
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}
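
/* Each SnapEntry produced above packs the slot number, the flag bits and
** the IR reference into one 32-bit word. A sketch of the packing, assuming
** the 2.0 layout (slot in the top byte, flags like SNAP_FRAME/SNAP_CONT/
** SNAP_NORESTORE in between, ref in the low 16 bits); the SKETCH_ names
** are hypothetical stand-ins for SNAP_TR()/snap_slot()/snap_ref():
*/
#if 0
#define SKETCH_TR(slot, flags, ref) \
  (((uint32_t)(slot) << 24) | (flags) | (ref))
#define SKETCH_SLOT(sn)  ((BCReg)((sn) >> 24))
#define SKETCH_REF(sn)   ((IRRef)((sn) & 0xffff))
#endif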

/* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot;
  cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
      map[f++] = SNAP_MKPC(frame_pc(frame));
      frame = frame_prevl(frame);
      if (frame + funcproto(frame_func(frame))->framesize > ftop)
        ftop = frame + funcproto(frame_func(frame))->framesize;
    } else if (frame_iscont(frame)) {
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      frame = frame_prevd(frame);
    }
  }
  lua_assert(f == (MSize)(1 + J->framedepth));
  return (BCReg)(ftop - lim);
}
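
/* Together, snapshot_slots() and snapshot_framelinks() give one snapshot
** the following map layout:
**
**   map[0..nent-1]   slot entries, in ascending slot order
**   map[nent]        PC of the instruction taking the snapshot
**   map[nent+1..]    frame link words (FTSZ/PC), innermost frame first
**
** Illustrative accessor under that layout (hypothetical helper, not part
** of the build); lj_snap_restore() below reads the PC the same way:
*/
#if 0
static const BCIns *snapshot_pc(GCtrace *T, SnapShot *snap)
{
  SnapEntry *map = &T->snapmap[snap->mapofs];
  return snap_pc(map[snap->nent]);  /* First entry after the slots. */
}
#endif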

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate: one entry per slot, plus the PC and frame links. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no instructions were emitted in between, or if a merge was
  ** requested and no guard was emitted in between.
  */
  if (J->mergesnap ? !irt_isguard(J->guardemit) :
      (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}
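
/* Note: merging rewinds nsnap, so snapshot_stack() overwrites the previous
** snapshot in place and reuses its map offset. Otherwise a fresh SnapShot
** is appended and the map grows at the end.
*/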

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS  (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)  udf[(s)] &= ~1
#define DEF_SLOT(s)  udf[(s)] *= 3
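
/* Each udf[] byte starts out as 1: no use or def seen yet. USE_SLOT clears
** bit 0, so a use that happens before any def drives the byte to 0 (live).
** DEF_SLOT multiplies by 3: a fresh 1 becomes 3 (defined before any use,
** so the snapshotted value is dead), an already-live 0 stays 0, and a later
** USE_SLOT on 3 yields 2, still nonzero, since a use after a redefinition
** must not resurrect the old value. Hence udf[s] != 0 means slot s is dead.
*/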

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins));
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}
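
/* Illustrative reduction of the analysis above (not part of the build):
** scan forward over a toy instruction stream with explicit use/def lists
** and apply the same USE/DEF marking. All names here are hypothetical:
*/
#if 0
typedef struct ToyIns { uint8_t nuse, use[2], ndef, def[2]; } ToyIns;

static void toy_usedef(const ToyIns *ins, MSize n, uint8_t *udf, BCReg nslots)
{
  MSize i, j;
  memset(udf, 1, nslots);          /* 1 = no use or def seen yet. */
  for (i = 0; i < n; i++) {
    for (j = 0; j < ins[i].nuse; j++)
      udf[ins[i].use[j]] &= ~1;    /* Use: the slot value may be live. */
    for (j = 0; j < ins[i].ndef; j++)
      udf[ins[i].def[j]] *= 3;     /* Def: dead unless already used. */
  }
  /* udf[s] != 0 now means the snapshotted value of slot s is dead. */
}
#endif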

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  BCReg baseslot = J->baseslot;
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}
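
/* The loop above is the standard stable in-place filter: a read cursor (n)
** and a write cursor (m <= n) over the same array, so no extra buffer is
** needed. Generic form of the idiom (illustrative, hypothetical names):
*/
#if 0
static MSize compact_entries(SnapEntry *a, MSize n, int (*keep)(SnapEntry))
{
  MSize r, w = 0;
  for (r = 0; r < n; r++)
    if (keep(a[r]))
      a[w++] = a[r];  /* The write cursor never overtakes the read cursor. */
  return w;  /* New number of entries. */
}
#endif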

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}
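
/* A BloomFilter here is a single machine word: bloomset() hashes a key to
** one bit position and sets it, bloomtest() checks that bit. A clear bit
** proves absence; a set bit only means "maybe present". Standalone sketch
** of the idea (mirrors the macros in lj_def.h, assuming a 64-bit word);
** the bloomsk_ names are hypothetical:
*/
#if 0
typedef uint64_t BloomSketch;
#define bloomsk_bit(x)      ((BloomSketch)1 << ((x) & 63))
#define bloomsk_set(b, x)   ((b) |= bloomsk_bit(x))
#define bloomsk_test(b, x)  ((b) & bloomsk_bit(x))  /* 0 = definitely absent. */
#endif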

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}
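
/* IR_RENAME instructions are always appended at the tail of the IR, which
** is why both scans above can walk backwards from T->nins and stop at the
** first non-RENAME instruction. For each RENAME, op1 is the renamed ref,
** op2 the snapshot number from which the rename applies, and prev caches
** the RegSP to use for exits taken at those snapshots.
*/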

/* Convert a snapshot into a linear slot -> RegSP map.
** Note: unused slots are not initialized!
*/
void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi)
{
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    if (!irref_isk(ref) &&
        ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : 1)) {
      IRIns *ir = &T->ir[ref];
      uint32_t rs = ir->prev;
      if (bloomtest(rfilt, ref))
        rs = snap_renameref(T, snapno, ref, rs);
      rsmap[snap_slot(sn)] = (uint16_t)rs;
    }
  }
}
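
/* A RegSP packs a register id and a spill slot into 16 bits; regsp_reg()
** and regsp_spill() pick it apart. Sketch of the encoding assumed here
** (matching REGSP() in lj_target.h: register in the low byte, spill slot
** above it); the SKETCH_ names are hypothetical:
*/
#if 0
#define SKETCH_REGSP(r, s)      ((uint16_t)((r) + ((s) << 8)))
#define SKETCH_REGSP_REG(rs)    ((rs) & 255)
#define SKETCH_REGSP_SPILL(rs)  ((rs) >> 8)
#endif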

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    BCReg s = snap_slot(sn);
    TValue *o = &frame[s];  /* Stack slots are relative to start frame. */
    IRIns *ir = &T->ir[ref];
    if (irref_isk(ref)) {  /* Restore constant slot. */
      lj_ir_kvalue(L, o, ir);
    } else if (!(sn & SNAP_NORESTORE)) {
      IRType1 t = ir->t;
      RegSP rs = ir->prev;
      if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
        rs = snap_renameref(T, snapno, ref, rs);
      if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
        int32_t *sps = &ex->spill[regsp_spill(rs)];
        if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
          o->u32.lo = (uint32_t)*sps;
        } else if (irt_isinteger(t)) {
          setintV(o, *sps);
#if !LJ_SOFTFP
        } else if (irt_isnum(t)) {
          o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64
        } else if (irt_islightud(t)) {
          /* 64 bit lightuserdata which may escape already has the tag bits. */
          o->u64 = *(uint64_t *)sps;
#endif
        } else {
          lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
          setgcrefi(o->gcr, *sps);
          setitype(o, irt_toitype(t));
        }
      } else {  /* Restore from register. */
        Reg r = regsp_reg(rs);
        lua_assert(ra_hasreg(r));
        if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
          o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR];
        } else if (irt_isinteger(t)) {
          setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
        } else if (irt_isnum(t)) {
          setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
#if LJ_64
        } else if (irt_islightud(t)) {
          /* 64 bit lightuserdata which may escape already has the tag bits. */
          o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
        } else {
          if (!irt_ispri(t))
            setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
          setitype(o, irt_toitype(t));
        }
      }
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
        rs = (ir+1)->prev;
        if (LJ_UNLIKELY(bloomtest(rfilt, ref+1)))
          rs = snap_renameref(T, snapno, ref+1, rs);
        o->u32.hi = (ra_hasspill(regsp_spill(rs))) ?
            (uint32_t)ex->spill[regsp_spill(rs)] :
            (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR];
      }
    }
    if ((sn & (SNAP_CONT|SNAP_FRAME))) {  /* Overwrite tag with frame link. */
      o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0;
      L->base = o+1;
    }
  }
  switch (bc_op(*pc)) {
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  default:
    L->top = curr_topL(L);
    break;
  }
  lua_assert(map + nent == flinks);
  return pc;
}
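
/* Summary of the restore path: constants are rematerialized straight from
** the IR; all other slots are read from the exit state, either from a spill
** slot or from the saved register named by the (possibly renamed) RegSP.
** Frame links stored at the end of the snapshot map are written back into
** the tag words of frame slots, which also re-establishes L->base.
*/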

#undef IR

#endif