source/libs/luajit/LuaJIT-src/src/lj_snap.c

   1 /*
   2 ** Snapshot handling.
   3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5
   6 #define lj_snap_c
   7 #define LUA_CORE
   8
   9 #include "lj_obj.h"
  10
  11 #if LJ_HASJIT
  12
  13 #include "lj_gc.h"
  14 #include "lj_tab.h"
  15 #include "lj_state.h"
  16 #include "lj_frame.h"
  17 #include "lj_bc.h"
  18 #include "lj_ir.h"
  19 #include "lj_jit.h"
  20 #include "lj_iropt.h"
  21 #include "lj_trace.h"
  22 #include "lj_snap.h"
  23 #include "lj_target.h"
  24 #if LJ_HASFFI
  25 #include "lj_ctype.h"
  26 #include "lj_cdata.h"
  27 #endif
  28
  29 /* Pass IR on to next optimization in chain (FOLD).
     ** Comma expression: lj_ir_set() is called with the opcode/type and both
     ** operands first, then lj_opt_fold(J) is called and its value is the value
     ** of the macro. A variable named J must be in scope at the point of use.
     */
  30 #define emitir(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
  31
  32 /* Emit raw IR without passing through optimizations.
     ** Same shape as emitir(), but the second call is lj_ir_emit(J) instead of
     ** lj_opt_fold(J). Also requires a variable named J in scope.
     */
  33 #define emitir_raw(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
  34
  35 /* -- Snapshot buffer allocation ------------------------------------------ */
  36
  37 /* Grow snapshot buffer. */
  38 void lj_snap_grow_buf_(jit_State *J, MSize need)
  39 {
  40   MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  41   if (need > maxsnap)
  42     lj_trace_err(J, LJ_TRERR_SNAPOV);
  43   lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  44   J->cur.snap = J->snapbuf;
  45 }
  46
  47 /* Grow snapshot map buffer. */
  48 void lj_snap_grow_map_(jit_State *J, MSize need)
  49 {
  50   if (need < 2*J->sizesnapmap)
  51     need = 2*J->sizesnapmap;
  52   else if (need < 64)
  53     need = 64;
  54   J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
  55                     J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  56   J->cur.snapmap = J->snapmapbuf;
  57   J->sizesnapmap = need;
  58 }
  59
  60 /* -- Snapshot generation ------------------------------------------------- */
  61
  62 /* Add all modified slots to the snapshot. */
  63 static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
  64 {
  65   IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  66   BCReg s;
  67   MSize n = 0;
  68   for (s = 0; s < nslots; s++) {
  69     TRef tr = J->slot[s];
  70     IRRef ref = tref_ref(tr);
  71     if (ref) {
  72       SnapEntry sn = SNAP_TR(s, tr);
  73       IRIns *ir = &J->cur.ir[ref];
  74       if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
  75           ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
  76         /* No need to snapshot unmodified non-inherited slots. */
  77         if (!(ir->op2 & IRSLOAD_INHERIT))
  78           continue;
  79         /* No need to restore readonly slots and unmodified non-parent slots. */
  80         if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
  81             (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
  82           sn |= SNAP_NORESTORE;
  83       }
  84       if (LJ_SOFTFP && irt_isnum(ir->t))
  85         sn |= SNAP_SOFTFPNUM;
  86       map[n++] = sn;
  87     }
  88   }
  89   return n;
  90 }
  91
  92 /* Add frame links at the end of the snapshot.
     **
     ** J    JIT state; reads J->L->base, J->L->top, J->baseslot, J->pc.
     ** map  Destination; the caller passes the position right after the slot
     **      entries of the snapshot.
     **
     ** Writes the current PC as the first entry, then walks the frames from
     ** L->base-1 down towards L->base - J->baseslot and appends entries for each.
     ** Returns the highest frame top seen, as an offset from that lower limit.
     */
  93 static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
  94 {
  95   cTValue *frame = J->L->base - 1;
  96   cTValue *lim = J->L->base - J->baseslot;
  97   GCfunc *fn = frame_func(frame);
     /* Initial top: frame size of a Lua function, else the current stack top. */
  98   cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
  99   MSize f = 0;
 100   lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
 101   map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
 102   while (frame > lim) {  /* Backwards traversal of all frames above base. */
 103     if (frame_islua(frame)) {
 104       map[f++] = SNAP_MKPC(frame_pc(frame));  /* Lua frame: one PC entry. */
 105       frame = frame_prevl(frame);
 106     } else if (frame_iscont(frame)) {
     /* Continuation frame: two entries, the frame size/type and the PC. */
 107       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
 108       map[f++] = SNAP_MKPC(frame_contpc(frame));
 109       frame = frame_prevd(frame);
 110     } else {
 111       lua_assert(!frame_isc(frame));
 112       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
 113       frame = frame_prevd(frame);
 114       continue;  /* Skips the frame top update below for this kind of frame. */
 115     }
     /* Track the maximum top over the frames stepped into above. */
 116     if (frame + funcproto(frame_func(frame))->framesize > ftop)
 117       ftop = frame + funcproto(frame_func(frame))->framesize;
 118   }
     /* Entry count must match: one for the current PC plus J->framedepth. */
 119   lua_assert(f == (MSize)(1 + J->framedepth));
 120   return (BCReg)(ftop - lim);
 121 }
 122
 123 /* Take a snapshot of the current stack. */
 124 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
 125 {
 126   BCReg nslots = J->baseslot + J->maxslot;
 127   MSize nent;
 128   SnapEntry *p;
 129   /* Conservative estimate. */
 130   lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
 131   p = &J->cur.snapmap[nsnapmap];
 132   nent = snapshot_slots(J, p, nslots);
 133   snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
 134   snap->mapofs = (uint16_t)nsnapmap;
 135   snap->ref = (IRRef1)J->cur.nins;
 136   snap->nent = (uint8_t)nent;
 137   snap->nslots = (uint8_t)nslots;
 138   snap->count = 0;
 139   J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
 140 }
 141
 142 /* Add or merge a snapshot. */
 143 void lj_snap_add(jit_State *J)
 144 {
 145   MSize nsnap = J->cur.nsnap;
 146   MSize nsnapmap = J->cur.nsnapmap;
 147   /* Merge if no ins. inbetween or if requested and no guard inbetween. */
 148   if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
 149       (J->mergesnap && !irt_isguard(J->guardemit))) {
 150     if (nsnap == 1) {  /* But preserve snap #0 PC. */
 151       emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
 152       goto nomerge;
 153     }
 154     nsnapmap = J->cur.snap[--nsnap].mapofs;
 155   } else {
 156   nomerge:
 157     lj_snap_grow_buf(J, nsnap+1);
 158     J->cur.nsnap = (uint16_t)(nsnap+1);
 159   }
 160   J->mergesnap = 0;
 161   J->guardemit.irt = 0;
 162   snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
 163 }
 164
 165 /* -- Snapshot modification ----------------------------------------------- */
 166
 167 #define SNAP_USEDEF_SLOTS       (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)  /* Size of the udf[] arrays handed to snap_usedef(). */
 168
 169 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
 170 static BCReg snap_usedef(jit_State *J, uint8_t *udf,
 171                          const BCIns *pc, BCReg maxslot)
 172 {
 173   BCReg s;
 174   GCobj *o;
 175
 176   if (maxslot == 0) return 0;
 177 #ifdef LUAJIT_USE_VALGRIND
 178   /* Avoid errors for harmless reads beyond maxslot. */
 179   memset(udf, 1, SNAP_USEDEF_SLOTS);
 180 #else
 181   memset(udf, 1, maxslot);
 182 #endif
 183
 184   /* Treat open upvalues as used. */
 185   o = gcref(J->L->openupval);
 186   while (o) {
 187     if (uvval(gco2uv(o)) < J->L->base) break;
 188     udf[uvval(gco2uv(o)) - J->L->base] = 0;
 189     o = gcref(o->gch.nextgc);
 190   }
 191
 192 #define USE_SLOT(s)             udf[(s)] &= ~1
 193 #define DEF_SLOT(s)             udf[(s)] *= 3
 194
 195   /* Scan through following bytecode and check for uses/defs. */
 196   lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
 197   for (;;) {
 198     BCIns ins = *pc++;
 199     BCOp op = bc_op(ins);
 200     switch (bcmode_b(op)) {
 201     case BCMvar: USE_SLOT(bc_b(ins)); break;
 202     default: break;
 203     }
 204     switch (bcmode_c(op)) {
 205     case BCMvar: USE_SLOT(bc_c(ins)); break;
 206     case BCMrbase:
 207       lua_assert(op == BC_CAT);
 208       for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
 209       for (; s < maxslot; s++) DEF_SLOT(s);
 210       break;
 211     case BCMjump:
 212     handle_jump: {
 213       BCReg minslot = bc_a(ins);
 214       if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
 215       else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
 216       else if (op == BC_UCLO) { pc += bc_j(ins); break; }
 217       for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
 218       return minslot < maxslot ? minslot : maxslot;
 219       }
 220     case BCMlit:
 221       if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
 222         goto handle_jump;
 223       } else if (bc_isret(op)) {
 224         BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
 225         for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
 226         for (; s < top; s++) USE_SLOT(s);
 227         for (; s < maxslot; s++) DEF_SLOT(s);
 228         return 0;
 229       }
 230       break;
 231     case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
 232     default: break;
 233     }
 234     switch (bcmode_a(op)) {
 235     case BCMvar: USE_SLOT(bc_a(ins)); break;
 236     case BCMdst:
 237        if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
 238        break;
 239     case BCMbase:
 240       if (op >= BC_CALLM && op <= BC_VARG) {
 241         BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
 242                     maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
 243         if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
 244         s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
 245         for (; s < top; s++) USE_SLOT(s);
 246         for (; s < maxslot; s++) DEF_SLOT(s);
 247         if (op == BC_CALLT || op == BC_CALLMT) {
 248           for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
 249           return 0;
 250         }
 251       } else if (op == BC_KNIL) {
 252         for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
 253       } else if (op == BC_TSETM) {
 254         for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
 255       }
 256       break;
 257     default: break;
 258     }
 259     lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
 260   }
 261
 262 #undef USE_SLOT
 263 #undef DEF_SLOT
 264
 265   return 0;  /* unreachable */
 266 }
 267
 268 /* Purge dead slots before the next snapshot. */
 269 void lj_snap_purge(jit_State *J)
 270 {
 271   uint8_t udf[SNAP_USEDEF_SLOTS];
 272   BCReg maxslot = J->maxslot;
 273   BCReg s = snap_usedef(J, udf, J->pc, maxslot);
 274   for (; s < maxslot; s++)
 275     if (udf[s] != 0)
 276       J->base[s] = 0;  /* Purge dead slots. */
 277 }
 278
 279 /* Shrink last snapshot. */
 280 void lj_snap_shrink(jit_State *J)
 281 {
 282   SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
 283   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
 284   MSize n, m, nlim, nent = snap->nent;
 285   uint8_t udf[SNAP_USEDEF_SLOTS];
 286   BCReg maxslot = J->maxslot;
 287   BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
 288   BCReg baseslot = J->baseslot;
 289   maxslot += baseslot;
 290   minslot += baseslot;
 291   snap->nslots = (uint8_t)maxslot;
 292   for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
 293     BCReg s = snap_slot(map[n]);
 294     if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
 295       map[m++] = map[n];  /* Only copy used slots. */
 296   }
 297   snap->nent = (uint8_t)m;
 298   nlim = J->cur.nsnapmap - snap->mapofs - 1;
 299   while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
 300   J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
 301 }
 302
 303 /* -- Snapshot access ----------------------------------------------------- */
 304
 305 /* Initialize a Bloom Filter with all renamed refs.
 306 ** There are very few renames (often none), so the filter has
 307 ** very few bits set. This makes it suitable for negative filtering.
 308 */
 309 static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
 310 {
 311   BloomFilter rfilt = 0;
 312   IRIns *ir;
 313   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
 314     if (ir->op2 <= lim)
 315       bloomset(rfilt, ir->op1);
 316   return rfilt;
 317 }
 318
 319 /* Process matching renames to find the original RegSP. */
 320 static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
 321 {
 322   IRIns *ir;
 323   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
 324     if (ir->op1 == ref && ir->op2 <= lim)
 325       rs = ir->prev;
 326   return rs;
 327 }
 328
 329 /* Copy RegSP from parent snapshot to the parent links of the IR.
     **
     ** T       Parent trace.
     ** snapno  Number of the parent snapshot to take the mapping from.
     ** ir      First instruction to process.
     **
     ** Processes consecutive instructions starting at ir: SLOADs carrying
     ** IRSLOAD_PARENT, HIOPs (only if LJ_SOFTFP) and PVALs. For each one the
     ** RegSP of the corresponding parent ref, adjusted for renames up to
     ** snapno, is stored in ir->prev. Returns a pointer to the first
     ** instruction that does not belong to this run.
     */
 330 IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
 331 {
 332   SnapShot *snap = &T->snap[snapno];
 333   SnapEntry *map = &T->snapmap[snap->mapofs];
 334   BloomFilter rfilt = snap_renamefilter(T, snapno);
 335   MSize n = 0;
 336   IRRef ref = 0;
 337   for ( ; ; ir++) {
 338     uint32_t rs;
 339     if (ir->o == IR_SLOAD) {
 340       if (!(ir->op2 & IRSLOAD_PARENT)) break;
     /* Find the snapshot entry for this slot. n is never reset, so the
     ** search only moves forward through the map across all SLOADs.
     */
 341       for ( ; ; n++) {
 342         lua_assert(n < snap->nent);
 343         if (snap_slot(map[n]) == ir->op1) {
 344           ref = snap_ref(map[n++]);
 345           break;
 346         }
 347       }
 348     } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
 349       ref++;  /* Uses the parent ref right after the previously handled one. */
 350     } else if (ir->o == IR_PVAL) {
 351       ref = ir->op1 + REF_BIAS;  /* PVAL stores the parent ref without the bias. */
 352     } else {
 353       break;
 354     }
 355     rs = T->ir[ref].prev;
     /* The Bloom filter is a cheap pre-check before scanning the renames. */
 356     if (bloomtest(rfilt, ref))
 357       rs = snap_renameref(T, snapno, ref, rs);
 358     ir->prev = (uint16_t)rs;
 359     lua_assert(regsp_used(rs));
 360   }
 361   return ir;
 362 }
 363
 364 /* -- Snapshot replay ----------------------------------------------------- */
 365
 366 /* Replay constant from parent trace. */
 367 static TRef snap_replay_const(jit_State *J, IRIns *ir)
 368 {
 369   /* Only have to deal with constants that can occur in stack slots. */
 370   switch ((IROp)ir->o) {
 371   case IR_KPRI: return TREF_PRI(irt_type(ir->t));
 372   case IR_KINT: return lj_ir_kint(J, ir->i);
 373   case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
 374   case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
 375   case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
 376   case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
 377   default: lua_assert(0); return TREF_NIL; break;
 378   }
 379 }
 380
 381 /* De-duplicate parent reference. */
 382 static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
 383 {
 384   MSize j;
 385   for (j = 0; j < nmax; j++)
 386     if (snap_ref(map[j]) == ref)
 387       return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
 388   return 0;
 389 }
 390
 391 /* Emit parent reference with de-duplication. */
 392 static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
 393                       BloomFilter seen, IRRef ref)
 394 {
 395   IRIns *ir = &T->ir[ref];
 396   TRef tr;
 397   if (irref_isk(ref))
 398     tr = snap_replay_const(J, ir);
 399   else if (!regsp_used(ir->prev))
 400     tr = 0;
 401   else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
 402     tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
 403   return tr;
 404 }
 405
 406 /* Check whether a sunk store corresponds to an allocation. Slow path. */
 407 static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
 408 {
 409   if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
 410       irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
 411     IRIns *irk = &T->ir[irs->op1];
 412     if (irk->o == IR_AREF || irk->o == IR_HREFK)
 413       irk = &T->ir[irk->op1];
 414     return (&T->ir[irk->op1] == ira);
 415   }
 416   return 0;
 417 }
 418
 419 /* Check whether a sunk store corresponds to an allocation. Fast path. */
 420 static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
 421 {
 422   if (irs->s != 255)
 423     return (ira + irs->s == irs);  /* Fast check. */
 424   return snap_sunk_store2(T, ira, irs);
 425 }
 426
 427 /* Replay snapshot state to setup side trace.
     **
     ** J  JIT state of the side trace being started; J->exitno selects the
     **    snapshot of the parent.
     ** T  Parent trace.
     **
     ** Works in up to three passes over the entries of the parent snapshot:
     **   1. Fill J->slot[] for every entry: a replayed constant, an inherited
     **      SLOAD, or -- if the parent ref has no register/spill slot -- the
     **      slot's own number as a placeholder.
     **   2. Only if pass 1 left placeholders: emit the PVALs that sunk
     **      allocations and their sunk stores depend on, and resolve
     **      placeholders that are not sunk allocations.
     **   3. Only if pass 2 found a sunk allocation: re-emit the allocations
     **      and their sunk stores in the side trace.
     ** Afterwards J->base and J->maxslot are set up, the initial snapshot is
     ** taken and, if pass 3 ran, a guarded GCSTEP is emitted.
     */
 428 void lj_snap_replay(jit_State *J, GCtrace *T)
 429 {
 430   SnapShot *snap = &T->snap[J->exitno];
 431   SnapEntry *map = &T->snapmap[snap->mapofs];
 432   MSize n, nent = snap->nent;
 433   BloomFilter seen = 0;
 434   int pass23 = 0;
 435   J->framedepth = 0;
 436   /* Emit IR for slots inherited from parent snapshot. */
 437   for (n = 0; n < nent; n++) {
 438     SnapEntry sn = map[n];
 439     BCReg s = snap_slot(sn);
 440     IRRef ref = snap_ref(sn);
 441     IRIns *ir = &T->ir[ref];
 442     TRef tr;
 443     /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
 444     if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
 445       goto setslot;
 446     bloomset(seen, ref);
 447     if (irref_isk(ref)) {
 448       tr = snap_replay_const(J, ir);
 449     } else if (!regsp_used(ir->prev)) {
     /* No register or spill slot: store the slot number itself as a
     ** placeholder, to be resolved in the later passes.
     */
 450       pass23 = 1;
 451       lua_assert(s != 0);
 452       tr = s;
 453     } else {
 454       IRType t = irt_type(ir->t);
 455       uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
 456       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
 457       if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
 458       tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
 459     }
 460   setslot:
 461     J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
     /* Every continuation/frame entry above slot 0 adds one frame level. */
 462     J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
 463     if ((sn & SNAP_FRAME))
 464       J->baseslot = s+1;
 465   }
 466   if (pass23) {
 467     IRIns *irlast = &T->ir[snap->ref];
     /* From here on pass23 tracks whether a sunk allocation must be replayed. */
 468     pass23 = 0;
 469     /* Emit dependent PVALs. */
 470     for (n = 0; n < nent; n++) {
 471       SnapEntry sn = map[n];
 472       IRRef refp = snap_ref(sn);
 473       IRIns *ir = &T->ir[refp];
 474       if (regsp_reg(ir->r) == RID_SUNK) {
     /* Skip slots that no longer hold their own placeholder. */
 475         if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
 476         pass23 = 1;
 477         lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
 478                    ir->o == IR_CNEW || ir->o == IR_CNEWI);
 479         if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
 480         if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
 481         if (LJ_HASFFI && ir->o == IR_CNEWI) {
 482           if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
 483             snap_pref(J, T, map, nent, seen, (ir+1)->op2);
 484         } else {
 485           IRIns *irs;
     /* Scan the stores sunk into this allocation up to the snapshot. */
 486           for (irs = ir+1; irs < irlast; irs++)
 487             if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
     /* A result of 0 means the stored value has no register/spill
     ** slot; its first operand is referenced instead.
     */
 488               if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
 489                 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
 490               else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
 491                        irs+1 < irlast && (irs+1)->o == IR_HIOP)
 492                 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
 493             }
 494         }
 495       } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
     /* Placeholder for a non-sunk ref: asserted to be an int-to-number
     ** CONV, resolved to the reference for its operand.
     */
 496         lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
 497         J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
 498       }
 499     }
 500     /* Replay sunk instructions. */
 501     for (n = 0; pass23 && n < nent; n++) {
 502       SnapEntry sn = map[n];
 503       IRRef refp = snap_ref(sn);
 504       IRIns *ir = &T->ir[refp];
 505       if (regsp_reg(ir->r) == RID_SUNK) {
 506         TRef op1, op2;
     /* A slot whose content is another slot number refers to an allocation
     ** that was (or will be) replayed for that other slot: copy its content.
     */
 507         if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
 508           J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
 509           continue;
 510         }
     /* Operands at or above T->nk are mapped to refs of this trace. */
 511         op1 = ir->op1;
 512         if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
 513         op2 = ir->op2;
 514         if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
 515         if (LJ_HASFFI && ir->o == IR_CNEWI) {
 516           if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
 517             lj_needsplit(J);  /* Emit joining HIOP. */
 518             op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
 519                              snap_pref(J, T, map, nent, seen, (ir+1)->op2));
 520           }
 521           J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
 522         } else {
 523           IRIns *irs;
 524           TRef tr = emitir(ir->ot, op1, op2);
 525           J->slot[snap_slot(sn)] = tr;
     /* Re-emit every store that was sunk into this allocation. */
 526           for (irs = ir+1; irs < irlast; irs++)
 527             if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
 528               IRIns *irr = &T->ir[irs->op1];
 529               TRef val, key = irr->op2, tmp = tr;
     /* For anything but FREF the key is a constant of the parent
     ** trace and has to be replayed first.
     */
 530               if (irr->o != IR_FREF) {
 531                 IRIns *irk = &T->ir[key];
 532                 if (irr->o == IR_HREFK)
 533                   key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
 534                                     irk->op2);
 535                 else
 536                   key = snap_replay_const(J, irk);
 537                 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
     /* Re-emit the instruction the reference is based on. */
 538                   IRIns *irf = &T->ir[irr->op1];
 539                   tmp = emitir(irf->ot, tmp, irf->op2);
 540                 }
 541               }
 542               tmp = emitir(irr->ot, tmp, key);
 543               val = snap_pref(J, T, map, nent, seen, irs->op2);
 544               if (val == 0) {
     /* Stored value has no register/spill slot: re-emit the CONV. */
 545                 IRIns *irc = &T->ir[irs->op2];
 546                 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
 547                 val = snap_pref(J, T, map, nent, seen, irc->op1);
 548                 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
 549               } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
 550                          irs+1 < irlast && (irs+1)->o == IR_HIOP) {
     /* Store followed by a HIOP: join both halves into one value. */
 551                 IRType t = IRT_I64;
 552                 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
 553                   t = IRT_NUM;
 554                 lj_needsplit(J);
 555                 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
     /* Both halves are constants: build a single 64 bit constant. */
 556                   uint64_t k = (uint32_t)T->ir[irs->op2].i +
 557                                ((uint64_t)T->ir[(irs+1)->op2].i << 32);
 558                   val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
 559                                   lj_ir_k64_find(J, k));
 560                 } else {
 561                   val = emitir_raw(IRT(IR_HIOP, t), val,
 562                           snap_pref(J, T, map, nent, seen, (irs+1)->op2));
 563                 }
 564                 tmp = emitir(IRT(irs->o, t), tmp, val);
 565                 continue;
 566               }
 567               tmp = emitir(irs->ot, tmp, val);
 568             } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
 569               emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
 570             }
 571         }
 572       }
 573     }
 574   }
 575   J->base = J->slot + J->baseslot;
 576   J->maxslot = snap->nslots - J->baseslot;
 577   lj_snap_add(J);
 578   if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
 579     emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
 580 }
 581
 582 /* -- Snapshot restore ---------------------------------------------------- */
 583
 584 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
 585                         SnapNo snapno, BloomFilter rfilt,
 586                         IRIns *ir, TValue *o);  /* Forward declaration; the definition follows further down. */
 587
 588 /* Restore a value from the trace exit state.
     **
     ** J       JIT state.
     ** T       Trace that was exited.
     ** ex      Exit state holding the saved registers and spill slots.
     ** snapno  Snapshot number, used to select the applicable renames.
     ** rfilt   Bloom filter of renamed refs, see snap_renamefilter().
     ** ref     IR ref of the value to restore.
     ** o       Destination for the restored tagged value.
     **
     ** Constants are materialized directly. Other values are read from the
     ** spill slot or register recorded for the ref (after applying renames)
     ** and tagged according to the IR type.
     */
 589 static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
 590                             SnapNo snapno, BloomFilter rfilt,
 591                             IRRef ref, TValue *o)
 592 {
 593   IRIns *ir = &T->ir[ref];
 594   IRType1 t = ir->t;
 595   RegSP rs = ir->prev;
 596   if (irref_isk(ref)) {  /* Restore constant slot. */
 597     lj_ir_kvalue(J->L, o, ir);
 598     return;
 599   }
     /* Only scan the renames if the filter says the ref may have been renamed. */
 600   if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
 601     rs = snap_renameref(T, snapno, ref, rs);
 602   lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
 603   if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
 604     int32_t *sps = &ex->spill[regsp_spill(rs)];
 605     if (irt_isinteger(t)) {
 606       setintV(o, *sps);
 607 #if !LJ_SOFTFP
 608     } else if (irt_isnum(t)) {
 609       o->u64 = *(uint64_t *)sps;  /* Copies 64 bits starting at the spill slot. */
 610 #endif
 611     } else if (LJ_64 && irt_islightud(t)) {
 612       /* 64 bit lightuserdata which may escape already has the tag bits. */
 613       o->u64 = *(uint64_t *)sps;
 614     } else {
 615       lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
 616       setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
 617     }
 618   } else {  /* Restore from register. */
 619     Reg r = regsp_reg(rs);
 620     if (ra_noreg(r)) {
     /* Neither register nor spill slot: asserted to be an int-to-number
     ** CONV. Restore its operand instead and, for LJ_DUALNUM, convert the
     ** restored integer to a number.
     */
 621       lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
 622       snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
 623       if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
 624       return;
 625     } else if (irt_isinteger(t)) {
 626       setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
 627 #if !LJ_SOFTFP
 628     } else if (irt_isnum(t)) {
 629       setnumV(o, ex->fpr[r-RID_MIN_FPR]);
 630 #endif
 631     } else if (LJ_64 && irt_is64(t)) {
 632       /* 64 bit values that already have the tag bits. */
 633       o->u64 = ex->gpr[r-RID_MIN_GPR];
 634     } else if (irt_ispri(t)) {
 635       setpriV(o, irt_toitype(t));  /* Primitive: the tag is the whole value. */
 636     } else {
 637       setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
 638     }
 639   }
 640 }
 641
 642 #if LJ_HASFFI
 643 /* Restore raw data from the trace exit state.
     **
     ** T       Trace that was exited.
     ** ex      Exit state holding the saved registers and spill slots.
     ** snapno  Snapshot number, used to select the applicable renames.
     ** rfilt   Bloom filter of renamed refs, see snap_renamefilter().
     ** ref     IR ref of the value to restore.
     ** dst     Destination memory.
     ** sz      Number of bytes to write; asserted to be 1, 2, 4 or 8.
     **
     ** Determines where the untagged value lives (constant, spill slot, GPR or
     ** FPR) and copies sz bytes from there to dst.
     */
 644 static void snap_restoredata(GCtrace *T, ExitState *ex,
 645                              SnapNo snapno, BloomFilter rfilt,
 646                              IRRef ref, void *dst, CTSize sz)
 647 {
 648   IRIns *ir = &T->ir[ref];
 649   RegSP rs = ir->prev;
 650   int32_t *src;
 651   uint64_t tmp;
 652   if (irref_isk(ref)) {
 653     if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
 654       src = mref(ir->ptr, int32_t);  /* 64 bit constants are referenced via ir->ptr. */
 655     } else if (sz == 8) {
     /* 32 bit constant, 64 bit destination: zero-extend via tmp. */
 656       tmp = (uint64_t)(uint32_t)ir->i;
 657       src = (int32_t *)&tmp;
 658     } else {
 659       src = &ir->i;
 660     }
 661   } else {
 662     if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
 663       rs = snap_renameref(T, snapno, ref, rs);
 664     if (ra_hasspill(regsp_spill(rs))) {
 665       src = &ex->spill[regsp_spill(rs)];
 666       if (sz == 8 && !irt_is64(ir->t)) {
     /* Non-64 bit value, 64 bit destination: zero-extend via tmp. */
 667         tmp = (uint64_t)(uint32_t)*src;
 668         src = (int32_t *)&tmp;
 669       }
 670     } else {
 671       Reg r = regsp_reg(rs);
 672       if (ra_noreg(r)) {
 673         /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
 674         lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
     /* Restore the integer operand into dst, then convert it in place. */
 675         snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
 676         *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
 677         return;
 678       }
 679       src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
 680 #if !LJ_SOFTFP
 681       if (r >= RID_MAX_GPR) {
     /* Register IDs at or above RID_MAX_GPR are read from the FPR area. */
 682         src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
 683 #if LJ_TARGET_PPC
 684         if (sz == 4) {  /* PPC FPRs are always doubles. */
 685           *(float *)dst = (float)*(double *)src;
 686           return;
 687         }
 688 #else
 689         if (LJ_BE && sz == 4) src++;  /* Big-endian: use the second 32 bit word. */
 690 #endif
 691       }
 692 #endif
 693     }
 694   }
 695   lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
     /* Final copy; sizes 1 and 2 truncate the 32 bit word read from src. */
 696   if (sz == 4) *(int32_t *)dst = *src;
 697   else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
 698   else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
 699   else *(int16_t *)dst = (int16_t)*src;
 700 }
 701 #endif
 702
 703 /* Unsink allocation from the trace exit state. Unsink sunk stores.
     **
     ** J       JIT state.
     ** T       Trace that was exited.
     ** ex      Exit state holding the saved registers and spill slots.
     ** snapno  Number of the snapshot being restored.
     ** rfilt   Bloom filter of renamed refs, see snap_renamefilter().
     ** ir      The sunk allocation (asserted: TNEW, TDUP, CNEW or CNEWI).
     ** o       Destination slot for the newly created object.
     **
     ** Creates the object the trace never allocated, stores it in *o and then
     ** applies every store before the snapshot that was sunk into this
     ** allocation, restoring the stored values from the exit state.
     */
 704 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
 705                         SnapNo snapno, BloomFilter rfilt,
 706                         IRIns *ir, TValue *o)
 707 {
 708   lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
 709              ir->o == IR_CNEW || ir->o == IR_CNEWI);
 710 #if LJ_HASFFI
 711   if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
 712     CTState *cts = ctype_cts(J->L);
 713     CTypeID id = (CTypeID)T->ir[ir->op1].i;  /* Type ID is taken from the constant in op1. */
 714     CTSize sz;
 715     CTInfo info = lj_ctype_info(cts, id, &sz);
 716     GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
 717     setcdataV(J->L, o, cd);
 718     if (ir->o == IR_CNEWI) {
     /* CNEWI: op2 is the initial value of the cdata payload. */
 719       uint8_t *p = (uint8_t *)cdataptr(cd);
 720       lua_assert(sz == 4 || sz == 8);
 721       if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
     /* 64 bit value split in two: restore the HIOP half first, then
     ** fall through to restore the other 4 bytes below.
     */
 722         snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
 723         if (LJ_BE) p += 4;
 724         sz = 4;
 725       }
 726       snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
 727     } else {
 728       IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
 729       for (irs = ir+1; irs < irlast; irs++)
 730         if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
     /* The store address is asserted to be an ADD with a constant
     ** offset as second operand; the offset is applied to the cdata
     ** object pointer itself.
     */
 731           IRIns *iro = &T->ir[T->ir[irs->op1].op2];
 732           uint8_t *p = (uint8_t *)cd;
 733           CTSize szs;
 734           lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
 735           lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
     /* Derive the store size from the IR type of the store. */
 736           if (irt_is64(irs->t)) szs = 8;
 737           else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
 738           else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
 739           else szs = 4;
 740           if (LJ_64 && iro->o == IR_KINT64)
 741             p += (int64_t)ir_k64(iro)->u64;
 742           else
 743             p += iro->i;
 744           lua_assert(p >= (uint8_t *)cdataptr(cd) &&
 745                      p + szs <= (uint8_t *)cdataptr(cd) + sz);
 746           if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
 747             lua_assert(szs == 4);
 748             snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
 749             if (LJ_BE) p += 4;
 750           }
 751           snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
 752         }
 753     }
 754   } else
 755 #endif
 756   {
 757     IRIns *irs, *irlast;
     /* TNEW creates a table from its two operands, TDUP duplicates the
     ** template table constant referenced by op1.
     */
 758     GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
 759                                   lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
 760     settabV(J->L, o, t);
 761     irlast = &T->ir[T->snap[snapno].ref];
 762     for (irs = ir+1; irs < irlast; irs++)
 763       if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
 764         IRIns *irk = &T->ir[irs->op1];
 765         TValue tmp, *val;
 766         lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
 767                    irs->o == IR_FSTORE);
 768         if (irk->o == IR_FREF) {
     /* Field store: asserted to target the metatable field. */
 769           lua_assert(irk->op2 == IRFL_TAB_META);
 770           snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
 771           /* NOBARRIER: The table is new (marked white). */
 772           setgcref(t->metatable, obj2gco(tabV(&tmp)));
 773         } else {
     /* Array/hash store: materialize the constant key (looking through
     ** KSLOT), get the value slot in the table and restore into it.
     */
 774           irk = &T->ir[irk->op2];
 775           if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
 776           lj_ir_kvalue(J->L, &tmp, irk);
 777           val = lj_tab_set(J->L, t, &tmp);
 778           /* NOBARRIER: The table is new (marked white). */
 779           snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
 780           if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
     /* Soft-float: the high word of the value comes from the HIOP. */
 781             snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
 782             val->u32.hi = tmp.u32.lo;
 783           }
 784         }
 785       }
 786   }
 787 }
 788
 789 /* Restore interpreter state from exit state with the help of a snapshot.
     **
     ** J      JIT state; J->parent and J->exitno select the trace and snapshot.
     ** exptr  Pointer to the ExitState saved at the trace exit.
     **
     ** Rebuilds the Lua stack slots and frame links described by the snapshot,
     ** re-creates sunk allocations, sets L->base and L->top and returns the
     ** bytecode PC stored in the snapshot.
     */
 790 const BCIns *lj_snap_restore(jit_State *J, void *exptr)
 791 {
 792   ExitState *ex = (ExitState *)exptr;
 793   SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
 794   GCtrace *T = traceref(J, J->parent);
 795   SnapShot *snap = &T->snap[snapno];
 796   MSize n, nent = snap->nent;
 797   SnapEntry *map = &T->snapmap[snap->mapofs];
     /* Points at the last map entry of this snapshot; frame links are
     ** consumed from there backwards.
     */
 798   SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
 799   ptrdiff_t ftsz0;
 800   TValue *frame;
 801   BloomFilter rfilt = snap_renamefilter(T, snapno);
 802   const BCIns *pc = snap_pc(map[nent]);  /* The PC entry follows the slot entries. */
 803   lua_State *L = J->L;
 804
 805   /* Set interpreter PC to the next PC to get correct error messages. */
 806   setcframe_pc(cframe_raw(L->cframe), pc+1);
 807
 808   /* Make sure the stack is big enough for the slots from the snapshot. */
 809   if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
 810     L->top = curr_topL(L);
 811     lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
 812   }
 813
 814   /* Fill stack slots with data from the registers and spill slots. */
 815   frame = L->base-1;
 816   ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
 817   for (n = 0; n < nent; n++) {
 818     SnapEntry sn = map[n];
 819     if (!(sn & SNAP_NORESTORE)) {
 820       TValue *o = &frame[snap_slot(sn)];
 821       IRRef ref = snap_ref(sn);
 822       IRIns *ir = &T->ir[ref];
 823       if (ir->r == RID_SUNK) {
     /* Sunk allocation: create the object once and copy it to every
     ** further slot that refers to the same ref.
     */
 824         MSize j;
 825         for (j = 0; j < n; j++)
 826           if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
 827             copyTV(L, o, &frame[snap_slot(map[j])]);
 828             goto dupslot;
 829           }
 830         snap_unsink(J, T, ex, snapno, rfilt, ir, o);
 831       dupslot:
 832         continue;
 833       }
 834       snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
 835       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
     /* Soft-float number: the high word comes from the following ref. */
 836         TValue tmp;
 837         snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
 838         o->u32.hi = tmp.u32.lo;
 839       } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 840         lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
 841         /* Overwrite tag with frame link. */
 842         setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
 843         L->base = o+1;  /* The slot after the last frame slot becomes the base. */
 844       }
 845     }
 846   }
     /* All frame links must have been consumed, leaving only the PC entry. */
 847   lua_assert(map + nent == flinks);
 848
 849   /* Compute current stack top. */
 850   switch (bc_op(*pc)) {
 851   default:
     /* Opcodes below BC_FUNCF use the regular top of the current frame. */
 852     if (bc_op(*pc) < BC_FUNCF) {
 853       L->top = curr_topL(L);
 854       break;
 855     }
 856     /* fallthrough */
 857   case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
     /* These (and function headers) get the top from the snapshot instead. */
 858     L->top = frame + snap->nslots;
 859     break;
 860   }
 861   return pc;
 862 }
 863
 864 #undef emitir_raw
 865 #undef emitir
 866
 867 #endif