/*
** Snapshot handling.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
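
/* Note on these macros: lj_ir_set() only stages the instruction fields
** in J->fold.ins. emitir() then runs the FOLD engine, which may
** constant-fold, CSE or actually emit the instruction; e.g.
** emitir(IRTI(IR_ADD), tr1, tr2) need not create a new instruction at
** all. emitir_raw() bypasses FOLD and always appends the instruction,
** which is needed below whenever an instruction must appear verbatim
** (inherited SLOADs, joining HIOPs, NOP placeholders).
*/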

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */
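
/* Each SnapEntry is a packed 32 bit descriptor, roughly: the stack slot
** number in the top byte, flag bits (SNAP_FRAME, SNAP_CONT,
** SNAP_NORESTORE, SNAP_SOFTFPNUM) in the middle, and the 16 bit IR
** reference holding the slot's value in the low bits. SNAP_TR() builds
** such an entry directly from a slot number and a TRef.
*/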

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
  MSize f = 0;
  lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
      map[f++] = SNAP_MKPC(frame_pc(frame));
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  lua_assert(f == (MSize)(1 + J->framedepth));
  return (BCReg)(ftop - lim);
}
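
/* A complete snapshot map is thus laid out as:
**
**   map[0..nent-1]   slot entries, in ascending slot order
**   map[nent]        the current bytecode PC
**   map[nent+1..]    frame links, one or two entries per frame
**
** snapshot_stack() below writes the slot entries first and then appends
** the PC and frame links via snapshot_framelinks().
*/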

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no ins. inbetween or if requested and no guard inbetween. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}
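
/* Roughly how the trace recorder uses this, with tr and kref standing
** for previously emitted references:
**
**   lj_snap_add(J);                            (state the exit restores)
**   emitir(IRTG(IR_EQ, IRT_TAB), tr, kref);    (guarded instruction)
**
** If the guard fails at runtime, the exit handler uses the most recent
** snapshot to reconstruct the interpreter stack, see lj_snap_restore().
*/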

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
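
/* One byte of udf[] per slot tracks the analysis state: every slot
** starts out as 1 (neither used nor defined). USE_SLOT clears bit 0 and
** is sticky, because DEF_SLOT multiplies by 3: 0 stays 0, while 1
** becomes 3 (and a later use leaves 2, still non-zero). After the scan,
** a zero byte means the slot is read before being overwritten and must
** be kept; a non-zero byte means the slot is redefined or never read
** along the scanned bytecode, so its current value is dead.
*/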
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)		udf[(s)] &= ~1
#define DEF_SLOT(s)		udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
        if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  BCReg baseslot = J->baseslot;
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}
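
/* Both functions above rely on snap_usedef() being conservative: a slot
** is only considered dead if it cannot be read again before being
** overwritten. E.g. in "for i=1,n do local x=f(i) end" the slot holding
** x is dead at the loop branch, so it can be purged from the recorder
** state and dropped from the snapshot instead of being saved.
*/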

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lua_assert(n < snap->nent);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}
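
/* IR_RENAME instructions are appended near the end of a trace whenever
** the register allocator moves a value to a different register after a
** snapshot was taken. A Bloom filter hit may be a false positive, which
** merely costs a redundant scan in snap_renameref(); a miss is exact,
** so the common rename-free case stays cheap.
*/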

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL; break;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}
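
/* IR_PVAL is a placeholder for a value computed by the parent trace: its
** op1 holds the parent reference minus REF_BIAS. When the side trace is
** assembled, lj_snap_regspmap() maps these placeholders back to the
** parent's registers and spill slots, so the side trace picks up each
** value exactly where the parent exit left it.
*/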

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}
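
/* The SINK optimization records, in the otherwise unused 's' field of a
** sunk store, the distance back to the allocation it belongs to, so the
** fast path is a single pointer comparison. A value of 255 means the
** distance did not fit in a byte and snap_sunk_store2() must instead
** chase the store's key chain back to the allocation.
*/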

/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(regsp_reg(ir->r) == RID_SUNK);
      tr = s;  /* Placeholder self-reference for sunk values. */
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
                                  lj_ir_k64_find(J, k));
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                                   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
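
/* Replay thus proceeds in up to three passes: pass 1 turns every slot of
** the parent snapshot into a constant, an inherited SLOAD or a
** placeholder; pass 2 emits the PVALs that sunk allocations depend on;
** pass 3 re-emits the sunk allocations themselves together with their
** sunk stores. Passes 2 and 3 only run if pass 1 saw a sunk value.
*/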

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  lua_assert(!LJ_GC64);  /* TODO_GC64: handle 64 bit references. */
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}

/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
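
/* Both restore functions read from an ExitState, the register and spill
** dump saved by the exit handler in the VM (see lj_target.h for the
** per-architecture layout): ex->gpr[] holds the general-purpose
** registers, ex->fpr[] the FP registers and ex->spill[] the spill area
** of the exited trace. The RegSP value from the IR, possibly redirected
** by renames, selects which of these locations held the value.
*/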

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
          lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lua_assert(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz);
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lua_assert(szs == 4);
            snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE);
        if (irk->o == IR_FREF) {
          lua_assert(irk->op2 == IRFL_TAB_META);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        lua_assert(!LJ_FR2);  /* TODO_FR2: store 64 bit PCs. */
        /* Overwrite tag with frame link. */
        setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
        L->base = o+1;
      }
    }
  }
  lua_assert(map + nent == flinks);
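
  /* The stack top only matters for bytecodes dealing with a variable
  ** number of values (MULTRES): BC_CALLM/BC_CALLMT pass, BC_RETM returns
  ** and BC_TSETM stores multiple results. For these the top must cover
  ** all snapshot slots. Anything else resumes with the fixed stack top
  ** of the current frame.
  */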

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}