3 ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
23 #include "lj_target.h"
29 /* Some local macros to save typing. Undef'd at the end. */
30 #define IR(ref) (&J->cur.ir[(ref)])
32 /* Pass IR on to next optimization in chain (FOLD). */
33 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
35 /* Emit raw IR without passing through optimizations. */
36 #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
38 /* -- Snapshot buffer allocation ------------------------------------------ */
40 /* Grow snapshot buffer. */
41 void lj_snap_grow_buf_(jit_State
*J
, MSize need
)
43 MSize maxsnap
= (MSize
)J
->param
[JIT_P_maxsnap
];
45 lj_trace_err(J
, LJ_TRERR_SNAPOV
);
46 lj_mem_growvec(J
->L
, J
->snapbuf
, J
->sizesnap
, maxsnap
, SnapShot
);
47 J
->cur
.snap
= J
->snapbuf
;
50 /* Grow snapshot map buffer. */
51 void lj_snap_grow_map_(jit_State
*J
, MSize need
)
53 if (need
< 2*J
->sizesnapmap
)
54 need
= 2*J
->sizesnapmap
;
57 J
->snapmapbuf
= (SnapEntry
*)lj_mem_realloc(J
->L
, J
->snapmapbuf
,
58 J
->sizesnapmap
*sizeof(SnapEntry
), need
*sizeof(SnapEntry
));
59 J
->cur
.snapmap
= J
->snapmapbuf
;
60 J
->sizesnapmap
= need
;
63 /* -- Snapshot generation ------------------------------------------------- */
65 /* Add all modified slots to the snapshot. */
66 static MSize
snapshot_slots(jit_State
*J
, SnapEntry
*map
, BCReg nslots
)
68 IRRef retf
= J
->chain
[IR_RETF
]; /* Limits SLOAD restore elimination. */
71 for (s
= 0; s
< nslots
; s
++) {
73 IRRef ref
= tref_ref(tr
);
75 SnapEntry sn
= SNAP_TR(s
, tr
);
77 if (!(sn
& (SNAP_CONT
|SNAP_FRAME
)) &&
78 ir
->o
== IR_SLOAD
&& ir
->op1
== s
&& ref
> retf
) {
79 /* No need to snapshot unmodified non-inherited slots. */
80 if (!(ir
->op2
& IRSLOAD_INHERIT
))
82 /* No need to restore readonly slots and unmodified non-parent slots. */
83 if (!(LJ_DUALNUM
&& (ir
->op2
& IRSLOAD_CONVERT
)) &&
84 (ir
->op2
& (IRSLOAD_READONLY
|IRSLOAD_PARENT
)) != IRSLOAD_PARENT
)
87 if (LJ_SOFTFP
&& irt_isnum(ir
->t
))
95 /* Add frame links at the end of the snapshot. */
96 static BCReg
snapshot_framelinks(jit_State
*J
, SnapEntry
*map
)
98 cTValue
*frame
= J
->L
->base
- 1;
99 cTValue
*lim
= J
->L
->base
- J
->baseslot
;
100 cTValue
*ftop
= frame
+ funcproto(frame_func(frame
))->framesize
;
102 map
[f
++] = SNAP_MKPC(J
->pc
); /* The current PC is always the first entry. */
103 while (frame
> lim
) { /* Backwards traversal of all frames above base. */
104 if (frame_islua(frame
)) {
105 map
[f
++] = SNAP_MKPC(frame_pc(frame
));
106 frame
= frame_prevl(frame
);
107 if (frame
+ funcproto(frame_func(frame
))->framesize
> ftop
)
108 ftop
= frame
+ funcproto(frame_func(frame
))->framesize
;
109 } else if (frame_iscont(frame
)) {
110 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
111 map
[f
++] = SNAP_MKPC(frame_contpc(frame
));
112 frame
= frame_prevd(frame
);
114 lua_assert(!frame_isc(frame
));
115 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
116 frame
= frame_prevd(frame
);
119 lua_assert(f
== (MSize
)(1 + J
->framedepth
));
120 return (BCReg
)(ftop
- lim
);
123 /* Take a snapshot of the current stack. */
124 static void snapshot_stack(jit_State
*J
, SnapShot
*snap
, MSize nsnapmap
)
126 BCReg nslots
= J
->baseslot
+ J
->maxslot
;
129 /* Conservative estimate. */
130 lj_snap_grow_map(J
, nsnapmap
+ nslots
+ (MSize
)J
->framedepth
+1);
131 p
= &J
->cur
.snapmap
[nsnapmap
];
132 nent
= snapshot_slots(J
, p
, nslots
);
133 snap
->topslot
= (uint8_t)snapshot_framelinks(J
, p
+ nent
);
134 snap
->mapofs
= (uint16_t)nsnapmap
;
135 snap
->ref
= (IRRef1
)J
->cur
.nins
;
136 snap
->nent
= (uint8_t)nent
;
137 snap
->nslots
= (uint8_t)nslots
;
139 J
->cur
.nsnapmap
= (uint16_t)(nsnapmap
+ nent
+ 1 + J
->framedepth
);
142 /* Add or merge a snapshot. */
143 void lj_snap_add(jit_State
*J
)
145 MSize nsnap
= J
->cur
.nsnap
;
146 MSize nsnapmap
= J
->cur
.nsnapmap
;
147 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
148 if (J
->mergesnap
? !irt_isguard(J
->guardemit
) :
149 (nsnap
> 0 && J
->cur
.snap
[nsnap
-1].ref
== J
->cur
.nins
)) {
150 if (nsnap
== 1) { /* But preserve snap #0 PC. */
151 emitir_raw(IRT(IR_NOP
, IRT_NIL
), 0, 0);
154 nsnapmap
= J
->cur
.snap
[--nsnap
].mapofs
;
157 lj_snap_grow_buf(J
, nsnap
+1);
158 J
->cur
.nsnap
= (uint16_t)(nsnap
+1);
161 J
->guardemit
.irt
= 0;
162 snapshot_stack(J
, &J
->cur
.snap
[nsnap
], nsnapmap
);
165 /* -- Snapshot modification ----------------------------------------------- */
167 #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
169 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
170 static BCReg
snap_usedef(jit_State
*J
, uint8_t *udf
,
171 const BCIns
*pc
, BCReg maxslot
)
176 if (maxslot
== 0) return 0;
177 #ifdef LUAJIT_USE_VALGRIND
178 /* Avoid errors for harmless reads beyond maxslot. */
179 memset(udf
, 1, SNAP_USEDEF_SLOTS
);
181 memset(udf
, 1, maxslot
);
184 /* Treat open upvalues as used. */
185 o
= gcref(J
->L
->openupval
);
187 if (uvval(gco2uv(o
)) < J
->L
->base
) break;
188 udf
[uvval(gco2uv(o
)) - J
->L
->base
] = 0;
189 o
= gcref(o
->gch
.nextgc
);
192 #define USE_SLOT(s) udf[(s)] &= ~1
193 #define DEF_SLOT(s) udf[(s)] *= 3
195 /* Scan through following bytecode and check for uses/defs. */
196 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
199 BCOp op
= bc_op(ins
);
200 switch (bcmode_b(op
)) {
201 case BCMvar
: USE_SLOT(bc_b(ins
)); break;
204 switch (bcmode_c(op
)) {
205 case BCMvar
: USE_SLOT(bc_c(ins
)); break;
207 lua_assert(op
== BC_CAT
);
208 for (s
= bc_b(ins
); s
<= bc_c(ins
); s
++) USE_SLOT(s
);
209 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
213 BCReg minslot
= bc_a(ins
);
214 if (op
>= BC_FORI
&& op
<= BC_JFORL
) minslot
+= FORL_EXT
;
215 else if (op
>= BC_ITERL
&& op
<= BC_JITERL
) minslot
+= bc_b(pc
[-2])-1;
216 else if (op
== BC_UCLO
) { pc
+= bc_j(ins
); break; }
217 for (s
= minslot
; s
< maxslot
; s
++) DEF_SLOT(s
);
218 return minslot
< maxslot
? minslot
: maxslot
;
221 if (op
== BC_JFORL
|| op
== BC_JITERL
|| op
== BC_JLOOP
) {
223 } else if (bc_isret(op
)) {
224 BCReg top
= op
== BC_RETM
? maxslot
: (bc_a(ins
) + bc_d(ins
)-1);
225 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
226 for (; s
< top
; s
++) USE_SLOT(s
);
227 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
231 case BCMfunc
: return maxslot
; /* NYI: will abort, anyway. */
234 switch (bcmode_a(op
)) {
235 case BCMvar
: USE_SLOT(bc_a(ins
)); break;
237 if (!(op
== BC_ISTC
|| op
== BC_ISFC
)) DEF_SLOT(bc_a(ins
));
240 if (op
>= BC_CALLM
&& op
<= BC_VARG
) {
241 BCReg top
= (op
== BC_CALLM
|| op
== BC_CALLMT
|| bc_c(ins
) == 0) ?
242 maxslot
: (bc_a(ins
) + bc_c(ins
));
243 s
= bc_a(ins
) - ((op
== BC_ITERC
|| op
== BC_ITERN
) ? 3 : 0);
244 for (; s
< top
; s
++) USE_SLOT(s
);
245 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
246 if (op
== BC_CALLT
|| op
== BC_CALLMT
) {
247 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
250 } else if (op
== BC_KNIL
) {
251 for (s
= bc_a(ins
); s
<= bc_d(ins
); s
++) DEF_SLOT(s
);
252 } else if (op
== BC_TSETM
) {
253 for (s
= bc_a(ins
)-1; s
< maxslot
; s
++) USE_SLOT(s
);
258 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
264 return 0; /* unreachable */
267 /* Purge dead slots before the next snapshot. */
268 void lj_snap_purge(jit_State
*J
)
270 uint8_t udf
[SNAP_USEDEF_SLOTS
];
271 BCReg maxslot
= J
->maxslot
;
272 BCReg s
= snap_usedef(J
, udf
, J
->pc
, maxslot
);
273 for (; s
< maxslot
; s
++)
275 J
->base
[s
] = 0; /* Purge dead slots. */
278 /* Shrink last snapshot. */
279 void lj_snap_shrink(jit_State
*J
)
281 SnapShot
*snap
= &J
->cur
.snap
[J
->cur
.nsnap
-1];
282 SnapEntry
*map
= &J
->cur
.snapmap
[snap
->mapofs
];
283 MSize n
, m
, nlim
, nent
= snap
->nent
;
284 uint8_t udf
[SNAP_USEDEF_SLOTS
];
285 BCReg maxslot
= J
->maxslot
;
286 BCReg minslot
= snap_usedef(J
, udf
, snap_pc(map
[nent
]), maxslot
);
287 BCReg baseslot
= J
->baseslot
;
290 snap
->nslots
= (uint8_t)maxslot
;
291 for (n
= m
= 0; n
< nent
; n
++) { /* Remove unused slots from snapshot. */
292 BCReg s
= snap_slot(map
[n
]);
293 if (s
< minslot
|| (s
< maxslot
&& udf
[s
-baseslot
] == 0))
294 map
[m
++] = map
[n
]; /* Only copy used slots. */
296 snap
->nent
= (uint8_t)m
;
297 nlim
= J
->cur
.nsnapmap
- snap
->mapofs
- 1;
298 while (n
<= nlim
) map
[m
++] = map
[n
++]; /* Move PC + frame links down. */
299 J
->cur
.nsnapmap
= (uint16_t)(snap
->mapofs
+ m
); /* Free up space in map. */
302 /* -- Snapshot access ----------------------------------------------------- */
304 /* Initialize a Bloom Filter with all renamed refs.
305 ** There are very few renames (often none), so the filter has
306 ** very few bits set. This makes it suitable for negative filtering.
308 static BloomFilter
snap_renamefilter(GCtrace
*T
, SnapNo lim
)
310 BloomFilter rfilt
= 0;
312 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
314 bloomset(rfilt
, ir
->op1
);
318 /* Process matching renames to find the original RegSP. */
319 static RegSP
snap_renameref(GCtrace
*T
, SnapNo lim
, IRRef ref
, RegSP rs
)
322 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
323 if (ir
->op1
== ref
&& ir
->op2
<= lim
)
328 /* Copy RegSP from parent snapshot to the parent links of the IR. */
329 IRIns
*lj_snap_regspmap(GCtrace
*T
, SnapNo snapno
, IRIns
*ir
)
331 SnapShot
*snap
= &T
->snap
[snapno
];
332 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
333 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
338 if (ir
->o
== IR_SLOAD
) {
339 if (!(ir
->op2
& IRSLOAD_PARENT
)) break;
341 lua_assert(n
< snap
->nent
);
342 if (snap_slot(map
[n
]) == ir
->op1
) {
343 ref
= snap_ref(map
[n
++]);
347 } else if (LJ_SOFTFP
&& ir
->o
== IR_HIOP
) {
349 } else if (ir
->o
== IR_PVAL
) {
350 ref
= ir
->op1
+ REF_BIAS
;
354 rs
= T
->ir
[ref
].prev
;
355 if (bloomtest(rfilt
, ref
))
356 rs
= snap_renameref(T
, snapno
, ref
, rs
);
357 ir
->prev
= (uint16_t)rs
;
358 lua_assert(regsp_used(rs
));
363 /* -- Snapshot replay ----------------------------------------------------- */
365 /* Replay constant from parent trace. */
366 static TRef
snap_replay_const(jit_State
*J
, IRIns
*ir
)
368 /* Only have to deal with constants that can occur in stack slots. */
369 switch ((IROp
)ir
->o
) {
370 case IR_KPRI
: return TREF_PRI(irt_type(ir
->t
));
371 case IR_KINT
: return lj_ir_kint(J
, ir
->i
);
372 case IR_KGC
: return lj_ir_kgc(J
, ir_kgc(ir
), irt_t(ir
->t
));
373 case IR_KNUM
: return lj_ir_k64(J
, IR_KNUM
, ir_knum(ir
));
374 case IR_KINT64
: return lj_ir_k64(J
, IR_KINT64
, ir_kint64(ir
));
375 case IR_KPTR
: return lj_ir_kptr(J
, ir_kptr(ir
)); /* Continuation. */
376 default: lua_assert(0); return TREF_NIL
; break;
380 /* De-duplicate parent reference. */
381 static TRef
snap_dedup(jit_State
*J
, SnapEntry
*map
, MSize nmax
, IRRef ref
)
384 for (j
= 0; j
< nmax
; j
++)
385 if (snap_ref(map
[j
]) == ref
)
386 return J
->slot
[snap_slot(map
[j
])] & ~(SNAP_CONT
|SNAP_FRAME
);
390 /* Emit parent reference with de-duplication. */
391 static TRef
snap_pref(jit_State
*J
, GCtrace
*T
, SnapEntry
*map
, MSize nmax
,
392 BloomFilter seen
, IRRef ref
)
394 IRIns
*ir
= &T
->ir
[ref
];
397 tr
= snap_replay_const(J
, ir
);
398 else if (!regsp_used(ir
->prev
))
400 else if (!bloomtest(seen
, ref
) || (tr
= snap_dedup(J
, map
, nmax
, ref
)) == 0)
401 tr
= emitir(IRT(IR_PVAL
, irt_type(ir
->t
)), ref
- REF_BIAS
, 0);
405 /* Check whether a sunk store corresponds to an allocation. Slow path. */
406 static int snap_sunk_store2(jit_State
*J
, IRIns
*ira
, IRIns
*irs
)
408 if (irs
->o
== IR_ASTORE
|| irs
->o
== IR_HSTORE
||
409 irs
->o
== IR_FSTORE
|| irs
->o
== IR_XSTORE
) {
410 IRIns
*irk
= IR(irs
->op1
);
411 if (irk
->o
== IR_AREF
|| irk
->o
== IR_HREFK
)
413 return (IR(irk
->op1
) == ira
);
418 /* Check whether a sunk store corresponds to an allocation. Fast path. */
419 static LJ_AINLINE
int snap_sunk_store(jit_State
*J
, IRIns
*ira
, IRIns
*irs
)
422 return (ira
+ irs
->s
== irs
); /* Fast check. */
423 return snap_sunk_store2(J
, ira
, irs
);
426 /* Replay snapshot state to setup side trace. */
427 void lj_snap_replay(jit_State
*J
, GCtrace
*T
)
429 SnapShot
*snap
= &T
->snap
[J
->exitno
];
430 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
431 MSize n
, nent
= snap
->nent
;
432 BloomFilter seen
= 0;
435 /* Emit IR for slots inherited from parent snapshot. */
436 for (n
= 0; n
< nent
; n
++) {
437 SnapEntry sn
= map
[n
];
438 BCReg s
= snap_slot(sn
);
439 IRRef ref
= snap_ref(sn
);
440 IRIns
*ir
= &T
->ir
[ref
];
442 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
443 if (bloomtest(seen
, ref
) && (tr
= snap_dedup(J
, map
, n
, ref
)) != 0)
446 if (irref_isk(ref
)) {
447 tr
= snap_replay_const(J
, ir
);
448 } else if (!regsp_used(ir
->prev
)) {
453 IRType t
= irt_type(ir
->t
);
454 uint32_t mode
= IRSLOAD_INHERIT
|IRSLOAD_PARENT
;
455 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
)) t
= IRT_NUM
;
456 if (ir
->o
== IR_SLOAD
) mode
|= (ir
->op2
& IRSLOAD_READONLY
);
457 tr
= emitir_raw(IRT(IR_SLOAD
, t
), s
, mode
);
460 J
->slot
[s
] = tr
| (sn
&(SNAP_CONT
|SNAP_FRAME
)); /* Same as TREF_* flags. */
461 J
->framedepth
+= ((sn
& (SNAP_CONT
|SNAP_FRAME
)) && s
);
462 if ((sn
& SNAP_FRAME
))
466 IRIns
*irlast
= &T
->ir
[snap
->ref
];
468 /* Emit dependent PVALs. */
469 for (n
= 0; n
< nent
; n
++) {
470 SnapEntry sn
= map
[n
];
471 IRRef refp
= snap_ref(sn
);
472 IRIns
*ir
= &T
->ir
[refp
];
473 if (regsp_reg(ir
->r
) == RID_SUNK
) {
474 if (J
->slot
[snap_slot(sn
)] != snap_slot(sn
)) continue;
476 lua_assert(ir
->o
== IR_TNEW
|| ir
->o
== IR_TDUP
||
477 ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
);
478 if (ir
->op1
>= T
->nk
) snap_pref(J
, T
, map
, nent
, seen
, ir
->op1
);
479 if (ir
->op2
>= T
->nk
) snap_pref(J
, T
, map
, nent
, seen
, ir
->op2
);
480 if (LJ_HASFFI
&& ir
->o
== IR_CNEWI
) {
481 if (LJ_32
&& refp
+1 < T
->nins
&& (ir
+1)->o
== IR_HIOP
)
482 snap_pref(J
, T
, map
, nent
, seen
, (ir
+1)->op2
);
485 for (irs
= ir
+1; irs
< irlast
; irs
++)
486 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
487 if (snap_pref(J
, T
, map
, nent
, seen
, irs
->op2
) == 0)
488 snap_pref(J
, T
, map
, nent
, seen
, T
->ir
[irs
->op2
].op1
);
489 else if ((LJ_SOFTFP
|| (LJ_32
&& LJ_HASFFI
)) &&
490 irs
+1 < irlast
&& (irs
+1)->o
== IR_HIOP
)
491 snap_pref(J
, T
, map
, nent
, seen
, (irs
+1)->op2
);
494 } else if (!irref_isk(refp
) && !regsp_used(ir
->prev
)) {
495 lua_assert(ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
496 J
->slot
[snap_slot(sn
)] = snap_pref(J
, T
, map
, nent
, seen
, ir
->op1
);
499 /* Replay sunk instructions. */
500 for (n
= 0; pass23
&& n
< nent
; n
++) {
501 SnapEntry sn
= map
[n
];
502 IRRef refp
= snap_ref(sn
);
503 IRIns
*ir
= &T
->ir
[refp
];
504 if (regsp_reg(ir
->r
) == RID_SUNK
) {
506 if (J
->slot
[snap_slot(sn
)] != snap_slot(sn
)) { /* De-dup allocs. */
507 J
->slot
[snap_slot(sn
)] = J
->slot
[J
->slot
[snap_slot(sn
)]];
511 if (op1
>= T
->nk
) op1
= snap_pref(J
, T
, map
, nent
, seen
, op1
);
513 if (op2
>= T
->nk
) op2
= snap_pref(J
, T
, map
, nent
, seen
, op2
);
514 if (LJ_HASFFI
&& ir
->o
== IR_CNEWI
) {
515 if (LJ_32
&& refp
+1 < T
->nins
&& (ir
+1)->o
== IR_HIOP
) {
516 lj_needsplit(J
); /* Emit joining HIOP. */
517 op2
= emitir_raw(IRT(IR_HIOP
, IRT_I64
), op2
,
518 snap_pref(J
, T
, map
, nent
, seen
, (ir
+1)->op2
));
520 J
->slot
[snap_slot(sn
)] = emitir(ir
->ot
, op1
, op2
);
523 TRef tr
= emitir(ir
->ot
, op1
, op2
);
524 J
->slot
[snap_slot(sn
)] = tr
;
525 for (irs
= ir
+1; irs
< irlast
; irs
++)
526 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
527 IRIns
*irr
= &T
->ir
[irs
->op1
];
528 TRef val
, key
= irr
->op2
, tmp
= tr
;
529 if (irr
->o
!= IR_FREF
) {
530 IRIns
*irk
= &T
->ir
[key
];
531 if (irr
->o
== IR_HREFK
)
532 key
= lj_ir_kslot(J
, snap_replay_const(J
, &T
->ir
[irk
->op1
]),
535 key
= snap_replay_const(J
, irk
);
536 if (irr
->o
== IR_HREFK
|| irr
->o
== IR_AREF
) {
537 IRIns
*irf
= &T
->ir
[irr
->op1
];
538 tmp
= emitir(irf
->ot
, tmp
, irf
->op2
);
541 tmp
= emitir(irr
->ot
, tmp
, key
);
542 val
= snap_pref(J
, T
, map
, nent
, seen
, irs
->op2
);
544 IRIns
*irc
= &T
->ir
[irs
->op2
];
545 lua_assert(irc
->o
== IR_CONV
&& irc
->op2
== IRCONV_NUM_INT
);
546 val
= snap_pref(J
, T
, map
, nent
, seen
, irc
->op1
);
547 val
= emitir(IRTN(IR_CONV
), val
, IRCONV_NUM_INT
);
548 } else if ((LJ_SOFTFP
|| (LJ_32
&& LJ_HASFFI
)) &&
549 irs
+1 < irlast
&& (irs
+1)->o
== IR_HIOP
) {
551 if (LJ_SOFTFP
&& irt_type((irs
+1)->t
) == IRT_SOFTFP
)
554 if (irref_isk(irs
->op2
) && irref_isk((irs
+1)->op2
)) {
555 uint64_t k
= (uint32_t)T
->ir
[irs
->op2
].i
+
556 ((uint64_t)T
->ir
[(irs
+1)->op2
].i
<< 32);
557 val
= lj_ir_k64(J
, t
== IRT_I64
? IR_KINT64
: IR_KNUM
,
558 lj_ir_k64_find(J
, k
));
560 val
= emitir_raw(IRT(IR_HIOP
, t
), val
,
561 snap_pref(J
, T
, map
, nent
, seen
, (irs
+1)->op2
));
563 tmp
= emitir(IRT(irs
->o
, t
), tmp
, val
);
566 tmp
= emitir(irs
->ot
, tmp
, val
);
572 J
->base
= J
->slot
+ J
->baseslot
;
573 J
->maxslot
= snap
->nslots
- J
->baseslot
;
575 if (pass23
) /* Need explicit GC step _after_ initial snapshot. */
576 emitir_raw(IRTG(IR_GCSTEP
, IRT_NIL
), 0, 0);
579 /* -- Snapshot restore ---------------------------------------------------- */
581 static void snap_unsink(jit_State
*J
, GCtrace
*T
, ExitState
*ex
,
582 SnapNo snapno
, BloomFilter rfilt
,
583 IRIns
*ir
, TValue
*o
);
585 /* Restore a value from the trace exit state. */
586 static void snap_restoreval(jit_State
*J
, GCtrace
*T
, ExitState
*ex
,
587 SnapNo snapno
, BloomFilter rfilt
,
588 IRRef ref
, TValue
*o
)
590 IRIns
*ir
= &T
->ir
[ref
];
593 if (irref_isk(ref
)) { /* Restore constant slot. */
594 lj_ir_kvalue(J
->L
, o
, ir
);
597 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
)))
598 rs
= snap_renameref(T
, snapno
, ref
, rs
);
599 if (ra_hasspill(regsp_spill(rs
))) { /* Restore from spill slot. */
600 int32_t *sps
= &ex
->spill
[regsp_spill(rs
)];
601 if (irt_isinteger(t
)) {
604 } else if (irt_isnum(t
)) {
605 o
->u64
= *(uint64_t *)sps
;
607 } else if (LJ_64
&& irt_islightud(t
)) {
608 /* 64 bit lightuserdata which may escape already has the tag bits. */
609 o
->u64
= *(uint64_t *)sps
;
611 lua_assert(!irt_ispri(t
)); /* PRI refs never have a spill slot. */
612 setgcrefi(o
->gcr
, *sps
);
613 setitype(o
, irt_toitype(t
));
615 } else { /* Restore from register. */
616 Reg r
= regsp_reg(rs
);
618 lua_assert(ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
619 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ir
->op1
, o
);
620 if (LJ_DUALNUM
) setnumV(o
, (lua_Number
)intV(o
));
622 } else if (irt_isinteger(t
)) {
623 setintV(o
, (int32_t)ex
->gpr
[r
-RID_MIN_GPR
]);
625 } else if (irt_isnum(t
)) {
626 setnumV(o
, ex
->fpr
[r
-RID_MIN_FPR
]);
628 } else if (LJ_64
&& irt_islightud(t
)) {
629 /* 64 bit lightuserdata which may escape already has the tag bits. */
630 o
->u64
= ex
->gpr
[r
-RID_MIN_GPR
];
633 setgcrefi(o
->gcr
, ex
->gpr
[r
-RID_MIN_GPR
]);
634 setitype(o
, irt_toitype(t
));
640 /* Restore raw data from the trace exit state. */
641 static void snap_restoredata(GCtrace
*T
, ExitState
*ex
,
642 SnapNo snapno
, BloomFilter rfilt
,
643 IRRef ref
, void *dst
, CTSize sz
)
645 IRIns
*ir
= &T
->ir
[ref
];
649 if (irref_isk(ref
)) {
650 if (ir
->o
== IR_KNUM
|| ir
->o
== IR_KINT64
) {
651 src
= mref(ir
->ptr
, int32_t);
652 } else if (sz
== 8) {
653 tmp
= (uint64_t)(uint32_t)ir
->i
;
654 src
= (int32_t *)&tmp
;
659 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
)))
660 rs
= snap_renameref(T
, snapno
, ref
, rs
);
661 if (ra_hasspill(regsp_spill(rs
))) {
662 src
= &ex
->spill
[regsp_spill(rs
)];
663 if (sz
== 8 && !irt_is64(ir
->t
)) {
664 tmp
= (uint64_t)(uint32_t)*src
;
665 src
= (int32_t *)&tmp
;
668 Reg r
= regsp_reg(rs
);
670 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
671 lua_assert(sz
== 8 && ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
672 snap_restoredata(T
, ex
, snapno
, rfilt
, ir
->op1
, dst
, 4);
673 *(lua_Number
*)dst
= (lua_Number
)*(int32_t *)dst
;
676 src
= (int32_t *)&ex
->gpr
[r
-RID_MIN_GPR
];
678 if (r
>= RID_MAX_GPR
) {
679 src
= (int32_t *)&ex
->fpr
[r
-RID_MIN_FPR
];
681 if (sz
== 4) { /* PPC FPRs are always doubles. */
682 *(float *)dst
= (float)*(double *)src
;
686 if (LJ_BE
&& sz
== 4) src
++;
692 lua_assert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
693 if (sz
== 4) *(int32_t *)dst
= *src
;
694 else if (sz
== 8) *(int64_t *)dst
= *(int64_t *)src
;
695 else if (sz
== 1) *(int8_t *)dst
= (int8_t)*src
;
696 else *(int16_t *)dst
= (int16_t)*src
;
700 /* Unsink allocation from the trace exit state. Unsink sunk stores. */
701 static void snap_unsink(jit_State
*J
, GCtrace
*T
, ExitState
*ex
,
702 SnapNo snapno
, BloomFilter rfilt
,
703 IRIns
*ir
, TValue
*o
)
705 lua_assert(ir
->o
== IR_TNEW
|| ir
->o
== IR_TDUP
||
706 ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
);
708 if (ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
) {
709 CTState
*cts
= ctype_ctsG(J2G(J
));
710 CTypeID id
= (CTypeID
)T
->ir
[ir
->op1
].i
;
711 CTSize sz
= lj_ctype_size(cts
, id
);
712 GCcdata
*cd
= lj_cdata_new(cts
, id
, sz
);
713 setcdataV(J
->L
, o
, cd
);
714 if (ir
->o
== IR_CNEWI
) {
715 uint8_t *p
= (uint8_t *)cdataptr(cd
);
716 lua_assert(sz
== 4 || sz
== 8);
717 if (LJ_32
&& sz
== 8 && ir
+1 < T
->ir
+ T
->nins
&& (ir
+1)->o
== IR_HIOP
) {
718 snap_restoredata(T
, ex
, snapno
, rfilt
, (ir
+1)->op2
, LJ_LE
?p
+4:p
, 4);
722 snap_restoredata(T
, ex
, snapno
, rfilt
, ir
->op2
, p
, sz
);
724 IRIns
*irs
, *irlast
= &T
->ir
[T
->snap
[snapno
].ref
];
725 for (irs
= ir
+1; irs
< irlast
; irs
++)
726 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
727 IRIns
*iro
= &T
->ir
[T
->ir
[irs
->op1
].op2
];
728 uint8_t *p
= (uint8_t *)cd
;
730 lua_assert(irs
->o
== IR_XSTORE
&& T
->ir
[irs
->op1
].o
== IR_ADD
);
731 lua_assert(iro
->o
== IR_KINT
|| iro
->o
== IR_KINT64
);
732 if (irt_is64(irs
->t
)) szs
= 8;
733 else if (irt_isi8(irs
->t
) || irt_isu8(irs
->t
)) szs
= 1;
734 else if (irt_isi16(irs
->t
) || irt_isu16(irs
->t
)) szs
= 2;
736 if (LJ_64
&& iro
->o
== IR_KINT64
)
737 p
+= (int64_t)ir_k64(iro
)->u64
;
740 lua_assert(p
>= (uint8_t *)cdataptr(cd
) &&
741 p
+ szs
<= (uint8_t *)cdataptr(cd
) + sz
);
742 if (LJ_32
&& irs
+1 < T
->ir
+ T
->nins
&& (irs
+1)->o
== IR_HIOP
) {
743 lua_assert(szs
== 4);
744 snap_restoredata(T
, ex
, snapno
, rfilt
, (irs
+1)->op2
, LJ_LE
?p
+4:p
,4);
747 snap_restoredata(T
, ex
, snapno
, rfilt
, irs
->op2
, p
, szs
);
754 GCtab
*t
= ir
->o
== IR_TNEW
? lj_tab_new(J
->L
, ir
->op1
, ir
->op2
) :
755 lj_tab_dup(J
->L
, ir_ktab(&T
->ir
[ir
->op1
]));
757 irlast
= &T
->ir
[T
->snap
[snapno
].ref
];
758 for (irs
= ir
+1; irs
< irlast
; irs
++)
759 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
760 IRIns
*irk
= &T
->ir
[irs
->op1
];
762 lua_assert(irs
->o
== IR_ASTORE
|| irs
->o
== IR_HSTORE
||
763 irs
->o
== IR_FSTORE
);
764 if (irk
->o
== IR_FREF
) {
765 lua_assert(irk
->op2
== IRFL_TAB_META
);
766 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, irs
->op2
, &tmp
);
767 /* NOBARRIER: The table is new (marked white). */
768 setgcref(t
->metatable
, obj2gco(tabV(&tmp
)));
770 irk
= &T
->ir
[irk
->op2
];
771 if (irk
->o
== IR_KSLOT
) irk
= &T
->ir
[irk
->op1
];
772 lj_ir_kvalue(J
->L
, &tmp
, irk
);
773 val
= lj_tab_set(J
->L
, t
, &tmp
);
774 /* NOBARRIER: The table is new (marked white). */
775 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, irs
->op2
, val
);
776 if (LJ_SOFTFP
&& irs
+1 < T
->ir
+ T
->nins
&& (irs
+1)->o
== IR_HIOP
) {
777 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, (irs
+1)->op2
, &tmp
);
778 val
->u32
.hi
= tmp
.u32
.lo
;
785 /* Restore interpreter state from exit state with the help of a snapshot. */
786 const BCIns
*lj_snap_restore(jit_State
*J
, void *exptr
)
788 ExitState
*ex
= (ExitState
*)exptr
;
789 SnapNo snapno
= J
->exitno
; /* For now, snapno == exitno. */
790 GCtrace
*T
= traceref(J
, J
->parent
);
791 SnapShot
*snap
= &T
->snap
[snapno
];
792 MSize n
, nent
= snap
->nent
;
793 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
794 SnapEntry
*flinks
= &T
->snapmap
[snap_nextofs(T
, snap
)-1];
797 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
798 const BCIns
*pc
= snap_pc(map
[nent
]);
801 /* Set interpreter PC to the next PC to get correct error messages. */
802 setcframe_pc(cframe_raw(L
->cframe
), pc
+1);
804 /* Make sure the stack is big enough for the slots from the snapshot. */
805 if (LJ_UNLIKELY(L
->base
+ snap
->topslot
>= tvref(L
->maxstack
))) {
806 L
->top
= curr_topL(L
);
807 lj_state_growstack(L
, snap
->topslot
- curr_proto(L
)->framesize
);
810 /* Fill stack slots with data from the registers and spill slots. */
812 ftsz0
= frame_ftsz(frame
); /* Preserve link to previous frame in slot #0. */
813 for (n
= 0; n
< nent
; n
++) {
814 SnapEntry sn
= map
[n
];
815 if (!(sn
& SNAP_NORESTORE
)) {
816 TValue
*o
= &frame
[snap_slot(sn
)];
817 IRRef ref
= snap_ref(sn
);
818 IRIns
*ir
= &T
->ir
[ref
];
819 if (ir
->r
== RID_SUNK
) {
821 for (j
= 0; j
< n
; j
++)
822 if (snap_ref(map
[j
]) == ref
) { /* De-duplicate sunk allocations. */
823 copyTV(L
, o
, &frame
[snap_slot(map
[j
])]);
826 snap_unsink(J
, T
, ex
, snapno
, rfilt
, ir
, o
);
830 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ref
, o
);
831 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
) && tvisint(o
)) {
833 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ref
+1, &tmp
);
834 o
->u32
.hi
= tmp
.u32
.lo
;
835 } else if ((sn
& (SNAP_CONT
|SNAP_FRAME
))) {
836 /* Overwrite tag with frame link. */
837 o
->fr
.tp
.ftsz
= snap_slot(sn
) != 0 ? (int32_t)*flinks
-- : ftsz0
;
842 lua_assert(map
+ nent
== flinks
);
844 /* Compute current stack top. */
845 switch (bc_op(*pc
)) {
846 case BC_CALLM
: case BC_CALLMT
: case BC_RETM
: case BC_TSETM
:
847 L
->top
= frame
+ snap
->nslots
;
850 L
->top
= curr_topL(L
);