3 ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
23 #include "lj_target.h"
/* Some local macros to save typing. Undef'd at the end. */
/* Shorthand for indexing the current trace's IR buffer by reference. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
38 /* -- Snapshot buffer allocation ------------------------------------------ */
40 /* Grow snapshot buffer. */
41 void lj_snap_grow_buf_(jit_State
*J
, MSize need
)
43 MSize maxsnap
= (MSize
)J
->param
[JIT_P_maxsnap
];
45 lj_trace_err(J
, LJ_TRERR_SNAPOV
);
46 lj_mem_growvec(J
->L
, J
->snapbuf
, J
->sizesnap
, maxsnap
, SnapShot
);
47 J
->cur
.snap
= J
->snapbuf
;
50 /* Grow snapshot map buffer. */
51 void lj_snap_grow_map_(jit_State
*J
, MSize need
)
53 if (need
< 2*J
->sizesnapmap
)
54 need
= 2*J
->sizesnapmap
;
57 J
->snapmapbuf
= (SnapEntry
*)lj_mem_realloc(J
->L
, J
->snapmapbuf
,
58 J
->sizesnapmap
*sizeof(SnapEntry
), need
*sizeof(SnapEntry
));
59 J
->cur
.snapmap
= J
->snapmapbuf
;
60 J
->sizesnapmap
= need
;
63 /* -- Snapshot generation ------------------------------------------------- */
65 /* Add all modified slots to the snapshot. */
66 static MSize
snapshot_slots(jit_State
*J
, SnapEntry
*map
, BCReg nslots
)
68 IRRef retf
= J
->chain
[IR_RETF
]; /* Limits SLOAD restore elimination. */
71 for (s
= 0; s
< nslots
; s
++) {
73 IRRef ref
= tref_ref(tr
);
75 SnapEntry sn
= SNAP_TR(s
, tr
);
77 if (!(sn
& (SNAP_CONT
|SNAP_FRAME
)) &&
78 ir
->o
== IR_SLOAD
&& ir
->op1
== s
&& ref
> retf
) {
79 /* No need to snapshot unmodified non-inherited slots. */
80 if (!(ir
->op2
& IRSLOAD_INHERIT
))
82 /* No need to restore readonly slots and unmodified non-parent slots. */
83 if (!(LJ_DUALNUM
&& (ir
->op2
& IRSLOAD_CONVERT
)) &&
84 (ir
->op2
& (IRSLOAD_READONLY
|IRSLOAD_PARENT
)) != IRSLOAD_PARENT
)
87 if (LJ_SOFTFP
&& irt_isnum(ir
->t
))
95 /* Add frame links at the end of the snapshot. */
96 static BCReg
snapshot_framelinks(jit_State
*J
, SnapEntry
*map
)
98 cTValue
*frame
= J
->L
->base
- 1;
99 cTValue
*lim
= J
->L
->base
- J
->baseslot
;
100 GCfunc
*fn
= frame_func(frame
);
101 cTValue
*ftop
= isluafunc(fn
) ? (frame
+funcproto(fn
)->framesize
) : J
->L
->top
;
103 map
[f
++] = SNAP_MKPC(J
->pc
); /* The current PC is always the first entry. */
104 while (frame
> lim
) { /* Backwards traversal of all frames above base. */
105 if (frame_islua(frame
)) {
106 map
[f
++] = SNAP_MKPC(frame_pc(frame
));
107 frame
= frame_prevl(frame
);
108 } else if (frame_iscont(frame
)) {
109 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
110 map
[f
++] = SNAP_MKPC(frame_contpc(frame
));
111 frame
= frame_prevd(frame
);
113 lua_assert(!frame_isc(frame
));
114 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
115 frame
= frame_prevd(frame
);
118 if (frame
+ funcproto(frame_func(frame
))->framesize
> ftop
)
119 ftop
= frame
+ funcproto(frame_func(frame
))->framesize
;
121 lua_assert(f
== (MSize
)(1 + J
->framedepth
));
122 return (BCReg
)(ftop
- lim
);
125 /* Take a snapshot of the current stack. */
126 static void snapshot_stack(jit_State
*J
, SnapShot
*snap
, MSize nsnapmap
)
128 BCReg nslots
= J
->baseslot
+ J
->maxslot
;
131 /* Conservative estimate. */
132 lj_snap_grow_map(J
, nsnapmap
+ nslots
+ (MSize
)J
->framedepth
+1);
133 p
= &J
->cur
.snapmap
[nsnapmap
];
134 nent
= snapshot_slots(J
, p
, nslots
);
135 snap
->topslot
= (uint8_t)snapshot_framelinks(J
, p
+ nent
);
136 snap
->mapofs
= (uint16_t)nsnapmap
;
137 snap
->ref
= (IRRef1
)J
->cur
.nins
;
138 snap
->nent
= (uint8_t)nent
;
139 snap
->nslots
= (uint8_t)nslots
;
141 J
->cur
.nsnapmap
= (uint16_t)(nsnapmap
+ nent
+ 1 + J
->framedepth
);
144 /* Add or merge a snapshot. */
145 void lj_snap_add(jit_State
*J
)
147 MSize nsnap
= J
->cur
.nsnap
;
148 MSize nsnapmap
= J
->cur
.nsnapmap
;
149 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
150 if (J
->mergesnap
? !irt_isguard(J
->guardemit
) :
151 (nsnap
> 0 && J
->cur
.snap
[nsnap
-1].ref
== J
->cur
.nins
)) {
152 if (nsnap
== 1) { /* But preserve snap #0 PC. */
153 emitir_raw(IRT(IR_NOP
, IRT_NIL
), 0, 0);
156 nsnapmap
= J
->cur
.snap
[--nsnap
].mapofs
;
159 lj_snap_grow_buf(J
, nsnap
+1);
160 J
->cur
.nsnap
= (uint16_t)(nsnap
+1);
163 J
->guardemit
.irt
= 0;
164 snapshot_stack(J
, &J
->cur
.snap
[nsnap
], nsnapmap
);
167 /* -- Snapshot modification ----------------------------------------------- */
/* Number of stack slots the use/def analysis must be able to track. */
#define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
171 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
172 static BCReg
snap_usedef(jit_State
*J
, uint8_t *udf
,
173 const BCIns
*pc
, BCReg maxslot
)
178 if (maxslot
== 0) return 0;
179 #ifdef LUAJIT_USE_VALGRIND
180 /* Avoid errors for harmless reads beyond maxslot. */
181 memset(udf
, 1, SNAP_USEDEF_SLOTS
);
183 memset(udf
, 1, maxslot
);
186 /* Treat open upvalues as used. */
187 o
= gcref(J
->L
->openupval
);
189 if (uvval(gco2uv(o
)) < J
->L
->base
) break;
190 udf
[uvval(gco2uv(o
)) - J
->L
->base
] = 0;
191 o
= gcref(o
->gch
.nextgc
);
194 #define USE_SLOT(s) udf[(s)] &= ~1
195 #define DEF_SLOT(s) udf[(s)] *= 3
197 /* Scan through following bytecode and check for uses/defs. */
198 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
201 BCOp op
= bc_op(ins
);
202 switch (bcmode_b(op
)) {
203 case BCMvar
: USE_SLOT(bc_b(ins
)); break;
206 switch (bcmode_c(op
)) {
207 case BCMvar
: USE_SLOT(bc_c(ins
)); break;
209 lua_assert(op
== BC_CAT
);
210 for (s
= bc_b(ins
); s
<= bc_c(ins
); s
++) USE_SLOT(s
);
211 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
215 BCReg minslot
= bc_a(ins
);
216 if (op
>= BC_FORI
&& op
<= BC_JFORL
) minslot
+= FORL_EXT
;
217 else if (op
>= BC_ITERL
&& op
<= BC_JITERL
) minslot
+= bc_b(pc
[-2])-1;
218 else if (op
== BC_UCLO
) { pc
+= bc_j(ins
); break; }
219 for (s
= minslot
; s
< maxslot
; s
++) DEF_SLOT(s
);
220 return minslot
< maxslot
? minslot
: maxslot
;
223 if (op
== BC_JFORL
|| op
== BC_JITERL
|| op
== BC_JLOOP
) {
225 } else if (bc_isret(op
)) {
226 BCReg top
= op
== BC_RETM
? maxslot
: (bc_a(ins
) + bc_d(ins
)-1);
227 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
228 for (; s
< top
; s
++) USE_SLOT(s
);
229 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
233 case BCMfunc
: return maxslot
; /* NYI: will abort, anyway. */
236 switch (bcmode_a(op
)) {
237 case BCMvar
: USE_SLOT(bc_a(ins
)); break;
239 if (!(op
== BC_ISTC
|| op
== BC_ISFC
)) DEF_SLOT(bc_a(ins
));
242 if (op
>= BC_CALLM
&& op
<= BC_VARG
) {
243 BCReg top
= (op
== BC_CALLM
|| op
== BC_CALLMT
|| bc_c(ins
) == 0) ?
244 maxslot
: (bc_a(ins
) + bc_c(ins
));
245 s
= bc_a(ins
) - ((op
== BC_ITERC
|| op
== BC_ITERN
) ? 3 : 0);
246 for (; s
< top
; s
++) USE_SLOT(s
);
247 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
248 if (op
== BC_CALLT
|| op
== BC_CALLMT
) {
249 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
252 } else if (op
== BC_KNIL
) {
253 for (s
= bc_a(ins
); s
<= bc_d(ins
); s
++) DEF_SLOT(s
);
254 } else if (op
== BC_TSETM
) {
255 for (s
= bc_a(ins
)-1; s
< maxslot
; s
++) USE_SLOT(s
);
260 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
266 return 0; /* unreachable */
269 /* Purge dead slots before the next snapshot. */
270 void lj_snap_purge(jit_State
*J
)
272 uint8_t udf
[SNAP_USEDEF_SLOTS
];
273 BCReg maxslot
= J
->maxslot
;
274 BCReg s
= snap_usedef(J
, udf
, J
->pc
, maxslot
);
275 for (; s
< maxslot
; s
++)
277 J
->base
[s
] = 0; /* Purge dead slots. */
280 /* Shrink last snapshot. */
281 void lj_snap_shrink(jit_State
*J
)
283 SnapShot
*snap
= &J
->cur
.snap
[J
->cur
.nsnap
-1];
284 SnapEntry
*map
= &J
->cur
.snapmap
[snap
->mapofs
];
285 MSize n
, m
, nlim
, nent
= snap
->nent
;
286 uint8_t udf
[SNAP_USEDEF_SLOTS
];
287 BCReg maxslot
= J
->maxslot
;
288 BCReg minslot
= snap_usedef(J
, udf
, snap_pc(map
[nent
]), maxslot
);
289 BCReg baseslot
= J
->baseslot
;
292 snap
->nslots
= (uint8_t)maxslot
;
293 for (n
= m
= 0; n
< nent
; n
++) { /* Remove unused slots from snapshot. */
294 BCReg s
= snap_slot(map
[n
]);
295 if (s
< minslot
|| (s
< maxslot
&& udf
[s
-baseslot
] == 0))
296 map
[m
++] = map
[n
]; /* Only copy used slots. */
298 snap
->nent
= (uint8_t)m
;
299 nlim
= J
->cur
.nsnapmap
- snap
->mapofs
- 1;
300 while (n
<= nlim
) map
[m
++] = map
[n
++]; /* Move PC + frame links down. */
301 J
->cur
.nsnapmap
= (uint16_t)(snap
->mapofs
+ m
); /* Free up space in map. */
304 /* -- Snapshot access ----------------------------------------------------- */
306 /* Initialize a Bloom Filter with all renamed refs.
307 ** There are very few renames (often none), so the filter has
308 ** very few bits set. This makes it suitable for negative filtering.
310 static BloomFilter
snap_renamefilter(GCtrace
*T
, SnapNo lim
)
312 BloomFilter rfilt
= 0;
314 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
316 bloomset(rfilt
, ir
->op1
);
320 /* Process matching renames to find the original RegSP. */
321 static RegSP
snap_renameref(GCtrace
*T
, SnapNo lim
, IRRef ref
, RegSP rs
)
324 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
325 if (ir
->op1
== ref
&& ir
->op2
<= lim
)
330 /* Copy RegSP from parent snapshot to the parent links of the IR. */
331 IRIns
*lj_snap_regspmap(GCtrace
*T
, SnapNo snapno
, IRIns
*ir
)
333 SnapShot
*snap
= &T
->snap
[snapno
];
334 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
335 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
340 if (ir
->o
== IR_SLOAD
) {
341 if (!(ir
->op2
& IRSLOAD_PARENT
)) break;
343 lua_assert(n
< snap
->nent
);
344 if (snap_slot(map
[n
]) == ir
->op1
) {
345 ref
= snap_ref(map
[n
++]);
349 } else if (LJ_SOFTFP
&& ir
->o
== IR_HIOP
) {
351 } else if (ir
->o
== IR_PVAL
) {
352 ref
= ir
->op1
+ REF_BIAS
;
356 rs
= T
->ir
[ref
].prev
;
357 if (bloomtest(rfilt
, ref
))
358 rs
= snap_renameref(T
, snapno
, ref
, rs
);
359 ir
->prev
= (uint16_t)rs
;
360 lua_assert(regsp_used(rs
));
365 /* -- Snapshot replay ----------------------------------------------------- */
367 /* Replay constant from parent trace. */
368 static TRef
snap_replay_const(jit_State
*J
, IRIns
*ir
)
370 /* Only have to deal with constants that can occur in stack slots. */
371 switch ((IROp
)ir
->o
) {
372 case IR_KPRI
: return TREF_PRI(irt_type(ir
->t
));
373 case IR_KINT
: return lj_ir_kint(J
, ir
->i
);
374 case IR_KGC
: return lj_ir_kgc(J
, ir_kgc(ir
), irt_t(ir
->t
));
375 case IR_KNUM
: return lj_ir_k64(J
, IR_KNUM
, ir_knum(ir
));
376 case IR_KINT64
: return lj_ir_k64(J
, IR_KINT64
, ir_kint64(ir
));
377 case IR_KPTR
: return lj_ir_kptr(J
, ir_kptr(ir
)); /* Continuation. */
378 default: lua_assert(0); return TREF_NIL
; break;
382 /* De-duplicate parent reference. */
383 static TRef
snap_dedup(jit_State
*J
, SnapEntry
*map
, MSize nmax
, IRRef ref
)
386 for (j
= 0; j
< nmax
; j
++)
387 if (snap_ref(map
[j
]) == ref
)
388 return J
->slot
[snap_slot(map
[j
])] & ~(SNAP_CONT
|SNAP_FRAME
);
392 /* Emit parent reference with de-duplication. */
393 static TRef
snap_pref(jit_State
*J
, GCtrace
*T
, SnapEntry
*map
, MSize nmax
,
394 BloomFilter seen
, IRRef ref
)
396 IRIns
*ir
= &T
->ir
[ref
];
399 tr
= snap_replay_const(J
, ir
);
400 else if (!regsp_used(ir
->prev
))
402 else if (!bloomtest(seen
, ref
) || (tr
= snap_dedup(J
, map
, nmax
, ref
)) == 0)
403 tr
= emitir(IRT(IR_PVAL
, irt_type(ir
->t
)), ref
- REF_BIAS
, 0);
407 /* Check whether a sunk store corresponds to an allocation. Slow path. */
408 static int snap_sunk_store2(jit_State
*J
, IRIns
*ira
, IRIns
*irs
)
410 if (irs
->o
== IR_ASTORE
|| irs
->o
== IR_HSTORE
||
411 irs
->o
== IR_FSTORE
|| irs
->o
== IR_XSTORE
) {
412 IRIns
*irk
= IR(irs
->op1
);
413 if (irk
->o
== IR_AREF
|| irk
->o
== IR_HREFK
)
415 return (IR(irk
->op1
) == ira
);
420 /* Check whether a sunk store corresponds to an allocation. Fast path. */
421 static LJ_AINLINE
int snap_sunk_store(jit_State
*J
, IRIns
*ira
, IRIns
*irs
)
424 return (ira
+ irs
->s
== irs
); /* Fast check. */
425 return snap_sunk_store2(J
, ira
, irs
);
428 /* Replay snapshot state to setup side trace. */
429 void lj_snap_replay(jit_State
*J
, GCtrace
*T
)
431 SnapShot
*snap
= &T
->snap
[J
->exitno
];
432 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
433 MSize n
, nent
= snap
->nent
;
434 BloomFilter seen
= 0;
437 /* Emit IR for slots inherited from parent snapshot. */
438 for (n
= 0; n
< nent
; n
++) {
439 SnapEntry sn
= map
[n
];
440 BCReg s
= snap_slot(sn
);
441 IRRef ref
= snap_ref(sn
);
442 IRIns
*ir
= &T
->ir
[ref
];
444 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
445 if (bloomtest(seen
, ref
) && (tr
= snap_dedup(J
, map
, n
, ref
)) != 0)
448 if (irref_isk(ref
)) {
449 tr
= snap_replay_const(J
, ir
);
450 } else if (!regsp_used(ir
->prev
)) {
455 IRType t
= irt_type(ir
->t
);
456 uint32_t mode
= IRSLOAD_INHERIT
|IRSLOAD_PARENT
;
457 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
)) t
= IRT_NUM
;
458 if (ir
->o
== IR_SLOAD
) mode
|= (ir
->op2
& IRSLOAD_READONLY
);
459 tr
= emitir_raw(IRT(IR_SLOAD
, t
), s
, mode
);
462 J
->slot
[s
] = tr
| (sn
&(SNAP_CONT
|SNAP_FRAME
)); /* Same as TREF_* flags. */
463 J
->framedepth
+= ((sn
& (SNAP_CONT
|SNAP_FRAME
)) && s
);
464 if ((sn
& SNAP_FRAME
))
468 IRIns
*irlast
= &T
->ir
[snap
->ref
];
470 /* Emit dependent PVALs. */
471 for (n
= 0; n
< nent
; n
++) {
472 SnapEntry sn
= map
[n
];
473 IRRef refp
= snap_ref(sn
);
474 IRIns
*ir
= &T
->ir
[refp
];
475 if (regsp_reg(ir
->r
) == RID_SUNK
) {
476 if (J
->slot
[snap_slot(sn
)] != snap_slot(sn
)) continue;
478 lua_assert(ir
->o
== IR_TNEW
|| ir
->o
== IR_TDUP
||
479 ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
);
480 if (ir
->op1
>= T
->nk
) snap_pref(J
, T
, map
, nent
, seen
, ir
->op1
);
481 if (ir
->op2
>= T
->nk
) snap_pref(J
, T
, map
, nent
, seen
, ir
->op2
);
482 if (LJ_HASFFI
&& ir
->o
== IR_CNEWI
) {
483 if (LJ_32
&& refp
+1 < T
->nins
&& (ir
+1)->o
== IR_HIOP
)
484 snap_pref(J
, T
, map
, nent
, seen
, (ir
+1)->op2
);
487 for (irs
= ir
+1; irs
< irlast
; irs
++)
488 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
489 if (snap_pref(J
, T
, map
, nent
, seen
, irs
->op2
) == 0)
490 snap_pref(J
, T
, map
, nent
, seen
, T
->ir
[irs
->op2
].op1
);
491 else if ((LJ_SOFTFP
|| (LJ_32
&& LJ_HASFFI
)) &&
492 irs
+1 < irlast
&& (irs
+1)->o
== IR_HIOP
)
493 snap_pref(J
, T
, map
, nent
, seen
, (irs
+1)->op2
);
496 } else if (!irref_isk(refp
) && !regsp_used(ir
->prev
)) {
497 lua_assert(ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
498 J
->slot
[snap_slot(sn
)] = snap_pref(J
, T
, map
, nent
, seen
, ir
->op1
);
501 /* Replay sunk instructions. */
502 for (n
= 0; pass23
&& n
< nent
; n
++) {
503 SnapEntry sn
= map
[n
];
504 IRRef refp
= snap_ref(sn
);
505 IRIns
*ir
= &T
->ir
[refp
];
506 if (regsp_reg(ir
->r
) == RID_SUNK
) {
508 if (J
->slot
[snap_slot(sn
)] != snap_slot(sn
)) { /* De-dup allocs. */
509 J
->slot
[snap_slot(sn
)] = J
->slot
[J
->slot
[snap_slot(sn
)]];
513 if (op1
>= T
->nk
) op1
= snap_pref(J
, T
, map
, nent
, seen
, op1
);
515 if (op2
>= T
->nk
) op2
= snap_pref(J
, T
, map
, nent
, seen
, op2
);
516 if (LJ_HASFFI
&& ir
->o
== IR_CNEWI
) {
517 if (LJ_32
&& refp
+1 < T
->nins
&& (ir
+1)->o
== IR_HIOP
) {
518 lj_needsplit(J
); /* Emit joining HIOP. */
519 op2
= emitir_raw(IRT(IR_HIOP
, IRT_I64
), op2
,
520 snap_pref(J
, T
, map
, nent
, seen
, (ir
+1)->op2
));
522 J
->slot
[snap_slot(sn
)] = emitir(ir
->ot
, op1
, op2
);
525 TRef tr
= emitir(ir
->ot
, op1
, op2
);
526 J
->slot
[snap_slot(sn
)] = tr
;
527 for (irs
= ir
+1; irs
< irlast
; irs
++)
528 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
529 IRIns
*irr
= &T
->ir
[irs
->op1
];
530 TRef val
, key
= irr
->op2
, tmp
= tr
;
531 if (irr
->o
!= IR_FREF
) {
532 IRIns
*irk
= &T
->ir
[key
];
533 if (irr
->o
== IR_HREFK
)
534 key
= lj_ir_kslot(J
, snap_replay_const(J
, &T
->ir
[irk
->op1
]),
537 key
= snap_replay_const(J
, irk
);
538 if (irr
->o
== IR_HREFK
|| irr
->o
== IR_AREF
) {
539 IRIns
*irf
= &T
->ir
[irr
->op1
];
540 tmp
= emitir(irf
->ot
, tmp
, irf
->op2
);
543 tmp
= emitir(irr
->ot
, tmp
, key
);
544 val
= snap_pref(J
, T
, map
, nent
, seen
, irs
->op2
);
546 IRIns
*irc
= &T
->ir
[irs
->op2
];
547 lua_assert(irc
->o
== IR_CONV
&& irc
->op2
== IRCONV_NUM_INT
);
548 val
= snap_pref(J
, T
, map
, nent
, seen
, irc
->op1
);
549 val
= emitir(IRTN(IR_CONV
), val
, IRCONV_NUM_INT
);
550 } else if ((LJ_SOFTFP
|| (LJ_32
&& LJ_HASFFI
)) &&
551 irs
+1 < irlast
&& (irs
+1)->o
== IR_HIOP
) {
553 if (LJ_SOFTFP
&& irt_type((irs
+1)->t
) == IRT_SOFTFP
)
556 if (irref_isk(irs
->op2
) && irref_isk((irs
+1)->op2
)) {
557 uint64_t k
= (uint32_t)T
->ir
[irs
->op2
].i
+
558 ((uint64_t)T
->ir
[(irs
+1)->op2
].i
<< 32);
559 val
= lj_ir_k64(J
, t
== IRT_I64
? IR_KINT64
: IR_KNUM
,
560 lj_ir_k64_find(J
, k
));
562 val
= emitir_raw(IRT(IR_HIOP
, t
), val
,
563 snap_pref(J
, T
, map
, nent
, seen
, (irs
+1)->op2
));
565 tmp
= emitir(IRT(irs
->o
, t
), tmp
, val
);
568 tmp
= emitir(irs
->ot
, tmp
, val
);
569 } else if (LJ_HASFFI
&& irs
->o
== IR_XBAR
&& ir
->o
== IR_CNEW
) {
570 emitir(IRT(IR_XBAR
, IRT_NIL
), 0, 0);
576 J
->base
= J
->slot
+ J
->baseslot
;
577 J
->maxslot
= snap
->nslots
- J
->baseslot
;
579 if (pass23
) /* Need explicit GC step _after_ initial snapshot. */
580 emitir_raw(IRTG(IR_GCSTEP
, IRT_NIL
), 0, 0);
583 /* -- Snapshot restore ---------------------------------------------------- */
/* Forward declaration for snap_unsink (defined below). */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o);
589 /* Restore a value from the trace exit state. */
590 static void snap_restoreval(jit_State
*J
, GCtrace
*T
, ExitState
*ex
,
591 SnapNo snapno
, BloomFilter rfilt
,
592 IRRef ref
, TValue
*o
)
594 IRIns
*ir
= &T
->ir
[ref
];
597 if (irref_isk(ref
)) { /* Restore constant slot. */
598 lj_ir_kvalue(J
->L
, o
, ir
);
601 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
)))
602 rs
= snap_renameref(T
, snapno
, ref
, rs
);
603 if (ra_hasspill(regsp_spill(rs
))) { /* Restore from spill slot. */
604 int32_t *sps
= &ex
->spill
[regsp_spill(rs
)];
605 if (irt_isinteger(t
)) {
608 } else if (irt_isnum(t
)) {
609 o
->u64
= *(uint64_t *)sps
;
611 } else if (LJ_64
&& irt_islightud(t
)) {
612 /* 64 bit lightuserdata which may escape already has the tag bits. */
613 o
->u64
= *(uint64_t *)sps
;
615 lua_assert(!irt_ispri(t
)); /* PRI refs never have a spill slot. */
616 setgcrefi(o
->gcr
, *sps
);
617 setitype(o
, irt_toitype(t
));
619 } else { /* Restore from register. */
620 Reg r
= regsp_reg(rs
);
622 lua_assert(ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
623 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ir
->op1
, o
);
624 if (LJ_DUALNUM
) setnumV(o
, (lua_Number
)intV(o
));
626 } else if (irt_isinteger(t
)) {
627 setintV(o
, (int32_t)ex
->gpr
[r
-RID_MIN_GPR
]);
629 } else if (irt_isnum(t
)) {
630 setnumV(o
, ex
->fpr
[r
-RID_MIN_FPR
]);
632 } else if (LJ_64
&& irt_islightud(t
)) {
633 /* 64 bit lightuserdata which may escape already has the tag bits. */
634 o
->u64
= ex
->gpr
[r
-RID_MIN_GPR
];
637 setgcrefi(o
->gcr
, ex
->gpr
[r
-RID_MIN_GPR
]);
638 setitype(o
, irt_toitype(t
));
644 /* Restore raw data from the trace exit state. */
645 static void snap_restoredata(GCtrace
*T
, ExitState
*ex
,
646 SnapNo snapno
, BloomFilter rfilt
,
647 IRRef ref
, void *dst
, CTSize sz
)
649 IRIns
*ir
= &T
->ir
[ref
];
653 if (irref_isk(ref
)) {
654 if (ir
->o
== IR_KNUM
|| ir
->o
== IR_KINT64
) {
655 src
= mref(ir
->ptr
, int32_t);
656 } else if (sz
== 8) {
657 tmp
= (uint64_t)(uint32_t)ir
->i
;
658 src
= (int32_t *)&tmp
;
663 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
)))
664 rs
= snap_renameref(T
, snapno
, ref
, rs
);
665 if (ra_hasspill(regsp_spill(rs
))) {
666 src
= &ex
->spill
[regsp_spill(rs
)];
667 if (sz
== 8 && !irt_is64(ir
->t
)) {
668 tmp
= (uint64_t)(uint32_t)*src
;
669 src
= (int32_t *)&tmp
;
672 Reg r
= regsp_reg(rs
);
674 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
675 lua_assert(sz
== 8 && ir
->o
== IR_CONV
&& ir
->op2
== IRCONV_NUM_INT
);
676 snap_restoredata(T
, ex
, snapno
, rfilt
, ir
->op1
, dst
, 4);
677 *(lua_Number
*)dst
= (lua_Number
)*(int32_t *)dst
;
680 src
= (int32_t *)&ex
->gpr
[r
-RID_MIN_GPR
];
682 if (r
>= RID_MAX_GPR
) {
683 src
= (int32_t *)&ex
->fpr
[r
-RID_MIN_FPR
];
685 if (sz
== 4) { /* PPC FPRs are always doubles. */
686 *(float *)dst
= (float)*(double *)src
;
690 if (LJ_BE
&& sz
== 4) src
++;
696 lua_assert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
697 if (sz
== 4) *(int32_t *)dst
= *src
;
698 else if (sz
== 8) *(int64_t *)dst
= *(int64_t *)src
;
699 else if (sz
== 1) *(int8_t *)dst
= (int8_t)*src
;
700 else *(int16_t *)dst
= (int16_t)*src
;
704 /* Unsink allocation from the trace exit state. Unsink sunk stores. */
705 static void snap_unsink(jit_State
*J
, GCtrace
*T
, ExitState
*ex
,
706 SnapNo snapno
, BloomFilter rfilt
,
707 IRIns
*ir
, TValue
*o
)
709 lua_assert(ir
->o
== IR_TNEW
|| ir
->o
== IR_TDUP
||
710 ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
);
712 if (ir
->o
== IR_CNEW
|| ir
->o
== IR_CNEWI
) {
713 CTState
*cts
= ctype_cts(J
->L
);
714 CTypeID id
= (CTypeID
)T
->ir
[ir
->op1
].i
;
715 CTSize sz
= lj_ctype_size(cts
, id
);
716 GCcdata
*cd
= lj_cdata_new(cts
, id
, sz
);
717 setcdataV(J
->L
, o
, cd
);
718 if (ir
->o
== IR_CNEWI
) {
719 uint8_t *p
= (uint8_t *)cdataptr(cd
);
720 lua_assert(sz
== 4 || sz
== 8);
721 if (LJ_32
&& sz
== 8 && ir
+1 < T
->ir
+ T
->nins
&& (ir
+1)->o
== IR_HIOP
) {
722 snap_restoredata(T
, ex
, snapno
, rfilt
, (ir
+1)->op2
, LJ_LE
?p
+4:p
, 4);
726 snap_restoredata(T
, ex
, snapno
, rfilt
, ir
->op2
, p
, sz
);
728 IRIns
*irs
, *irlast
= &T
->ir
[T
->snap
[snapno
].ref
];
729 for (irs
= ir
+1; irs
< irlast
; irs
++)
730 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
731 IRIns
*iro
= &T
->ir
[T
->ir
[irs
->op1
].op2
];
732 uint8_t *p
= (uint8_t *)cd
;
734 lua_assert(irs
->o
== IR_XSTORE
&& T
->ir
[irs
->op1
].o
== IR_ADD
);
735 lua_assert(iro
->o
== IR_KINT
|| iro
->o
== IR_KINT64
);
736 if (irt_is64(irs
->t
)) szs
= 8;
737 else if (irt_isi8(irs
->t
) || irt_isu8(irs
->t
)) szs
= 1;
738 else if (irt_isi16(irs
->t
) || irt_isu16(irs
->t
)) szs
= 2;
740 if (LJ_64
&& iro
->o
== IR_KINT64
)
741 p
+= (int64_t)ir_k64(iro
)->u64
;
744 lua_assert(p
>= (uint8_t *)cdataptr(cd
) &&
745 p
+ szs
<= (uint8_t *)cdataptr(cd
) + sz
);
746 if (LJ_32
&& irs
+1 < T
->ir
+ T
->nins
&& (irs
+1)->o
== IR_HIOP
) {
747 lua_assert(szs
== 4);
748 snap_restoredata(T
, ex
, snapno
, rfilt
, (irs
+1)->op2
, LJ_LE
?p
+4:p
,4);
751 snap_restoredata(T
, ex
, snapno
, rfilt
, irs
->op2
, p
, szs
);
758 GCtab
*t
= ir
->o
== IR_TNEW
? lj_tab_new(J
->L
, ir
->op1
, ir
->op2
) :
759 lj_tab_dup(J
->L
, ir_ktab(&T
->ir
[ir
->op1
]));
761 irlast
= &T
->ir
[T
->snap
[snapno
].ref
];
762 for (irs
= ir
+1; irs
< irlast
; irs
++)
763 if (irs
->r
== RID_SINK
&& snap_sunk_store(J
, ir
, irs
)) {
764 IRIns
*irk
= &T
->ir
[irs
->op1
];
766 lua_assert(irs
->o
== IR_ASTORE
|| irs
->o
== IR_HSTORE
||
767 irs
->o
== IR_FSTORE
);
768 if (irk
->o
== IR_FREF
) {
769 lua_assert(irk
->op2
== IRFL_TAB_META
);
770 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, irs
->op2
, &tmp
);
771 /* NOBARRIER: The table is new (marked white). */
772 setgcref(t
->metatable
, obj2gco(tabV(&tmp
)));
774 irk
= &T
->ir
[irk
->op2
];
775 if (irk
->o
== IR_KSLOT
) irk
= &T
->ir
[irk
->op1
];
776 lj_ir_kvalue(J
->L
, &tmp
, irk
);
777 val
= lj_tab_set(J
->L
, t
, &tmp
);
778 /* NOBARRIER: The table is new (marked white). */
779 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, irs
->op2
, val
);
780 if (LJ_SOFTFP
&& irs
+1 < T
->ir
+ T
->nins
&& (irs
+1)->o
== IR_HIOP
) {
781 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, (irs
+1)->op2
, &tmp
);
782 val
->u32
.hi
= tmp
.u32
.lo
;
789 /* Restore interpreter state from exit state with the help of a snapshot. */
790 const BCIns
*lj_snap_restore(jit_State
*J
, void *exptr
)
792 ExitState
*ex
= (ExitState
*)exptr
;
793 SnapNo snapno
= J
->exitno
; /* For now, snapno == exitno. */
794 GCtrace
*T
= traceref(J
, J
->parent
);
795 SnapShot
*snap
= &T
->snap
[snapno
];
796 MSize n
, nent
= snap
->nent
;
797 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
798 SnapEntry
*flinks
= &T
->snapmap
[snap_nextofs(T
, snap
)-1];
801 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
802 const BCIns
*pc
= snap_pc(map
[nent
]);
805 /* Set interpreter PC to the next PC to get correct error messages. */
806 setcframe_pc(cframe_raw(L
->cframe
), pc
+1);
808 /* Make sure the stack is big enough for the slots from the snapshot. */
809 if (LJ_UNLIKELY(L
->base
+ snap
->topslot
>= tvref(L
->maxstack
))) {
810 L
->top
= curr_topL(L
);
811 lj_state_growstack(L
, snap
->topslot
- curr_proto(L
)->framesize
);
814 /* Fill stack slots with data from the registers and spill slots. */
816 ftsz0
= frame_ftsz(frame
); /* Preserve link to previous frame in slot #0. */
817 for (n
= 0; n
< nent
; n
++) {
818 SnapEntry sn
= map
[n
];
819 if (!(sn
& SNAP_NORESTORE
)) {
820 TValue
*o
= &frame
[snap_slot(sn
)];
821 IRRef ref
= snap_ref(sn
);
822 IRIns
*ir
= &T
->ir
[ref
];
823 if (ir
->r
== RID_SUNK
) {
825 for (j
= 0; j
< n
; j
++)
826 if (snap_ref(map
[j
]) == ref
) { /* De-duplicate sunk allocations. */
827 copyTV(L
, o
, &frame
[snap_slot(map
[j
])]);
830 snap_unsink(J
, T
, ex
, snapno
, rfilt
, ir
, o
);
834 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ref
, o
);
835 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
) && tvisint(o
)) {
837 snap_restoreval(J
, T
, ex
, snapno
, rfilt
, ref
+1, &tmp
);
838 o
->u32
.hi
= tmp
.u32
.lo
;
839 } else if ((sn
& (SNAP_CONT
|SNAP_FRAME
))) {
840 /* Overwrite tag with frame link. */
841 o
->fr
.tp
.ftsz
= snap_slot(sn
) != 0 ? (int32_t)*flinks
-- : ftsz0
;
846 lua_assert(map
+ nent
== flinks
);
848 /* Compute current stack top. */
849 switch (bc_op(*pc
)) {
851 if (bc_op(*pc
) < BC_FUNCF
) {
852 L
->top
= curr_topL(L
);
856 case BC_CALLM
: case BC_CALLMT
: case BC_RETM
: case BC_TSETM
:
857 L
->top
= frame
+ snap
->nslots
;