3 ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
22 #include "lj_target.h"
24 /* Some local macros to save typing. Undef'd at the end. */
25 #define IR(ref) (&J->cur.ir[(ref)])
27 /* -- Snapshot buffer allocation ------------------------------------------ */
29 /* Grow snapshot buffer. */
30 void lj_snap_grow_buf_(jit_State
*J
, MSize need
)
32 MSize maxsnap
= (MSize
)J
->param
[JIT_P_maxsnap
];
34 lj_trace_err(J
, LJ_TRERR_SNAPOV
);
35 lj_mem_growvec(J
->L
, J
->snapbuf
, J
->sizesnap
, maxsnap
, SnapShot
);
36 J
->cur
.snap
= J
->snapbuf
;
39 /* Grow snapshot map buffer. */
40 void lj_snap_grow_map_(jit_State
*J
, MSize need
)
42 if (need
< 2*J
->sizesnapmap
)
43 need
= 2*J
->sizesnapmap
;
46 J
->snapmapbuf
= (SnapEntry
*)lj_mem_realloc(J
->L
, J
->snapmapbuf
,
47 J
->sizesnapmap
*sizeof(SnapEntry
), need
*sizeof(SnapEntry
));
48 J
->cur
.snapmap
= J
->snapmapbuf
;
49 J
->sizesnapmap
= need
;
52 /* -- Snapshot generation ------------------------------------------------- */
54 /* Add all modified slots to the snapshot. */
55 static MSize
snapshot_slots(jit_State
*J
, SnapEntry
*map
, BCReg nslots
)
57 IRRef retf
= J
->chain
[IR_RETF
]; /* Limits SLOAD restore elimination. */
60 for (s
= 0; s
< nslots
; s
++) {
62 IRRef ref
= tref_ref(tr
);
64 SnapEntry sn
= SNAP_TR(s
, tr
);
66 if (!(sn
& (SNAP_CONT
|SNAP_FRAME
)) &&
67 ir
->o
== IR_SLOAD
&& ir
->op1
== s
&& ref
> retf
) {
68 /* No need to snapshot unmodified non-inherited slots. */
69 if (!(ir
->op2
& IRSLOAD_INHERIT
))
71 /* No need to restore readonly slots and unmodified non-parent slots. */
72 if (!(LJ_DUALNUM
&& (ir
->op2
& IRSLOAD_CONVERT
)) &&
73 (ir
->op2
& (IRSLOAD_READONLY
|IRSLOAD_PARENT
)) != IRSLOAD_PARENT
)
76 if (LJ_SOFTFP
&& irt_isnum(ir
->t
))
84 /* Add frame links at the end of the snapshot. */
85 static BCReg
snapshot_framelinks(jit_State
*J
, SnapEntry
*map
)
87 cTValue
*frame
= J
->L
->base
- 1;
88 cTValue
*lim
= J
->L
->base
- J
->baseslot
;
89 cTValue
*ftop
= frame
+ funcproto(frame_func(frame
))->framesize
;
91 map
[f
++] = SNAP_MKPC(J
->pc
); /* The current PC is always the first entry. */
92 while (frame
> lim
) { /* Backwards traversal of all frames above base. */
93 if (frame_islua(frame
)) {
94 map
[f
++] = SNAP_MKPC(frame_pc(frame
));
95 frame
= frame_prevl(frame
);
96 if (frame
+ funcproto(frame_func(frame
))->framesize
> ftop
)
97 ftop
= frame
+ funcproto(frame_func(frame
))->framesize
;
98 } else if (frame_iscont(frame
)) {
99 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
100 map
[f
++] = SNAP_MKPC(frame_contpc(frame
));
101 frame
= frame_prevd(frame
);
103 lua_assert(!frame_isc(frame
));
104 map
[f
++] = SNAP_MKFTSZ(frame_ftsz(frame
));
105 frame
= frame_prevd(frame
);
108 lua_assert(f
== (MSize
)(1 + J
->framedepth
));
109 return (BCReg
)(ftop
- lim
);
112 /* Take a snapshot of the current stack. */
113 static void snapshot_stack(jit_State
*J
, SnapShot
*snap
, MSize nsnapmap
)
115 BCReg nslots
= J
->baseslot
+ J
->maxslot
;
118 /* Conservative estimate. */
119 lj_snap_grow_map(J
, nsnapmap
+ nslots
+ (MSize
)J
->framedepth
+1);
120 p
= &J
->cur
.snapmap
[nsnapmap
];
121 nent
= snapshot_slots(J
, p
, nslots
);
122 snap
->topslot
= (uint8_t)snapshot_framelinks(J
, p
+ nent
);
123 snap
->mapofs
= (uint16_t)nsnapmap
;
124 snap
->ref
= (IRRef1
)J
->cur
.nins
;
125 snap
->nent
= (uint8_t)nent
;
126 snap
->nslots
= (uint8_t)nslots
;
128 J
->cur
.nsnapmap
= (uint16_t)(nsnapmap
+ nent
+ 1 + J
->framedepth
);
131 /* Add or merge a snapshot. */
132 void lj_snap_add(jit_State
*J
)
134 MSize nsnap
= J
->cur
.nsnap
;
135 MSize nsnapmap
= J
->cur
.nsnapmap
;
136 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
137 if (J
->mergesnap
? !irt_isguard(J
->guardemit
) :
138 (nsnap
> 0 && J
->cur
.snap
[nsnap
-1].ref
== J
->cur
.nins
)) {
139 nsnapmap
= J
->cur
.snap
[--nsnap
].mapofs
;
141 lj_snap_grow_buf(J
, nsnap
+1);
142 J
->cur
.nsnap
= (uint16_t)(nsnap
+1);
145 J
->guardemit
.irt
= 0;
146 snapshot_stack(J
, &J
->cur
.snap
[nsnap
], nsnapmap
);
149 /* -- Snapshot modification ----------------------------------------------- */
151 #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
153 /* Find unused slots with reaching-definitions bytecode data-flow analysis. */
154 static BCReg
snap_usedef(jit_State
*J
, uint8_t *udf
,
155 const BCIns
*pc
, BCReg maxslot
)
160 if (maxslot
== 0) return 0;
161 #ifdef LUAJIT_USE_VALGRIND
162 /* Avoid errors for harmless reads beyond maxslot. */
163 memset(udf
, 1, SNAP_USEDEF_SLOTS
);
165 memset(udf
, 1, maxslot
);
168 /* Treat open upvalues as used. */
169 o
= gcref(J
->L
->openupval
);
171 if (uvval(gco2uv(o
)) < J
->L
->base
) break;
172 udf
[uvval(gco2uv(o
)) - J
->L
->base
] = 0;
173 o
= gcref(o
->gch
.nextgc
);
176 #define USE_SLOT(s) udf[(s)] &= ~1
177 #define DEF_SLOT(s) udf[(s)] *= 3
179 /* Scan through following bytecode and check for uses/defs. */
180 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
183 BCOp op
= bc_op(ins
);
184 switch (bcmode_b(op
)) {
185 case BCMvar
: USE_SLOT(bc_b(ins
)); break;
188 switch (bcmode_c(op
)) {
189 case BCMvar
: USE_SLOT(bc_c(ins
)); break;
191 lua_assert(op
== BC_CAT
);
192 for (s
= bc_b(ins
); s
<= bc_c(ins
); s
++) USE_SLOT(s
);
193 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
197 BCReg minslot
= bc_a(ins
);
198 if (op
>= BC_FORI
&& op
<= BC_JFORL
) minslot
+= FORL_EXT
;
199 else if (op
>= BC_ITERL
&& op
<= BC_JITERL
) minslot
+= bc_b(pc
[-2])-1;
200 else if (op
== BC_UCLO
) { pc
+= bc_j(ins
); break; }
201 for (s
= minslot
; s
< maxslot
; s
++) DEF_SLOT(s
);
202 return minslot
< maxslot
? minslot
: maxslot
;
205 if (op
== BC_JFORL
|| op
== BC_JITERL
|| op
== BC_JLOOP
) {
207 } else if (bc_isret(op
)) {
208 BCReg top
= op
== BC_RETM
? maxslot
: (bc_a(ins
) + bc_d(ins
)-1);
209 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
210 for (; s
< top
; s
++) USE_SLOT(s
);
211 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
215 case BCMfunc
: return maxslot
; /* NYI: will abort, anyway. */
218 switch (bcmode_a(op
)) {
219 case BCMvar
: USE_SLOT(bc_a(ins
)); break;
221 if (!(op
== BC_ISTC
|| op
== BC_ISFC
)) DEF_SLOT(bc_a(ins
));
224 if (op
>= BC_CALLM
&& op
<= BC_VARG
) {
225 BCReg top
= (op
== BC_CALLM
|| op
== BC_CALLMT
|| bc_c(ins
) == 0) ?
226 maxslot
: (bc_a(ins
) + bc_c(ins
));
227 s
= bc_a(ins
) - ((op
== BC_ITERC
|| op
== BC_ITERN
) ? 3 : 0);
228 for (; s
< top
; s
++) USE_SLOT(s
);
229 for (; s
< maxslot
; s
++) DEF_SLOT(s
);
230 if (op
== BC_CALLT
|| op
== BC_CALLMT
) {
231 for (s
= 0; s
< bc_a(ins
); s
++) DEF_SLOT(s
);
234 } else if (op
== BC_KNIL
) {
235 for (s
= bc_a(ins
); s
<= bc_d(ins
); s
++) DEF_SLOT(s
);
236 } else if (op
== BC_TSETM
) {
237 for (s
= bc_a(ins
)-1; s
< maxslot
; s
++) USE_SLOT(s
);
242 lua_assert(pc
>= proto_bc(J
->pt
) && pc
< proto_bc(J
->pt
) + J
->pt
->sizebc
);
248 return 0; /* unreachable */
251 /* Purge dead slots before the next snapshot. */
252 void lj_snap_purge(jit_State
*J
)
254 uint8_t udf
[SNAP_USEDEF_SLOTS
];
255 BCReg maxslot
= J
->maxslot
;
256 BCReg s
= snap_usedef(J
, udf
, J
->pc
, maxslot
);
257 for (; s
< maxslot
; s
++)
259 J
->base
[s
] = 0; /* Purge dead slots. */
262 /* Shrink last snapshot. */
263 void lj_snap_shrink(jit_State
*J
)
265 SnapShot
*snap
= &J
->cur
.snap
[J
->cur
.nsnap
-1];
266 SnapEntry
*map
= &J
->cur
.snapmap
[snap
->mapofs
];
267 MSize n
, m
, nlim
, nent
= snap
->nent
;
268 uint8_t udf
[SNAP_USEDEF_SLOTS
];
269 BCReg maxslot
= J
->maxslot
;
270 BCReg minslot
= snap_usedef(J
, udf
, snap_pc(map
[nent
]), maxslot
);
271 BCReg baseslot
= J
->baseslot
;
274 snap
->nslots
= (uint8_t)maxslot
;
275 for (n
= m
= 0; n
< nent
; n
++) { /* Remove unused slots from snapshot. */
276 BCReg s
= snap_slot(map
[n
]);
277 if (s
< minslot
|| (s
< maxslot
&& udf
[s
-baseslot
] == 0))
278 map
[m
++] = map
[n
]; /* Only copy used slots. */
280 snap
->nent
= (uint8_t)m
;
281 nlim
= J
->cur
.nsnapmap
- snap
->mapofs
- 1;
282 while (n
<= nlim
) map
[m
++] = map
[n
++]; /* Move PC + frame links down. */
283 J
->cur
.nsnapmap
= (uint16_t)(snap
->mapofs
+ m
); /* Free up space in map. */
286 /* -- Snapshot access ----------------------------------------------------- */
288 /* Initialize a Bloom Filter with all renamed refs.
289 ** There are very few renames (often none), so the filter has
290 ** very few bits set. This makes it suitable for negative filtering.
292 static BloomFilter
snap_renamefilter(GCtrace
*T
, SnapNo lim
)
294 BloomFilter rfilt
= 0;
296 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
298 bloomset(rfilt
, ir
->op1
);
302 /* Process matching renames to find the original RegSP. */
303 static RegSP
snap_renameref(GCtrace
*T
, SnapNo lim
, IRRef ref
, RegSP rs
)
306 for (ir
= &T
->ir
[T
->nins
-1]; ir
->o
== IR_RENAME
; ir
--)
307 if (ir
->op1
== ref
&& ir
->op2
<= lim
)
312 /* Convert a snapshot into a linear slot -> RegSP map.
313 ** Note: unused slots are not initialized!
315 void lj_snap_regspmap(uint16_t *rsmap
, GCtrace
*T
, SnapNo snapno
, int hi
)
317 SnapShot
*snap
= &T
->snap
[snapno
];
318 MSize n
, nent
= snap
->nent
;
319 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
320 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
321 for (n
= 0; n
< nent
; n
++) {
322 SnapEntry sn
= map
[n
];
323 IRRef ref
= snap_ref(sn
);
324 if (!irref_isk(ref
) &&
325 ((LJ_SOFTFP
&& hi
) ? (ref
++, (sn
& SNAP_SOFTFPNUM
)) : 1)) {
326 IRIns
*ir
= &T
->ir
[ref
];
327 uint32_t rs
= ir
->prev
;
328 if (bloomtest(rfilt
, ref
))
329 rs
= snap_renameref(T
, snapno
, ref
, rs
);
330 rsmap
[snap_slot(sn
)] = (uint16_t)rs
;
335 /* Restore interpreter state from exit state with the help of a snapshot. */
336 const BCIns
*lj_snap_restore(jit_State
*J
, void *exptr
)
338 ExitState
*ex
= (ExitState
*)exptr
;
339 SnapNo snapno
= J
->exitno
; /* For now, snapno == exitno. */
340 GCtrace
*T
= traceref(J
, J
->parent
);
341 SnapShot
*snap
= &T
->snap
[snapno
];
342 MSize n
, nent
= snap
->nent
;
343 SnapEntry
*map
= &T
->snapmap
[snap
->mapofs
];
344 SnapEntry
*flinks
= &T
->snapmap
[snap_nextofs(T
, snap
)-1];
347 BloomFilter rfilt
= snap_renamefilter(T
, snapno
);
348 const BCIns
*pc
= snap_pc(map
[nent
]);
351 /* Set interpreter PC to the next PC to get correct error messages. */
352 setcframe_pc(cframe_raw(L
->cframe
), pc
+1);
354 /* Make sure the stack is big enough for the slots from the snapshot. */
355 if (LJ_UNLIKELY(L
->base
+ snap
->topslot
>= tvref(L
->maxstack
))) {
356 L
->top
= curr_topL(L
);
357 lj_state_growstack(L
, snap
->topslot
- curr_proto(L
)->framesize
);
360 /* Fill stack slots with data from the registers and spill slots. */
362 ftsz0
= frame_ftsz(frame
); /* Preserve link to previous frame in slot #0. */
363 for (n
= 0; n
< nent
; n
++) {
364 SnapEntry sn
= map
[n
];
365 IRRef ref
= snap_ref(sn
);
366 BCReg s
= snap_slot(sn
);
367 TValue
*o
= &frame
[s
]; /* Stack slots are relative to start frame. */
368 IRIns
*ir
= &T
->ir
[ref
];
369 if (irref_isk(ref
)) { /* Restore constant slot. */
370 lj_ir_kvalue(L
, o
, ir
);
371 } else if (!(sn
& SNAP_NORESTORE
)) {
374 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
)))
375 rs
= snap_renameref(T
, snapno
, ref
, rs
);
376 if (ra_hasspill(regsp_spill(rs
))) { /* Restore from spill slot. */
377 int32_t *sps
= &ex
->spill
[regsp_spill(rs
)];
378 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
)) {
379 o
->u32
.lo
= (uint32_t)*sps
;
380 } else if (irt_isinteger(t
)) {
383 } else if (irt_isnum(t
)) {
384 o
->u64
= *(uint64_t *)sps
;
387 } else if (irt_islightud(t
)) {
388 /* 64 bit lightuserdata which may escape already has the tag bits. */
389 o
->u64
= *(uint64_t *)sps
;
392 lua_assert(!irt_ispri(t
)); /* PRI refs never have a spill slot. */
393 setgcrefi(o
->gcr
, *sps
);
394 setitype(o
, irt_toitype(t
));
396 } else { /* Restore from register. */
397 Reg r
= regsp_reg(rs
);
398 lua_assert(ra_hasreg(r
));
399 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
)) {
400 o
->u32
.lo
= (uint32_t)ex
->gpr
[r
-RID_MIN_GPR
];
401 } else if (irt_isinteger(t
)) {
402 setintV(o
, (int32_t)ex
->gpr
[r
-RID_MIN_GPR
]);
404 } else if (irt_isnum(t
)) {
405 setnumV(o
, ex
->fpr
[r
-RID_MIN_FPR
]);
408 } else if (irt_islightud(t
)) {
409 /* 64 bit lightuserdata which may escape already has the tag bits. */
410 o
->u64
= ex
->gpr
[r
-RID_MIN_GPR
];
414 setgcrefi(o
->gcr
, ex
->gpr
[r
-RID_MIN_GPR
]);
415 setitype(o
, irt_toitype(t
));
418 if (LJ_SOFTFP
&& (sn
& SNAP_SOFTFPNUM
)) {
420 if (LJ_UNLIKELY(bloomtest(rfilt
, ref
+1)))
421 rs
= snap_renameref(T
, snapno
, ref
+1, rs
);
422 o
->u32
.hi
= (ra_hasspill(regsp_spill(rs
))) ?
423 (uint32_t)*&ex
->spill
[regsp_spill(rs
)] :
424 (uint32_t)ex
->gpr
[regsp_reg(rs
)-RID_MIN_GPR
];
427 if ((sn
& (SNAP_CONT
|SNAP_FRAME
))) { /* Overwrite tag with frame link. */
428 o
->fr
.tp
.ftsz
= s
!= 0 ? (int32_t)*flinks
-- : ftsz0
;
432 switch (bc_op(*pc
)) {
433 case BC_CALLM
: case BC_CALLMT
: case BC_RETM
: case BC_TSETM
:
434 L
->top
= frame
+ snap
->nslots
;
437 L
->top
= curr_topL(L
);
440 lua_assert(map
+ nent
== flinks
);