2 ** Object de/serialization.
3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
24 #include "lj_serialize.h"
26 /* Tags for internal serialization format. */
28 SER_TAG_NIL
, /* 0x00 */
36 SER_TAG_TAB
, /* 0x08 */
37 SER_TAG_DICT_MT
= SER_TAG_TAB
+6,
39 SER_TAG_INT64
, /* 0x10 */
47 SER_TAG_0x18
, /* 0x18 */
55 SER_TAG_STR
, /* 0x20 + str->len */
57 LJ_STATIC_ASSERT((SER_TAG_TAB
& 7) == 0);
59 /* -- Helper functions ---------------------------------------------------- */
61 static LJ_AINLINE
char *serialize_more(char *w
, SBufExt
*sbx
, MSize sz
)
63 if (LJ_UNLIKELY(sz
> (MSize
)(sbx
->e
- w
))) {
65 w
= lj_buf_more2((SBuf
*)sbx
, sz
);
70 /* Write U124 to buffer. */
71 static LJ_NOINLINE
char *serialize_wu124_(char *w
, uint32_t v
)
75 *w
++ = (char)(0xe0 | (v
>> 8)); *w
++ = (char)v
;
81 memcpy(w
, &v
, 4); w
+= 4;
86 static LJ_AINLINE
char *serialize_wu124(char *w
, uint32_t v
)
88 if (LJ_LIKELY(v
< 0xe0)) {
92 return serialize_wu124_(w
, v
);
96 static LJ_NOINLINE
char *serialize_ru124_(char *r
, char *w
, uint32_t *pv
)
100 if (r
>= w
) return NULL
;
101 v
= ((v
& 0x1f) << 8) + *(uint8_t *)r
+ 0xe0; r
++;
103 if (r
+ 4 > w
) return NULL
;
104 v
= lj_getu32(r
); r
+= 4;
113 static LJ_AINLINE
char *serialize_ru124(char *r
, char *w
, uint32_t *pv
)
115 if (LJ_LIKELY(r
< w
)) {
116 uint32_t v
= *(uint8_t *)r
; r
++;
118 if (LJ_UNLIKELY(v
>= 0xe0)) {
119 r
= serialize_ru124_(r
, w
, pv
);
126 /* Prepare string dictionary for use (once). */
127 void LJ_FASTCALL
lj_serialize_dict_prep_str(lua_State
*L
, GCtab
*dict
)
129 if (!dict
->hmask
) { /* No hash part means not prepared, yet. */
130 MSize i
, len
= lj_tab_len(dict
);
132 lj_tab_resize(L
, dict
, dict
->asize
, hsize2hbits(len
));
133 for (i
= 1; i
<= len
&& i
< dict
->asize
; i
++) {
134 cTValue
*o
= arrayslot(dict
, i
);
136 if (!lj_tab_getstr(dict
, strV(o
))) { /* Ignore dups. */
137 lj_tab_newkey(L
, dict
, o
)->u64
= (uint64_t)(i
-1);
139 } else if (!tvisfalse(o
)) {
140 lj_err_caller(L
, LJ_ERR_BUFFER_BADOPT
);
146 /* Prepare metatable dictionary for use (once). */
147 void LJ_FASTCALL
lj_serialize_dict_prep_mt(lua_State
*L
, GCtab
*dict
)
149 if (!dict
->hmask
) { /* No hash part means not prepared, yet. */
150 MSize i
, len
= lj_tab_len(dict
);
152 lj_tab_resize(L
, dict
, dict
->asize
, hsize2hbits(len
));
153 for (i
= 1; i
<= len
&& i
< dict
->asize
; i
++) {
154 cTValue
*o
= arrayslot(dict
, i
);
156 if (tvisnil(lj_tab_get(L
, dict
, o
))) { /* Ignore dups. */
157 lj_tab_newkey(L
, dict
, o
)->u64
= (uint64_t)(i
-1);
159 } else if (!tvisfalse(o
)) {
160 lj_err_caller(L
, LJ_ERR_BUFFER_BADOPT
);
166 /* -- Internal serializer ------------------------------------------------- */
168 /* Put serialized object into buffer. */
169 static char *serialize_put(char *w
, SBufExt
*sbx
, cTValue
*o
)
171 if (LJ_LIKELY(tvisstr(o
))) {
172 const GCstr
*str
= strV(o
);
173 MSize len
= str
->len
;
174 w
= serialize_more(w
, sbx
, 5+len
);
175 w
= serialize_wu124(w
, SER_TAG_STR
+ len
);
176 w
= lj_buf_wmem(w
, strdata(str
), len
);
177 } else if (tvisint(o
)) {
178 uint32_t x
= LJ_BE
? lj_bswap((uint32_t)intV(o
)) : (uint32_t)intV(o
);
179 w
= serialize_more(w
, sbx
, 1+4);
180 *w
++ = SER_TAG_INT
; memcpy(w
, &x
, 4); w
+= 4;
181 } else if (tvisnum(o
)) {
182 uint64_t x
= LJ_BE
? lj_bswap64(o
->u64
) : o
->u64
;
183 w
= serialize_more(w
, sbx
, 1+sizeof(lua_Number
));
184 *w
++ = SER_TAG_NUM
; memcpy(w
, &x
, 8); w
+= 8;
185 } else if (tvispri(o
)) {
186 w
= serialize_more(w
, sbx
, 1);
187 *w
++ = (char)(SER_TAG_NIL
+ ~itype(o
));
188 } else if (tvistab(o
)) {
189 const GCtab
*t
= tabV(o
);
190 uint32_t narray
= 0, nhash
= 0, one
= 2;
191 if (sbx
->depth
<= 0) lj_err_caller(sbufL(sbx
), LJ_ERR_BUFFER_DEPTH
);
193 if (t
->asize
> 0) { /* Determine max. length of array part. */
195 TValue
*array
= tvref(t
->array
);
196 for (i
= (ptrdiff_t)t
->asize
-1; i
>= 0; i
--)
197 if (!tvisnil(&array
[i
]))
199 narray
= (uint32_t)(i
+1);
200 if (narray
&& tvisnil(&array
[0])) one
= 4;
202 if (t
->hmask
> 0) { /* Count number of used hash slots. */
203 uint32_t i
, hmask
= t
->hmask
;
204 Node
*node
= noderef(t
->node
);
205 for (i
= 0; i
<= hmask
; i
++)
206 nhash
+= !tvisnil(&node
[i
].val
);
208 /* Write metatable index. */
209 if (LJ_UNLIKELY(tabref(sbx
->dict_mt
)) && tabref(t
->metatable
)) {
212 settabV(sbufL(sbx
), &mto
, tabref(t
->metatable
));
213 n
= hashgcref(tabref(sbx
->dict_mt
), mto
.gcr
);
215 if (n
->key
.u64
== mto
.u64
) {
216 uint32_t idx
= n
->val
.u32
.lo
;
217 w
= serialize_more(w
, sbx
, 1+5);
218 *w
++ = SER_TAG_DICT_MT
;
219 w
= serialize_wu124(w
, idx
);
222 } while ((n
= nextnode(n
)));
224 /* Write number of array slots and hash slots. */
225 w
= serialize_more(w
, sbx
, 1+2*5);
226 *w
++ = (char)(SER_TAG_TAB
+ (nhash
? 1 : 0) + (narray
? one
: 0));
227 if (narray
) w
= serialize_wu124(w
, narray
);
228 if (nhash
) w
= serialize_wu124(w
, nhash
);
229 if (narray
) { /* Write array entries. */
230 cTValue
*oa
= tvref(t
->array
) + (one
>> 2);
231 cTValue
*oe
= tvref(t
->array
) + narray
;
232 while (oa
< oe
) w
= serialize_put(w
, sbx
, oa
++);
234 if (nhash
) { /* Write hash entries. */
235 const Node
*node
= noderef(t
->node
) + t
->hmask
;
236 GCtab
*dict_str
= tabref(sbx
->dict_str
);
237 if (LJ_UNLIKELY(dict_str
)) {
239 if (!tvisnil(&node
->val
)) {
240 if (LJ_LIKELY(tvisstr(&node
->key
))) {
241 /* Inlined lj_tab_getstr is 30% faster. */
242 const GCstr
*str
= strV(&node
->key
);
243 Node
*n
= hashstr(dict_str
, str
);
245 if (tvisstr(&n
->key
) && strV(&n
->key
) == str
) {
246 uint32_t idx
= n
->val
.u32
.lo
;
247 w
= serialize_more(w
, sbx
, 1+5);
248 *w
++ = SER_TAG_DICT_STR
;
249 w
= serialize_wu124(w
, idx
);
254 MSize len
= str
->len
;
255 w
= serialize_more(w
, sbx
, 5+len
);
256 w
= serialize_wu124(w
, SER_TAG_STR
+ len
);
257 w
= lj_buf_wmem(w
, strdata(str
), len
);
262 w
= serialize_put(w
, sbx
, &node
->key
);
264 w
= serialize_put(w
, sbx
, &node
->val
);
265 if (--nhash
== 0) break;
269 if (!tvisnil(&node
->val
)) {
270 w
= serialize_put(w
, sbx
, &node
->key
);
271 w
= serialize_put(w
, sbx
, &node
->val
);
272 if (--nhash
== 0) break;
278 } else if (tviscdata(o
)) {
279 CTState
*cts
= ctype_cts(sbufL(sbx
));
280 CType
*s
= ctype_raw(cts
, cdataV(o
)->ctypeid
);
281 uint8_t *sp
= cdataptr(cdataV(o
));
282 if (ctype_isinteger(s
->info
) && s
->size
== 8) {
283 w
= serialize_more(w
, sbx
, 1+8);
284 *w
++ = (s
->info
& CTF_UNSIGNED
) ? SER_TAG_UINT64
: SER_TAG_INT64
;
286 { uint64_t u
= lj_bswap64(*(uint64_t *)sp
); memcpy(w
, &u
, 8); }
291 } else if (ctype_iscomplex(s
->info
) && s
->size
== 16) {
292 w
= serialize_more(w
, sbx
, 1+16);
293 *w
++ = SER_TAG_COMPLEX
;
295 { /* Only swap the doubles. The re/im order stays the same. */
296 uint64_t u
= lj_bswap64(((uint64_t *)sp
)[0]); memcpy(w
, &u
, 8);
297 u
= lj_bswap64(((uint64_t *)sp
)[1]); memcpy(w
+8, &u
, 8);
304 goto badenc
; /* NYI other cdata */
307 } else if (tvislightud(o
)) {
308 uintptr_t ud
= (uintptr_t)lightudV(G(sbufL(sbx
)), o
);
309 w
= serialize_more(w
, sbx
, 1+sizeof(ud
));
312 } else if (LJ_32
|| checku32(ud
)) {
318 *w
++ = SER_TAG_LIGHTUD32
; memcpy(w
, &ud
, 4); w
+= 4;
324 *w
++ = SER_TAG_LIGHTUD64
; memcpy(w
, &ud
, 8); w
+= 8;
332 lj_err_callerv(sbufL(sbx
), LJ_ERR_BUFFER_BADENC
, lj_typename(o
));
337 /* Get serialized object from buffer. */
338 static char *serialize_get(char *r
, SBufExt
*sbx
, TValue
*o
)
342 r
= serialize_ru124(r
, w
, &tp
); if (LJ_UNLIKELY(!r
)) goto eob
;
343 if (LJ_LIKELY(tp
>= SER_TAG_STR
)) {
344 uint32_t len
= tp
- SER_TAG_STR
;
345 if (LJ_UNLIKELY(len
> (uint32_t)(w
- r
))) goto eob
;
346 setstrV(sbufL(sbx
), o
, lj_str_new(sbufL(sbx
), r
, len
));
348 } else if (tp
== SER_TAG_INT
) {
349 if (LJ_UNLIKELY(r
+ 4 > w
)) goto eob
;
350 setintV(o
, (int32_t)(LJ_BE
? lj_bswap(lj_getu32(r
)) : lj_getu32(r
)));
352 } else if (tp
== SER_TAG_NUM
) {
353 if (LJ_UNLIKELY(r
+ 8 > w
)) goto eob
;
354 memcpy(o
, r
, 8); r
+= 8;
356 o
->u64
= lj_bswap64(o
->u64
);
358 if (!tvisnum(o
)) setnanV(o
); /* Fix non-canonical NaNs. */
359 } else if (tp
<= SER_TAG_TRUE
) {
361 } else if (tp
== SER_TAG_DICT_STR
) {
364 r
= serialize_ru124(r
, w
, &idx
); if (LJ_UNLIKELY(!r
)) goto eob
;
366 dict_str
= tabref(sbx
->dict_str
);
367 if (dict_str
&& idx
< dict_str
->asize
&& tvisstr(arrayslot(dict_str
, idx
)))
368 copyTV(sbufL(sbx
), o
, arrayslot(dict_str
, idx
));
370 lj_err_callerv(sbufL(sbx
), LJ_ERR_BUFFER_BADDICTX
, idx
);
371 } else if (tp
>= SER_TAG_TAB
&& tp
<= SER_TAG_DICT_MT
) {
372 uint32_t narray
= 0, nhash
= 0;
373 GCtab
*t
, *mt
= NULL
;
374 if (sbx
->depth
<= 0) lj_err_caller(sbufL(sbx
), LJ_ERR_BUFFER_DEPTH
);
376 if (tp
== SER_TAG_DICT_MT
) {
379 r
= serialize_ru124(r
, w
, &idx
); if (LJ_UNLIKELY(!r
)) goto eob
;
381 dict_mt
= tabref(sbx
->dict_mt
);
382 if (dict_mt
&& idx
< dict_mt
->asize
&& tvistab(arrayslot(dict_mt
, idx
)))
383 mt
= tabV(arrayslot(dict_mt
, idx
));
385 lj_err_callerv(sbufL(sbx
), LJ_ERR_BUFFER_BADDICTX
, idx
);
386 r
= serialize_ru124(r
, w
, &tp
); if (LJ_UNLIKELY(!r
)) goto eob
;
387 if (!(tp
>= SER_TAG_TAB
&& tp
< SER_TAG_DICT_MT
)) goto badtag
;
389 if (tp
>= SER_TAG_TAB
+2) {
390 r
= serialize_ru124(r
, w
, &narray
); if (LJ_UNLIKELY(!r
)) goto eob
;
393 r
= serialize_ru124(r
, w
, &nhash
); if (LJ_UNLIKELY(!r
)) goto eob
;
395 t
= lj_tab_new(sbufL(sbx
), narray
, hsize2hbits(nhash
));
396 /* NOBARRIER: The table is new (marked white). */
397 setgcref(t
->metatable
, obj2gco(mt
));
398 settabV(sbufL(sbx
), o
, t
);
400 TValue
*oa
= tvref(t
->array
) + (tp
>= SER_TAG_TAB
+4);
401 TValue
*oe
= tvref(t
->array
) + narray
;
402 while (oa
< oe
) r
= serialize_get(r
, sbx
, oa
++);
407 r
= serialize_get(r
, sbx
, &k
);
408 v
= lj_tab_set(sbufL(sbx
), t
, &k
);
409 if (LJ_UNLIKELY(!tvisnil(v
)))
410 lj_err_caller(sbufL(sbx
), LJ_ERR_BUFFER_DUPKEY
);
411 r
= serialize_get(r
, sbx
, v
);
416 } else if (tp
>= SER_TAG_INT64
&& tp
<= SER_TAG_COMPLEX
) {
417 uint32_t sz
= tp
== SER_TAG_COMPLEX
? 16 : 8;
419 if (LJ_UNLIKELY(r
+ sz
> w
)) goto eob
;
420 if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx
))))) goto badtag
;
421 cd
= lj_cdata_new_(sbufL(sbx
),
422 tp
== SER_TAG_INT64
? CTID_INT64
:
423 tp
== SER_TAG_UINT64
? CTID_UINT64
: CTID_COMPLEX_DOUBLE
,
425 memcpy(cdataptr(cd
), r
, sz
); r
+= sz
;
427 *(uint64_t *)cdataptr(cd
) = lj_bswap64(*(uint64_t *)cdataptr(cd
));
429 ((uint64_t *)cdataptr(cd
))[1] = lj_bswap64(((uint64_t *)cdataptr(cd
))[1]);
431 if (sz
== 16) { /* Fix non-canonical NaNs. */
432 TValue
*cdo
= (TValue
*)cdataptr(cd
);
433 if (!tvisnum(&cdo
[0])) setnanV(&cdo
[0]);
434 if (!tvisnum(&cdo
[1])) setnanV(&cdo
[1]);
436 setcdataV(sbufL(sbx
), o
, cd
);
438 } else if (tp
<= (LJ_64
? SER_TAG_LIGHTUD64
: SER_TAG_LIGHTUD32
)) {
440 if (tp
== SER_TAG_LIGHTUD32
) {
441 if (LJ_UNLIKELY(r
+ 4 > w
)) goto eob
;
442 ud
= (uintptr_t)(LJ_BE
? lj_bswap(lj_getu32(r
)) : lj_getu32(r
));
446 else if (tp
== SER_TAG_LIGHTUD64
) {
447 if (LJ_UNLIKELY(r
+ 8 > w
)) goto eob
;
448 memcpy(&ud
, r
, 8); r
+= 8;
453 setrawlightudV(o
, lj_lightud_intern(sbufL(sbx
), (void *)ud
));
455 setrawlightudV(o
, (void *)ud
);
459 lj_err_callerv(sbufL(sbx
), LJ_ERR_BUFFER_BADDEC
, tp
);
463 lj_err_caller(sbufL(sbx
), LJ_ERR_BUFFER_EOB
);
467 /* -- External serialization API ------------------------------------------ */
469 /* Encode to buffer. */
470 SBufExt
* LJ_FASTCALL
lj_serialize_put(SBufExt
*sbx
, cTValue
*o
)
472 sbx
->depth
= LJ_SERIALIZE_DEPTH
;
473 sbx
->w
= serialize_put(sbx
->w
, sbx
, o
);
477 /* Decode from buffer. */
478 char * LJ_FASTCALL
lj_serialize_get(SBufExt
*sbx
, TValue
*o
)
480 sbx
->depth
= LJ_SERIALIZE_DEPTH
;
481 return serialize_get(sbx
->r
, sbx
, o
);
484 /* Stand-alone encoding, borrowing from global temporary buffer. */
485 GCstr
* LJ_FASTCALL
lj_serialize_encode(lua_State
*L
, cTValue
*o
)
489 memset(&sbx
, 0, sizeof(SBufExt
));
490 lj_bufx_set_borrow(L
, &sbx
, &G(L
)->tmpbuf
);
491 sbx
.depth
= LJ_SERIALIZE_DEPTH
;
492 w
= serialize_put(sbx
.w
, &sbx
, o
);
493 return lj_str_new(L
, sbx
.b
, (size_t)(w
- sbx
.b
));
496 /* Stand-alone decoding, copy-on-write from string. */
497 void lj_serialize_decode(lua_State
*L
, TValue
*o
, GCstr
*str
)
501 memset(&sbx
, 0, sizeof(SBufExt
));
502 lj_bufx_set_cow(L
, &sbx
, strdata(str
), str
->len
);
503 /* No need to set sbx.cowref here. */
504 sbx
.depth
= LJ_SERIALIZE_DEPTH
;
505 r
= serialize_get(sbx
.r
, &sbx
, o
);
506 if (r
!= sbx
.w
) lj_err_caller(L
, LJ_ERR_BUFFER_LEFTOV
);
510 /* Peek into buffer to find the result IRType for specialization purposes. */
511 LJ_FUNC MSize LJ_FASTCALL
lj_serialize_peektype(SBufExt
*sbx
)
514 if (serialize_ru124(sbx
->r
, sbx
->w
, &tp
)) {
515 /* This must match the handling of all tags in the decoder above. */
517 case SER_TAG_NIL
: return IRT_NIL
;
518 case SER_TAG_FALSE
: return IRT_FALSE
;
519 case SER_TAG_TRUE
: return IRT_TRUE
;
520 case SER_TAG_NULL
: case SER_TAG_LIGHTUD32
: case SER_TAG_LIGHTUD64
:
522 case SER_TAG_INT
: return LJ_DUALNUM
? IRT_INT
: IRT_NUM
;
523 case SER_TAG_NUM
: return IRT_NUM
;
524 case SER_TAG_TAB
: case SER_TAG_TAB
+1: case SER_TAG_TAB
+2:
525 case SER_TAG_TAB
+3: case SER_TAG_TAB
+4: case SER_TAG_TAB
+5:
526 case SER_TAG_DICT_MT
:
528 case SER_TAG_INT64
: case SER_TAG_UINT64
: case SER_TAG_COMPLEX
:
530 case SER_TAG_DICT_STR
:
535 return IRT_NIL
; /* Will fail on actual decode. */