Cleanup various endianess issues in assembler backend.
[luajit-2.0.git] / src / lj_bcread.c
blob30479230d3149a789285617152a1a7907edbbf88
1 /*
2 ** Bytecode reader.
3 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_bcread_c
7 #define LUA_CORE
9 #include "lj_obj.h"
10 #include "lj_gc.h"
11 #include "lj_err.h"
12 #include "lj_str.h"
13 #include "lj_tab.h"
14 #include "lj_bc.h"
15 #if LJ_HASFFI
16 #include "lj_ctype.h"
17 #include "lj_cdata.h"
18 #endif
19 #include "lj_lex.h"
20 #include "lj_bcdump.h"
21 #include "lj_state.h"
/*
** Reuse some lexer fields for our own purposes:
** - ls->level holds the flags read from the dump header.
** - ls->lastline holds the saved stack offset of the Lua stack top.
** All macro parameters are parenthesized so that arbitrary pointer
** expressions can be passed safely. bcread_flags() remains an lvalue.
*/
#define bcread_flags(ls)	((ls)->level)
/* True if the dump's byte order differs from the host's byte order. */
#define bcread_swap(ls) \
  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)
/* Stack top previously recorded with bcread_savetop(). */
#define bcread_oldtop(L, ls)	restorestack((L), (ls)->lastline)
/* Record a stack position in the lexer state. */
#define bcread_savetop(L, ls, top) \
  ((ls)->lastline = (BCLine)savestack((L), (top)))
31 /* -- Input buffer handling ----------------------------------------------- */
33 /* Throw reader error. */
34 static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
36 lua_State *L = ls->L;
37 const char *name = ls->chunkarg;
38 if (*name == BCDUMP_HEAD1) name = "(binary)";
39 else if (*name == '@' || *name == '=') name++;
40 lj_str_pushf(L, "%s: %s", name, err2msg(em));
41 lj_err_throw(L, LUA_ERRSYNTAX);
44 /* Resize input buffer. */
45 static void bcread_resize(LexState *ls, MSize len)
47 if (ls->sb.sz < len) {
48 MSize sz = ls->sb.sz * 2;
49 while (len > sz) sz = sz * 2;
50 lj_str_resizebuf(ls->L, &ls->sb, sz);
51 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
55 /* Refill buffer if needed. */
56 static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58 lua_assert(len != 0);
59 if (len > LJ_MAX_MEM || ls->current < 0)
60 bcread_error(ls, LJ_ERR_BCBAD);
61 do {
62 const char *buf;
63 size_t size;
64 if (ls->n) { /* Copy remainder to buffer. */
65 if (ls->sb.n) { /* Move down in buffer. */
66 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
67 if (ls->n != ls->sb.n)
68 memmove(ls->sb.buf, ls->p, ls->n);
69 } else { /* Copy from buffer provided by reader. */
70 bcread_resize(ls, len);
71 memcpy(ls->sb.buf, ls->p, ls->n);
73 ls->p = ls->sb.buf;
75 ls->sb.n = ls->n;
76 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */
77 if (buf == NULL || size == 0) { /* EOF? */
78 if (need) bcread_error(ls, LJ_ERR_BCBAD);
79 ls->current = -1; /* Only bad if we get called again. */
80 break;
82 if (ls->sb.n) { /* Append to buffer. */
83 MSize n = ls->sb.n + (MSize)size;
84 bcread_resize(ls, n < len ? len : n);
85 memcpy(ls->sb.buf + ls->sb.n, buf, size);
86 ls->n = ls->sb.n = n;
87 ls->p = ls->sb.buf;
88 } else { /* Return buffer provided by reader. */
89 ls->n = (MSize)size;
90 ls->p = buf;
92 } while (ls->n < len);
95 /* Need a certain number of bytes. */
96 static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
98 if (LJ_UNLIKELY(ls->n < len))
99 bcread_fill(ls, len, 1);
102 /* Want to read up to a certain number of bytes, but may need less. */
103 static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
105 if (LJ_UNLIKELY(ls->n < len))
106 bcread_fill(ls, len, 0);
/*
** Account for consumed input bytes in ls->n.
** The first check_exp() argument states the expected precondition (enough
** bytes remain), the second one performs the update.
** Macro parameters are parenthesized so pointer expressions expand safely.
*/
#define bcread_dec(ls)		check_exp((ls)->n > 0, (ls)->n--)
#define bcread_consume(ls, len) \
  check_exp((ls)->n >= (len), (ls)->n -= (len))
112 /* Return memory block from buffer. */
113 static uint8_t *bcread_mem(LexState *ls, MSize len)
115 uint8_t *p = (uint8_t *)ls->p;
116 bcread_consume(ls, len);
117 ls->p = (char *)p + len;
118 return p;
121 /* Copy memory block from buffer. */
122 static void bcread_block(LexState *ls, void *q, MSize len)
124 memcpy(q, bcread_mem(ls, len), len);
127 /* Read byte from buffer. */
128 static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
130 bcread_dec(ls);
131 return (uint32_t)(uint8_t)*ls->p++;
134 /* Read ULEB128 value from buffer. */
135 static uint32_t bcread_uleb128(LexState *ls)
137 const uint8_t *p = (const uint8_t *)ls->p;
138 uint32_t v = *p++;
139 if (LJ_UNLIKELY(v >= 0x80)) {
140 int sh = 0;
141 v &= 0x7f;
142 do {
143 v |= ((*p & 0x7f) << (sh += 7));
144 bcread_dec(ls);
145 } while (*p++ >= 0x80);
147 bcread_dec(ls);
148 ls->p = (char *)p;
149 return v;
152 /* Read top 32 bits of 33 bit ULEB128 value from buffer. */
153 static uint32_t bcread_uleb128_33(LexState *ls)
155 const uint8_t *p = (const uint8_t *)ls->p;
156 uint32_t v = (*p++ >> 1);
157 if (LJ_UNLIKELY(v >= 0x40)) {
158 int sh = -1;
159 v &= 0x3f;
160 do {
161 v |= ((*p & 0x7f) << (sh += 7));
162 bcread_dec(ls);
163 } while (*p++ >= 0x80);
165 bcread_dec(ls);
166 ls->p = (char *)p;
167 return v;
170 /* -- Bytecode reader ----------------------------------------------------- */
172 /* Read debug info of a prototype. */
173 static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
175 void *lineinfo = (void *)proto_lineinfo(pt);
176 bcread_block(ls, lineinfo, sizedbg);
177 /* Swap lineinfo if the endianess differs. */
178 if (bcread_swap(ls) && pt->numline >= 256) {
179 MSize i, n = pt->sizebc-1;
180 if (pt->numline < 65536) {
181 uint16_t *p = (uint16_t *)lineinfo;
182 for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
183 } else {
184 uint32_t *p = (uint32_t *)lineinfo;
185 for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
190 /* Find pointer to varinfo. */
191 static const void *bcread_varinfo(GCproto *pt)
193 const uint8_t *p = proto_uvinfo(pt);
194 MSize n = pt->sizeuv;
195 if (n) while (*p++ || --n) ;
196 return p;
199 /* Read a single constant key/value of a template table. */
200 static void bcread_ktabk(LexState *ls, TValue *o)
202 MSize tp = bcread_uleb128(ls);
203 if (tp >= BCDUMP_KTAB_STR) {
204 MSize len = tp - BCDUMP_KTAB_STR;
205 const char *p = (const char *)bcread_mem(ls, len);
206 setstrV(ls->L, o, lj_str_new(ls->L, p, len));
207 } else if (tp == BCDUMP_KTAB_INT) {
208 setintV(o, (int32_t)bcread_uleb128(ls));
209 } else if (tp == BCDUMP_KTAB_NUM) {
210 o->u32.lo = bcread_uleb128(ls);
211 o->u32.hi = bcread_uleb128(ls);
212 } else {
213 lua_assert(tp <= BCDUMP_KTAB_TRUE);
214 setitype(o, ~tp);
218 /* Read a template table. */
219 static GCtab *bcread_ktab(LexState *ls)
221 MSize narray = bcread_uleb128(ls);
222 MSize nhash = bcread_uleb128(ls);
223 GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
224 if (narray) { /* Read array entries. */
225 MSize i;
226 TValue *o = tvref(t->array);
227 for (i = 0; i < narray; i++, o++)
228 bcread_ktabk(ls, o);
230 if (nhash) { /* Read hash entries. */
231 MSize i;
232 for (i = 0; i < nhash; i++) {
233 TValue key;
234 bcread_ktabk(ls, &key);
235 lua_assert(!tvisnil(&key));
236 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
239 return t;
242 /* Read GC constants of a prototype. */
243 static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
245 MSize i;
246 GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
247 for (i = 0; i < sizekgc; i++, kr++) {
248 MSize tp = bcread_uleb128(ls);
249 if (tp >= BCDUMP_KGC_STR) {
250 MSize len = tp - BCDUMP_KGC_STR;
251 const char *p = (const char *)bcread_mem(ls, len);
252 setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
253 } else if (tp == BCDUMP_KGC_TAB) {
254 setgcref(*kr, obj2gco(bcread_ktab(ls)));
255 #if LJ_HASFFI
256 } else if (tp != BCDUMP_KGC_CHILD) {
257 CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
258 tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
259 CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
260 GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
261 TValue *p = (TValue *)cdataptr(cd);
262 setgcref(*kr, obj2gco(cd));
263 p[0].u32.lo = bcread_uleb128(ls);
264 p[0].u32.hi = bcread_uleb128(ls);
265 if (tp == BCDUMP_KGC_COMPLEX) {
266 p[1].u32.lo = bcread_uleb128(ls);
267 p[1].u32.hi = bcread_uleb128(ls);
269 #endif
270 } else {
271 lua_State *L = ls->L;
272 lua_assert(tp == BCDUMP_KGC_CHILD);
273 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
274 bcread_error(ls, LJ_ERR_BCBAD);
275 L->top--;
276 setgcref(*kr, obj2gco(protoV(L->top)));
281 /* Read number constants of a prototype. */
282 static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
284 MSize i;
285 TValue *o = mref(pt->k, TValue);
286 for (i = 0; i < sizekn; i++, o++) {
287 int isnum = (ls->p[0] & 1);
288 uint32_t lo = bcread_uleb128_33(ls);
289 if (isnum) {
290 o->u32.lo = lo;
291 o->u32.hi = bcread_uleb128(ls);
292 } else {
293 setintV(o, lo);
298 /* Read bytecode instructions. */
299 static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
301 BCIns *bc = proto_bc(pt);
302 bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
303 pt->framesize, 0);
304 bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
305 /* Swap bytecode instructions if the endianess differs. */
306 if (bcread_swap(ls)) {
307 MSize i;
308 for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);
312 /* Read upvalue refs. */
313 static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
315 if (sizeuv) {
316 uint16_t *uv = proto_uv(pt);
317 bcread_block(ls, uv, sizeuv*2);
318 /* Swap upvalue refs if the endianess differs. */
319 if (bcread_swap(ls)) {
320 MSize i;
321 for (i = 0; i < sizeuv; i++)
322 uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
327 /* Read a prototype. */
328 static GCproto *bcread_proto(LexState *ls)
330 GCproto *pt;
331 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
332 MSize ofsk, ofsuv, ofsdbg;
333 MSize sizedbg = 0;
334 BCLine firstline = 0, numline = 0;
335 MSize len, startn;
337 /* Read length. */
338 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
339 ls->n--; ls->p++;
340 return NULL;
342 bcread_want(ls, 5);
343 len = bcread_uleb128(ls);
344 if (!len) return NULL; /* EOF */
345 bcread_need(ls, len);
346 startn = ls->n;
348 /* Read prototype header. */
349 flags = bcread_byte(ls);
350 numparams = bcread_byte(ls);
351 framesize = bcread_byte(ls);
352 sizeuv = bcread_byte(ls);
353 sizekgc = bcread_uleb128(ls);
354 sizekn = bcread_uleb128(ls);
355 sizebc = bcread_uleb128(ls) + 1;
356 if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
357 sizedbg = bcread_uleb128(ls);
358 if (sizedbg) {
359 firstline = bcread_uleb128(ls);
360 numline = bcread_uleb128(ls);
364 /* Calculate total size of prototype including all colocated arrays. */
365 sizept = (MSize)sizeof(GCproto) +
366 sizebc*(MSize)sizeof(BCIns) +
367 sizekgc*(MSize)sizeof(GCRef);
368 sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);
369 ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
370 ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;
371 ofsdbg = sizept; sizept += sizedbg;
373 /* Allocate prototype object and initialize its fields. */
374 pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
375 pt->gct = ~LJ_TPROTO;
376 pt->numparams = (uint8_t)numparams;
377 pt->framesize = (uint8_t)framesize;
378 pt->sizebc = sizebc;
379 setmref(pt->k, (char *)pt + ofsk);
380 setmref(pt->uv, (char *)pt + ofsuv);
381 pt->sizekgc = 0; /* Set to zero until fully initialized. */
382 pt->sizekn = sizekn;
383 pt->sizept = sizept;
384 pt->sizeuv = (uint8_t)sizeuv;
385 pt->flags = (uint8_t)flags;
386 pt->trace = 0;
387 setgcref(pt->chunkname, obj2gco(ls->chunkname));
389 /* Close potentially uninitialized gap between bc and kgc. */
390 *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
392 /* Read bytecode instructions and upvalue refs. */
393 bcread_bytecode(ls, pt, sizebc);
394 bcread_uv(ls, pt, sizeuv);
396 /* Read constants. */
397 bcread_kgc(ls, pt, sizekgc);
398 pt->sizekgc = sizekgc;
399 bcread_knum(ls, pt, sizekn);
401 /* Read and initialize debug info. */
402 pt->firstline = firstline;
403 pt->numline = numline;
404 if (sizedbg) {
405 MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);
406 setmref(pt->lineinfo, (char *)pt + ofsdbg);
407 setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
408 bcread_dbg(ls, pt, sizedbg);
409 setmref(pt->varinfo, bcread_varinfo(pt));
410 } else {
411 setmref(pt->lineinfo, NULL);
412 setmref(pt->uvinfo, NULL);
413 setmref(pt->varinfo, NULL);
416 if (len != startn - ls->n)
417 bcread_error(ls, LJ_ERR_BCBAD);
418 return pt;
421 /* Read and check header of bytecode dump. */
422 static int bcread_header(LexState *ls)
424 uint32_t flags;
425 bcread_want(ls, 3+5+5);
426 if (bcread_byte(ls) != BCDUMP_HEAD2 ||
427 bcread_byte(ls) != BCDUMP_HEAD3 ||
428 bcread_byte(ls) != BCDUMP_VERSION) return 0;
429 bcread_flags(ls) = flags = bcread_uleb128(ls);
430 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
431 #if !LJ_HASFFI
432 if ((flags & BCDUMP_F_FFI)) return 0;
433 #endif
434 if ((flags & BCDUMP_F_STRIP)) {
435 ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
436 } else {
437 MSize len = bcread_uleb128(ls);
438 bcread_need(ls, len);
439 ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
441 return 1; /* Ok. */
444 /* Read a bytecode dump. */
445 GCproto *lj_bcread(LexState *ls)
447 lua_State *L = ls->L;
448 lua_assert(ls->current == BCDUMP_HEAD1);
449 bcread_savetop(L, ls, L->top);
450 lj_str_resetbuf(&ls->sb);
451 /* Check for a valid bytecode dump header. */
452 if (!bcread_header(ls))
453 bcread_error(ls, LJ_ERR_BCFMT);
454 for (;;) { /* Process all prototypes in the bytecode dump. */
455 GCproto *pt = bcread_proto(ls);
456 if (!pt) break;
457 setprotoV(L, L->top, pt);
458 incr_top(L);
460 if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))
461 bcread_error(ls, LJ_ERR_BCBAD);
462 /* Pop off last prototype. */
463 L->top--;
464 return protoV(L->top);