3 ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
5 ** Major portions taken verbatim or adapted from the Lua interpreter.
6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
27 /* Lua lexer token names. */
/*
** Table of token name strings. Elsewhere in this file it is indexed as
** tokennames[token-TK_OFS-1] (token to string) and as tokennames[i] for
** i < TK_RESERVED (reserved word setup).
** NOTE(review): the initializer entries that make use of TKSTR1/TKSTR2 are
** not visible in this extract -- confirm against the full file.
*/
28 static const char *const tokennames
[] = {
/* TKSTR1: stringize the token name itself. */
29 #define TKSTR1(name) #name,
/* TKSTR2: stringize the explicit symbol given for the token. */
30 #define TKSTR2(name, sym) #sym,
37 /* -- Buffer handling ----------------------------------------------------- */
/* Convert a char to an int by way of uint8_t, so the result is never negative. */
39 #define char2int(c) ((int)(uint8_t)(c))
/*
** Fetch the next input character into ls->current: taken from ls->p while
** the remaining-byte counter ls->n is positive, otherwise from fillbuf().
** NOTE(review): the preprocessor line that introduces this expression is not
** visible in this extract; the rest of the file invokes it as next(ls).
*/
41 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
/* Store the current character with save(), then advance with next(). */
42 #define save_and_next(ls) (save(ls, ls->current), next(ls))
/* True if the current character is a line feed or a carriage return. */
43 #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
/* Value returned by fillbuf() when the reader delivers no more data. */
44 #define END_OF_STREAM (-1)
/*
** Refill the input through the reader callback ls->rfunc.
** Returns END_OF_STREAM if the reader yields NULL or reports a size of 0.
** Otherwise ls->n is set to sz - 1 (one character is consumed right away)
** and the character at ls->p is returned via char2int(), advancing ls->p.
** NOTE(review): the declaration of sz and any assignment of ls->p to the
** new buffer are not visible in this extract -- confirm against the full file.
*/
46 static int fillbuf(LexState
*ls
)
49 const char *buf
= ls
->rfunc(ls
->L
, ls
->rdata
, &sz
);
50 if (buf
== NULL
|| sz
== 0) return END_OF_STREAM
;
51 ls
->n
= (MSize
)sz
- 1;
53 return char2int(*(ls
->p
++));
/*
** Grow the token buffer ls->sb to twice its size, then store character c
** at index ls->sb.n (post-incrementing n).
** Reports LJ_ERR_XELEM through lj_lex_error() when the current buffer size
** is already at least LJ_MAX_STR/2.
** NOTE(review): the declaration of newsize is not visible in this extract.
*/
56 static LJ_NOINLINE
void save_grow(LexState
*ls
, int c
)
59 if (ls
->sb
.sz
>= LJ_MAX_STR
/2)
60 lj_lex_error(ls
, 0, LJ_ERR_XELEM
);
61 newsize
= ls
->sb
.sz
* 2;
62 lj_str_resizebuf(ls
->L
, &ls
->sb
, newsize
);
63 ls
->sb
.buf
[ls
->sb
.n
++] = (char)c
;
/*
** Append character c to the token buffer ls->sb.
** The visible store writes c at index ls->sb.n and post-increments n.
** NOTE(review): the statement guarded by the capacity check (n + 1 > sz)
** is not visible in this extract; presumably it hands off to save_grow()
** and the direct store below is the else branch -- confirm against the
** full file.
*/
66 static LJ_AINLINE
void save(LexState
*ls
, int c
)
/* Marked unlikely: the buffer normally still has room. */
68 if (LJ_UNLIKELY(ls
->sb
.n
+ 1 > ls
->sb
.sz
))
71 ls
->sb
.buf
[ls
->sb
.n
++] = (char)c
;
74 static void inclinenumber(LexState
*ls
)
76 int old
= ls
->current
;
77 lua_assert(currIsNewline(ls
));
78 next(ls
); /* skip `\n' or `\r' */
79 if (currIsNewline(ls
) && ls
->current
!= old
)
80 next(ls
); /* skip `\n\r' or `\r\n' */
81 if (++ls
->linenumber
>= LJ_MAX_LINE
)
82 lj_lex_error(ls
, ls
->token
, LJ_ERR_XLINES
);
85 /* -- Scanner for terminals ----------------------------------------------- */
88 /* Load FFI library on-demand. Needed if we create cdata objects. */
/*
** The stack top is captured with savestack() before the load and put back
** with restorestack() afterwards, so the stack top is the same on exit as
** on entry.
** NOTE(review): the call that actually loads the library (between the save
** and the restore) is not visible in this extract.
*/
89 static void lex_loadffi(lua_State
*L
)
91 ptrdiff_t oldtop
= savestack(L
, L
->top
);
93 L
->top
= restorestack(L
, oldtop
);
96 /* Parse 64 bit integer. */
/*
** Parses the text in ls->sb.buf as a hexadecimal (0x/0X prefix) or decimal
** integer accumulated in n, followed by U/L suffix letters (matched
** case-insensitively by masking out bit 0x20).
** Returns 0 if the first digit is missing, if numl is not 2, or if the
** suffix is not followed by the terminating '\0'. Otherwise an 8 byte
** cdata object of type id is created, n is stored in it and it is handed
** to lj_parse_keepcdata().
** NOTE(review): several lines are not visible in this extract (the
** declarations of n, numl and cd, the pointer advances, the suffix
** handling bodies and the final return); numl presumably counts the 'L'
** suffix letters -- confirm against the full file.
*/
97 static int lex_number64(LexState
*ls
, TValue
*tv
)
100 uint8_t *p
= (uint8_t *)ls
->sb
.buf
;
101 CTypeID id
= CTID_INT64
;
104 if (p
[0] == '0' && (p
[1] & ~0x20) == 'X') { /* Hexadecimal. */
106 if (!lj_char_isxdigit(*p
)) return 0;
108 n
= n
*16 + (*p
& 15);
/* For a non-decimal hex digit the low nibble of the letter is 1..6; adding 9 gives 10..15. */
109 if (!lj_char_isdigit(*p
)) n
+= 9;
111 } while (lj_char_isxdigit(*p
));
112 } else { /* Decimal. */
113 if (!lj_char_isdigit(*p
)) return 0;
115 n
= n
*10 + (*p
- '0');
117 } while (lj_char_isdigit(*p
));
119 for (;;) { /* Parse suffixes. */
120 if ((*p
& ~0x20) == 'U')
122 else if ((*p
& ~0x20) == 'L')
128 if (numl
!= 2 || *p
!= '\0') return 0;
129 /* Return cdata holding a 64 bit integer. */
130 cd
= lj_cdata_new_(ls
->L
, id
, 8);
131 *(uint64_t *)cdataptr(cd
) = n
;
132 lj_parse_keepcdata(ls
, tv
, cd
);
137 /* Parse a number literal. */
/*
** Scans a numeric literal that starts at the current digit (asserted) and
** stores the converted value in *tv. Steps visible here:
** - After a leading '0', an x/X switches the exponent marker xp to 'P'.
** - Characters are accepted while they are identifier characters or '.',
**   or a '+'/'-' sign when c (case-folded) equals the exponent marker.
** - For c equal to 'I', 'L' or 'U' with no ctype state present
**   (!ctype_ctsG), some statement runs first -- presumably the on-demand
**   FFI load; it is not visible in this extract.
** - 'L'/'U' literals are tried as 64 bit integers via lex_number64().
** - Otherwise lj_str_numconv() converts the buffer; for 'I' the number
**   becomes the imaginary part of a complex cdata whose real part is 0.
** - With LJ_DUALNUM, a number that survives the round trip through int32_t
**   is tested for; the statement acting on that test is not visible.
** - The last visible statement reports LJ_ERR_XNUMBER for TK_number.
** NOTE(review): declarations, loop bodies and return statements are
** missing from this extract -- confirm control flow against the full file.
*/
138 static void lex_number(LexState
*ls
, TValue
*tv
)
141 lua_assert(lj_char_isdigit(ls
->current
));
142 if ((c
= ls
->current
) == '0') {
144 if ((ls
->current
& ~0x20) == 'X') xp
= 'P';
146 while (lj_char_isident(ls
->current
) || ls
->current
== '.' ||
147 ((ls
->current
== '-' || ls
->current
== '+') && (c
& ~0x20) == xp
)) {
153 if ((c
== 'I' || c
== 'L' || c
== 'U') && !ctype_ctsG(G(ls
->L
)))
155 if (c
== 'I') /* Parse imaginary part of complex number. */
160 if ((c
== 'L' || c
== 'U') && lex_number64(ls
, tv
)) { /* Parse 64 bit int. */
164 if (lj_str_numconv(ls
->sb
.buf
, tv
)) {
166 if (c
== 'I') { /* Return cdata holding a complex number. */
167 GCcdata
*cd
= lj_cdata_new_(ls
->L
, CTID_COMPLEX_DOUBLE
, 2*sizeof(double));
168 ((double *)cdataptr(cd
))[0] = 0;
169 ((double *)cdataptr(cd
))[1] = numberVnum(tv
);
170 lj_parse_keepcdata(ls
, tv
, cd
);
173 if (LJ_DUALNUM
&& tvisnum(tv
)) {
174 int32_t k
= lj_num2int(numV(tv
));
175 if ((lua_Number
)k
== numV(tv
)) /* -0 cannot end up here. */
180 lj_lex_error(ls
, TK_number
, LJ_ERR_XNUMBER
);
/*
** Scan the run of '=' characters in a long bracket separator.
** s must be '[' or ']' (asserted). Returns count when the character after
** the '=' run equals s again, otherwise (-count) - 1.
** NOTE(review): the declarations/initialization of s and count and the
** body of the while loop are not visible in this extract; presumably s is
** the bracket character current on entry and count the number of '='
** characters seen -- confirm against the full file.
*/
183 static int skip_sep(LexState
*ls
)
187 lua_assert(s
== '[' || s
== ']');
189 while (ls
->current
== '=') {
193 return (ls
->current
== s
) ? count
: (-count
) - 1;
/*
** Read a long string or a long comment whose separator level is sep.
** tv is NULL for a comment (llex passes NULL for long comments): in that
** case the error reported is LJ_ERR_XLCOM instead of LJ_ERR_XLSTR, the
** buffer is reset along the way to avoid wasting space, and characters are
** not saved in the visible `if (tv) save_and_next' path.
** A newline directly after the opening bracket is skipped.
** The terminator is found when skip_sep() returns the same level sep.
** For a string, 2 + sep characters are stripped from both ends of the
** buffer (the opening and closing long brackets) before it is stored in *tv.
** NOTE(review): the case labels of the switch and parts of the loop are
** not visible in this extract.
*/
196 static void read_long_string(LexState
*ls
, TValue
*tv
, int sep
)
198 save_and_next(ls
); /* skip 2nd `[' */
199 if (currIsNewline(ls
)) /* string starts with a newline? */
200 inclinenumber(ls
); /* skip it */
202 switch (ls
->current
) {
204 lj_lex_error(ls
, TK_eof
, tv
? LJ_ERR_XLSTR
: LJ_ERR_XLCOM
);
207 if (skip_sep(ls
) == sep
) {
208 save_and_next(ls
); /* skip 2nd `]' */
216 if (!tv
) lj_str_resetbuf(&ls
->sb
); /* avoid wasting space */
219 if (tv
) save_and_next(ls
);
225 GCstr
*str
= lj_parse_keepstr(ls
, ls
->sb
.buf
+ (2 + (MSize
)sep
),
226 ls
->sb
.n
- 2*(2 + (MSize
)sep
));
227 setstrV(ls
->L
, tv
, str
);
/*
** Read a quoted string terminated by delim and store it in *tv.
** The loop runs until the current character equals delim. LJ_ERR_XSTR is
** reported both with TK_eof and with TK_string (the case labels selecting
** them are not visible in this extract).
** Backslash escapes visible here:
**   \a \b \f \n \r \t \v   single control characters
**   \xXX                   two hex digits, each checked with isxdigit
**   \z                     skips following whitespace, counting newlines
**   \ + newline            stored as '\n', line number incremented
**   \\ \" \'               reach a plain break
**   \ddd                   up to three decimal digits
** LJ_ERR_XESC is reported for TK_string on a bad escape.
** The stored string drops one character at each end of the buffer (the
** delimiters): offset 1, length n - 2.
** NOTE(review): many lines (switch/case headers, the a-f adjustment of hex
** digits, range checks, the err_xesc label) are not visible in this
** extract -- confirm details against the full file.
*/
231 static void read_string(LexState
*ls
, int delim
, TValue
*tv
)
234 while (ls
->current
!= delim
) {
235 switch (ls
->current
) {
237 lj_lex_error(ls
, TK_eof
, LJ_ERR_XSTR
);
241 lj_lex_error(ls
, TK_string
, LJ_ERR_XSTR
);
244 int c
= next(ls
); /* Skip the '\\'. */
246 case 'a': c
= '\a'; break;
247 case 'b': c
= '\b'; break;
248 case 'f': c
= '\f'; break;
249 case 'n': c
= '\n'; break;
250 case 'r': c
= '\r'; break;
251 case 't': c
= '\t'; break;
252 case 'v': c
= '\v'; break;
253 case 'x': /* Hexadecimal escape '\xXX'. */
254 c
= (next(ls
) & 15u) << 4;
255 if (!lj_char_isdigit(ls
->current
)) {
256 if (!lj_char_isxdigit(ls
->current
)) goto err_xesc
;
259 c
+= (next(ls
) & 15u);
260 if (!lj_char_isdigit(ls
->current
)) {
261 if (!lj_char_isxdigit(ls
->current
)) goto err_xesc
;
265 case 'z': /* Skip whitespace. */
267 while (lj_char_isspace(ls
->current
))
268 if (currIsNewline(ls
)) inclinenumber(ls
); else next(ls
);
270 case '\n': case '\r': save(ls
, '\n'); inclinenumber(ls
); continue;
271 case '\\': case '\"': case '\'': break;
/* Go back to the loop head without consuming anything. */
272 case END_OF_STREAM
: continue;
274 if (!lj_char_isdigit(c
))
276 c
-= '0'; /* Decimal escape '\ddd'. */
277 if (lj_char_isdigit(next(ls
))) {
278 c
= c
*10 + (ls
->current
- '0');
279 if (lj_char_isdigit(next(ls
))) {
280 c
= c
*10 + (ls
->current
- '0');
283 lj_lex_error(ls
, TK_string
, LJ_ERR_XESC
);
300 save_and_next(ls
); /* skip delimiter */
301 setstrV(ls
->L
, tv
, lj_parse_keepstr(ls
, ls
->sb
.buf
+ 1, ls
->sb
.n
- 2));
304 /* -- Main lexical scanner ------------------------------------------------ */
/*
** Scan the next token. Returns the token type; for tokens that carry a
** value (names, strings, numbers) the value is stored in *tv.
** The token buffer is reset on entry. Cases visible here:
** - Identifier characters: a leading digit starts a numeric literal;
**   otherwise the identifier is interned, and a string with reserved > 0
**   yields TK_OFS + reserved (a reserved word).
** - '-': a single '-' is returned unless a second '-' follows, which
**   starts a comment. A comment starting with '[' is checked for a long
**   bracket via skip_sep() and read with read_long_string(ls, NULL, sep);
**   a short comment runs to the next newline or END_OF_STREAM.
** - '[': long string via read_long_string(); sep == -1 is handled
**   separately and LJ_ERR_XLDELIM is reported for TK_string otherwise.
** - '=', '<', '>', '~': one-character token, or TK_eq/TK_le/TK_ge/TK_ne
**   when followed by '='.
** - Quotes: read_string() with the current character as the delimiter.
** - '.': TK_concat for "..", TK_dots for "...", or the start of a number
**   when a digit follows.
** - Anything else is returned as a single-char token.
** NOTE(review): most case labels, the enclosing loop, declarations and
** several return statements are not visible in this extract -- confirm
** control flow against the full file.
*/
306 static int llex(LexState
*ls
, TValue
*tv
)
308 lj_str_resetbuf(&ls
->sb
);
310 if (lj_char_isident(ls
->current
)) {
312 if (lj_char_isdigit(ls
->current
)) { /* Numeric literal. */
316 /* Identifier or reserved word. */
319 } while (lj_char_isident(ls
->current
));
320 s
= lj_parse_keepstr(ls
, ls
->sb
.buf
, ls
->sb
.n
);
321 if (s
->reserved
> 0) /* Reserved word? */
322 return TK_OFS
+ s
->reserved
;
323 setstrV(ls
->L
, tv
, s
);
326 switch (ls
->current
) {
339 if (ls
->current
!= '-') return '-';
340 /* else is a comment */
342 if (ls
->current
== '[') {
343 int sep
= skip_sep(ls
);
344 lj_str_resetbuf(&ls
->sb
); /* `skip_sep' may dirty the buffer */
346 read_long_string(ls
, NULL
, sep
); /* long comment */
347 lj_str_resetbuf(&ls
->sb
);
351 /* else short comment */
352 while (!currIsNewline(ls
) && ls
->current
!= END_OF_STREAM
)
356 int sep
= skip_sep(ls
);
358 read_long_string(ls
, tv
, sep
);
360 } else if (sep
== -1) {
363 lj_lex_error(ls
, TK_string
, LJ_ERR_XLDELIM
);
369 if (ls
->current
!= '=') return '='; else { next(ls
); return TK_eq
; }
372 if (ls
->current
!= '=') return '<'; else { next(ls
); return TK_le
; }
375 if (ls
->current
!= '=') return '>'; else { next(ls
); return TK_ge
; }
378 if (ls
->current
!= '=') return '~'; else { next(ls
); return TK_ne
; }
381 read_string(ls
, ls
->current
, tv
);
385 if (ls
->current
== '.') {
387 if (ls
->current
== '.') {
389 return TK_dots
; /* ... */
391 return TK_concat
; /* .. */
392 } else if (!lj_char_isdigit(ls
->current
)) {
403 return c
; /* Single-char tokens (+ - / ...). */
409 /* -- Lexer API ----------------------------------------------------------- */
411 /* Setup lexer state. */
/*
** Steps visible here:
** - The look-ahead token is cleared (TK_eof means "none").
** - The token buffer is sized to LJ_MIN_SBUF.
** - The first character is read ahead.
** - A UTF-8 BOM (0xef 0xbb 0xbf) is detected, but only if its two
**   trailing bytes are already buffered (ls->n >= 2).
** - A leading '#' line (POSIX #! header) is skipped up to the next
**   newline; hitting END_OF_STREAM inside that line returns 0.
** Returns non-zero if the current character equals LUA_SIGNATURE[0],
** i.e. the input looks like a bytecode dump, otherwise 0.
** NOTE(review): the remaining field initializations and the statements
** that actually skip the BOM bytes are not visible in this extract.
*/
412 int lj_lex_setup(lua_State
*L
, LexState
*ls
)
423 ls
->lookahead
= TK_eof
; /* No look-ahead token. */
426 lj_str_resizebuf(ls
->L
, &ls
->sb
, LJ_MIN_SBUF
);
427 next(ls
); /* Read-ahead first char. */
428 if (ls
->current
== 0xef && ls
->n
>= 2 && char2int(ls
->p
[0]) == 0xbb &&
429 char2int(ls
->p
[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
434 if (ls
->current
== '#') { /* Skip POSIX #! header line. */
437 if (ls
->current
== END_OF_STREAM
) return 0;
438 } while (!currIsNewline(ls
));
441 return (ls
->current
== LUA_SIGNATURE
[0]); /* Bytecode dump? */
444 /* Cleanup lexer state. */
445 void lj_lex_cleanup(lua_State
*L
, LexState
*ls
)
447 global_State
*g
= G(L
);
448 lj_mem_freevec(g
, ls
->bcstack
, ls
->sizebcstack
, BCInsLine
);
449 lj_mem_freevec(g
, ls
->vstack
, ls
->sizevstack
, VarInfo
);
450 lj_str_freebuf(g
, &ls
->sb
);
453 void lj_lex_next(LexState
*ls
)
455 ls
->lastline
= ls
->linenumber
;
456 if (LJ_LIKELY(ls
->lookahead
== TK_eof
)) { /* No lookahead token? */
457 ls
->token
= llex(ls
, &ls
->tokenval
); /* Get next token. */
458 } else { /* Otherwise return lookahead token. */
459 ls
->token
= ls
->lookahead
;
460 ls
->lookahead
= TK_eof
;
461 ls
->tokenval
= ls
->lookaheadval
;
465 LexToken
lj_lex_lookahead(LexState
*ls
)
467 lua_assert(ls
->lookahead
== TK_eof
);
468 ls
->lookahead
= llex(ls
, &ls
->lookaheadval
);
469 return ls
->lookahead
;
/*
** Convert a token to a printable string.
** Named tokens are looked up in tokennames[] at index token-TK_OFS-1.
** Other tokens are formatted with lj_str_pushf(): as the character itself
** ("%c") unless it is a control character, in which case "char(%d)" with
** the numeric value is used.
** NOTE(review): the condition selecting the tokennames[] lookup is not
** visible in this extract; presumably it tests for token > TK_OFS --
** confirm against the full file.
*/
472 const char *lj_lex_token2str(LexState
*ls
, LexToken token
)
475 return tokennames
[token
-TK_OFS
-1];
476 else if (!lj_char_iscntrl(token
))
477 return lj_str_pushf(ls
->L
, "%c", token
);
479 return lj_str_pushf(ls
->L
, "char(%d)", token
);
/*
** Report a lexer error for the given token with error message em and
** optional variadic arguments, by calling lj_err_lex() with the chunk
** name, a token string and the current line number.
** For TK_name, TK_string and TK_number a separate branch is taken; for the
** remaining tokens reaching the last branch, the token string comes from
** lj_lex_token2str().
** NOTE(review): the first branch of the if chain, the body of the
** name/string/number branch and the va_list handling for argp are not
** visible in this extract.
*/
482 void lj_lex_error(LexState
*ls
, LexToken token
, ErrMsg em
, ...)
488 } else if (token
== TK_name
|| token
== TK_string
|| token
== TK_number
) {
492 tok
= lj_lex_token2str(ls
, token
);
495 lj_err_lex(ls
->L
, ls
->chunkname
, tok
, ls
->linenumber
, em
, argp
);
/*
** Set up the reserved words: the first TK_RESERVED entries of tokennames[]
** are created as strings with lj_str_newz(), marked with fixstring() and
** tagged with reserved = index + 1. llex() later treats any string whose
** reserved field is > 0 as a reserved word, so 0 is left for ordinary strings.
** NOTE(review): the declaration of i and the closing lines of the loop and
** the function are not visible in this extract.
*/
499 void lj_lex_init(lua_State
*L
)
502 for (i
= 0; i
< TK_RESERVED
; i
++) {
503 GCstr
*s
= lj_str_newz(L
, tokennames
[i
]);
504 fixstring(s
); /* Reserved words are never collected. */
505 s
->reserved
= (uint8_t)(i
+1);