3 ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
5 ** Major portions taken verbatim or adapted from the Lua interpreter.
6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
26 #include "lj_strscan.h"
28 /* Lua lexer token names. */
29 static const char *const tokennames
[] = {
30 #define TKSTR1(name) #name,
31 #define TKSTR2(name, sym) #sym,
/* -- Buffer handling ----------------------------------------------------- */

/* Convert a (possibly signed) char to a non-negative int. */
#define char2int(c)		((int)(uint8_t)(c))
/*
** Advance to the next input character and store it in ls->current.
** Takes it from the buffered chunk while ls->n > 0, else calls fillbuf().
*/
#define next(ls) \
  (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
/* Append the current character to the token buffer, then advance. */
#define save_and_next(ls)	(save(ls, ls->current), next(ls))
/* True if the current character is '\n' or '\r'. */
#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
/* Sentinel stored in ls->current when no more input is available. */
#define END_OF_STREAM		(-1)
47 static int fillbuf(LexState
*ls
)
50 const char *buf
= ls
->rfunc(ls
->L
, ls
->rdata
, &sz
);
51 if (buf
== NULL
|| sz
== 0) return END_OF_STREAM
;
52 ls
->n
= (MSize
)sz
- 1;
54 return char2int(*(ls
->p
++));
57 static LJ_NOINLINE
void save_grow(LexState
*ls
, int c
)
60 if (ls
->sb
.sz
>= LJ_MAX_STR
/2)
61 lj_lex_error(ls
, 0, LJ_ERR_XELEM
);
62 newsize
= ls
->sb
.sz
* 2;
63 lj_str_resizebuf(ls
->L
, &ls
->sb
, newsize
);
64 ls
->sb
.buf
[ls
->sb
.n
++] = (char)c
;
67 static LJ_AINLINE
void save(LexState
*ls
, int c
)
69 if (LJ_UNLIKELY(ls
->sb
.n
+ 1 > ls
->sb
.sz
))
72 ls
->sb
.buf
[ls
->sb
.n
++] = (char)c
;
75 static void inclinenumber(LexState
*ls
)
77 int old
= ls
->current
;
78 lua_assert(currIsNewline(ls
));
79 next(ls
); /* skip `\n' or `\r' */
80 if (currIsNewline(ls
) && ls
->current
!= old
)
81 next(ls
); /* skip `\n\r' or `\r\n' */
82 if (++ls
->linenumber
>= LJ_MAX_LINE
)
83 lj_lex_error(ls
, ls
->token
, LJ_ERR_XLINES
);
86 /* -- Scanner for terminals ----------------------------------------------- */
88 /* Parse a number literal. */
89 static void lex_number(LexState
*ls
, TValue
*tv
)
93 lua_assert(lj_char_isdigit(ls
->current
));
94 if ((c
= ls
->current
) == '0') {
96 if ((ls
->current
| 0x20) == 'x') xp
= 'p';
98 while (lj_char_isident(ls
->current
) || ls
->current
== '.' ||
99 ((ls
->current
== '-' || ls
->current
== '+') && (c
| 0x20) == xp
)) {
104 fmt
= lj_strscan_scan((const uint8_t *)ls
->sb
.buf
, tv
,
105 (LJ_DUALNUM
? STRSCAN_OPT_TOINT
: STRSCAN_OPT_TONUM
) |
106 (LJ_HASFFI
? (STRSCAN_OPT_LL
|STRSCAN_OPT_IMAG
) : 0));
107 if (LJ_DUALNUM
&& fmt
== STRSCAN_INT
) {
108 setitype(tv
, LJ_TISNUM
);
109 } else if (fmt
== STRSCAN_NUM
) {
110 /* Already in correct format. */
112 } else if (fmt
!= STRSCAN_ERROR
) {
113 lua_State
*L
= ls
->L
;
115 lua_assert(fmt
== STRSCAN_I64
|| fmt
== STRSCAN_U64
|| fmt
== STRSCAN_IMAG
);
116 if (!ctype_ctsG(G(L
))) {
117 ptrdiff_t oldtop
= savestack(L
, L
->top
);
118 luaopen_ffi(L
); /* Load FFI library on-demand. */
119 L
->top
= restorestack(L
, oldtop
);
121 if (fmt
== STRSCAN_IMAG
) {
122 cd
= lj_cdata_new_(L
, CTID_COMPLEX_DOUBLE
, 2*sizeof(double));
123 ((double *)cdataptr(cd
))[0] = 0;
124 ((double *)cdataptr(cd
))[1] = numV(tv
);
126 cd
= lj_cdata_new_(L
, fmt
==STRSCAN_I64
? CTID_INT64
: CTID_UINT64
, 8);
127 *(uint64_t *)cdataptr(cd
) = tv
->u64
;
129 lj_parse_keepcdata(ls
, tv
, cd
);
132 lua_assert(fmt
== STRSCAN_ERROR
);
133 lj_lex_error(ls
, TK_number
, LJ_ERR_XNUMBER
);
137 static int skip_sep(LexState
*ls
)
141 lua_assert(s
== '[' || s
== ']');
143 while (ls
->current
== '=') {
147 return (ls
->current
== s
) ? count
: (-count
) - 1;
150 static void read_long_string(LexState
*ls
, TValue
*tv
, int sep
)
152 save_and_next(ls
); /* skip 2nd `[' */
153 if (currIsNewline(ls
)) /* string starts with a newline? */
154 inclinenumber(ls
); /* skip it */
156 switch (ls
->current
) {
158 lj_lex_error(ls
, TK_eof
, tv
? LJ_ERR_XLSTR
: LJ_ERR_XLCOM
);
161 if (skip_sep(ls
) == sep
) {
162 save_and_next(ls
); /* skip 2nd `]' */
170 if (!tv
) lj_str_resetbuf(&ls
->sb
); /* avoid wasting space */
173 if (tv
) save_and_next(ls
);
179 GCstr
*str
= lj_parse_keepstr(ls
, ls
->sb
.buf
+ (2 + (MSize
)sep
),
180 ls
->sb
.n
- 2*(2 + (MSize
)sep
));
181 setstrV(ls
->L
, tv
, str
);
185 static void read_string(LexState
*ls
, int delim
, TValue
*tv
)
188 while (ls
->current
!= delim
) {
189 switch (ls
->current
) {
191 lj_lex_error(ls
, TK_eof
, LJ_ERR_XSTR
);
195 lj_lex_error(ls
, TK_string
, LJ_ERR_XSTR
);
198 int c
= next(ls
); /* Skip the '\\'. */
200 case 'a': c
= '\a'; break;
201 case 'b': c
= '\b'; break;
202 case 'f': c
= '\f'; break;
203 case 'n': c
= '\n'; break;
204 case 'r': c
= '\r'; break;
205 case 't': c
= '\t'; break;
206 case 'v': c
= '\v'; break;
207 case 'x': /* Hexadecimal escape '\xXX'. */
208 c
= (next(ls
) & 15u) << 4;
209 if (!lj_char_isdigit(ls
->current
)) {
210 if (!lj_char_isxdigit(ls
->current
)) goto err_xesc
;
213 c
+= (next(ls
) & 15u);
214 if (!lj_char_isdigit(ls
->current
)) {
215 if (!lj_char_isxdigit(ls
->current
)) goto err_xesc
;
219 case 'z': /* Skip whitespace. */
221 while (lj_char_isspace(ls
->current
))
222 if (currIsNewline(ls
)) inclinenumber(ls
); else next(ls
);
224 case '\n': case '\r': save(ls
, '\n'); inclinenumber(ls
); continue;
225 case '\\': case '\"': case '\'': break;
226 case END_OF_STREAM
: continue;
228 if (!lj_char_isdigit(c
))
230 c
-= '0'; /* Decimal escape '\ddd'. */
231 if (lj_char_isdigit(next(ls
))) {
232 c
= c
*10 + (ls
->current
- '0');
233 if (lj_char_isdigit(next(ls
))) {
234 c
= c
*10 + (ls
->current
- '0');
237 lj_lex_error(ls
, TK_string
, LJ_ERR_XESC
);
254 save_and_next(ls
); /* skip delimiter */
255 setstrV(ls
->L
, tv
, lj_parse_keepstr(ls
, ls
->sb
.buf
+ 1, ls
->sb
.n
- 2));
258 /* -- Main lexical scanner ------------------------------------------------ */
260 static int llex(LexState
*ls
, TValue
*tv
)
262 lj_str_resetbuf(&ls
->sb
);
264 if (lj_char_isident(ls
->current
)) {
266 if (lj_char_isdigit(ls
->current
)) { /* Numeric literal. */
270 /* Identifier or reserved word. */
273 } while (lj_char_isident(ls
->current
));
274 s
= lj_parse_keepstr(ls
, ls
->sb
.buf
, ls
->sb
.n
);
275 setstrV(ls
->L
, tv
, s
);
276 if (s
->reserved
> 0) /* Reserved word? */
277 return TK_OFS
+ s
->reserved
;
280 switch (ls
->current
) {
293 if (ls
->current
!= '-') return '-';
294 /* else is a comment */
296 if (ls
->current
== '[') {
297 int sep
= skip_sep(ls
);
298 lj_str_resetbuf(&ls
->sb
); /* `skip_sep' may dirty the buffer */
300 read_long_string(ls
, NULL
, sep
); /* long comment */
301 lj_str_resetbuf(&ls
->sb
);
305 /* else short comment */
306 while (!currIsNewline(ls
) && ls
->current
!= END_OF_STREAM
)
310 int sep
= skip_sep(ls
);
312 read_long_string(ls
, tv
, sep
);
314 } else if (sep
== -1) {
317 lj_lex_error(ls
, TK_string
, LJ_ERR_XLDELIM
);
323 if (ls
->current
!= '=') return '='; else { next(ls
); return TK_eq
; }
326 if (ls
->current
!= '=') return '<'; else { next(ls
); return TK_le
; }
329 if (ls
->current
!= '=') return '>'; else { next(ls
); return TK_ge
; }
332 if (ls
->current
!= '=') return '~'; else { next(ls
); return TK_ne
; }
335 if (ls
->current
!= ':') return ':'; else { next(ls
); return TK_label
; }
338 read_string(ls
, ls
->current
, tv
);
342 if (ls
->current
== '.') {
344 if (ls
->current
== '.') {
346 return TK_dots
; /* ... */
348 return TK_concat
; /* .. */
349 } else if (!lj_char_isdigit(ls
->current
)) {
360 return c
; /* Single-char tokens (+ - / ...). */
366 /* -- Lexer API ----------------------------------------------------------- */
368 /* Setup lexer state. */
369 int lj_lex_setup(lua_State
*L
, LexState
*ls
)
381 ls
->lookahead
= TK_eof
; /* No look-ahead token. */
384 lj_str_resizebuf(ls
->L
, &ls
->sb
, LJ_MIN_SBUF
);
385 next(ls
); /* Read-ahead first char. */
386 if (ls
->current
== 0xef && ls
->n
>= 2 && char2int(ls
->p
[0]) == 0xbb &&
387 char2int(ls
->p
[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
393 if (ls
->current
== '#') { /* Skip POSIX #! header line. */
396 if (ls
->current
== END_OF_STREAM
) return 0;
397 } while (!currIsNewline(ls
));
401 if (ls
->current
== LUA_SIGNATURE
[0]) { /* Bytecode dump. */
404 ** Loading bytecode with an extra header is disabled for security
405 ** reasons. This may circumvent the usual check for bytecode vs.
406 ** Lua code by looking at the first char. Since this is a potential
407 ** security violation no attempt is made to echo the chunkname either.
409 setstrV(L
, L
->top
++, lj_err_str(L
, LJ_ERR_BCBAD
));
410 lj_err_throw(L
, LUA_ERRSYNTAX
);
417 /* Cleanup lexer state. */
418 void lj_lex_cleanup(lua_State
*L
, LexState
*ls
)
420 global_State
*g
= G(L
);
421 lj_mem_freevec(g
, ls
->bcstack
, ls
->sizebcstack
, BCInsLine
);
422 lj_mem_freevec(g
, ls
->vstack
, ls
->sizevstack
, VarInfo
);
423 lj_str_freebuf(g
, &ls
->sb
);
426 void lj_lex_next(LexState
*ls
)
428 ls
->lastline
= ls
->linenumber
;
429 if (LJ_LIKELY(ls
->lookahead
== TK_eof
)) { /* No lookahead token? */
430 ls
->token
= llex(ls
, &ls
->tokenval
); /* Get next token. */
431 } else { /* Otherwise return lookahead token. */
432 ls
->token
= ls
->lookahead
;
433 ls
->lookahead
= TK_eof
;
434 ls
->tokenval
= ls
->lookaheadval
;
438 LexToken
lj_lex_lookahead(LexState
*ls
)
440 lua_assert(ls
->lookahead
== TK_eof
);
441 ls
->lookahead
= llex(ls
, &ls
->lookaheadval
);
442 return ls
->lookahead
;
445 const char *lj_lex_token2str(LexState
*ls
, LexToken token
)
448 return tokennames
[token
-TK_OFS
-1];
449 else if (!lj_char_iscntrl(token
))
450 return lj_str_pushf(ls
->L
, "%c", token
);
452 return lj_str_pushf(ls
->L
, "char(%d)", token
);
455 void lj_lex_error(LexState
*ls
, LexToken token
, ErrMsg em
, ...)
461 } else if (token
== TK_name
|| token
== TK_string
|| token
== TK_number
) {
465 tok
= lj_lex_token2str(ls
, token
);
468 lj_err_lex(ls
->L
, ls
->chunkname
, tok
, ls
->linenumber
, em
, argp
);
472 void lj_lex_init(lua_State
*L
)
475 for (i
= 0; i
< TK_RESERVED
; i
++) {
476 GCstr
*s
= lj_str_newz(L
, tokennames
[i
]);
477 fixstring(s
); /* Reserved words are never collected. */
478 s
->reserved
= (uint8_t)(i
+1);