Bug 564076: Small parser cleanup changes. (r=mrbkap)
[mozilla-central.git] / js / src / jsscan.h
blob04c7984742c462ad9e158fb0ab49e4fd795f4231
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is Mozilla Communicator client code, released
17 * March 31, 1998.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
24 * Contributor(s):
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsscan_h___
41 #define jsscan_h___
43 * JS lexical scanner interface.
45 #include <stddef.h>
46 #include <stdio.h>
47 #include <stdarg.h>
48 #include "jsversion.h"
49 #include "jsopcode.h"
50 #include "jsprvtd.h"
51 #include "jspubtd.h"
52 #include "jsvector.h"
54 #define JS_KEYWORD(keyword, type, op, version) \
55 extern const char js_##keyword##_str[];
56 #include "jskeyword.tbl"
57 #undef JS_KEYWORD
59 namespace js {
/*
 * Kinds of token produced by the scanner, plus a number of kinds that are
 * used only as parse-tree node labels (marked "no token" or "synthetic"
 * below). The numeric values are spelled out explicitly up to TOK_UPVARS;
 * TOK_RESERVED and TOK_LIMIT simply continue the sequence.
 */
enum TokenKind {
    /* Error and end markers. */
    TOK_ERROR = -1,                     /* well-known as the only code < EOF */
    TOK_EOF = 0,                        /* end of file */
    TOK_EOL = 1,                        /* end of line */

    /* Punctuators and operators. */
    TOK_SEMI = 2,                       /* semicolon */
    TOK_COMMA = 3,                      /* comma operator */
    TOK_ASSIGN = 4,                     /* assignment ops (= += -= etc.) */
    TOK_HOOK = 5,                       /* conditional (?:), the '?' part */
    TOK_COLON = 6,                      /* conditional (?:), the ':' part */
    TOK_OR = 7,                         /* logical or (||) */
    TOK_AND = 8,                        /* logical and (&&) */
    TOK_BITOR = 9,                      /* bitwise-or (|) */
    TOK_BITXOR = 10,                    /* bitwise-xor (^) */
    TOK_BITAND = 11,                    /* bitwise-and (&) */
    TOK_EQOP = 12,                      /* equality ops (== !=) */
    TOK_RELOP = 13,                     /* relational ops (< <= > >=) */
    TOK_SHOP = 14,                      /* shift ops (<< >> >>>) */
    TOK_PLUS = 15,                      /* plus */
    TOK_MINUS = 16,                     /* minus */
    TOK_STAR = 17,                      /* multiply/divide ops (* / %) */
    TOK_DIVOP = 18,                     /* multiply/divide ops (* / %) */
    TOK_UNARYOP = 19,                   /* unary prefix operator */
    TOK_INC = 20,                       /* increment (++) */
    TOK_DEC = 21,                       /* decrement (--) */
    TOK_DOT = 22,                       /* member operator (.) */
    TOK_LB = 23,                        /* left bracket */
    TOK_RB = 24,                        /* right bracket */
    TOK_LC = 25,                        /* left curly (brace) */
    TOK_RC = 26,                        /* right curly (brace) */
    TOK_LP = 27,                        /* left parenthesis */
    TOK_RP = 28,                        /* right parenthesis */

    /* Names and literals. */
    TOK_NAME = 29,                      /* identifier */
    TOK_NUMBER = 30,                    /* numeric constant */
    TOK_STRING = 31,                    /* string constant */
    TOK_REGEXP = 32,                    /* RegExp constant */
    TOK_PRIMARY = 33,                   /* true, false, null, this, super */

    /* Keywords and sharp variables. */
    TOK_FUNCTION = 34,                  /* function keyword */
    TOK_IF = 35,                        /* if keyword */
    TOK_ELSE = 36,                      /* else keyword */
    TOK_SWITCH = 37,                    /* switch keyword */
    TOK_CASE = 38,                      /* case keyword */
    TOK_DEFAULT = 39,                   /* default keyword */
    TOK_WHILE = 40,                     /* while keyword */
    TOK_DO = 41,                        /* do keyword */
    TOK_FOR = 42,                       /* for keyword */
    TOK_BREAK = 43,                     /* break keyword */
    TOK_CONTINUE = 44,                  /* continue keyword */
    TOK_IN = 45,                        /* in keyword */
    TOK_VAR = 46,                       /* var keyword */
    TOK_WITH = 47,                      /* with keyword */
    TOK_RETURN = 48,                    /* return keyword */
    TOK_NEW = 49,                       /* new keyword */
    TOK_DELETE = 50,                    /* delete keyword */
    TOK_DEFSHARP = 51,                  /* #n= for object/array initializers */
    TOK_USESHARP = 52,                  /* #n# for object/array initializers */
    TOK_TRY = 53,                       /* try keyword */
    TOK_CATCH = 54,                     /* catch keyword */
    TOK_FINALLY = 55,                   /* finally keyword */
    TOK_THROW = 56,                     /* throw keyword */
    TOK_INSTANCEOF = 57,                /* instanceof keyword */
    TOK_DEBUGGER = 58,                  /* debugger keyword */

    /* XML tokens and node types. */
    TOK_XMLSTAGO = 59,                  /* XML start tag open (<) */
    TOK_XMLETAGO = 60,                  /* XML end tag open (</) */
    TOK_XMLPTAGC = 61,                  /* XML point tag close (/>) */
    TOK_XMLTAGC = 62,                   /* XML start or end tag close (>) */
    TOK_XMLNAME = 63,                   /* XML start-tag non-final fragment */
    TOK_XMLATTR = 64,                   /* XML quoted attribute value */
    TOK_XMLSPACE = 65,                  /* XML whitespace */
    TOK_XMLTEXT = 66,                   /* XML text */
    TOK_XMLCOMMENT = 67,                /* XML comment */
    TOK_XMLCDATA = 68,                  /* XML CDATA section */
    TOK_XMLPI = 69,                     /* XML processing instruction */
    TOK_AT = 70,                        /* XML attribute op (@) */
    TOK_DBLCOLON = 71,                  /* namespace qualified name op (::) */
    TOK_ANYNAME = 72,                   /* XML AnyName singleton (*) */
    TOK_DBLDOT = 73,                    /* XML descendant op (..) */
    TOK_FILTER = 74,                    /* XML filtering predicate op (.()) */
    TOK_XMLELEM = 75,                   /* XML element node type (no token) */
    TOK_XMLLIST = 76,                   /* XML list node type (no token) */

    /* Generators, comprehensions, block scope and synthetic node labels. */
    TOK_YIELD = 77,                     /* yield from generator function */
    TOK_ARRAYCOMP = 78,                 /* array comprehension initialiser */
    TOK_ARRAYPUSH = 79,                 /* array push within comprehension */
    TOK_LEXICALSCOPE = 80,              /* block scope AST node label */
    TOK_LET = 81,                       /* let keyword */
    TOK_SEQ = 82,                       /* synthetic sequence of statements,
                                           not a block */
    TOK_FORHEAD = 83,                   /* head of for(;;)-style loop */
    TOK_ARGSBODY = 84,                  /* formal args in list + body at end */
    TOK_UPVARS = 85,                    /* lexical dependencies as JSAtomList
                                           of definitions paired with a parse
                                           tree full of uses of those names */

    /* Implicitly numbered tail. */
    TOK_RESERVED,                       /* reserved keywords */
    TOK_LIMIT                           /* domain size */
};
150 static inline bool
151 TokenKindIsXML(TokenKind tt)
153 return tt == TOK_AT || tt == TOK_DBLCOLON || tt == TOK_ANYNAME;
156 static inline bool
157 TreeTypeIsXML(TokenKind tt)
159 return tt == TOK_XMLCOMMENT || tt == TOK_XMLCDATA || tt == TOK_XMLPI ||
160 tt == TOK_XMLELEM || tt == TOK_XMLLIST;
163 static inline bool
164 TokenKindIsDecl(TokenKind tt)
166 #if JS_HAS_BLOCK_SCOPE
167 return tt == TOK_VAR || tt == TOK_LET;
168 #else
169 return tt == TOK_VAR;
170 #endif
173 struct TokenPtr {
174 uint32 index; /* index of char in physical line */
175 uint32 lineno; /* physical line number */
177 bool operator==(const TokenPtr& bptr) {
178 return index == bptr.index && lineno == bptr.lineno;
181 bool operator!=(const TokenPtr& bptr) {
182 return index != bptr.index || lineno != bptr.lineno;
185 bool operator <(const TokenPtr& bptr) {
186 return lineno < bptr.lineno ||
187 (lineno == bptr.lineno && index < bptr.index);
190 bool operator <=(const TokenPtr& bptr) {
191 return lineno < bptr.lineno ||
192 (lineno == bptr.lineno && index <= bptr.index);
195 bool operator >(const TokenPtr& bptr) {
196 return !(*this <= bptr);
199 bool operator >=(const TokenPtr& bptr) {
200 return !(*this < bptr);
204 struct TokenPos {
205 TokenPtr begin; /* first character and line of token */
206 TokenPtr end; /* index 1 past last char, last line */
208 bool operator==(const TokenPos& bpos) {
209 return begin == bpos.begin && end == bpos.end;
212 bool operator!=(const TokenPos& bpos) {
213 return begin != bpos.begin || end != bpos.end;
216 bool operator <(const TokenPos& bpos) {
217 return begin < bpos.begin;
220 bool operator <=(const TokenPos& bpos) {
221 return begin <= bpos.begin;
224 bool operator >(const TokenPos& bpos) {
225 return !(*this <= bpos);
228 bool operator >=(const TokenPos& bpos) {
229 return !(*this < bpos);
233 struct Token {
234 TokenKind type; /* char value or above enumerator */
235 TokenPos pos; /* token position in file */
236 jschar *ptr; /* beginning of token in line buffer */
237 union {
238 struct { /* name or string literal */
239 JSOp op; /* operator, for minimal parser */
240 JSAtom *atom; /* atom table entry */
241 } s;
242 uintN reflags; /* regexp flags, use tokenbuf to access
243 regexp chars */
244 struct { /* atom pair, for XML PIs */
245 JSAtom *atom2; /* auxiliary atom table entry */
246 JSAtom *atom; /* main atom table entry */
247 } p;
248 jsdouble dval; /* floating point number */
249 } u;
/*
 * Bit flags kept in a TokenStream's |flags| word. Each enumerator is a
 * distinct single bit; the value 0x10 is not assigned in this enum.
 */
enum TokenStreamFlags
{
    TSF_ERROR            = 0x0001,      /* fatal error while compiling */
    TSF_EOF              = 0x0002,      /* hit end of file */
    TSF_NEWLINES         = 0x0004,      /* tokenize newlines */
    TSF_OPERAND          = 0x0008,      /* looking for operand, not operator */
    TSF_NLFLAG           = 0x0020,      /* last linebuf ended with \n */
    TSF_CRFLAG           = 0x0040,      /* linebuf would have ended with \r */
    TSF_DIRTYLINE        = 0x0080,      /* non-whitespace since start of line */
    TSF_OWNFILENAME      = 0x0100,      /* ts->filename is malloc'd */
    TSF_XMLTAGMODE       = 0x0200,      /* scanning within an XML tag in E4X */
    TSF_XMLTEXTMODE      = 0x0400,      /* scanning XMLText terminal from E4X */
    TSF_XMLONLYMODE      = 0x0800,      /* don't scan {expr} within text/tag */

    /* Flag indicating unexpected end of input, i.e. TOK_EOF not at top-level. */
    TSF_UNEXPECTED_EOF   = 0x1000,

    /*
     * To handle the hard case of contiguous HTML comments, we want to clear
     * the TSF_DIRTYLINE flag at the end of each such comment. But we'd rather
     * not scan for --> within every //-style comment unless we have to. So we
     * set TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment,
     * and clear it (and TSF_DIRTYLINE) when we scan --> either on a clean
     * line, or only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style
     * comment.
     *
     * This still works as before given a malformed comment hiding hack such
     * as:
     *
     *    <script>
     *      <!-- comment hiding hack #1
     *      code goes here
     *      // --> oops, markup for script-unaware browsers goes here!
     *    </script>
     *
     * It does not cope with malformed comment hiding hacks where --> is hidden
     * by C-style comments, or on a dirty line. Such cases are already broken.
     */
    TSF_IN_HTML_COMMENT  = 0x2000,

    /* Ignore keywords and return TOK_NAME instead to the parser. */
    TSF_KEYWORD_IS_NAME  = 0x4000,

    /* Tokenize as appropriate for strict mode code. */
    TSF_STRICT_MODE_CODE = 0x8000
};
/*
 * Shorthand member names for the payload union of a Token, so that a token
 * |tok| can be written as tok.t_atom instead of tok.u.s.atom, and so on.
 */
#define t_op            u.s.op
#define t_reflags       u.reflags
#define t_atom          u.s.atom
#define t_atom2         u.p.atom2
#define t_dval          u.dval
/*
 * Logical line buffer size limit. Physical line length is unlimited.
 */
const size_t LINE_LIMIT = 256;
306 class TokenStream
308 static const size_t ntokens = 4; /* 1 current + 2 lookahead, rounded
309 to power of 2 to avoid divmod by 3 */
310 static const uintN ntokensMask = ntokens - 1;
312 public:
314 * To construct a TokenStream, first call the constructor, which is
315 * infallible, then call |init|, which can fail. To destroy a TokenStream,
316 * first call |close| then call the destructor. If |init| fails, do not call
317 * |close|.
319 * This class uses JSContext.tempPool to allocate internal buffers. The
320 * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
321 * after calling |close|.
323 TokenStream(JSContext *);
326 * Create a new token stream, either from an input buffer or from a file.
327 * Return false on file-open or memory-allocation failure.
329 bool init(const jschar *base, size_t length, FILE *fp, const char *filename, uintN lineno);
330 void close();
331 ~TokenStream() {}
333 /* Accessors. */
334 JSContext *getContext() const { return cx; }
335 bool onCurrentLine(const TokenPos &pos) const { return lineno == pos.end.lineno; }
336 const Token &currentToken() const { return tokens[cursor]; }
337 const JSCharBuffer &getTokenbuf() const { return tokenbuf; }
338 const char *getFilename() const { return filename; }
339 uintN getLineno() const { return lineno; }
341 /* Flag methods. */
342 void setStrictMode(bool enabled = true) { setFlag(enabled, TSF_STRICT_MODE_CODE); }
343 void setXMLTagMode(bool enabled = true) { setFlag(enabled, TSF_XMLTAGMODE); }
344 void setXMLOnlyMode(bool enabled = true) { setFlag(enabled, TSF_XMLONLYMODE); }
345 void setUnexpectedEOF(bool enabled = true) { setFlag(enabled, TSF_UNEXPECTED_EOF); }
346 bool isStrictMode() { return !!(flags & TSF_STRICT_MODE_CODE); }
347 bool isXMLTagMode() { return !!(flags & TSF_XMLTAGMODE); }
348 bool isXMLOnlyMode() { return !!(flags & TSF_XMLONLYMODE); }
349 bool isUnexpectedEOF() { return !!(flags & TSF_UNEXPECTED_EOF); }
350 bool isEOF() const { return !!(flags & TSF_EOF); }
351 bool isError() const { return !!(flags & TSF_ERROR); }
353 /* Mutators. */
354 bool reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber, va_list ap);
355 void mungeCurrentToken(TokenKind newKind) { tokens[cursor].type = newKind; }
356 void mungeCurrentToken(JSOp newOp) { tokens[cursor].t_op = newOp; }
357 void mungeCurrentToken(TokenKind newKind, JSOp newOp) {
358 mungeCurrentToken(newKind);
359 mungeCurrentToken(newOp);
362 private:
364 * Enables flags in the associated tokenstream for the object lifetime.
365 * Useful for lexically-scoped flag toggles.
367 class Flagger {
368 TokenStream * const parent;
369 uintN flags;
370 public:
371 Flagger(TokenStream *parent, uintN withFlags) : parent(parent), flags(withFlags) {
372 parent->flags |= flags;
375 ~Flagger() { parent->flags &= ~flags; }
377 friend class Flagger;
379 void setFlag(bool enabled, TokenStreamFlags flag) {
380 if (enabled)
381 flags |= flag;
382 else
383 flags &= ~flag;
386 public:
388 * Get the next token from the stream, make it the current token, and
389 * return its kind.
391 TokenKind getToken(uintN withFlags = 0) {
392 Flagger flagger(this, withFlags);
393 /* Check for a pushed-back token resulting from mismatching lookahead. */
394 while (lookahead != 0) {
395 JS_ASSERT(!(flags & TSF_XMLTEXTMODE));
396 lookahead--;
397 cursor = (cursor + 1) & ntokensMask;
398 TokenKind tt = currentToken().type;
399 JS_ASSERT(!(flags & TSF_NEWLINES));
400 if (tt != TOK_EOL)
401 return tt;
404 /* If there was a fatal error, keep returning TOK_ERROR. */
405 if (flags & TSF_ERROR)
406 return TOK_ERROR;
408 return getTokenInternal();
412 * Push the last scanned token back into the stream.
414 void ungetToken() {
415 JS_ASSERT(lookahead < ntokensMask);
416 lookahead++;
417 cursor = (cursor - 1) & ntokensMask;
420 TokenKind peekToken(uintN withFlags = 0) {
421 Flagger flagger(this, withFlags);
422 if (lookahead != 0) {
423 JS_ASSERT(lookahead == 1);
424 return tokens[(cursor + lookahead) & ntokensMask].type;
426 TokenKind tt = getToken();
427 ungetToken();
428 return tt;
431 TokenKind peekTokenSameLine(uintN withFlags = 0) {
432 Flagger flagger(this, withFlags);
433 if (!onCurrentLine(currentToken().pos))
434 return TOK_EOL;
435 TokenKind tt = peekToken(TSF_NEWLINES);
436 return tt;
440 * Get the next token from the stream if its kind is |tt|.
442 JSBool matchToken(TokenKind tt, uintN withFlags = 0) {
443 Flagger flagger(this, withFlags);
444 if (getToken() == tt)
445 return JS_TRUE;
446 ungetToken();
447 return JS_FALSE;
450 private:
451 typedef struct TokenBuf {
452 jschar *base; /* base of line or stream buffer */
453 jschar *limit; /* limit for quick bounds check */
454 jschar *ptr; /* next char to get, or slot to use */
455 } TokenBuf;
457 TokenKind getTokenInternal(); /* doesn't check for pushback or error flag. */
458 int32 getChar();
459 void ungetChar(int32 c);
460 Token *newToken(ptrdiff_t adjust);
461 int32 getUnicodeEscape();
462 JSBool peekChars(intN n, jschar *cp);
463 JSBool getXMLEntity();
465 JSBool matchChar(int32 expect) {
466 int32 c = getChar();
467 if (c == expect)
468 return JS_TRUE;
469 ungetChar(c);
470 return JS_FALSE;
473 int32 peekChar() {
474 int32 c = getChar();
475 ungetChar(c);
476 return c;
479 void skipChars(intN n) {
480 while (--n >= 0)
481 getChar();
484 JSContext * const cx;
485 Token tokens[ntokens];/* circular token buffer */
486 uintN cursor; /* index of last parsed token */
487 uintN lookahead; /* count of lookahead tokens */
488 uintN lineno; /* current line number */
489 uintN ungetpos; /* next free char slot in ungetbuf */
490 jschar ungetbuf[6]; /* at most 6, for \uXXXX lookahead */
491 uintN flags; /* flags -- see above */
492 uint32 linelen; /* physical linebuf segment length */
493 uint32 linepos; /* linebuf offset in physical line */
494 TokenBuf linebuf; /* line buffer for diagnostics */
495 TokenBuf userbuf; /* user input buffer if !file */
496 const char *filename; /* input filename or null */
497 FILE *file; /* stdio stream if reading from file */
498 JSSourceHandler listener; /* callback for source; eg debugger */
499 void *listenerData; /* listener 'this' data */
500 void *listenerTSData;/* listener data for this TokenStream */
501 jschar *saveEOL; /* save next end of line in userbuf, to
502 optimize for very long lines */
503 JSCharBuffer tokenbuf; /* current token string buffer */
506 } /* namespace js */
/*
 * Unicode separators that are treated as line terminators, in addition to
 * \n and \r.
 */
#define LINE_SEPARATOR  0x2028
#define PARA_SEPARATOR  0x2029
512 extern void
513 js_CloseTokenStream(JSContext *cx, js::TokenStream *ts);
515 extern JS_FRIEND_API(int)
516 js_fgets(char *buf, int size, FILE *file);
519 * If the given char array forms a JavaScript keyword, return the corresponding
520 * token. Otherwise return TOK_EOF.
522 extern js::TokenKind
523 js_CheckKeyword(const jschar *chars, size_t length);
526 * Friend-exported API entry point to call a mapping function on each reserved
527 * identifier in the scanner's keyword table.
529 typedef void (*JSMapKeywordFun)(const char *);
532 * Check that str forms a valid JS identifier name. The function does not
533 * check if str is a JS keyword.
535 extern JSBool
536 js_IsIdentifier(JSString *str);
539 * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
540 * message have const jschar* type, not const char*.
542 #define JSREPORT_UC 0x100
544 namespace js {
547 * Report a compile-time error by its number. Return true for a warning, false
548 * for an error. When pn is not null, use it to report error's location.
549 * Otherwise use ts, which must not be null.
551 bool
552 ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn, uintN flags,
553 uintN errorNumber, ...);
556 * Report a condition that should elicit a warning with JSOPTION_STRICT,
557 * or an error if ts or tc is handling strict mode code. This function
558 * defers to ReportCompileErrorNumber to do the real work. Either tc
559 * or ts may be NULL, if there is no tree context or token stream state
560 * whose strictness should affect the report.
562 * One could have ReportCompileErrorNumber recognize the
563 * JSREPORT_STRICT_MODE_ERROR flag instead of having a separate function
564 * like this one. However, the strict mode code flag we need to test is
565 * in the JSTreeContext structure for that code; we would have to change
566 * the ~120 ReportCompileErrorNumber calls to pass the additional
567 * argument, even though many of those sites would never use it. Using
568 * ts's TSF_STRICT_MODE_CODE flag instead of tc's would be brittle: at some
569 * points ts's flags don't correspond to those of the tc relevant to the
570 * error.
572 bool
573 ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
574 uintN errorNumber, ...);
576 } /* namespace js */
578 #endif /* jsscan_h___ */