Merge mozilla-central and tracemonkey. (a=blockers)
[mozilla-central.git] / js / src / jsscan.h
blobb65219e7d947b76589fcdd30aa75098b26a42f92
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is Mozilla Communicator client code, released
17 * March 31, 1998.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
24 * Contributor(s):
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsscan_h___
41 #define jsscan_h___
/*
 * JS lexical scanner interface.
 */
45 #include <stddef.h>
46 #include <stdio.h>
47 #include <stdarg.h>
48 #include "jsversion.h"
49 #include "jsopcode.h"
50 #include "jsprvtd.h"
51 #include "jspubtd.h"
52 #include "jsvector.h"
54 #define JS_KEYWORD(keyword, type, op, version) \
55 extern const char js_##keyword##_str[];
56 #include "jskeyword.tbl"
57 #undef JS_KEYWORD
59 namespace js {
/*
 * Token kinds produced by the scanner. Several enumerators are marked below
 * as node types or synthetic kinds ("no token", "synthetic", "AST node
 * label") rather than tokens that appear in source text.
 *
 * The numeric values up to TOK_UPVARS are spelled out explicitly; the last
 * three enumerators continue the sequence implicitly.
 */
enum TokenKind {
    TOK_ERROR = -1,                     /* well-known as the only code < EOF */
    TOK_EOF = 0,                        /* end of file */
    TOK_EOL = 1,                        /* end of line */
    TOK_SEMI = 2,                       /* semicolon */
    TOK_COMMA = 3,                      /* comma operator */
    TOK_ASSIGN = 4,                     /* assignment ops (= += -= etc.) */
    TOK_HOOK = 5, TOK_COLON = 6,        /* conditional (?:) */
    TOK_OR = 7,                         /* logical or (||) */
    TOK_AND = 8,                        /* logical and (&&) */
    TOK_BITOR = 9,                      /* bitwise-or (|) */
    TOK_BITXOR = 10,                    /* bitwise-xor (^) */
    TOK_BITAND = 11,                    /* bitwise-and (&) */
    TOK_EQOP = 12,                      /* equality ops (== !=) */
    TOK_RELOP = 13,                     /* relational ops (< <= > >=) */
    TOK_SHOP = 14,                      /* shift ops (<< >> >>>) */
    TOK_PLUS = 15,                      /* plus */
    TOK_MINUS = 16,                     /* minus */
    TOK_STAR = 17, TOK_DIVOP = 18,      /* multiply/divide ops (* / %) */
    TOK_UNARYOP = 19,                   /* unary prefix operator */
    TOK_INC = 20, TOK_DEC = 21,         /* increment/decrement (++ --) */
    TOK_DOT = 22,                       /* member operator (.) */
    TOK_LB = 23, TOK_RB = 24,           /* left and right brackets */
    TOK_LC = 25, TOK_RC = 26,           /* left and right curlies (braces) */
    TOK_LP = 27, TOK_RP = 28,           /* left and right parentheses */
    TOK_NAME = 29,                      /* identifier */
    TOK_NUMBER = 30,                    /* numeric constant */
    TOK_STRING = 31,                    /* string constant */
    TOK_REGEXP = 32,                    /* RegExp constant */
    TOK_PRIMARY = 33,                   /* true, false, null, this, super */
    TOK_FUNCTION = 34,                  /* function keyword */
    TOK_IF = 35,                        /* if keyword */
    TOK_ELSE = 36,                      /* else keyword */
    TOK_SWITCH = 37,                    /* switch keyword */
    TOK_CASE = 38,                      /* case keyword */
    TOK_DEFAULT = 39,                   /* default keyword */
    TOK_WHILE = 40,                     /* while keyword */
    TOK_DO = 41,                        /* do keyword */
    TOK_FOR = 42,                       /* for keyword */
    TOK_BREAK = 43,                     /* break keyword */
    TOK_CONTINUE = 44,                  /* continue keyword */
    TOK_IN = 45,                        /* in keyword */
    TOK_VAR = 46,                       /* var keyword */
    TOK_WITH = 47,                      /* with keyword */
    TOK_RETURN = 48,                    /* return keyword */
    TOK_NEW = 49,                       /* new keyword */
    TOK_DELETE = 50,                    /* delete keyword */
    TOK_DEFSHARP = 51,                  /* #n= for object/array initializers */
    TOK_USESHARP = 52,                  /* #n# for object/array initializers */
    TOK_TRY = 53,                       /* try keyword */
    TOK_CATCH = 54,                     /* catch keyword */
    TOK_FINALLY = 55,                   /* finally keyword */
    TOK_THROW = 56,                     /* throw keyword */
    TOK_INSTANCEOF = 57,                /* instanceof keyword */
    TOK_DEBUGGER = 58,                  /* debugger keyword */
    TOK_XMLSTAGO = 59,                  /* XML start tag open (<) */
    TOK_XMLETAGO = 60,                  /* XML end tag open (</) */
    TOK_XMLPTAGC = 61,                  /* XML point tag close (/>) */
    TOK_XMLTAGC = 62,                   /* XML start or end tag close (>) */
    TOK_XMLNAME = 63,                   /* XML start-tag non-final fragment */
    TOK_XMLATTR = 64,                   /* XML quoted attribute value */
    TOK_XMLSPACE = 65,                  /* XML whitespace */
    TOK_XMLTEXT = 66,                   /* XML text */
    TOK_XMLCOMMENT = 67,                /* XML comment */
    TOK_XMLCDATA = 68,                  /* XML CDATA section */
    TOK_XMLPI = 69,                     /* XML processing instruction */
    TOK_AT = 70,                        /* XML attribute op (@) */
    TOK_DBLCOLON = 71,                  /* namespace qualified name op (::) */
    TOK_ANYNAME = 72,                   /* XML AnyName singleton (*) */
    TOK_DBLDOT = 73,                    /* XML descendant op (..) */
    TOK_FILTER = 74,                    /* XML filtering predicate op (.()) */
    TOK_XMLELEM = 75,                   /* XML element node type (no token) */
    TOK_XMLLIST = 76,                   /* XML list node type (no token) */
    TOK_YIELD = 77,                     /* yield from generator function */
    TOK_ARRAYCOMP = 78,                 /* array comprehension initialiser */
    TOK_ARRAYPUSH = 79,                 /* array push within comprehension */
    TOK_LEXICALSCOPE = 80,              /* block scope AST node label */
    TOK_LET = 81,                       /* let keyword */
    TOK_SEQ = 82,                       /* synthetic sequence of statements,
                                           not a block */
    TOK_FORHEAD = 83,                   /* head of for(;;)-style loop */
    TOK_ARGSBODY = 84,                  /* formal args in list + body at end */
    TOK_UPVARS = 85,                    /* lexical dependencies as JSAtomList
                                           of definitions paired with a parse
                                           tree full of uses of those names */
    TOK_RESERVED,                       /* reserved keywords */
    TOK_STRICT_RESERVED,                /* reserved keywords in strict mode */
    TOK_LIMIT                           /* domain size */
};
151 static inline bool
152 TokenKindIsXML(TokenKind tt)
154 return tt == TOK_AT || tt == TOK_DBLCOLON || tt == TOK_ANYNAME;
157 static inline bool
158 TreeTypeIsXML(TokenKind tt)
160 return tt == TOK_XMLCOMMENT || tt == TOK_XMLCDATA || tt == TOK_XMLPI ||
161 tt == TOK_XMLELEM || tt == TOK_XMLLIST;
164 static inline bool
165 TokenKindIsDecl(TokenKind tt)
167 #if JS_HAS_BLOCK_SCOPE
168 return tt == TOK_VAR || tt == TOK_LET;
169 #else
170 return tt == TOK_VAR;
171 #endif
174 struct TokenPtr {
175 uint32 index; /* index of char in physical line */
176 uint32 lineno; /* physical line number */
178 bool operator==(const TokenPtr& bptr) {
179 return index == bptr.index && lineno == bptr.lineno;
182 bool operator!=(const TokenPtr& bptr) {
183 return index != bptr.index || lineno != bptr.lineno;
186 bool operator <(const TokenPtr& bptr) {
187 return lineno < bptr.lineno ||
188 (lineno == bptr.lineno && index < bptr.index);
191 bool operator <=(const TokenPtr& bptr) {
192 return lineno < bptr.lineno ||
193 (lineno == bptr.lineno && index <= bptr.index);
196 bool operator >(const TokenPtr& bptr) {
197 return !(*this <= bptr);
200 bool operator >=(const TokenPtr& bptr) {
201 return !(*this < bptr);
205 struct TokenPos {
206 TokenPtr begin; /* first character and line of token */
207 TokenPtr end; /* index 1 past last char, last line */
209 bool operator==(const TokenPos& bpos) {
210 return begin == bpos.begin && end == bpos.end;
213 bool operator!=(const TokenPos& bpos) {
214 return begin != bpos.begin || end != bpos.end;
217 bool operator <(const TokenPos& bpos) {
218 return begin < bpos.begin;
221 bool operator <=(const TokenPos& bpos) {
222 return begin <= bpos.begin;
225 bool operator >(const TokenPos& bpos) {
226 return !(*this <= bpos);
229 bool operator >=(const TokenPos& bpos) {
230 return !(*this < bpos);
234 struct Token {
235 TokenKind type; /* char value or above enumerator */
236 TokenPos pos; /* token position in file */
237 jschar *ptr; /* beginning of token in line buffer */
238 union {
239 struct { /* name or string literal */
240 JSOp op; /* operator, for minimal parser */
241 JSAtom *atom; /* atom table entry */
242 } s;
243 uintN reflags; /* regexp flags, use tokenbuf to access
244 regexp chars */
245 struct { /* atom pair, for XML PIs */
246 JSAtom *atom2; /* auxiliary atom table entry */
247 JSAtom *atom; /* main atom table entry */
248 } p;
249 jsdouble dval; /* floating point number */
250 } u;
/*
 * Bit flags describing the state and scanning mode of a token stream. Each
 * enumerator is a distinct bit so that flags can be combined with | and
 * tested with &.
 */
enum TokenStreamFlags
{
    TSF_ERROR = 0x01,           /* fatal error while compiling */
    TSF_EOF = 0x02,             /* hit end of file */
    TSF_NEWLINES = 0x04,        /* tokenize newlines */
    TSF_OPERAND = 0x08,         /* looking for operand, not operator */
    TSF_UNEXPECTED_EOF = 0x10,  /* unexpected end of input, i.e. TOK_EOF not at top-level. */
    TSF_KEYWORD_IS_NAME = 0x20, /* Ignore keywords and return TOK_NAME instead to the parser. */
    TSF_STRICT_MODE_CODE = 0x40,/* Tokenize as appropriate for strict mode code. */
    TSF_DIRTYLINE = 0x80,       /* non-whitespace since start of line */
    TSF_OWNFILENAME = 0x100,    /* ts->filename is malloc'd */
    TSF_XMLTAGMODE = 0x200,     /* scanning within an XML tag in E4X */
    TSF_XMLTEXTMODE = 0x400,    /* scanning XMLText terminal from E4X */
    TSF_XMLONLYMODE = 0x800,    /* don't scan {expr} within text/tag */
    TSF_OCTAL_CHAR = 0x1000,    /* observed an octal character escape */

    /*
     * To handle the hard case of contiguous HTML comments, we want to clear the
     * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
     * scan for --> within every //-style comment unless we have to.  So we set
     * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
     * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
     * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
     *
     * This still works as before given a malformed comment hiding hack such as:
     *
     *    <script>
     *      <!-- comment hiding hack #1
     *      code goes here
     *      // --> oops, markup for script-unaware browsers goes here!
     *    </script>
     *
     * It does not cope with malformed comment hiding hacks where --> is hidden
     * by C-style comments, or on a dirty line.  Such cases are already broken.
     */
    TSF_IN_HTML_COMMENT = 0x2000
};
/*
 * Shorthands for the members of Token::u, so that a payload field can be
 * written as tok.t_op, tok.t_atom, and so on.
 */
#define t_op            u.s.op
#define t_reflags       u.reflags
#define t_atom          u.s.atom
#define t_atom2         u.p.atom2
#define t_dval          u.dval
297 class TokenStream
299 static const size_t ntokens = 4; /* 1 current + 2 lookahead, rounded
300 to power of 2 to avoid divmod by 3 */
301 static const uintN ntokensMask = ntokens - 1;
303 public:
304 typedef Vector<jschar, 32> CharBuffer;
307 * To construct a TokenStream, first call the constructor, which is
308 * infallible, then call |init|, which can fail. To destroy a TokenStream,
309 * first call |close| then call the destructor. If |init| fails, do not call
310 * |close|.
312 * This class uses JSContext.tempPool to allocate internal buffers. The
313 * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
314 * after calling |close|.
316 TokenStream(JSContext *);
319 * Create a new token stream from an input buffer.
320 * Return false on memory-allocation failure.
322 bool init(const jschar *base, size_t length, const char *filename, uintN lineno,
323 JSVersion version);
324 void close();
325 ~TokenStream() {}
327 /* Accessors. */
328 JSContext *getContext() const { return cx; }
329 bool onCurrentLine(const TokenPos &pos) const { return lineno == pos.end.lineno; }
330 const Token &currentToken() const { return tokens[cursor]; }
331 const CharBuffer &getTokenbuf() const { return tokenbuf; }
332 const char *getFilename() const { return filename; }
333 uintN getLineno() const { return lineno; }
334 /* Note that the version and hasXML can get out of sync via setXML. */
335 JSVersion versionNumber() const { return VersionNumber(version); }
336 JSVersion versionWithFlags() const { return version; }
337 bool hasAnonFunFix() const { return VersionHasAnonFunFix(version); }
338 bool hasXML() const { return xml || VersionShouldParseXML(versionNumber()); }
339 void setXML(bool enabled) { xml = enabled; }
341 /* Flag methods. */
342 void setStrictMode(bool enabled = true) { setFlag(enabled, TSF_STRICT_MODE_CODE); }
343 void setXMLTagMode(bool enabled = true) { setFlag(enabled, TSF_XMLTAGMODE); }
344 void setXMLOnlyMode(bool enabled = true) { setFlag(enabled, TSF_XMLONLYMODE); }
345 void setUnexpectedEOF(bool enabled = true) { setFlag(enabled, TSF_UNEXPECTED_EOF); }
346 void setOctalCharacterEscape(bool enabled = true) { setFlag(enabled, TSF_OCTAL_CHAR); }
348 bool isStrictMode() { return !!(flags & TSF_STRICT_MODE_CODE); }
349 bool isXMLTagMode() { return !!(flags & TSF_XMLTAGMODE); }
350 bool isXMLOnlyMode() { return !!(flags & TSF_XMLONLYMODE); }
351 bool isUnexpectedEOF() { return !!(flags & TSF_UNEXPECTED_EOF); }
352 bool isEOF() const { return !!(flags & TSF_EOF); }
353 bool isError() const { return !!(flags & TSF_ERROR); }
354 bool hasOctalCharacterEscape() const { return flags & TSF_OCTAL_CHAR; }
356 /* Mutators. */
357 bool reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber, va_list ap);
358 void mungeCurrentToken(TokenKind newKind) { tokens[cursor].type = newKind; }
359 void mungeCurrentToken(JSOp newOp) { tokens[cursor].t_op = newOp; }
360 void mungeCurrentToken(TokenKind newKind, JSOp newOp) {
361 mungeCurrentToken(newKind);
362 mungeCurrentToken(newOp);
365 private:
366 static JSAtom *atomize(JSContext *cx, CharBuffer &cb);
369 * Enables flags in the associated tokenstream for the object lifetime.
370 * Useful for lexically-scoped flag toggles.
372 class Flagger {
373 TokenStream * const parent;
374 uintN flags;
375 public:
376 Flagger(TokenStream *parent, uintN withFlags) : parent(parent), flags(withFlags) {
377 parent->flags |= flags;
380 ~Flagger() { parent->flags &= ~flags; }
382 friend class Flagger;
384 void setFlag(bool enabled, TokenStreamFlags flag) {
385 if (enabled)
386 flags |= flag;
387 else
388 flags &= ~flag;
391 public:
393 * Get the next token from the stream, make it the current token, and
394 * return its kind.
396 TokenKind getToken() {
397 /* Check for a pushed-back token resulting from mismatching lookahead. */
398 while (lookahead != 0) {
399 JS_ASSERT(!(flags & TSF_XMLTEXTMODE));
400 lookahead--;
401 cursor = (cursor + 1) & ntokensMask;
402 TokenKind tt = currentToken().type;
403 JS_ASSERT(!(flags & TSF_NEWLINES));
404 if (tt != TOK_EOL)
405 return tt;
408 /* If there was a fatal error, keep returning TOK_ERROR. */
409 if (flags & TSF_ERROR)
410 return TOK_ERROR;
412 return getTokenInternal();
415 /* Similar, but also sets flags. */
416 TokenKind getToken(uintN withFlags) {
417 Flagger flagger(this, withFlags);
418 return getToken();
422 * Push the last scanned token back into the stream.
424 void ungetToken() {
425 JS_ASSERT(lookahead < ntokensMask);
426 lookahead++;
427 cursor = (cursor - 1) & ntokensMask;
430 TokenKind peekToken(uintN withFlags = 0) {
431 Flagger flagger(this, withFlags);
432 if (lookahead != 0) {
433 JS_ASSERT(lookahead == 1);
434 return tokens[(cursor + lookahead) & ntokensMask].type;
436 TokenKind tt = getToken();
437 ungetToken();
438 return tt;
441 TokenKind peekTokenSameLine(uintN withFlags = 0) {
442 Flagger flagger(this, withFlags);
443 if (!onCurrentLine(currentToken().pos))
444 return TOK_EOL;
445 TokenKind tt = peekToken(TSF_NEWLINES);
446 return tt;
450 * Get the next token from the stream if its kind is |tt|.
452 JSBool matchToken(TokenKind tt, uintN withFlags = 0) {
453 Flagger flagger(this, withFlags);
454 if (getToken() == tt)
455 return JS_TRUE;
456 ungetToken();
457 return JS_FALSE;
460 private:
461 typedef struct TokenBuf {
462 jschar *base; /* base of line or stream buffer */
463 jschar *limit; /* limit for quick bounds check */
464 jschar *ptr; /* next char to get, or slot to use */
465 } TokenBuf;
467 TokenKind getTokenInternal(); /* doesn't check for pushback or error flag. */
469 int32 getChar();
470 int32 getCharIgnoreEOL();
471 void ungetChar(int32 c);
472 void ungetCharIgnoreEOL(int32 c);
473 Token *newToken(ptrdiff_t adjust);
474 bool peekUnicodeEscape(int32 *c);
475 bool matchUnicodeEscapeIdStart(int32 *c);
476 bool matchUnicodeEscapeIdent(int32 *c);
477 JSBool peekChars(intN n, jschar *cp);
478 JSBool getXMLEntity();
479 jschar *findEOL();
481 JSBool matchChar(int32 expect) {
482 int32 c = getChar();
483 if (c == expect)
484 return JS_TRUE;
485 ungetChar(c);
486 return JS_FALSE;
489 int32 peekChar() {
490 int32 c = getChar();
491 ungetChar(c);
492 return c;
495 void skipChars(intN n) {
496 while (--n >= 0)
497 getChar();
500 JSContext * const cx;
501 Token tokens[ntokens];/* circular token buffer */
502 uintN cursor; /* index of last parsed token */
503 uintN lookahead; /* count of lookahead tokens */
504 uintN lineno; /* current line number */
505 uintN flags; /* flags -- see above */
506 jschar *linebase; /* start of current line; points into userbuf */
507 jschar *prevLinebase; /* start of previous line; NULL if on the first line */
508 TokenBuf userbuf; /* user input buffer */
509 const char *filename; /* input filename or null */
510 JSSourceHandler listener; /* callback for source; eg debugger */
511 void *listenerData; /* listener 'this' data */
512 void *listenerTSData;/* listener data for this TokenStream */
513 CharBuffer tokenbuf; /* current token string buffer */
514 bool maybeEOL[256]; /* probabilistic EOL lookup table */
515 bool maybeStrSpecial[256];/* speeds up string scanning */
516 JSVersion version; /* (i.e. to identify keywords) */
517 bool xml; /* see JSOPTION_XML */
520 } /* namespace js */
/* Unicode separators that are treated as line terminators, in addition to \n, \r */
#define LINE_SEPARATOR  0x2028
#define PARA_SEPARATOR  0x2029
526 extern void
527 js_CloseTokenStream(JSContext *cx, js::TokenStream *ts);
529 extern JS_FRIEND_API(int)
530 js_fgets(char *buf, int size, FILE *file);
532 namespace js {
534 struct KeywordInfo {
535 const char *chars; /* C string with keyword text */
536 TokenKind tokentype;
537 JSOp op; /* JSOp */
538 JSVersion version; /* JSVersion */
542 * Returns a KeywordInfo for the specified characters, or NULL if the string is
543 * not a keyword.
545 extern const KeywordInfo *
546 FindKeyword(const jschar *s, size_t length);
548 } // namespace js
/*
 * Type of a mapping function that is called with a C string; intended to be
 * applied to each reserved identifier in the scanner's keyword table.
 *
 * NOTE(review): the friend-exported entry point that performs the mapping is
 * not declared next to this typedef -- confirm it still exists.
 */
typedef void (*JSMapKeywordFun)(const char *);
557 * Check that str forms a valid JS identifier name. The function does not
558 * check if str is a JS keyword.
560 extern JSBool
561 js_IsIdentifier(JSLinearString *str);
/*
 * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
 * message have const jschar* type, not const char*.
 */
#define JSREPORT_UC 0x100
569 namespace js {
572 * Report a compile-time error by its number. Return true for a warning, false
573 * for an error. When pn is not null, use it to report error's location.
574 * Otherwise use ts, which must not be null.
576 bool
577 ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn, uintN flags,
578 uintN errorNumber, ...);
581 * Report a condition that should elicit a warning with JSOPTION_STRICT,
582 * or an error if ts or tc is handling strict mode code. This function
583 * defers to ReportCompileErrorNumber to do the real work. Either tc
584 * or ts may be NULL, if there is no tree context or token stream state
585 * whose strictness should affect the report.
587 * One could have ReportCompileErrorNumber recognize the
588 * JSREPORT_STRICT_MODE_ERROR flag instead of having a separate function
589 * like this one. However, the strict mode code flag we need to test is
590 * in the JSTreeContext structure for that code; we would have to change
591 * the ~120 ReportCompileErrorNumber calls to pass the additional
592 * argument, even though many of those sites would never use it. Using
593 * ts's TSF_STRICT_MODE_CODE flag instead of tc's would be brittle: at some
594 * points ts's flags don't correspond to those of the tc relevant to the
595 * error.
597 bool
598 ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
599 uintN errorNumber, ...);
601 } /* namespace js */
603 #endif /* jsscan_h___ */