1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is Mozilla Communicator client code, released
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
43 * JS lexical scanner interface.
48 #include "jsversion.h"
54 #define JS_KEYWORD(keyword, type, op, version) \
55 extern const char js_##keyword##_str[];
56 #include "jskeyword.tbl"
62 TOK_ERROR
= -1, /* well-known as the only code < EOF */
63 TOK_EOF
= 0, /* end of file */
64 TOK_EOL
= 1, /* end of line */
65 TOK_SEMI
= 2, /* semicolon */
66 TOK_COMMA
= 3, /* comma operator */
67 TOK_ASSIGN
= 4, /* assignment ops (= += -= etc.) */
68 TOK_HOOK
= 5, TOK_COLON
= 6, /* conditional (?:) */
69 TOK_OR
= 7, /* logical or (||) */
70 TOK_AND
= 8, /* logical and (&&) */
71 TOK_BITOR
= 9, /* bitwise-or (|) */
72 TOK_BITXOR
= 10, /* bitwise-xor (^) */
73 TOK_BITAND
= 11, /* bitwise-and (&) */
74 TOK_EQOP
= 12, /* equality ops (== !=) */
75 TOK_RELOP
= 13, /* relational ops (< <= > >=) */
76 TOK_SHOP
= 14, /* shift ops (<< >> >>>) */
77 TOK_PLUS
= 15, /* plus */
78 TOK_MINUS
= 16, /* minus */
79 TOK_STAR
= 17, TOK_DIVOP
= 18, /* multiply/divide ops (* / %) */
80 TOK_UNARYOP
= 19, /* unary prefix operator */
81 TOK_INC
= 20, TOK_DEC
= 21, /* increment/decrement (++ --) */
82 TOK_DOT
= 22, /* member operator (.) */
83 TOK_LB
= 23, TOK_RB
= 24, /* left and right brackets */
84 TOK_LC
= 25, TOK_RC
= 26, /* left and right curlies (braces) */
85 TOK_LP
= 27, TOK_RP
= 28, /* left and right parentheses */
86 TOK_NAME
= 29, /* identifier */
87 TOK_NUMBER
= 30, /* numeric constant */
88 TOK_STRING
= 31, /* string constant */
89 TOK_REGEXP
= 32, /* RegExp constant */
90 TOK_PRIMARY
= 33, /* true, false, null, this, super */
91 TOK_FUNCTION
= 34, /* function keyword */
92 TOK_IF
= 35, /* if keyword */
93 TOK_ELSE
= 36, /* else keyword */
94 TOK_SWITCH
= 37, /* switch keyword */
95 TOK_CASE
= 38, /* case keyword */
96 TOK_DEFAULT
= 39, /* default keyword */
97 TOK_WHILE
= 40, /* while keyword */
98 TOK_DO
= 41, /* do keyword */
99 TOK_FOR
= 42, /* for keyword */
100 TOK_BREAK
= 43, /* break keyword */
101 TOK_CONTINUE
= 44, /* continue keyword */
102 TOK_IN
= 45, /* in keyword */
103 TOK_VAR
= 46, /* var keyword */
104 TOK_WITH
= 47, /* with keyword */
105 TOK_RETURN
= 48, /* return keyword */
106 TOK_NEW
= 49, /* new keyword */
107 TOK_DELETE
= 50, /* delete keyword */
108 TOK_DEFSHARP
= 51, /* #n= for object/array initializers */
109 TOK_USESHARP
= 52, /* #n# for object/array initializers */
110 TOK_TRY
= 53, /* try keyword */
111 TOK_CATCH
= 54, /* catch keyword */
112 TOK_FINALLY
= 55, /* finally keyword */
113 TOK_THROW
= 56, /* throw keyword */
114 TOK_INSTANCEOF
= 57, /* instanceof keyword */
115 TOK_DEBUGGER
= 58, /* debugger keyword */
116 TOK_XMLSTAGO
= 59, /* XML start tag open (<) */
117 TOK_XMLETAGO
= 60, /* XML end tag open (</) */
118 TOK_XMLPTAGC
= 61, /* XML point tag close (/>) */
119 TOK_XMLTAGC
= 62, /* XML start or end tag close (>) */
120 TOK_XMLNAME
= 63, /* XML start-tag non-final fragment */
121 TOK_XMLATTR
= 64, /* XML quoted attribute value */
122 TOK_XMLSPACE
= 65, /* XML whitespace */
123 TOK_XMLTEXT
= 66, /* XML text */
124 TOK_XMLCOMMENT
= 67, /* XML comment */
125 TOK_XMLCDATA
= 68, /* XML CDATA section */
126 TOK_XMLPI
= 69, /* XML processing instruction */
127 TOK_AT
= 70, /* XML attribute op (@) */
128 TOK_DBLCOLON
= 71, /* namespace qualified name op (::) */
129 TOK_ANYNAME
= 72, /* XML AnyName singleton (*) */
130 TOK_DBLDOT
= 73, /* XML descendant op (..) */
131 TOK_FILTER
= 74, /* XML filtering predicate op (.()) */
132 TOK_XMLELEM
= 75, /* XML element node type (no token) */
133 TOK_XMLLIST
= 76, /* XML list node type (no token) */
134 TOK_YIELD
= 77, /* yield from generator function */
135 TOK_ARRAYCOMP
= 78, /* array comprehension initialiser */
136 TOK_ARRAYPUSH
= 79, /* array push within comprehension */
137 TOK_LEXICALSCOPE
= 80, /* block scope AST node label */
138 TOK_LET
= 81, /* let keyword */
139 TOK_SEQ
= 82, /* synthetic sequence of statements,
141 TOK_FORHEAD
= 83, /* head of for(;;)-style loop */
142 TOK_ARGSBODY
= 84, /* formal args in list + body at end */
143 TOK_UPVARS
= 85, /* lexical dependencies as JSAtomList
144 of definitions paired with a parse
145 tree full of uses of those names */
146 TOK_RESERVED
, /* reserved keywords */
147 TOK_STRICT_RESERVED
, /* reserved keywords in strict mode */
148 TOK_LIMIT
/* domain size */
152 TokenKindIsXML(TokenKind tt
)
154 return tt
== TOK_AT
|| tt
== TOK_DBLCOLON
|| tt
== TOK_ANYNAME
;
158 TreeTypeIsXML(TokenKind tt
)
160 return tt
== TOK_XMLCOMMENT
|| tt
== TOK_XMLCDATA
|| tt
== TOK_XMLPI
||
161 tt
== TOK_XMLELEM
|| tt
== TOK_XMLLIST
;
165 TokenKindIsDecl(TokenKind tt
)
167 #if JS_HAS_BLOCK_SCOPE
168 return tt
== TOK_VAR
|| tt
== TOK_LET
;
170 return tt
== TOK_VAR
;
175 uint32 index
; /* index of char in physical line */
176 uint32 lineno
; /* physical line number */
178 bool operator==(const TokenPtr
& bptr
) {
179 return index
== bptr
.index
&& lineno
== bptr
.lineno
;
182 bool operator!=(const TokenPtr
& bptr
) {
183 return index
!= bptr
.index
|| lineno
!= bptr
.lineno
;
186 bool operator <(const TokenPtr
& bptr
) {
187 return lineno
< bptr
.lineno
||
188 (lineno
== bptr
.lineno
&& index
< bptr
.index
);
191 bool operator <=(const TokenPtr
& bptr
) {
192 return lineno
< bptr
.lineno
||
193 (lineno
== bptr
.lineno
&& index
<= bptr
.index
);
196 bool operator >(const TokenPtr
& bptr
) {
197 return !(*this <= bptr
);
200 bool operator >=(const TokenPtr
& bptr
) {
201 return !(*this < bptr
);
206 TokenPtr begin
; /* first character and line of token */
207 TokenPtr end
; /* index 1 past last char, last line */
209 bool operator==(const TokenPos
& bpos
) {
210 return begin
== bpos
.begin
&& end
== bpos
.end
;
213 bool operator!=(const TokenPos
& bpos
) {
214 return begin
!= bpos
.begin
|| end
!= bpos
.end
;
217 bool operator <(const TokenPos
& bpos
) {
218 return begin
< bpos
.begin
;
221 bool operator <=(const TokenPos
& bpos
) {
222 return begin
<= bpos
.begin
;
225 bool operator >(const TokenPos
& bpos
) {
226 return !(*this <= bpos
);
229 bool operator >=(const TokenPos
& bpos
) {
230 return !(*this < bpos
);
235 TokenKind type
; /* char value or above enumerator */
236 TokenPos pos
; /* token position in file */
237 jschar
*ptr
; /* beginning of token in line buffer */
239 struct { /* name or string literal */
240 JSOp op
; /* operator, for minimal parser */
241 JSAtom
*atom
; /* atom table entry */
243 uintN reflags
; /* regexp flags, use tokenbuf to access
245 struct { /* atom pair, for XML PIs */
246 JSAtom
*atom2
; /* auxiliary atom table entry */
247 JSAtom
*atom
; /* main atom table entry */
249 jsdouble dval
; /* floating point number */
253 enum TokenStreamFlags
255 TSF_ERROR
= 0x01, /* fatal error while compiling */
256 TSF_EOF
= 0x02, /* hit end of file */
257 TSF_NEWLINES
= 0x04, /* tokenize newlines */
258 TSF_OPERAND
= 0x08, /* looking for operand, not operator */
259 TSF_UNEXPECTED_EOF
= 0x10, /* unexpected end of input, i.e. TOK_EOF not at top-level. */
260 TSF_KEYWORD_IS_NAME
= 0x20, /* Ignore keywords and return TOK_NAME instead to the parser. */
261 TSF_STRICT_MODE_CODE
= 0x40,/* Tokenize as appropriate for strict mode code. */
262 TSF_DIRTYLINE
= 0x80, /* non-whitespace since start of line */
263 TSF_OWNFILENAME
= 0x100, /* ts->filename is malloc'd */
264 TSF_XMLTAGMODE
= 0x200, /* scanning within an XML tag in E4X */
265 TSF_XMLTEXTMODE
= 0x400, /* scanning XMLText terminal from E4X */
266 TSF_XMLONLYMODE
= 0x800, /* don't scan {expr} within text/tag */
267 TSF_OCTAL_CHAR
= 0x1000, /* observed an octal character escape */
270 * To handle the hard case of contiguous HTML comments, we want to clear the
271 * TSF_DIRTYLINE flag at the end of each such comment. But we'd rather not
272 * scan for --> within every //-style comment unless we have to. So we set
273 * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
274 * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
275 * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
277 * This still works as before given a malformed comment hiding hack such as:
280 * <!-- comment hiding hack #1
282 * // --> oops, markup for script-unaware browsers goes here!
285 * It does not cope with malformed comment hiding hacks where --> is hidden
286 * by C-style comments, or on a dirty line. Such cases are already broken.
288 TSF_IN_HTML_COMMENT
= 0x2000
292 #define t_reflags u.reflags
293 #define t_atom u.s.atom
294 #define t_atom2 u.p.atom2
295 #define t_dval u.dval
/* Number of slots in the circular token buffer (see tokens[ntokens]). */
299 static const size_t ntokens
= 4; /* 1 current + 2 lookahead, rounded
300 to power of 2 to avoid divmod by 3 */
/*
 * Mask applied to cursor arithmetic, e.g. (cursor + 1) & ntokensMask, so
 * that indices wrap around the buffer; valid because ntokens is a power of 2.
 */
301 static const uintN ntokensMask
= ntokens
- 1;
304 typedef Vector
<jschar
, 32> CharBuffer
;
307 * To construct a TokenStream, first call the constructor, which is
308 * infallible, then call |init|, which can fail. To destroy a TokenStream,
309 * first call |close| then call the destructor. If |init| fails, do not call
312 * This class uses JSContext.tempPool to allocate internal buffers. The
313 * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
314 * after calling |close|.
316 TokenStream(JSContext
*);
319 * Create a new token stream from an input buffer.
320 * Return false on memory-allocation failure.
322 bool init(const jschar
*base
, size_t length
, const char *filename
, uintN lineno
,
328 JSContext
*getContext() const { return cx
; }
329 bool onCurrentLine(const TokenPos
&pos
) const { return lineno
== pos
.end
.lineno
; }
330 const Token
¤tToken() const { return tokens
[cursor
]; }
331 const CharBuffer
&getTokenbuf() const { return tokenbuf
; }
332 const char *getFilename() const { return filename
; }
333 uintN
getLineno() const { return lineno
; }
334 /* Note that the version and hasXML can get out of sync via setXML. */
335 JSVersion
versionNumber() const { return VersionNumber(version
); }
336 JSVersion
versionWithFlags() const { return version
; }
337 bool hasAnonFunFix() const { return VersionHasAnonFunFix(version
); }
338 bool hasXML() const { return xml
|| VersionShouldParseXML(versionNumber()); }
339 void setXML(bool enabled
) { xml
= enabled
; }
342 void setStrictMode(bool enabled
= true) { setFlag(enabled
, TSF_STRICT_MODE_CODE
); }
343 void setXMLTagMode(bool enabled
= true) { setFlag(enabled
, TSF_XMLTAGMODE
); }
344 void setXMLOnlyMode(bool enabled
= true) { setFlag(enabled
, TSF_XMLONLYMODE
); }
345 void setUnexpectedEOF(bool enabled
= true) { setFlag(enabled
, TSF_UNEXPECTED_EOF
); }
346 void setOctalCharacterEscape(bool enabled
= true) { setFlag(enabled
, TSF_OCTAL_CHAR
); }
348 bool isStrictMode() { return !!(flags
& TSF_STRICT_MODE_CODE
); }
349 bool isXMLTagMode() { return !!(flags
& TSF_XMLTAGMODE
); }
350 bool isXMLOnlyMode() { return !!(flags
& TSF_XMLONLYMODE
); }
351 bool isUnexpectedEOF() { return !!(flags
& TSF_UNEXPECTED_EOF
); }
352 bool isEOF() const { return !!(flags
& TSF_EOF
); }
353 bool isError() const { return !!(flags
& TSF_ERROR
); }
354 bool hasOctalCharacterEscape() const { return flags
& TSF_OCTAL_CHAR
; }
357 bool reportCompileErrorNumberVA(JSParseNode
*pn
, uintN flags
, uintN errorNumber
, va_list ap
);
358 void mungeCurrentToken(TokenKind newKind
) { tokens
[cursor
].type
= newKind
; }
359 void mungeCurrentToken(JSOp newOp
) { tokens
[cursor
].t_op
= newOp
; }
360 void mungeCurrentToken(TokenKind newKind
, JSOp newOp
) {
361 mungeCurrentToken(newKind
);
362 mungeCurrentToken(newOp
);
366 static JSAtom
*atomize(JSContext
*cx
, CharBuffer
&cb
);
369 * Enables flags in the associated tokenstream for the object lifetime.
370 * Useful for lexically-scoped flag toggles.
373 TokenStream
* const parent
;
376 Flagger(TokenStream
*parent
, uintN withFlags
) : parent(parent
), flags(withFlags
) {
377 parent
->flags
|= flags
;
380 ~Flagger() { parent
->flags
&= ~flags
; }
382 friend class Flagger
;
384 void setFlag(bool enabled
, TokenStreamFlags flag
) {
393 * Get the next token from the stream, make it the current token, and
396 TokenKind
getToken() {
397 /* Check for a pushed-back token resulting from mismatching lookahead. */
398 while (lookahead
!= 0) {
399 JS_ASSERT(!(flags
& TSF_XMLTEXTMODE
));
401 cursor
= (cursor
+ 1) & ntokensMask
;
402 TokenKind tt
= currentToken().type
;
403 JS_ASSERT(!(flags
& TSF_NEWLINES
));
408 /* If there was a fatal error, keep returning TOK_ERROR. */
409 if (flags
& TSF_ERROR
)
412 return getTokenInternal();
415 /* Similar, but also sets flags. */
416 TokenKind
getToken(uintN withFlags
) {
417 Flagger
flagger(this, withFlags
);
422 * Push the last scanned token back into the stream.
425 JS_ASSERT(lookahead
< ntokensMask
);
427 cursor
= (cursor
- 1) & ntokensMask
;
430 TokenKind
peekToken(uintN withFlags
= 0) {
431 Flagger
flagger(this, withFlags
);
432 if (lookahead
!= 0) {
433 JS_ASSERT(lookahead
== 1);
434 return tokens
[(cursor
+ lookahead
) & ntokensMask
].type
;
436 TokenKind tt
= getToken();
441 TokenKind
peekTokenSameLine(uintN withFlags
= 0) {
442 Flagger
flagger(this, withFlags
);
443 if (!onCurrentLine(currentToken().pos
))
445 TokenKind tt
= peekToken(TSF_NEWLINES
);
450 * Get the next token from the stream if its kind is |tt|.
452 JSBool
matchToken(TokenKind tt
, uintN withFlags
= 0) {
453 Flagger
flagger(this, withFlags
);
454 if (getToken() == tt
)
461 typedef struct TokenBuf
{
462 jschar
*base
; /* base of line or stream buffer */
463 jschar
*limit
; /* limit for quick bounds check */
464 jschar
*ptr
; /* next char to get, or slot to use */
467 TokenKind
getTokenInternal(); /* doesn't check for pushback or error flag. */
470 int32
getCharIgnoreEOL();
471 void ungetChar(int32 c
);
472 void ungetCharIgnoreEOL(int32 c
);
473 Token
*newToken(ptrdiff_t adjust
);
474 bool peekUnicodeEscape(int32
*c
);
475 bool matchUnicodeEscapeIdStart(int32
*c
);
476 bool matchUnicodeEscapeIdent(int32
*c
);
477 JSBool
peekChars(intN n
, jschar
*cp
);
478 JSBool
getXMLEntity();
481 JSBool
matchChar(int32 expect
) {
495 void skipChars(intN n
) {
500 JSContext
* const cx
;
501 Token tokens
[ntokens
];/* circular token buffer */
502 uintN cursor
; /* index of last parsed token */
503 uintN lookahead
; /* count of lookahead tokens */
504 uintN lineno
; /* current line number */
505 uintN flags
; /* flags -- see above */
506 jschar
*linebase
; /* start of current line; points into userbuf */
507 jschar
*prevLinebase
; /* start of previous line; NULL if on the first line */
508 TokenBuf userbuf
; /* user input buffer */
509 const char *filename
; /* input filename or null */
510 JSSourceHandler listener
; /* callback for source; eg debugger */
511 void *listenerData
; /* listener 'this' data */
512 void *listenerTSData
;/* listener data for this TokenStream */
513 CharBuffer tokenbuf
; /* current token string buffer */
514 bool maybeEOL
[256]; /* probabilistic EOL lookup table */
515 bool maybeStrSpecial
[256];/* speeds up string scanning */
516 JSVersion version
; /* (i.e. to identify keywords) */
517 bool xml
; /* see JSOPTION_XML */
522 /* Unicode separators that are treated as line terminators, in addition to \n, \r */
523 #define LINE_SEPARATOR 0x2028
524 #define PARA_SEPARATOR 0x2029
527 js_CloseTokenStream(JSContext
*cx
, js::TokenStream
*ts
);
529 extern JS_FRIEND_API(int)
530 js_fgets(char *buf
, int size
, FILE *file
);
533 * If the given char array forms a JavaScript keyword, return the corresponding
534 * token. Otherwise return TOK_EOF.
537 js_CheckKeyword(const jschar
*chars
, size_t length
);
540 * Friend-exported API entry point to call a mapping function on each reserved
541 * identifier in the scanner's keyword table.
543 typedef void (*JSMapKeywordFun
)(const char *);
546 * Check that str forms a valid JS identifier name. The function does not
547 * check if str is a JS keyword.
550 js_IsIdentifier(JSLinearString
*str
);
553 * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
554 * message have const jschar* type, not const char*.
556 #define JSREPORT_UC 0x100
561 * Report a compile-time error by its number. Return true for a warning, false
562 * for an error. When pn is not null, use it to report the error's location.
563 * Otherwise use ts, which must not be null.
566 ReportCompileErrorNumber(JSContext
*cx
, TokenStream
*ts
, JSParseNode
*pn
, uintN flags
,
567 uintN errorNumber
, ...);
570 * Report a condition that should elicit a warning with JSOPTION_STRICT,
571 * or an error if ts or tc is handling strict mode code. This function
572 * defers to ReportCompileErrorNumber to do the real work. Either tc
573 * or ts may be NULL, if there is no tree context or token stream state
574 * whose strictness should affect the report.
576 * One could have ReportCompileErrorNumber recognize the
577 * JSREPORT_STRICT_MODE_ERROR flag instead of having a separate function
578 * like this one. However, the strict mode code flag we need to test is
579 * in the JSTreeContext structure for that code; we would have to change
580 * the ~120 ReportCompileErrorNumber calls to pass the additional
581 * argument, even though many of those sites would never use it. Using
582 * ts's TSF_STRICT_MODE_CODE flag instead of tc's would be brittle: at some
583 * points ts's flags don't correspond to those of the tc relevant to the
587 ReportStrictModeError(JSContext
*cx
, TokenStream
*ts
, JSTreeContext
*tc
, JSParseNode
*pn
,
588 uintN errorNumber
, ...);
592 #endif /* jsscan_h___ */