1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is Mozilla Communicator client code, released
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
43 * JS lexical scanner interface.
48 #include "jsversion.h"
54 #define JS_KEYWORD(keyword, type, op, version) \
55 extern const char js_##keyword##_str[];
56 #include "jskeyword.tbl"
/*
 * Kinds of token produced by the scanner.  Every kind up to TOK_UPVARS has
 * an explicit value; TOK_RESERVED and TOK_LIMIT take the next two values.
 * TOK_ERROR is the only negative code, and TOK_LIMIT gives the domain size.
 */
enum TokenKind {
    TOK_ERROR = -1,                     /* well-known as the only code < EOF */
    TOK_EOF = 0,                        /* end of file */
    TOK_EOL = 1,                        /* end of line */
    TOK_SEMI = 2,                       /* semicolon */
    TOK_COMMA = 3,                      /* comma operator */
    TOK_ASSIGN = 4,                     /* assignment ops (= += -= etc.) */
    TOK_HOOK = 5, TOK_COLON = 6,        /* conditional (?:) */
    TOK_OR = 7,                         /* logical or (||) */
    TOK_AND = 8,                        /* logical and (&&) */
    TOK_BITOR = 9,                      /* bitwise-or (|) */
    TOK_BITXOR = 10,                    /* bitwise-xor (^) */
    TOK_BITAND = 11,                    /* bitwise-and (&) */
    TOK_EQOP = 12,                      /* equality ops (== !=) */
    TOK_RELOP = 13,                     /* relational ops (< <= > >=) */
    TOK_SHOP = 14,                      /* shift ops (<< >> >>>) */
    TOK_PLUS = 15,                      /* plus */
    TOK_MINUS = 16,                     /* minus */
    TOK_STAR = 17, TOK_DIVOP = 18,      /* multiply/divide ops (* / %) */
    TOK_UNARYOP = 19,                   /* unary prefix operator */
    TOK_INC = 20, TOK_DEC = 21,         /* increment/decrement (++ --) */
    TOK_DOT = 22,                       /* member operator (.) */
    TOK_LB = 23, TOK_RB = 24,           /* left and right brackets */
    TOK_LC = 25, TOK_RC = 26,           /* left and right curlies (braces) */
    TOK_LP = 27, TOK_RP = 28,           /* left and right parentheses */
    TOK_NAME = 29,                      /* identifier */
    TOK_NUMBER = 30,                    /* numeric constant */
    TOK_STRING = 31,                    /* string constant */
    TOK_REGEXP = 32,                    /* RegExp constant */
    TOK_PRIMARY = 33,                   /* true, false, null, this, super */
    TOK_FUNCTION = 34,                  /* function keyword */
    TOK_IF = 35,                        /* if keyword */
    TOK_ELSE = 36,                      /* else keyword */
    TOK_SWITCH = 37,                    /* switch keyword */
    TOK_CASE = 38,                      /* case keyword */
    TOK_DEFAULT = 39,                   /* default keyword */
    TOK_WHILE = 40,                     /* while keyword */
    TOK_DO = 41,                        /* do keyword */
    TOK_FOR = 42,                       /* for keyword */
    TOK_BREAK = 43,                     /* break keyword */
    TOK_CONTINUE = 44,                  /* continue keyword */
    TOK_IN = 45,                        /* in keyword */
    TOK_VAR = 46,                       /* var keyword */
    TOK_WITH = 47,                      /* with keyword */
    TOK_RETURN = 48,                    /* return keyword */
    TOK_NEW = 49,                       /* new keyword */
    TOK_DELETE = 50,                    /* delete keyword */
    TOK_DEFSHARP = 51,                  /* #n= for object/array initializers */
    TOK_USESHARP = 52,                  /* #n# for object/array initializers */
    TOK_TRY = 53,                       /* try keyword */
    TOK_CATCH = 54,                     /* catch keyword */
    TOK_FINALLY = 55,                   /* finally keyword */
    TOK_THROW = 56,                     /* throw keyword */
    TOK_INSTANCEOF = 57,                /* instanceof keyword */
    TOK_DEBUGGER = 58,                  /* debugger keyword */
    TOK_XMLSTAGO = 59,                  /* XML start tag open (<) */
    TOK_XMLETAGO = 60,                  /* XML end tag open (</) */
    TOK_XMLPTAGC = 61,                  /* XML point tag close (/>) */
    TOK_XMLTAGC = 62,                   /* XML start or end tag close (>) */
    TOK_XMLNAME = 63,                   /* XML start-tag non-final fragment */
    TOK_XMLATTR = 64,                   /* XML quoted attribute value */
    TOK_XMLSPACE = 65,                  /* XML whitespace */
    TOK_XMLTEXT = 66,                   /* XML text */
    TOK_XMLCOMMENT = 67,                /* XML comment */
    TOK_XMLCDATA = 68,                  /* XML CDATA section */
    TOK_XMLPI = 69,                     /* XML processing instruction */
    TOK_AT = 70,                        /* XML attribute op (@) */
    TOK_DBLCOLON = 71,                  /* namespace qualified name op (::) */
    TOK_ANYNAME = 72,                   /* XML AnyName singleton (*) */
    TOK_DBLDOT = 73,                    /* XML descendant op (..) */
    TOK_FILTER = 74,                    /* XML filtering predicate op (.()) */
    TOK_XMLELEM = 75,                   /* XML element node type (no token) */
    TOK_XMLLIST = 76,                   /* XML list node type (no token) */
    TOK_YIELD = 77,                     /* yield from generator function */
    TOK_ARRAYCOMP = 78,                 /* array comprehension initialiser */
    TOK_ARRAYPUSH = 79,                 /* array push within comprehension */
    TOK_LEXICALSCOPE = 80,              /* block scope AST node label */
    TOK_LET = 81,                       /* let keyword */
    TOK_SEQ = 82,                       /* synthetic sequence of statements */
    TOK_FORHEAD = 83,                   /* head of for(;;)-style loop */
    TOK_ARGSBODY = 84,                  /* formal args in list + body at end */
    TOK_UPVARS = 85,                    /* lexical dependencies as JSAtomList
                                           of definitions paired with a parse
                                           tree full of uses of those names */
    TOK_RESERVED,                       /* reserved keywords */
    TOK_LIMIT                           /* domain size */
};
151 TokenKindIsXML(TokenKind tt
)
153 return tt
== TOK_AT
|| tt
== TOK_DBLCOLON
|| tt
== TOK_ANYNAME
;
157 TreeTypeIsXML(TokenKind tt
)
159 return tt
== TOK_XMLCOMMENT
|| tt
== TOK_XMLCDATA
|| tt
== TOK_XMLPI
||
160 tt
== TOK_XMLELEM
|| tt
== TOK_XMLLIST
;
164 TokenKindIsDecl(TokenKind tt
)
166 #if JS_HAS_BLOCK_SCOPE
167 return tt
== TOK_VAR
|| tt
== TOK_LET
;
169 return tt
== TOK_VAR
;
174 uint32 index
; /* index of char in physical line */
175 uint32 lineno
; /* physical line number */
177 bool operator==(const TokenPtr
& bptr
) {
178 return index
== bptr
.index
&& lineno
== bptr
.lineno
;
181 bool operator!=(const TokenPtr
& bptr
) {
182 return index
!= bptr
.index
|| lineno
!= bptr
.lineno
;
185 bool operator <(const TokenPtr
& bptr
) {
186 return lineno
< bptr
.lineno
||
187 (lineno
== bptr
.lineno
&& index
< bptr
.index
);
190 bool operator <=(const TokenPtr
& bptr
) {
191 return lineno
< bptr
.lineno
||
192 (lineno
== bptr
.lineno
&& index
<= bptr
.index
);
195 bool operator >(const TokenPtr
& bptr
) {
196 return !(*this <= bptr
);
199 bool operator >=(const TokenPtr
& bptr
) {
200 return !(*this < bptr
);
205 TokenPtr begin
; /* first character and line of token */
206 TokenPtr end
; /* index 1 past last char, last line */
208 bool operator==(const TokenPos
& bpos
) {
209 return begin
== bpos
.begin
&& end
== bpos
.end
;
212 bool operator!=(const TokenPos
& bpos
) {
213 return begin
!= bpos
.begin
|| end
!= bpos
.end
;
216 bool operator <(const TokenPos
& bpos
) {
217 return begin
< bpos
.begin
;
220 bool operator <=(const TokenPos
& bpos
) {
221 return begin
<= bpos
.begin
;
224 bool operator >(const TokenPos
& bpos
) {
225 return !(*this <= bpos
);
228 bool operator >=(const TokenPos
& bpos
) {
229 return !(*this < bpos
);
234 TokenKind type
; /* char value or above enumerator */
235 TokenPos pos
; /* token position in file */
236 jschar
*ptr
; /* beginning of token in line buffer */
238 struct { /* name or string literal */
239 JSOp op
; /* operator, for minimal parser */
240 JSAtom
*atom
; /* atom table entry */
242 uintN reflags
; /* regexp flags, use tokenbuf to access
244 struct { /* atom pair, for XML PIs */
245 JSAtom
*atom2
; /* auxiliary atom table entry */
246 JSAtom
*atom
; /* main atom table entry */
248 jsdouble dval
; /* floating point number */
/*
 * Bit flags tested against and stored in a token stream's flags word.  Each
 * enumerator is a distinct single bit; 0x10 is not assigned.
 */
enum TokenStreamFlags
{
    TSF_ERROR = 0x01,           /* fatal error while compiling */
    TSF_EOF = 0x02,             /* hit end of file */
    TSF_NEWLINES = 0x04,        /* tokenize newlines */
    TSF_OPERAND = 0x08,         /* looking for operand, not operator */
    TSF_NLFLAG = 0x20,          /* last linebuf ended with \n */
    TSF_CRFLAG = 0x40,          /* linebuf would have ended with \r */
    TSF_DIRTYLINE = 0x80,       /* non-whitespace since start of line */
    TSF_OWNFILENAME = 0x100,    /* ts->filename is malloc'd */
    TSF_XMLTAGMODE = 0x200,     /* scanning within an XML tag in E4X */
    TSF_XMLTEXTMODE = 0x400,    /* scanning XMLText terminal from E4X */
    TSF_XMLONLYMODE = 0x800,    /* don't scan {expr} within text/tag */

    /* Flag indicating unexpected end of input, i.e. TOK_EOF not at top-level. */
    TSF_UNEXPECTED_EOF = 0x1000,

    /*
     * To handle the hard case of contiguous HTML comments, we want to clear the
     * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
     * scan for --> within every //-style comment unless we have to.  So we set
     * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
     * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
     * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
     *
     * This still works as before given a malformed comment hiding hack such as:
     *
     *    <!-- comment hiding hack #1
     *    // --> oops, markup for script-unaware browsers goes here!
     *
     * It does not cope with malformed comment hiding hacks where --> is hidden
     * by C-style comments, or on a dirty line.  Such cases are already broken.
     */
    TSF_IN_HTML_COMMENT = 0x2000,

    /* Ignore keywords and return TOK_NAME instead to the parser. */
    TSF_KEYWORD_IS_NAME = 0x4000,

    /* Tokenize as appropriate for strict mode code. */
    TSF_STRICT_MODE_CODE = 0x8000
};
/*
 * Shorthands for the payload members of a token's union u, so that code can
 * write tok.t_atom instead of tok.u.s.atom.
 */
#define t_op            u.s.op
#define t_reflags       u.reflags
#define t_atom          u.s.atom
#define t_atom2         u.p.atom2
#define t_dval          u.dval
303 const size_t LINE_LIMIT
= 256; /* logical line buffer size limit
304 -- physical line length is unlimited */
308 static const size_t ntokens
= 4; /* 1 current + 2 lookahead, rounded
309 to power of 2 to avoid divmod by 3 */
310 static const uintN ntokensMask
= ntokens
- 1;
314 * To construct a TokenStream, first call the constructor, which is
315 * infallible, then call |init|, which can fail. To destroy a TokenStream,
316 * first call |close| then call the destructor. If |init| fails, do not call
319 * This class uses JSContext.tempPool to allocate internal buffers. The
320 * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
321 * after calling |close|.
323 TokenStream(JSContext
*);
326 * Create a new token stream, either from an input buffer or from a file.
327 * Return false on file-open or memory-allocation failure.
329 bool init(const jschar
*base
, size_t length
, FILE *fp
, const char *filename
, uintN lineno
);
334 JSContext
*getContext() const { return cx
; }
335 bool onCurrentLine(const TokenPos
&pos
) const { return lineno
== pos
.end
.lineno
; }
336 const Token
¤tToken() const { return tokens
[cursor
]; }
337 const JSCharBuffer
&getTokenbuf() const { return tokenbuf
; }
338 const char *getFilename() const { return filename
; }
339 uintN
getLineno() const { return lineno
; }
342 void setStrictMode(bool enabled
= true) { setFlag(enabled
, TSF_STRICT_MODE_CODE
); }
343 void setXMLTagMode(bool enabled
= true) { setFlag(enabled
, TSF_XMLTAGMODE
); }
344 void setXMLOnlyMode(bool enabled
= true) { setFlag(enabled
, TSF_XMLONLYMODE
); }
345 void setUnexpectedEOF(bool enabled
= true) { setFlag(enabled
, TSF_UNEXPECTED_EOF
); }
346 bool isStrictMode() { return !!(flags
& TSF_STRICT_MODE_CODE
); }
347 bool isXMLTagMode() { return !!(flags
& TSF_XMLTAGMODE
); }
348 bool isXMLOnlyMode() { return !!(flags
& TSF_XMLONLYMODE
); }
349 bool isUnexpectedEOF() { return !!(flags
& TSF_UNEXPECTED_EOF
); }
350 bool isEOF() const { return !!(flags
& TSF_EOF
); }
351 bool isError() const { return !!(flags
& TSF_ERROR
); }
354 bool reportCompileErrorNumberVA(JSParseNode
*pn
, uintN flags
, uintN errorNumber
, va_list ap
);
355 void mungeCurrentToken(TokenKind newKind
) { tokens
[cursor
].type
= newKind
; }
356 void mungeCurrentToken(JSOp newOp
) { tokens
[cursor
].t_op
= newOp
; }
357 void mungeCurrentToken(TokenKind newKind
, JSOp newOp
) {
358 mungeCurrentToken(newKind
);
359 mungeCurrentToken(newOp
);
364 * Enables flags in the associated tokenstream for the object lifetime.
365 * Useful for lexically-scoped flag toggles.
368 TokenStream
* const parent
;
371 Flagger(TokenStream
*parent
, uintN withFlags
) : parent(parent
), flags(withFlags
) {
372 parent
->flags
|= flags
;
375 ~Flagger() { parent
->flags
&= ~flags
; }
377 friend class Flagger
;
379 void setFlag(bool enabled
, TokenStreamFlags flag
) {
388 * Get the next token from the stream, make it the current token, and
391 TokenKind
getToken(uintN withFlags
= 0) {
392 Flagger
flagger(this, withFlags
);
393 /* Check for a pushed-back token resulting from mismatching lookahead. */
394 while (lookahead
!= 0) {
395 JS_ASSERT(!(flags
& TSF_XMLTEXTMODE
));
397 cursor
= (cursor
+ 1) & ntokensMask
;
398 TokenKind tt
= currentToken().type
;
399 JS_ASSERT(!(flags
& TSF_NEWLINES
));
404 /* If there was a fatal error, keep returning TOK_ERROR. */
405 if (flags
& TSF_ERROR
)
408 return getTokenInternal();
412 * Push the last scanned token back into the stream.
415 JS_ASSERT(lookahead
< ntokensMask
);
417 cursor
= (cursor
- 1) & ntokensMask
;
420 TokenKind
peekToken(uintN withFlags
= 0) {
421 Flagger
flagger(this, withFlags
);
422 if (lookahead
!= 0) {
423 JS_ASSERT(lookahead
== 1);
424 return tokens
[(cursor
+ lookahead
) & ntokensMask
].type
;
426 TokenKind tt
= getToken();
431 TokenKind
peekTokenSameLine(uintN withFlags
= 0) {
432 Flagger
flagger(this, withFlags
);
433 if (!onCurrentLine(currentToken().pos
))
435 TokenKind tt
= peekToken(TSF_NEWLINES
);
440 * Get the next token from the stream if its kind is |tt|.
442 JSBool
matchToken(TokenKind tt
, uintN withFlags
= 0) {
443 Flagger
flagger(this, withFlags
);
444 if (getToken() == tt
)
451 typedef struct TokenBuf
{
452 jschar
*base
; /* base of line or stream buffer */
453 jschar
*limit
; /* limit for quick bounds check */
454 jschar
*ptr
; /* next char to get, or slot to use */
457 TokenKind
getTokenInternal(); /* doesn't check for pushback or error flag. */
459 void ungetChar(int32 c
);
460 Token
*newToken(ptrdiff_t adjust
);
461 int32
getUnicodeEscape();
462 JSBool
peekChars(intN n
, jschar
*cp
);
463 JSBool
getXMLEntity();
465 JSBool
matchChar(int32 expect
) {
479 void skipChars(intN n
) {
484 JSContext
* const cx
;
485 Token tokens
[ntokens
];/* circular token buffer */
486 uintN cursor
; /* index of last parsed token */
487 uintN lookahead
; /* count of lookahead tokens */
488 uintN lineno
; /* current line number */
489 uintN ungetpos
; /* next free char slot in ungetbuf */
490 jschar ungetbuf
[6]; /* at most 6, for \uXXXX lookahead */
491 uintN flags
; /* flags -- see above */
492 uint32 linelen
; /* physical linebuf segment length */
493 uint32 linepos
; /* linebuf offset in physical line */
494 TokenBuf linebuf
; /* line buffer for diagnostics */
495 TokenBuf userbuf
; /* user input buffer if !file */
496 const char *filename
; /* input filename or null */
497 FILE *file
; /* stdio stream if reading from file */
498 JSSourceHandler listener
; /* callback for source; eg debugger */
499 void *listenerData
; /* listener 'this' data */
500 void *listenerTSData
;/* listener data for this TokenStream */
501 jschar
*saveEOL
; /* save next end of line in userbuf, to
502 optimize for very long lines */
503 JSCharBuffer tokenbuf
; /* current token string buffer */
508 /* Unicode separators that are treated as line terminators, in addition to \n, \r */
509 #define LINE_SEPARATOR 0x2028
510 #define PARA_SEPARATOR 0x2029
513 js_CloseTokenStream(JSContext
*cx
, js::TokenStream
*ts
);
515 extern JS_FRIEND_API(int)
516 js_fgets(char *buf
, int size
, FILE *file
);
519 * If the given char array forms a JavaScript keyword, return the corresponding
520 * token. Otherwise return TOK_EOF.
523 js_CheckKeyword(const jschar
*chars
, size_t length
);
526 * Friend-exported API entry point to call a mapping function on each reserved
527 * identifier in the scanner's keyword table.
529 typedef void (*JSMapKeywordFun
)(const char *);
532 * Check that str forms a valid JS identifier name. The function does not
533 * check if str is a JS keyword.
536 js_IsIdentifier(JSString
*str
);
539 * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
540 * message have const jschar* type, not const char*.
542 #define JSREPORT_UC 0x100
547 * Report a compile-time error by its number. Return true for a warning, false
548 * for an error. When pn is not null, use it to report the error's location.
549 * Otherwise use ts, which must not be null.
552 ReportCompileErrorNumber(JSContext
*cx
, TokenStream
*ts
, JSParseNode
*pn
, uintN flags
,
553 uintN errorNumber
, ...);
556 * Report a condition that should elicit a warning with JSOPTION_STRICT,
557 * or an error if ts or tc is handling strict mode code. This function
558 * defers to ReportCompileErrorNumber to do the real work. Either tc
559 * or ts may be NULL, if there is no tree context or token stream state
560 * whose strictness should affect the report.
562 * One could have ReportCompileErrorNumber recognize the
563 * JSREPORT_STRICT_MODE_ERROR flag instead of having a separate function
564 * like this one. However, the strict mode code flag we need to test is
565 * in the JSTreeContext structure for that code; we would have to change
566 * the ~120 ReportCompileErrorNumber calls to pass the additional
567 * argument, even though many of those sites would never use it. Using
568 * ts's TSF_STRICT_MODE_CODE flag instead of tc's would be brittle: at some
569 * points ts's flags don't correspond to those of the tc relevant to the
573 ReportStrictModeError(JSContext
*cx
, TokenStream
*ts
, JSTreeContext
*tc
, JSParseNode
*pn
,
574 uintN errorNumber
, ...);
578 #endif /* jsscan_h___ */