Bug 559408: Arena pool macros to methods. (r=gal)
[mozilla-central.git] / js / src / jsscan.cpp
blob5e87bed03419af27c91e4154d13a59a773ff304d
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS lexical scanner.
44 #include <stdio.h> /* first to avoid trouble on some systems */
45 #include <errno.h>
46 #include <limits.h>
47 #include <math.h>
48 #ifdef HAVE_MEMORY_H
49 #include <memory.h>
50 #endif
51 #include <stdarg.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsstdint.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsbit.h"
58 #include "jsutil.h" /* Added by JSIFY */
59 #include "jsdtoa.h"
60 #include "jsprf.h"
61 #include "jsapi.h"
62 #include "jsatom.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsemit.h"
66 #include "jsexn.h"
67 #include "jsnum.h"
68 #include "jsopcode.h"
69 #include "jsparse.h"
70 #include "jsregexp.h"
71 #include "jsscan.h"
72 #include "jsscript.h"
73 #include "jsstaticcheck.h"
74 #include "jsvector.h"
76 #if JS_HAS_XML_SUPPORT
77 #include "jsxml.h"
78 #endif
80 using namespace js;
82 #define JS_KEYWORD(keyword, type, op, version) \
83 const char js_##keyword##_str[] = #keyword;
84 #include "jskeyword.tbl"
85 #undef JS_KEYWORD
/* One row of the keyword table (keyword_defs). */
87 struct keyword {
88 const char *chars; /* C string with keyword text */
/* Token kind reported for this keyword (js_CheckKeyword returns it). */
89 TokenKind tokentype;
90 JSOp op; /* JSOp */
/* The scanner compares this against JSVERSION_NUMBER(cx) before treating the text as a keyword. */
91 JSVersion version; /* JSVersion */
94 static const struct keyword keyword_defs[] = {
95 #define JS_KEYWORD(keyword, type, op, version) \
96 {js_##keyword##_str, type, op, version},
97 #include "jskeyword.tbl"
98 #undef JS_KEYWORD
101 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
/*
 * Look up the |length| jschars starting at |s| in the keyword table.
 * Returns a pointer to the matching keyword_defs row, or NULL when there is
 * no match. |length| must be nonzero.
 *
 * The selection logic itself comes from the included jsautokw.h, which is
 * driven through the JSKW_* macros defined below: it reads the input via
 * JSKW_LENGTH/JSKW_AT and finishes by jumping to got_match, test_guess (with
 * a candidate row index in |i|) or no_match.
 * NOTE(review): got_match presumably means the included code has already
 * verified every character -- confirm against jsautokw.h.
 */
103 static const struct keyword *
104 FindKeyword(const jschar *s, size_t length)
106 register size_t i;
107 const struct keyword *kw;
108 const char *chars;
110 JS_ASSERT(length != 0);
112 #define JSKW_LENGTH() length
113 #define JSKW_AT(column) s[column]
114 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
115 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
116 #define JSKW_NO_MATCH() goto no_match;
117 #include "jsautokw.h"
118 #undef JSKW_NO_MATCH
119 #undef JSKW_TEST_GUESS
120 #undef JSKW_GOT_MATCH
121 #undef JSKW_AT
122 #undef JSKW_LENGTH
/* Row |i| is accepted without further comparison. */
124 got_match:
125 return &keyword_defs[i];
/* Row |i| is only a candidate: compare all |length| characters with its text. */
127 test_guess:
128 kw = &keyword_defs[i];
129 chars = kw->chars;
130 do {
131 if (*s++ != (unsigned char)(*chars++))
132 goto no_match;
133 } while (--length != 0);
134 return kw;
136 no_match:
137 return NULL;
140 TokenKind
141 js_CheckKeyword(const jschar *str, size_t length)
143 const struct keyword *kw;
145 JS_ASSERT(length != 0);
146 kw = FindKeyword(str, length);
147 return kw ? kw->tokentype : TOK_EOF;
150 JSBool
151 js_IsIdentifier(JSString *str)
153 size_t length;
154 jschar c;
155 const jschar *chars, *end;
157 str->getCharsAndLength(chars, length);
158 if (length == 0)
159 return JS_FALSE;
160 c = *chars;
161 if (!JS_ISIDSTART(c))
162 return JS_FALSE;
163 end = chars + length;
164 while (++chars != end) {
165 c = *chars;
166 if (!JS_ISIDENT(c))
167 return JS_FALSE;
169 return JS_TRUE;
172 #ifdef _MSC_VER
173 #pragma warning(push)
174 #pragma warning(disable:4351)
175 #endif
177 /* Initialize members that aren't initialized in |init|. */
178 TokenStream::TokenStream(JSContext *cx)
179 : cx(cx), tokens(), cursor(), lookahead(), ungetpos(), ungetbuf(), flags(),
180 linelen(), linepos(), file(), listenerTSData(), saveEOL(), tokenbuf(cx)
183 #ifdef _MSC_VER
184 #pragma warning(pop)
185 #endif
187 bool
188 TokenStream::init(const jschar *base, size_t length, FILE *fp, const char *fn, uintN ln)
190 jschar *buf;
192 JS_ASSERT_IF(fp, !base);
193 JS_ASSERT_IF(!base, length == 0);
194 size_t nb = fp
195 ? 2 * LINE_LIMIT * sizeof(jschar)
196 : LINE_LIMIT * sizeof(jschar);
197 cx->tempPool.allocateCast<jschar *>(buf, nb);
198 if (!buf) {
199 js_ReportOutOfScriptQuota(cx);
200 return false;
202 memset(buf, 0, nb);
204 /* Initialize members. */
205 filename = fn;
206 lineno = ln;
207 linebuf.base = linebuf.limit = linebuf.ptr = buf;
208 if (fp) {
209 file = fp;
210 userbuf.base = buf + LINE_LIMIT;
211 userbuf.ptr = userbuf.limit = userbuf.base + LINE_LIMIT;
212 } else {
213 userbuf.base = (jschar *)base;
214 userbuf.limit = (jschar *)base + length;
215 userbuf.ptr = (jschar *)base;
217 listener = cx->debugHooks->sourceHandler;
218 listenerData = cx->debugHooks->sourceHandlerData;
219 return true;
222 void
223 TokenStream::close()
225 if (flags & TSF_OWNFILENAME)
226 cx->free((void *) filename);
/*
 * fast_getc is the character-read primitive used by js_fgets: getc_unlocked
 * or _getc_nolock when the build defines the corresponding HAVE_* macro,
 * plain getc otherwise.
 */
229 /* Use the fastest available getc. */
230 #if defined(HAVE_GETC_UNLOCKED)
231 # define fast_getc getc_unlocked
232 #elif defined(HAVE__GETC_NOLOCK)
233 # define fast_getc _getc_nolock
234 #else
235 # define fast_getc getc
236 #endif
238 JS_FRIEND_API(int)
239 js_fgets(char *buf, int size, FILE *file)
241 int n, i, c;
242 JSBool crflag;
244 n = size - 1;
245 if (n < 0)
246 return -1;
248 crflag = JS_FALSE;
249 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
250 buf[i] = c;
251 if (c == '\n') { /* any \n ends a line */
252 i++; /* keep the \n; we know there is room for \0 */
253 break;
255 if (crflag) { /* \r not followed by \n ends line at the \r */
256 ungetc(c, file);
257 break; /* and overwrite c in buf with \0 */
259 crflag = (c == '\r');
262 buf[i] = '\0';
263 return i;
/*
 * Return the next input character, or EOF (setting TSF_EOF) once the input
 * is exhausted.
 *
 * Characters pushed back through ungetbuf are returned first. Otherwise
 * characters come from linebuf, which is refilled one line segment at a time
 * from userbuf; when a file is attached, userbuf is itself refilled from the
 * file with js_fgets. While filling linebuf the line terminator (\n, \r,
 * \r\n, LINE_SEPARATOR or PARA_SEPARATOR) is rewritten to a single '\n'.
 * Returning a '\n' increments lineno.
 */
266 int32
267 TokenStream::getChar()
269 int32 c;
270 ptrdiff_t i, j, len, olen;
271 JSBool crflag;
272 char cbuf[LINE_LIMIT];
273 jschar *ubuf, *nl;
/* Pushed-back characters take priority over buffered input. */
275 if (ungetpos != 0) {
276 c = ungetbuf[--ungetpos];
277 } else {
/* linebuf is used up: fetch the next line segment. */
278 if (linebuf.ptr == linebuf.limit) {
279 len = userbuf.limit - userbuf.ptr;
280 if (len <= 0) {
281 if (!file) {
282 flags |= TSF_EOF;
283 return EOF;
286 /* Fill userbuf so that \r and \r\n convert to \n. */
287 crflag = (flags & TSF_CRFLAG) != 0;
/* NOTE(review): reading one char fewer when a \r is pending presumably leaves room for the \n prepended below -- confirm. */
288 len = js_fgets(cbuf, LINE_LIMIT - crflag, file);
289 if (len <= 0) {
290 flags |= TSF_EOF;
291 return EOF;
293 olen = len;
294 ubuf = userbuf.base;
295 i = 0;
296 if (crflag) {
297 flags &= ~TSF_CRFLAG;
298 if (cbuf[0] != '\n') {
299 ubuf[i++] = '\n';
300 len++;
301 linepos--;
/* Widen the bytes just read into jschars; i indexes ubuf, j indexes cbuf. */
304 for (j = 0; i < len; i++, j++)
305 ubuf[i] = (jschar) (unsigned char) cbuf[j];
306 userbuf.limit = ubuf + len;
307 userbuf.ptr = ubuf;
309 if (listener)
310 listener(filename, lineno, userbuf.ptr, len, &listenerTSData, listenerData);
/* A non-null saveEOL is the terminator position recorded while copying an earlier, truncated segment. */
312 nl = saveEOL;
313 if (!nl) {
315 * Any one of \n, \r, or \r\n ends a line (the longest
316 * match wins). Also allow the Unicode line and paragraph
317 * separators.
319 for (nl = userbuf.ptr; nl < userbuf.limit; nl++) {
321 * Try to prevent value-testing on most characters by
322 * filtering out characters that aren't 000x or 202x.
324 if ((*nl & 0xDFD0) == 0) {
325 if (*nl == '\n')
326 break;
327 if (*nl == '\r') {
328 if (nl + 1 < userbuf.limit && nl[1] == '\n')
329 nl++;
330 break;
332 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
333 break;
339 * If there was a line terminator, copy thru it into linebuf.
340 * Else copy LINE_LIMIT-1 bytes into linebuf.
342 if (nl < userbuf.limit)
343 len = (nl - userbuf.ptr) + 1;
344 if (len >= (ptrdiff_t) LINE_LIMIT) {
345 len = LINE_LIMIT - 1;
346 saveEOL = nl;
347 } else {
348 saveEOL = NULL;
350 js_strncpy(linebuf.base, userbuf.ptr, len);
351 userbuf.ptr += len;
352 olen = len;
355 * Make sure linebuf contains \n for EOL (don't do this in
356 * userbuf because the user's string might be readonly).
358 if (nl < userbuf.limit) {
359 if (*nl == '\r') {
360 if (linebuf.base[len-1] == '\r') {
362 * Does the line segment end in \r? We must check
363 * for a \n at the front of the next segment before
364 * storing a \n into linebuf. This case matters
365 * only when we're reading from a file.
367 if (nl + 1 == userbuf.limit && file) {
368 len--;
369 flags |= TSF_CRFLAG; /* clear NLFLAG? */
370 if (len == 0) {
372 * This can happen when a segment ends in
373 * \r\r. Start over. ptr == limit in this
374 * case, so we'll fall into buffer-filling
375 * code.
377 return getChar();
379 } else {
380 linebuf.base[len-1] = '\n';
383 } else if (*nl == '\n') {
384 if (nl > userbuf.base &&
385 nl[-1] == '\r' &&
386 linebuf.base[len-2] == '\r') {
387 len--;
388 JS_ASSERT(linebuf.base[len] == '\n');
389 linebuf.base[len-1] = '\n';
391 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
392 linebuf.base[len-1] = '\n';
396 /* Reset linebuf based on adjusted segment length. */
397 linebuf.limit = linebuf.base + len;
398 linebuf.ptr = linebuf.base;
400 /* Update position of linebuf within physical userbuf line. */
401 if (!(flags & TSF_NLFLAG))
402 linepos += linelen;
403 else
404 linepos = 0;
405 if (linebuf.limit[-1] == '\n')
406 flags |= TSF_NLFLAG;
407 else
408 flags &= ~TSF_NLFLAG;
410 /* Update linelen from original segment length. */
411 linelen = olen;
/* Hand out the next buffered character. */
413 c = *linebuf.ptr++;
/* Keep lineno in step; ungetChar undoes this when a '\n' is pushed back. */
415 if (c == '\n')
416 lineno++;
417 return c;
420 void
421 TokenStream::ungetChar(int32 c)
423 if (c == EOF)
424 return;
425 JS_ASSERT(ungetpos < JS_ARRAY_LENGTH(ungetbuf));
426 if (c == '\n')
427 lineno--;
428 ungetbuf[ungetpos++] = (jschar)c;
432 * Peek n chars ahead into ts. Return true if n chars were read, false if
433 * there weren't enough characters in the input stream. This function cannot
434 * be used to peek into or past a newline.
436 JSBool
437 TokenStream::peekChars(intN n, jschar *cp)
439 intN i, j;
440 int32 c;
442 for (i = 0; i < n; i++) {
443 c = getChar();
444 if (c == EOF)
445 break;
446 if (c == '\n') {
447 ungetChar(c);
448 break;
450 cp[i] = (jschar)c;
452 for (j = i - 1; j >= 0; j--)
453 ungetChar(cp[j]);
454 return i == n;
457 bool
458 TokenStream::reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber,
459 va_list ap)
461 JSErrorReport report;
462 char *message;
463 size_t linelength;
464 jschar *linechars;
465 char *linebytes;
466 bool warning;
467 JSBool ok;
468 TokenPos *tp;
469 uintN index, i;
470 JSErrorReporter onError;
472 JS_ASSERT(linebuf.limit < linebuf.base + LINE_LIMIT);
474 if (JSREPORT_IS_STRICT(flags) && !JS_HAS_STRICT_OPTION(cx))
475 return JS_TRUE;
477 warning = JSREPORT_IS_WARNING(flags);
478 if (warning && JS_HAS_WERROR_OPTION(cx)) {
479 flags &= ~JSREPORT_WARNING;
480 warning = false;
483 PodZero(&report);
484 report.flags = flags;
485 report.errorNumber = errorNumber;
486 message = NULL;
487 linechars = NULL;
488 linebytes = NULL;
490 MUST_FLOW_THROUGH("out");
491 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
492 errorNumber, &message, &report,
493 !(flags & JSREPORT_UC), ap);
494 if (!ok) {
495 warning = false;
496 goto out;
499 report.filename = filename;
501 if (pn) {
502 report.lineno = pn->pn_pos.begin.lineno;
503 if (report.lineno != lineno)
504 goto report;
505 tp = &pn->pn_pos;
506 } else {
507 /* Point to the current token, not the next one to get. */
508 tp = &tokens[cursor].pos;
510 report.lineno = lineno;
511 linelength = linebuf.limit - linebuf.base;
512 linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
513 if (!linechars) {
514 warning = false;
515 goto out;
517 memcpy(linechars, linebuf.base, linelength * sizeof(jschar));
518 linechars[linelength] = 0;
519 linebytes = js_DeflateString(cx, linechars, linelength);
520 if (!linebytes) {
521 warning = false;
522 goto out;
524 report.linebuf = linebytes;
527 * FIXME: What should instead happen here is that we should
528 * find error-tokens in userbuf, if !file. That will
529 * allow us to deliver a more helpful error message, which
530 * includes all or part of the bad string or bad token. The
531 * code here yields something that looks truncated.
532 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
534 index = 0;
535 if (tp->begin.lineno == tp->end.lineno) {
536 if (tp->begin.index < linepos)
537 goto report;
539 index = tp->begin.index - linepos;
542 report.tokenptr = report.linebuf + index;
543 report.uclinebuf = linechars;
544 report.uctokenptr = report.uclinebuf + index;
547 * If there's a runtime exception type associated with this error
548 * number, set that as the pending exception. For errors occuring at
549 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
551 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
552 * flag will be set in report.flags. Proper behavior for an error
553 * reporter is to ignore a report with this flag for all but top-level
554 * compilation errors. The exception will remain pending, and so long
555 * as the non-top-level "load", "eval", or "compile" native function
556 * returns false, the top-level reporter will eventually receive the
557 * uncaught exception report.
559 * XXX it'd probably be best if there was only one call to this
560 * function, but there seem to be two error reporter call points.
562 report:
563 onError = cx->errorReporter;
566 * Try to raise an exception only if there isn't one already set --
567 * otherwise the exception will describe the last compile-time error,
568 * which is likely spurious.
570 if (!(flags & TSF_ERROR)) {
571 if (js_ErrorToException(cx, message, &report, NULL, NULL))
572 onError = NULL;
576 * Suppress any compile-time errors that don't occur at the top level.
577 * This may still fail, as interplevel may be zero in contexts where we
578 * don't really want to call the error reporter, as when js is called
579 * by other code which could catch the error.
581 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
582 onError = NULL;
584 if (onError) {
585 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
588 * If debugErrorHook is present then we give it a chance to veto
589 * sending the error on to the regular error reporter.
591 if (hook && !hook(cx, message, &report,
592 cx->debugHooks->debugErrorHookData)) {
593 onError = NULL;
596 if (onError)
597 (*onError)(cx, message, &report);
599 out:
600 if (linebytes)
601 cx->free(linebytes);
602 if (linechars)
603 cx->free(linechars);
604 if (message)
605 cx->free(message);
606 if (report.ucmessage)
607 cx->free((void *)report.ucmessage);
609 if (report.messageArgs) {
610 if (!(flags & JSREPORT_UC)) {
611 i = 0;
612 while (report.messageArgs[i])
613 cx->free((void *)report.messageArgs[i++]);
615 cx->free((void *)report.messageArgs);
618 if (!JSREPORT_IS_WARNING(flags)) {
619 /* Set the error flag to suppress spurious reports. */
620 flags |= TSF_ERROR;
623 return warning;
626 bool
627 js::ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
628 uintN errorNumber, ...)
630 JS_ASSERT(ts || tc);
631 JS_ASSERT(cx == ts->getContext());
633 /* In strict mode code, this is an error, not just a warning. */
634 uintN flags;
635 if ((tc && tc->flags & TCF_STRICT_MODE_CODE) || (ts && ts->isStrictMode()))
636 flags = JSREPORT_ERROR;
637 else if (JS_HAS_STRICT_OPTION(cx))
638 flags = JSREPORT_WARNING;
639 else
640 return true;
642 va_list ap;
643 va_start(ap, errorNumber);
644 bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
645 va_end(ap);
647 return result;
650 bool
651 js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn,
652 uintN flags, uintN errorNumber, ...)
654 va_list ap;
657 * We don't accept a JSTreeContext argument, so we can't implement
658 * JSREPORT_STRICT_MODE_ERROR here. Use ReportStrictModeError instead,
659 * or do the checks in the caller and pass plain old JSREPORT_ERROR.
661 JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
663 va_start(ap, errorNumber);
664 JS_ASSERT(cx == ts->getContext());
665 bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
666 va_end(ap);
668 return result;
#if JS_HAS_XML_SUPPORT

/*
 * Scan an XML entity or character reference whose leading '&' has already
 * been consumed, reading up to and including the terminating ';'. On success
 * the entity's value (one jschar, or a surrogate pair for code points
 * 0x10000..0x10FFFF) is appended to tokenbuf and JS_TRUE is returned.
 *
 * Accepted forms are decimal (&#N;) and hexadecimal (&#xH;) character
 * references and the five predefined entities lt, gt, amp, apos and quot.
 *
 * Returns JS_FALSE when appending to tokenbuf fails, or after reporting
 * JSMSG_END_OF_XML_ENTITY (EOF or newline before ';'), JSMSG_BAD_XML_NCR
 * (malformed or illegal character reference) or JSMSG_UNKNOWN_XML_ENTITY.
 */
JSBool
TokenStream::getXMLEntity()
{
    ptrdiff_t offset, length, i;
    int c, d;
    JSBool ispair;
    jschar *bp, digit;
    char *bytes;
    JSErrNum msg;

    JSCharBuffer &tb = tokenbuf;

    /* Put the entity, including the '&' already scanned, in tokenbuf. */
    offset = tb.length();
    if (!tb.append('&'))
        return JS_FALSE;
    while ((c = getChar()) != ';') {
        if (c == EOF || c == '\n') {
            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
            return JS_FALSE;
        }
        if (!tb.append(c))
            return JS_FALSE;
    }

    /*
     * length counts the jschars buffered for this entity: the '&' at bp[0]
     * plus everything scanned before the ';' (which is not buffered).
     */
    length = tb.length() - offset;
    bp = tb.begin() + offset;
    c = d = 0;
    ispair = JS_FALSE;
    if (length > 2 && bp[1] == '#') {
        /* Match a well-formed XML Character Reference. */
        i = 2;
        if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
            if (length > 9)   /* at most 6 hex digits allowed */
                goto badncr;
            while (++i < length) {
                digit = bp[i];
                if (!JS7_ISHEX(digit))
                    goto badncr;
                c = (c << 4) + JS7_UNHEX(digit);
            }
        } else {
            while (i < length) {
                digit = bp[i++];
                if (!JS7_ISDEC(digit))
                    goto badncr;
                c = (c * 10) + JS7_UNDEC(digit);

                /*
                 * Nothing above 0x10FFFF is a legal character, so reject as
                 * soon as the value exceeds it. This also keeps c small
                 * enough that the next multiplication cannot overflow; a
                 * wrapped value could otherwise pass for a legal character.
                 */
                if (c > 0x10FFFF)
                    goto badncr;
            }
        }

        if (0x10000 <= c && c <= 0x10FFFF) {
            /* Form a surrogate pair (c, d) -- c is the high surrogate. */
            d = 0xDC00 + (c & 0x3FF);
            c = 0xD7C0 + (c >> 10);
            ispair = JS_TRUE;
        } else {
            /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
            if (c != 0x9 && c != 0xA && c != 0xD &&
                !(0x20 <= c && c <= 0xD7FF) &&
                !(0xE000 <= c && c <= 0xFFFD)) {
                goto badncr;
            }
        }
    } else {
        /* Try to match one of the five XML 1.0 predefined entities. */
        switch (length) {
          case 3:
            if (bp[2] == 't') {
                if (bp[1] == 'l')
                    c = '<';
                else if (bp[1] == 'g')
                    c = '>';
            }
            break;
          case 4:
            if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
                c = '&';
            break;
          case 5:
            if (bp[3] == 'o') {
                if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
                    c = '\'';
                else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
                    c = '"';
            }
            break;
        }
        if (c == 0) {
            msg = JSMSG_UNKNOWN_XML_ENTITY;
            goto bad;
        }
    }

    /* If we matched, retract tokenbuf and store the entity's value. */
    *bp++ = (jschar) c;
    if (ispair)
        *bp++ = (jschar) d;
    tb.shrinkBy(tb.end() - bp);
    return JS_TRUE;

  badncr:
    msg = JSMSG_BAD_XML_NCR;
  bad:
    /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
    JS_ASSERT((tb.end() - bp) >= 1);
    /* The message argument is the entity text without its leading '&'. */
    bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
    if (bytes) {
        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
        cx->free(bytes);
    }
    return JS_FALSE;
}

#endif /* JS_HAS_XML_SUPPORT */
792 * We have encountered a '\': check for a Unicode escape sequence after it,
793 * returning the character code value if we found a Unicode escape sequence.
794 * Otherwise, non-destructively return the original '\'.
796 int32
797 TokenStream::getUnicodeEscape()
799 jschar cp[5];
800 int32 c;
802 if (peekChars(5, cp) && cp[0] == 'u' &&
803 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
804 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
806 c = (((((JS7_UNHEX(cp[1]) << 4)
807 + JS7_UNHEX(cp[2])) << 4)
808 + JS7_UNHEX(cp[3])) << 4)
809 + JS7_UNHEX(cp[4]);
810 skipChars(5);
811 return c;
813 return '\\';
816 Token *
817 TokenStream::newToken(ptrdiff_t adjust)
819 cursor = (cursor + 1) & ntokensMask;
820 Token *tp = &tokens[cursor];
821 tp->ptr = linebuf.ptr + adjust;
822 tp->pos.begin.index = linepos + (tp->ptr - linebuf.base) - ungetpos;
823 tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
824 return tp;
827 static JS_ALWAYS_INLINE JSBool
828 ScanAsSpace(jschar c)
830 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
831 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
832 return JS_TRUE;
833 return JS_FALSE;
836 static JS_ALWAYS_INLINE JSAtom *
837 atomize(JSContext *cx, JSCharBuffer &cb)
839 return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
842 TokenKind
843 TokenStream::getTokenInternal()
845 TokenKind tt;
846 int c, qc;
847 Token *tp;
848 JSAtom *atom;
849 JSBool hadUnicodeEscape;
850 const struct keyword *kw;
851 #if JS_HAS_XML_SUPPORT
852 JSBool inTarget;
853 size_t targetLength;
854 ptrdiff_t contentIndex;
855 #endif
857 #if JS_HAS_XML_SUPPORT
858 if (flags & TSF_XMLTEXTMODE) {
859 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
860 tp = newToken(0);
861 tokenbuf.clear();
862 qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
864 while ((c = getChar()) != qc && c != '<' && c != EOF) {
865 if (c == '&' && qc == '<') {
866 if (!getXMLEntity())
867 goto error;
868 tt = TOK_XMLTEXT;
869 continue;
872 if (!JS_ISXMLSPACE(c))
873 tt = TOK_XMLTEXT;
874 if (!tokenbuf.append(c))
875 goto error;
877 ungetChar(c);
879 if (tokenbuf.empty()) {
880 atom = NULL;
881 } else {
882 atom = atomize(cx, tokenbuf);
883 if (!atom)
884 goto error;
886 tp->pos.end.lineno = lineno;
887 tp->t_op = JSOP_STRING;
888 tp->t_atom = atom;
889 goto out;
892 if (flags & TSF_XMLTAGMODE) {
893 tp = newToken(0);
894 c = getChar();
895 if (JS_ISXMLSPACE(c)) {
896 do {
897 c = getChar();
898 } while (JS_ISXMLSPACE(c));
899 ungetChar(c);
900 tt = TOK_XMLSPACE;
901 goto out;
904 if (c == EOF) {
905 tt = TOK_EOF;
906 goto out;
909 tokenbuf.clear();
910 if (JS_ISXMLNSSTART(c)) {
911 JSBool sawColon = JS_FALSE;
913 if (!tokenbuf.append(c))
914 goto error;
915 while ((c = getChar()) != EOF && JS_ISXMLNAME(c)) {
916 if (c == ':') {
917 int nextc;
919 if (sawColon ||
920 (nextc = peekChar(),
921 ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
922 !JS_ISXMLNAME(nextc))) {
923 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
924 JSMSG_BAD_XML_QNAME);
925 goto error;
927 sawColon = JS_TRUE;
930 if (!tokenbuf.append(c))
931 goto error;
934 ungetChar(c);
935 atom = atomize(cx, tokenbuf);
936 if (!atom)
937 goto error;
938 tp->t_op = JSOP_STRING;
939 tp->t_atom = atom;
940 tt = TOK_XMLNAME;
941 goto out;
944 switch (c) {
945 case '{':
946 if (flags & TSF_XMLONLYMODE)
947 goto bad_xml_char;
948 tt = TOK_LC;
949 goto out;
951 case '=':
952 tt = TOK_ASSIGN;
953 goto out;
955 case '"':
956 case '\'':
957 qc = c;
958 while ((c = getChar()) != qc) {
959 if (c == EOF) {
960 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
961 JSMSG_UNTERMINATED_STRING);
962 goto error;
966 * XML attribute values are double-quoted when pretty-printed,
967 * so escape " if it is expressed directly in a single-quoted
968 * attribute value.
970 if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
971 JS_ASSERT(qc == '\'');
972 if (!tokenbuf.append(js_quot_entity_str,
973 strlen(js_quot_entity_str)))
974 goto error;
975 continue;
978 if (c == '&' && (flags & TSF_XMLONLYMODE)) {
979 if (!getXMLEntity())
980 goto error;
981 continue;
984 if (!tokenbuf.append(c))
985 goto error;
987 atom = atomize(cx, tokenbuf);
988 if (!atom)
989 goto error;
990 tp->pos.end.lineno = lineno;
991 tp->t_op = JSOP_STRING;
992 tp->t_atom = atom;
993 tt = TOK_XMLATTR;
994 goto out;
996 case '>':
997 tt = TOK_XMLTAGC;
998 goto out;
1000 case '/':
1001 if (matchChar('>')) {
1002 tt = TOK_XMLPTAGC;
1003 goto out;
1005 /* FALL THROUGH */
1007 bad_xml_char:
1008 default:
1009 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
1010 goto error;
1012 /* NOTREACHED */
1014 #endif /* JS_HAS_XML_SUPPORT */
1016 retry:
1017 do {
1018 c = getChar();
1019 if (c == '\n') {
1020 flags &= ~TSF_DIRTYLINE;
1021 if (flags & TSF_NEWLINES)
1022 break;
1024 } while (ScanAsSpace((jschar)c));
1026 tp = newToken(-1);
1027 if (c == EOF) {
1028 tt = TOK_EOF;
1029 goto out;
1032 hadUnicodeEscape = JS_FALSE;
1033 if (JS_ISIDSTART(c) ||
1034 (c == '\\' &&
1035 (qc = getUnicodeEscape(),
1036 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1037 if (hadUnicodeEscape)
1038 c = qc;
1039 tokenbuf.clear();
1040 for (;;) {
1041 if (!tokenbuf.append(c))
1042 goto error;
1043 c = getChar();
1044 if (c == '\\') {
1045 qc = getUnicodeEscape();
1046 if (!JS_ISIDENT(qc))
1047 break;
1048 c = qc;
1049 hadUnicodeEscape = JS_TRUE;
1050 } else {
1051 if (!JS_ISIDENT(c))
1052 break;
1055 ungetChar(c);
1058 * Check for keywords unless we saw Unicode escape or parser asks
1059 * to ignore keywords.
1061 if (!hadUnicodeEscape &&
1062 !(flags & TSF_KEYWORD_IS_NAME) &&
1063 (kw = FindKeyword(tokenbuf.begin(), tokenbuf.length()))) {
1064 if (kw->tokentype == TOK_RESERVED) {
1065 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1066 JSMSG_RESERVED_ID, kw->chars)) {
1067 goto error;
1069 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1070 tt = kw->tokentype;
1071 tp->t_op = (JSOp) kw->op;
1072 goto out;
1076 atom = atomize(cx, tokenbuf);
1077 if (!atom)
1078 goto error;
1079 tp->t_op = JSOP_NAME;
1080 tp->t_atom = atom;
1081 tt = TOK_NAME;
1082 goto out;
1085 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(peekChar()))) {
1086 jsint radix;
1087 const jschar *endptr;
1088 jsdouble dval;
1090 radix = 10;
1091 tokenbuf.clear();
1093 if (c == '0') {
1094 if (!tokenbuf.append(c))
1095 goto error;
1096 c = getChar();
1097 if (JS_TOLOWER(c) == 'x') {
1098 if (!tokenbuf.append(c))
1099 goto error;
1100 c = getChar();
1101 radix = 16;
1102 } else if (JS7_ISDEC(c)) {
1103 radix = 8;
1107 while (JS7_ISHEX(c)) {
1108 if (radix < 16) {
1109 if (JS7_ISLET(c))
1110 break;
1112 if (radix == 8) {
1113 /* Octal integer literals are not permitted in strict mode code. */
1114 if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1115 goto error;
1118 * Outside strict mode, we permit 08 and 09 as decimal numbers, which
1119 * makes our behaviour a superset of the ECMA numeric grammar. We
1120 * might not always be so permissive, so we warn about it.
1122 if (c >= '8') {
1123 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1124 JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1125 goto error;
1127 radix = 10;
1131 if (!tokenbuf.append(c))
1132 goto error;
1133 c = getChar();
1136 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1137 if (c == '.') {
1138 do {
1139 if (!tokenbuf.append(c))
1140 goto error;
1141 c = getChar();
1142 } while (JS7_ISDEC(c));
1144 if (JS_TOLOWER(c) == 'e') {
1145 if (!tokenbuf.append(c))
1146 goto error;
1147 c = getChar();
1148 if (c == '+' || c == '-') {
1149 if (!tokenbuf.append(c))
1150 goto error;
1151 c = getChar();
1153 if (!JS7_ISDEC(c)) {
1154 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1155 JSMSG_MISSING_EXPONENT);
1156 goto error;
1158 do {
1159 if (!tokenbuf.append(c))
1160 goto error;
1161 c = getChar();
1162 } while (JS7_ISDEC(c));
1166 if (JS_ISIDSTART(c)) {
1167 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1168 goto error;
1171 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1172 ungetChar(c);
1173 if (!tokenbuf.append(0))
1174 goto error;
1176 if (radix == 10) {
1177 if (!js_strtod(cx, tokenbuf.begin(), tokenbuf.end(), &endptr, &dval)) {
1178 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1179 goto error;
1181 } else {
1182 if (!js_strtointeger(cx, tokenbuf.begin(), tokenbuf.end(),
1183 &endptr, radix, &dval)) {
1184 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1185 goto error;
1188 tp->t_dval = dval;
1189 tt = TOK_NUMBER;
1190 goto out;
1193 if (c == '"' || c == '\'') {
1194 qc = c;
1195 tokenbuf.clear();
1196 while ((c = getChar()) != qc) {
1197 if (c == '\n' || c == EOF) {
1198 ungetChar(c);
1199 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1200 JSMSG_UNTERMINATED_STRING);
1201 goto error;
1203 if (c == '\\') {
1204 switch (c = getChar()) {
1205 case 'b': c = '\b'; break;
1206 case 'f': c = '\f'; break;
1207 case 'n': c = '\n'; break;
1208 case 'r': c = '\r'; break;
1209 case 't': c = '\t'; break;
1210 case 'v': c = '\v'; break;
1212 default:
1213 if ('0' <= c && c < '8') {
1214 int32 val = JS7_UNDEC(c);
1216 c = peekChar();
1217 /* Strict mode code allows only \0, then a non-digit. */
1218 if (val != 0 || JS7_ISDEC(c)) {
1219 if (!ReportStrictModeError(cx, this, NULL, NULL,
1220 JSMSG_DEPRECATED_OCTAL)) {
1221 goto error;
1224 if ('0' <= c && c < '8') {
1225 val = 8 * val + JS7_UNDEC(c);
1226 getChar();
1227 c = peekChar();
1228 if ('0' <= c && c < '8') {
1229 int32 save = val;
1230 val = 8 * val + JS7_UNDEC(c);
1231 if (val <= 0377)
1232 getChar();
1233 else
1234 val = save;
1238 c = (jschar)val;
1239 } else if (c == 'u') {
1240 jschar cp[4];
1241 if (peekChars(4, cp) &&
1242 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1243 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1244 c = (((((JS7_UNHEX(cp[0]) << 4)
1245 + JS7_UNHEX(cp[1])) << 4)
1246 + JS7_UNHEX(cp[2])) << 4)
1247 + JS7_UNHEX(cp[3]);
1248 skipChars(4);
1250 } else if (c == 'x') {
1251 jschar cp[2];
1252 if (peekChars(2, cp) &&
1253 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1254 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1255 skipChars(2);
1257 } else if (c == '\n') {
1258 /* ECMA follows C by removing escaped newlines. */
1259 continue;
1261 break;
1264 if (!tokenbuf.append(c))
1265 goto error;
1267 atom = atomize(cx, tokenbuf);
1268 if (!atom)
1269 goto error;
1270 tp->pos.end.lineno = lineno;
1271 tp->t_op = JSOP_STRING;
1272 tp->t_atom = atom;
1273 tt = TOK_STRING;
1274 goto out;
1277 switch (c) {
1278 case '\n': tt = TOK_EOL; goto eol_out;
1279 case ';': tt = TOK_SEMI; break;
1280 case '[': tt = TOK_LB; break;
1281 case ']': tt = TOK_RB; break;
1282 case '{': tt = TOK_LC; break;
1283 case '}': tt = TOK_RC; break;
1284 case '(': tt = TOK_LP; break;
1285 case ')': tt = TOK_RP; break;
1286 case ',': tt = TOK_COMMA; break;
1287 case '?': tt = TOK_HOOK; break;
1289 case '.':
1290 #if JS_HAS_XML_SUPPORT
1291 if (matchChar(c))
1292 tt = TOK_DBLDOT;
1293 else
1294 #endif
1295 tt = TOK_DOT;
1296 break;
1298 case ':':
1299 #if JS_HAS_XML_SUPPORT
1300 if (matchChar(c)) {
1301 tt = TOK_DBLCOLON;
1302 break;
1304 #endif
1306 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1307 * object initializer, likewise for setter.
1309 tp->t_op = JSOP_NOP;
1310 tt = TOK_COLON;
1311 break;
1313 case '|':
1314 if (matchChar(c)) {
1315 tt = TOK_OR;
1316 } else if (matchChar('=')) {
1317 tp->t_op = JSOP_BITOR;
1318 tt = TOK_ASSIGN;
1319 } else {
1320 tt = TOK_BITOR;
1322 break;
1324 case '^':
1325 if (matchChar('=')) {
1326 tp->t_op = JSOP_BITXOR;
1327 tt = TOK_ASSIGN;
1328 } else {
1329 tt = TOK_BITXOR;
1331 break;
1333 case '&':
1334 if (matchChar(c)) {
1335 tt = TOK_AND;
1336 } else if (matchChar('=')) {
1337 tp->t_op = JSOP_BITAND;
1338 tt = TOK_ASSIGN;
1339 } else {
1340 tt = TOK_BITAND;
1342 break;
1344 case '=':
1345 if (matchChar(c)) {
1346 tp->t_op = matchChar(c) ? JSOP_STRICTEQ : JSOP_EQ;
1347 tt = TOK_EQOP;
1348 } else {
1349 tp->t_op = JSOP_NOP;
1350 tt = TOK_ASSIGN;
1352 break;
1354 case '!':
1355 if (matchChar('=')) {
1356 tp->t_op = matchChar('=') ? JSOP_STRICTNE : JSOP_NE;
1357 tt = TOK_EQOP;
1358 } else {
1359 tp->t_op = JSOP_NOT;
1360 tt = TOK_UNARYOP;
1362 break;
1364 #if JS_HAS_XML_SUPPORT
1365 case '@':
1366 tt = TOK_AT;
1367 break;
1368 #endif
1370 case '<':
1371 #if JS_HAS_XML_SUPPORT
1373 * After much testing, it's clear that Postel's advice to protocol
1374 * designers ("be liberal in what you accept, and conservative in what
1375 * you send") invites a natural-law repercussion for JS as "protocol":
1377 * "If you are liberal in what you accept, others will utterly fail to
1378 * be conservative in what they send."
1380 * Which means you will get <!-- comments to end of line in the middle
1381 * of .js files, and after if conditions whose then statements are on
1382 * the next line, and other wonders. See at least the following bugs:
1383 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1384 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1385 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1387 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1388 * an XML comment or CDATA literal. Instead, we always scan <! as the
1389 * start of an HTML comment hack to end of line, used since Netscape 2
1390 * to hide script tag content from script-unaware browsers.
1392 * But this still leaves XML resources with certain internal structure
1393 * vulnerable to being loaded as script cross-origin, and some internal
1394 * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1395 * source consists only of XML literals. See:
1397 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1399 * The check for this is in jsparse.cpp, Compiler::compileScript.
1401 if ((flags & TSF_OPERAND) &&
1402 (JS_HAS_XML_OPTION(cx) || peekChar() != '!')) {
1403 /* Check for XML comment or CDATA section. */
1404 if (matchChar('!')) {
1405 tokenbuf.clear();
1407 /* Scan XML comment. */
1408 if (matchChar('-')) {
1409 if (!matchChar('-'))
1410 goto bad_xml_markup;
1411 while ((c = getChar()) != '-' || !matchChar('-')) {
1412 if (c == EOF)
1413 goto bad_xml_markup;
1414 if (!tokenbuf.append(c))
1415 goto error;
1417 tt = TOK_XMLCOMMENT;
1418 tp->t_op = JSOP_XMLCOMMENT;
1419 goto finish_xml_markup;
1422 /* Scan CDATA section. */
1423 if (matchChar('[')) {
1424 jschar cp[6];
1425 if (peekChars(6, cp) &&
1426 cp[0] == 'C' &&
1427 cp[1] == 'D' &&
1428 cp[2] == 'A' &&
1429 cp[3] == 'T' &&
1430 cp[4] == 'A' &&
1431 cp[5] == '[') {
1432 skipChars(6);
1433 while ((c = getChar()) != ']' ||
1434 !peekChars(2, cp) ||
1435 cp[0] != ']' ||
1436 cp[1] != '>') {
1437 if (c == EOF)
1438 goto bad_xml_markup;
1439 if (!tokenbuf.append(c))
1440 goto error;
1442 getChar(); /* discard ] but not > */
1443 tt = TOK_XMLCDATA;
1444 tp->t_op = JSOP_XMLCDATA;
1445 goto finish_xml_markup;
1447 goto bad_xml_markup;
1451 /* Check for processing instruction. */
1452 if (matchChar('?')) {
1453 inTarget = JS_TRUE;
1454 targetLength = 0;
1455 contentIndex = -1;
1457 tokenbuf.clear();
1458 while ((c = getChar()) != '?' || peekChar() != '>') {
1459 if (c == EOF)
1460 goto bad_xml_markup;
1461 if (inTarget) {
1462 if (JS_ISXMLSPACE(c)) {
1463 if (tokenbuf.empty())
1464 goto bad_xml_markup;
1465 inTarget = JS_FALSE;
1466 } else {
1467 if (!(tokenbuf.empty()
1468 ? JS_ISXMLNSSTART(c)
1469 : JS_ISXMLNS(c))) {
1470 goto bad_xml_markup;
1472 ++targetLength;
1474 } else {
1475 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1476 contentIndex = tokenbuf.length();
1478 if (!tokenbuf.append(c))
1479 goto error;
1481 if (targetLength == 0)
1482 goto bad_xml_markup;
1483 if (contentIndex < 0) {
1484 atom = cx->runtime->atomState.emptyAtom;
1485 } else {
1486 atom = js_AtomizeChars(cx,
1487 tokenbuf.begin() + contentIndex,
1488 tokenbuf.length() - contentIndex,
1490 if (!atom)
1491 goto error;
1493 tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1494 tp->t_atom2 = atom;
1495 tt = TOK_XMLPI;
1497 finish_xml_markup:
1498 if (!matchChar('>'))
1499 goto bad_xml_markup;
1500 atom = atomize(cx, tokenbuf);
1501 if (!atom)
1502 goto error;
1503 tp->t_atom = atom;
1504 tp->pos.end.lineno = lineno;
1505 goto out;
1508 /* An XML start-of-tag character. */
1509 tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1510 goto out;
1512 bad_xml_markup:
1513 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1514 goto error;
1516 #endif /* JS_HAS_XML_SUPPORT */
1518 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1519 if (matchChar('!')) {
1520 if (matchChar('-')) {
1521 if (matchChar('-')) {
1522 flags |= TSF_IN_HTML_COMMENT;
1523 goto skipline;
1525 ungetChar('-');
1527 ungetChar('!');
1529 if (matchChar(c)) {
1530 tp->t_op = JSOP_LSH;
1531 tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1532 } else {
1533 tp->t_op = matchChar('=') ? JSOP_LE : JSOP_LT;
1534 tt = TOK_RELOP;
1536 break;
1538 case '>':
1539 if (matchChar(c)) {
1540 tp->t_op = matchChar(c) ? JSOP_URSH : JSOP_RSH;
1541 tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1542 } else {
1543 tp->t_op = matchChar('=') ? JSOP_GE : JSOP_GT;
1544 tt = TOK_RELOP;
1546 break;
1548 case '*':
1549 tp->t_op = JSOP_MUL;
1550 tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
1551 break;
1553 case '/':
1554 if (matchChar('/')) {
1556 * Hack for source filters such as the Mozilla XUL preprocessor:
1557 * "//@line 123\n" sets the number of the *next* line after the
1558 * comment to 123.
1560 if (JS_HAS_ATLINE_OPTION(cx)) {
1561 jschar cp[5];
1562 uintN i, line, temp;
1563 char filenameBuf[1024];
1565 if (peekChars(5, cp) &&
1566 cp[0] == '@' &&
1567 cp[1] == 'l' &&
1568 cp[2] == 'i' &&
1569 cp[3] == 'n' &&
1570 cp[4] == 'e') {
1571 skipChars(5);
1572 while ((c = getChar()) != '\n' && ScanAsSpace((jschar)c))
1573 continue;
1574 if (JS7_ISDEC(c)) {
1575 line = JS7_UNDEC(c);
1576 while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1577 temp = 10 * line + JS7_UNDEC(c);
1578 if (temp < line) {
1579 /* Ignore overlarge line numbers. */
1580 goto skipline;
1582 line = temp;
1584 while (c != '\n' && ScanAsSpace((jschar)c))
1585 c = getChar();
1586 i = 0;
1587 if (c == '"') {
1588 while ((c = getChar()) != EOF && c != '"') {
1589 if (c == '\n') {
1590 ungetChar(c);
1591 goto skipline;
1593 if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1594 goto skipline;
1595 filenameBuf[i++] = (char) c;
1597 if (c == '"') {
1598 while ((c = getChar()) != '\n' &&
1599 ScanAsSpace((jschar)c)) {
1600 continue;
1604 filenameBuf[i] = '\0';
1605 if (c == '\n') {
1606 if (i > 0) {
1607 if (flags & TSF_OWNFILENAME)
1608 cx->free((void *) filename);
1609 filename = JS_strdup(cx, filenameBuf);
1610 if (!filename)
1611 goto error;
1612 flags |= TSF_OWNFILENAME;
1614 lineno = line;
1617 ungetChar(c);
1621 skipline:
1622 /* Optimize line skipping if we are not in an HTML comment. */
1623 if (flags & TSF_IN_HTML_COMMENT) {
1624 while ((c = getChar()) != EOF && c != '\n') {
1625 if (c == '-' && matchChar('-') && matchChar('>'))
1626 flags &= ~TSF_IN_HTML_COMMENT;
1628 } else {
1629 while ((c = getChar()) != EOF && c != '\n')
1630 continue;
1632 ungetChar(c);
1633 cursor = (cursor - 1) & ntokensMask;
1634 goto retry;
1637 if (matchChar('*')) {
1638 uintN linenoBefore = lineno;
1639 while ((c = getChar()) != EOF &&
1640 !(c == '*' && matchChar('/'))) {
1641 /* Ignore all characters until comment close. */
1643 if (c == EOF) {
1644 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1645 JSMSG_UNTERMINATED_COMMENT);
1646 goto error;
1648 if ((flags & TSF_NEWLINES) && linenoBefore != lineno) {
1649 flags &= ~TSF_DIRTYLINE;
1650 tt = TOK_EOL;
1651 goto eol_out;
1653 cursor = (cursor - 1) & ntokensMask;
1654 goto retry;
1657 if (flags & TSF_OPERAND) {
1658 uintN reflags, length;
1659 JSBool inCharClass = JS_FALSE;
1661 tokenbuf.clear();
1662 for (;;) {
1663 c = getChar();
1664 if (c == '\n' || c == EOF) {
1665 ungetChar(c);
1666 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1667 JSMSG_UNTERMINATED_REGEXP);
1668 goto error;
1670 if (c == '\\') {
1671 if (!tokenbuf.append(c))
1672 goto error;
1673 c = getChar();
1674 } else if (c == '[') {
1675 inCharClass = JS_TRUE;
1676 } else if (c == ']') {
1677 inCharClass = JS_FALSE;
1678 } else if (c == '/' && !inCharClass) {
1679 /* For compat with IE, allow unescaped / in char classes. */
1680 break;
1682 if (!tokenbuf.append(c))
1683 goto error;
1685 for (reflags = 0, length = tokenbuf.length() + 1; ; length++) {
1686 c = peekChar();
1687 if (c == 'g' && !(reflags & JSREG_GLOB))
1688 reflags |= JSREG_GLOB;
1689 else if (c == 'i' && !(reflags & JSREG_FOLD))
1690 reflags |= JSREG_FOLD;
1691 else if (c == 'm' && !(reflags & JSREG_MULTILINE))
1692 reflags |= JSREG_MULTILINE;
1693 else if (c == 'y' && !(reflags & JSREG_STICKY))
1694 reflags |= JSREG_STICKY;
1695 else
1696 break;
1697 getChar();
1699 c = peekChar();
1700 if (JS7_ISLET(c)) {
1701 char buf[2] = { '\0' };
1702 tp->pos.begin.index += length + 1;
1703 buf[0] = (char)c;
1704 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
1705 buf);
1706 (void) getChar();
1707 goto error;
1709 tp->t_reflags = reflags;
1710 tt = TOK_REGEXP;
1711 break;
1714 tp->t_op = JSOP_DIV;
1715 tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1716 break;
1718 case '%':
1719 tp->t_op = JSOP_MOD;
1720 tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1721 break;
1723 case '~':
1724 tp->t_op = JSOP_BITNOT;
1725 tt = TOK_UNARYOP;
1726 break;
1728 case '+':
1729 if (matchChar('=')) {
1730 tp->t_op = JSOP_ADD;
1731 tt = TOK_ASSIGN;
1732 } else if (matchChar(c)) {
1733 tt = TOK_INC;
1734 } else {
1735 tp->t_op = JSOP_POS;
1736 tt = TOK_PLUS;
1738 break;
1740 case '-':
1741 if (matchChar('=')) {
1742 tp->t_op = JSOP_SUB;
1743 tt = TOK_ASSIGN;
1744 } else if (matchChar(c)) {
1745 if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
1746 flags &= ~TSF_IN_HTML_COMMENT;
1747 goto skipline;
1749 tt = TOK_DEC;
1750 } else {
1751 tp->t_op = JSOP_NEG;
1752 tt = TOK_MINUS;
1754 break;
1756 #if JS_HAS_SHARP_VARS
1757 case '#':
1759 uint32 n;
1761 c = getChar();
1762 if (!JS7_ISDEC(c)) {
1763 ungetChar(c);
1764 goto badchar;
1766 n = (uint32)JS7_UNDEC(c);
1767 for (;;) {
1768 c = getChar();
1769 if (!JS7_ISDEC(c))
1770 break;
1771 n = 10 * n + JS7_UNDEC(c);
1772 if (n >= UINT16_LIMIT) {
1773 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_SHARPVAR_TOO_BIG);
1774 goto error;
1777 tp->t_dval = (jsdouble) n;
1778 if (JS_HAS_STRICT_OPTION(cx) &&
1779 (c == '=' || c == '#')) {
1780 char buf[20];
1781 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1782 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1783 JSMSG_DEPRECATED_USAGE, buf)) {
1784 goto error;
1787 if (c == '=')
1788 tt = TOK_DEFSHARP;
1789 else if (c == '#')
1790 tt = TOK_USESHARP;
1791 else
1792 goto badchar;
1793 break;
1795 #endif /* JS_HAS_SHARP_VARS */
1797 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1798 badchar:
1799 #endif
1801 default:
1802 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
1803 goto error;
1806 out:
1807 JS_ASSERT(tt != TOK_EOL);
1808 flags |= TSF_DIRTYLINE;
1810 eol_out:
1811 JS_ASSERT(tt < TOK_LIMIT);
1812 tp->pos.end.index = linepos + (linebuf.ptr - linebuf.base) - ungetpos;
1813 tp->type = tt;
1814 return tt;
1816 error:
1817 tt = TOK_ERROR;
1818 flags |= TSF_ERROR;
1819 goto out;