Bug 559408: Arena macros to methods. (r=galish)
[mozilla-central.git] / js / src / jsscan.cpp
blob4257b3acf99fbdd8e50c15d5b4cff46d8fd4d6d0
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS lexical scanner.
44 #include <stdio.h> /* first to avoid trouble on some systems */
45 #include <errno.h>
46 #include <limits.h>
47 #include <math.h>
48 #ifdef HAVE_MEMORY_H
49 #include <memory.h>
50 #endif
51 #include <stdarg.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsstdint.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsbit.h"
58 #include "jsutil.h" /* Added by JSIFY */
59 #include "jsdtoa.h"
60 #include "jsprf.h"
61 #include "jsapi.h"
62 #include "jsatom.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsemit.h"
66 #include "jsexn.h"
67 #include "jsnum.h"
68 #include "jsopcode.h"
69 #include "jsparse.h"
70 #include "jsregexp.h"
71 #include "jsscan.h"
72 #include "jsscript.h"
73 #include "jsstaticcheck.h"
74 #include "jsvector.h"
76 #if JS_HAS_XML_SUPPORT
77 #include "jsxml.h"
78 #endif
80 using namespace js;
82 #define JS_KEYWORD(keyword, type, op, version) \
83 const char js_##keyword##_str[] = #keyword;
84 #include "jskeyword.tbl"
85 #undef JS_KEYWORD
/*
 * One row of the keyword table.  The fields mirror the arguments of a
 * JS_KEYWORD entry in jskeyword.tbl.
 */
struct keyword {
    const char  *chars;         /* C string with keyword text */
    TokenKind   tokentype;      /* returned by js_CheckKeyword for this keyword */
    JSOp        op;             /* JSOp; the scanner copies it into the token's t_op */
    JSVersion   version;        /* JSVersion; the scanner compares it with JSVERSION_NUMBER(cx) */
};

/* One entry per JS_KEYWORD line of jskeyword.tbl, in the table's own order. */
static const struct keyword keyword_defs[] = {
#define JS_KEYWORD(keyword, type, op, version) \
    {js_##keyword##_str, type, op, version},
#include "jskeyword.tbl"
#undef JS_KEYWORD
};

#define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
103 static const struct keyword *
104 FindKeyword(const jschar *s, size_t length)
106 register size_t i;
107 const struct keyword *kw;
108 const char *chars;
110 JS_ASSERT(length != 0);
112 #define JSKW_LENGTH() length
113 #define JSKW_AT(column) s[column]
114 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
115 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
116 #define JSKW_NO_MATCH() goto no_match;
117 #include "jsautokw.h"
118 #undef JSKW_NO_MATCH
119 #undef JSKW_TEST_GUESS
120 #undef JSKW_GOT_MATCH
121 #undef JSKW_AT
122 #undef JSKW_LENGTH
124 got_match:
125 return &keyword_defs[i];
127 test_guess:
128 kw = &keyword_defs[i];
129 chars = kw->chars;
130 do {
131 if (*s++ != (unsigned char)(*chars++))
132 goto no_match;
133 } while (--length != 0);
134 return kw;
136 no_match:
137 return NULL;
140 TokenKind
141 js_CheckKeyword(const jschar *str, size_t length)
143 const struct keyword *kw;
145 JS_ASSERT(length != 0);
146 kw = FindKeyword(str, length);
147 return kw ? kw->tokentype : TOK_EOF;
150 JSBool
151 js_IsIdentifier(JSString *str)
153 size_t length;
154 jschar c;
155 const jschar *chars, *end;
157 str->getCharsAndLength(chars, length);
158 if (length == 0)
159 return JS_FALSE;
160 c = *chars;
161 if (!JS_ISIDSTART(c))
162 return JS_FALSE;
163 end = chars + length;
164 while (++chars != end) {
165 c = *chars;
166 if (!JS_ISIDENT(c))
167 return JS_FALSE;
169 return JS_TRUE;
172 #ifdef _MSC_VER
173 #pragma warning(push)
174 #pragma warning(disable:4351)
175 #endif
177 /* Initialize members that aren't initialized in |init|. */
178 TokenStream::TokenStream(JSContext *cx)
179 : cx(cx), tokens(), cursor(), lookahead(), ungetpos(), ungetbuf(), flags(),
180 linepos(), lineposNext(), file(), listenerTSData(), tokenbuf(cx)
183 #ifdef _MSC_VER
184 #pragma warning(pop)
185 #endif
187 bool
188 TokenStream::init(const jschar *base, size_t length, FILE *fp, const char *fn, uintN ln)
190 jschar *buf;
192 JS_ASSERT_IF(fp, !base);
193 JS_ASSERT_IF(!base, length == 0);
194 size_t nb = fp
195 ? 2 * LINE_LIMIT * sizeof(jschar)
196 : LINE_LIMIT * sizeof(jschar);
197 cx->tempPool.allocateCast<jschar *>(buf, nb);
198 if (!buf) {
199 js_ReportOutOfScriptQuota(cx);
200 return false;
202 memset(buf, 0, nb);
204 /* Initialize members. */
205 filename = fn;
206 lineno = ln;
207 linebuf.base = linebuf.limit = linebuf.ptr = buf;
208 if (fp) {
209 file = fp;
210 userbuf.base = buf + LINE_LIMIT;
211 userbuf.ptr = userbuf.limit = userbuf.base + LINE_LIMIT;
212 } else {
213 userbuf.base = (jschar *)base;
214 userbuf.limit = (jschar *)base + length;
215 userbuf.ptr = (jschar *)base;
217 listener = cx->debugHooks->sourceHandler;
218 listenerData = cx->debugHooks->sourceHandlerData;
219 return true;
222 void
223 TokenStream::close()
225 if (flags & TSF_OWNFILENAME)
226 cx->free((void *) filename);
229 /* Use the fastest available getc. */
230 #if defined(HAVE_GETC_UNLOCKED)
231 # define fast_getc getc_unlocked
232 #elif defined(HAVE__GETC_NOLOCK)
233 # define fast_getc _getc_nolock
234 #else
235 # define fast_getc getc
236 #endif
238 JS_FRIEND_API(int)
239 js_fgets(char *buf, int size, FILE *file)
241 int n, i, c;
242 JSBool crflag;
244 n = size - 1;
245 if (n < 0)
246 return -1;
248 crflag = JS_FALSE;
249 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
250 buf[i] = c;
251 if (c == '\n') { /* any \n ends a line */
252 i++; /* keep the \n; we know there is room for \0 */
253 break;
255 if (crflag) { /* \r not followed by \n ends line at the \r */
256 ungetc(c, file);
257 break; /* and overwrite c in buf with \0 */
259 crflag = (c == '\r');
262 buf[i] = '\0';
263 return i;
267 * Nb: This does *not* append a terminating '\0'. Returns the number of chars
268 * read from the file.
271 TokenStream::fillUserbuf()
274 * We avoid splitting a \r\n pair, because this makes things much easier
275 * for getChar(). To do this, we only try to fill userbuf up with
276 * LINE_LIMIT-1 chars. Once we've reached that number, if the last one is
277 * \r then we check if the following one is \n; if so we get it too,
278 * knowing that we have space for it.
280 jschar *buf = userbuf.base;
281 int n = LINE_LIMIT - 1; /* reserve space for \n following a \r */
282 JS_ASSERT(n > 0);
283 int i;
284 i = 0;
285 while (true) {
286 int c = fast_getc(file);
287 if (c == EOF)
288 break;
289 buf[i] = (jschar) (unsigned char) c;
290 i++;
292 if (i == n) {
293 if (buf[i - 1] == '\r') {
294 /* Look for a following \n. We know we have space in buf for it. */
295 c = fast_getc(file);
296 if (c == EOF)
297 break;
298 if (c == '\n') {
299 buf[i] = (jschar) (unsigned char) c;
300 i++;
301 break;
303 ungetc(c, file); /* \r wasn't followed by \n, unget */
305 break;
308 return i;
311 int32
312 TokenStream::getCharFillLinebuf()
314 ptrdiff_t ulen = userbuf.limit - userbuf.ptr;
315 if (ulen <= 0) {
316 if (!file) {
317 flags |= TSF_EOF;
318 return EOF;
321 /* Fill userbuf so that \r and \r\n convert to \n. */
322 ulen = fillUserbuf();
323 JS_ASSERT(ulen >= 0);
324 if (ulen == 0) {
325 flags |= TSF_EOF;
326 return EOF;
328 userbuf.limit = userbuf.base + ulen;
329 userbuf.ptr = userbuf.base;
331 if (listener)
332 listener(filename, lineno, userbuf.ptr, ulen, &listenerTSData, listenerData);
335 * Copy from userbuf to linebuf. Stop when any of these happen:
336 * (a) we reach the end of userbuf;
337 * (b) we reach the end of linebuf;
338 * (c) we hit an EOL.
340 * "EOL" means any of: \r, \n, \r\n, or the Unicode line and paragraph
341 * separators.
343 jschar *from = userbuf.ptr;
344 jschar *to = linebuf.base;
346 int llenAdjust = 0;
347 int limit = JS_MIN(size_t(ulen), LINE_LIMIT);
348 int i = 0;
349 while (i < limit) {
350 /* Copy the jschar from userbuf to linebuf. */
351 jschar d = to[i] = from[i];
352 i++;
355 * Normalize the copied jschar if it was a newline. Try to
356 * prevent multiple tests on most characters by first
357 * filtering out characters that aren't 000x or 202x.
359 if ((d & 0xDFD0) == 0) {
360 if (d == '\n') {
361 break;
364 if (d == '\r') {
365 to[i - 1] = '\n'; /* overwrite with '\n' */
366 if (i < ulen && from[i] == '\n') {
367 i++; /* skip over '\n' */
368 llenAdjust = -1;
370 break;
373 if (d == LINE_SEPARATOR || d == PARA_SEPARATOR) {
374 to[i - 1] = '\n'; /* overwrite with '\n' */
375 break;
380 /* At this point 'i' is the index one past the last char copied. */
381 ulen = i;
382 userbuf.ptr += ulen;
384 /* Reset linebuf based on normalized length. */
385 linebuf.ptr = linebuf.base;
386 linebuf.limit = linebuf.base + ulen + llenAdjust;
388 /* Update position of linebuf within physical userbuf line. */
389 linepos = lineposNext;
390 if (linebuf.limit[-1] == '\n')
391 lineposNext = 0;
392 else
393 lineposNext += ulen;
395 return *linebuf.ptr++;
399 * This gets the next char, normalizing all EOL sequences to '\n' as it goes.
401 int32
402 TokenStream::getChar()
404 int32 c;
405 if (ungetpos != 0) {
406 c = ungetbuf[--ungetpos];
407 } else if (linebuf.ptr == linebuf.limit) {
408 c = getCharFillLinebuf();
409 } else {
410 c = *linebuf.ptr++;
412 if (c == '\n')
413 lineno++;
414 return c;
417 void
418 TokenStream::ungetChar(int32 c)
420 if (c == EOF)
421 return;
422 JS_ASSERT(ungetpos < JS_ARRAY_LENGTH(ungetbuf));
423 if (c == '\n')
424 lineno--;
425 ungetbuf[ungetpos++] = (jschar)c;
429 * Peek n chars ahead into ts. Return true if n chars were read, false if
430 * there weren't enough characters in the input stream. This function cannot
431 * be used to peek into or past a newline.
433 JSBool
434 TokenStream::peekChars(intN n, jschar *cp)
436 intN i, j;
437 int32 c;
439 for (i = 0; i < n; i++) {
440 c = getChar();
441 if (c == EOF)
442 break;
443 if (c == '\n') {
444 ungetChar(c);
445 break;
447 cp[i] = (jschar)c;
449 for (j = i - 1; j >= 0; j--)
450 ungetChar(cp[j]);
451 return i == n;
454 bool
455 TokenStream::reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber,
456 va_list ap)
458 JSErrorReport report;
459 char *message;
460 size_t linelength;
461 jschar *linechars;
462 char *linebytes;
463 bool warning;
464 JSBool ok;
465 TokenPos *tp;
466 uintN index, i;
467 JSErrorReporter onError;
469 JS_ASSERT(linebuf.limit <= linebuf.base + LINE_LIMIT);
471 if (JSREPORT_IS_STRICT(flags) && !JS_HAS_STRICT_OPTION(cx))
472 return JS_TRUE;
474 warning = JSREPORT_IS_WARNING(flags);
475 if (warning && JS_HAS_WERROR_OPTION(cx)) {
476 flags &= ~JSREPORT_WARNING;
477 warning = false;
480 PodZero(&report);
481 report.flags = flags;
482 report.errorNumber = errorNumber;
483 message = NULL;
484 linechars = NULL;
485 linebytes = NULL;
487 MUST_FLOW_THROUGH("out");
488 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
489 errorNumber, &message, &report,
490 !(flags & JSREPORT_UC), ap);
491 if (!ok) {
492 warning = false;
493 goto out;
496 report.filename = filename;
498 if (pn) {
499 report.lineno = pn->pn_pos.begin.lineno;
500 if (report.lineno != lineno)
501 goto report;
502 tp = &pn->pn_pos;
503 } else {
504 /* Point to the current token, not the next one to get. */
505 tp = &tokens[cursor].pos;
507 report.lineno = lineno;
508 linelength = linebuf.limit - linebuf.base;
509 linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
510 if (!linechars) {
511 warning = false;
512 goto out;
514 memcpy(linechars, linebuf.base, linelength * sizeof(jschar));
515 linechars[linelength] = 0;
516 linebytes = js_DeflateString(cx, linechars, linelength);
517 if (!linebytes) {
518 warning = false;
519 goto out;
521 report.linebuf = linebytes;
524 * FIXME: What should instead happen here is that we should
525 * find error-tokens in userbuf, if !file. That will
526 * allow us to deliver a more helpful error message, which
527 * includes all or part of the bad string or bad token. The
528 * code here yields something that looks truncated.
529 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
531 index = 0;
532 if (tp->begin.lineno == tp->end.lineno) {
533 if (tp->begin.index < linepos)
534 goto report;
536 index = tp->begin.index - linepos;
539 report.tokenptr = report.linebuf + index;
540 report.uclinebuf = linechars;
541 report.uctokenptr = report.uclinebuf + index;
544 * If there's a runtime exception type associated with this error
545 * number, set that as the pending exception. For errors occuring at
546 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
548 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
549 * flag will be set in report.flags. Proper behavior for an error
550 * reporter is to ignore a report with this flag for all but top-level
551 * compilation errors. The exception will remain pending, and so long
552 * as the non-top-level "load", "eval", or "compile" native function
553 * returns false, the top-level reporter will eventually receive the
554 * uncaught exception report.
556 * XXX it'd probably be best if there was only one call to this
557 * function, but there seem to be two error reporter call points.
559 report:
560 onError = cx->errorReporter;
563 * Try to raise an exception only if there isn't one already set --
564 * otherwise the exception will describe the last compile-time error,
565 * which is likely spurious.
567 if (!(flags & TSF_ERROR)) {
568 if (js_ErrorToException(cx, message, &report, NULL, NULL))
569 onError = NULL;
573 * Suppress any compile-time errors that don't occur at the top level.
574 * This may still fail, as interplevel may be zero in contexts where we
575 * don't really want to call the error reporter, as when js is called
576 * by other code which could catch the error.
578 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
579 onError = NULL;
581 if (onError) {
582 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
585 * If debugErrorHook is present then we give it a chance to veto
586 * sending the error on to the regular error reporter.
588 if (hook && !hook(cx, message, &report,
589 cx->debugHooks->debugErrorHookData)) {
590 onError = NULL;
593 if (onError)
594 (*onError)(cx, message, &report);
596 out:
597 if (linebytes)
598 cx->free(linebytes);
599 if (linechars)
600 cx->free(linechars);
601 if (message)
602 cx->free(message);
603 if (report.ucmessage)
604 cx->free((void *)report.ucmessage);
606 if (report.messageArgs) {
607 if (!(flags & JSREPORT_UC)) {
608 i = 0;
609 while (report.messageArgs[i])
610 cx->free((void *)report.messageArgs[i++]);
612 cx->free((void *)report.messageArgs);
615 if (!JSREPORT_IS_WARNING(flags)) {
616 /* Set the error flag to suppress spurious reports. */
617 flags |= TSF_ERROR;
620 return warning;
623 bool
624 js::ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
625 uintN errorNumber, ...)
627 JS_ASSERT(ts || tc);
628 JS_ASSERT(cx == ts->getContext());
630 /* In strict mode code, this is an error, not just a warning. */
631 uintN flags;
632 if ((tc && tc->flags & TCF_STRICT_MODE_CODE) || (ts && ts->isStrictMode()))
633 flags = JSREPORT_ERROR;
634 else if (JS_HAS_STRICT_OPTION(cx))
635 flags = JSREPORT_WARNING;
636 else
637 return true;
639 va_list ap;
640 va_start(ap, errorNumber);
641 bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
642 va_end(ap);
644 return result;
647 bool
648 js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn,
649 uintN flags, uintN errorNumber, ...)
651 va_list ap;
654 * We don't accept a JSTreeContext argument, so we can't implement
655 * JSREPORT_STRICT_MODE_ERROR here. Use ReportStrictModeError instead,
656 * or do the checks in the caller and pass plain old JSREPORT_ERROR.
658 JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
660 va_start(ap, errorNumber);
661 JS_ASSERT(cx == ts->getContext());
662 bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
663 va_end(ap);
665 return result;
#if JS_HAS_XML_SUPPORT

/*
 * Scan an XML entity whose leading '&' has already been consumed, up to its
 * terminating ';', and append the character(s) it denotes to tokenbuf.
 *
 * Accepted forms are decimal and hexadecimal character references (&#N; and
 * &#xH;) and the five predefined entities lt, gt, amp, apos and quot.  A code
 * point in 0x10000..0x10FFFF is stored as a surrogate pair.
 *
 * Returns JS_TRUE on success.  Returns JS_FALSE if appending to tokenbuf
 * fails, or -- after reporting a compile error -- if the entity is
 * unterminated, unknown, or a malformed/illegal character reference.
 */
JSBool
TokenStream::getXMLEntity()
{
    ptrdiff_t offset, length, i;
    int c, d;
    JSBool ispair;
    jschar *bp, digit;
    char *bytes;
    JSErrNum msg;

    JSCharBuffer &tb = tokenbuf;

    /* Put the entity, including the '&' already scanned, in tokenbuf. */
    offset = tb.length();
    if (!tb.append('&'))
        return JS_FALSE;
    while ((c = getChar()) != ';') {
        if (c == EOF || c == '\n') {
            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
            return JS_FALSE;
        }
        if (!tb.append(c))
            return JS_FALSE;
    }

    /*
     * Let length be the number of jschars buffered for this entity: the '&'
     * plus everything up to, but not including, the ';' (which is consumed
     * above without being appended).  So bp[0] is '&' and bp[1] is the first
     * char of the entity's name.
     */
    length = tb.length() - offset;
    bp = tb.begin() + offset;
    c = d = 0;
    ispair = JS_FALSE;
    if (length > 2 && bp[1] == '#') {
        /* Match a well-formed XML Character Reference. */
        i = 2;
        if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
            if (length > 9)     /* at most 6 hex digits allowed */
                goto badncr;
            while (++i < length) {
                digit = bp[i];
                if (!JS7_ISHEX(digit))
                    goto badncr;
                c = (c << 4) + JS7_UNHEX(digit);
            }
        } else {
            while (i < length) {
                digit = bp[i++];
                if (!JS7_ISDEC(digit))
                    goto badncr;
                c = (c * 10) + JS7_UNDEC(digit);

                /*
                 * Reject as soon as the value leaves the Unicode range.  No
                 * later digit could bring it back, and stopping here keeps
                 * the accumulator far from int overflow (testing for a
                 * negative value after the fact relies on undefined
                 * behaviour and misses values that wrap to a positive one).
                 */
                if (c > 0x10FFFF)
                    goto badncr;
            }
        }

        if (0x10000 <= c && c <= 0x10FFFF) {
            /* Form a surrogate pair (c, d) -- c is the high surrogate. */
            d = 0xDC00 + (c & 0x3FF);
            c = 0xD7C0 + (c >> 10);
            ispair = JS_TRUE;
        } else {
            /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
            if (c != 0x9 && c != 0xA && c != 0xD &&
                !(0x20 <= c && c <= 0xD7FF) &&
                !(0xE000 <= c && c <= 0xFFFD)) {
                goto badncr;
            }
        }
    } else {
        /* Try to match one of the five XML 1.0 predefined entities. */
        switch (length) {
          case 3:
            if (bp[2] == 't') {
                if (bp[1] == 'l')
                    c = '<';
                else if (bp[1] == 'g')
                    c = '>';
            }
            break;
          case 4:
            if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
                c = '&';
            break;
          case 5:
            if (bp[3] == 'o') {
                if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
                    c = '\'';
                else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
                    c = '"';
            }
            break;
        }
        if (c == 0) {
            msg = JSMSG_UNKNOWN_XML_ENTITY;
            goto bad;
        }
    }

    /* If we matched, retract tokenbuf and store the entity's value. */
    *bp++ = (jschar) c;
    if (ispair)
        *bp++ = (jschar) d;
    tb.shrinkBy(tb.end() - bp);
    return JS_TRUE;

  badncr:
    msg = JSMSG_BAD_XML_NCR;
  bad:
    /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
    JS_ASSERT((tb.end() - bp) >= 1);
    /* bp still points at the '&'; the message gets the text after it. */
    bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
    if (bytes) {
        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
        cx->free(bytes);
    }
    return JS_FALSE;
}

#endif /* JS_HAS_XML_SUPPORT */
789 * We have encountered a '\': check for a Unicode escape sequence after it,
790 * returning the character code value if we found a Unicode escape sequence.
791 * Otherwise, non-destructively return the original '\'.
793 int32
794 TokenStream::getUnicodeEscape()
796 jschar cp[5];
797 int32 c;
799 if (peekChars(5, cp) && cp[0] == 'u' &&
800 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
801 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
803 c = (((((JS7_UNHEX(cp[1]) << 4)
804 + JS7_UNHEX(cp[2])) << 4)
805 + JS7_UNHEX(cp[3])) << 4)
806 + JS7_UNHEX(cp[4]);
807 skipChars(5);
808 return c;
810 return '\\';
813 Token *
814 TokenStream::newToken(ptrdiff_t adjust)
816 cursor = (cursor + 1) & ntokensMask;
817 Token *tp = &tokens[cursor];
818 tp->ptr = linebuf.ptr + adjust;
819 tp->pos.begin.index = linepos + (tp->ptr - linebuf.base) - ungetpos;
820 tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
821 return tp;
824 static JS_ALWAYS_INLINE JSBool
825 ScanAsSpace(jschar c)
827 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
828 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
829 return JS_TRUE;
830 return JS_FALSE;
833 static JS_ALWAYS_INLINE JSAtom *
834 atomize(JSContext *cx, JSCharBuffer &cb)
836 return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
839 TokenKind
840 TokenStream::getTokenInternal()
842 TokenKind tt;
843 int c, qc;
844 Token *tp;
845 JSAtom *atom;
846 JSBool hadUnicodeEscape;
847 const struct keyword *kw;
848 #if JS_HAS_XML_SUPPORT
849 JSBool inTarget;
850 size_t targetLength;
851 ptrdiff_t contentIndex;
852 #endif
854 #if JS_HAS_XML_SUPPORT
855 if (flags & TSF_XMLTEXTMODE) {
856 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
857 tp = newToken(0);
858 tokenbuf.clear();
859 qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
861 while ((c = getChar()) != qc && c != '<' && c != EOF) {
862 if (c == '&' && qc == '<') {
863 if (!getXMLEntity())
864 goto error;
865 tt = TOK_XMLTEXT;
866 continue;
869 if (!JS_ISXMLSPACE(c))
870 tt = TOK_XMLTEXT;
871 if (!tokenbuf.append(c))
872 goto error;
874 ungetChar(c);
876 if (tokenbuf.empty()) {
877 atom = NULL;
878 } else {
879 atom = atomize(cx, tokenbuf);
880 if (!atom)
881 goto error;
883 tp->pos.end.lineno = lineno;
884 tp->t_op = JSOP_STRING;
885 tp->t_atom = atom;
886 goto out;
889 if (flags & TSF_XMLTAGMODE) {
890 tp = newToken(0);
891 c = getChar();
892 if (JS_ISXMLSPACE(c)) {
893 do {
894 c = getChar();
895 } while (JS_ISXMLSPACE(c));
896 ungetChar(c);
897 tt = TOK_XMLSPACE;
898 goto out;
901 if (c == EOF) {
902 tt = TOK_EOF;
903 goto out;
906 tokenbuf.clear();
907 if (JS_ISXMLNSSTART(c)) {
908 JSBool sawColon = JS_FALSE;
910 if (!tokenbuf.append(c))
911 goto error;
912 while ((c = getChar()) != EOF && JS_ISXMLNAME(c)) {
913 if (c == ':') {
914 int nextc;
916 if (sawColon ||
917 (nextc = peekChar(),
918 ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
919 !JS_ISXMLNAME(nextc))) {
920 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
921 JSMSG_BAD_XML_QNAME);
922 goto error;
924 sawColon = JS_TRUE;
927 if (!tokenbuf.append(c))
928 goto error;
931 ungetChar(c);
932 atom = atomize(cx, tokenbuf);
933 if (!atom)
934 goto error;
935 tp->t_op = JSOP_STRING;
936 tp->t_atom = atom;
937 tt = TOK_XMLNAME;
938 goto out;
941 switch (c) {
942 case '{':
943 if (flags & TSF_XMLONLYMODE)
944 goto bad_xml_char;
945 tt = TOK_LC;
946 goto out;
948 case '=':
949 tt = TOK_ASSIGN;
950 goto out;
952 case '"':
953 case '\'':
954 qc = c;
955 while ((c = getChar()) != qc) {
956 if (c == EOF) {
957 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
958 JSMSG_UNTERMINATED_STRING);
959 goto error;
963 * XML attribute values are double-quoted when pretty-printed,
964 * so escape " if it is expressed directly in a single-quoted
965 * attribute value.
967 if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
968 JS_ASSERT(qc == '\'');
969 if (!tokenbuf.append(js_quot_entity_str,
970 strlen(js_quot_entity_str)))
971 goto error;
972 continue;
975 if (c == '&' && (flags & TSF_XMLONLYMODE)) {
976 if (!getXMLEntity())
977 goto error;
978 continue;
981 if (!tokenbuf.append(c))
982 goto error;
984 atom = atomize(cx, tokenbuf);
985 if (!atom)
986 goto error;
987 tp->pos.end.lineno = lineno;
988 tp->t_op = JSOP_STRING;
989 tp->t_atom = atom;
990 tt = TOK_XMLATTR;
991 goto out;
993 case '>':
994 tt = TOK_XMLTAGC;
995 goto out;
997 case '/':
998 if (matchChar('>')) {
999 tt = TOK_XMLPTAGC;
1000 goto out;
1002 /* FALL THROUGH */
1004 bad_xml_char:
1005 default:
1006 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
1007 goto error;
1009 /* NOTREACHED */
1011 #endif /* JS_HAS_XML_SUPPORT */
1013 retry:
1014 do {
1015 c = getChar();
1016 if (c == '\n') {
1017 flags &= ~TSF_DIRTYLINE;
1018 if (flags & TSF_NEWLINES)
1019 break;
1021 } while (ScanAsSpace((jschar)c));
1023 tp = newToken(-1);
1024 if (c == EOF) {
1025 tt = TOK_EOF;
1026 goto out;
1029 hadUnicodeEscape = JS_FALSE;
1030 if (JS_ISIDSTART(c) ||
1031 (c == '\\' &&
1032 (qc = getUnicodeEscape(),
1033 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1034 if (hadUnicodeEscape)
1035 c = qc;
1036 tokenbuf.clear();
1037 for (;;) {
1038 if (!tokenbuf.append(c))
1039 goto error;
1040 c = getChar();
1041 if (c == '\\') {
1042 qc = getUnicodeEscape();
1043 if (!JS_ISIDENT(qc))
1044 break;
1045 c = qc;
1046 hadUnicodeEscape = JS_TRUE;
1047 } else {
1048 if (!JS_ISIDENT(c))
1049 break;
1052 ungetChar(c);
1055 * Check for keywords unless we saw Unicode escape or parser asks
1056 * to ignore keywords.
1058 if (!hadUnicodeEscape &&
1059 !(flags & TSF_KEYWORD_IS_NAME) &&
1060 (kw = FindKeyword(tokenbuf.begin(), tokenbuf.length()))) {
1061 if (kw->tokentype == TOK_RESERVED) {
1062 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1063 JSMSG_RESERVED_ID, kw->chars)) {
1064 goto error;
1066 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1067 tt = kw->tokentype;
1068 tp->t_op = (JSOp) kw->op;
1069 goto out;
1073 atom = atomize(cx, tokenbuf);
1074 if (!atom)
1075 goto error;
1076 tp->t_op = JSOP_NAME;
1077 tp->t_atom = atom;
1078 tt = TOK_NAME;
1079 goto out;
1082 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(peekChar()))) {
1083 jsint radix;
1084 const jschar *endptr;
1085 jsdouble dval;
1087 radix = 10;
1088 tokenbuf.clear();
1090 if (c == '0') {
1091 if (!tokenbuf.append(c))
1092 goto error;
1093 c = getChar();
1094 if (JS_TOLOWER(c) == 'x') {
1095 if (!tokenbuf.append(c))
1096 goto error;
1097 c = getChar();
1098 radix = 16;
1099 } else if (JS7_ISDEC(c)) {
1100 radix = 8;
1104 while (JS7_ISHEX(c)) {
1105 if (radix < 16) {
1106 if (JS7_ISLET(c))
1107 break;
1109 if (radix == 8) {
1110 /* Octal integer literals are not permitted in strict mode code. */
1111 if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1112 goto error;
1115 * Outside strict mode, we permit 08 and 09 as decimal numbers, which
1116 * makes our behaviour a superset of the ECMA numeric grammar. We
1117 * might not always be so permissive, so we warn about it.
1119 if (c >= '8') {
1120 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1121 JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1122 goto error;
1124 radix = 10;
1128 if (!tokenbuf.append(c))
1129 goto error;
1130 c = getChar();
1133 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1134 if (c == '.') {
1135 do {
1136 if (!tokenbuf.append(c))
1137 goto error;
1138 c = getChar();
1139 } while (JS7_ISDEC(c));
1141 if (JS_TOLOWER(c) == 'e') {
1142 if (!tokenbuf.append(c))
1143 goto error;
1144 c = getChar();
1145 if (c == '+' || c == '-') {
1146 if (!tokenbuf.append(c))
1147 goto error;
1148 c = getChar();
1150 if (!JS7_ISDEC(c)) {
1151 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1152 JSMSG_MISSING_EXPONENT);
1153 goto error;
1155 do {
1156 if (!tokenbuf.append(c))
1157 goto error;
1158 c = getChar();
1159 } while (JS7_ISDEC(c));
1163 if (JS_ISIDSTART(c)) {
1164 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1165 goto error;
1168 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1169 ungetChar(c);
1170 if (!tokenbuf.append(0))
1171 goto error;
1173 if (radix == 10) {
1174 if (!js_strtod(cx, tokenbuf.begin(), tokenbuf.end(), &endptr, &dval)) {
1175 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1176 goto error;
1178 } else {
1179 if (!js_strtointeger(cx, tokenbuf.begin(), tokenbuf.end(),
1180 &endptr, radix, &dval)) {
1181 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1182 goto error;
1185 tp->t_dval = dval;
1186 tt = TOK_NUMBER;
1187 goto out;
1190 if (c == '"' || c == '\'') {
1191 qc = c;
1192 tokenbuf.clear();
1193 while ((c = getChar()) != qc) {
1194 if (c == '\n' || c == EOF) {
1195 ungetChar(c);
1196 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1197 JSMSG_UNTERMINATED_STRING);
1198 goto error;
1200 if (c == '\\') {
1201 switch (c = getChar()) {
1202 case 'b': c = '\b'; break;
1203 case 'f': c = '\f'; break;
1204 case 'n': c = '\n'; break;
1205 case 'r': c = '\r'; break;
1206 case 't': c = '\t'; break;
1207 case 'v': c = '\v'; break;
1209 default:
1210 if ('0' <= c && c < '8') {
1211 int32 val = JS7_UNDEC(c);
1213 c = peekChar();
1214 /* Strict mode code allows only \0, then a non-digit. */
1215 if (val != 0 || JS7_ISDEC(c)) {
1216 if (!ReportStrictModeError(cx, this, NULL, NULL,
1217 JSMSG_DEPRECATED_OCTAL)) {
1218 goto error;
1221 if ('0' <= c && c < '8') {
1222 val = 8 * val + JS7_UNDEC(c);
1223 getChar();
1224 c = peekChar();
1225 if ('0' <= c && c < '8') {
1226 int32 save = val;
1227 val = 8 * val + JS7_UNDEC(c);
1228 if (val <= 0377)
1229 getChar();
1230 else
1231 val = save;
1235 c = (jschar)val;
1236 } else if (c == 'u') {
1237 jschar cp[4];
1238 if (peekChars(4, cp) &&
1239 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1240 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1241 c = (((((JS7_UNHEX(cp[0]) << 4)
1242 + JS7_UNHEX(cp[1])) << 4)
1243 + JS7_UNHEX(cp[2])) << 4)
1244 + JS7_UNHEX(cp[3]);
1245 skipChars(4);
1247 } else if (c == 'x') {
1248 jschar cp[2];
1249 if (peekChars(2, cp) &&
1250 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1251 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1252 skipChars(2);
1254 } else if (c == '\n') {
1255 /* ECMA follows C by removing escaped newlines. */
1256 continue;
1258 break;
1261 if (!tokenbuf.append(c))
1262 goto error;
1264 atom = atomize(cx, tokenbuf);
1265 if (!atom)
1266 goto error;
1267 tp->pos.end.lineno = lineno;
1268 tp->t_op = JSOP_STRING;
1269 tp->t_atom = atom;
1270 tt = TOK_STRING;
1271 goto out;
1274 switch (c) {
1275 case '\n': tt = TOK_EOL; goto eol_out;
1276 case ';': tt = TOK_SEMI; break;
1277 case '[': tt = TOK_LB; break;
1278 case ']': tt = TOK_RB; break;
1279 case '{': tt = TOK_LC; break;
1280 case '}': tt = TOK_RC; break;
1281 case '(': tt = TOK_LP; break;
1282 case ')': tt = TOK_RP; break;
1283 case ',': tt = TOK_COMMA; break;
1284 case '?': tt = TOK_HOOK; break;
1286 case '.':
1287 #if JS_HAS_XML_SUPPORT
1288 if (matchChar(c))
1289 tt = TOK_DBLDOT;
1290 else
1291 #endif
1292 tt = TOK_DOT;
1293 break;
1295 case ':':
1296 #if JS_HAS_XML_SUPPORT
1297 if (matchChar(c)) {
1298 tt = TOK_DBLCOLON;
1299 break;
1301 #endif
1303 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1304 * object initializer, likewise for setter.
1306 tp->t_op = JSOP_NOP;
1307 tt = TOK_COLON;
1308 break;
1310 case '|':
1311 if (matchChar(c)) {
1312 tt = TOK_OR;
1313 } else if (matchChar('=')) {
1314 tp->t_op = JSOP_BITOR;
1315 tt = TOK_ASSIGN;
1316 } else {
1317 tt = TOK_BITOR;
1319 break;
1321 case '^':
1322 if (matchChar('=')) {
1323 tp->t_op = JSOP_BITXOR;
1324 tt = TOK_ASSIGN;
1325 } else {
1326 tt = TOK_BITXOR;
1328 break;
1330 case '&':
1331 if (matchChar(c)) {
1332 tt = TOK_AND;
1333 } else if (matchChar('=')) {
1334 tp->t_op = JSOP_BITAND;
1335 tt = TOK_ASSIGN;
1336 } else {
1337 tt = TOK_BITAND;
1339 break;
1341 case '=':
1342 if (matchChar(c)) {
1343 tp->t_op = matchChar(c) ? JSOP_STRICTEQ : JSOP_EQ;
1344 tt = TOK_EQOP;
1345 } else {
1346 tp->t_op = JSOP_NOP;
1347 tt = TOK_ASSIGN;
1349 break;
1351 case '!':
1352 if (matchChar('=')) {
1353 tp->t_op = matchChar('=') ? JSOP_STRICTNE : JSOP_NE;
1354 tt = TOK_EQOP;
1355 } else {
1356 tp->t_op = JSOP_NOT;
1357 tt = TOK_UNARYOP;
1359 break;
1361 #if JS_HAS_XML_SUPPORT
1362 case '@':
1363 tt = TOK_AT;
1364 break;
1365 #endif
1367 case '<':
1368 #if JS_HAS_XML_SUPPORT
1370 * After much testing, it's clear that Postel's advice to protocol
1371 * designers ("be liberal in what you accept, and conservative in what
1372 * you send") invites a natural-law repercussion for JS as "protocol":
1374 * "If you are liberal in what you accept, others will utterly fail to
1375 * be conservative in what they send."
1377 * Which means you will get <!-- comments to end of line in the middle
1378 * of .js files, and after if conditions whose then statements are on
1379 * the next line, and other wonders. See at least the following bugs:
1380 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1381 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1382 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1384 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1385 * an XML comment or CDATA literal. Instead, we always scan <! as the
1386 * start of an HTML comment hack to end of line, used since Netscape 2
1387 * to hide script tag content from script-unaware browsers.
1389 * But this still leaves XML resources with certain internal structure
1390 * vulnerable to being loaded as script cross-origin, and some internal
1391 * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1392 * source consists only of XML literals. See:
1394 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1396 * The check for this is in jsparse.cpp, Compiler::compileScript.
1398 if ((flags & TSF_OPERAND) &&
1399 (JS_HAS_XML_OPTION(cx) || peekChar() != '!')) {
1400 /* Check for XML comment or CDATA section. */
1401 if (matchChar('!')) {
1402 tokenbuf.clear();
1404 /* Scan XML comment. */
1405 if (matchChar('-')) {
1406 if (!matchChar('-'))
1407 goto bad_xml_markup;
1408 while ((c = getChar()) != '-' || !matchChar('-')) {
1409 if (c == EOF)
1410 goto bad_xml_markup;
1411 if (!tokenbuf.append(c))
1412 goto error;
1414 tt = TOK_XMLCOMMENT;
1415 tp->t_op = JSOP_XMLCOMMENT;
1416 goto finish_xml_markup;
1419 /* Scan CDATA section. */
1420 if (matchChar('[')) {
1421 jschar cp[6];
1422 if (peekChars(6, cp) &&
1423 cp[0] == 'C' &&
1424 cp[1] == 'D' &&
1425 cp[2] == 'A' &&
1426 cp[3] == 'T' &&
1427 cp[4] == 'A' &&
1428 cp[5] == '[') {
1429 skipChars(6);
1430 while ((c = getChar()) != ']' ||
1431 !peekChars(2, cp) ||
1432 cp[0] != ']' ||
1433 cp[1] != '>') {
1434 if (c == EOF)
1435 goto bad_xml_markup;
1436 if (!tokenbuf.append(c))
1437 goto error;
1439 getChar(); /* discard ] but not > */
1440 tt = TOK_XMLCDATA;
1441 tp->t_op = JSOP_XMLCDATA;
1442 goto finish_xml_markup;
1444 goto bad_xml_markup;
1448 /* Check for processing instruction. */
1449 if (matchChar('?')) {
1450 inTarget = JS_TRUE;
1451 targetLength = 0;
1452 contentIndex = -1;
1454 tokenbuf.clear();
1455 while ((c = getChar()) != '?' || peekChar() != '>') {
1456 if (c == EOF)
1457 goto bad_xml_markup;
1458 if (inTarget) {
1459 if (JS_ISXMLSPACE(c)) {
1460 if (tokenbuf.empty())
1461 goto bad_xml_markup;
1462 inTarget = JS_FALSE;
1463 } else {
1464 if (!(tokenbuf.empty()
1465 ? JS_ISXMLNSSTART(c)
1466 : JS_ISXMLNS(c))) {
1467 goto bad_xml_markup;
1469 ++targetLength;
1471 } else {
1472 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1473 contentIndex = tokenbuf.length();
1475 if (!tokenbuf.append(c))
1476 goto error;
1478 if (targetLength == 0)
1479 goto bad_xml_markup;
1480 if (contentIndex < 0) {
1481 atom = cx->runtime->atomState.emptyAtom;
1482 } else {
1483 atom = js_AtomizeChars(cx,
1484 tokenbuf.begin() + contentIndex,
1485 tokenbuf.length() - contentIndex,
1487 if (!atom)
1488 goto error;
1490 tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1491 tp->t_atom2 = atom;
1492 tt = TOK_XMLPI;
1494 finish_xml_markup:
1495 if (!matchChar('>'))
1496 goto bad_xml_markup;
1497 atom = atomize(cx, tokenbuf);
1498 if (!atom)
1499 goto error;
1500 tp->t_atom = atom;
1501 tp->pos.end.lineno = lineno;
1502 goto out;
1505 /* An XML start-of-tag character. */
1506 tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1507 goto out;
1509 bad_xml_markup:
1510 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1511 goto error;
1513 #endif /* JS_HAS_XML_SUPPORT */
1515 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1516 if (matchChar('!')) {
1517 if (matchChar('-')) {
1518 if (matchChar('-')) {
1519 flags |= TSF_IN_HTML_COMMENT;
1520 goto skipline;
1522 ungetChar('-');
1524 ungetChar('!');
1526 if (matchChar(c)) {
1527 tp->t_op = JSOP_LSH;
1528 tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1529 } else {
1530 tp->t_op = matchChar('=') ? JSOP_LE : JSOP_LT;
1531 tt = TOK_RELOP;
1533 break;
1535 case '>':
1536 if (matchChar(c)) {
1537 tp->t_op = matchChar(c) ? JSOP_URSH : JSOP_RSH;
1538 tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1539 } else {
1540 tp->t_op = matchChar('=') ? JSOP_GE : JSOP_GT;
1541 tt = TOK_RELOP;
1543 break;
1545 case '*':
1546 tp->t_op = JSOP_MUL;
1547 tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
1548 break;
1550 case '/':
1551 if (matchChar('/')) {
1553 * Hack for source filters such as the Mozilla XUL preprocessor:
1554 * "//@line 123\n" sets the number of the *next* line after the
1555 * comment to 123.
1557 if (JS_HAS_ATLINE_OPTION(cx)) {
1558 jschar cp[5];
1559 uintN i, line, temp;
1560 char filenameBuf[1024];
1562 if (peekChars(5, cp) &&
1563 cp[0] == '@' &&
1564 cp[1] == 'l' &&
1565 cp[2] == 'i' &&
1566 cp[3] == 'n' &&
1567 cp[4] == 'e') {
1568 skipChars(5);
1569 while ((c = getChar()) != '\n' && ScanAsSpace((jschar)c))
1570 continue;
1571 if (JS7_ISDEC(c)) {
1572 line = JS7_UNDEC(c);
1573 while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1574 temp = 10 * line + JS7_UNDEC(c);
1575 if (temp < line) {
1576 /* Ignore overlarge line numbers. */
1577 goto skipline;
1579 line = temp;
1581 while (c != '\n' && ScanAsSpace((jschar)c))
1582 c = getChar();
1583 i = 0;
1584 if (c == '"') {
1585 while ((c = getChar()) != EOF && c != '"') {
1586 if (c == '\n') {
1587 ungetChar(c);
1588 goto skipline;
1590 if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1591 goto skipline;
1592 filenameBuf[i++] = (char) c;
1594 if (c == '"') {
1595 while ((c = getChar()) != '\n' &&
1596 ScanAsSpace((jschar)c)) {
1597 continue;
1601 filenameBuf[i] = '\0';
1602 if (c == '\n') {
1603 if (i > 0) {
1604 if (flags & TSF_OWNFILENAME)
1605 cx->free((void *) filename);
1606 filename = JS_strdup(cx, filenameBuf);
1607 if (!filename)
1608 goto error;
1609 flags |= TSF_OWNFILENAME;
1611 lineno = line;
1614 ungetChar(c);
1618 skipline:
1619 /* Optimize line skipping if we are not in an HTML comment. */
1620 if (flags & TSF_IN_HTML_COMMENT) {
1621 while ((c = getChar()) != EOF && c != '\n') {
1622 if (c == '-' && matchChar('-') && matchChar('>'))
1623 flags &= ~TSF_IN_HTML_COMMENT;
1625 } else {
1626 while ((c = getChar()) != EOF && c != '\n')
1627 continue;
1629 ungetChar(c);
1630 cursor = (cursor - 1) & ntokensMask;
1631 goto retry;
1634 if (matchChar('*')) {
1635 uintN linenoBefore = lineno;
1636 while ((c = getChar()) != EOF &&
1637 !(c == '*' && matchChar('/'))) {
1638 /* Ignore all characters until comment close. */
1640 if (c == EOF) {
1641 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1642 JSMSG_UNTERMINATED_COMMENT);
1643 goto error;
1645 if ((flags & TSF_NEWLINES) && linenoBefore != lineno) {
1646 flags &= ~TSF_DIRTYLINE;
1647 tt = TOK_EOL;
1648 goto eol_out;
1650 cursor = (cursor - 1) & ntokensMask;
1651 goto retry;
1654 if (flags & TSF_OPERAND) {
1655 uintN reflags, length;
1656 JSBool inCharClass = JS_FALSE;
1658 tokenbuf.clear();
1659 for (;;) {
1660 c = getChar();
1661 if (c == '\n' || c == EOF) {
1662 ungetChar(c);
1663 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1664 JSMSG_UNTERMINATED_REGEXP);
1665 goto error;
1667 if (c == '\\') {
1668 if (!tokenbuf.append(c))
1669 goto error;
1670 c = getChar();
1671 } else if (c == '[') {
1672 inCharClass = JS_TRUE;
1673 } else if (c == ']') {
1674 inCharClass = JS_FALSE;
1675 } else if (c == '/' && !inCharClass) {
1676 /* For compat with IE, allow unescaped / in char classes. */
1677 break;
1679 if (!tokenbuf.append(c))
1680 goto error;
1682 for (reflags = 0, length = tokenbuf.length() + 1; ; length++) {
1683 c = peekChar();
1684 if (c == 'g' && !(reflags & JSREG_GLOB))
1685 reflags |= JSREG_GLOB;
1686 else if (c == 'i' && !(reflags & JSREG_FOLD))
1687 reflags |= JSREG_FOLD;
1688 else if (c == 'm' && !(reflags & JSREG_MULTILINE))
1689 reflags |= JSREG_MULTILINE;
1690 else if (c == 'y' && !(reflags & JSREG_STICKY))
1691 reflags |= JSREG_STICKY;
1692 else
1693 break;
1694 getChar();
1696 c = peekChar();
1697 if (JS7_ISLET(c)) {
1698 char buf[2] = { '\0' };
1699 tp->pos.begin.index += length + 1;
1700 buf[0] = (char)c;
1701 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
1702 buf);
1703 (void) getChar();
1704 goto error;
1706 tp->t_reflags = reflags;
1707 tt = TOK_REGEXP;
1708 break;
1711 tp->t_op = JSOP_DIV;
1712 tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1713 break;
1715 case '%':
1716 tp->t_op = JSOP_MOD;
1717 tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1718 break;
1720 case '~':
1721 tp->t_op = JSOP_BITNOT;
1722 tt = TOK_UNARYOP;
1723 break;
1725 case '+':
1726 if (matchChar('=')) {
1727 tp->t_op = JSOP_ADD;
1728 tt = TOK_ASSIGN;
1729 } else if (matchChar(c)) {
1730 tt = TOK_INC;
1731 } else {
1732 tp->t_op = JSOP_POS;
1733 tt = TOK_PLUS;
1735 break;
1737 case '-':
1738 if (matchChar('=')) {
1739 tp->t_op = JSOP_SUB;
1740 tt = TOK_ASSIGN;
1741 } else if (matchChar(c)) {
1742 if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
1743 flags &= ~TSF_IN_HTML_COMMENT;
1744 goto skipline;
1746 tt = TOK_DEC;
1747 } else {
1748 tp->t_op = JSOP_NEG;
1749 tt = TOK_MINUS;
1751 break;
1753 #if JS_HAS_SHARP_VARS
1754 case '#':
1756 uint32 n;
1758 c = getChar();
1759 if (!JS7_ISDEC(c)) {
1760 ungetChar(c);
1761 goto badchar;
1763 n = (uint32)JS7_UNDEC(c);
1764 for (;;) {
1765 c = getChar();
1766 if (!JS7_ISDEC(c))
1767 break;
1768 n = 10 * n + JS7_UNDEC(c);
1769 if (n >= UINT16_LIMIT) {
1770 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_SHARPVAR_TOO_BIG);
1771 goto error;
1774 tp->t_dval = (jsdouble) n;
1775 if (JS_HAS_STRICT_OPTION(cx) &&
1776 (c == '=' || c == '#')) {
1777 char buf[20];
1778 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1779 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1780 JSMSG_DEPRECATED_USAGE, buf)) {
1781 goto error;
1784 if (c == '=')
1785 tt = TOK_DEFSHARP;
1786 else if (c == '#')
1787 tt = TOK_USESHARP;
1788 else
1789 goto badchar;
1790 break;
1792 #endif /* JS_HAS_SHARP_VARS */
1794 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1795 badchar:
1796 #endif
1798 default:
1799 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
1800 goto error;
1803 out:
1804 JS_ASSERT(tt != TOK_EOL);
1805 flags |= TSF_DIRTYLINE;
1807 eol_out:
1808 JS_ASSERT(tt < TOK_LIMIT);
1809 tp->pos.end.index = linepos + (linebuf.ptr - linebuf.base) - ungetpos;
1810 tp->type = tt;
1811 return tt;
1813 error:
1814 tt = TOK_ERROR;
1815 flags |= TSF_ERROR;
1816 goto out;