Bug 507665 - Avoid imacros for JSOP_GETELEM and JSOP_CALLELEM. r=gal.
[mozilla-central.git] / js / src / jsscan.cpp
blob90f3547671d4a20ea1d43be2e398893b0b4836e3
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS lexical scanner.
44 #include <stdio.h> /* first to avoid trouble on some systems */
45 #include <errno.h>
46 #include <limits.h>
47 #include <math.h>
48 #ifdef HAVE_MEMORY_H
49 #include <memory.h>
50 #endif
51 #include <stdarg.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsstdint.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsbit.h"
58 #include "jsutil.h" /* Added by JSIFY */
59 #include "jsdtoa.h"
60 #include "jsprf.h"
61 #include "jsapi.h"
62 #include "jsatom.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsemit.h"
66 #include "jsexn.h"
67 #include "jsnum.h"
68 #include "jsopcode.h"
69 #include "jsparse.h"
70 #include "jsregexp.h"
71 #include "jsscan.h"
72 #include "jsscript.h"
73 #include "jsstaticcheck.h"
75 #if JS_HAS_XML_SUPPORT
76 #include "jsxml.h"
77 #endif
79 #define JS_KEYWORD(keyword, type, op, version) \
80 const char js_##keyword##_str[] = #keyword;
81 #include "jskeyword.tbl"
82 #undef JS_KEYWORD
84 struct keyword {
85 const char *chars; /* C string with keyword text */
86 JSTokenType tokentype; /* JSTokenType */
87 JSOp op; /* JSOp */
88 JSVersion version; /* JSVersion */
91 static const struct keyword keyword_defs[] = {
92 #define JS_KEYWORD(keyword, type, op, version) \
93 {js_##keyword##_str, type, op, version},
94 #include "jskeyword.tbl"
95 #undef JS_KEYWORD
98 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
100 static const struct keyword *
101 FindKeyword(const jschar *s, size_t length)
103 register size_t i;
104 const struct keyword *kw;
105 const char *chars;
107 JS_ASSERT(length != 0);
109 #define JSKW_LENGTH() length
110 #define JSKW_AT(column) s[column]
111 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
112 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
113 #define JSKW_NO_MATCH() goto no_match;
114 #include "jsautokw.h"
115 #undef JSKW_NO_MATCH
116 #undef JSKW_TEST_GUESS
117 #undef JSKW_GOT_MATCH
118 #undef JSKW_AT
119 #undef JSKW_LENGTH
121 got_match:
122 return &keyword_defs[i];
124 test_guess:
125 kw = &keyword_defs[i];
126 chars = kw->chars;
127 do {
128 if (*s++ != (unsigned char)(*chars++))
129 goto no_match;
130 } while (--length != 0);
131 return kw;
133 no_match:
134 return NULL;
137 JSTokenType
138 js_CheckKeyword(const jschar *str, size_t length)
140 const struct keyword *kw;
142 JS_ASSERT(length != 0);
143 kw = FindKeyword(str, length);
144 return kw ? kw->tokentype : TOK_EOF;
147 JS_FRIEND_API(void)
148 js_MapKeywords(void (*mapfun)(const char *))
150 size_t i;
152 for (i = 0; i != KEYWORD_COUNT; ++i)
153 mapfun(keyword_defs[i].chars);
156 JSBool
157 js_IsIdentifier(JSString *str)
159 size_t length;
160 jschar c;
161 const jschar *chars, *end;
163 str->getCharsAndLength(chars, length);
164 if (length == 0)
165 return JS_FALSE;
166 c = *chars;
167 if (!JS_ISIDSTART(c))
168 return JS_FALSE;
169 end = chars + length;
170 while (++chars != end) {
171 c = *chars;
172 if (!JS_ISIDENT(c))
173 return JS_FALSE;
175 return JS_TRUE;
178 #define TBMIN 64
180 static JSBool
181 GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
183 JSContext *cx;
184 jschar *base;
185 ptrdiff_t offset, length;
186 size_t tbsize;
187 JSArenaPool *pool;
189 cx = (JSContext*) sb->data;
190 base = sb->base;
191 offset = sb->ptr - base;
192 pool = &cx->tempPool;
193 if (!base) {
194 tbsize = TBMIN * sizeof(jschar);
195 length = TBMIN - 1;
196 JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
197 } else {
198 length = sb->limit - base;
199 if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
200 base = NULL;
201 } else {
202 tbsize = (length + 1) * sizeof(jschar);
203 length += length + 1;
204 JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
207 if (!base) {
208 js_ReportOutOfScriptQuota(cx);
209 sb->base = STRING_BUFFER_ERROR_BASE;
210 return JS_FALSE;
212 sb->base = base;
213 sb->limit = base + length;
214 sb->ptr = base + offset;
215 return JS_TRUE;
218 JSBool
219 js_InitTokenStream(JSContext *cx, JSTokenStream *ts,
220 const jschar *base, size_t length,
221 FILE *fp, const char *filename, uintN lineno)
223 jschar *buf;
224 size_t nb;
226 JS_ASSERT_IF(fp, !base);
227 JS_ASSERT_IF(!base, length == 0);
228 nb = fp
229 ? 2 * JS_LINE_LIMIT * sizeof(jschar)
230 : JS_LINE_LIMIT * sizeof(jschar);
231 JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
232 if (!buf) {
233 js_ReportOutOfScriptQuota(cx);
234 return JS_FALSE;
236 memset(buf, 0, nb);
237 memset(ts, 0, sizeof(*ts));
238 ts->filename = filename;
239 ts->lineno = lineno;
240 ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = buf;
241 if (fp) {
242 ts->file = fp;
243 ts->userbuf.base = buf + JS_LINE_LIMIT;
244 ts->userbuf.ptr = ts->userbuf.limit = ts->userbuf.base + JS_LINE_LIMIT;
245 } else {
246 ts->userbuf.base = (jschar *)base;
247 ts->userbuf.limit = (jschar *)base + length;
248 ts->userbuf.ptr = (jschar *)base;
250 ts->tokenbuf.grow = GrowTokenBuf;
251 ts->tokenbuf.data = cx;
252 ts->listener = cx->debugHooks->sourceHandler;
253 ts->listenerData = cx->debugHooks->sourceHandlerData;
254 return JS_TRUE;
257 void
258 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
260 if (ts->flags & TSF_OWNFILENAME)
261 cx->free((void *) ts->filename);
264 JS_FRIEND_API(int)
265 js_fgets(char *buf, int size, FILE *file)
267 int n, i, c;
268 JSBool crflag;
270 n = size - 1;
271 if (n < 0)
272 return -1;
274 crflag = JS_FALSE;
275 for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
276 buf[i] = c;
277 if (c == '\n') { /* any \n ends a line */
278 i++; /* keep the \n; we know there is room for \0 */
279 break;
281 if (crflag) { /* \r not followed by \n ends line at the \r */
282 ungetc(c, file);
283 break; /* and overwrite c in buf with \0 */
285 crflag = (c == '\r');
288 buf[i] = '\0';
289 return i;
292 static int32
293 GetChar(JSTokenStream *ts)
295 int32 c;
296 ptrdiff_t i, j, len, olen;
297 JSBool crflag;
298 char cbuf[JS_LINE_LIMIT];
299 jschar *ubuf, *nl;
301 if (ts->ungetpos != 0) {
302 c = ts->ungetbuf[--ts->ungetpos];
303 } else {
304 if (ts->linebuf.ptr == ts->linebuf.limit) {
305 len = ts->userbuf.limit - ts->userbuf.ptr;
306 if (len <= 0) {
307 if (!ts->file) {
308 ts->flags |= TSF_EOF;
309 return EOF;
312 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
313 crflag = (ts->flags & TSF_CRFLAG) != 0;
314 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
315 if (len <= 0) {
316 ts->flags |= TSF_EOF;
317 return EOF;
319 olen = len;
320 ubuf = ts->userbuf.base;
321 i = 0;
322 if (crflag) {
323 ts->flags &= ~TSF_CRFLAG;
324 if (cbuf[0] != '\n') {
325 ubuf[i++] = '\n';
326 len++;
327 ts->linepos--;
330 for (j = 0; i < len; i++, j++)
331 ubuf[i] = (jschar) (unsigned char) cbuf[j];
332 ts->userbuf.limit = ubuf + len;
333 ts->userbuf.ptr = ubuf;
335 if (ts->listener) {
336 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
337 &ts->listenerTSData, ts->listenerData);
340 nl = ts->saveEOL;
341 if (!nl) {
343 * Any one of \n, \r, or \r\n ends a line (the longest
344 * match wins). Also allow the Unicode line and paragraph
345 * separators.
347 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
349 * Try to prevent value-testing on most characters by
350 * filtering out characters that aren't 000x or 202x.
352 if ((*nl & 0xDFD0) == 0) {
353 if (*nl == '\n')
354 break;
355 if (*nl == '\r') {
356 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
357 nl++;
358 break;
360 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
361 break;
367 * If there was a line terminator, copy thru it into linebuf.
368 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
370 if (nl < ts->userbuf.limit)
371 len = (nl - ts->userbuf.ptr) + 1;
372 if (len >= JS_LINE_LIMIT) {
373 len = JS_LINE_LIMIT - 1;
374 ts->saveEOL = nl;
375 } else {
376 ts->saveEOL = NULL;
378 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
379 ts->userbuf.ptr += len;
380 olen = len;
383 * Make sure linebuf contains \n for EOL (don't do this in
384 * userbuf because the user's string might be readonly).
386 if (nl < ts->userbuf.limit) {
387 if (*nl == '\r') {
388 if (ts->linebuf.base[len-1] == '\r') {
390 * Does the line segment end in \r? We must check
391 * for a \n at the front of the next segment before
392 * storing a \n into linebuf. This case matters
393 * only when we're reading from a file.
395 if (nl + 1 == ts->userbuf.limit && ts->file) {
396 len--;
397 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
398 if (len == 0) {
400 * This can happen when a segment ends in
401 * \r\r. Start over. ptr == limit in this
402 * case, so we'll fall into buffer-filling
403 * code.
405 return GetChar(ts);
407 } else {
408 ts->linebuf.base[len-1] = '\n';
411 } else if (*nl == '\n') {
412 if (nl > ts->userbuf.base &&
413 nl[-1] == '\r' &&
414 ts->linebuf.base[len-2] == '\r') {
415 len--;
416 JS_ASSERT(ts->linebuf.base[len] == '\n');
417 ts->linebuf.base[len-1] = '\n';
419 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
420 ts->linebuf.base[len-1] = '\n';
424 /* Reset linebuf based on adjusted segment length. */
425 ts->linebuf.limit = ts->linebuf.base + len;
426 ts->linebuf.ptr = ts->linebuf.base;
428 /* Update position of linebuf within physical userbuf line. */
429 if (!(ts->flags & TSF_NLFLAG))
430 ts->linepos += ts->linelen;
431 else
432 ts->linepos = 0;
433 if (ts->linebuf.limit[-1] == '\n')
434 ts->flags |= TSF_NLFLAG;
435 else
436 ts->flags &= ~TSF_NLFLAG;
438 /* Update linelen from original segment length. */
439 ts->linelen = olen;
441 c = *ts->linebuf.ptr++;
443 if (c == '\n')
444 ts->lineno++;
445 return c;
448 static void
449 UngetChar(JSTokenStream *ts, int32 c)
451 if (c == EOF)
452 return;
453 JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
454 if (c == '\n')
455 ts->lineno--;
456 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
459 static int32
460 PeekChar(JSTokenStream *ts)
462 int32 c;
464 c = GetChar(ts);
465 UngetChar(ts, c);
466 return c;
470 * Peek n chars ahead into ts. Return true if n chars were read, false if
471 * there weren't enough characters in the input stream. This function cannot
472 * be used to peek into or past a newline.
474 static JSBool
475 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
477 intN i, j;
478 int32 c;
480 for (i = 0; i < n; i++) {
481 c = GetChar(ts);
482 if (c == EOF)
483 break;
484 if (c == '\n') {
485 UngetChar(ts, c);
486 break;
488 cp[i] = (jschar)c;
490 for (j = i - 1; j >= 0; j--)
491 UngetChar(ts, cp[j]);
492 return i == n;
495 static void
496 SkipChars(JSTokenStream *ts, intN n)
498 while (--n >= 0)
499 GetChar(ts);
502 static JSBool
503 MatchChar(JSTokenStream *ts, int32 expect)
505 int32 c;
507 c = GetChar(ts);
508 if (c == expect)
509 return JS_TRUE;
510 UngetChar(ts, c);
511 return JS_FALSE;
514 JSBool
515 js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
516 uintN flags, uintN errorNumber, ...)
518 JSErrorReport report;
519 char *message;
520 size_t linelength;
521 jschar *linechars;
522 char *linebytes;
523 va_list ap;
524 JSBool warning, ok;
525 JSTokenPos *tp;
526 uintN index, i;
527 JSErrorReporter onError;
529 JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
531 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
532 return JS_TRUE;
534 memset(&report, 0, sizeof report);
535 report.flags = flags;
536 report.errorNumber = errorNumber;
537 message = NULL;
538 linechars = NULL;
539 linebytes = NULL;
541 MUST_FLOW_THROUGH("out");
542 va_start(ap, errorNumber);
543 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
544 errorNumber, &message, &report, &warning,
545 !(flags & JSREPORT_UC), ap);
546 va_end(ap);
547 if (!ok) {
548 warning = JS_FALSE;
549 goto out;
552 report.filename = ts->filename;
554 if (pn) {
555 report.lineno = pn->pn_pos.begin.lineno;
556 if (report.lineno != ts->lineno)
557 goto report;
558 tp = &pn->pn_pos;
559 } else {
560 /* Point to the current token, not the next one to get. */
561 tp = &ts->tokens[ts->cursor].pos;
563 report.lineno = ts->lineno;
564 linelength = ts->linebuf.limit - ts->linebuf.base;
565 linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
566 if (!linechars) {
567 warning = JS_FALSE;
568 goto out;
570 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
571 linechars[linelength] = 0;
572 linebytes = js_DeflateString(cx, linechars, linelength);
573 if (!linebytes) {
574 warning = JS_FALSE;
575 goto out;
577 report.linebuf = linebytes;
580 * FIXME: What should instead happen here is that we should
581 * find error-tokens in userbuf, if !ts->file. That will
582 * allow us to deliver a more helpful error message, which
583 * includes all or part of the bad string or bad token. The
584 * code here yields something that looks truncated.
585 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
587 index = 0;
588 if (tp->begin.lineno == tp->end.lineno) {
589 if (tp->begin.index < ts->linepos)
590 goto report;
592 index = tp->begin.index - ts->linepos;
595 report.tokenptr = report.linebuf + index;
596 report.uclinebuf = linechars;
597 report.uctokenptr = report.uclinebuf + index;
600 * If there's a runtime exception type associated with this error
601 * number, set that as the pending exception. For errors occuring at
602 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
604 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
605 * flag will be set in report.flags. Proper behavior for an error
606 * reporter is to ignore a report with this flag for all but top-level
607 * compilation errors. The exception will remain pending, and so long
608 * as the non-top-level "load", "eval", or "compile" native function
609 * returns false, the top-level reporter will eventually receive the
610 * uncaught exception report.
612 * XXX it'd probably be best if there was only one call to this
613 * function, but there seem to be two error reporter call points.
615 report:
616 onError = cx->errorReporter;
619 * Try to raise an exception only if there isn't one already set --
620 * otherwise the exception will describe the last compile-time error,
621 * which is likely spurious.
623 if (!(ts->flags & TSF_ERROR)) {
624 if (js_ErrorToException(cx, message, &report))
625 onError = NULL;
629 * Suppress any compile-time errors that don't occur at the top level.
630 * This may still fail, as interplevel may be zero in contexts where we
631 * don't really want to call the error reporter, as when js is called
632 * by other code which could catch the error.
634 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
635 onError = NULL;
637 if (onError) {
638 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
641 * If debugErrorHook is present then we give it a chance to veto
642 * sending the error on to the regular error reporter.
644 if (hook && !hook(cx, message, &report,
645 cx->debugHooks->debugErrorHookData)) {
646 onError = NULL;
649 if (onError)
650 (*onError)(cx, message, &report);
652 out:
653 if (linebytes)
654 cx->free(linebytes);
655 if (linechars)
656 cx->free(linechars);
657 if (message)
658 cx->free(message);
659 if (report.ucmessage)
660 cx->free((void *)report.ucmessage);
662 if (report.messageArgs) {
663 if (!(flags & JSREPORT_UC)) {
664 i = 0;
665 while (report.messageArgs[i])
666 cx->free((void *)report.messageArgs[i++]);
668 cx->free((void *)report.messageArgs);
671 if (!JSREPORT_IS_WARNING(flags)) {
672 /* Set the error flag to suppress spurious reports. */
673 ts->flags |= TSF_ERROR;
676 return warning;
679 static JSBool
680 GrowStringBuffer(JSStringBuffer *sb, size_t amount)
682 ptrdiff_t offset = sb->ptr - sb->base;
683 JS_ASSERT(offset >= 0);
686 * This addition needs an overflow check, but we can defer bounding against
687 * ~size_t(0) / sizeof(jschar) till later to consolidate that test.
689 size_t newlength = offset + amount + 1;
690 if (size_t(offset) < newlength) {
691 /* Grow by powers of two until 16MB, then grow by that chunk size. */
692 const size_t CHUNK_SIZE_MASK = JS_BITMASK(24);
694 if (newlength <= CHUNK_SIZE_MASK)
695 newlength = JS_BIT(JS_CeilingLog2(newlength));
696 else if (newlength & CHUNK_SIZE_MASK)
697 newlength = (newlength | CHUNK_SIZE_MASK) + 1;
699 /* Now do the full overflow check. */
700 if (size_t(offset) < newlength && newlength < ~size_t(0) / sizeof(jschar)) {
701 jschar *bp = (jschar *) js_realloc(sb->base, newlength * sizeof(jschar));
702 if (bp) {
703 sb->base = bp;
704 sb->ptr = bp + offset;
705 sb->limit = bp + newlength - 1;
706 return true;
711 /* Either newlength overflow or realloc failure: poison the well. */
712 js_free(sb->base);
713 sb->base = STRING_BUFFER_ERROR_BASE;
714 return false;
717 static void
718 FreeStringBuffer(JSStringBuffer *sb)
720 JS_ASSERT(STRING_BUFFER_OK(sb));
721 if (sb->base)
722 js_free(sb->base);
725 void
726 js_InitStringBuffer(JSStringBuffer *sb)
728 sb->base = sb->limit = sb->ptr = NULL;
729 sb->data = NULL;
730 sb->grow = GrowStringBuffer;
731 sb->free = FreeStringBuffer;
734 void
735 js_FinishStringBuffer(JSStringBuffer *sb)
737 sb->free(sb);
740 void
741 js_AppendChar(JSStringBuffer *sb, jschar c)
743 jschar *bp;
745 if (!STRING_BUFFER_OK(sb))
746 return;
747 if (!ENSURE_STRING_BUFFER(sb, 1))
748 return;
749 bp = sb->ptr;
750 *bp++ = c;
751 *bp = 0;
752 sb->ptr = bp;
755 void
756 js_AppendUCString(JSStringBuffer *sb, const jschar *buf, uintN len)
758 jschar *bp;
760 if (!STRING_BUFFER_OK(sb))
761 return;
762 if (len == 0 || !ENSURE_STRING_BUFFER(sb, len))
763 return;
764 bp = sb->ptr;
765 js_strncpy(bp, buf, len);
766 bp += len;
767 *bp = 0;
768 sb->ptr = bp;
771 #if JS_HAS_XML_SUPPORT
773 void
774 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
776 jschar *bp;
778 if (!STRING_BUFFER_OK(sb) || count == 0)
779 return;
780 if (!ENSURE_STRING_BUFFER(sb, count))
781 return;
782 for (bp = sb->ptr; count; --count)
783 *bp++ = c;
784 *bp = 0;
785 sb->ptr = bp;
788 void
789 js_AppendCString(JSStringBuffer *sb, const char *asciiz)
791 size_t length;
792 jschar *bp;
794 if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
795 return;
796 length = strlen(asciiz);
797 if (!ENSURE_STRING_BUFFER(sb, length))
798 return;
799 for (bp = sb->ptr; length; --length)
800 *bp++ = (jschar) *asciiz++;
801 *bp = 0;
802 sb->ptr = bp;
805 void
806 js_AppendJSString(JSStringBuffer *sb, JSString *str)
808 js_AppendUCString(sb, str->chars(), str->length());
811 static JSBool
812 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
814 ptrdiff_t offset, length, i;
815 int32 c, d;
816 JSBool ispair;
817 jschar *bp, digit;
818 char *bytes;
819 JSErrNum msg;
821 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
822 offset = ts->tokenbuf.ptr - ts->tokenbuf.base;
823 js_FastAppendChar(&ts->tokenbuf, '&');
824 if (!STRING_BUFFER_OK(&ts->tokenbuf))
825 return JS_FALSE;
826 while ((c = GetChar(ts)) != ';') {
827 if (c == EOF || c == '\n') {
828 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
829 JSMSG_END_OF_XML_ENTITY);
830 return JS_FALSE;
832 js_FastAppendChar(&ts->tokenbuf, (jschar) c);
833 if (!STRING_BUFFER_OK(&ts->tokenbuf))
834 return JS_FALSE;
837 /* Let length be the number of jschars after the '&', including the ';'. */
838 length = (ts->tokenbuf.ptr - ts->tokenbuf.base) - offset;
839 bp = ts->tokenbuf.base + offset;
840 c = d = 0;
841 ispair = JS_FALSE;
842 if (length > 2 && bp[1] == '#') {
843 /* Match a well-formed XML Character Reference. */
844 i = 2;
845 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
846 if (length > 9) /* at most 6 hex digits allowed */
847 goto badncr;
848 while (++i < length) {
849 digit = bp[i];
850 if (!JS7_ISHEX(digit))
851 goto badncr;
852 c = (c << 4) + JS7_UNHEX(digit);
854 } else {
855 while (i < length) {
856 digit = bp[i++];
857 if (!JS7_ISDEC(digit))
858 goto badncr;
859 c = (c * 10) + JS7_UNDEC(digit);
860 if (c < 0)
861 goto badncr;
865 if (0x10000 <= c && c <= 0x10FFFF) {
866 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
867 d = 0xDC00 + (c & 0x3FF);
868 c = 0xD7C0 + (c >> 10);
869 ispair = JS_TRUE;
870 } else {
871 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
872 if (c != 0x9 && c != 0xA && c != 0xD &&
873 !(0x20 <= c && c <= 0xD7FF) &&
874 !(0xE000 <= c && c <= 0xFFFD)) {
875 goto badncr;
878 } else {
879 /* Try to match one of the five XML 1.0 predefined entities. */
880 switch (length) {
881 case 3:
882 if (bp[2] == 't') {
883 if (bp[1] == 'l')
884 c = '<';
885 else if (bp[1] == 'g')
886 c = '>';
888 break;
889 case 4:
890 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
891 c = '&';
892 break;
893 case 5:
894 if (bp[3] == 'o') {
895 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
896 c = '\'';
897 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
898 c = '"';
900 break;
902 if (c == 0) {
903 msg = JSMSG_UNKNOWN_XML_ENTITY;
904 goto bad;
908 /* If we matched, retract ts->tokenbuf and store the entity's value. */
909 *bp++ = (jschar) c;
910 if (ispair)
911 *bp++ = (jschar) d;
912 *bp = 0;
913 ts->tokenbuf.ptr = bp;
914 return JS_TRUE;
916 badncr:
917 msg = JSMSG_BAD_XML_NCR;
918 bad:
919 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
920 JS_ASSERT(STRING_BUFFER_OK(&ts->tokenbuf));
921 JS_ASSERT((ts->tokenbuf.ptr - bp) >= 1);
922 bytes = js_DeflateString(cx, bp + 1,
923 (ts->tokenbuf.ptr - bp) - 1);
924 if (bytes) {
925 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
926 msg, bytes);
927 cx->free(bytes);
929 return JS_FALSE;
932 #endif /* JS_HAS_XML_SUPPORT */
934 JSTokenType
935 js_PeekToken(JSContext *cx, JSTokenStream *ts)
937 JSTokenType tt;
939 if (ts->lookahead != 0) {
940 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
941 } else {
942 tt = js_GetToken(cx, ts);
943 js_UngetToken(ts);
945 return tt;
948 JSTokenType
949 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
951 JSTokenType tt;
953 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
954 return TOK_EOL;
955 ts->flags |= TSF_NEWLINES;
956 tt = js_PeekToken(cx, ts);
957 ts->flags &= ~TSF_NEWLINES;
958 return tt;
962 * We have encountered a '\': check for a Unicode escape sequence after it,
963 * returning the character code value if we found a Unicode escape sequence.
964 * Otherwise, non-destructively return the original '\'.
966 static int32
967 GetUnicodeEscape(JSTokenStream *ts)
969 jschar cp[5];
970 int32 c;
972 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
973 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
974 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
976 c = (((((JS7_UNHEX(cp[1]) << 4)
977 + JS7_UNHEX(cp[2])) << 4)
978 + JS7_UNHEX(cp[3])) << 4)
979 + JS7_UNHEX(cp[4]);
980 SkipChars(ts, 5);
981 return c;
983 return '\\';
986 static JSToken *
987 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
989 JSToken *tp;
991 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
992 tp = &CURRENT_TOKEN(ts);
993 tp->ptr = ts->linebuf.ptr + adjust;
994 tp->pos.begin.index = ts->linepos +
995 (tp->ptr - ts->linebuf.base) -
996 ts->ungetpos;
997 tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
998 return tp;
1001 static JS_ALWAYS_INLINE JSBool
1002 ScanAsSpace(jschar c)
1004 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
1005 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
1006 return JS_TRUE;
1007 return JS_FALSE;
1010 JSTokenType
1011 js_GetToken(JSContext *cx, JSTokenStream *ts)
1013 JSTokenType tt;
1014 int32 c, qc;
1015 JSToken *tp;
1016 JSAtom *atom;
1017 JSBool hadUnicodeEscape;
1018 const struct keyword *kw;
1019 #if JS_HAS_XML_SUPPORT
1020 JSBool inTarget;
1021 size_t targetLength;
1022 ptrdiff_t contentIndex;
1023 #endif
1025 #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1026 #define TOKENBUF_LENGTH() (ts->tokenbuf.ptr - ts->tokenbuf.base)
1027 #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1028 #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1029 ? js_AtomizeChars(cx, \
1030 TOKENBUF_BASE(), \
1031 TOKENBUF_LENGTH(), \
1032 0) \
1033 : NULL)
1034 #define ADD_TO_TOKENBUF(c) JS_BEGIN_MACRO \
1035 js_FastAppendChar(&ts->tokenbuf, jschar(c)); \
1036 if (!TOKENBUF_OK()) \
1037 goto error; \
1038 JS_END_MACRO
1040 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
1041 #define TOKENBUF_BASE() (ts->tokenbuf.base)
1042 #define TOKENBUF_END() (ts->tokenbuf.ptr)
1043 #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1044 #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1045 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1047 /* Check for a pushed-back token resulting from mismatching lookahead. */
1048 while (ts->lookahead != 0) {
1049 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1050 ts->lookahead--;
1051 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1052 tt = CURRENT_TOKEN(ts).type;
1053 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1054 return tt;
1057 /* If there was a fatal error, keep returning TOK_ERROR. */
1058 if (ts->flags & TSF_ERROR)
1059 return TOK_ERROR;
1061 #if JS_HAS_XML_SUPPORT
1062 if (ts->flags & TSF_XMLTEXTMODE) {
1063 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1064 tp = NewToken(ts, 0);
1065 INIT_TOKENBUF();
1066 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1068 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1069 if (c == '&' && qc == '<') {
1070 if (!GetXMLEntity(cx, ts))
1071 goto error;
1072 tt = TOK_XMLTEXT;
1073 continue;
1076 if (!JS_ISXMLSPACE(c))
1077 tt = TOK_XMLTEXT;
1078 ADD_TO_TOKENBUF(c);
1080 UngetChar(ts, c);
1082 if (TOKENBUF_LENGTH() == 0) {
1083 atom = NULL;
1084 } else {
1085 atom = TOKENBUF_TO_ATOM();
1086 if (!atom)
1087 goto error;
1089 tp->pos.end.lineno = (uint16)ts->lineno;
1090 tp->t_op = JSOP_STRING;
1091 tp->t_atom = atom;
1092 goto out;
1095 if (ts->flags & TSF_XMLTAGMODE) {
1096 tp = NewToken(ts, 0);
1097 c = GetChar(ts);
1098 if (JS_ISXMLSPACE(c)) {
1099 do {
1100 c = GetChar(ts);
1101 } while (JS_ISXMLSPACE(c));
1102 UngetChar(ts, c);
1103 tt = TOK_XMLSPACE;
1104 goto out;
1107 if (c == EOF) {
1108 tt = TOK_EOF;
1109 goto out;
1112 INIT_TOKENBUF();
1113 if (JS_ISXMLNSSTART(c)) {
1114 JSBool sawColon = JS_FALSE;
1116 ADD_TO_TOKENBUF(c);
1117 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
1118 if (c == ':') {
1119 int nextc;
1121 if (sawColon ||
1122 (nextc = PeekChar(ts),
1123 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1124 !JS_ISXMLNAME(nextc))) {
1125 js_ReportCompileErrorNumber(cx, ts, NULL,
1126 JSREPORT_ERROR,
1127 JSMSG_BAD_XML_QNAME);
1128 goto error;
1130 sawColon = JS_TRUE;
1133 ADD_TO_TOKENBUF(c);
1136 UngetChar(ts, c);
1137 atom = TOKENBUF_TO_ATOM();
1138 if (!atom)
1139 goto error;
1140 tp->t_op = JSOP_STRING;
1141 tp->t_atom = atom;
1142 tt = TOK_XMLNAME;
1143 goto out;
1146 switch (c) {
1147 case '{':
1148 if (ts->flags & TSF_XMLONLYMODE)
1149 goto bad_xml_char;
1150 tt = TOK_LC;
1151 goto out;
1153 case '=':
1154 tt = TOK_ASSIGN;
1155 goto out;
1157 case '"':
1158 case '\'':
1159 qc = c;
1160 while ((c = GetChar(ts)) != qc) {
1161 if (c == EOF) {
1162 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1163 JSMSG_UNTERMINATED_STRING);
1164 goto error;
1168 * XML attribute values are double-quoted when pretty-printed,
1169 * so escape " if it is expressed directly in a single-quoted
1170 * attribute value.
1172 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1173 JS_ASSERT(qc == '\'');
1174 js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1175 continue;
1178 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1179 if (!GetXMLEntity(cx, ts))
1180 goto error;
1181 continue;
1184 ADD_TO_TOKENBUF(c);
1186 atom = TOKENBUF_TO_ATOM();
1187 if (!atom)
1188 goto error;
1189 tp->pos.end.lineno = (uint16)ts->lineno;
1190 tp->t_op = JSOP_STRING;
1191 tp->t_atom = atom;
1192 tt = TOK_XMLATTR;
1193 goto out;
1195 case '>':
1196 tt = TOK_XMLTAGC;
1197 goto out;
1199 case '/':
1200 if (MatchChar(ts, '>')) {
1201 tt = TOK_XMLPTAGC;
1202 goto out;
1204 /* FALL THROUGH */
1206 bad_xml_char:
1207 default:
1208 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1209 JSMSG_BAD_XML_CHARACTER);
1210 goto error;
1212 /* NOTREACHED */
1214 #endif /* JS_HAS_XML_SUPPORT */
1216 retry:
1217 do {
1218 c = GetChar(ts);
1219 if (c == '\n') {
1220 ts->flags &= ~TSF_DIRTYLINE;
1221 if (ts->flags & TSF_NEWLINES)
1222 break;
1224 } while (ScanAsSpace((jschar)c));
1226 tp = NewToken(ts, -1);
1227 if (c == EOF) {
1228 tt = TOK_EOF;
1229 goto out;
1232 hadUnicodeEscape = JS_FALSE;
1233 if (JS_ISIDSTART(c) ||
1234 (c == '\\' &&
1235 (qc = GetUnicodeEscape(ts),
1236 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1237 if (hadUnicodeEscape)
1238 c = qc;
1239 INIT_TOKENBUF();
1240 for (;;) {
1241 ADD_TO_TOKENBUF(c);
1242 c = GetChar(ts);
1243 if (c == '\\') {
1244 qc = GetUnicodeEscape(ts);
1245 if (!JS_ISIDENT(qc))
1246 break;
1247 c = qc;
1248 hadUnicodeEscape = JS_TRUE;
1249 } else {
1250 if (!JS_ISIDENT(c))
1251 break;
1254 UngetChar(ts, c);
1257 * Check for keywords unless we saw Unicode escape or parser asks
1258 * to ignore keywords.
1260 if (!hadUnicodeEscape &&
1261 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1262 TOKENBUF_OK() &&
1263 (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
1264 if (kw->tokentype == TOK_RESERVED) {
1265 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1266 JSREPORT_WARNING |
1267 JSREPORT_STRICT,
1268 JSMSG_RESERVED_ID,
1269 kw->chars)) {
1270 goto error;
1272 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1273 tt = kw->tokentype;
1274 tp->t_op = (JSOp) kw->op;
1275 goto out;
1279 atom = TOKENBUF_TO_ATOM();
1280 if (!atom)
1281 goto error;
1282 tp->t_op = JSOP_NAME;
1283 tp->t_atom = atom;
1284 tt = TOK_NAME;
1285 goto out;
1288 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1289 jsint radix;
1290 const jschar *endptr;
1291 jsdouble dval;
1293 radix = 10;
1294 INIT_TOKENBUF();
1296 if (c == '0') {
1297 ADD_TO_TOKENBUF(c);
1298 c = GetChar(ts);
1299 if (JS_TOLOWER(c) == 'x') {
1300 ADD_TO_TOKENBUF(c);
1301 c = GetChar(ts);
1302 radix = 16;
1303 } else if (JS7_ISDEC(c)) {
1304 radix = 8;
1308 while (JS7_ISHEX(c)) {
1309 if (radix < 16) {
1310 if (JS7_ISLET(c))
1311 break;
1314 * We permit 08 and 09 as decimal numbers, which makes our
1315 * behaviour a superset of the ECMA numeric grammar. We might
1316 * not always be so permissive, so we warn about it.
1318 if (radix == 8 && c >= '8') {
1319 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1320 JSREPORT_WARNING,
1321 JSMSG_BAD_OCTAL,
1322 c == '8' ? "08" : "09")) {
1323 goto error;
1325 radix = 10;
1328 ADD_TO_TOKENBUF(c);
1329 c = GetChar(ts);
1332 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1333 if (c == '.') {
1334 do {
1335 ADD_TO_TOKENBUF(c);
1336 c = GetChar(ts);
1337 } while (JS7_ISDEC(c));
1339 if (JS_TOLOWER(c) == 'e') {
1340 ADD_TO_TOKENBUF(c);
1341 c = GetChar(ts);
1342 if (c == '+' || c == '-') {
1343 ADD_TO_TOKENBUF(c);
1344 c = GetChar(ts);
1346 if (!JS7_ISDEC(c)) {
1347 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1348 JSMSG_MISSING_EXPONENT);
1349 goto error;
1351 do {
1352 ADD_TO_TOKENBUF(c);
1353 c = GetChar(ts);
1354 } while (JS7_ISDEC(c));
1358 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1359 UngetChar(ts, c);
1360 ADD_TO_TOKENBUF(0);
1362 if (!TOKENBUF_OK())
1363 goto error;
1364 if (radix == 10) {
1365 if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1366 &endptr, &dval)) {
1367 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1368 JSMSG_OUT_OF_MEMORY);
1369 goto error;
1371 } else {
1372 if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1373 &endptr, radix, &dval)) {
1374 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1375 JSMSG_OUT_OF_MEMORY);
1376 goto error;
1379 tp->t_dval = dval;
1380 tt = TOK_NUMBER;
1381 goto out;
1384 if (c == '"' || c == '\'') {
1385 qc = c;
1386 INIT_TOKENBUF();
1387 while ((c = GetChar(ts)) != qc) {
1388 if (c == '\n' || c == EOF) {
1389 UngetChar(ts, c);
1390 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1391 JSMSG_UNTERMINATED_STRING);
1392 goto error;
1394 if (c == '\\') {
1395 switch (c = GetChar(ts)) {
1396 case 'b': c = '\b'; break;
1397 case 'f': c = '\f'; break;
1398 case 'n': c = '\n'; break;
1399 case 'r': c = '\r'; break;
1400 case 't': c = '\t'; break;
1401 case 'v': c = '\v'; break;
1403 default:
1404 if ('0' <= c && c < '8') {
1405 int32 val = JS7_UNDEC(c);
1407 c = PeekChar(ts);
1408 if ('0' <= c && c < '8') {
1409 val = 8 * val + JS7_UNDEC(c);
1410 GetChar(ts);
1411 c = PeekChar(ts);
1412 if ('0' <= c && c < '8') {
1413 int32 save = val;
1414 val = 8 * val + JS7_UNDEC(c);
1415 if (val <= 0377)
1416 GetChar(ts);
1417 else
1418 val = save;
1422 c = (jschar)val;
1423 } else if (c == 'u') {
1424 jschar cp[4];
1425 if (PeekChars(ts, 4, cp) &&
1426 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1427 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1428 c = (((((JS7_UNHEX(cp[0]) << 4)
1429 + JS7_UNHEX(cp[1])) << 4)
1430 + JS7_UNHEX(cp[2])) << 4)
1431 + JS7_UNHEX(cp[3]);
1432 SkipChars(ts, 4);
1434 } else if (c == 'x') {
1435 jschar cp[2];
1436 if (PeekChars(ts, 2, cp) &&
1437 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1438 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1439 SkipChars(ts, 2);
1441 } else if (c == '\n') {
1442 /* ECMA follows C by removing escaped newlines. */
1443 continue;
1445 break;
1448 ADD_TO_TOKENBUF(c);
1450 atom = TOKENBUF_TO_ATOM();
1451 if (!atom)
1452 goto error;
1453 tp->pos.end.lineno = (uint16)ts->lineno;
1454 tp->t_op = JSOP_STRING;
1455 tp->t_atom = atom;
1456 tt = TOK_STRING;
1457 goto out;
1460 switch (c) {
1461 case '\n': tt = TOK_EOL; goto eol_out;
1462 case ';': tt = TOK_SEMI; break;
1463 case '[': tt = TOK_LB; break;
1464 case ']': tt = TOK_RB; break;
1465 case '{': tt = TOK_LC; break;
1466 case '}': tt = TOK_RC; break;
1467 case '(': tt = TOK_LP; break;
1468 case ')': tt = TOK_RP; break;
1469 case ',': tt = TOK_COMMA; break;
1470 case '?': tt = TOK_HOOK; break;
1472 case '.':
1473 #if JS_HAS_XML_SUPPORT
1474 if (MatchChar(ts, c))
1475 tt = TOK_DBLDOT;
1476 else
1477 #endif
1478 tt = TOK_DOT;
1479 break;
1481 case ':':
1482 #if JS_HAS_XML_SUPPORT
1483 if (MatchChar(ts, c)) {
1484 tt = TOK_DBLCOLON;
1485 break;
1487 #endif
1489 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1490 * object initializer, likewise for setter.
1492 tp->t_op = JSOP_NOP;
1493 tt = TOK_COLON;
1494 break;
1496 case '|':
1497 if (MatchChar(ts, c)) {
1498 tt = TOK_OR;
1499 } else if (MatchChar(ts, '=')) {
1500 tp->t_op = JSOP_BITOR;
1501 tt = TOK_ASSIGN;
1502 } else {
1503 tt = TOK_BITOR;
1505 break;
1507 case '^':
1508 if (MatchChar(ts, '=')) {
1509 tp->t_op = JSOP_BITXOR;
1510 tt = TOK_ASSIGN;
1511 } else {
1512 tt = TOK_BITXOR;
1514 break;
1516 case '&':
1517 if (MatchChar(ts, c)) {
1518 tt = TOK_AND;
1519 } else if (MatchChar(ts, '=')) {
1520 tp->t_op = JSOP_BITAND;
1521 tt = TOK_ASSIGN;
1522 } else {
1523 tt = TOK_BITAND;
1525 break;
1527 case '=':
1528 if (MatchChar(ts, c)) {
1529 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1530 tt = TOK_EQOP;
1531 } else {
1532 tp->t_op = JSOP_NOP;
1533 tt = TOK_ASSIGN;
1535 break;
1537 case '!':
1538 if (MatchChar(ts, '=')) {
1539 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1540 tt = TOK_EQOP;
1541 } else {
1542 tp->t_op = JSOP_NOT;
1543 tt = TOK_UNARYOP;
1545 break;
1547 #if JS_HAS_XML_SUPPORT
1548 case '@':
1549 tt = TOK_AT;
1550 break;
1551 #endif
1553 case '<':
1554 #if JS_HAS_XML_SUPPORT
1556 * After much testing, it's clear that Postel's advice to protocol
1557 * designers ("be liberal in what you accept, and conservative in what
1558 * you send") invites a natural-law repercussion for JS as "protocol":
1560 * "If you are liberal in what you accept, others will utterly fail to
1561 * be conservative in what they send."
1563 * Which means you will get <!-- comments to end of line in the middle
1564 * of .js files, and after if conditions whose then statements are on
1565 * the next line, and other wonders. See at least the following bugs:
1566 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1567 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1568 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1570 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1571 * an XML comment or CDATA literal. Instead, we always scan <! as the
1572 * start of an HTML comment hack to end of line, used since Netscape 2
1573 * to hide script tag content from script-unaware browsers.
1575 * But this still leaves XML resources with certain internal structure
1576 * vulnerable to being loaded as script cross-origin, and some internal
1577 * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1578 * source consists only of XML literals. See:
1580 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1582 * The check for this is in jsparse.cpp, JSCompiler::compileScript.
1584 if ((ts->flags & TSF_OPERAND) &&
1585 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1586 /* Check for XML comment or CDATA section. */
1587 if (MatchChar(ts, '!')) {
1588 INIT_TOKENBUF();
1590 /* Scan XML comment. */
1591 if (MatchChar(ts, '-')) {
1592 if (!MatchChar(ts, '-'))
1593 goto bad_xml_markup;
1594 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1595 if (c == EOF)
1596 goto bad_xml_markup;
1597 ADD_TO_TOKENBUF(c);
1599 tt = TOK_XMLCOMMENT;
1600 tp->t_op = JSOP_XMLCOMMENT;
1601 goto finish_xml_markup;
1604 /* Scan CDATA section. */
1605 if (MatchChar(ts, '[')) {
1606 jschar cp[6];
1607 if (PeekChars(ts, 6, cp) &&
1608 cp[0] == 'C' &&
1609 cp[1] == 'D' &&
1610 cp[2] == 'A' &&
1611 cp[3] == 'T' &&
1612 cp[4] == 'A' &&
1613 cp[5] == '[') {
1614 SkipChars(ts, 6);
1615 while ((c = GetChar(ts)) != ']' ||
1616 !PeekChars(ts, 2, cp) ||
1617 cp[0] != ']' ||
1618 cp[1] != '>') {
1619 if (c == EOF)
1620 goto bad_xml_markup;
1621 ADD_TO_TOKENBUF(c);
1623 GetChar(ts); /* discard ] but not > */
1624 tt = TOK_XMLCDATA;
1625 tp->t_op = JSOP_XMLCDATA;
1626 goto finish_xml_markup;
1628 goto bad_xml_markup;
1632 /* Check for processing instruction. */
1633 if (MatchChar(ts, '?')) {
1634 inTarget = JS_TRUE;
1635 targetLength = 0;
1636 contentIndex = -1;
1638 INIT_TOKENBUF();
1639 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1640 if (c == EOF)
1641 goto bad_xml_markup;
1642 if (inTarget) {
1643 if (JS_ISXMLSPACE(c)) {
1644 if (TOKENBUF_LENGTH() == 0)
1645 goto bad_xml_markup;
1646 inTarget = JS_FALSE;
1647 } else {
1648 if (!((TOKENBUF_LENGTH() == 0)
1649 ? JS_ISXMLNSSTART(c)
1650 : JS_ISXMLNS(c))) {
1651 goto bad_xml_markup;
1653 ++targetLength;
1655 } else {
1656 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1657 contentIndex = TOKENBUF_LENGTH();
1659 ADD_TO_TOKENBUF(c);
1661 if (targetLength == 0)
1662 goto bad_xml_markup;
1663 if (!TOKENBUF_OK())
1664 goto error;
1665 if (contentIndex < 0) {
1666 atom = cx->runtime->atomState.emptyAtom;
1667 } else {
1668 atom = js_AtomizeChars(cx,
1669 &TOKENBUF_CHAR(contentIndex),
1670 TOKENBUF_LENGTH() - contentIndex,
1672 if (!atom)
1673 goto error;
1675 TRIM_TOKENBUF(targetLength);
1676 tp->t_atom2 = atom;
1677 tt = TOK_XMLPI;
1679 finish_xml_markup:
1680 if (!MatchChar(ts, '>'))
1681 goto bad_xml_markup;
1682 atom = TOKENBUF_TO_ATOM();
1683 if (!atom)
1684 goto error;
1685 tp->t_atom = atom;
1686 tp->pos.end.lineno = (uint16)ts->lineno;
1687 goto out;
1690 /* An XML start-of-tag character. */
1691 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1692 goto out;
1694 bad_xml_markup:
1695 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1696 JSMSG_BAD_XML_MARKUP);
1697 goto error;
1699 #endif /* JS_HAS_XML_SUPPORT */
1701 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1702 if (MatchChar(ts, '!')) {
1703 if (MatchChar(ts, '-')) {
1704 if (MatchChar(ts, '-')) {
1705 ts->flags |= TSF_IN_HTML_COMMENT;
1706 goto skipline;
1708 UngetChar(ts, '-');
1710 UngetChar(ts, '!');
1712 if (MatchChar(ts, c)) {
1713 tp->t_op = JSOP_LSH;
1714 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1715 } else {
1716 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1717 tt = TOK_RELOP;
1719 break;
1721 case '>':
1722 if (MatchChar(ts, c)) {
1723 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1724 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1725 } else {
1726 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1727 tt = TOK_RELOP;
1729 break;
1731 case '*':
1732 tp->t_op = JSOP_MUL;
1733 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1734 break;
1736 case '/':
1737 if (MatchChar(ts, '/')) {
1739 * Hack for source filters such as the Mozilla XUL preprocessor:
1740 * "//@line 123\n" sets the number of the *next* line after the
1741 * comment to 123.
1743 if (JS_HAS_ATLINE_OPTION(cx)) {
1744 jschar cp[5];
1745 uintN i, line, temp;
1746 char filename[1024];
1748 if (PeekChars(ts, 5, cp) &&
1749 cp[0] == '@' &&
1750 cp[1] == 'l' &&
1751 cp[2] == 'i' &&
1752 cp[3] == 'n' &&
1753 cp[4] == 'e') {
1754 SkipChars(ts, 5);
1755 while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1756 continue;
1757 if (JS7_ISDEC(c)) {
1758 line = JS7_UNDEC(c);
1759 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1760 temp = 10 * line + JS7_UNDEC(c);
1761 if (temp < line) {
1762 /* Ignore overlarge line numbers. */
1763 goto skipline;
1765 line = temp;
1767 while (c != '\n' && ScanAsSpace((jschar)c))
1768 c = GetChar(ts);
1769 i = 0;
1770 if (c == '"') {
1771 while ((c = GetChar(ts)) != EOF && c != '"') {
1772 if (c == '\n') {
1773 UngetChar(ts, c);
1774 goto skipline;
1776 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1777 goto skipline;
1778 filename[i++] = (char) c;
1780 if (c == '"') {
1781 while ((c = GetChar(ts)) != '\n' &&
1782 ScanAsSpace((jschar)c)) {
1783 continue;
1787 filename[i] = '\0';
1788 if (c == '\n') {
1789 if (i > 0) {
1790 if (ts->flags & TSF_OWNFILENAME)
1791 cx->free((void *) ts->filename);
1792 ts->filename = JS_strdup(cx, filename);
1793 if (!ts->filename)
1794 goto error;
1795 ts->flags |= TSF_OWNFILENAME;
1797 ts->lineno = line;
1800 UngetChar(ts, c);
1804 skipline:
1805 /* Optimize line skipping if we are not in an HTML comment. */
1806 if (ts->flags & TSF_IN_HTML_COMMENT) {
1807 while ((c = GetChar(ts)) != EOF && c != '\n') {
1808 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1809 ts->flags &= ~TSF_IN_HTML_COMMENT;
1811 } else {
1812 while ((c = GetChar(ts)) != EOF && c != '\n')
1813 continue;
1815 UngetChar(ts, c);
1816 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1817 goto retry;
1820 if (MatchChar(ts, '*')) {
1821 uintN lineno = ts->lineno;
1822 while ((c = GetChar(ts)) != EOF &&
1823 !(c == '*' && MatchChar(ts, '/'))) {
1824 /* Ignore all characters until comment close. */
1826 if (c == EOF) {
1827 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1828 JSMSG_UNTERMINATED_COMMENT);
1829 goto error;
1831 if ((ts->flags & TSF_NEWLINES) && lineno != ts->lineno) {
1832 ts->flags &= ~TSF_DIRTYLINE;
1833 tt = TOK_EOL;
1834 goto eol_out;
1836 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1837 goto retry;
1840 if (ts->flags & TSF_OPERAND) {
1841 uintN flags, length;
1842 JSBool inCharClass = JS_FALSE;
1844 INIT_TOKENBUF();
1845 for (;;) {
1846 c = GetChar(ts);
1847 if (c == '\n' || c == EOF) {
1848 UngetChar(ts, c);
1849 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1850 JSMSG_UNTERMINATED_REGEXP);
1851 goto error;
1853 if (c == '\\') {
1854 ADD_TO_TOKENBUF(c);
1855 c = GetChar(ts);
1856 } else if (c == '[') {
1857 inCharClass = JS_TRUE;
1858 } else if (c == ']') {
1859 inCharClass = JS_FALSE;
1860 } else if (c == '/' && !inCharClass) {
1861 /* For compat with IE, allow unescaped / in char classes. */
1862 break;
1864 ADD_TO_TOKENBUF(c);
1866 for (flags = 0, length = TOKENBUF_LENGTH() + 1; ; length++) {
1867 c = PeekChar(ts);
1868 if (c == 'g' && !(flags & JSREG_GLOB))
1869 flags |= JSREG_GLOB;
1870 else if (c == 'i' && !(flags & JSREG_FOLD))
1871 flags |= JSREG_FOLD;
1872 else if (c == 'm' && !(flags & JSREG_MULTILINE))
1873 flags |= JSREG_MULTILINE;
1874 else if (c == 'y' && !(flags & JSREG_STICKY))
1875 flags |= JSREG_STICKY;
1876 else
1877 break;
1878 GetChar(ts);
1880 c = PeekChar(ts);
1881 if (JS7_ISLET(c)) {
1882 char buf[2] = { '\0' };
1883 tp->pos.begin.index += length + 1;
1884 buf[0] = (char)c;
1885 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1886 JSMSG_BAD_REGEXP_FLAG, buf);
1887 (void) GetChar(ts);
1888 goto error;
1890 /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1891 if (!TOKENBUF_OK())
1892 goto error;
1893 NUL_TERM_TOKENBUF();
1894 tp->t_reflags = flags;
1895 tt = TOK_REGEXP;
1896 break;
1899 tp->t_op = JSOP_DIV;
1900 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1901 break;
1903 case '%':
1904 tp->t_op = JSOP_MOD;
1905 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1906 break;
1908 case '~':
1909 tp->t_op = JSOP_BITNOT;
1910 tt = TOK_UNARYOP;
1911 break;
1913 case '+':
1914 if (MatchChar(ts, '=')) {
1915 tp->t_op = JSOP_ADD;
1916 tt = TOK_ASSIGN;
1917 } else if (MatchChar(ts, c)) {
1918 tt = TOK_INC;
1919 } else {
1920 tp->t_op = JSOP_POS;
1921 tt = TOK_PLUS;
1923 break;
1925 case '-':
1926 if (MatchChar(ts, '=')) {
1927 tp->t_op = JSOP_SUB;
1928 tt = TOK_ASSIGN;
1929 } else if (MatchChar(ts, c)) {
1930 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1931 ts->flags &= ~TSF_IN_HTML_COMMENT;
1932 goto skipline;
1934 tt = TOK_DEC;
1935 } else {
1936 tp->t_op = JSOP_NEG;
1937 tt = TOK_MINUS;
1939 break;
1941 #if JS_HAS_SHARP_VARS
1942 case '#':
1944 uint32 n;
1946 c = GetChar(ts);
1947 if (!JS7_ISDEC(c)) {
1948 UngetChar(ts, c);
1949 goto badchar;
1951 n = (uint32)JS7_UNDEC(c);
1952 for (;;) {
1953 c = GetChar(ts);
1954 if (!JS7_ISDEC(c))
1955 break;
1956 n = 10 * n + JS7_UNDEC(c);
1957 if (n >= UINT16_LIMIT) {
1958 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1959 JSMSG_SHARPVAR_TOO_BIG);
1960 goto error;
1963 tp->t_dval = (jsdouble) n;
1964 if (JS_HAS_STRICT_OPTION(cx) &&
1965 (c == '=' || c == '#')) {
1966 char buf[20];
1967 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1968 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1969 JSREPORT_WARNING |
1970 JSREPORT_STRICT,
1971 JSMSG_DEPRECATED_USAGE,
1972 buf)) {
1973 goto error;
1976 if (c == '=')
1977 tt = TOK_DEFSHARP;
1978 else if (c == '#')
1979 tt = TOK_USESHARP;
1980 else
1981 goto badchar;
1982 break;
1984 #endif /* JS_HAS_SHARP_VARS */
1986 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1987 badchar:
1988 #endif
1990 default:
1991 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1992 JSMSG_ILLEGAL_CHARACTER);
1993 goto error;
1996 out:
1997 JS_ASSERT(tt != TOK_EOL);
1998 ts->flags |= TSF_DIRTYLINE;
2000 eol_out:
2001 if (!STRING_BUFFER_OK(&ts->tokenbuf))
2002 tt = TOK_ERROR;
2003 JS_ASSERT(tt < TOK_LIMIT);
2004 tp->pos.end.index = ts->linepos +
2005 (ts->linebuf.ptr - ts->linebuf.base) -
2006 ts->ungetpos;
2007 tp->type = tt;
2008 return tt;
2010 error:
2011 tt = TOK_ERROR;
2012 ts->flags |= TSF_ERROR;
2013 goto out;
2015 #undef INIT_TOKENBUF
2016 #undef TOKENBUF_LENGTH
2017 #undef TOKENBUF_OK
2018 #undef TOKENBUF_TO_ATOM
2019 #undef ADD_TO_TOKENBUF
2020 #undef TOKENBUF_BASE
2021 #undef TOKENBUF_CHAR
2022 #undef TRIM_TOKENBUF
2023 #undef NUL_TERM_TOKENBUF
2026 void
2027 js_UngetToken(JSTokenStream *ts)
2029 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
2030 ts->lookahead++;
2031 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
2034 JSBool
2035 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2037 if (js_GetToken(cx, ts) == tt)
2038 return JS_TRUE;
2039 js_UngetToken(ts);
2040 return JS_FALSE;