Save all modification
[mozilla-1.9/m8.git] / js / src / jsscan.c
blob8bc756273bb591f615eb69d4f25d8e9d0c8d1a88
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
25 * Contributor(s):
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
42 * JS lexical scanner.
44 #include "jsstddef.h"
45 #include <stdio.h> /* first to avoid trouble on some systems */
46 #include <errno.h>
47 #include <limits.h>
48 #include <math.h>
49 #ifdef HAVE_MEMORY_H
50 #include <memory.h>
51 #endif
52 #include <stdarg.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include "jstypes.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsutil.h" /* Added by JSIFY */
58 #include "jsdtoa.h"
59 #include "jsprf.h"
60 #include "jsapi.h"
61 #include "jsatom.h"
62 #include "jscntxt.h"
63 #include "jsconfig.h"
64 #include "jsemit.h"
65 #include "jsexn.h"
66 #include "jsnum.h"
67 #include "jsopcode.h"
68 #include "jsparse.h"
69 #include "jsregexp.h"
70 #include "jsscan.h"
71 #include "jsscript.h"
73 #if JS_HAS_XML_SUPPORT
74 #include "jsxml.h"
75 #endif
77 #define JS_KEYWORD(keyword, type, op, version) \
78 const char js_##keyword##_str[] = #keyword;
79 #include "jskeyword.tbl"
80 #undef JS_KEYWORD
82 struct keyword {
83 const char *chars; /* C string with keyword text */
84 JSTokenType tokentype; /* JSTokenType */
85 JSOp op; /* JSOp */
86 JSVersion version; /* JSVersion */
89 static const struct keyword keyword_defs[] = {
90 #define JS_KEYWORD(keyword, type, op, version) \
91 {js_##keyword##_str, type, op, version},
92 #include "jskeyword.tbl"
93 #undef JS_KEYWORD
96 #define KEYWORD_COUNT (sizeof keyword_defs / sizeof keyword_defs[0])
98 static const struct keyword *
99 FindKeyword(const jschar *s, size_t length)
101 register size_t i;
102 const struct keyword *kw;
103 const char *chars;
105 JS_ASSERT(length != 0);
107 #define JSKW_LENGTH() length
108 #define JSKW_AT(column) s[column]
109 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
110 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
111 #define JSKW_NO_MATCH() goto no_match;
112 #include "jsautokw.h"
113 #undef JSKW_NO_MATCH
114 #undef JSKW_TEST_GUESS
115 #undef JSKW_GOT_MATCH
116 #undef JSKW_AT
117 #undef JSKW_LENGTH
119 got_match:
120 return &keyword_defs[i];
122 test_guess:
123 kw = &keyword_defs[i];
124 chars = kw->chars;
125 do {
126 if (*s++ != (unsigned char)(*chars++))
127 goto no_match;
128 } while (--length != 0);
129 return kw;
131 no_match:
132 return NULL;
135 JSTokenType
136 js_CheckKeyword(const jschar *str, size_t length)
138 const struct keyword *kw;
140 JS_ASSERT(length != 0);
141 kw = FindKeyword(str, length);
142 return kw ? kw->tokentype : TOK_EOF;
145 JS_FRIEND_API(void)
146 js_MapKeywords(void (*mapfun)(const char *))
148 size_t i;
150 for (i = 0; i != KEYWORD_COUNT; ++i)
151 mapfun(keyword_defs[i].chars);
154 JSBool
155 js_IsIdentifier(JSString *str)
157 size_t length;
158 jschar c, *chars, *end;
160 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
161 if (length == 0)
162 return JS_FALSE;
163 c = *chars;
164 if (!JS_ISIDSTART(c))
165 return JS_FALSE;
166 end = chars + length;
167 while (++chars != end) {
168 c = *chars;
169 if (!JS_ISIDENT(c))
170 return JS_FALSE;
172 return JS_TRUE;
175 JSTokenStream *
176 js_NewTokenStream(JSContext *cx, const jschar *base, size_t length,
177 const char *filename, uintN lineno,
178 JSPrincipals *principals)
180 JSTokenStream *ts;
182 ts = js_NewBufferTokenStream(cx, base, length);
183 if (!ts)
184 return NULL;
185 ts->filename = filename;
186 ts->lineno = lineno;
187 if (principals)
188 JSPRINCIPALS_HOLD(cx, principals);
189 ts->principals = principals;
190 return ts;
193 #define TBMIN 64
195 static JSBool
196 GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
198 JSContext *cx;
199 jschar *base;
200 ptrdiff_t offset, length;
201 size_t tbsize;
202 JSArenaPool *pool;
204 cx = (JSContext*) sb->data;
205 base = sb->base;
206 offset = PTRDIFF(sb->ptr, base, jschar);
207 pool = &cx->tempPool;
208 if (!base) {
209 tbsize = TBMIN * sizeof(jschar);
210 length = TBMIN - 1;
211 JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
212 } else {
213 length = PTRDIFF(sb->limit, base, jschar);
214 if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
215 base = NULL;
216 } else {
217 tbsize = (length + 1) * sizeof(jschar);
218 length += length + 1;
219 JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
222 if (!base) {
223 JS_ReportOutOfMemory(cx);
224 sb->base = STRING_BUFFER_ERROR_BASE;
225 return JS_FALSE;
227 sb->base = base;
228 sb->limit = base + length;
229 sb->ptr = base + offset;
230 return JS_TRUE;
233 JS_FRIEND_API(JSTokenStream *)
234 js_NewBufferTokenStream(JSContext *cx, const jschar *base, size_t length)
236 size_t nb;
237 JSTokenStream *ts;
239 nb = sizeof(JSTokenStream) + JS_LINE_LIMIT * sizeof(jschar);
240 JS_ARENA_ALLOCATE_CAST(ts, JSTokenStream *, &cx->tempPool, nb);
241 if (!ts) {
242 JS_ReportOutOfMemory(cx);
243 return NULL;
245 memset(ts, 0, nb);
246 ts->lineno = 1;
247 ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = (jschar *)(ts + 1);
248 ts->userbuf.base = (jschar *)base;
249 ts->userbuf.limit = (jschar *)base + length;
250 ts->userbuf.ptr = (jschar *)base;
251 ts->tokenbuf.grow = GrowTokenBuf;
252 ts->tokenbuf.data = cx;
253 ts->listener = cx->debugHooks->sourceHandler;
254 ts->listenerData = cx->debugHooks->sourceHandlerData;
255 return ts;
258 JS_FRIEND_API(JSTokenStream *)
259 js_NewFileTokenStream(JSContext *cx, const char *filename, FILE *defaultfp)
261 jschar *base;
262 JSTokenStream *ts;
263 FILE *file;
265 JS_ARENA_ALLOCATE_CAST(base, jschar *, &cx->tempPool,
266 JS_LINE_LIMIT * sizeof(jschar));
267 if (!base)
268 return NULL;
269 ts = js_NewBufferTokenStream(cx, base, JS_LINE_LIMIT);
270 if (!ts)
271 return NULL;
272 if (!filename || strcmp(filename, "-") == 0) {
273 file = defaultfp;
274 } else {
275 file = fopen(filename, "r");
276 if (!file) {
277 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_OPEN,
278 filename, "No such file or directory");
279 return NULL;
282 ts->userbuf.ptr = ts->userbuf.limit;
283 ts->file = file;
284 ts->filename = filename;
285 return ts;
288 JS_FRIEND_API(JSBool)
289 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
291 if (ts->flags & TSF_OWNFILENAME)
292 JS_free(cx, (void *) ts->filename);
293 if (ts->principals)
294 JSPRINCIPALS_DROP(cx, ts->principals);
295 return !ts->file || fclose(ts->file) == 0;
298 JS_FRIEND_API(int)
299 js_fgets(char *buf, int size, FILE *file)
301 int n, i, c;
302 JSBool crflag;
304 n = size - 1;
305 if (n < 0)
306 return -1;
308 crflag = JS_FALSE;
309 for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
310 buf[i] = c;
311 if (c == '\n') { /* any \n ends a line */
312 i++; /* keep the \n; we know there is room for \0 */
313 break;
315 if (crflag) { /* \r not followed by \n ends line at the \r */
316 ungetc(c, file);
317 break; /* and overwrite c in buf with \0 */
319 crflag = (c == '\r');
322 buf[i] = '\0';
323 return i;
/*
 * Return the next character of ts, or EOF.
 *
 * Characters pushed back with UngetChar are returned first.  Otherwise the
 * character comes from ts->linebuf.  When linebuf is exhausted it is refilled
 * with the next line segment of ts->userbuf, and userbuf is itself refilled
 * from ts->file through js_fgets when it is empty and a file is attached.
 * TSF_EOF is set and EOF returned when no more input is available.
 *
 * Line terminators (\n, \r, \r\n, LINE_SEPARATOR, PARA_SEPARATOR) are stored
 * in linebuf as '\n'.  ts->lineno is incremented whenever '\n' is returned.
 */
326 static int32
327 GetChar(JSTokenStream *ts)
329 int32 c;
330 ptrdiff_t i, j, len, olen;
331 JSBool crflag;
332 char cbuf[JS_LINE_LIMIT];
333 jschar *ubuf, *nl;
/* Pushed-back characters take priority over buffered input. */
335 if (ts->ungetpos != 0) {
336 c = ts->ungetbuf[--ts->ungetpos];
337 } else {
338 if (ts->linebuf.ptr == ts->linebuf.limit) {
339 len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
340 if (len <= 0) {
341 if (!ts->file) {
342 ts->flags |= TSF_EOF;
343 return EOF;
346 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
347 crflag = (ts->flags & TSF_CRFLAG) != 0;
/* When a \r is pending, read one char fewer to leave room for its \n. */
348 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
349 if (len <= 0) {
350 ts->flags |= TSF_EOF;
351 return EOF;
353 olen = len;
354 ubuf = ts->userbuf.base;
355 i = 0;
356 if (crflag) {
357 ts->flags &= ~TSF_CRFLAG;
/* The pending \r was not followed by \n: emit the \n it stands for. */
358 if (cbuf[0] != '\n') {
359 ubuf[i++] = '\n';
360 len++;
361 ts->linepos--;
/* Widen the bytes read into jschars, going through unsigned char. */
364 for (j = 0; i < len; i++, j++)
365 ubuf[i] = (jschar) (unsigned char) cbuf[j];
366 ts->userbuf.limit = ubuf + len;
367 ts->userbuf.ptr = ubuf;
369 if (ts->listener) {
370 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
371 &ts->listenerTSData, ts->listenerData);
/* Reuse the terminator found earlier if the last segment was truncated. */
374 nl = ts->saveEOL;
375 if (!nl) {
377 * Any one of \n, \r, or \r\n ends a line (the longest
378 * match wins). Also allow the Unicode line and paragraph
379 * separators.
381 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
383 * Try to prevent value-testing on most characters by
384 * filtering out characters that aren't 000x or 202x.
386 if ((*nl & 0xDFD0) == 0) {
387 if (*nl == '\n')
388 break;
389 if (*nl == '\r') {
390 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
391 nl++;
392 break;
394 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
395 break;
401 * If there was a line terminator, copy thru it into linebuf.
402 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
404 if (nl < ts->userbuf.limit)
405 len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
406 if (len >= JS_LINE_LIMIT) {
407 len = JS_LINE_LIMIT - 1;
408 ts->saveEOL = nl;
409 } else {
410 ts->saveEOL = NULL;
412 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
413 ts->userbuf.ptr += len;
414 olen = len;
417 * Make sure linebuf contains \n for EOL (don't do this in
418 * userbuf because the user's string might be readonly).
420 if (nl < ts->userbuf.limit) {
421 if (*nl == '\r') {
422 if (ts->linebuf.base[len-1] == '\r') {
424 * Does the line segment end in \r? We must check
425 * for a \n at the front of the next segment before
426 * storing a \n into linebuf. This case matters
427 * only when we're reading from a file.
429 if (nl + 1 == ts->userbuf.limit && ts->file) {
430 len--;
431 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
432 if (len == 0) {
434 * This can happen when a segment ends in
435 * \r\r. Start over. ptr == limit in this
436 * case, so we'll fall into buffer-filling
437 * code.
439 return GetChar(ts);
441 } else {
442 ts->linebuf.base[len-1] = '\n';
445 } else if (*nl == '\n') {
/* Collapse a copied \r\n pair into a single \n in linebuf. */
446 if (nl > ts->userbuf.base &&
447 nl[-1] == '\r' &&
448 ts->linebuf.base[len-2] == '\r') {
449 len--;
450 JS_ASSERT(ts->linebuf.base[len] == '\n');
451 ts->linebuf.base[len-1] = '\n';
453 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
454 ts->linebuf.base[len-1] = '\n';
458 /* Reset linebuf based on adjusted segment length. */
459 ts->linebuf.limit = ts->linebuf.base + len;
460 ts->linebuf.ptr = ts->linebuf.base;
462 /* Update position of linebuf within physical userbuf line. */
463 if (!(ts->flags & TSF_NLFLAG))
464 ts->linepos += ts->linelen;
465 else
466 ts->linepos = 0;
467 if (ts->linebuf.limit[-1] == '\n')
468 ts->flags |= TSF_NLFLAG;
469 else
470 ts->flags &= ~TSF_NLFLAG;
472 /* Update linelen from original segment length. */
473 ts->linelen = olen;
475 c = *ts->linebuf.ptr++;
477 if (c == '\n')
478 ts->lineno++;
479 return c;
482 static void
483 UngetChar(JSTokenStream *ts, int32 c)
485 if (c == EOF)
486 return;
487 JS_ASSERT(ts->ungetpos < sizeof ts->ungetbuf / sizeof ts->ungetbuf[0]);
488 if (c == '\n')
489 ts->lineno--;
490 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
493 static int32
494 PeekChar(JSTokenStream *ts)
496 int32 c;
498 c = GetChar(ts);
499 UngetChar(ts, c);
500 return c;
504 * Peek n chars ahead into ts. Return true if n chars were read, false if
505 * there weren't enough characters in the input stream. This function cannot
506 * be used to peek into or past a newline.
508 static JSBool
509 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
511 intN i, j;
512 int32 c;
514 for (i = 0; i < n; i++) {
515 c = GetChar(ts);
516 if (c == EOF)
517 break;
518 if (c == '\n') {
519 UngetChar(ts, c);
520 break;
522 cp[i] = (jschar)c;
524 for (j = i - 1; j >= 0; j--)
525 UngetChar(ts, cp[j]);
526 return i == n;
529 static void
530 SkipChars(JSTokenStream *ts, intN n)
532 while (--n >= 0)
533 GetChar(ts);
536 static JSBool
537 MatchChar(JSTokenStream *ts, int32 expect)
539 int32 c;
541 c = GetChar(ts);
542 if (c == expect)
543 return JS_TRUE;
544 UngetChar(ts, c);
545 return JS_FALSE;
/*
 * Fill in *report for compile-time error errorNumber and deliver it.
 *
 * handle is interpreted according to flags & JSREPORT_HANDLE: a
 * JSTokenStream (JSREPORT_TS), a JSCodeGenerator (JSREPORT_CG) or a
 * JSParseNode (JSREPORT_PN, whose pn_ts supplies the token stream).  The
 * message is expanded from ap by js_ExpandErrorArguments; charArgs is passed
 * through to it.
 *
 * Returns JS_FALSE if argument expansion fails or if copying the current
 * source line runs out of memory; otherwise returns the warning value set by
 * js_ExpandErrorArguments.  The message, the line copies and
 * report->ucmessage are freed before returning; report->messageArgs is left
 * for the caller.  Unless the report is a warning, TSF_ERROR is set on the
 * token stream.
 */
548 static JSBool
549 ReportCompileErrorNumber(JSContext *cx, void *handle, uintN flags,
550 uintN errorNumber, JSErrorReport *report,
551 JSBool charArgs, va_list ap)
553 size_t linelength;
554 jschar *linechars = NULL;
555 char *linebytes = NULL;
556 JSTokenStream *ts = NULL;
557 JSCodeGenerator *cg = NULL;
558 JSParseNode *pn = NULL;
559 JSErrorReporter onError;
560 JSTokenPos *tp;
561 JSStackFrame *fp;
562 uintN index;
563 char *message;
564 JSBool warning;
566 memset(report, 0, sizeof (struct JSErrorReport));
567 report->flags = flags;
568 report->errorNumber = errorNumber;
569 message = NULL;
571 if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
572 errorNumber, &message, report, &warning,
573 charArgs, ap)) {
574 return JS_FALSE;
/* Decode handle into whichever of ts, cg and pn the flags say it is. */
577 switch (flags & JSREPORT_HANDLE) {
578 case JSREPORT_TS:
579 ts = (JSTokenStream *) handle;
580 break;
581 case JSREPORT_CG:
582 cg = (JSCodeGenerator *) handle;
583 break;
584 case JSREPORT_PN:
585 pn = (JSParseNode *) handle;
586 ts = pn->pn_ts;
587 break;
590 JS_ASSERT(!ts || ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
593 * We are typically called with non-null ts and null cg from jsparse.c.
594 * We can be called with null ts from the regexp compilation functions.
595 * The code generator (jsemit.c) may pass null ts and non-null cg.
597 if (ts) {
598 report->filename = ts->filename;
599 if (pn) {
600 report->lineno = pn->pn_pos.begin.lineno;
/* The node is on another line than linebuf holds: skip the source text. */
601 if (report->lineno != ts->lineno)
602 goto report;
604 report->lineno = ts->lineno;
/* Take NUL-terminated jschar and char copies of the current line. */
605 linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar);
606 linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar));
607 if (!linechars) {
608 warning = JS_FALSE;
609 goto out;
611 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
612 linechars[linelength] = 0;
613 linebytes = js_DeflateString(cx, linechars, linelength);
614 if (!linebytes) {
615 warning = JS_FALSE;
616 goto out;
618 report->linebuf = linebytes;
619 tp = &ts->tokens[(ts->cursor+ts->lookahead) & NTOKENS_MASK].pos;
620 if (pn)
621 tp = &pn->pn_pos;
624 * FIXME: What should instead happen here is that we should
625 * find error-tokens in userbuf, if !ts->file. That will
626 * allow us to deliver a more helpful error message, which
627 * includes all or part of the bad string or bad token. The
628 * code here yields something that looks truncated.
629 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
631 index = 0;
632 if (tp->begin.lineno == tp->end.lineno) {
633 if (tp->begin.index < ts->linepos)
634 goto report;
636 index = tp->begin.index - ts->linepos;
639 report->tokenptr = report->linebuf + index;
640 report->uclinebuf = linechars;
641 report->uctokenptr = report->uclinebuf + index;
642 } else if (cg) {
643 report->filename = cg->filename;
644 report->lineno = CG_CURRENT_LINE(cg);
645 } else {
647 * If we can't find out where the error was based on the current
648 * frame, see if the next frame has a script/pc combo we can use.
650 for (fp = cx->fp; fp; fp = fp->down) {
651 if (fp->script && fp->pc) {
652 report->filename = fp->script->filename;
653 report->lineno = js_PCToLineNumber(cx, fp->script, fp->pc);
654 break;
660 * If there's a runtime exception type associated with this error
661 * number, set that as the pending exception. For errors occuring at
662 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
664 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
665 * flag will be set in report.flags. Proper behavior for an error
666 * reporter is to ignore a report with this flag for all but top-level
667 * compilation errors. The exception will remain pending, and so long
668 * as the non-top-level "load", "eval", or "compile" native function
669 * returns false, the top-level reporter will eventually receive the
670 * uncaught exception report.
672 * XXX it'd probably be best if there was only one call to this
673 * function, but there seem to be two error reporter call points.
675 report:
676 onError = cx->errorReporter;
679 * Try to raise an exception only if there isn't one already set --
680 * otherwise the exception will describe the last compile-time error,
681 * which is likely spurious.
683 if (!ts || !(ts->flags & TSF_ERROR)) {
684 if (js_ErrorToException(cx, message, report))
685 onError = NULL;
689 * Suppress any compile-time errors that don't occur at the top level.
690 * This may still fail, as interplevel may be zero in contexts where we
691 * don't really want to call the error reporter, as when js is called
692 * by other code which could catch the error.
694 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
695 onError = NULL;
697 if (onError) {
698 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
701 * If debugErrorHook is present then we give it a chance to veto
702 * sending the error on to the regular error reporter.
704 if (hook && !hook(cx, message, report,
705 cx->debugHooks->debugErrorHookData)) {
706 onError = NULL;
709 if (onError)
710 (*onError)(cx, message, report);
/* Common exit: free everything this function allocated. */
712 out:
713 if (linebytes)
714 JS_free(cx, linebytes);
715 if (linechars)
716 JS_free(cx, linechars);
717 if (message)
718 JS_free(cx, message);
719 if (report->ucmessage)
720 JS_free(cx, (void *)report->ucmessage);
722 if (ts && !JSREPORT_IS_WARNING(flags)) {
723 /* Set the error flag to suppress spurious reports. */
724 ts->flags |= TSF_ERROR;
727 return warning;
730 JSBool
731 js_ReportCompileErrorNumber(JSContext *cx, void *handle, uintN flags,
732 uintN errorNumber, ...)
734 va_list ap;
735 JSErrorReport report;
736 JSBool warning;
738 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
739 return JS_TRUE;
741 va_start(ap, errorNumber);
742 warning = ReportCompileErrorNumber(cx, handle, flags, errorNumber,
743 &report, JS_TRUE, ap);
744 va_end(ap);
747 * We have to do this here because js_ReportCompileErrorNumberUC doesn't
748 * need to do this.
750 if (report.messageArgs) {
751 int i = 0;
752 while (report.messageArgs[i])
753 JS_free(cx, (void *)report.messageArgs[i++]);
754 JS_free(cx, (void *)report.messageArgs);
757 return warning;
760 JSBool
761 js_ReportCompileErrorNumberUC(JSContext *cx, void *handle, uintN flags,
762 uintN errorNumber, ...)
764 va_list ap;
765 JSErrorReport report;
766 JSBool warning;
768 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
769 return JS_TRUE;
771 va_start(ap, errorNumber);
772 warning = ReportCompileErrorNumber(cx, handle, flags, errorNumber,
773 &report, JS_FALSE, ap);
774 va_end(ap);
776 if (report.messageArgs)
777 JS_free(cx, (void *)report.messageArgs);
779 return warning;
782 static JSBool
783 GrowStringBuffer(JSStringBuffer *sb, size_t newlength)
785 ptrdiff_t offset;
786 jschar *bp;
788 offset = PTRDIFF(sb->ptr, sb->base, jschar);
789 JS_ASSERT(offset >= 0);
790 newlength += offset + 1;
791 if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar))
792 bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
793 else
794 bp = NULL;
795 if (!bp) {
796 free(sb->base);
797 sb->base = STRING_BUFFER_ERROR_BASE;
798 return JS_FALSE;
800 sb->base = bp;
801 sb->ptr = bp + offset;
802 sb->limit = bp + newlength - 1;
803 return JS_TRUE;
806 static void
807 FreeStringBuffer(JSStringBuffer *sb)
809 JS_ASSERT(STRING_BUFFER_OK(sb));
810 if (sb->base)
811 free(sb->base);
814 void
815 js_InitStringBuffer(JSStringBuffer *sb)
817 sb->base = sb->limit = sb->ptr = NULL;
818 sb->data = NULL;
819 sb->grow = GrowStringBuffer;
820 sb->free = FreeStringBuffer;
823 void
824 js_FinishStringBuffer(JSStringBuffer *sb)
826 sb->free(sb);
/*
 * True when sb has room for n more jschars, calling sb's grow hook to make
 * room when it does not; false when growing fails.
 *
 * Every use of sb is parenthesized so that any pointer expression (for
 * example &ts->tokenbuf) may be passed, not just a plain identifier.  Both
 * arguments may be evaluated more than once, so they must be free of side
 * effects.
 */
#define ENSURE_STRING_BUFFER(sb,n) \
    ((sb)->ptr + (n) <= (sb)->limit || (sb)->grow(sb, n))
832 static void
833 FastAppendChar(JSStringBuffer *sb, jschar c)
835 if (!STRING_BUFFER_OK(sb))
836 return;
837 if (!ENSURE_STRING_BUFFER(sb, 1))
838 return;
839 *sb->ptr++ = c;
842 void
843 js_AppendChar(JSStringBuffer *sb, jschar c)
845 jschar *bp;
847 if (!STRING_BUFFER_OK(sb))
848 return;
849 if (!ENSURE_STRING_BUFFER(sb, 1))
850 return;
851 bp = sb->ptr;
852 *bp++ = c;
853 *bp = 0;
854 sb->ptr = bp;
857 #if JS_HAS_XML_SUPPORT
859 void
860 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
862 jschar *bp;
864 if (!STRING_BUFFER_OK(sb) || count == 0)
865 return;
866 if (!ENSURE_STRING_BUFFER(sb, count))
867 return;
868 for (bp = sb->ptr; count; --count)
869 *bp++ = c;
870 *bp = 0;
871 sb->ptr = bp;
874 void
875 js_AppendCString(JSStringBuffer *sb, const char *asciiz)
877 size_t length;
878 jschar *bp;
880 if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
881 return;
882 length = strlen(asciiz);
883 if (!ENSURE_STRING_BUFFER(sb, length))
884 return;
885 for (bp = sb->ptr; length; --length)
886 *bp++ = (jschar) *asciiz++;
887 *bp = 0;
888 sb->ptr = bp;
891 void
892 js_AppendJSString(JSStringBuffer *sb, JSString *str)
894 size_t length;
895 jschar *bp;
897 if (!STRING_BUFFER_OK(sb))
898 return;
899 length = JSSTRING_LENGTH(str);
900 if (length == 0 || !ENSURE_STRING_BUFFER(sb, length))
901 return;
902 bp = sb->ptr;
903 js_strncpy(bp, JSSTRING_CHARS(str), length);
904 bp += length;
905 *bp = 0;
906 sb->ptr = bp;
/*
 * Scan an XML entity whose leading '&' has already been read from ts.
 *
 * Characters up to the closing ';' are appended to ts->tokenbuf.  The text
 * is then decoded either as a numeric character reference (&#NNN; or
 * &#xHHH;) or as one of lt, gt, amp, apos, quot.  On success the entity text
 * in tokenbuf is replaced by its value -- one jschar, or a surrogate pair for
 * values in 0x10000..0x10FFFF -- followed by a NUL, and JS_TRUE is returned.
 *
 * Returns JS_FALSE after reporting an error when EOF or a newline precedes
 * the ';', when the entity name is unknown, or when the numeric reference is
 * malformed or names a disallowed character.
 */
909 static JSBool
910 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
912 ptrdiff_t offset, length, i;
913 int32 c, d;
914 JSBool ispair;
915 jschar *bp, digit;
916 char *bytes;
917 JSErrNum msg;
919 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
920 offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar);
921 FastAppendChar(&ts->tokenbuf, '&');
922 while ((c = GetChar(ts)) != ';') {
923 if (c == EOF || c == '\n') {
924 js_ReportCompileErrorNumber(cx, ts,
925 JSREPORT_TS | JSREPORT_ERROR,
926 JSMSG_END_OF_XML_ENTITY);
927 return JS_FALSE;
929 FastAppendChar(&ts->tokenbuf, (jschar) c);
932 /* Let length be the number of jschars after the '&', including the ';'. */
933 length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset;
/* bp[0] is the '&'; the entity text proper starts at bp[1]. */
934 bp = ts->tokenbuf.base + offset;
935 c = d = 0;
936 ispair = JS_FALSE;
937 if (length > 2 && bp[1] == '#') {
938 /* Match a well-formed XML Character Reference. */
939 i = 2;
940 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
941 if (length > 9) /* at most 6 hex digits allowed */
942 goto badncr;
943 while (++i < length) {
944 digit = bp[i];
945 if (!JS7_ISHEX(digit))
946 goto badncr;
947 c = (c << 4) + JS7_UNHEX(digit);
949 } else {
950 while (i < length) {
951 digit = bp[i++];
952 if (!JS7_ISDEC(digit))
953 goto badncr;
954 c = (c * 10) + JS7_UNDEC(digit);
955 if (c < 0)
956 goto badncr;
960 if (0x10000 <= c && c <= 0x10FFFF) {
961 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
962 d = 0xDC00 + (c & 0x3FF);
963 c = 0xD7C0 + (c >> 10);
964 ispair = JS_TRUE;
965 } else {
966 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
967 if (c != 0x9 && c != 0xA && c != 0xD &&
968 !(0x20 <= c && c <= 0xD7FF) &&
969 !(0xE000 <= c && c <= 0xFFFD)) {
970 goto badncr;
973 } else {
974 /* Try to match one of the five XML 1.0 predefined entities. */
975 switch (length) {
976 case 3:
977 if (bp[2] == 't') {
978 if (bp[1] == 'l')
979 c = '<';
980 else if (bp[1] == 'g')
981 c = '>';
983 break;
984 case 4:
985 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
986 c = '&';
987 break;
988 case 5:
989 if (bp[3] == 'o') {
990 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
991 c = '\'';
992 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
993 c = '"';
995 break;
/* c is still 0 when none of the predefined names matched. */
997 if (c == 0) {
998 msg = JSMSG_UNKNOWN_XML_ENTITY;
999 goto bad;
1003 /* If we matched, retract ts->tokenbuf and store the entity's value. */
1004 *bp++ = (jschar) c;
1005 if (ispair)
1006 *bp++ = (jschar) d;
1007 *bp = 0;
1008 ts->tokenbuf.ptr = bp;
1009 return JS_TRUE;
1011 badncr:
1012 msg = JSMSG_BAD_XML_NCR;
1013 bad:
1014 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
/* The text passed to the error report excludes the leading '&'. */
1015 bytes = js_DeflateString(cx, bp + 1,
1016 PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1);
1017 if (bytes) {
1018 js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR,
1019 msg, bytes);
1020 JS_free(cx, bytes);
1022 return JS_FALSE;
1025 #endif /* JS_HAS_XML_SUPPORT */
1027 JSTokenType
1028 js_PeekToken(JSContext *cx, JSTokenStream *ts)
1030 JSTokenType tt;
1032 if (ts->lookahead != 0) {
1033 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
1034 } else {
1035 tt = js_GetToken(cx, ts);
1036 js_UngetToken(ts);
1038 return tt;
1041 JSTokenType
1042 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
1044 JSTokenType tt;
1046 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
1047 return TOK_EOL;
1048 ts->flags |= TSF_NEWLINES;
1049 tt = js_PeekToken(cx, ts);
1050 ts->flags &= ~TSF_NEWLINES;
1051 return tt;
1055 * We have encountered a '\': check for a Unicode escape sequence after it,
1056 * returning the character code value if we found a Unicode escape sequence.
1057 * Otherwise, non-destructively return the original '\'.
1059 static int32
1060 GetUnicodeEscape(JSTokenStream *ts)
1062 jschar cp[5];
1063 int32 c;
1065 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
1066 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
1067 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
1069 c = (((((JS7_UNHEX(cp[1]) << 4)
1070 + JS7_UNHEX(cp[2])) << 4)
1071 + JS7_UNHEX(cp[3])) << 4)
1072 + JS7_UNHEX(cp[4]);
1073 SkipChars(ts, 5);
1074 return c;
1076 return '\\';
1079 static JSToken *
1080 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
1082 JSToken *tp;
1084 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1085 tp = &CURRENT_TOKEN(ts);
1086 tp->ptr = ts->linebuf.ptr + adjust;
1087 tp->pos.begin.index = ts->linepos +
1088 PTRDIFF(tp->ptr, ts->linebuf.base, jschar) -
1089 ts->ungetpos;
1090 tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
1091 return tp;
1094 JSTokenType
1095 js_GetToken(JSContext *cx, JSTokenStream *ts)
1097 JSTokenType tt;
1098 int32 c, qc;
1099 JSToken *tp;
1100 JSAtom *atom;
1101 JSBool hadUnicodeEscape;
1102 const struct keyword *kw;
1103 JSBool inTarget;
1104 size_t targetLength;
1105 ptrdiff_t contentIndex;
1108 #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1109 #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar)
1110 #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1111 #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1112 ? js_AtomizeChars(cx, \
1113 TOKENBUF_BASE(), \
1114 TOKENBUF_LENGTH(), \
1115 0) \
1116 : NULL)
1117 #define ADD_TO_TOKENBUF(c) FastAppendChar(&ts->tokenbuf, (jschar) (c))
1119 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
1120 #define TOKENBUF_BASE() (ts->tokenbuf.base)
1121 #define TOKENBUF_END() (ts->tokenbuf.ptr)
1122 #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1123 #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1124 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1126 /* Check for a pushed-back token resulting from mismatching lookahead. */
1127 while (ts->lookahead != 0) {
1128 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1129 ts->lookahead--;
1130 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1131 tt = CURRENT_TOKEN(ts).type;
1132 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1133 return tt;
1136 /* If there was a fatal error, keep returning TOK_ERROR. */
1137 if (ts->flags & TSF_ERROR)
1138 return TOK_ERROR;
1140 #if JS_HAS_XML_SUPPORT
1141 if (ts->flags & TSF_XMLTEXTMODE) {
1142 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1143 tp = NewToken(ts, 0);
1144 INIT_TOKENBUF();
1145 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1147 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1148 if (c == '&' && qc == '<') {
1149 if (!GetXMLEntity(cx, ts))
1150 goto error;
1151 tt = TOK_XMLTEXT;
1152 continue;
1155 if (!JS_ISXMLSPACE(c))
1156 tt = TOK_XMLTEXT;
1157 ADD_TO_TOKENBUF(c);
1159 UngetChar(ts, c);
1161 if (TOKENBUF_LENGTH() == 0) {
1162 atom = NULL;
1163 } else {
1164 atom = TOKENBUF_TO_ATOM();
1165 if (!atom)
1166 goto error;
1168 tp->pos.end.lineno = (uint16)ts->lineno;
1169 tp->t_op = JSOP_STRING;
1170 tp->t_atom = atom;
1171 goto out;
1174 if (ts->flags & TSF_XMLTAGMODE) {
1175 tp = NewToken(ts, 0);
1176 c = GetChar(ts);
1177 if (JS_ISXMLSPACE(c)) {
1178 do {
1179 c = GetChar(ts);
1180 } while (JS_ISXMLSPACE(c));
1181 UngetChar(ts, c);
1182 tt = TOK_XMLSPACE;
1183 goto out;
1186 if (c == EOF) {
1187 tt = TOK_EOF;
1188 goto out;
1191 INIT_TOKENBUF();
1192 if (JS_ISXMLNSSTART(c)) {
1193 JSBool sawColon = JS_FALSE;
1195 ADD_TO_TOKENBUF(c);
1196 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
1197 if (c == ':') {
1198 int nextc;
1200 if (sawColon ||
1201 (nextc = PeekChar(ts),
1202 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1203 !JS_ISXMLNAME(nextc))) {
1204 js_ReportCompileErrorNumber(cx, ts,
1205 JSREPORT_TS |
1206 JSREPORT_ERROR,
1207 JSMSG_BAD_XML_QNAME);
1208 goto error;
1210 sawColon = JS_TRUE;
1213 ADD_TO_TOKENBUF(c);
1216 UngetChar(ts, c);
1217 atom = TOKENBUF_TO_ATOM();
1218 if (!atom)
1219 goto error;
1220 tp->t_op = JSOP_STRING;
1221 tp->t_atom = atom;
1222 tt = TOK_XMLNAME;
1223 goto out;
1226 switch (c) {
1227 case '{':
1228 if (ts->flags & TSF_XMLONLYMODE)
1229 goto bad_xml_char;
1230 tt = TOK_LC;
1231 goto out;
1233 case '=':
1234 tt = TOK_ASSIGN;
1235 goto out;
1237 case '"':
1238 case '\'':
1239 qc = c;
1240 while ((c = GetChar(ts)) != qc) {
1241 if (c == EOF) {
1242 js_ReportCompileErrorNumber(cx, ts,
1243 JSREPORT_TS | JSREPORT_ERROR,
1244 JSMSG_UNTERMINATED_STRING);
1245 goto error;
1249 * XML attribute values are double-quoted when pretty-printed,
1250 * so escape " if it is expressed directly in a single-quoted
1251 * attribute value.
1253 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1254 JS_ASSERT(qc == '\'');
1255 js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1256 continue;
1259 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1260 if (!GetXMLEntity(cx, ts))
1261 goto error;
1262 continue;
1265 ADD_TO_TOKENBUF(c);
1267 atom = TOKENBUF_TO_ATOM();
1268 if (!atom)
1269 goto error;
1270 tp->pos.end.lineno = (uint16)ts->lineno;
1271 tp->t_op = JSOP_STRING;
1272 tp->t_atom = atom;
1273 tt = TOK_XMLATTR;
1274 goto out;
1276 case '>':
1277 tt = TOK_XMLTAGC;
1278 goto out;
1280 case '/':
1281 if (MatchChar(ts, '>')) {
1282 tt = TOK_XMLPTAGC;
1283 goto out;
1285 /* FALL THROUGH */
1287 bad_xml_char:
1288 default:
1289 js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR,
1290 JSMSG_BAD_XML_CHARACTER);
1291 goto error;
1293 /* NOTREACHED */
1295 #endif /* JS_HAS_XML_SUPPORT */
1297 retry:
1298 do {
1299 c = GetChar(ts);
1300 if (c == '\n') {
1301 ts->flags &= ~TSF_DIRTYLINE;
1302 if (ts->flags & TSF_NEWLINES)
1303 break;
1305 } while (JS_ISSPACE(c));
1307 tp = NewToken(ts, -1);
1308 if (c == EOF) {
1309 tt = TOK_EOF;
1310 goto out;
1313 hadUnicodeEscape = JS_FALSE;
1314 if (JS_ISIDSTART(c) ||
1315 (c == '\\' &&
1316 (c = GetUnicodeEscape(ts),
1317 hadUnicodeEscape = JS_ISIDSTART(c)))) {
1318 INIT_TOKENBUF();
1319 for (;;) {
1320 ADD_TO_TOKENBUF(c);
1321 c = GetChar(ts);
1322 if (c == '\\') {
1323 c = GetUnicodeEscape(ts);
1324 if (!JS_ISIDENT(c))
1325 break;
1326 hadUnicodeEscape = JS_TRUE;
1327 } else {
1328 if (!JS_ISIDENT(c))
1329 break;
1332 UngetChar(ts, c);
1335 * Check for keywords unless we saw Unicode escape or parser asks
1336 * to ignore keywords.
1338 if (!hadUnicodeEscape &&
1339 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1340 TOKENBUF_OK() &&
1341 (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
1342 if (kw->tokentype == TOK_RESERVED) {
1343 if (!js_ReportCompileErrorNumber(cx, ts,
1344 JSREPORT_TS |
1345 JSREPORT_WARNING |
1346 JSREPORT_STRICT,
1347 JSMSG_RESERVED_ID,
1348 kw->chars)) {
1349 goto error;
1351 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1352 tt = kw->tokentype;
1353 tp->t_op = (JSOp) kw->op;
1354 goto out;
1358 atom = TOKENBUF_TO_ATOM();
1359 if (!atom)
1360 goto error;
1361 tp->t_op = JSOP_NAME;
1362 tp->t_atom = atom;
1363 tt = TOK_NAME;
1364 goto out;
1367 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1368 jsint radix;
1369 const jschar *endptr;
1370 jsdouble dval;
1372 radix = 10;
1373 INIT_TOKENBUF();
1375 if (c == '0') {
1376 ADD_TO_TOKENBUF(c);
1377 c = GetChar(ts);
1378 if (JS_TOLOWER(c) == 'x') {
1379 ADD_TO_TOKENBUF(c);
1380 c = GetChar(ts);
1381 radix = 16;
1382 } else if (JS7_ISDEC(c)) {
1383 radix = 8;
1387 while (JS7_ISHEX(c)) {
1388 if (radix < 16) {
1389 if (JS7_ISLET(c))
1390 break;
1393 * We permit 08 and 09 as decimal numbers, which makes our
1394 * behaviour a superset of the ECMA numeric grammar. We might
1395 * not always be so permissive, so we warn about it.
1397 if (radix == 8 && c >= '8') {
1398 if (!js_ReportCompileErrorNumber(cx, ts,
1399 JSREPORT_TS |
1400 JSREPORT_WARNING,
1401 JSMSG_BAD_OCTAL,
1402 c == '8' ? "08" : "09")) {
1403 goto error;
1405 radix = 10;
1408 ADD_TO_TOKENBUF(c);
1409 c = GetChar(ts);
1412 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1413 if (c == '.') {
1414 do {
1415 ADD_TO_TOKENBUF(c);
1416 c = GetChar(ts);
1417 } while (JS7_ISDEC(c));
1419 if (JS_TOLOWER(c) == 'e') {
1420 ADD_TO_TOKENBUF(c);
1421 c = GetChar(ts);
1422 if (c == '+' || c == '-') {
1423 ADD_TO_TOKENBUF(c);
1424 c = GetChar(ts);
1426 if (!JS7_ISDEC(c)) {
1427 js_ReportCompileErrorNumber(cx, ts,
1428 JSREPORT_TS | JSREPORT_ERROR,
1429 JSMSG_MISSING_EXPONENT);
1430 goto error;
1432 do {
1433 ADD_TO_TOKENBUF(c);
1434 c = GetChar(ts);
1435 } while (JS7_ISDEC(c));
1439 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1440 UngetChar(ts, c);
1441 ADD_TO_TOKENBUF(0);
1443 if (!TOKENBUF_OK())
1444 goto error;
1445 if (radix == 10) {
1446 if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1447 &endptr, &dval)) {
1448 js_ReportCompileErrorNumber(cx, ts,
1449 JSREPORT_TS | JSREPORT_ERROR,
1450 JSMSG_OUT_OF_MEMORY);
1451 goto error;
1453 } else {
1454 if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1455 &endptr, radix, &dval)) {
1456 js_ReportCompileErrorNumber(cx, ts,
1457 JSREPORT_TS | JSREPORT_ERROR,
1458 JSMSG_OUT_OF_MEMORY);
1459 goto error;
1462 tp->t_dval = dval;
1463 tt = TOK_NUMBER;
1464 goto out;
1467 if (c == '"' || c == '\'') {
1468 qc = c;
1469 INIT_TOKENBUF();
1470 while ((c = GetChar(ts)) != qc) {
1471 if (c == '\n' || c == EOF) {
1472 UngetChar(ts, c);
1473 js_ReportCompileErrorNumber(cx, ts,
1474 JSREPORT_TS | JSREPORT_ERROR,
1475 JSMSG_UNTERMINATED_STRING);
1476 goto error;
1478 if (c == '\\') {
1479 switch (c = GetChar(ts)) {
1480 case 'b': c = '\b'; break;
1481 case 'f': c = '\f'; break;
1482 case 'n': c = '\n'; break;
1483 case 'r': c = '\r'; break;
1484 case 't': c = '\t'; break;
1485 case 'v': c = '\v'; break;
1487 default:
1488 if ('0' <= c && c < '8') {
1489 int32 val = JS7_UNDEC(c);
1491 c = PeekChar(ts);
1492 if ('0' <= c && c < '8') {
1493 val = 8 * val + JS7_UNDEC(c);
1494 GetChar(ts);
1495 c = PeekChar(ts);
1496 if ('0' <= c && c < '8') {
1497 int32 save = val;
1498 val = 8 * val + JS7_UNDEC(c);
1499 if (val <= 0377)
1500 GetChar(ts);
1501 else
1502 val = save;
1506 c = (jschar)val;
1507 } else if (c == 'u') {
1508 jschar cp[4];
1509 if (PeekChars(ts, 4, cp) &&
1510 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1511 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1512 c = (((((JS7_UNHEX(cp[0]) << 4)
1513 + JS7_UNHEX(cp[1])) << 4)
1514 + JS7_UNHEX(cp[2])) << 4)
1515 + JS7_UNHEX(cp[3]);
1516 SkipChars(ts, 4);
1518 } else if (c == 'x') {
1519 jschar cp[2];
1520 if (PeekChars(ts, 2, cp) &&
1521 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1522 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1523 SkipChars(ts, 2);
1525 } else if (c == '\n') {
1526 /* ECMA follows C by removing escaped newlines. */
1527 continue;
1529 break;
1532 ADD_TO_TOKENBUF(c);
1534 atom = TOKENBUF_TO_ATOM();
1535 if (!atom)
1536 goto error;
1537 tp->pos.end.lineno = (uint16)ts->lineno;
1538 tp->t_op = JSOP_STRING;
1539 tp->t_atom = atom;
1540 tt = TOK_STRING;
1541 goto out;
1544 switch (c) {
1545 case '\n': tt = TOK_EOL; goto eol_out;
1546 case ';': tt = TOK_SEMI; break;
1547 case '[': tt = TOK_LB; break;
1548 case ']': tt = TOK_RB; break;
1549 case '{': tt = TOK_LC; break;
1550 case '}': tt = TOK_RC; break;
1551 case '(': tt = TOK_LP; break;
1552 case ')': tt = TOK_RP; break;
1553 case ',': tt = TOK_COMMA; break;
1554 case '?': tt = TOK_HOOK; break;
1556 case '.':
1557 #if JS_HAS_XML_SUPPORT
1558 if (MatchChar(ts, c))
1559 tt = TOK_DBLDOT;
1560 else
1561 #endif
1562 tt = TOK_DOT;
1563 break;
1565 case ':':
1566 #if JS_HAS_XML_SUPPORT
1567 if (MatchChar(ts, c)) {
1568 tt = TOK_DBLCOLON;
1569 break;
1571 #endif
1573 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1574 * object initializer, likewise for setter.
1576 tp->t_op = JSOP_NOP;
1577 tt = TOK_COLON;
1578 break;
1580 case '|':
1581 if (MatchChar(ts, c)) {
1582 tt = TOK_OR;
1583 } else if (MatchChar(ts, '=')) {
1584 tp->t_op = JSOP_BITOR;
1585 tt = TOK_ASSIGN;
1586 } else {
1587 tt = TOK_BITOR;
1589 break;
1591 case '^':
1592 if (MatchChar(ts, '=')) {
1593 tp->t_op = JSOP_BITXOR;
1594 tt = TOK_ASSIGN;
1595 } else {
1596 tt = TOK_BITXOR;
1598 break;
1600 case '&':
1601 if (MatchChar(ts, c)) {
1602 tt = TOK_AND;
1603 } else if (MatchChar(ts, '=')) {
1604 tp->t_op = JSOP_BITAND;
1605 tt = TOK_ASSIGN;
1606 } else {
1607 tt = TOK_BITAND;
1609 break;
1611 case '=':
1612 if (MatchChar(ts, c)) {
1613 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1614 tt = TOK_EQOP;
1615 } else {
1616 tp->t_op = JSOP_NOP;
1617 tt = TOK_ASSIGN;
1619 break;
1621 case '!':
1622 if (MatchChar(ts, '=')) {
1623 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1624 tt = TOK_EQOP;
1625 } else {
1626 tp->t_op = JSOP_NOT;
1627 tt = TOK_UNARYOP;
1629 break;
1631 #if JS_HAS_XML_SUPPORT
1632 case '@':
1633 tt = TOK_AT;
1634 break;
1635 #endif
1637 case '<':
1638 #if JS_HAS_XML_SUPPORT
1640 * After much testing, it's clear that Postel's advice to protocol
1641 * designers ("be liberal in what you accept, and conservative in what
1642 * you send") invites a natural-law repercussion for JS as "protocol":
1644 * "If you are liberal in what you accept, others will utterly fail to
1645 * be conservative in what they send."
1647 * Which means you will get <!-- comments to end of line in the middle
1648 * of .js files, and after if conditions whose then statements are on
1649 * the next line, and other wonders. See at least the following bugs:
1650 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1651 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1652 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1654 * So without JSOPTION_XML, we never scan an XML comment or CDATA
1655 * literal. We always scan <! as the start of an HTML comment hack
1656 * to end of line, used since Netscape 2 to hide script tag content
1657 * from script-unaware browsers.
1659 if ((ts->flags & TSF_OPERAND) &&
1660 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1661 /* Check for XML comment or CDATA section. */
1662 if (MatchChar(ts, '!')) {
1663 INIT_TOKENBUF();
1665 /* Scan XML comment. */
1666 if (MatchChar(ts, '-')) {
1667 if (!MatchChar(ts, '-'))
1668 goto bad_xml_markup;
1669 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1670 if (c == EOF)
1671 goto bad_xml_markup;
1672 ADD_TO_TOKENBUF(c);
1674 tt = TOK_XMLCOMMENT;
1675 tp->t_op = JSOP_XMLCOMMENT;
1676 goto finish_xml_markup;
1679 /* Scan CDATA section. */
1680 if (MatchChar(ts, '[')) {
1681 jschar cp[6];
1682 if (PeekChars(ts, 6, cp) &&
1683 cp[0] == 'C' &&
1684 cp[1] == 'D' &&
1685 cp[2] == 'A' &&
1686 cp[3] == 'T' &&
1687 cp[4] == 'A' &&
1688 cp[5] == '[') {
1689 SkipChars(ts, 6);
1690 while ((c = GetChar(ts)) != ']' ||
1691 !PeekChars(ts, 2, cp) ||
1692 cp[0] != ']' ||
1693 cp[1] != '>') {
1694 if (c == EOF)
1695 goto bad_xml_markup;
1696 ADD_TO_TOKENBUF(c);
1698 GetChar(ts); /* discard ] but not > */
1699 tt = TOK_XMLCDATA;
1700 tp->t_op = JSOP_XMLCDATA;
1701 goto finish_xml_markup;
1703 goto bad_xml_markup;
1707 /* Check for processing instruction. */
1708 if (MatchChar(ts, '?')) {
1709 inTarget = JS_TRUE;
1710 targetLength = 0;
1711 contentIndex = -1;
1713 INIT_TOKENBUF();
1714 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1715 if (c == EOF)
1716 goto bad_xml_markup;
1717 if (inTarget) {
1718 if (JS_ISXMLSPACE(c)) {
1719 if (TOKENBUF_LENGTH() == 0)
1720 goto bad_xml_markup;
1721 inTarget = JS_FALSE;
1722 } else {
1723 if (!((TOKENBUF_LENGTH() == 0)
1724 ? JS_ISXMLNSSTART(c)
1725 : JS_ISXMLNS(c))) {
1726 goto bad_xml_markup;
1728 ++targetLength;
1730 } else {
1731 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1732 contentIndex = TOKENBUF_LENGTH();
1734 ADD_TO_TOKENBUF(c);
1736 if (targetLength == 0)
1737 goto bad_xml_markup;
1738 if (!TOKENBUF_OK())
1739 goto error;
1740 if (contentIndex < 0) {
1741 atom = cx->runtime->atomState.emptyAtom;
1742 } else {
1743 atom = js_AtomizeChars(cx,
1744 &TOKENBUF_CHAR(contentIndex),
1745 TOKENBUF_LENGTH() - contentIndex,
1747 if (!atom)
1748 goto error;
1750 TRIM_TOKENBUF(targetLength);
1751 tp->t_atom2 = atom;
1752 tt = TOK_XMLPI;
1754 finish_xml_markup:
1755 if (!MatchChar(ts, '>'))
1756 goto bad_xml_markup;
1757 atom = TOKENBUF_TO_ATOM();
1758 if (!atom)
1759 goto error;
1760 tp->t_atom = atom;
1761 tp->pos.end.lineno = (uint16)ts->lineno;
1762 goto out;
1765 /* An XML start-of-tag character. */
1766 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1767 goto out;
1769 bad_xml_markup:
1770 js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR,
1771 JSMSG_BAD_XML_MARKUP);
1772 goto error;
1774 #endif /* JS_HAS_XML_SUPPORT */
1776 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1777 if (MatchChar(ts, '!')) {
1778 if (MatchChar(ts, '-')) {
1779 if (MatchChar(ts, '-')) {
1780 ts->flags |= TSF_IN_HTML_COMMENT;
1781 goto skipline;
1783 UngetChar(ts, '-');
1785 UngetChar(ts, '!');
1787 if (MatchChar(ts, c)) {
1788 tp->t_op = JSOP_LSH;
1789 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1790 } else {
1791 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1792 tt = TOK_RELOP;
1794 break;
1796 case '>':
1797 if (MatchChar(ts, c)) {
1798 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1799 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1800 } else {
1801 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1802 tt = TOK_RELOP;
1804 break;
1806 case '*':
1807 tp->t_op = JSOP_MUL;
1808 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1809 break;
1811 case '/':
1812 if (MatchChar(ts, '/')) {
1814 * Hack for source filters such as the Mozilla XUL preprocessor:
1815 * "//@line 123\n" sets the number of the *next* line after the
1816 * comment to 123.
1818 if (JS_HAS_ATLINE_OPTION(cx)) {
1819 jschar cp[5];
1820 uintN i, line, temp;
1821 char filename[1024];
1823 if (PeekChars(ts, 5, cp) &&
1824 cp[0] == '@' &&
1825 cp[1] == 'l' &&
1826 cp[2] == 'i' &&
1827 cp[3] == 'n' &&
1828 cp[4] == 'e') {
1829 SkipChars(ts, 5);
1830 while ((c = GetChar(ts)) != '\n' && JS_ISSPACE(c))
1831 continue;
1832 if (JS7_ISDEC(c)) {
1833 line = JS7_UNDEC(c);
1834 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1835 temp = 10 * line + JS7_UNDEC(c);
1836 if (temp < line) {
1837 /* Ignore overlarge line numbers. */
1838 goto skipline;
1840 line = temp;
1842 while (c != '\n' && JS_ISSPACE(c))
1843 c = GetChar(ts);
1844 i = 0;
1845 if (c == '"') {
1846 while ((c = GetChar(ts)) != EOF && c != '"') {
1847 if (c == '\n') {
1848 UngetChar(ts, c);
1849 goto skipline;
1851 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1852 goto skipline;
1853 filename[i++] = (char) c;
1855 if (c == '"') {
1856 while ((c = GetChar(ts)) != '\n' &&
1857 JS_ISSPACE(c)) {
1858 continue;
1862 filename[i] = '\0';
1863 if (c == '\n') {
1864 if (i > 0) {
1865 if (ts->flags & TSF_OWNFILENAME)
1866 JS_free(cx, (void *) ts->filename);
1867 ts->filename = JS_strdup(cx, filename);
1868 if (!ts->filename)
1869 goto error;
1870 ts->flags |= TSF_OWNFILENAME;
1872 ts->lineno = line;
1875 UngetChar(ts, c);
1879 skipline:
1880 /* Optimize line skipping if we are not in an HTML comment. */
1881 if (ts->flags & TSF_IN_HTML_COMMENT) {
1882 while ((c = GetChar(ts)) != EOF && c != '\n') {
1883 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1884 ts->flags &= ~TSF_IN_HTML_COMMENT;
1886 } else {
1887 while ((c = GetChar(ts)) != EOF && c != '\n')
1888 continue;
1890 UngetChar(ts, c);
1891 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1892 goto retry;
1895 if (MatchChar(ts, '*')) {
1896 while ((c = GetChar(ts)) != EOF &&
1897 !(c == '*' && MatchChar(ts, '/'))) {
1898 /* Ignore all characters until comment close. */
1900 if (c == EOF) {
1901 js_ReportCompileErrorNumber(cx, ts,
1902 JSREPORT_TS | JSREPORT_ERROR,
1903 JSMSG_UNTERMINATED_COMMENT);
1904 goto error;
1906 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1907 goto retry;
1910 if (ts->flags & TSF_OPERAND) {
1911 JSObject *obj;
1912 uintN flags;
1913 JSBool inCharClass = JS_FALSE;
1914 JSParsedObjectBox *regexpPob;
1916 INIT_TOKENBUF();
1917 for (;;) {
1918 c = GetChar(ts);
1919 if (c == '\n' || c == EOF) {
1920 UngetChar(ts, c);
1921 js_ReportCompileErrorNumber(cx, ts,
1922 JSREPORT_TS | JSREPORT_ERROR,
1923 JSMSG_UNTERMINATED_REGEXP);
1924 goto error;
1926 if (c == '\\') {
1927 ADD_TO_TOKENBUF(c);
1928 c = GetChar(ts);
1929 } else if (c == '[') {
1930 inCharClass = JS_TRUE;
1931 } else if (c == ']') {
1932 inCharClass = JS_FALSE;
1933 } else if (c == '/' && !inCharClass) {
1934 /* For compat with IE, allow unescaped / in char classes. */
1935 break;
1937 ADD_TO_TOKENBUF(c);
1939 for (flags = 0; ; ) {
1940 c = PeekChar(ts);
1941 if (c == 'g')
1942 flags |= JSREG_GLOB;
1943 else if (c == 'i')
1944 flags |= JSREG_FOLD;
1945 else if (c == 'm')
1946 flags |= JSREG_MULTILINE;
1947 else if (c == 'y')
1948 flags |= JSREG_STICKY;
1949 else
1950 break;
1951 GetChar(ts);
1953 c = PeekChar(ts);
1954 if (JS7_ISLET(c)) {
1955 tp->ptr = ts->linebuf.ptr - 1;
1956 js_ReportCompileErrorNumber(cx, ts,
1957 JSREPORT_TS | JSREPORT_ERROR,
1958 JSMSG_BAD_REGEXP_FLAG);
1959 (void) GetChar(ts);
1960 goto error;
1962 /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1963 if (!TOKENBUF_OK())
1964 goto error;
1965 NUL_TERM_TOKENBUF();
1966 obj = js_NewRegExpObject(cx, ts,
1967 TOKENBUF_BASE(),
1968 TOKENBUF_LENGTH(),
1969 flags);
1970 if (!obj)
1971 goto error;
1973 regexpPob = js_NewParsedObjectBox(cx, ts->parseContext, obj);
1974 if (!regexpPob)
1975 goto error;
1978 * If the regexp's script is one-shot, we can avoid the extra
1979 * fork-on-exec costs of JSOP_REGEXP by selecting JSOP_OBJECT.
1980 * Otherwise, to avoid incorrect proto, parent, and lastIndex
1981 * sharing among threads and sequentially across re-execution,
1982 * select JSOP_REGEXP.
1984 tp->t_op = (cx->fp->flags & (JSFRAME_EVAL | JSFRAME_COMPILE_N_GO))
1985 ? JSOP_OBJECT
1986 : JSOP_REGEXP;
1987 tp->t_pob = regexpPob;
1988 tt = TOK_OBJECT;
1989 break;
1992 tp->t_op = JSOP_DIV;
1993 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1994 break;
1996 case '%':
1997 tp->t_op = JSOP_MOD;
1998 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1999 break;
2001 case '~':
2002 tp->t_op = JSOP_BITNOT;
2003 tt = TOK_UNARYOP;
2004 break;
2006 case '+':
2007 if (MatchChar(ts, '=')) {
2008 tp->t_op = JSOP_ADD;
2009 tt = TOK_ASSIGN;
2010 } else if (MatchChar(ts, c)) {
2011 tt = TOK_INC;
2012 } else {
2013 tp->t_op = JSOP_POS;
2014 tt = TOK_PLUS;
2016 break;
2018 case '-':
2019 if (MatchChar(ts, '=')) {
2020 tp->t_op = JSOP_SUB;
2021 tt = TOK_ASSIGN;
2022 } else if (MatchChar(ts, c)) {
2023 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
2024 ts->flags &= ~TSF_IN_HTML_COMMENT;
2025 goto skipline;
2027 tt = TOK_DEC;
2028 } else {
2029 tp->t_op = JSOP_NEG;
2030 tt = TOK_MINUS;
2032 break;
2034 #if JS_HAS_SHARP_VARS
2035 case '#':
2037 uint32 n;
2039 c = GetChar(ts);
2040 if (!JS7_ISDEC(c)) {
2041 UngetChar(ts, c);
2042 goto badchar;
2044 n = (uint32)JS7_UNDEC(c);
2045 for (;;) {
2046 c = GetChar(ts);
2047 if (!JS7_ISDEC(c))
2048 break;
2049 n = 10 * n + JS7_UNDEC(c);
2050 if (n >= UINT16_LIMIT) {
2051 js_ReportCompileErrorNumber(cx, ts,
2052 JSREPORT_TS | JSREPORT_ERROR,
2053 JSMSG_SHARPVAR_TOO_BIG);
2054 goto error;
2057 tp->t_dval = (jsdouble) n;
2058 if (JS_HAS_STRICT_OPTION(cx) &&
2059 (c == '=' || c == '#')) {
2060 char buf[20];
2061 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
2062 if (!js_ReportCompileErrorNumber(cx, ts,
2063 JSREPORT_TS |
2064 JSREPORT_WARNING |
2065 JSREPORT_STRICT,
2066 JSMSG_DEPRECATED_USAGE,
2067 buf)) {
2068 goto error;
2071 if (c == '=')
2072 tt = TOK_DEFSHARP;
2073 else if (c == '#')
2074 tt = TOK_USESHARP;
2075 else
2076 goto badchar;
2077 break;
2079 #endif /* JS_HAS_SHARP_VARS */
2081 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
2082 badchar:
2083 #endif
2085 default:
2086 js_ReportCompileErrorNumber(cx, ts, JSREPORT_TS | JSREPORT_ERROR,
2087 JSMSG_ILLEGAL_CHARACTER);
2088 goto error;
2091 out:
2092 JS_ASSERT(tt != TOK_EOL);
2093 ts->flags |= TSF_DIRTYLINE;
2095 eol_out:
2096 if (!STRING_BUFFER_OK(&ts->tokenbuf))
2097 tt = TOK_ERROR;
2098 JS_ASSERT(tt < TOK_LIMIT);
2099 tp->pos.end.index = ts->linepos +
2100 PTRDIFF(ts->linebuf.ptr, ts->linebuf.base, jschar) -
2101 ts->ungetpos;
2102 tp->type = tt;
2103 return tt;
2105 error:
2106 tt = TOK_ERROR;
2107 ts->flags |= TSF_ERROR;
2108 goto out;
/*
 * Retire the token-buffer helper macros invoked by the scanner above so that
 * none of them stays visible to the code that follows.
 *
 * TOKENBUF_END is listed as well: the scanner invokes it when handing the
 * buffered number text to js_strtod and js_strtointeger, yet it was missing
 * from this clean-up list.  #undef of a name that is not currently defined
 * as a macro is ignored, so the extra directive is harmless in any case.
 */
#undef INIT_TOKENBUF
#undef TOKENBUF_LENGTH
#undef TOKENBUF_OK
#undef TOKENBUF_TO_ATOM
#undef ADD_TO_TOKENBUF
#undef TOKENBUF_BASE
#undef TOKENBUF_END
#undef TOKENBUF_CHAR
#undef TRIM_TOKENBUF
#undef NUL_TERM_TOKENBUF
2121 void
2122 js_UngetToken(JSTokenStream *ts)
2124 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
2125 ts->lookahead++;
2126 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
2129 JSBool
2130 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2132 if (js_GetToken(cx, ts) == tt)
2133 return JS_TRUE;
2134 js_UngetToken(ts);
2135 return JS_FALSE;