js/src/jsscan.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2  * vim: set sw=4 ts=8 et tw=78:
   3  *
   4  * ***** BEGIN LICENSE BLOCK *****
   5  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version
   8  * 1.1 (the "License"); you may not use this file except in compliance with
   9  * the License. You may obtain a copy of the License at
  10  * http://www.mozilla.org/MPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS IS" basis,
  13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14  * for the specific language governing rights and limitations under the
  15  * License.
  16  *
  17  * The Original Code is Mozilla Communicator client code, released
  18  * March 31, 1998.
  19  *
  20  * The Initial Developer of the Original Code is
  21  * Netscape Communications Corporation.
  22  * Portions created by the Initial Developer are Copyright (C) 1998
  23  * the Initial Developer. All Rights Reserved.
  24  *
  25  * Contributor(s):
  26  *
  27  * Alternatively, the contents of this file may be used under the terms of
  28  * either of the GNU General Public License Version 2 or later (the "GPL"),
  29  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30  * in which case the provisions of the GPL or the LGPL are applicable instead
  31  * of those above. If you wish to allow use of your version of this file only
  32  * under the terms of either the GPL or the LGPL, and not to allow others to
  33  * use your version of this file under the terms of the MPL, indicate your
  34  * decision by deleting the provisions above and replace them with the notice
  35  * and other provisions required by the GPL or the LGPL. If you do not delete
  36  * the provisions above, a recipient may use your version of this file under
  37  * the terms of any one of the MPL, the GPL or the LGPL.
  38  *
  39  * ***** END LICENSE BLOCK ***** */
  40
  41 /*
  42  * JS lexical scanner.
  43  */
  44 #include <stdio.h>      /* first to avoid trouble on some systems */
  45 #include <errno.h>
  46 #include <limits.h>
  47 #include <math.h>
  48 #ifdef HAVE_MEMORY_H
  49 #include <memory.h>
  50 #endif
  51 #include <stdarg.h>
  52 #include <stdlib.h>
  53 #include <string.h>
  54 #include "jstypes.h"
  55 #include "jsstdint.h"
  56 #include "jsarena.h" /* Added by JSIFY */
  57 #include "jsbit.h"
  58 #include "jsutil.h" /* Added by JSIFY */
  59 #include "jsdtoa.h"
  60 #include "jsprf.h"
  61 #include "jsapi.h"
  62 #include "jsatom.h"
  63 #include "jscntxt.h"
  64 #include "jsversion.h"
  65 #include "jsemit.h"
  66 #include "jsexn.h"
  67 #include "jsnum.h"
  68 #include "jsopcode.h"
  69 #include "jsparse.h"
  70 #include "jsregexp.h"
  71 #include "jsscan.h"
  72 #include "jsscript.h"
  73 #include "jsstaticcheck.h"
  74 #include "jsvector.h"
  75
  76 #if JS_HAS_XML_SUPPORT
  77 #include "jsxml.h"
  78 #endif
  79
  80 using namespace js;
  81
/*
 * Define one C string constant per keyword.  For every JS_KEYWORD(...) entry
 * expanded from jskeyword.tbl this emits
 *     const char js_<keyword>_str[] = "<keyword>";
 * The type, op and version columns of the table are ignored here.
 */
#define JS_KEYWORD(keyword, type, op, version) \
    const char js_##keyword##_str[] = #keyword;
#include "jskeyword.tbl"
#undef JS_KEYWORD
  86
/* One row of the keyword table (see keyword_defs). */
struct keyword {
    const char  *chars;         /* C string with keyword text */
    TokenKind   tokentype;      /* token kind returned by js_CheckKeyword for this keyword */
    JSOp        op;             /* JSOp column taken from jskeyword.tbl */
    JSVersion   version;        /* JSVersion column taken from jskeyword.tbl */
};
  93
/*
 * Keyword table: one entry per JS_KEYWORD(...) line expanded from
 * jskeyword.tbl, each pointing at the matching js_<keyword>_str constant.
 * FindKeyword returns pointers into this array.
 */
static const struct keyword keyword_defs[] = {
#define JS_KEYWORD(keyword, type, op, version) \
    {js_##keyword##_str, type, op, version},
#include "jskeyword.tbl"
#undef JS_KEYWORD
};

/* Number of entries in keyword_defs. */
#define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
 102
 103 static const struct keyword *
 104 FindKeyword(const jschar *s, size_t length)
 105 {
 106     register size_t i;
 107     const struct keyword *kw;
 108     const char *chars;
 109
 110     JS_ASSERT(length != 0);
 111
 112 #define JSKW_LENGTH()           length
 113 #define JSKW_AT(column)         s[column]
 114 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
 115 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
 116 #define JSKW_NO_MATCH()         goto no_match;
 117 #include "jsautokw.h"
 118 #undef JSKW_NO_MATCH
 119 #undef JSKW_TEST_GUESS
 120 #undef JSKW_GOT_MATCH
 121 #undef JSKW_AT
 122 #undef JSKW_LENGTH
 123
 124   got_match:
 125     return &keyword_defs[i];
 126
 127   test_guess:
 128     kw = &keyword_defs[i];
 129     chars = kw->chars;
 130     do {
 131         if (*s++ != (unsigned char)(*chars++))
 132             goto no_match;
 133     } while (--length != 0);
 134     return kw;
 135
 136   no_match:
 137     return NULL;
 138 }
 139
 140 TokenKind
 141 js_CheckKeyword(const jschar *str, size_t length)
 142 {
 143     const struct keyword *kw;
 144
 145     JS_ASSERT(length != 0);
 146     kw = FindKeyword(str, length);
 147     return kw ? kw->tokentype : TOK_EOF;
 148 }
 149
 150 JSBool
 151 js_IsIdentifier(JSString *str)
 152 {
 153     size_t length;
 154     jschar c;
 155     const jschar *chars, *end;
 156
 157     str->getCharsAndLength(chars, length);
 158     if (length == 0)
 159         return JS_FALSE;
 160     c = *chars;
 161     if (!JS_ISIDSTART(c))
 162         return JS_FALSE;
 163     end = chars + length;
 164     while (++chars != end) {
 165         c = *chars;
 166         if (!JS_ISIDENT(c))
 167             return JS_FALSE;
 168     }
 169     return JS_TRUE;
 170 }
 171
#ifdef _MSC_VER
/*
 * Disable MSVC warning 4351 around the constructor only (push/pop restores
 * the previous state).  NOTE(review): presumably triggered by the array
 * members value-initialized in the initializer list below -- confirm against
 * the class declaration.
 */
#pragma warning(push)
#pragma warning(disable:4351)
#endif

/*
 * Initialize members that aren't initialized in |init|.
 *
 * Every member written with empty parentheses is value-initialized; |cx| is
 * stored and also handed to tokenbuf's constructor.  The constructor body is
 * empty: filename, lineno, linebuf, userbuf, listener and listenerData are
 * set up later by init().
 */
TokenStream::TokenStream(JSContext *cx)
  : cx(cx), tokens(), cursor(), lookahead(), ungetpos(), ungetbuf(), flags(),
    linepos(), lineposNext(), file(), listenerTSData(), tokenbuf(cx)
{}

#ifdef _MSC_VER
#pragma warning(pop)
#endif
 186
 187 bool
 188 TokenStream::init(const jschar *base, size_t length, FILE *fp, const char *fn, uintN ln)
 189 {
 190     jschar *buf;
 191
 192     JS_ASSERT_IF(fp, !base);
 193     JS_ASSERT_IF(!base, length == 0);
 194     size_t nb = fp
 195          ? 2 * LINE_LIMIT * sizeof(jschar)
 196          : LINE_LIMIT * sizeof(jschar);
 197     cx->tempPool.allocateCast<jschar *>(buf, nb);
 198     if (!buf) {
 199         js_ReportOutOfScriptQuota(cx);
 200         return false;
 201     }
 202     memset(buf, 0, nb);
 203
 204     /* Initialize members. */
 205     filename = fn;
 206     lineno = ln;
 207     linebuf.base = linebuf.limit = linebuf.ptr = buf;
 208     if (fp) {
 209         file = fp;
 210         userbuf.base = buf + LINE_LIMIT;
 211         userbuf.ptr = userbuf.limit = userbuf.base + LINE_LIMIT;
 212     } else {
 213         userbuf.base = (jschar *)base;
 214         userbuf.limit = (jschar *)base + length;
 215         userbuf.ptr = (jschar *)base;
 216     }
 217     listener = cx->debugHooks->sourceHandler;
 218     listenerData = cx->debugHooks->sourceHandlerData;
 219     return true;
 220 }
 221
 222 void
 223 TokenStream::close()
 224 {
 225     if (flags & TSF_OWNFILENAME)
 226         cx->free((void *) filename);
 227 }
 228
/*
 * Use the fastest available getc.
 *
 * fast_getc maps to getc_unlocked when HAVE_GETC_UNLOCKED is defined, to
 * _getc_nolock when HAVE__GETC_NOLOCK is defined, and to plain getc
 * otherwise.
 */
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
 237
 238 JS_FRIEND_API(int)
 239 js_fgets(char *buf, int size, FILE *file)
 240 {
 241     int n, i, c;
 242     JSBool crflag;
 243
 244     n = size - 1;
 245     if (n < 0)
 246         return -1;
 247
 248     crflag = JS_FALSE;
 249     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
 250         buf[i] = c;
 251         if (c == '\n') {        /* any \n ends a line */
 252             i++;                /* keep the \n; we know there is room for \0 */
 253             break;
 254         }
 255         if (crflag) {           /* \r not followed by \n ends line at the \r */
 256             ungetc(c, file);
 257             break;              /* and overwrite c in buf with \0 */
 258         }
 259         crflag = (c == '\r');
 260     }
 261
 262     buf[i] = '\0';
 263     return i;
 264 }
 265
 266 /*
 267  * Nb: This does *not* append a terminating '\0'.  Returns the number of chars
 268  * read from the file.
 269  */
 270 int
 271 TokenStream::fillUserbuf()
 272 {
 273     /*
 274      * We avoid splitting a \r\n pair, because this makes things much easier
 275      * for getChar().  To do this, we only try to fill userbuf up with
 276      * LINE_LIMIT-1 chars.  Once we've reached that number, if the last one is
 277      * \r then we check if the following one is \n;  if so we get it too,
 278      * knowing that we have space for it.
 279      */
 280     jschar *buf = userbuf.base;
 281     int n = LINE_LIMIT - 1;     /* reserve space for \n following a \r */
 282     JS_ASSERT(n > 0);
 283     int i;
 284     i = 0;
 285     while (true) {
 286         int c = fast_getc(file);
 287         if (c == EOF)
 288             break;
 289         buf[i] = (jschar) (unsigned char) c;
 290         i++;
 291
 292         if (i == n) {
 293             if (buf[i - 1] == '\r') {
 294                 /* Look for a following \n.  We know we have space in buf for it. */
 295                 c = fast_getc(file);
 296                 if (c == EOF)
 297                     break;
 298                 if (c == '\n') {
 299                     buf[i] = (jschar) (unsigned char) c;
 300                     i++;
 301                     break;
 302                 }
 303                 ungetc(c, file);    /* \r wasn't followed by \n, unget */
 304             }
 305             break;
 306         }
 307     }
 308     return i;
 309 }
 310
 311 int32
 312 TokenStream::getCharFillLinebuf()
 313 {
 314     ptrdiff_t ulen = userbuf.limit - userbuf.ptr;
 315     if (ulen <= 0) {
 316         if (!file) {
 317             flags |= TSF_EOF;
 318             return EOF;
 319         }
 320
 321         /* Fill userbuf so that \r and \r\n convert to \n. */
 322         ulen = fillUserbuf();
 323         JS_ASSERT(ulen >= 0);
 324         if (ulen == 0) {
 325             flags |= TSF_EOF;
 326             return EOF;
 327         }
 328         userbuf.limit = userbuf.base + ulen;
 329         userbuf.ptr = userbuf.base;
 330     }
 331     if (listener)
 332         listener(filename, lineno, userbuf.ptr, ulen, &listenerTSData, listenerData);
 333
 334     /*
 335      * Copy from userbuf to linebuf.  Stop when any of these happen:
 336      * (a) we reach the end of userbuf;
 337      * (b) we reach the end of linebuf;
 338      * (c) we hit an EOL.
 339      *
 340      * "EOL" means any of: \r, \n, \r\n, or the Unicode line and paragraph
 341      * separators.
 342      */
 343     jschar *from = userbuf.ptr;
 344     jschar *to = linebuf.base;
 345
 346     int llenAdjust = 0;
 347     int limit = JS_MIN(size_t(ulen), LINE_LIMIT);
 348     int i = 0;
 349     while (i < limit) {
 350         /* Copy the jschar from userbuf to linebuf. */
 351         jschar d = to[i] = from[i];
 352         i++;
 353
 354         /*
 355          * Normalize the copied jschar if it was a newline.  Try to
 356          * prevent multiple tests on most characters by first
 357          * filtering out characters that aren't 000x or 202x.
 358          */
 359         if ((d & 0xDFD0) == 0) {
 360             if (d == '\n') {
 361                 break;
 362             }
 363
 364             if (d == '\r') {
 365                 to[i - 1] = '\n';       /* overwrite with '\n' */
 366                 if (i < ulen && from[i] == '\n') {
 367                     i++;                /* skip over '\n' */
 368                     llenAdjust = -1;
 369                 }
 370                 break;
 371             }
 372
 373             if (d == LINE_SEPARATOR || d == PARA_SEPARATOR) {
 374                 to[i - 1] = '\n';       /* overwrite with '\n' */
 375                 break;
 376             }
 377         }
 378     }
 379
 380     /* At this point 'i' is the index one past the last char copied. */
 381     ulen = i;
 382     userbuf.ptr += ulen;
 383
 384     /* Reset linebuf based on normalized length. */
 385     linebuf.ptr = linebuf.base;
 386     linebuf.limit = linebuf.base + ulen + llenAdjust;
 387
 388     /* Update position of linebuf within physical userbuf line. */
 389     linepos = lineposNext;
 390     if (linebuf.limit[-1] == '\n')
 391         lineposNext = 0;
 392     else
 393         lineposNext += ulen;
 394
 395     return *linebuf.ptr++;
 396 }
 397
 398 /*
 399  * This gets the next char, normalizing all EOL sequences to '\n' as it goes.
 400  */
 401 int32
 402 TokenStream::getChar()
 403 {
 404     int32 c;
 405     if (ungetpos != 0) {
 406         c = ungetbuf[--ungetpos];
 407     } else if (linebuf.ptr == linebuf.limit) {
 408         c = getCharFillLinebuf();
 409     } else {
 410         c = *linebuf.ptr++;
 411     }
 412     if (c == '\n')
 413         lineno++;
 414     return c;
 415 }
 416
 417 void
 418 TokenStream::ungetChar(int32 c)
 419 {
 420     if (c == EOF)
 421         return;
 422     JS_ASSERT(ungetpos < JS_ARRAY_LENGTH(ungetbuf));
 423     if (c == '\n')
 424         lineno--;
 425     ungetbuf[ungetpos++] = (jschar)c;
 426 }
 427
 428 /*
 429  * Peek n chars ahead into ts.  Return true if n chars were read, false if
 430  * there weren't enough characters in the input stream.  This function cannot
 431  * be used to peek into or past a newline.
 432  */
 433 JSBool
 434 TokenStream::peekChars(intN n, jschar *cp)
 435 {
 436     intN i, j;
 437     int32 c;
 438
 439     for (i = 0; i < n; i++) {
 440         c = getChar();
 441         if (c == EOF)
 442             break;
 443         if (c == '\n') {
 444             ungetChar(c);
 445             break;
 446         }
 447         cp[i] = (jschar)c;
 448     }
 449     for (j = i - 1; j >= 0; j--)
 450         ungetChar(cp[j]);
 451     return i == n;
 452 }
 453
 454 bool
 455 TokenStream::reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber,
 456                                         va_list ap)
 457 {
 458     JSErrorReport report;
 459     char *message;
 460     size_t linelength;
 461     jschar *linechars;
 462     char *linebytes;
 463     bool warning;
 464     JSBool ok;
 465     TokenPos *tp;
 466     uintN index, i;
 467     JSErrorReporter onError;
 468
 469     JS_ASSERT(linebuf.limit <= linebuf.base + LINE_LIMIT);
 470
 471     if (JSREPORT_IS_STRICT(flags) && !JS_HAS_STRICT_OPTION(cx))
 472         return JS_TRUE;
 473
 474     warning = JSREPORT_IS_WARNING(flags);
 475     if (warning && JS_HAS_WERROR_OPTION(cx)) {
 476         flags &= ~JSREPORT_WARNING;
 477         warning = false;
 478     }
 479
 480     PodZero(&report);
 481     report.flags = flags;
 482     report.errorNumber = errorNumber;
 483     message = NULL;
 484     linechars = NULL;
 485     linebytes = NULL;
 486
 487     MUST_FLOW_THROUGH("out");
 488     ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
 489                                  errorNumber, &message, &report,
 490                                  !(flags & JSREPORT_UC), ap);
 491     if (!ok) {
 492         warning = false;
 493         goto out;
 494     }
 495
 496     report.filename = filename;
 497
 498     if (pn) {
 499         report.lineno = pn->pn_pos.begin.lineno;
 500         if (report.lineno != lineno)
 501             goto report;
 502         tp = &pn->pn_pos;
 503     } else {
 504         /* Point to the current token, not the next one to get. */
 505         tp = &tokens[cursor].pos;
 506     }
 507     report.lineno = lineno;
 508     linelength = linebuf.limit - linebuf.base;
 509     linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
 510     if (!linechars) {
 511         warning = false;
 512         goto out;
 513     }
 514     memcpy(linechars, linebuf.base, linelength * sizeof(jschar));
 515     linechars[linelength] = 0;
 516     linebytes = js_DeflateString(cx, linechars, linelength);
 517     if (!linebytes) {
 518         warning = false;
 519         goto out;
 520     }
 521     report.linebuf = linebytes;
 522
 523     /*
 524      * FIXME: What should instead happen here is that we should
 525      * find error-tokens in userbuf, if !file.  That will
 526      * allow us to deliver a more helpful error message, which
 527      * includes all or part of the bad string or bad token.  The
 528      * code here yields something that looks truncated.
 529      * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
 530      */
 531     index = 0;
 532     if (tp->begin.lineno == tp->end.lineno) {
 533         if (tp->begin.index < linepos)
 534             goto report;
 535
 536         index = tp->begin.index - linepos;
 537     }
 538
 539     report.tokenptr = report.linebuf + index;
 540     report.uclinebuf = linechars;
 541     report.uctokenptr = report.uclinebuf + index;
 542
 543     /*
 544      * If there's a runtime exception type associated with this error
 545      * number, set that as the pending exception.  For errors occuring at
 546      * compile time, this is very likely to be a JSEXN_SYNTAXERR.
 547      *
 548      * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
 549      * flag will be set in report.flags.  Proper behavior for an error
 550      * reporter is to ignore a report with this flag for all but top-level
 551      * compilation errors.  The exception will remain pending, and so long
 552      * as the non-top-level "load", "eval", or "compile" native function
 553      * returns false, the top-level reporter will eventually receive the
 554      * uncaught exception report.
 555      *
 556      * XXX it'd probably be best if there was only one call to this
 557      * function, but there seem to be two error reporter call points.
 558      */
 559   report:
 560     onError = cx->errorReporter;
 561
 562     /*
 563      * Try to raise an exception only if there isn't one already set --
 564      * otherwise the exception will describe the last compile-time error,
 565      * which is likely spurious.
 566      */
 567     if (!(flags & TSF_ERROR)) {
 568         if (js_ErrorToException(cx, message, &report, NULL, NULL))
 569             onError = NULL;
 570     }
 571
 572     /*
 573      * Suppress any compile-time errors that don't occur at the top level.
 574      * This may still fail, as interplevel may be zero in contexts where we
 575      * don't really want to call the error reporter, as when js is called
 576      * by other code which could catch the error.
 577      */
 578     if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
 579         onError = NULL;
 580
 581     if (onError) {
 582         JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
 583
 584         /*
 585          * If debugErrorHook is present then we give it a chance to veto
 586          * sending the error on to the regular error reporter.
 587          */
 588         if (hook && !hook(cx, message, &report,
 589                           cx->debugHooks->debugErrorHookData)) {
 590             onError = NULL;
 591         }
 592     }
 593     if (onError)
 594         (*onError)(cx, message, &report);
 595
 596   out:
 597     if (linebytes)
 598         cx->free(linebytes);
 599     if (linechars)
 600         cx->free(linechars);
 601     if (message)
 602         cx->free(message);
 603     if (report.ucmessage)
 604         cx->free((void *)report.ucmessage);
 605
 606     if (report.messageArgs) {
 607         if (!(flags & JSREPORT_UC)) {
 608             i = 0;
 609             while (report.messageArgs[i])
 610                 cx->free((void *)report.messageArgs[i++]);
 611         }
 612         cx->free((void *)report.messageArgs);
 613     }
 614
 615     if (!JSREPORT_IS_WARNING(flags)) {
 616         /* Set the error flag to suppress spurious reports. */
 617         flags |= TSF_ERROR;
 618     }
 619
 620     return warning;
 621 }
 622
 623 bool
 624 js::ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
 625                           uintN errorNumber, ...)
 626 {
 627     JS_ASSERT(ts || tc);
 628     JS_ASSERT(cx == ts->getContext());
 629
 630     /* In strict mode code, this is an error, not just a warning. */
 631     uintN flags;
 632     if ((tc && tc->flags & TCF_STRICT_MODE_CODE) || (ts && ts->isStrictMode()))
 633         flags = JSREPORT_ERROR;
 634     else if (JS_HAS_STRICT_OPTION(cx))
 635         flags = JSREPORT_WARNING;
 636     else
 637         return true;
 638
 639     va_list ap;
 640     va_start(ap, errorNumber);
 641     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 642     va_end(ap);
 643
 644     return result;
 645 }
 646
 647 bool
 648 js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn,
 649                              uintN flags, uintN errorNumber, ...)
 650 {
 651     va_list ap;
 652
 653     /*
 654      * We don't accept a JSTreeContext argument, so we can't implement
 655      * JSREPORT_STRICT_MODE_ERROR here.  Use ReportStrictModeError instead,
 656      * or do the checks in the caller and pass plain old JSREPORT_ERROR.
 657      */
 658     JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
 659
 660     va_start(ap, errorNumber);
 661     JS_ASSERT(cx == ts->getContext());
 662     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 663     va_end(ap);
 664
 665     return result;
 666 }
 667
 668 #if JS_HAS_XML_SUPPORT
 669
 670 JSBool
 671 TokenStream::getXMLEntity()
 672 {
 673     ptrdiff_t offset, length, i;
 674     int c, d;
 675     JSBool ispair;
 676     jschar *bp, digit;
 677     char *bytes;
 678     JSErrNum msg;
 679
 680     JSCharBuffer &tb = tokenbuf;
 681
 682     /* Put the entity, including the '&' already scanned, in tokenbuf. */
 683     offset = tb.length();
 684     if (!tb.append('&'))
 685         return JS_FALSE;
 686     while ((c = getChar()) != ';') {
 687         if (c == EOF || c == '\n') {
 688             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
 689             return JS_FALSE;
 690         }
 691         if (!tb.append(c))
 692             return JS_FALSE;
 693     }
 694
 695     /* Let length be the number of jschars after the '&', including the ';'. */
 696     length = tb.length() - offset;
 697     bp = tb.begin() + offset;
 698     c = d = 0;
 699     ispair = JS_FALSE;
 700     if (length > 2 && bp[1] == '#') {
 701         /* Match a well-formed XML Character Reference. */
 702         i = 2;
 703         if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
 704             if (length > 9)     /* at most 6 hex digits allowed */
 705                 goto badncr;
 706             while (++i < length) {
 707                 digit = bp[i];
 708                 if (!JS7_ISHEX(digit))
 709                     goto badncr;
 710                 c = (c << 4) + JS7_UNHEX(digit);
 711             }
 712         } else {
 713             while (i < length) {
 714                 digit = bp[i++];
 715                 if (!JS7_ISDEC(digit))
 716                     goto badncr;
 717                 c = (c * 10) + JS7_UNDEC(digit);
 718                 if (c < 0)
 719                     goto badncr;
 720             }
 721         }
 722
 723         if (0x10000 <= c && c <= 0x10FFFF) {
 724             /* Form a surrogate pair (c, d) -- c is the high surrogate. */
 725             d = 0xDC00 + (c & 0x3FF);
 726             c = 0xD7C0 + (c >> 10);
 727             ispair = JS_TRUE;
 728         } else {
 729             /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
 730             if (c != 0x9 && c != 0xA && c != 0xD &&
 731                 !(0x20 <= c && c <= 0xD7FF) &&
 732                 !(0xE000 <= c && c <= 0xFFFD)) {
 733                 goto badncr;
 734             }
 735         }
 736     } else {
 737         /* Try to match one of the five XML 1.0 predefined entities. */
 738         switch (length) {
 739           case 3:
 740             if (bp[2] == 't') {
 741                 if (bp[1] == 'l')
 742                     c = '<';
 743                 else if (bp[1] == 'g')
 744                     c = '>';
 745             }
 746             break;
 747           case 4:
 748             if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
 749                 c = '&';
 750             break;
 751           case 5:
 752             if (bp[3] == 'o') {
 753                 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
 754                     c = '\'';
 755                 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
 756                     c = '"';
 757             }
 758             break;
 759         }
 760         if (c == 0) {
 761             msg = JSMSG_UNKNOWN_XML_ENTITY;
 762             goto bad;
 763         }
 764     }
 765
 766     /* If we matched, retract tokenbuf and store the entity's value. */
 767     *bp++ = (jschar) c;
 768     if (ispair)
 769         *bp++ = (jschar) d;
 770     tb.shrinkBy(tb.end() - bp);
 771     return JS_TRUE;
 772
 773   badncr:
 774     msg = JSMSG_BAD_XML_NCR;
 775   bad:
 776     /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
 777     JS_ASSERT((tb.end() - bp) >= 1);
 778     bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
 779     if (bytes) {
 780         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
 781         cx->free(bytes);
 782     }
 783     return JS_FALSE;
 784 }
 785
 786 #endif /* JS_HAS_XML_SUPPORT */
 787
 788 /*
 789  * We have encountered a '\': check for a Unicode escape sequence after it,
 790  * returning the character code value if we found a Unicode escape sequence.
 791  * Otherwise, non-destructively return the original '\'.
 792  */
 793 int32
 794 TokenStream::getUnicodeEscape()
 795 {
 796     jschar cp[5];
 797     int32 c;
 798
 799     if (peekChars(5, cp) && cp[0] == 'u' &&
 800         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
 801         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
 802     {
 803         c = (((((JS7_UNHEX(cp[1]) << 4)
 804                 + JS7_UNHEX(cp[2])) << 4)
 805               + JS7_UNHEX(cp[3])) << 4)
 806             + JS7_UNHEX(cp[4]);
 807         skipChars(5);
 808         return c;
 809     }
 810     return '\\';
 811 }
 812
 813 Token *
 814 TokenStream::newToken(ptrdiff_t adjust)
 815 {
 816     cursor = (cursor + 1) & ntokensMask;
 817     Token *tp = &tokens[cursor];
 818     tp->ptr = linebuf.ptr + adjust;
 819     tp->pos.begin.index = linepos + (tp->ptr - linebuf.base) - ungetpos;
 820     tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
 821     return tp;
 822 }
 823
 824 static JS_ALWAYS_INLINE JSBool
 825 ScanAsSpace(jschar c)
 826 {
 827     /* Treat little- and big-endian BOMs as whitespace for compatibility. */
 828     if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
 829         return JS_TRUE;
 830     return JS_FALSE;
 831 }
 832
 833 static JS_ALWAYS_INLINE JSAtom *
 834 atomize(JSContext *cx, JSCharBuffer &cb)
 835 {
 836     return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
 837 }
 838
 839 TokenKind
 840 TokenStream::getTokenInternal()
 841 {
 842     TokenKind tt;
 843     int c, qc;
 844     Token *tp;
 845     JSAtom *atom;
 846     JSBool hadUnicodeEscape;
 847     const struct keyword *kw;
 848 #if JS_HAS_XML_SUPPORT
 849     JSBool inTarget;
 850     size_t targetLength;
 851     ptrdiff_t contentIndex;
 852 #endif
 853
 854 #if JS_HAS_XML_SUPPORT
 855     if (flags & TSF_XMLTEXTMODE) {
 856         tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
 857         tp = newToken(0);
 858         tokenbuf.clear();
 859         qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
 860
 861         while ((c = getChar()) != qc && c != '<' && c != EOF) {
 862             if (c == '&' && qc == '<') {
 863                 if (!getXMLEntity())
 864                     goto error;
 865                 tt = TOK_XMLTEXT;
 866                 continue;
 867             }
 868
 869             if (!JS_ISXMLSPACE(c))
 870                 tt = TOK_XMLTEXT;
 871             if (!tokenbuf.append(c))
 872                 goto error;
 873         }
 874         ungetChar(c);
 875
 876         if (tokenbuf.empty()) {
 877             atom = NULL;
 878         } else {
 879             atom = atomize(cx, tokenbuf);
 880             if (!atom)
 881                 goto error;
 882         }
 883         tp->pos.end.lineno = lineno;
 884         tp->t_op = JSOP_STRING;
 885         tp->t_atom = atom;
 886         goto out;
 887     }
 888
 889     if (flags & TSF_XMLTAGMODE) {
 890         tp = newToken(0);
 891         c = getChar();
 892         if (JS_ISXMLSPACE(c)) {
 893             do {
 894                 c = getChar();
 895             } while (JS_ISXMLSPACE(c));
 896             ungetChar(c);
 897             tt = TOK_XMLSPACE;
 898             goto out;
 899         }
 900
 901         if (c == EOF) {
 902             tt = TOK_EOF;
 903             goto out;
 904         }
 905
 906         tokenbuf.clear();
 907         if (JS_ISXMLNSSTART(c)) {
 908             JSBool sawColon = JS_FALSE;
 909
 910             if (!tokenbuf.append(c))
 911                 goto error;
 912             while ((c = getChar()) != EOF && JS_ISXMLNAME(c)) {
 913                 if (c == ':') {
 914                     int nextc;
 915
 916                     if (sawColon ||
 917                         (nextc = peekChar(),
 918                          ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
 919                          !JS_ISXMLNAME(nextc))) {
 920                         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 921                                                  JSMSG_BAD_XML_QNAME);
 922                         goto error;
 923                     }
 924                     sawColon = JS_TRUE;
 925                 }
 926
 927                 if (!tokenbuf.append(c))
 928                     goto error;
 929             }
 930
 931             ungetChar(c);
 932             atom = atomize(cx, tokenbuf);
 933             if (!atom)
 934                 goto error;
 935             tp->t_op = JSOP_STRING;
 936             tp->t_atom = atom;
 937             tt = TOK_XMLNAME;
 938             goto out;
 939         }
 940
 941         switch (c) {
 942           case '{':
 943             if (flags & TSF_XMLONLYMODE)
 944                 goto bad_xml_char;
 945             tt = TOK_LC;
 946             goto out;
 947
 948           case '=':
 949             tt = TOK_ASSIGN;
 950             goto out;
 951
 952           case '"':
 953           case '\'':
 954             qc = c;
 955             while ((c = getChar()) != qc) {
 956                 if (c == EOF) {
 957                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 958                                              JSMSG_UNTERMINATED_STRING);
 959                     goto error;
 960                 }
 961
 962                 /*
 963                  * XML attribute values are double-quoted when pretty-printed,
 964                  * so escape " if it is expressed directly in a single-quoted
 965                  * attribute value.
 966                  */
 967                 if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
 968                     JS_ASSERT(qc == '\'');
 969                     if (!tokenbuf.append(js_quot_entity_str,
 970                                      strlen(js_quot_entity_str)))
 971                         goto error;
 972                     continue;
 973                 }
 974
 975                 if (c == '&' && (flags & TSF_XMLONLYMODE)) {
 976                     if (!getXMLEntity())
 977                         goto error;
 978                     continue;
 979                 }
 980
 981                 if (!tokenbuf.append(c))
 982                     goto error;
 983             }
 984             atom = atomize(cx, tokenbuf);
 985             if (!atom)
 986                 goto error;
 987             tp->pos.end.lineno = lineno;
 988             tp->t_op = JSOP_STRING;
 989             tp->t_atom = atom;
 990             tt = TOK_XMLATTR;
 991             goto out;
 992
 993           case '>':
 994             tt = TOK_XMLTAGC;
 995             goto out;
 996
 997           case '/':
 998             if (matchChar('>')) {
 999                 tt = TOK_XMLPTAGC;
1000                 goto out;
1001             }
1002             /* FALL THROUGH */
1003
1004           bad_xml_char:
1005           default:
1006             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
1007             goto error;
1008         }
1009         /* NOTREACHED */
1010     }
1011 #endif /* JS_HAS_XML_SUPPORT */
1012
1013   retry:
1014     do {
1015         c = getChar();
1016         if (c == '\n') {
1017             flags &= ~TSF_DIRTYLINE;
1018             if (flags & TSF_NEWLINES)
1019                 break;
1020         }
1021     } while (ScanAsSpace((jschar)c));
1022
1023     tp = newToken(-1);
1024     if (c == EOF) {
1025         tt = TOK_EOF;
1026         goto out;
1027     }
1028
1029     hadUnicodeEscape = JS_FALSE;
1030     if (JS_ISIDSTART(c) ||
1031         (c == '\\' &&
1032          (qc = getUnicodeEscape(),
1033           hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1034         if (hadUnicodeEscape)
1035             c = qc;
1036         tokenbuf.clear();
1037         for (;;) {
1038             if (!tokenbuf.append(c))
1039                 goto error;
1040             c = getChar();
1041             if (c == '\\') {
1042                 qc = getUnicodeEscape();
1043                 if (!JS_ISIDENT(qc))
1044                     break;
1045                 c = qc;
1046                 hadUnicodeEscape = JS_TRUE;
1047             } else {
1048                 if (!JS_ISIDENT(c))
1049                     break;
1050             }
1051         }
1052         ungetChar(c);
1053
1054         /*
1055          * Check for keywords unless we saw a Unicode escape or the parser
1056          * asks us to ignore keywords.
1057          */
1058         if (!hadUnicodeEscape &&
1059             !(flags & TSF_KEYWORD_IS_NAME) &&
1060             (kw = FindKeyword(tokenbuf.begin(), tokenbuf.length()))) {
1061             if (kw->tokentype == TOK_RESERVED) {
1062                 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1063                                               JSMSG_RESERVED_ID, kw->chars)) {
1064                     goto error;
1065                 }
1066             } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1067                 tt = kw->tokentype;
1068                 tp->t_op = (JSOp) kw->op;
1069                 goto out;
1070             }
1071         }
1072
1073         atom = atomize(cx, tokenbuf);
1074         if (!atom)
1075             goto error;
1076         tp->t_op = JSOP_NAME;
1077         tp->t_atom = atom;
1078         tt = TOK_NAME;
1079         goto out;
1080     }
1081
1082     if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(peekChar()))) {
1083         jsint radix;
1084         const jschar *endptr;
1085         jsdouble dval;
1086
1087         radix = 10;
1088         tokenbuf.clear();
1089
1090         if (c == '0') {
1091             if (!tokenbuf.append(c))
1092                 goto error;
1093             c = getChar();
1094             if (JS_TOLOWER(c) == 'x') {
1095                 if (!tokenbuf.append(c))
1096                     goto error;
1097                 c = getChar();
1098                 radix = 16;
1099             } else if (JS7_ISDEC(c)) {
1100                 radix = 8;
1101             }
1102         }
1103
1104         while (JS7_ISHEX(c)) {
1105             if (radix < 16) {
1106                 if (JS7_ISLET(c))
1107                     break;
1108
1109                 if (radix == 8) {
1110                     /* Octal integer literals are not permitted in strict mode code. */
1111                     if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1112                         goto error;
1113
1114                     /*
1115                      * Outside strict mode, we permit 08 and 09 as decimal numbers, which
1116                      * makes our behaviour a superset of the ECMA numeric grammar. We
1117                      * might not always be so permissive, so we warn about it.
1118                      */
1119                     if (c >= '8') {
1120                         if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1121                                                       JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1122                             goto error;
1123                         }
1124                         radix = 10;
1125                     }
1126                 }
1127             }
1128             if (!tokenbuf.append(c))
1129                 goto error;
1130             c = getChar();
1131         }
1132
1133         if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1134             if (c == '.') {
1135                 do {
1136                     if (!tokenbuf.append(c))
1137                         goto error;
1138                     c = getChar();
1139                 } while (JS7_ISDEC(c));
1140             }
1141             if (JS_TOLOWER(c) == 'e') {
1142                 if (!tokenbuf.append(c))
1143                     goto error;
1144                 c = getChar();
1145                 if (c == '+' || c == '-') {
1146                     if (!tokenbuf.append(c))
1147                         goto error;
1148                     c = getChar();
1149                 }
1150                 if (!JS7_ISDEC(c)) {
1151                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1152                                              JSMSG_MISSING_EXPONENT);
1153                     goto error;
1154                 }
1155                 do {
1156                     if (!tokenbuf.append(c))
1157                         goto error;
1158                     c = getChar();
1159                 } while (JS7_ISDEC(c));
1160             }
1161         }
1162
1163         if (JS_ISIDSTART(c)) {
1164             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1165             goto error;
1166         }
1167
1168         /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1169         ungetChar(c);
1170         if (!tokenbuf.append(0))
1171             goto error;
1172
1173         if (radix == 10) {
1174             if (!js_strtod(cx, tokenbuf.begin(), tokenbuf.end(), &endptr, &dval)) {
1175                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1176                 goto error;
1177             }
1178         } else {
1179             if (!js_strtointeger(cx, tokenbuf.begin(), tokenbuf.end(),
1180                                  &endptr, radix, &dval)) {
1181                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1182                 goto error;
1183             }
1184         }
1185         tp->t_dval = dval;
1186         tt = TOK_NUMBER;
1187         goto out;
1188     }
1189
1190     if (c == '"' || c == '\'') {
1191         qc = c;
1192         tokenbuf.clear();
1193         while ((c = getChar()) != qc) {
1194             if (c == '\n' || c == EOF) {
1195                 ungetChar(c);
1196                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1197                                          JSMSG_UNTERMINATED_STRING);
1198                 goto error;
1199             }
1200             if (c == '\\') {
1201                 switch (c = getChar()) {
1202                   case 'b': c = '\b'; break;
1203                   case 'f': c = '\f'; break;
1204                   case 'n': c = '\n'; break;
1205                   case 'r': c = '\r'; break;
1206                   case 't': c = '\t'; break;
1207                   case 'v': c = '\v'; break;
1208
1209                   default:
1210                     if ('0' <= c && c < '8') {
1211                         int32 val = JS7_UNDEC(c);
1212
1213                         c = peekChar();
1214                         /* Strict mode code allows only \0, then a non-digit. */
1215                         if (val != 0 || JS7_ISDEC(c)) {
1216                             if (!ReportStrictModeError(cx, this, NULL, NULL,
1217                                                        JSMSG_DEPRECATED_OCTAL)) {
1218                                 goto error;
1219                             }
1220                         }
1221                         if ('0' <= c && c < '8') {
1222                             val = 8 * val + JS7_UNDEC(c);
1223                             getChar();
1224                             c = peekChar();
1225                             if ('0' <= c && c < '8') {
1226                                 int32 save = val;
1227                                 val = 8 * val + JS7_UNDEC(c);
1228                                 if (val <= 0377)
1229                                     getChar();
1230                                 else
1231                                     val = save;
1232                             }
1233                         }
1234
1235                         c = (jschar)val;
1236                     } else if (c == 'u') {
1237                         jschar cp[4];
1238                         if (peekChars(4, cp) &&
1239                             JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1240                             JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1241                             c = (((((JS7_UNHEX(cp[0]) << 4)
1242                                     + JS7_UNHEX(cp[1])) << 4)
1243                                   + JS7_UNHEX(cp[2])) << 4)
1244                                 + JS7_UNHEX(cp[3]);
1245                             skipChars(4);
1246                         }
1247                     } else if (c == 'x') {
1248                         jschar cp[2];
1249                         if (peekChars(2, cp) &&
1250                             JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1251                             c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1252                             skipChars(2);
1253                         }
1254                     } else if (c == '\n') {
1255                         /* ECMA follows C by removing escaped newlines. */
1256                         continue;
1257                     }
1258                     break;
1259                 }
1260             }
1261             if (!tokenbuf.append(c))
1262                 goto error;
1263         }
1264         atom = atomize(cx, tokenbuf);
1265         if (!atom)
1266             goto error;
1267         tp->pos.end.lineno = lineno;
1268         tp->t_op = JSOP_STRING;
1269         tp->t_atom = atom;
1270         tt = TOK_STRING;
1271         goto out;
1272     }
1273
1274     switch (c) {
1275       case '\n': tt = TOK_EOL; goto eol_out;
1276       case ';':  tt = TOK_SEMI; break;
1277       case '[':  tt = TOK_LB; break;
1278       case ']':  tt = TOK_RB; break;
1279       case '{':  tt = TOK_LC; break;
1280       case '}':  tt = TOK_RC; break;
1281       case '(':  tt = TOK_LP; break;
1282       case ')':  tt = TOK_RP; break;
1283       case ',':  tt = TOK_COMMA; break;
1284       case '?':  tt = TOK_HOOK; break;
1285
1286       case '.':
1287 #if JS_HAS_XML_SUPPORT
1288         if (matchChar(c))
1289             tt = TOK_DBLDOT;
1290         else
1291 #endif
1292             tt = TOK_DOT;
1293         break;
1294
1295       case ':':
1296 #if JS_HAS_XML_SUPPORT
1297         if (matchChar(c)) {
1298             tt = TOK_DBLCOLON;
1299             break;
1300         }
1301 #endif
1302         /*
1303          * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1304          * object initializer, likewise for setter.
1305          */
1306         tp->t_op = JSOP_NOP;
1307         tt = TOK_COLON;
1308         break;
1309
1310       case '|':
1311         if (matchChar(c)) {
1312             tt = TOK_OR;
1313         } else if (matchChar('=')) {
1314             tp->t_op = JSOP_BITOR;
1315             tt = TOK_ASSIGN;
1316         } else {
1317             tt = TOK_BITOR;
1318         }
1319         break;
1320
1321       case '^':
1322         if (matchChar('=')) {
1323             tp->t_op = JSOP_BITXOR;
1324             tt = TOK_ASSIGN;
1325         } else {
1326             tt = TOK_BITXOR;
1327         }
1328         break;
1329
1330       case '&':
1331         if (matchChar(c)) {
1332             tt = TOK_AND;
1333         } else if (matchChar('=')) {
1334             tp->t_op = JSOP_BITAND;
1335             tt = TOK_ASSIGN;
1336         } else {
1337             tt = TOK_BITAND;
1338         }
1339         break;
1340
1341       case '=':
1342         if (matchChar(c)) {
1343             tp->t_op = matchChar(c) ? JSOP_STRICTEQ : JSOP_EQ;
1344             tt = TOK_EQOP;
1345         } else {
1346             tp->t_op = JSOP_NOP;
1347             tt = TOK_ASSIGN;
1348         }
1349         break;
1350
1351       case '!':
1352         if (matchChar('=')) {
1353             tp->t_op = matchChar('=') ? JSOP_STRICTNE : JSOP_NE;
1354             tt = TOK_EQOP;
1355         } else {
1356             tp->t_op = JSOP_NOT;
1357             tt = TOK_UNARYOP;
1358         }
1359         break;
1360
1361 #if JS_HAS_XML_SUPPORT
1362       case '@':
1363         tt = TOK_AT;
1364         break;
1365 #endif
1366
1367       case '<':
1368 #if JS_HAS_XML_SUPPORT
1369         /*
1370          * After much testing, it's clear that Postel's advice to protocol
1371          * designers ("be liberal in what you accept, and conservative in what
1372          * you send") invites a natural-law repercussion for JS as "protocol":
1373          *
1374          * "If you are liberal in what you accept, others will utterly fail to
1375          *  be conservative in what they send."
1376          *
1377          * Which means you will get <!-- comments to end of line in the middle
1378          * of .js files, and after if conditions whose then statements are on
1379          * the next line, and other wonders.  See at least the following bugs:
1380          * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1381          * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1382          * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1383          *
1384          * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1385          * an XML comment or CDATA literal.  Instead, we always scan <! as the
1386          * start of an HTML comment hack to end of line, used since Netscape 2
1387          * to hide script tag content from script-unaware browsers.
1388          *
1389          * But this still leaves XML resources with certain internal structure
1390          * vulnerable to being loaded as script cross-origin, and some internal
1391          * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1392          * source consists only of XML literals. See:
1393          *
1394          * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1395          *
1396          * The check for this is in jsparse.cpp, Compiler::compileScript.
1397          */
1398         if ((flags & TSF_OPERAND) &&
1399             (JS_HAS_XML_OPTION(cx) || peekChar() != '!')) {
1400             /* Check for XML comment or CDATA section. */
1401             if (matchChar('!')) {
1402                 tokenbuf.clear();
1403
1404                 /* Scan XML comment. */
1405                 if (matchChar('-')) {
1406                     if (!matchChar('-'))
1407                         goto bad_xml_markup;
1408                     while ((c = getChar()) != '-' || !matchChar('-')) {
1409                         if (c == EOF)
1410                             goto bad_xml_markup;
1411                         if (!tokenbuf.append(c))
1412                             goto error;
1413                     }
1414                     tt = TOK_XMLCOMMENT;
1415                     tp->t_op = JSOP_XMLCOMMENT;
1416                     goto finish_xml_markup;
1417                 }
1418
1419                 /* Scan CDATA section. */
1420                 if (matchChar('[')) {
1421                     jschar cp[6];
1422                     if (peekChars(6, cp) &&
1423                         cp[0] == 'C' &&
1424                         cp[1] == 'D' &&
1425                         cp[2] == 'A' &&
1426                         cp[3] == 'T' &&
1427                         cp[4] == 'A' &&
1428                         cp[5] == '[') {
1429                         skipChars(6);
1430                         while ((c = getChar()) != ']' ||
1431                                !peekChars(2, cp) ||
1432                                cp[0] != ']' ||
1433                                cp[1] != '>') {
1434                             if (c == EOF)
1435                                 goto bad_xml_markup;
1436                             if (!tokenbuf.append(c))
1437                                 goto error;
1438                         }
1439                         getChar();            /* discard ] but not > */
1440                         tt = TOK_XMLCDATA;
1441                         tp->t_op = JSOP_XMLCDATA;
1442                         goto finish_xml_markup;
1443                     }
1444                     goto bad_xml_markup;
1445                 }
1446             }
1447
1448             /* Check for processing instruction. */
1449             if (matchChar('?')) {
1450                 inTarget = JS_TRUE;
1451                 targetLength = 0;
1452                 contentIndex = -1;
1453
1454                 tokenbuf.clear();
1455                 while ((c = getChar()) != '?' || peekChar() != '>') {
1456                     if (c == EOF)
1457                         goto bad_xml_markup;
1458                     if (inTarget) {
1459                         if (JS_ISXMLSPACE(c)) {
1460                             if (tokenbuf.empty())
1461                                 goto bad_xml_markup;
1462                             inTarget = JS_FALSE;
1463                         } else {
1464                             if (!(tokenbuf.empty()
1465                                   ? JS_ISXMLNSSTART(c)
1466                                   : JS_ISXMLNS(c))) {
1467                                 goto bad_xml_markup;
1468                             }
1469                             ++targetLength;
1470                         }
1471                     } else {
1472                         if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1473                             contentIndex = tokenbuf.length();
1474                     }
1475                     if (!tokenbuf.append(c))
1476                         goto error;
1477                 }
1478                 if (targetLength == 0)
1479                     goto bad_xml_markup;
1480                 if (contentIndex < 0) {
1481                     atom = cx->runtime->atomState.emptyAtom;
1482                 } else {
1483                     atom = js_AtomizeChars(cx,
1484                                            tokenbuf.begin() + contentIndex,
1485                                            tokenbuf.length() - contentIndex,
1486                                            0);
1487                     if (!atom)
1488                         goto error;
1489                 }
1490                 tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1491                 tp->t_atom2 = atom;
1492                 tt = TOK_XMLPI;
1493
1494         finish_xml_markup:
1495                 if (!matchChar('>'))
1496                     goto bad_xml_markup;
1497                 atom = atomize(cx, tokenbuf);
1498                 if (!atom)
1499                     goto error;
1500                 tp->t_atom = atom;
1501                 tp->pos.end.lineno = lineno;
1502                 goto out;
1503             }
1504
1505             /* An XML start-of-tag character. */
1506             tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1507             goto out;
1508
1509         bad_xml_markup:
1510             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1511             goto error;
1512         }
1513 #endif /* JS_HAS_XML_SUPPORT */
1514
1515         /* NB: treat HTML begin-comment as comment-till-end-of-line */
1516         if (matchChar('!')) {
1517             if (matchChar('-')) {
1518                 if (matchChar('-')) {
1519                     flags |= TSF_IN_HTML_COMMENT;
1520                     goto skipline;
1521                 }
1522                 ungetChar('-');
1523             }
1524             ungetChar('!');
1525         }
1526         if (matchChar(c)) {
1527             tp->t_op = JSOP_LSH;
1528             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1529         } else {
1530             tp->t_op = matchChar('=') ? JSOP_LE : JSOP_LT;
1531             tt = TOK_RELOP;
1532         }
1533         break;
1534
1535       case '>':
1536         if (matchChar(c)) {
1537             tp->t_op = matchChar(c) ? JSOP_URSH : JSOP_RSH;
1538             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1539         } else {
1540             tp->t_op = matchChar('=') ? JSOP_GE : JSOP_GT;
1541             tt = TOK_RELOP;
1542         }
1543         break;
1544
1545       case '*':
1546         tp->t_op = JSOP_MUL;
1547         tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
1548         break;
1549
1550       case '/':
1551         if (matchChar('/')) {
1552             /*
1553              * Hack for source filters such as the Mozilla XUL preprocessor:
1554              * "//@line 123\n" sets the number of the *next* line after the
1555              * comment to 123.
1556              */
1557             if (JS_HAS_ATLINE_OPTION(cx)) {
1558                 jschar cp[5];
1559                 uintN i, line, temp;
1560                 char filenameBuf[1024];
1561
1562                 if (peekChars(5, cp) &&
1563                     cp[0] == '@' &&
1564                     cp[1] == 'l' &&
1565                     cp[2] == 'i' &&
1566                     cp[3] == 'n' &&
1567                     cp[4] == 'e') {
1568                     skipChars(5);
1569                     while ((c = getChar()) != '\n' && ScanAsSpace((jschar)c))
1570                         continue;
1571                     if (JS7_ISDEC(c)) {
1572                         line = JS7_UNDEC(c);
1573                         while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1574                             temp = 10 * line + JS7_UNDEC(c);
1575                             if (temp < line) {
1576                                 /* Ignore overlarge line numbers. */
1577                                 goto skipline;
1578                             }
1579                             line = temp;
1580                         }
1581                         while (c != '\n' && ScanAsSpace((jschar)c))
1582                             c = getChar();
1583                         i = 0;
1584                         if (c == '"') {
1585                             while ((c = getChar()) != EOF && c != '"') {
1586                                 if (c == '\n') {
1587                                     ungetChar(c);
1588                                     goto skipline;
1589                                 }
1590                                 if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1591                                     goto skipline;
1592                                 filenameBuf[i++] = (char) c;
1593                             }
1594                             if (c == '"') {
1595                                 while ((c = getChar()) != '\n' &&
1596                                        ScanAsSpace((jschar)c)) {
1597                                     continue;
1598                                 }
1599                             }
1600                         }
1601                         filenameBuf[i] = '\0';
1602                         if (c == '\n') {
1603                             if (i > 0) {
1604                                 if (flags & TSF_OWNFILENAME)
1605                                     cx->free((void *) filename);
1606                                 filename = JS_strdup(cx, filenameBuf);
1607                                 if (!filename)
1608                                     goto error;
1609                                 flags |= TSF_OWNFILENAME;
1610                             }
1611                             lineno = line;
1612                         }
1613                     }
1614                     ungetChar(c);
1615                 }
1616             }
1617
1618   skipline:
1619             /* Optimize line skipping if we are not in an HTML comment. */
1620             if (flags & TSF_IN_HTML_COMMENT) {
1621                 while ((c = getChar()) != EOF && c != '\n') {
1622                     if (c == '-' && matchChar('-') && matchChar('>'))
1623                         flags &= ~TSF_IN_HTML_COMMENT;
1624                 }
1625             } else {
1626                 while ((c = getChar()) != EOF && c != '\n')
1627                     continue;
1628             }
1629             ungetChar(c);
1630             cursor = (cursor - 1) & ntokensMask;
1631             goto retry;
1632         }
1633
1634         if (matchChar('*')) {
1635             uintN linenoBefore = lineno;
1636             while ((c = getChar()) != EOF &&
1637                    !(c == '*' && matchChar('/'))) {
1638                 /* Ignore all characters until comment close. */
1639             }
1640             if (c == EOF) {
1641                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1642                                          JSMSG_UNTERMINATED_COMMENT);
1643                 goto error;
1644             }
1645             if ((flags & TSF_NEWLINES) && linenoBefore != lineno) {
1646                 flags &= ~TSF_DIRTYLINE;
1647                 tt = TOK_EOL;
1648                 goto eol_out;
1649             }
1650             cursor = (cursor - 1) & ntokensMask;
1651             goto retry;
1652         }
1653
1654         if (flags & TSF_OPERAND) {
1655             uintN reflags, length;
1656             JSBool inCharClass = JS_FALSE;
1657
1658             tokenbuf.clear();
1659             for (;;) {
1660                 c = getChar();
1661                 if (c == '\n' || c == EOF) {
1662                     ungetChar(c);
1663                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1664                                              JSMSG_UNTERMINATED_REGEXP);
1665                     goto error;
1666                 }
1667                 if (c == '\\') {
1668                     if (!tokenbuf.append(c))
1669                         goto error;
1670                     c = getChar();
1671                 } else if (c == '[') {
1672                     inCharClass = JS_TRUE;
1673                 } else if (c == ']') {
1674                     inCharClass = JS_FALSE;
1675                 } else if (c == '/' && !inCharClass) {
1676                     /* For compat with IE, allow unescaped / in char classes. */
1677                     break;
1678                 }
1679                 if (!tokenbuf.append(c))
1680                     goto error;
1681             }
1682             for (reflags = 0, length = tokenbuf.length() + 1; ; length++) {
1683                 c = peekChar();
1684                 if (c == 'g' && !(reflags & JSREG_GLOB))
1685                     reflags |= JSREG_GLOB;
1686                 else if (c == 'i' && !(reflags & JSREG_FOLD))
1687                     reflags |= JSREG_FOLD;
1688                 else if (c == 'm' && !(reflags & JSREG_MULTILINE))
1689                     reflags |= JSREG_MULTILINE;
1690                 else if (c == 'y' && !(reflags & JSREG_STICKY))
1691                     reflags |= JSREG_STICKY;
1692                 else
1693                     break;
1694                 getChar();
1695             }
1696             c = peekChar();
1697             if (JS7_ISLET(c)) {
1698                 char buf[2] = { '\0' };
1699                 tp->pos.begin.index += length + 1;
1700                 buf[0] = (char)c;
1701                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
1702                                          buf);
1703                 (void) getChar();
1704                 goto error;
1705             }
1706             tp->t_reflags = reflags;
1707             tt = TOK_REGEXP;
1708             break;
1709         }
1710
1711         tp->t_op = JSOP_DIV;
1712         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1713         break;
1714
1715       case '%':
1716         tp->t_op = JSOP_MOD;
1717         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1718         break;
1719
1720       case '~':
1721         tp->t_op = JSOP_BITNOT;
1722         tt = TOK_UNARYOP;
1723         break;
1724
1725       case '+':
1726         if (matchChar('=')) {
1727             tp->t_op = JSOP_ADD;
1728             tt = TOK_ASSIGN;
1729         } else if (matchChar(c)) {
1730             tt = TOK_INC;
1731         } else {
1732             tp->t_op = JSOP_POS;
1733             tt = TOK_PLUS;
1734         }
1735         break;
1736
1737       case '-':
1738         if (matchChar('=')) {
1739             tp->t_op = JSOP_SUB;
1740             tt = TOK_ASSIGN;
1741         } else if (matchChar(c)) {
1742             if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
1743                 flags &= ~TSF_IN_HTML_COMMENT;
1744                 goto skipline;
1745             }
1746             tt = TOK_DEC;
1747         } else {
1748             tp->t_op = JSOP_NEG;
1749             tt = TOK_MINUS;
1750         }
1751         break;
1752
1753 #if JS_HAS_SHARP_VARS
1754       case '#':
1755       {
1756         uint32 n;
1757
1758         c = getChar();
1759         if (!JS7_ISDEC(c)) {
1760             ungetChar(c);
1761             goto badchar;
1762         }
1763         n = (uint32)JS7_UNDEC(c);
1764         for (;;) {
1765             c = getChar();
1766             if (!JS7_ISDEC(c))
1767                 break;
1768             n = 10 * n + JS7_UNDEC(c);
1769             if (n >= UINT16_LIMIT) {
1770                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_SHARPVAR_TOO_BIG);
1771                 goto error;
1772             }
1773         }
1774         tp->t_dval = (jsdouble) n;
1775         if (JS_HAS_STRICT_OPTION(cx) &&
1776             (c == '=' || c == '#')) {
1777             char buf[20];
1778             JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1779             if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1780                                           JSMSG_DEPRECATED_USAGE, buf)) {
1781                 goto error;
1782             }
1783         }
1784         if (c == '=')
1785             tt = TOK_DEFSHARP;
1786         else if (c == '#')
1787             tt = TOK_USESHARP;
1788         else
1789             goto badchar;
1790         break;
1791       }
1792 #endif /* JS_HAS_SHARP_VARS */
1793
1794 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1795       badchar:
1796 #endif
1797
1798       default:
1799         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
1800         goto error;
1801     }
1802
1803   out:
1804     JS_ASSERT(tt != TOK_EOL);
1805     flags |= TSF_DIRTYLINE;
1806
1807   eol_out:
1808     JS_ASSERT(tt < TOK_LIMIT);
1809     tp->pos.end.index = linepos + (linebuf.ptr - linebuf.base) - ungetpos;
1810     tp->type = tt;
1811     return tt;
1812
1813   error:
1814     tt = TOK_ERROR;
1815     flags |= TSF_ERROR;
1816     goto out;
1817 }
1818