js/src/jsscan.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2  * vim: set sw=4 ts=8 et tw=78:
   3  *
   4  * ***** BEGIN LICENSE BLOCK *****
   5  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version
   8  * 1.1 (the "License"); you may not use this file except in compliance with
   9  * the License. You may obtain a copy of the License at
  10  * http://www.mozilla.org/MPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS IS" basis,
  13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14  * for the specific language governing rights and limitations under the
  15  * License.
  16  *
  17  * The Original Code is Mozilla Communicator client code, released
  18  * March 31, 1998.
  19  *
  20  * The Initial Developer of the Original Code is
  21  * Netscape Communications Corporation.
  22  * Portions created by the Initial Developer are Copyright (C) 1998
  23  * the Initial Developer. All Rights Reserved.
  24  *
  25  * Contributor(s):
  26  *
  27  * Alternatively, the contents of this file may be used under the terms of
  28  * either of the GNU General Public License Version 2 or later (the "GPL"),
  29  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30  * in which case the provisions of the GPL or the LGPL are applicable instead
  31  * of those above. If you wish to allow use of your version of this file only
  32  * under the terms of either the GPL or the LGPL, and not to allow others to
  33  * use your version of this file under the terms of the MPL, indicate your
  34  * decision by deleting the provisions above and replace them with the notice
  35  * and other provisions required by the GPL or the LGPL. If you do not delete
  36  * the provisions above, a recipient may use your version of this file under
  37  * the terms of any one of the MPL, the GPL or the LGPL.
  38  *
  39  * ***** END LICENSE BLOCK ***** */
  40
  41 /*
  42  * JS lexical scanner.
  43  */
  44 #include <stdio.h>      /* first to avoid trouble on some systems */
  45 #include <errno.h>
  46 #include <limits.h>
  47 #include <math.h>
  48 #ifdef HAVE_MEMORY_H
  49 #include <memory.h>
  50 #endif
  51 #include <stdarg.h>
  52 #include <stdlib.h>
  53 #include <string.h>
  54 #include "jstypes.h"
  55 #include "jsstdint.h"
  56 #include "jsarena.h" /* Added by JSIFY */
  57 #include "jsbit.h"
  58 #include "jsutil.h" /* Added by JSIFY */
  59 #include "jsdtoa.h"
  60 #include "jsprf.h"
  61 #include "jsapi.h"
  62 #include "jsatom.h"
  63 #include "jscntxt.h"
  64 #include "jsversion.h"
  65 #include "jsemit.h"
  66 #include "jsexn.h"
  67 #include "jsnum.h"
  68 #include "jsopcode.h"
  69 #include "jsparse.h"
  70 #include "jsregexp.h"
  71 #include "jsscan.h"
  72 #include "jsscript.h"
  73 #include "jsstaticcheck.h"
  74 #include "jsvector.h"
  75
  76 #if JS_HAS_XML_SUPPORT
  77 #include "jsxml.h"
  78 #endif
  79
  80 using namespace js;
  81
/*
 * Define one global character array per keyword: for every JS_KEYWORD entry
 * listed in jskeyword.tbl this expands to
 *     const char js_<keyword>_str[] = "<keyword>";
 * Only the first macro argument is used here; the remaining ones are consumed
 * by the second expansion of the table that builds keyword_defs.
 */
#define JS_KEYWORD(keyword, type, op, version) \
    const char js_##keyword##_str[] = #keyword;
#include "jskeyword.tbl"
#undef JS_KEYWORD
  86
/*
 * One row of the keyword table: the keyword's spelling plus the three values
 * supplied for it by the JS_KEYWORD entry in jskeyword.tbl.
 */
struct keyword {
    const char  *chars;         /* C string with keyword text */
    TokenKind   tokentype;      /* token kind handed back by js_CheckKeyword */
    JSOp        op;             /* JSOp */
    JSVersion   version;        /* JSVersion */
};
  93
/*
 * The keyword table itself, with one initializer per JS_KEYWORD entry in
 * jskeyword.tbl, in table order.  FindKeyword indexes into this array.
 */
static const struct keyword keyword_defs[] = {
#define JS_KEYWORD(keyword, type, op, version) \
    {js_##keyword##_str, type, op, version},
#include "jskeyword.tbl"
#undef JS_KEYWORD
};

/* Number of rows in keyword_defs. */
#define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
 102
 103 static const struct keyword *
 104 FindKeyword(const jschar *s, size_t length)
 105 {
 106     register size_t i;
 107     const struct keyword *kw;
 108     const char *chars;
 109
 110     JS_ASSERT(length != 0);
 111
 112 #define JSKW_LENGTH()           length
 113 #define JSKW_AT(column)         s[column]
 114 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
 115 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
 116 #define JSKW_NO_MATCH()         goto no_match;
 117 #include "jsautokw.h"
 118 #undef JSKW_NO_MATCH
 119 #undef JSKW_TEST_GUESS
 120 #undef JSKW_GOT_MATCH
 121 #undef JSKW_AT
 122 #undef JSKW_LENGTH
 123
 124   got_match:
 125     return &keyword_defs[i];
 126
 127   test_guess:
 128     kw = &keyword_defs[i];
 129     chars = kw->chars;
 130     do {
 131         if (*s++ != (unsigned char)(*chars++))
 132             goto no_match;
 133     } while (--length != 0);
 134     return kw;
 135
 136   no_match:
 137     return NULL;
 138 }
 139
 140 TokenKind
 141 js_CheckKeyword(const jschar *str, size_t length)
 142 {
 143     const struct keyword *kw;
 144
 145     JS_ASSERT(length != 0);
 146     kw = FindKeyword(str, length);
 147     return kw ? kw->tokentype : TOK_EOF;
 148 }
 149
 150 JSBool
 151 js_IsIdentifier(JSString *str)
 152 {
 153     size_t length;
 154     jschar c;
 155     const jschar *chars, *end;
 156
 157     str->getCharsAndLength(chars, length);
 158     if (length == 0)
 159         return JS_FALSE;
 160     c = *chars;
 161     if (!JS_ISIDSTART(c))
 162         return JS_FALSE;
 163     end = chars + length;
 164     while (++chars != end) {
 165         c = *chars;
 166         if (!JS_ISIDENT(c))
 167             return JS_FALSE;
 168     }
 169     return JS_TRUE;
 170 }
 171
/*
 * MSVC emits warning C4351 when array members appear in a constructor's
 * initializer list (it announces that they will be default-initialized).
 * That is what is wanted here, so silence the warning around the constructor
 * only and restore the previous warning state afterwards.
 */
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4351)
#endif

/*
 * Initialize members that aren't initialized in |init|.
 *
 * Every member listed below except |cx| and |tokenbuf| is value-initialized
 * (zeroed); |cx| stores the context and |tokenbuf| is constructed from it.
 * The stream is not usable until init() has run.
 */
TokenStream::TokenStream(JSContext *cx)
  : cx(cx), tokens(), cursor(), lookahead(), ungetpos(), ungetbuf(), flags(),
    linelen(), linepos(), file(), listenerTSData(), saveEOL(), tokenbuf(cx)
{}

#ifdef _MSC_VER
#pragma warning(pop)
#endif
 186
 187 bool
 188 TokenStream::init(const jschar *base, size_t length, FILE *fp, const char *fn, uintN ln)
 189 {
 190     jschar *buf;
 191
 192     JS_ASSERT_IF(fp, !base);
 193     JS_ASSERT_IF(!base, length == 0);
 194     size_t nb = fp
 195          ? 2 * LINE_LIMIT * sizeof(jschar)
 196          : LINE_LIMIT * sizeof(jschar);
 197     cx->tempPool.allocateCast<jschar *>(buf, nb);
 198     if (!buf) {
 199         js_ReportOutOfScriptQuota(cx);
 200         return false;
 201     }
 202     memset(buf, 0, nb);
 203
 204     /* Initialize members. */
 205     filename = fn;
 206     lineno = ln;
 207     linebuf.base = linebuf.limit = linebuf.ptr = buf;
 208     if (fp) {
 209         file = fp;
 210         userbuf.base = buf + LINE_LIMIT;
 211         userbuf.ptr = userbuf.limit = userbuf.base + LINE_LIMIT;
 212     } else {
 213         userbuf.base = (jschar *)base;
 214         userbuf.limit = (jschar *)base + length;
 215         userbuf.ptr = (jschar *)base;
 216     }
 217     listener = cx->debugHooks->sourceHandler;
 218     listenerData = cx->debugHooks->sourceHandlerData;
 219     return true;
 220 }
 221
 222 void
 223 TokenStream::close()
 224 {
 225     if (flags & TSF_OWNFILENAME)
 226         cx->free((void *) filename);
 227 }
 228
/*
 * Use the fastest available getc.
 *
 * fast_getc resolves to a non-locking variant when the build defines the
 * matching HAVE_* macro (getc_unlocked, else _getc_nolock) and falls back to
 * plain getc otherwise.  js_fgets below is its only user in this part of the
 * file.
 */
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
 237
 238 JS_FRIEND_API(int)
 239 js_fgets(char *buf, int size, FILE *file)
 240 {
 241     int n, i, c;
 242     JSBool crflag;
 243
 244     n = size - 1;
 245     if (n < 0)
 246         return -1;
 247
 248     crflag = JS_FALSE;
 249     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
 250         buf[i] = c;
 251         if (c == '\n') {        /* any \n ends a line */
 252             i++;                /* keep the \n; we know there is room for \0 */
 253             break;
 254         }
 255         if (crflag) {           /* \r not followed by \n ends line at the \r */
 256             ungetc(c, file);
 257             break;              /* and overwrite c in buf with \0 */
 258         }
 259         crflag = (c == '\r');
 260     }
 261
 262     buf[i] = '\0';
 263     return i;
 264 }
 265
/*
 * Return the next input character, or EOF (after setting TSF_EOF) when the
 * input is exhausted.
 *
 * Characters come from three layers, consulted in this order:
 *   1. ungetbuf, the stack of characters pushed back by ungetChar();
 *   2. linebuf, which holds the current line segment with its terminator
 *      normalized to '\n';
 *   3. userbuf, the raw input, which is either the caller's string or, when
 *      |file| is set, a buffer refilled one js_fgets() call at a time.
 *
 * When linebuf runs dry, the next segment (through the next line terminator,
 * or at most LINE_LIMIT - 1 characters) is copied from userbuf into linebuf,
 * and linepos/linelen are updated.  lineno is incremented whenever the
 * character being returned is '\n'.
 */
int32
TokenStream::getChar()
{
    int32 c;
    ptrdiff_t i, j, len, olen;
    JSBool crflag;
    char cbuf[LINE_LIMIT];
    jschar *ubuf, *nl;

    if (ungetpos != 0) {
        /* Pushed-back characters take priority over buffered input. */
        c = ungetbuf[--ungetpos];
    } else {
        if (linebuf.ptr == linebuf.limit) {
            /* linebuf is exhausted: refill it from userbuf. */
            len = userbuf.limit - userbuf.ptr;
            if (len <= 0) {
                /* userbuf is exhausted too; only a file can supply more. */
                if (!file) {
                    flags |= TSF_EOF;
                    return EOF;
                }

                /* Fill userbuf so that \r and \r\n convert to \n. */
                crflag = (flags & TSF_CRFLAG) != 0;
                /* Leave one slot free for the '\n' that may be prepended. */
                len = js_fgets(cbuf, LINE_LIMIT - crflag, file);
                if (len <= 0) {
                    flags |= TSF_EOF;
                    return EOF;
                }
                olen = len;
                ubuf = userbuf.base;
                i = 0;
                if (crflag) {
                    /*
                     * The previous segment ended in a \r whose translation
                     * was deferred.  If no \n follows it, emit the \n for
                     * that \r now, ahead of the new data.
                     */
                    flags &= ~TSF_CRFLAG;
                    if (cbuf[0] != '\n') {
                        ubuf[i++] = '\n';
                        len++;
                        linepos--;
                    }
                }
                /* Inflate the bytes just read to jschars, one per byte. */
                for (j = 0; i < len; i++, j++)
                    ubuf[i] = (jschar) (unsigned char) cbuf[j];
                userbuf.limit = ubuf + len;
                userbuf.ptr = ubuf;
            }
            /* Hand the pending source text to the debugger's source hook. */
            if (listener)
                listener(filename, lineno, userbuf.ptr, len, &listenerTSData, listenerData);

            /*
             * saveEOL remembers the terminator found while copying an
             * earlier segment of an over-long line, so the scan below is
             * not repeated.
             */
            nl = saveEOL;
            if (!nl) {
                /*
                 * Any one of \n, \r, or \r\n ends a line (the longest
                 * match wins).  Also allow the Unicode line and paragraph
                 * separators.
                 */
                for (nl = userbuf.ptr; nl < userbuf.limit; nl++) {
                    /*
                     * Try to prevent value-testing on most characters by
                     * filtering out characters that aren't 000x or 202x.
                     */
                    if ((*nl & 0xDFD0) == 0) {
                        if (*nl == '\n')
                            break;
                        if (*nl == '\r') {
                            if (nl + 1 < userbuf.limit && nl[1] == '\n')
                                nl++;
                            break;
                        }
                        if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
                            break;
                    }
                }
            }

            /*
             * If there was a line terminator, copy thru it into linebuf.
             * Else copy LINE_LIMIT-1 jschars into linebuf.
             */
            if (nl < userbuf.limit)
                len = (nl - userbuf.ptr) + 1;
            if (len >= (ptrdiff_t) LINE_LIMIT) {
                len = LINE_LIMIT - 1;
                saveEOL = nl;
            } else {
                saveEOL = NULL;
            }
            js_strncpy(linebuf.base, userbuf.ptr, len);
            userbuf.ptr += len;
            olen = len;

            /*
             * Make sure linebuf contains \n for EOL (don't do this in
             * userbuf because the user's string might be readonly).
             */
            if (nl < userbuf.limit) {
                if (*nl == '\r') {
                    if (linebuf.base[len-1] == '\r') {
                        /*
                         * Does the line segment end in \r?  We must check
                         * for a \n at the front of the next segment before
                         * storing a \n into linebuf.  This case matters
                         * only when we're reading from a file.
                         */
                        if (nl + 1 == userbuf.limit && file) {
                            len--;
                            flags |= TSF_CRFLAG; /* clear NLFLAG? */
                            if (len == 0) {
                                /*
                                 * This can happen when a segment ends in
                                 * \r\r.  Start over.  ptr == limit in this
                                 * case, so we'll fall into buffer-filling
                                 * code.
                                 */
                                return getChar();
                            }
                        } else {
                            linebuf.base[len-1] = '\n';
                        }
                    }
                } else if (*nl == '\n') {
                    /*
                     * Collapse \r\n into a single \n at the end of linebuf.
                     * NOTE(review): linebuf.base[len-2] indexes before the
                     * buffer if len == 1 (a segment holding only the \n of
                     * a \r\n pair split across segments) -- confirm this
                     * cannot occur.
                     */
                    if (nl > userbuf.base &&
                        nl[-1] == '\r' &&
                        linebuf.base[len-2] == '\r') {
                        len--;
                        JS_ASSERT(linebuf.base[len] == '\n');
                        linebuf.base[len-1] = '\n';
                    }
                } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
                    linebuf.base[len-1] = '\n';
                }
            }

            /* Reset linebuf based on adjusted segment length. */
            linebuf.limit = linebuf.base + len;
            linebuf.ptr = linebuf.base;

            /* Update position of linebuf within physical userbuf line. */
            if (!(flags & TSF_NLFLAG))
                linepos += linelen;
            else
                linepos = 0;
            if (linebuf.limit[-1] == '\n')
                flags |= TSF_NLFLAG;
            else
                flags &= ~TSF_NLFLAG;

            /* Update linelen from original segment length. */
            linelen = olen;
        }
        c = *linebuf.ptr++;
    }
    if (c == '\n')
        lineno++;
    return c;
}
 419
 420 void
 421 TokenStream::ungetChar(int32 c)
 422 {
 423     if (c == EOF)
 424         return;
 425     JS_ASSERT(ungetpos < JS_ARRAY_LENGTH(ungetbuf));
 426     if (c == '\n')
 427         lineno--;
 428     ungetbuf[ungetpos++] = (jschar)c;
 429 }
 430
 431 /*
 432  * Peek n chars ahead into ts.  Return true if n chars were read, false if
 433  * there weren't enough characters in the input stream.  This function cannot
 434  * be used to peek into or past a newline.
 435  */
 436 JSBool
 437 TokenStream::peekChars(intN n, jschar *cp)
 438 {
 439     intN i, j;
 440     int32 c;
 441
 442     for (i = 0; i < n; i++) {
 443         c = getChar();
 444         if (c == EOF)
 445             break;
 446         if (c == '\n') {
 447             ungetChar(c);
 448             break;
 449         }
 450         cp[i] = (jschar)c;
 451     }
 452     for (j = i - 1; j >= 0; j--)
 453         ungetChar(cp[j]);
 454     return i == n;
 455 }
 456
 457 bool
 458 TokenStream::reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber,
 459                                         va_list ap)
 460 {
 461     JSErrorReport report;
 462     char *message;
 463     size_t linelength;
 464     jschar *linechars;
 465     char *linebytes;
 466     bool warning;
 467     JSBool ok;
 468     TokenPos *tp;
 469     uintN index, i;
 470     JSErrorReporter onError;
 471
 472     JS_ASSERT(linebuf.limit < linebuf.base + LINE_LIMIT);
 473
 474     if (JSREPORT_IS_STRICT(flags) && !JS_HAS_STRICT_OPTION(cx))
 475         return JS_TRUE;
 476
 477     warning = JSREPORT_IS_WARNING(flags);
 478     if (warning && JS_HAS_WERROR_OPTION(cx)) {
 479         flags &= ~JSREPORT_WARNING;
 480         warning = false;
 481     }
 482
 483     PodZero(&report);
 484     report.flags = flags;
 485     report.errorNumber = errorNumber;
 486     message = NULL;
 487     linechars = NULL;
 488     linebytes = NULL;
 489
 490     MUST_FLOW_THROUGH("out");
 491     ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
 492                                  errorNumber, &message, &report,
 493                                  !(flags & JSREPORT_UC), ap);
 494     if (!ok) {
 495         warning = false;
 496         goto out;
 497     }
 498
 499     report.filename = filename;
 500
 501     if (pn) {
 502         report.lineno = pn->pn_pos.begin.lineno;
 503         if (report.lineno != lineno)
 504             goto report;
 505         tp = &pn->pn_pos;
 506     } else {
 507         /* Point to the current token, not the next one to get. */
 508         tp = &tokens[cursor].pos;
 509     }
 510     report.lineno = lineno;
 511     linelength = linebuf.limit - linebuf.base;
 512     linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
 513     if (!linechars) {
 514         warning = false;
 515         goto out;
 516     }
 517     memcpy(linechars, linebuf.base, linelength * sizeof(jschar));
 518     linechars[linelength] = 0;
 519     linebytes = js_DeflateString(cx, linechars, linelength);
 520     if (!linebytes) {
 521         warning = false;
 522         goto out;
 523     }
 524     report.linebuf = linebytes;
 525
 526     /*
 527      * FIXME: What should instead happen here is that we should
 528      * find error-tokens in userbuf, if !file.  That will
 529      * allow us to deliver a more helpful error message, which
 530      * includes all or part of the bad string or bad token.  The
 531      * code here yields something that looks truncated.
 532      * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
 533      */
 534     index = 0;
 535     if (tp->begin.lineno == tp->end.lineno) {
 536         if (tp->begin.index < linepos)
 537             goto report;
 538
 539         index = tp->begin.index - linepos;
 540     }
 541
 542     report.tokenptr = report.linebuf + index;
 543     report.uclinebuf = linechars;
 544     report.uctokenptr = report.uclinebuf + index;
 545
 546     /*
 547      * If there's a runtime exception type associated with this error
 548      * number, set that as the pending exception.  For errors occuring at
 549      * compile time, this is very likely to be a JSEXN_SYNTAXERR.
 550      *
 551      * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
 552      * flag will be set in report.flags.  Proper behavior for an error
 553      * reporter is to ignore a report with this flag for all but top-level
 554      * compilation errors.  The exception will remain pending, and so long
 555      * as the non-top-level "load", "eval", or "compile" native function
 556      * returns false, the top-level reporter will eventually receive the
 557      * uncaught exception report.
 558      *
 559      * XXX it'd probably be best if there was only one call to this
 560      * function, but there seem to be two error reporter call points.
 561      */
 562   report:
 563     onError = cx->errorReporter;
 564
 565     /*
 566      * Try to raise an exception only if there isn't one already set --
 567      * otherwise the exception will describe the last compile-time error,
 568      * which is likely spurious.
 569      */
 570     if (!(flags & TSF_ERROR)) {
 571         if (js_ErrorToException(cx, message, &report, NULL, NULL))
 572             onError = NULL;
 573     }
 574
 575     /*
 576      * Suppress any compile-time errors that don't occur at the top level.
 577      * This may still fail, as interplevel may be zero in contexts where we
 578      * don't really want to call the error reporter, as when js is called
 579      * by other code which could catch the error.
 580      */
 581     if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
 582         onError = NULL;
 583
 584     if (onError) {
 585         JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
 586
 587         /*
 588          * If debugErrorHook is present then we give it a chance to veto
 589          * sending the error on to the regular error reporter.
 590          */
 591         if (hook && !hook(cx, message, &report,
 592                           cx->debugHooks->debugErrorHookData)) {
 593             onError = NULL;
 594         }
 595     }
 596     if (onError)
 597         (*onError)(cx, message, &report);
 598
 599   out:
 600     if (linebytes)
 601         cx->free(linebytes);
 602     if (linechars)
 603         cx->free(linechars);
 604     if (message)
 605         cx->free(message);
 606     if (report.ucmessage)
 607         cx->free((void *)report.ucmessage);
 608
 609     if (report.messageArgs) {
 610         if (!(flags & JSREPORT_UC)) {
 611             i = 0;
 612             while (report.messageArgs[i])
 613                 cx->free((void *)report.messageArgs[i++]);
 614         }
 615         cx->free((void *)report.messageArgs);
 616     }
 617
 618     if (!JSREPORT_IS_WARNING(flags)) {
 619         /* Set the error flag to suppress spurious reports. */
 620         flags |= TSF_ERROR;
 621     }
 622
 623     return warning;
 624 }
 625
 626 bool
 627 js::ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
 628                           uintN errorNumber, ...)
 629 {
 630     JS_ASSERT(ts || tc);
 631     JS_ASSERT(cx == ts->getContext());
 632
 633     /* In strict mode code, this is an error, not just a warning. */
 634     uintN flags;
 635     if ((tc && tc->flags & TCF_STRICT_MODE_CODE) || (ts && ts->isStrictMode()))
 636         flags = JSREPORT_ERROR;
 637     else if (JS_HAS_STRICT_OPTION(cx))
 638         flags = JSREPORT_WARNING;
 639     else
 640         return true;
 641
 642     va_list ap;
 643     va_start(ap, errorNumber);
 644     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 645     va_end(ap);
 646
 647     return result;
 648 }
 649
 650 bool
 651 js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn,
 652                              uintN flags, uintN errorNumber, ...)
 653 {
 654     va_list ap;
 655
 656     /*
 657      * We don't accept a JSTreeContext argument, so we can't implement
 658      * JSREPORT_STRICT_MODE_ERROR here.  Use ReportStrictModeError instead,
 659      * or do the checks in the caller and pass plain old JSREPORT_ERROR.
 660      */
 661     JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
 662
 663     va_start(ap, errorNumber);
 664     JS_ASSERT(cx == ts->getContext());
 665     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 666     va_end(ap);
 667
 668     return result;
 669 }
 670
 671 #if JS_HAS_XML_SUPPORT
 672
 673 JSBool
 674 TokenStream::getXMLEntity()
 675 {
 676     ptrdiff_t offset, length, i;
 677     int c, d;
 678     JSBool ispair;
 679     jschar *bp, digit;
 680     char *bytes;
 681     JSErrNum msg;
 682
 683     JSCharBuffer &tb = tokenbuf;
 684
 685     /* Put the entity, including the '&' already scanned, in tokenbuf. */
 686     offset = tb.length();
 687     if (!tb.append('&'))
 688         return JS_FALSE;
 689     while ((c = getChar()) != ';') {
 690         if (c == EOF || c == '\n') {
 691             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
 692             return JS_FALSE;
 693         }
 694         if (!tb.append(c))
 695             return JS_FALSE;
 696     }
 697
 698     /* Let length be the number of jschars after the '&', including the ';'. */
 699     length = tb.length() - offset;
 700     bp = tb.begin() + offset;
 701     c = d = 0;
 702     ispair = JS_FALSE;
 703     if (length > 2 && bp[1] == '#') {
 704         /* Match a well-formed XML Character Reference. */
 705         i = 2;
 706         if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
 707             if (length > 9)     /* at most 6 hex digits allowed */
 708                 goto badncr;
 709             while (++i < length) {
 710                 digit = bp[i];
 711                 if (!JS7_ISHEX(digit))
 712                     goto badncr;
 713                 c = (c << 4) + JS7_UNHEX(digit);
 714             }
 715         } else {
 716             while (i < length) {
 717                 digit = bp[i++];
 718                 if (!JS7_ISDEC(digit))
 719                     goto badncr;
 720                 c = (c * 10) + JS7_UNDEC(digit);
 721                 if (c < 0)
 722                     goto badncr;
 723             }
 724         }
 725
 726         if (0x10000 <= c && c <= 0x10FFFF) {
 727             /* Form a surrogate pair (c, d) -- c is the high surrogate. */
 728             d = 0xDC00 + (c & 0x3FF);
 729             c = 0xD7C0 + (c >> 10);
 730             ispair = JS_TRUE;
 731         } else {
 732             /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
 733             if (c != 0x9 && c != 0xA && c != 0xD &&
 734                 !(0x20 <= c && c <= 0xD7FF) &&
 735                 !(0xE000 <= c && c <= 0xFFFD)) {
 736                 goto badncr;
 737             }
 738         }
 739     } else {
 740         /* Try to match one of the five XML 1.0 predefined entities. */
 741         switch (length) {
 742           case 3:
 743             if (bp[2] == 't') {
 744                 if (bp[1] == 'l')
 745                     c = '<';
 746                 else if (bp[1] == 'g')
 747                     c = '>';
 748             }
 749             break;
 750           case 4:
 751             if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
 752                 c = '&';
 753             break;
 754           case 5:
 755             if (bp[3] == 'o') {
 756                 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
 757                     c = '\'';
 758                 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
 759                     c = '"';
 760             }
 761             break;
 762         }
 763         if (c == 0) {
 764             msg = JSMSG_UNKNOWN_XML_ENTITY;
 765             goto bad;
 766         }
 767     }
 768
 769     /* If we matched, retract tokenbuf and store the entity's value. */
 770     *bp++ = (jschar) c;
 771     if (ispair)
 772         *bp++ = (jschar) d;
 773     tb.shrinkBy(tb.end() - bp);
 774     return JS_TRUE;
 775
 776   badncr:
 777     msg = JSMSG_BAD_XML_NCR;
 778   bad:
 779     /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
 780     JS_ASSERT((tb.end() - bp) >= 1);
 781     bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
 782     if (bytes) {
 783         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
 784         cx->free(bytes);
 785     }
 786     return JS_FALSE;
 787 }
 788
 789 #endif /* JS_HAS_XML_SUPPORT */
 790
 791 /*
 792  * We have encountered a '\': check for a Unicode escape sequence after it,
 793  * returning the character code value if we found a Unicode escape sequence.
 794  * Otherwise, non-destructively return the original '\'.
 795  */
 796 int32
 797 TokenStream::getUnicodeEscape()
 798 {
 799     jschar cp[5];
 800     int32 c;
 801
 802     if (peekChars(5, cp) && cp[0] == 'u' &&
 803         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
 804         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
 805     {
 806         c = (((((JS7_UNHEX(cp[1]) << 4)
 807                 + JS7_UNHEX(cp[2])) << 4)
 808               + JS7_UNHEX(cp[3])) << 4)
 809             + JS7_UNHEX(cp[4]);
 810         skipChars(5);
 811         return c;
 812     }
 813     return '\\';
 814 }
 815
 816 Token *
 817 TokenStream::newToken(ptrdiff_t adjust)
 818 {
 819     cursor = (cursor + 1) & ntokensMask;
 820     Token *tp = &tokens[cursor];
 821     tp->ptr = linebuf.ptr + adjust;
 822     tp->pos.begin.index = linepos + (tp->ptr - linebuf.base) - ungetpos;
 823     tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
 824     return tp;
 825 }
 826
 827 static JS_ALWAYS_INLINE JSBool
 828 ScanAsSpace(jschar c)
 829 {
 830     /* Treat little- and big-endian BOMs as whitespace for compatibility. */
 831     if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
 832         return JS_TRUE;
 833     return JS_FALSE;
 834 }
 835
 836 static JS_ALWAYS_INLINE JSAtom *
 837 atomize(JSContext *cx, JSCharBuffer &cb)
 838 {
 839     return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
 840 }
 841
 842 TokenKind
 843 TokenStream::getTokenInternal()
 844 {
 845     TokenKind tt;
 846     int c, qc;
 847     Token *tp;
 848     JSAtom *atom;
 849     JSBool hadUnicodeEscape;
 850     const struct keyword *kw;
 851 #if JS_HAS_XML_SUPPORT
 852     JSBool inTarget;
 853     size_t targetLength;
 854     ptrdiff_t contentIndex;
 855 #endif
 856
 857 #if JS_HAS_XML_SUPPORT
 858     if (flags & TSF_XMLTEXTMODE) {
 859         tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
 860         tp = newToken(0);
 861         tokenbuf.clear();
 862         qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
 863
 864         while ((c = getChar()) != qc && c != '<' && c != EOF) {
 865             if (c == '&' && qc == '<') {
 866                 if (!getXMLEntity())
 867                     goto error;
 868                 tt = TOK_XMLTEXT;
 869                 continue;
 870             }
 871
 872             if (!JS_ISXMLSPACE(c))
 873                 tt = TOK_XMLTEXT;
 874             if (!tokenbuf.append(c))
 875                 goto error;
 876         }
 877         ungetChar(c);
 878
 879         if (tokenbuf.empty()) {
 880             atom = NULL;
 881         } else {
 882             atom = atomize(cx, tokenbuf);
 883             if (!atom)
 884                 goto error;
 885         }
 886         tp->pos.end.lineno = lineno;
 887         tp->t_op = JSOP_STRING;
 888         tp->t_atom = atom;
 889         goto out;
 890     }
 891
 892     if (flags & TSF_XMLTAGMODE) {
 893         tp = newToken(0);
 894         c = getChar();
 895         if (JS_ISXMLSPACE(c)) {
 896             do {
 897                 c = getChar();
 898             } while (JS_ISXMLSPACE(c));
 899             ungetChar(c);
 900             tt = TOK_XMLSPACE;
 901             goto out;
 902         }
 903
 904         if (c == EOF) {
 905             tt = TOK_EOF;
 906             goto out;
 907         }
 908
 909         tokenbuf.clear();
 910         if (JS_ISXMLNSSTART(c)) {
 911             JSBool sawColon = JS_FALSE;
 912
 913             if (!tokenbuf.append(c))
 914                 goto error;
 915             while ((c = getChar()) != EOF && JS_ISXMLNAME(c)) {
 916                 if (c == ':') {
 917                     int nextc;
 918
 919                     if (sawColon ||
 920                         (nextc = peekChar(),
 921                          ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
 922                          !JS_ISXMLNAME(nextc))) {
 923                         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 924                                                  JSMSG_BAD_XML_QNAME);
 925                         goto error;
 926                     }
 927                     sawColon = JS_TRUE;
 928                 }
 929
 930                 if (!tokenbuf.append(c))
 931                     goto error;
 932             }
 933
 934             ungetChar(c);
 935             atom = atomize(cx, tokenbuf);
 936             if (!atom)
 937                 goto error;
 938             tp->t_op = JSOP_STRING;
 939             tp->t_atom = atom;
 940             tt = TOK_XMLNAME;
 941             goto out;
 942         }
 943
 944         switch (c) {
 945           case '{':
 946             if (flags & TSF_XMLONLYMODE)
 947                 goto bad_xml_char;
 948             tt = TOK_LC;
 949             goto out;
 950
 951           case '=':
 952             tt = TOK_ASSIGN;
 953             goto out;
 954
 955           case '"':
 956           case '\'':
 957             qc = c;
 958             while ((c = getChar()) != qc) {
 959                 if (c == EOF) {
 960                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 961                                              JSMSG_UNTERMINATED_STRING);
 962                     goto error;
 963                 }
 964
 965                 /*
 966                  * XML attribute values are double-quoted when pretty-printed,
 967                  * so escape " if it is expressed directly in a single-quoted
 968                  * attribute value.
 969                  */
 970                 if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
 971                     JS_ASSERT(qc == '\'');
 972                     if (!tokenbuf.append(js_quot_entity_str,
 973                                      strlen(js_quot_entity_str)))
 974                         goto error;
 975                     continue;
 976                 }
 977
 978                 if (c == '&' && (flags & TSF_XMLONLYMODE)) {
 979                     if (!getXMLEntity())
 980                         goto error;
 981                     continue;
 982                 }
 983
 984                 if (!tokenbuf.append(c))
 985                     goto error;
 986             }
 987             atom = atomize(cx, tokenbuf);
 988             if (!atom)
 989                 goto error;
 990             tp->pos.end.lineno = lineno;
 991             tp->t_op = JSOP_STRING;
 992             tp->t_atom = atom;
 993             tt = TOK_XMLATTR;
 994             goto out;
 995
 996           case '>':
 997             tt = TOK_XMLTAGC;
 998             goto out;
 999
1000           case '/':
1001             if (matchChar('>')) {
1002                 tt = TOK_XMLPTAGC;
1003                 goto out;
1004             }
1005             /* FALL THROUGH */
1006
1007           bad_xml_char:
1008           default:
1009             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
1010             goto error;
1011         }
1012         /* NOTREACHED */
1013     }
1014 #endif /* JS_HAS_XML_SUPPORT */
1015
1016   retry:
1017     do {
1018         c = getChar();
1019         if (c == '\n') {
1020             flags &= ~TSF_DIRTYLINE;
1021             if (flags & TSF_NEWLINES)
1022                 break;
1023         }
1024     } while (ScanAsSpace((jschar)c));
1025
1026     tp = newToken(-1);
1027     if (c == EOF) {
1028         tt = TOK_EOF;
1029         goto out;
1030     }
1031
1032     hadUnicodeEscape = JS_FALSE;
1033     if (JS_ISIDSTART(c) ||
1034         (c == '\\' &&
1035          (qc = getUnicodeEscape(),
1036           hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1037         if (hadUnicodeEscape)
1038             c = qc;
1039         tokenbuf.clear();
1040         for (;;) {
1041             if (!tokenbuf.append(c))
1042                 goto error;
1043             c = getChar();
1044             if (c == '\\') {
1045                 qc = getUnicodeEscape();
1046                 if (!JS_ISIDENT(qc))
1047                     break;
1048                 c = qc;
1049                 hadUnicodeEscape = JS_TRUE;
1050             } else {
1051                 if (!JS_ISIDENT(c))
1052                     break;
1053             }
1054         }
1055         ungetChar(c);
1056
1057         /*
1058          * Check for keywords unless we saw Unicode escape or parser asks
1059          * to ignore keywords.
1060          */
1061         if (!hadUnicodeEscape &&
1062             !(flags & TSF_KEYWORD_IS_NAME) &&
1063             (kw = FindKeyword(tokenbuf.begin(), tokenbuf.length()))) {
1064             if (kw->tokentype == TOK_RESERVED) {
1065                 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1066                                               JSMSG_RESERVED_ID, kw->chars)) {
1067                     goto error;
1068                 }
1069             } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1070                 tt = kw->tokentype;
1071                 tp->t_op = (JSOp) kw->op;
1072                 goto out;
1073             }
1074         }
1075
1076         atom = atomize(cx, tokenbuf);
1077         if (!atom)
1078             goto error;
1079         tp->t_op = JSOP_NAME;
1080         tp->t_atom = atom;
1081         tt = TOK_NAME;
1082         goto out;
1083     }
1084
1085     if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(peekChar()))) {
1086         jsint radix;
1087         const jschar *endptr;
1088         jsdouble dval;
1089
1090         radix = 10;
1091         tokenbuf.clear();
1092
1093         if (c == '0') {
1094             if (!tokenbuf.append(c))
1095                 goto error;
1096             c = getChar();
1097             if (JS_TOLOWER(c) == 'x') {
1098                 if (!tokenbuf.append(c))
1099                     goto error;
1100                 c = getChar();
1101                 radix = 16;
1102             } else if (JS7_ISDEC(c)) {
1103                 radix = 8;
1104             }
1105         }
1106
1107         while (JS7_ISHEX(c)) {
1108             if (radix < 16) {
1109                 if (JS7_ISLET(c))
1110                     break;
1111
1112                 if (radix == 8) {
1113                     /* Octal integer literals are not permitted in strict mode code. */
1114                     if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1115                         goto error;
1116
1117                     /*
1118                      * Outside strict mode, we permit 08 and 09 as decimal numbers, which
1119                      * makes our behaviour a superset of the ECMA numeric grammar. We
1120                      * might not always be so permissive, so we warn about it.
1121                      */
1122                     if (c >= '8') {
1123                         if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1124                                                       JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1125                             goto error;
1126                         }
1127                         radix = 10;
1128                     }
1129                 }
1130             }
1131             if (!tokenbuf.append(c))
1132                 goto error;
1133             c = getChar();
1134         }
1135
1136         if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1137             if (c == '.') {
1138                 do {
1139                     if (!tokenbuf.append(c))
1140                         goto error;
1141                     c = getChar();
1142                 } while (JS7_ISDEC(c));
1143             }
1144             if (JS_TOLOWER(c) == 'e') {
1145                 if (!tokenbuf.append(c))
1146                     goto error;
1147                 c = getChar();
1148                 if (c == '+' || c == '-') {
1149                     if (!tokenbuf.append(c))
1150                         goto error;
1151                     c = getChar();
1152                 }
1153                 if (!JS7_ISDEC(c)) {
1154                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1155                                              JSMSG_MISSING_EXPONENT);
1156                     goto error;
1157                 }
1158                 do {
1159                     if (!tokenbuf.append(c))
1160                         goto error;
1161                     c = getChar();
1162                 } while (JS7_ISDEC(c));
1163             }
1164         }
1165
1166         if (JS_ISIDSTART(c)) {
1167             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1168             goto error;
1169         }
1170
1171         /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1172         ungetChar(c);
1173         if (!tokenbuf.append(0))
1174             goto error;
1175
1176         if (radix == 10) {
1177             if (!js_strtod(cx, tokenbuf.begin(), tokenbuf.end(), &endptr, &dval)) {
1178                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1179                 goto error;
1180             }
1181         } else {
1182             if (!js_strtointeger(cx, tokenbuf.begin(), tokenbuf.end(),
1183                                  &endptr, radix, &dval)) {
1184                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_OUT_OF_MEMORY);
1185                 goto error;
1186             }
1187         }
1188         tp->t_dval = dval;
1189         tt = TOK_NUMBER;
1190         goto out;
1191     }
1192
1193     if (c == '"' || c == '\'') {
1194         qc = c;
1195         tokenbuf.clear();
1196         while ((c = getChar()) != qc) {
1197             if (c == '\n' || c == EOF) {
1198                 ungetChar(c);
1199                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1200                                          JSMSG_UNTERMINATED_STRING);
1201                 goto error;
1202             }
1203             if (c == '\\') {
1204                 switch (c = getChar()) {
1205                   case 'b': c = '\b'; break;
1206                   case 'f': c = '\f'; break;
1207                   case 'n': c = '\n'; break;
1208                   case 'r': c = '\r'; break;
1209                   case 't': c = '\t'; break;
1210                   case 'v': c = '\v'; break;
1211
1212                   default:
1213                     if ('0' <= c && c < '8') {
1214                         int32 val = JS7_UNDEC(c);
1215
1216                         c = peekChar();
1217                         /* Strict mode code allows only \0, then a non-digit. */
1218                         if (val != 0 || JS7_ISDEC(c)) {
1219                             if (!ReportStrictModeError(cx, this, NULL, NULL,
1220                                                        JSMSG_DEPRECATED_OCTAL)) {
1221                                 goto error;
1222                             }
1223                         }
1224                         if ('0' <= c && c < '8') {
1225                             val = 8 * val + JS7_UNDEC(c);
1226                             getChar();
1227                             c = peekChar();
1228                             if ('0' <= c && c < '8') {
1229                                 int32 save = val;
1230                                 val = 8 * val + JS7_UNDEC(c);
1231                                 if (val <= 0377)
1232                                     getChar();
1233                                 else
1234                                     val = save;
1235                             }
1236                         }
1237
1238                         c = (jschar)val;
1239                     } else if (c == 'u') {
1240                         jschar cp[4];
1241                         if (peekChars(4, cp) &&
1242                             JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1243                             JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1244                             c = (((((JS7_UNHEX(cp[0]) << 4)
1245                                     + JS7_UNHEX(cp[1])) << 4)
1246                                   + JS7_UNHEX(cp[2])) << 4)
1247                                 + JS7_UNHEX(cp[3]);
1248                             skipChars(4);
1249                         }
1250                     } else if (c == 'x') {
1251                         jschar cp[2];
1252                         if (peekChars(2, cp) &&
1253                             JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1254                             c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1255                             skipChars(2);
1256                         }
1257                     } else if (c == '\n') {
1258                         /* ECMA follows C by removing escaped newlines. */
1259                         continue;
1260                     }
1261                     break;
1262                 }
1263             }
1264             if (!tokenbuf.append(c))
1265                 goto error;
1266         }
1267         atom = atomize(cx, tokenbuf);
1268         if (!atom)
1269             goto error;
1270         tp->pos.end.lineno = lineno;
1271         tp->t_op = JSOP_STRING;
1272         tp->t_atom = atom;
1273         tt = TOK_STRING;
1274         goto out;
1275     }
1276
1277     switch (c) {
1278       case '\n': tt = TOK_EOL; goto eol_out;
1279       case ';':  tt = TOK_SEMI; break;
1280       case '[':  tt = TOK_LB; break;
1281       case ']':  tt = TOK_RB; break;
1282       case '{':  tt = TOK_LC; break;
1283       case '}':  tt = TOK_RC; break;
1284       case '(':  tt = TOK_LP; break;
1285       case ')':  tt = TOK_RP; break;
1286       case ',':  tt = TOK_COMMA; break;
1287       case '?':  tt = TOK_HOOK; break;
1288
1289       case '.':
1290 #if JS_HAS_XML_SUPPORT
1291         if (matchChar(c))
1292             tt = TOK_DBLDOT;
1293         else
1294 #endif
1295             tt = TOK_DOT;
1296         break;
1297
1298       case ':':
1299 #if JS_HAS_XML_SUPPORT
1300         if (matchChar(c)) {
1301             tt = TOK_DBLCOLON;
1302             break;
1303         }
1304 #endif
1305         /*
1306          * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1307          * object initializer, likewise for setter.
1308          */
1309         tp->t_op = JSOP_NOP;
1310         tt = TOK_COLON;
1311         break;
1312
1313       case '|':
1314         if (matchChar(c)) {
1315             tt = TOK_OR;
1316         } else if (matchChar('=')) {
1317             tp->t_op = JSOP_BITOR;
1318             tt = TOK_ASSIGN;
1319         } else {
1320             tt = TOK_BITOR;
1321         }
1322         break;
1323
1324       case '^':
1325         if (matchChar('=')) {
1326             tp->t_op = JSOP_BITXOR;
1327             tt = TOK_ASSIGN;
1328         } else {
1329             tt = TOK_BITXOR;
1330         }
1331         break;
1332
1333       case '&':
1334         if (matchChar(c)) {
1335             tt = TOK_AND;
1336         } else if (matchChar('=')) {
1337             tp->t_op = JSOP_BITAND;
1338             tt = TOK_ASSIGN;
1339         } else {
1340             tt = TOK_BITAND;
1341         }
1342         break;
1343
1344       case '=':
1345         if (matchChar(c)) {
1346             tp->t_op = matchChar(c) ? JSOP_STRICTEQ : JSOP_EQ;
1347             tt = TOK_EQOP;
1348         } else {
1349             tp->t_op = JSOP_NOP;
1350             tt = TOK_ASSIGN;
1351         }
1352         break;
1353
1354       case '!':
1355         if (matchChar('=')) {
1356             tp->t_op = matchChar('=') ? JSOP_STRICTNE : JSOP_NE;
1357             tt = TOK_EQOP;
1358         } else {
1359             tp->t_op = JSOP_NOT;
1360             tt = TOK_UNARYOP;
1361         }
1362         break;
1363
1364 #if JS_HAS_XML_SUPPORT
1365       case '@':
1366         tt = TOK_AT;
1367         break;
1368 #endif
1369
1370       case '<':
1371 #if JS_HAS_XML_SUPPORT
1372         /*
1373          * After much testing, it's clear that Postel's advice to protocol
1374          * designers ("be liberal in what you accept, and conservative in what
1375          * you send") invites a natural-law repercussion for JS as "protocol":
1376          *
1377          * "If you are liberal in what you accept, others will utterly fail to
1378          *  be conservative in what they send."
1379          *
1380          * Which means you will get <!-- comments to end of line in the middle
1381          * of .js files, and after if conditions whose then statements are on
1382          * the next line, and other wonders.  See at least the following bugs:
1383          * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1384          * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1385          * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1386          *
1387          * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1388          * an XML comment or CDATA literal.  Instead, we always scan <! as the
1389          * start of an HTML comment hack to end of line, used since Netscape 2
1390          * to hide script tag content from script-unaware browsers.
1391          *
1392          * But this still leaves XML resources with certain internal structure
1393          * vulnerable to being loaded as script cross-origin, and some internal
1394          * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1395          * source consists only of XML literals. See:
1396          *
1397          * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1398          *
1399          * The check for this is in jsparse.cpp, Compiler::compileScript.
1400          */
1401         if ((flags & TSF_OPERAND) &&
1402             (JS_HAS_XML_OPTION(cx) || peekChar() != '!')) {
1403             /* Check for XML comment or CDATA section. */
1404             if (matchChar('!')) {
1405                 tokenbuf.clear();
1406
1407                 /* Scan XML comment. */
1408                 if (matchChar('-')) {
1409                     if (!matchChar('-'))
1410                         goto bad_xml_markup;
1411                     while ((c = getChar()) != '-' || !matchChar('-')) {
1412                         if (c == EOF)
1413                             goto bad_xml_markup;
1414                         if (!tokenbuf.append(c))
1415                             goto error;
1416                     }
1417                     tt = TOK_XMLCOMMENT;
1418                     tp->t_op = JSOP_XMLCOMMENT;
1419                     goto finish_xml_markup;
1420                 }
1421
1422                 /* Scan CDATA section. */
1423                 if (matchChar('[')) {
1424                     jschar cp[6];
1425                     if (peekChars(6, cp) &&
1426                         cp[0] == 'C' &&
1427                         cp[1] == 'D' &&
1428                         cp[2] == 'A' &&
1429                         cp[3] == 'T' &&
1430                         cp[4] == 'A' &&
1431                         cp[5] == '[') {
1432                         skipChars(6);
1433                         while ((c = getChar()) != ']' ||
1434                                !peekChars(2, cp) ||
1435                                cp[0] != ']' ||
1436                                cp[1] != '>') {
1437                             if (c == EOF)
1438                                 goto bad_xml_markup;
1439                             if (!tokenbuf.append(c))
1440                                 goto error;
1441                         }
1442                         getChar();            /* discard ] but not > */
1443                         tt = TOK_XMLCDATA;
1444                         tp->t_op = JSOP_XMLCDATA;
1445                         goto finish_xml_markup;
1446                     }
1447                     goto bad_xml_markup;
1448                 }
1449             }
1450
1451             /* Check for processing instruction. */
1452             if (matchChar('?')) {
1453                 inTarget = JS_TRUE;
1454                 targetLength = 0;
1455                 contentIndex = -1;
1456
1457                 tokenbuf.clear();
1458                 while ((c = getChar()) != '?' || peekChar() != '>') {
1459                     if (c == EOF)
1460                         goto bad_xml_markup;
1461                     if (inTarget) {
1462                         if (JS_ISXMLSPACE(c)) {
1463                             if (tokenbuf.empty())
1464                                 goto bad_xml_markup;
1465                             inTarget = JS_FALSE;
1466                         } else {
1467                             if (!(tokenbuf.empty()
1468                                   ? JS_ISXMLNSSTART(c)
1469                                   : JS_ISXMLNS(c))) {
1470                                 goto bad_xml_markup;
1471                             }
1472                             ++targetLength;
1473                         }
1474                     } else {
1475                         if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1476                             contentIndex = tokenbuf.length();
1477                     }
1478                     if (!tokenbuf.append(c))
1479                         goto error;
1480                 }
1481                 if (targetLength == 0)
1482                     goto bad_xml_markup;
1483                 if (contentIndex < 0) {
1484                     atom = cx->runtime->atomState.emptyAtom;
1485                 } else {
1486                     atom = js_AtomizeChars(cx,
1487                                            tokenbuf.begin() + contentIndex,
1488                                            tokenbuf.length() - contentIndex,
1489                                            0);
1490                     if (!atom)
1491                         goto error;
1492                 }
1493                 tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1494                 tp->t_atom2 = atom;
1495                 tt = TOK_XMLPI;
1496
1497         finish_xml_markup:
1498                 if (!matchChar('>'))
1499                     goto bad_xml_markup;
1500                 atom = atomize(cx, tokenbuf);
1501                 if (!atom)
1502                     goto error;
1503                 tp->t_atom = atom;
1504                 tp->pos.end.lineno = lineno;
1505                 goto out;
1506             }
1507
1508             /* An XML start-of-tag character. */
1509             tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1510             goto out;
1511
1512         bad_xml_markup:
1513             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1514             goto error;
1515         }
1516 #endif /* JS_HAS_XML_SUPPORT */
1517
1518         /* NB: treat HTML begin-comment as comment-till-end-of-line */
1519         if (matchChar('!')) {
1520             if (matchChar('-')) {
1521                 if (matchChar('-')) {
1522                     flags |= TSF_IN_HTML_COMMENT;
1523                     goto skipline;
1524                 }
1525                 ungetChar('-');
1526             }
1527             ungetChar('!');
1528         }
1529         if (matchChar(c)) {
1530             tp->t_op = JSOP_LSH;
1531             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1532         } else {
1533             tp->t_op = matchChar('=') ? JSOP_LE : JSOP_LT;
1534             tt = TOK_RELOP;
1535         }
1536         break;
1537
1538       case '>':
1539         if (matchChar(c)) {
1540             tp->t_op = matchChar(c) ? JSOP_URSH : JSOP_RSH;
1541             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1542         } else {
1543             tp->t_op = matchChar('=') ? JSOP_GE : JSOP_GT;
1544             tt = TOK_RELOP;
1545         }
1546         break;
1547
1548       case '*':
1549         tp->t_op = JSOP_MUL;
1550         tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
1551         break;
1552
1553       case '/':
1554         if (matchChar('/')) {
1555             /*
1556              * Hack for source filters such as the Mozilla XUL preprocessor:
1557              * "//@line 123\n" sets the number of the *next* line after the
1558              * comment to 123.
1559              */
1560             if (JS_HAS_ATLINE_OPTION(cx)) {
1561                 jschar cp[5];
1562                 uintN i, line, temp;
1563                 char filenameBuf[1024];
1564
1565                 if (peekChars(5, cp) &&
1566                     cp[0] == '@' &&
1567                     cp[1] == 'l' &&
1568                     cp[2] == 'i' &&
1569                     cp[3] == 'n' &&
1570                     cp[4] == 'e') {
1571                     skipChars(5);
1572                     while ((c = getChar()) != '\n' && ScanAsSpace((jschar)c))
1573                         continue;
1574                     if (JS7_ISDEC(c)) {
1575                         line = JS7_UNDEC(c);
1576                         while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1577                             temp = 10 * line + JS7_UNDEC(c);
1578                             if (temp < line) {
1579                                 /* Ignore overlarge line numbers. */
1580                                 goto skipline;
1581                             }
1582                             line = temp;
1583                         }
1584                         while (c != '\n' && ScanAsSpace((jschar)c))
1585                             c = getChar();
1586                         i = 0;
1587                         if (c == '"') {
1588                             while ((c = getChar()) != EOF && c != '"') {
1589                                 if (c == '\n') {
1590                                     ungetChar(c);
1591                                     goto skipline;
1592                                 }
1593                                 if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1594                                     goto skipline;
1595                                 filenameBuf[i++] = (char) c;
1596                             }
1597                             if (c == '"') {
1598                                 while ((c = getChar()) != '\n' &&
1599                                        ScanAsSpace((jschar)c)) {
1600                                     continue;
1601                                 }
1602                             }
1603                         }
1604                         filenameBuf[i] = '\0';
1605                         if (c == '\n') {
1606                             if (i > 0) {
1607                                 if (flags & TSF_OWNFILENAME)
1608                                     cx->free((void *) filename);
1609                                 filename = JS_strdup(cx, filenameBuf);
1610                                 if (!filename)
1611                                     goto error;
1612                                 flags |= TSF_OWNFILENAME;
1613                             }
1614                             lineno = line;
1615                         }
1616                     }
1617                     ungetChar(c);
1618                 }
1619             }
1620
1621   skipline:
1622             /* Optimize line skipping if we are not in an HTML comment. */
1623             if (flags & TSF_IN_HTML_COMMENT) {
1624                 while ((c = getChar()) != EOF && c != '\n') {
1625                     if (c == '-' && matchChar('-') && matchChar('>'))
1626                         flags &= ~TSF_IN_HTML_COMMENT;
1627                 }
1628             } else {
1629                 while ((c = getChar()) != EOF && c != '\n')
1630                     continue;
1631             }
1632             ungetChar(c);
1633             cursor = (cursor - 1) & ntokensMask;
1634             goto retry;
1635         }
1636
1637         if (matchChar('*')) {
1638             uintN linenoBefore = lineno;
1639             while ((c = getChar()) != EOF &&
1640                    !(c == '*' && matchChar('/'))) {
1641                 /* Ignore all characters until comment close. */
1642             }
1643             if (c == EOF) {
1644                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1645                                          JSMSG_UNTERMINATED_COMMENT);
1646                 goto error;
1647             }
1648             if ((flags & TSF_NEWLINES) && linenoBefore != lineno) {
1649                 flags &= ~TSF_DIRTYLINE;
1650                 tt = TOK_EOL;
1651                 goto eol_out;
1652             }
1653             cursor = (cursor - 1) & ntokensMask;
1654             goto retry;
1655         }
1656
1657         if (flags & TSF_OPERAND) {
1658             uintN reflags, length;
1659             JSBool inCharClass = JS_FALSE;
1660
1661             tokenbuf.clear();
1662             for (;;) {
1663                 c = getChar();
1664                 if (c == '\n' || c == EOF) {
1665                     ungetChar(c);
1666                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1667                                              JSMSG_UNTERMINATED_REGEXP);
1668                     goto error;
1669                 }
1670                 if (c == '\\') {
1671                     if (!tokenbuf.append(c))
1672                         goto error;
1673                     c = getChar();
1674                 } else if (c == '[') {
1675                     inCharClass = JS_TRUE;
1676                 } else if (c == ']') {
1677                     inCharClass = JS_FALSE;
1678                 } else if (c == '/' && !inCharClass) {
1679                     /* For compat with IE, allow unescaped / in char classes. */
1680                     break;
1681                 }
1682                 if (!tokenbuf.append(c))
1683                     goto error;
1684             }
1685             for (reflags = 0, length = tokenbuf.length() + 1; ; length++) {
1686                 c = peekChar();
1687                 if (c == 'g' && !(reflags & JSREG_GLOB))
1688                     reflags |= JSREG_GLOB;
1689                 else if (c == 'i' && !(reflags & JSREG_FOLD))
1690                     reflags |= JSREG_FOLD;
1691                 else if (c == 'm' && !(reflags & JSREG_MULTILINE))
1692                     reflags |= JSREG_MULTILINE;
1693                 else if (c == 'y' && !(reflags & JSREG_STICKY))
1694                     reflags |= JSREG_STICKY;
1695                 else
1696                     break;
1697                 getChar();
1698             }
1699             c = peekChar();
1700             if (JS7_ISLET(c)) {
1701                 char buf[2] = { '\0' };
1702                 tp->pos.begin.index += length + 1;
1703                 buf[0] = (char)c;
1704                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
1705                                          buf);
1706                 (void) getChar();
1707                 goto error;
1708             }
1709             tp->t_reflags = reflags;
1710             tt = TOK_REGEXP;
1711             break;
1712         }
1713
1714         tp->t_op = JSOP_DIV;
1715         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1716         break;
1717
1718       case '%':
1719         tp->t_op = JSOP_MOD;
1720         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1721         break;
1722
1723       case '~':
1724         tp->t_op = JSOP_BITNOT;
1725         tt = TOK_UNARYOP;
1726         break;
1727
1728       case '+':
1729         if (matchChar('=')) {
1730             tp->t_op = JSOP_ADD;
1731             tt = TOK_ASSIGN;
1732         } else if (matchChar(c)) {
1733             tt = TOK_INC;
1734         } else {
1735             tp->t_op = JSOP_POS;
1736             tt = TOK_PLUS;
1737         }
1738         break;
1739
1740       case '-':
1741         if (matchChar('=')) {
1742             tp->t_op = JSOP_SUB;
1743             tt = TOK_ASSIGN;
1744         } else if (matchChar(c)) {
1745             if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
1746                 flags &= ~TSF_IN_HTML_COMMENT;
1747                 goto skipline;
1748             }
1749             tt = TOK_DEC;
1750         } else {
1751             tp->t_op = JSOP_NEG;
1752             tt = TOK_MINUS;
1753         }
1754         break;
1755
1756 #if JS_HAS_SHARP_VARS
1757       case '#':
1758       {
1759         uint32 n;
1760
1761         c = getChar();
1762         if (!JS7_ISDEC(c)) {
1763             ungetChar(c);
1764             goto badchar;
1765         }
1766         n = (uint32)JS7_UNDEC(c);
1767         for (;;) {
1768             c = getChar();
1769             if (!JS7_ISDEC(c))
1770                 break;
1771             n = 10 * n + JS7_UNDEC(c);
1772             if (n >= UINT16_LIMIT) {
1773                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_SHARPVAR_TOO_BIG);
1774                 goto error;
1775             }
1776         }
1777         tp->t_dval = (jsdouble) n;
1778         if (JS_HAS_STRICT_OPTION(cx) &&
1779             (c == '=' || c == '#')) {
1780             char buf[20];
1781             JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1782             if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1783                                           JSMSG_DEPRECATED_USAGE, buf)) {
1784                 goto error;
1785             }
1786         }
1787         if (c == '=')
1788             tt = TOK_DEFSHARP;
1789         else if (c == '#')
1790             tt = TOK_USESHARP;
1791         else
1792             goto badchar;
1793         break;
1794       }
1795 #endif /* JS_HAS_SHARP_VARS */
1796
1797 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1798       badchar:
1799 #endif
1800
1801       default:
1802         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
1803         goto error;
1804     }
1805
1806   out:
1807     JS_ASSERT(tt != TOK_EOL);
1808     flags |= TSF_DIRTYLINE;
1809
1810   eol_out:
1811     JS_ASSERT(tt < TOK_LIMIT);
1812     tp->pos.end.index = linepos + (linebuf.ptr - linebuf.base) - ungetpos;
1813     tp->type = tt;
1814     return tt;
1815
1816   error:
1817     tt = TOK_ERROR;
1818     flags |= TSF_ERROR;
1819     goto out;
1820 }
1821