layout/style/nsCSSScanner.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6
   7 /* tokenization of CSS style sheets */
   8
   9 #include "nsCSSScanner.h"
  10 #include "nsStyleUtil.h"
  11 #include "nsISupportsImpl.h"
  12 #include "mozilla/ArrayUtils.h"
  13 #include "mozilla/css/ErrorReporter.h"
  14 #include "mozilla/Likely.h"
  15 #include <algorithm>
  16
  17 /* Character class tables and related helper functions. */
  18
  19 static const uint8_t IS_HEX_DIGIT  = 0x01;
  20 static const uint8_t IS_IDSTART    = 0x02;
  21 static const uint8_t IS_IDCHAR     = 0x04;
  22 static const uint8_t IS_URL_CHAR   = 0x08;
  23 static const uint8_t IS_HSPACE     = 0x10;
  24 static const uint8_t IS_VSPACE     = 0x20;
  25 static const uint8_t IS_SPACE      = IS_HSPACE|IS_VSPACE;
  26 static const uint8_t IS_STRING     = 0x40;
  27
  28 #define H    IS_HSPACE
  29 #define V    IS_VSPACE
  30 #define I    IS_IDCHAR
  31 #define J    IS_IDSTART
  32 #define U    IS_URL_CHAR
  33 #define S    IS_STRING
  34 #define X    IS_HEX_DIGIT
  35
  36 #define SH    S|H
  37 #define SU    S|U
  38 #define SUI   S|U|I
  39 #define SUIJ  S|U|I|J
  40 #define SUIX  S|U|I|X
  41 #define SUIJX S|U|I|J|X
  42
  43 static const uint8_t gLexTable[] = {
  44 // 00    01    02    03    04    05    06    07
  45     0,    S,    S,    S,    S,    S,    S,    S,
  46 // 08   TAB    LF    0B    FF    CR    0E    0F
  47     S,   SH,    V,    S,    V,    V,    S,    S,
  48 // 10    11    12    13    14    15    16    17
  49     S,    S,    S,    S,    S,    S,    S,    S,
  50 // 18    19    1A    1B    1C    1D    1E    1F
  51     S,    S,    S,    S,    S,    S,    S,    S,
  52 //SPC     !     "     #     $     %     &     '
  53    SH,   SU,    0,   SU,   SU,   SU,   SU,    0,
  54 //  (     )     *     +     ,     -     .     /
  55     S,    S,   SU,   SU,   SU,  SUI,   SU,   SU,
  56 //  0     1     2     3     4     5     6     7
  57  SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
  58 //  8     9     :     ;     <     =     >     ?
  59  SUIX, SUIX,   SU,   SU,   SU,   SU,   SU,   SU,
  60 //  @     A     B     C     D     E     F     G
  61    SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
  62 //  H     I     J     K     L     M     N     O
  63  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  64 //  P     Q     R     S     T     U     V     W
  65  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  66 //  X     Y     Z     [     \     ]     ^     _
  67  SUIJ, SUIJ, SUIJ,   SU,    J,   SU,   SU, SUIJ,
  68 //  `     a     b     c     d     e     f     g
  69    SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
  70 //  h     i     j     k     l     m     n     o
  71  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  72 //  p     q     r     s     t     u     v     w
  73  SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
  74 //  x     y     z     {     |     }     ~    7F
  75  SUIJ, SUIJ, SUIJ,   SU,   SU,   SU,   SU,    S,
  76 };
  77
  78 static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
  79               "gLexTable expected to cover all 128 ASCII characters");
  80
  81 #undef I
  82 #undef J
  83 #undef U
  84 #undef S
  85 #undef X
  86 #undef SH
  87 #undef SU
  88 #undef SUI
  89 #undef SUIJ
  90 #undef SUIX
  91 #undef SUIJX
  92
  93 /**
  94  * True if 'ch' is in character class 'cls', which should be one of
  95  * the constants above or some combination of them.  All characters
  96  * above U+007F are considered to be in 'cls'.  EOF is never in 'cls'.
  97  */
  98 static inline bool
  99 IsOpenCharClass(int32_t ch, uint8_t cls) {
 100   return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
 101 }
 102
 103 /**
 104  * True if 'ch' is in character class 'cls', which should be one of
 105  * the constants above or some combination of them.  No characters
 106  * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
 107  */
 108 static inline bool
 109 IsClosedCharClass(int32_t ch, uint8_t cls) {
 110   return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
 111 }
 112
 113 /**
 114  * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
 115  * TAB, LF, FF, CR, or SPC.
 116  */
 117 static inline bool
 118 IsWhitespace(int32_t ch) {
 119   return IsClosedCharClass(ch, IS_SPACE);
 120 }
 121
 122 /**
 123  * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
 124  */
 125 static inline bool
 126 IsHorzSpace(int32_t ch) {
 127   return IsClosedCharClass(ch, IS_HSPACE);
 128 }
 129
 130 /**
 131  * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR.  Vertical
 132  * whitespace requires special handling when consumed, see AdvanceLine.
 133  */
 134 static inline bool
 135 IsVertSpace(int32_t ch) {
 136   return IsClosedCharClass(ch, IS_VSPACE);
 137 }
 138
 139 /**
 140  * True if 'ch' is a character that can appear in the middle of an identifier.
 141  * This includes U+0000 since it is handled as U+FFFD, but for purposes of
 142  * GatherText it should not be included in IsOpenCharClass.
 143  */
 144 static inline bool
 145 IsIdentChar(int32_t ch) {
 146   return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
 147 }
 148
 149 /**
 150  * True if 'ch' is a character that by itself begins an identifier.
 151  * This includes U+0000 since it is handled as U+FFFD, but for purposes of
 152  * GatherText it should not be included in IsOpenCharClass.
 153  * (This is a subset of IsIdentChar.)
 154  */
 155 static inline bool
 156 IsIdentStart(int32_t ch) {
 157   return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
 158 }
 159
 160 /**
 161  * True if the two-character sequence aFirstChar+aSecondChar begins an
 162  * identifier.
 163  */
 164 static inline bool
 165 StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
 166 {
 167   return IsIdentStart(aFirstChar) ||
 168     (aFirstChar == '-' && IsIdentStart(aSecondChar));
 169 }
 170
 171 /**
 172  * True if 'ch' is a decimal digit.
 173  */
 174 static inline bool
 175 IsDigit(int32_t ch) {
 176   return (ch >= '0') && (ch <= '9');
 177 }
 178
 179 /**
 180  * True if 'ch' is a hexadecimal digit.
 181  */
 182 static inline bool
 183 IsHexDigit(int32_t ch) {
 184   return IsClosedCharClass(ch, IS_HEX_DIGIT);
 185 }
 186
 187 /**
 188  * Assuming that 'ch' is a decimal digit, return its numeric value.
 189  */
 190 static inline uint32_t
 191 DecimalDigitValue(int32_t ch)
 192 {
 193   return ch - '0';
 194 }
 195
 196 /**
 197  * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
 198  */
 199 static inline uint32_t
 200 HexDigitValue(int32_t ch)
 201 {
 202   if (IsDigit(ch)) {
 203     return DecimalDigitValue(ch);
 204   } else {
 205     // Note: c&7 just keeps the low three bits which causes
 206     // upper and lower case alphabetics to both yield their
 207     // "relative to 10" value for computing the hex value.
 208     return (ch & 0x7) + 9;
 209   }
 210 }
 211
 212 /**
 213  * If 'ch' can be the first character of a two-character match operator
 214  * token, return the token type code for that token, otherwise return
 215  * eCSSToken_Symbol to indicate that it can't.
 216  */
 217 static inline nsCSSTokenType
 218 MatchOperatorType(int32_t ch)
 219 {
 220   switch (ch) {
 221   case '~': return eCSSToken_Includes;
 222   case '|': return eCSSToken_Dashmatch;
 223   case '^': return eCSSToken_Beginsmatch;
 224   case '$': return eCSSToken_Endsmatch;
 225   case '*': return eCSSToken_Containsmatch;
 226   default:  return eCSSToken_Symbol;
 227   }
 228 }
 229
 230 /* Out-of-line nsCSSToken methods. */
 231
 232 /**
 233  * Append the textual representation of |this| to |aBuffer|.
 234  */
 235 void
 236 nsCSSToken::AppendToString(nsString& aBuffer) const
 237 {
 238   switch (mType) {
 239     case eCSSToken_Ident:
 240       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
 241       break;
 242
 243     case eCSSToken_AtKeyword:
 244       aBuffer.Append('@');
 245       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
 246       break;
 247
 248     case eCSSToken_ID:
 249     case eCSSToken_Hash:
 250       aBuffer.Append('#');
 251       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
 252       break;
 253
 254     case eCSSToken_Function:
 255       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
 256       aBuffer.Append('(');
 257       break;
 258
 259     case eCSSToken_URL:
 260     case eCSSToken_Bad_URL:
 261       aBuffer.AppendLiteral("url(");
 262       if (mSymbol != char16_t(0)) {
 263         nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
 264       } else {
 265         aBuffer.Append(mIdent);
 266       }
 267       if (mType == eCSSToken_URL) {
 268         aBuffer.Append(char16_t(')'));
 269       }
 270       break;
 271
 272     case eCSSToken_Number:
 273       if (mIntegerValid) {
 274         aBuffer.AppendInt(mInteger, 10);
 275       } else {
 276         aBuffer.AppendFloat(mNumber);
 277       }
 278       break;
 279
 280     case eCSSToken_Percentage:
 281       aBuffer.AppendFloat(mNumber * 100.0f);
 282       aBuffer.Append(char16_t('%'));
 283       break;
 284
 285     case eCSSToken_Dimension:
 286       if (mIntegerValid) {
 287         aBuffer.AppendInt(mInteger, 10);
 288       } else {
 289         aBuffer.AppendFloat(mNumber);
 290       }
 291       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
 292       break;
 293
 294     case eCSSToken_Bad_String:
 295       nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
 296       // remove the trailing quote character
 297       aBuffer.Truncate(aBuffer.Length() - 1);
 298       break;
 299
 300     case eCSSToken_String:
 301       nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
 302       break;
 303
 304     case eCSSToken_Symbol:
 305       aBuffer.Append(mSymbol);
 306       break;
 307
 308     case eCSSToken_Whitespace:
 309       aBuffer.Append(' ');
 310       break;
 311
 312     case eCSSToken_HTMLComment:
 313     case eCSSToken_URange:
 314       aBuffer.Append(mIdent);
 315       break;
 316
 317     case eCSSToken_Includes:
 318       aBuffer.AppendLiteral("~=");
 319       break;
 320     case eCSSToken_Dashmatch:
 321       aBuffer.AppendLiteral("|=");
 322       break;
 323     case eCSSToken_Beginsmatch:
 324       aBuffer.AppendLiteral("^=");
 325       break;
 326     case eCSSToken_Endsmatch:
 327       aBuffer.AppendLiteral("$=");
 328       break;
 329     case eCSSToken_Containsmatch:
 330       aBuffer.AppendLiteral("*=");
 331       break;
 332
 333     default:
 334       NS_ERROR("invalid token type");
 335       break;
 336   }
 337 }
 338
 339 /* nsCSSScanner methods. */
 340
 341 nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
 342   : mBuffer(aBuffer.BeginReading())
 343   , mOffset(0)
 344   , mCount(aBuffer.Length())
 345   , mLineNumber(aLineNumber)
 346   , mLineOffset(0)
 347   , mTokenLineNumber(aLineNumber)
 348   , mTokenLineOffset(0)
 349   , mTokenOffset(0)
 350   , mRecordStartOffset(0)
 351   , mEOFCharacters(eEOFCharacters_None)
 352   , mReporter(nullptr)
 353   , mSVGMode(false)
 354   , mRecording(false)
 355   , mSeenBadToken(false)
 356   , mSeenVariableReference(false)
 357 {
 358   MOZ_COUNT_CTOR(nsCSSScanner);
 359 }
 360
 361 nsCSSScanner::~nsCSSScanner()
 362 {
 363   MOZ_COUNT_DTOR(nsCSSScanner);
 364 }
 365
 366 void
 367 nsCSSScanner::StartRecording()
 368 {
 369   MOZ_ASSERT(!mRecording, "already started recording");
 370   mRecording = true;
 371   mRecordStartOffset = mOffset;
 372 }
 373
 374 void
 375 nsCSSScanner::StopRecording()
 376 {
 377   MOZ_ASSERT(mRecording, "haven't started recording");
 378   mRecording = false;
 379 }
 380
 381 void
 382 nsCSSScanner::StopRecording(nsString& aBuffer)
 383 {
 384   MOZ_ASSERT(mRecording, "haven't started recording");
 385   mRecording = false;
 386   aBuffer.Append(mBuffer + mRecordStartOffset,
 387                  mOffset - mRecordStartOffset);
 388 }
 389
 390 uint32_t
 391 nsCSSScanner::RecordingLength() const
 392 {
 393   MOZ_ASSERT(mRecording, "haven't started recording");
 394   return mOffset - mRecordStartOffset;
 395 }
 396
 397 #ifdef DEBUG
 398 bool
 399 nsCSSScanner::IsRecording() const
 400 {
 401   return mRecording;
 402 }
 403 #endif
 404
 405 nsDependentSubstring
 406 nsCSSScanner::GetCurrentLine() const
 407 {
 408   uint32_t end = mTokenOffset;
 409   while (end < mCount && !IsVertSpace(mBuffer[end])) {
 410     end++;
 411   }
 412   return nsDependentSubstring(mBuffer + mTokenLineOffset,
 413                               mBuffer + end);
 414 }
 415
 416 /**
 417  * Return the raw UTF-16 code unit at position |mOffset + n| within
 418  * the read buffer.  If that is beyond the end of the buffer, returns
 419  * -1 to indicate end of input.
 420  */
 421 inline int32_t
 422 nsCSSScanner::Peek(uint32_t n)
 423 {
 424   if (mOffset + n >= mCount) {
 425     return -1;
 426   }
 427   return mBuffer[mOffset + n];
 428 }
 429
 430 /**
 431  * Advance |mOffset| over |n| code units.  Advance(0) is a no-op.
 432  * If |n| is greater than the distance to end of input, will silently
 433  * stop at the end.  May not be used to advance over a line boundary;
 434  * AdvanceLine() must be used instead.
 435  */
 436 inline void
 437 nsCSSScanner::Advance(uint32_t n)
 438 {
 439 #ifdef DEBUG
 440   while (mOffset < mCount && n > 0) {
 441     MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
 442                "may not Advance() over a line boundary");
 443     mOffset++;
 444     n--;
 445   }
 446 #else
 447   if (mOffset + n >= mCount || mOffset + n < mOffset)
 448     mOffset = mCount;
 449   else
 450     mOffset += n;
 451 #endif
 452 }
 453
 454 /**
 455  * Advance |mOffset| over a line boundary.
 456  */
 457 void
 458 nsCSSScanner::AdvanceLine()
 459 {
 460   MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
 461              "may not AdvanceLine() over a horizontal character");
 462   // Advance over \r\n as a unit.
 463   if (mBuffer[mOffset]   == '\r' && mOffset + 1 < mCount &&
 464       mBuffer[mOffset+1] == '\n')
 465     mOffset += 2;
 466   else
 467     mOffset += 1;
 468   // 0 is a magical line number meaning that we don't know (i.e., script)
 469   if (mLineNumber != 0)
 470     mLineNumber++;
 471   mLineOffset = mOffset;
 472 }
 473
 474 /**
 475  * Back up |mOffset| over |n| code units.  Backup(0) is a no-op.
 476  * If |n| is greater than the distance to beginning of input, will
 477  * silently stop at the beginning.  May not be used to back up over a
 478  * line boundary.
 479  */
 480 void
 481 nsCSSScanner::Backup(uint32_t n)
 482 {
 483 #ifdef DEBUG
 484   while (mOffset > 0 && n > 0) {
 485     MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
 486                "may not Backup() over a line boundary");
 487     mOffset--;
 488     n--;
 489   }
 490 #else
 491   if (mOffset < n)
 492     mOffset = 0;
 493   else
 494     mOffset -= n;
 495 #endif
 496 }
 497
 498 void
 499 nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
 500 {
 501   aState.mOffset = mOffset;
 502   aState.mLineNumber = mLineNumber;
 503   aState.mLineOffset = mLineOffset;
 504   aState.mTokenLineNumber = mTokenLineNumber;
 505   aState.mTokenLineOffset = mTokenLineOffset;
 506   aState.mTokenOffset = mTokenOffset;
 507   aState.mInitialized = true;
 508 }
 509
 510 void
 511 nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
 512 {
 513   MOZ_ASSERT(aState.mInitialized, "have not saved state");
 514   if (aState.mInitialized) {
 515     mOffset = aState.mOffset;
 516     mLineNumber = aState.mLineNumber;
 517     mLineOffset = aState.mLineOffset;
 518     mTokenLineNumber = aState.mTokenLineNumber;
 519     mTokenLineOffset = aState.mTokenLineOffset;
 520     mTokenOffset = aState.mTokenOffset;
 521   }
 522 }
 523
 524 /**
 525  * Skip over a sequence of whitespace characters (vertical or
 526  * horizontal) starting at the current read position.
 527  */
 528 void
 529 nsCSSScanner::SkipWhitespace()
 530 {
 531   for (;;) {
 532     int32_t ch = Peek();
 533     if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
 534       break;
 535     }
 536     if (IsVertSpace(ch)) {
 537       AdvanceLine();
 538     } else {
 539       Advance();
 540     }
 541   }
 542 }
 543
 544 /**
 545  * Skip over one CSS comment starting at the current read position.
 546  */
 547 void
 548 nsCSSScanner::SkipComment()
 549 {
 550   MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
 551   Advance(2);
 552   for (;;) {
 553     int32_t ch = Peek();
 554     if (ch < 0) {
 555       mReporter->ReportUnexpectedEOF("PECommentEOF");
 556       SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
 557       return;
 558     }
 559     if (ch == '*') {
 560       Advance();
 561       ch = Peek();
 562       if (ch < 0) {
 563         mReporter->ReportUnexpectedEOF("PECommentEOF");
 564         SetEOFCharacters(eEOFCharacters_Slash);
 565         return;
 566       }
 567       if (ch == '/') {
 568         Advance();
 569         return;
 570       }
 571     } else if (IsVertSpace(ch)) {
 572       AdvanceLine();
 573     } else {
 574       Advance();
 575     }
 576   }
 577 }
 578
 579 /**
 580  * If there is a valid escape sequence starting at the current read
 581  * position, consume it, decode it, append the result to |aOutput|,
 582  * and return true.  Otherwise, consume nothing, leave |aOutput|
 583  * unmodified, and return false.  If |aInString| is true, accept the
 584  * additional form of escape sequence allowed within string-like tokens.
 585  */
 586 bool
 587 nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
 588 {
 589   MOZ_ASSERT(Peek() == '\\', "should not have been called");
 590   int32_t ch = Peek(1);
 591   if (ch < 0) {
 592     // If we are in a string (or a url() containing a string), we want to drop
 593     // the backslash on the floor.  Otherwise, we want to treat it as a U+FFFD
 594     // character.
 595     Advance();
 596     if (aInString) {
 597       SetEOFCharacters(eEOFCharacters_DropBackslash);
 598     } else {
 599       aOutput.Append(UCS2_REPLACEMENT_CHAR);
 600       SetEOFCharacters(eEOFCharacters_ReplacementChar);
 601     }
 602     return true;
 603   }
 604   if (IsVertSpace(ch)) {
 605     if (aInString) {
 606       // In strings (and in url() containing a string), escaped
 607       // newlines are completely removed, to allow splitting over
 608       // multiple lines.
 609       Advance();
 610       AdvanceLine();
 611       return true;
 612     }
 613     // Outside of strings, backslash followed by a newline is not an escape.
 614     return false;
 615   }
 616
 617   if (!IsHexDigit(ch)) {
 618     // "Any character (except a hexadecimal digit, linefeed, carriage
 619     // return, or form feed) can be escaped with a backslash to remove
 620     // its special meaning." -- CSS2.1 section 4.1.3
 621     Advance(2);
 622     if (ch == 0) {
 623       aOutput.Append(UCS2_REPLACEMENT_CHAR);
 624     } else {
 625       aOutput.Append(ch);
 626     }
 627     return true;
 628   }
 629
 630   // "[at most six hexadecimal digits following a backslash] stand
 631   // for the ISO 10646 character with that number, which must not be
 632   // zero. (It is undefined in CSS 2.1 what happens if a style sheet
 633   // does contain a character with Unicode codepoint zero.)"
 634   //   -- CSS2.1 section 4.1.3
 635
 636   // At this point we know we have \ followed by at least one
 637   // hexadecimal digit, therefore the escape sequence is valid and we
 638   // can go ahead and consume the backslash.
 639   Advance();
 640   uint32_t val = 0;
 641   int i = 0;
 642   do {
 643     val = val * 16 + HexDigitValue(ch);
 644     i++;
 645     Advance();
 646     ch = Peek();
 647   } while (i < 6 && IsHexDigit(ch));
 648
 649   // "Interpret the hex digits as a hexadecimal number. If this number is zero,
 650   // or is greater than the maximum allowed codepoint, return U+FFFD
 651   // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
 652   if (MOZ_UNLIKELY(val == 0)) {
 653     aOutput.Append(UCS2_REPLACEMENT_CHAR);
 654   } else {
 655     AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
 656   }
 657
 658   // Consume exactly one whitespace character after a
 659   // hexadecimal escape sequence.
 660   if (IsVertSpace(ch)) {
 661     AdvanceLine();
 662   } else if (IsHorzSpace(ch)) {
 663     Advance();
 664   }
 665   return true;
 666 }
 667
 668 /**
 669  * Consume a run of "text" beginning with the current read position,
 670  * consisting of characters in the class |aClass| (which must be a
 671  * suitable argument to IsOpenCharClass) plus escape sequences.
 672  * Append the text to |aText|, after decoding escape sequences.
 673  *
 674  * Returns true if at least one character was appended to |aText|,
 675  * false otherwise.
 676  */
 677 bool
 678 nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
 679 {
 680   // This is all of the character classes currently used with
 681   // GatherText.  If you have a need to use this function with a
 682   // different class, go ahead and add it.
 683   MOZ_ASSERT(aClass == IS_STRING ||
 684              aClass == IS_IDCHAR ||
 685              aClass == IS_URL_CHAR,
 686              "possibly-inappropriate character class");
 687
 688   uint32_t start = mOffset;
 689   bool inString = aClass == IS_STRING;
 690
 691   for (;;) {
 692     // Consume runs of unescaped characters in one go.
 693     uint32_t n = mOffset;
 694     while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
 695       n++;
 696     }
 697     if (n > mOffset) {
 698       aText.Append(&mBuffer[mOffset], n - mOffset);
 699       mOffset = n;
 700     }
 701     if (n == mCount) {
 702       break;
 703     }
 704
 705     int32_t ch = Peek();
 706     MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
 707                "should not have exited the inner loop");
 708     if (ch == 0) {
 709       Advance();
 710       aText.Append(UCS2_REPLACEMENT_CHAR);
 711       continue;
 712     }
 713
 714     if (ch != '\\') {
 715       break;
 716     }
 717     if (!GatherEscape(aText, inString)) {
 718       break;
 719     }
 720   }
 721
 722   return mOffset > start;
 723 }
 724
 725 /**
 726  * Scan an Ident token.  This also handles Function and URL tokens,
 727  * both of which begin indistinguishably from an identifier.  It can
 728  * produce a Symbol token when an apparent identifier actually led
 729  * into an invalid escape sequence.
 730  */
 731 bool
 732 nsCSSScanner::ScanIdent(nsCSSToken& aToken)
 733 {
 734   if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
 735     MOZ_ASSERT(Peek() == '\\',
 736                "unexpected IsIdentStart character that did not begin an ident");
 737     aToken.mSymbol = Peek();
 738     Advance();
 739     return true;
 740   }
 741
 742   if (MOZ_LIKELY(Peek() != '(')) {
 743     aToken.mType = eCSSToken_Ident;
 744     return true;
 745   }
 746
 747   Advance();
 748   aToken.mType = eCSSToken_Function;
 749   if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
 750     NextURL(aToken);
 751   } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
 752     mSeenVariableReference = true;
 753   }
 754   return true;
 755 }
 756
 757 /**
 758  * Scan an AtKeyword token.  Also handles production of Symbol when
 759  * an '@' is not followed by an identifier.
 760  */
 761 bool
 762 nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
 763 {
 764   MOZ_ASSERT(Peek() == '@', "should not have been called");
 765
 766   // Fall back for when '@' isn't followed by an identifier.
 767   aToken.mSymbol = '@';
 768   Advance();
 769
 770   int32_t ch = Peek();
 771   if (StartsIdent(ch, Peek(1))) {
 772     if (GatherText(IS_IDCHAR, aToken.mIdent)) {
 773        aToken.mType = eCSSToken_AtKeyword;
 774      }
 775   }
 776   return true;
 777 }
 778
 779 /**
 780  * Scan a Hash token.  Handles the distinction between eCSSToken_ID
 781  * and eCSSToken_Hash, and handles production of Symbol when a '#'
 782  * is not followed by identifier characters.
 783  */
 784 bool
 785 nsCSSScanner::ScanHash(nsCSSToken& aToken)
 786 {
 787   MOZ_ASSERT(Peek() == '#', "should not have been called");
 788
 789   // Fall back for when '#' isn't followed by identifier characters.
 790   aToken.mSymbol = '#';
 791   Advance();
 792
 793   int32_t ch = Peek();
 794   if (IsIdentChar(ch) || ch == '\\') {
 795     nsCSSTokenType type =
 796       StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
 797     aToken.mIdent.SetLength(0);
 798     if (GatherText(IS_IDCHAR, aToken.mIdent)) {
 799       aToken.mType = type;
 800     }
 801   }
 802
 803   return true;
 804 }
 805
 806 /**
 807  * Scan a Number, Percentage, or Dimension token (all of which begin
 808  * like a Number).  Can produce a Symbol when a '.' is not followed by
 809  * digits, or when '+' or '-' are not followed by either a digit or a
 810  * '.' and then a digit.  Can also produce a HTMLComment when it
 811  * encounters '-->'.
 812  */
 813 bool
 814 nsCSSScanner::ScanNumber(nsCSSToken& aToken)
 815 {
 816   int32_t c = Peek();
 817 #ifdef DEBUG
 818   {
 819     int32_t c2 = Peek(1);
 820     int32_t c3 = Peek(2);
 821     MOZ_ASSERT(IsDigit(c) ||
 822                (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
 823                (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
 824                "should not have been called");
 825   }
 826 #endif
 827
 828   // Sign of the mantissa (-1 or 1).
 829   int32_t sign = c == '-' ? -1 : 1;
 830   // Absolute value of the integer part of the mantissa.  This is a double so
 831   // we don't run into overflow issues for consumers that only care about our
 832   // floating-point value while still being able to express the full int32_t
 833   // range for consumers who want integers.
 834   double intPart = 0;
 835   // Fractional part of the mantissa.  This is a double so that when we convert
 836   // to float at the end we'll end up rounding to nearest float instead of
 837   // truncating down (as we would if fracPart were a float and we just
 838   // effectively lost the last several digits).
 839   double fracPart = 0;
 840   // Absolute value of the power of 10 that we should multiply by (only
 841   // relevant for numbers in scientific notation).  Has to be a signed integer,
 842   // because multiplication of signed by unsigned converts the unsigned to
 843   // signed, so if we plan to actually multiply by expSign...
 844   int32_t exponent = 0;
 845   // Sign of the exponent.
 846   int32_t expSign = 1;
 847
 848   aToken.mHasSign = (c == '+' || c == '-');
 849   if (aToken.mHasSign) {
 850     Advance();
 851     c = Peek();
 852   }
 853
 854   bool gotDot = (c == '.');
 855
 856   if (!gotDot) {
 857     // Scan the integer part of the mantissa.
 858     MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
 859     do {
 860       intPart = 10*intPart + DecimalDigitValue(c);
 861       Advance();
 862       c = Peek();
 863     } while (IsDigit(c));
 864
 865     gotDot = (c == '.') && IsDigit(Peek(1));
 866   }
 867
 868   if (gotDot) {
 869     // Scan the fractional part of the mantissa.
 870     Advance();
 871     c = Peek();
 872     MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
 873     // Power of ten by which we need to divide our next digit
 874     double divisor = 10;
 875     do {
 876       fracPart += DecimalDigitValue(c) / divisor;
 877       divisor *= 10;
 878       Advance();
 879       c = Peek();
 880     } while (IsDigit(c));
 881   }
 882
 883   bool gotE = false;
 884   if (c == 'e' || c == 'E') {
 885     int32_t expSignChar = Peek(1);
 886     int32_t nextChar = Peek(2);
 887     if (IsDigit(expSignChar) ||
 888         ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
 889       gotE = true;
 890       if (expSignChar == '-') {
 891         expSign = -1;
 892       }
 893       Advance(); // consumes the E
 894       if (expSignChar == '-' || expSignChar == '+') {
 895         Advance();
 896         c = nextChar;
 897       } else {
 898         c = expSignChar;
 899       }
 900       MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
 901       do {
 902         exponent = 10*exponent + DecimalDigitValue(c);
 903         Advance();
 904         c = Peek();
 905       } while (IsDigit(c));
 906     }
 907   }
 908
 909   nsCSSTokenType type = eCSSToken_Number;
 910
 911   // Set mIntegerValid for all cases (except %, below) because we need
 912   // it for the "2n" in :nth-child(2n).
 913   aToken.mIntegerValid = false;
 914
 915   // Time to reassemble our number.
 916   // Do all the math in double precision so it's truncated only once.
 917   double value = sign * (intPart + fracPart);
 918   if (gotE) {
 919     // Explicitly cast expSign*exponent to double to avoid issues with
 920     // overloaded pow() on Windows.
 921     value *= pow(10.0, double(expSign * exponent));
 922   } else if (!gotDot) {
 923     // Clamp values outside of integer range.
 924     if (sign > 0) {
 925       aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
 926     } else {
 927       aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
 928     }
 929     aToken.mIntegerValid = true;
 930   }
 931
 932   nsString& ident = aToken.mIdent;
 933
 934   // Check for Dimension and Percentage tokens.
 935   if (c >= 0) {
 936     if (StartsIdent(c, Peek(1))) {
 937       if (GatherText(IS_IDCHAR, ident)) {
 938         type = eCSSToken_Dimension;
 939       }
 940     } else if (c == '%') {
 941       Advance();
 942       type = eCSSToken_Percentage;
 943       value = value / 100.0f;
 944       aToken.mIntegerValid = false;
 945     }
 946   }
 947   aToken.mNumber = value;
 948   aToken.mType = type;
 949   return true;
 950 }
 951
 952 /**
 953  * Scan a string constant ('foo' or "foo").  Will always produce
 954  * either a String or a Bad_String token; the latter occurs when the
 955  * close quote is missing.  Always returns true (for convenience in Next()).
 956  */
 957 bool
 958 nsCSSScanner::ScanString(nsCSSToken& aToken)
 959 {
 960   int32_t aStop = Peek();
 961   MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
 962   aToken.mType = eCSSToken_String;
 963   aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
 964   Advance();
 965
 966   for (;;) {
 967     GatherText(IS_STRING, aToken.mIdent);
 968
 969     int32_t ch = Peek();
 970     if (ch == -1) {
 971       AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
 972                                       eEOFCharacters_SingleQuote);
 973       break; // EOF ends a string token with no error.
 974     }
 975     if (ch == aStop) {
 976       Advance();
 977       break;
 978     }
 979     // Both " and ' are excluded from IS_STRING.
 980     if (ch == '"' || ch == '\'') {
 981       aToken.mIdent.Append(ch);
 982       Advance();
 983       continue;
 984     }
 985
 986     mSeenBadToken = true;
 987     aToken.mType = eCSSToken_Bad_String;
 988     mReporter->ReportUnexpected("SEUnterminatedString", aToken);
 989     break;
 990   }
 991   return true;
 992 }
 993
 994 /**
 995  * Scan a unicode-range token.  These match the regular expression
 996  *
 997  *     u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
 998  *
 999  * However, some such tokens are "invalid".  There are three valid forms:
1000  *
1001  *     u+[0-9a-f]{x}              1 <= x <= 6
1002  *     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6
1003  *     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
1004  *
1005  * All unicode-range tokens have their text recorded in mIdent; valid ones
1006  * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
1007  * Note that this does not validate the numeric range, only the syntactic
1008  * form.
1009  */
1010 bool
1011 nsCSSScanner::ScanURange(nsCSSToken& aResult)
1012 {
1013   int32_t intro1 = Peek();
1014   int32_t intro2 = Peek(1);
1015   int32_t ch = Peek(2);
1016
1017   MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
1018              intro2 == '+' &&
1019              (IsHexDigit(ch) || ch == '?'),
1020              "should not have been called");
1021
1022   aResult.mIdent.Append(intro1);
1023   aResult.mIdent.Append(intro2);
1024   Advance(2);
1025
1026   bool valid = true;
1027   bool haveQues = false;
1028   uint32_t low = 0;
1029   uint32_t high = 0;
1030   int i = 0;
1031
1032   do {
1033     aResult.mIdent.Append(ch);
1034     if (IsHexDigit(ch)) {
1035       if (haveQues) {
1036         valid = false; // All question marks should be at the end.
1037       }
1038       low = low*16 + HexDigitValue(ch);
1039       high = high*16 + HexDigitValue(ch);
1040     } else {
1041       haveQues = true;
1042       low = low*16 + 0x0;
1043       high = high*16 + 0xF;
1044     }
1045
1046     i++;
1047     Advance();
1048     ch = Peek();
1049   } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
1050
1051   if (ch == '-' && IsHexDigit(Peek(1))) {
1052     if (haveQues) {
1053       valid = false;
1054     }
1055
1056     aResult.mIdent.Append(ch);
1057     Advance();
1058     ch = Peek();
1059     high = 0;
1060     i = 0;
1061     do {
1062       aResult.mIdent.Append(ch);
1063       high = high*16 + HexDigitValue(ch);
1064
1065       i++;
1066       Advance();
1067       ch = Peek();
1068     } while (i < 6 && IsHexDigit(ch));
1069   }
1070
1071   aResult.mInteger = low;
1072   aResult.mInteger2 = high;
1073   aResult.mIntegerValid = valid;
1074   aResult.mType = eCSSToken_URange;
1075   return true;
1076 }
1077
#ifdef DEBUG
/**
 * Debug-only sanity check: assert that |c| is one of the accepted
 * combinations of EOFCharacters bits listed below.
 */
/* static */ void
nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
{
  static const uint32_t kAcceptedCombinations[] = {
    eEOFCharacters_None,
    eEOFCharacters_ReplacementChar,
    eEOFCharacters_Slash,
    eEOFCharacters_Asterisk | eEOFCharacters_Slash,
    eEOFCharacters_DoubleQuote,
    eEOFCharacters_SingleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote,
    eEOFCharacters_CloseParen,
    eEOFCharacters_ReplacementChar | eEOFCharacters_CloseParen,
    eEOFCharacters_DoubleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_SingleQuote | eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote |
      eEOFCharacters_CloseParen,
    eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote |
      eEOFCharacters_CloseParen
  };

  bool accepted = false;
  for (size_t i = 0; i < mozilla::ArrayLength(kAcceptedCombinations); i++) {
    if (c == kAcceptedCombinations[i]) {
      accepted = true;
      break;
    }
  }

  MOZ_ASSERT(accepted, "invalid EOFCharacters value");
}
#endif
1109
1110 void
1111 nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
1112 {
1113   mEOFCharacters = EOFCharacters(aEOFCharacters);
1114 }
1115
1116 void
1117 nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
1118 {
1119   mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
1120 }
1121
1122 static const char16_t kImpliedEOFCharacters[] = {
1123   UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
1124 };
1125
1126 /* static */ void
1127 nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
1128                                          nsAString& aResult)
1129 {
1130   // First, ignore eEOFCharacters_DropBackslash.
1131   uint32_t c = aEOFCharacters >> 1;
1132
1133   // All of the remaining EOFCharacters bits represent appended characters,
1134   // and the bits are in the order that they need appending.
1135   for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
1136     if (c & 1) {
1137       aResult.Append(*p);
1138     }
1139   }
1140
1141   MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
1142 }
1143
1144 /**
1145  * Consume the part of an URL token after the initial 'url('.  Caller
1146  * is assumed to have consumed 'url(' already.  Will always produce
1147  * either an URL or a Bad_URL token.
1148  *
1149  * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
1150  * the special lexical rules for URL tokens in a nonstandard context.
1151  */
1152 void
1153 nsCSSScanner::NextURL(nsCSSToken& aToken)
1154 {
1155   SkipWhitespace();
1156
1157   // aToken.mIdent may be "url" at this point; clear that out
1158   aToken.mIdent.Truncate();
1159
1160   int32_t ch = Peek();
1161   // Do we have a string?
1162   if (ch == '"' || ch == '\'') {
1163     ScanString(aToken);
1164     if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
1165       aToken.mType = eCSSToken_Bad_URL;
1166       return;
1167     }
1168     MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
1169
1170   } else {
1171     // Otherwise, this is the start of a non-quoted url (which may be empty).
1172     aToken.mSymbol = char16_t(0);
1173     GatherText(IS_URL_CHAR, aToken.mIdent);
1174   }
1175
1176   // Consume trailing whitespace and then look for a close parenthesis.
1177   SkipWhitespace();
1178   ch = Peek();
1179   // ch can be less than zero indicating EOF
1180   if (MOZ_LIKELY(ch < 0 || ch == ')')) {
1181     Advance();
1182     aToken.mType = eCSSToken_URL;
1183     if (ch < 0) {
1184       AddEOFCharacters(eEOFCharacters_CloseParen);
1185     }
1186   } else {
1187     mSeenBadToken = true;
1188     aToken.mType = eCSSToken_Bad_URL;
1189   }
1190 }
1191
1192 /**
1193  * Primary scanner entry point.  Consume one token and fill in
1194  * |aToken| accordingly.  Will skip over any number of comments first,
1195  * and will also skip over rather than return whitespace tokens if
1196  * |aSkipWS| is true.
1197  *
1198  * Returns true if it successfully consumed a token, false if EOF has
1199  * been reached.  Will always advance the current read position by at
1200  * least one character unless called when already at EOF.
1201  */
1202 bool
1203 nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
1204 {
1205   int32_t ch;
1206
1207   // do this here so we don't have to do it in dozens of other places
1208   aToken.mIdent.Truncate();
1209   aToken.mType = eCSSToken_Symbol;
1210
1211   for (;;) {
1212     // Consume any number of comments, and possibly also whitespace tokens,
1213     // in between other tokens.
1214     mTokenOffset = mOffset;
1215     mTokenLineOffset = mLineOffset;
1216     mTokenLineNumber = mLineNumber;
1217
1218     ch = Peek();
1219     if (IsWhitespace(ch)) {
1220       SkipWhitespace();
1221       if (!aSkipWS) {
1222         aToken.mType = eCSSToken_Whitespace;
1223         return true;
1224       }
1225       continue; // start again at the beginning
1226     }
1227     if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
1228       // FIXME: Editor wants comments to be preserved (bug 60290).
1229       SkipComment();
1230       continue; // start again at the beginning
1231     }
1232     break;
1233   }
1234
1235   // EOF
1236   if (ch < 0) {
1237     return false;
1238   }
1239
1240   // 'u' could be UNICODE-RANGE or an identifier-family token
1241   if (ch == 'u' || ch == 'U') {
1242     int32_t c2 = Peek(1);
1243     int32_t c3 = Peek(2);
1244     if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
1245       return ScanURange(aToken);
1246     }
1247     return ScanIdent(aToken);
1248   }
1249
1250   // identifier family
1251   if (IsIdentStart(ch)) {
1252     return ScanIdent(aToken);
1253   }
1254
1255   // number family
1256   if (IsDigit(ch)) {
1257     return ScanNumber(aToken);
1258   }
1259
1260   if (ch == '.' && IsDigit(Peek(1))) {
1261     return ScanNumber(aToken);
1262   }
1263
1264   if (ch == '+') {
1265     int32_t c2 = Peek(1);
1266     if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
1267       return ScanNumber(aToken);
1268     }
1269   }
1270
1271   // '-' can start an identifier-family token, a number-family token,
1272   // or an HTML-comment
1273   if (ch == '-') {
1274     int32_t c2 = Peek(1);
1275     int32_t c3 = Peek(2);
1276     if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
1277       return ScanIdent(aToken);
1278     }
1279     if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
1280       return ScanNumber(aToken);
1281     }
1282     if (c2 == '-' && c3 == '>') {
1283       Advance(3);
1284       aToken.mType = eCSSToken_HTMLComment;
1285       aToken.mIdent.AssignLiteral("-->");
1286       return true;
1287     }
1288   }
1289
1290   // the other HTML-comment token
1291   if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
1292     Advance(4);
1293     aToken.mType = eCSSToken_HTMLComment;
1294     aToken.mIdent.AssignLiteral("<!--");
1295     return true;
1296   }
1297
1298   // AT_KEYWORD
1299   if (ch == '@') {
1300     return ScanAtKeyword(aToken);
1301   }
1302
1303   // HASH
1304   if (ch == '#') {
1305     return ScanHash(aToken);
1306   }
1307
1308   // STRING
1309   if (ch == '"' || ch == '\'') {
1310     return ScanString(aToken);
1311   }
1312
1313   // Match operators: ~= |= ^= $= *=
1314   nsCSSTokenType opType = MatchOperatorType(ch);
1315   if (opType != eCSSToken_Symbol && Peek(1) == '=') {
1316     aToken.mType = opType;
1317     Advance(2);
1318     return true;
1319   }
1320
1321   // Otherwise, a symbol (DELIM).
1322   aToken.mSymbol = ch;
1323   Advance();
1324   return true;
1325 }
1326
1327 /* nsCSSGridTemplateAreaScanner methods. */
1328
1329 nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
1330   : mBuffer(aBuffer.BeginReading())
1331   , mOffset(0)
1332   , mCount(aBuffer.Length())
1333 {
1334 }
1335
1336 bool
1337 nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
1338 {
1339   int32_t ch;
1340   // Skip whitespace
1341   do {
1342     if (mOffset >= mCount) {
1343       return false;
1344     }
1345     ch = mBuffer[mOffset];
1346     mOffset++;
1347   } while (IsWhitespace(ch));
1348
1349   if (IsOpenCharClass(ch, IS_IDCHAR)) {
1350     // Named cell token
1351     uint32_t start = mOffset - 1;  // offset of |ch|
1352     while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
1353       mOffset++;
1354     }
1355     aTokenResult.mName.Assign(&mBuffer[start], mOffset - start);
1356     aTokenResult.isTrash = false;
1357   } else if (ch == '.') {
1358     // Null cell token
1359     aTokenResult.mName.Truncate();
1360     aTokenResult.isTrash = false;
1361   } else {
1362     // Trash token
1363     aTokenResult.isTrash = true;
1364   }
1365   return true;
1366 }