1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* tokenization of CSS style sheets */
9 #include "nsCSSScanner.h"
10 #include "nsStyleUtil.h"
11 #include "nsISupportsImpl.h"
12 #include "mozilla/ArrayUtils.h"
13 #include "mozilla/css/ErrorReporter.h"
14 #include "mozilla/Likely.h"
17 /* Character class tables and related helper functions. */
/*
 * Bit flags naming the character classes recorded in gLexTable.  Each
 * flag occupies its own bit so that classes can be OR-ed together, both
 * in table entries and in the 'cls' argument of the lookup helpers.
 */
static const uint8_t IS_HEX_DIGIT = 0x01;
static const uint8_t IS_IDSTART   = 0x02;
static const uint8_t IS_IDCHAR    = 0x04;
static const uint8_t IS_URL_CHAR  = 0x08;
static const uint8_t IS_HSPACE    = 0x10;
static const uint8_t IS_VSPACE    = 0x20;
// Whitespace of either orientation.
static const uint8_t IS_SPACE     = IS_HSPACE|IS_VSPACE;
static const uint8_t IS_STRING    = 0x40;
34 #define X IS_HEX_DIGIT
41 #define SUIJX S|U|I|J|X
43 static const uint8_t gLexTable
[] = {
44 // 00 01 02 03 04 05 06 07
45 0, S
, S
, S
, S
, S
, S
, S
,
46 // 08 TAB LF 0B FF CR 0E 0F
47 S
, SH
, V
, S
, V
, V
, S
, S
,
48 // 10 11 12 13 14 15 16 17
49 S
, S
, S
, S
, S
, S
, S
, S
,
50 // 18 19 1A 1B 1C 1D 1E 1F
51 S
, S
, S
, S
, S
, S
, S
, S
,
53 SH
, SU
, 0, SU
, SU
, SU
, SU
, 0,
55 S
, S
, SU
, SU
, SU
, SUI
, SU
, SU
,
57 SUIX
, SUIX
, SUIX
, SUIX
, SUIX
, SUIX
, SUIX
, SUIX
,
59 SUIX
, SUIX
, SU
, SU
, SU
, SU
, SU
, SU
,
61 SU
,SUIJX
,SUIJX
,SUIJX
,SUIJX
,SUIJX
,SUIJX
, SUIJ
,
63 SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
,
65 SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
,
67 SUIJ
, SUIJ
, SUIJ
, SU
, J
, SU
, SU
, SUIJ
,
69 SU
,SUIJX
,SUIJX
,SUIJX
,SUIJX
,SUIJX
,SUIJX
, SUIJ
,
71 SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
,
73 SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
, SUIJ
,
75 SUIJ
, SUIJ
, SUIJ
, SU
, SU
, SU
, SU
, S
,
78 static_assert(MOZ_ARRAY_LENGTH(gLexTable
) == 128,
79 "gLexTable expected to cover all 128 ASCII characters");
94 * True if 'ch' is in character class 'cls', which should be one of
95 * the constants above or some combination of them. All characters
96 * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
99 IsOpenCharClass(int32_t ch
, uint8_t cls
) {
100 return ch
>= 0 && (ch
>= 128 || (gLexTable
[ch
] & cls
) != 0);
104 * True if 'ch' is in character class 'cls', which should be one of
105 * the constants above or some combination of them. No characters
106 * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
109 IsClosedCharClass(int32_t ch
, uint8_t cls
) {
110 return uint32_t(ch
) < 128 && (gLexTable
[ch
] & cls
) != 0;
114 * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
115 * TAB, LF, FF, CR, or SPC.
118 IsWhitespace(int32_t ch
) {
119 return IsClosedCharClass(ch
, IS_SPACE
);
123 * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
126 IsHorzSpace(int32_t ch
) {
127 return IsClosedCharClass(ch
, IS_HSPACE
);
131 * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
132 * whitespace requires special handling when consumed, see AdvanceLine.
135 IsVertSpace(int32_t ch
) {
136 return IsClosedCharClass(ch
, IS_VSPACE
);
140 * True if 'ch' is a character that can appear in the middle of an identifier.
141 * This includes U+0000 since it is handled as U+FFFD, but for purposes of
142 * GatherText it should not be included in IsOpenCharClass.
145 IsIdentChar(int32_t ch
) {
146 return IsOpenCharClass(ch
, IS_IDCHAR
) || ch
== 0;
150 * True if 'ch' is a character that by itself begins an identifier.
151 * This includes U+0000 since it is handled as U+FFFD, but for purposes of
152 * GatherText it should not be included in IsOpenCharClass.
153 * (This is a subset of IsIdentChar.)
156 IsIdentStart(int32_t ch
) {
157 return IsOpenCharClass(ch
, IS_IDSTART
) || ch
== 0;
161 * True if the two-character sequence aFirstChar+aSecondChar begins an
165 StartsIdent(int32_t aFirstChar
, int32_t aSecondChar
)
167 return IsIdentStart(aFirstChar
) ||
168 (aFirstChar
== '-' && IsIdentStart(aSecondChar
));
/**
 * True if 'ch' is a decimal digit.
 */
static inline bool
IsDigit(int32_t ch) {
  return (ch >= '0') && (ch <= '9');
}
180 * True if 'ch' is a hexadecimal digit.
183 IsHexDigit(int32_t ch
) {
184 return IsClosedCharClass(ch
, IS_HEX_DIGIT
);
/**
 * Assuming that 'ch' is a decimal digit, return its numeric value.
 */
static inline uint32_t
DecimalDigitValue(int32_t ch)
{
  // NOTE(review): the body was not legible when this block was reviewed;
  // this is the direct implementation of the contract stated above.
  return ch - '0';
}
197 * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
199 static inline uint32_t
200 HexDigitValue(int32_t ch
)
203 return DecimalDigitValue(ch
);
205 // Note: c&7 just keeps the low three bits which causes
206 // upper and lower case alphabetics to both yield their
207 // "relative to 10" value for computing the hex value.
208 return (ch
& 0x7) + 9;
213 * If 'ch' can be the first character of a two-character match operator
214 * token, return the token type code for that token, otherwise return
215 * eCSSToken_Symbol to indicate that it can't.
217 static inline nsCSSTokenType
218 MatchOperatorType(int32_t ch
)
221 case '~': return eCSSToken_Includes
;
222 case '|': return eCSSToken_Dashmatch
;
223 case '^': return eCSSToken_Beginsmatch
;
224 case '$': return eCSSToken_Endsmatch
;
225 case '*': return eCSSToken_Containsmatch
;
226 default: return eCSSToken_Symbol
;
230 /* Out-of-line nsCSSToken methods. */
233 * Append the textual representation of |this| to |aBuffer|.
236 nsCSSToken::AppendToString(nsString
& aBuffer
) const
239 case eCSSToken_Ident
:
240 nsStyleUtil::AppendEscapedCSSIdent(mIdent
, aBuffer
);
243 case eCSSToken_AtKeyword
:
245 nsStyleUtil::AppendEscapedCSSIdent(mIdent
, aBuffer
);
251 nsStyleUtil::AppendEscapedCSSIdent(mIdent
, aBuffer
);
254 case eCSSToken_Function
:
255 nsStyleUtil::AppendEscapedCSSIdent(mIdent
, aBuffer
);
260 case eCSSToken_Bad_URL
:
261 aBuffer
.AppendLiteral("url(");
262 if (mSymbol
!= char16_t(0)) {
263 nsStyleUtil::AppendEscapedCSSString(mIdent
, aBuffer
, mSymbol
);
265 aBuffer
.Append(mIdent
);
267 if (mType
== eCSSToken_URL
) {
268 aBuffer
.Append(char16_t(')'));
272 case eCSSToken_Number
:
274 aBuffer
.AppendInt(mInteger
, 10);
276 aBuffer
.AppendFloat(mNumber
);
280 case eCSSToken_Percentage
:
281 aBuffer
.AppendFloat(mNumber
* 100.0f
);
282 aBuffer
.Append(char16_t('%'));
285 case eCSSToken_Dimension
:
287 aBuffer
.AppendInt(mInteger
, 10);
289 aBuffer
.AppendFloat(mNumber
);
291 nsStyleUtil::AppendEscapedCSSIdent(mIdent
, aBuffer
);
294 case eCSSToken_Bad_String
:
295 nsStyleUtil::AppendEscapedCSSString(mIdent
, aBuffer
, mSymbol
);
296 // remove the trailing quote character
297 aBuffer
.Truncate(aBuffer
.Length() - 1);
300 case eCSSToken_String
:
301 nsStyleUtil::AppendEscapedCSSString(mIdent
, aBuffer
, mSymbol
);
304 case eCSSToken_Symbol
:
305 aBuffer
.Append(mSymbol
);
308 case eCSSToken_Whitespace
:
312 case eCSSToken_HTMLComment
:
313 case eCSSToken_URange
:
314 aBuffer
.Append(mIdent
);
317 case eCSSToken_Includes
:
318 aBuffer
.AppendLiteral("~=");
320 case eCSSToken_Dashmatch
:
321 aBuffer
.AppendLiteral("|=");
323 case eCSSToken_Beginsmatch
:
324 aBuffer
.AppendLiteral("^=");
326 case eCSSToken_Endsmatch
:
327 aBuffer
.AppendLiteral("$=");
329 case eCSSToken_Containsmatch
:
330 aBuffer
.AppendLiteral("*=");
334 NS_ERROR("invalid token type");
339 /* nsCSSScanner methods. */
341 nsCSSScanner::nsCSSScanner(const nsAString
& aBuffer
, uint32_t aLineNumber
)
342 : mBuffer(aBuffer
.BeginReading())
344 , mCount(aBuffer
.Length())
345 , mLineNumber(aLineNumber
)
347 , mTokenLineNumber(aLineNumber
)
348 , mTokenLineOffset(0)
350 , mRecordStartOffset(0)
351 , mEOFCharacters(eEOFCharacters_None
)
355 , mSeenBadToken(false)
356 , mSeenVariableReference(false)
358 MOZ_COUNT_CTOR(nsCSSScanner
);
361 nsCSSScanner::~nsCSSScanner()
363 MOZ_COUNT_DTOR(nsCSSScanner
);
367 nsCSSScanner::StartRecording()
369 MOZ_ASSERT(!mRecording
, "already started recording");
371 mRecordStartOffset
= mOffset
;
375 nsCSSScanner::StopRecording()
377 MOZ_ASSERT(mRecording
, "haven't started recording");
382 nsCSSScanner::StopRecording(nsString
& aBuffer
)
384 MOZ_ASSERT(mRecording
, "haven't started recording");
386 aBuffer
.Append(mBuffer
+ mRecordStartOffset
,
387 mOffset
- mRecordStartOffset
);
391 nsCSSScanner::RecordingLength() const
393 MOZ_ASSERT(mRecording
, "haven't started recording");
394 return mOffset
- mRecordStartOffset
;
399 nsCSSScanner::IsRecording() const
406 nsCSSScanner::GetCurrentLine() const
408 uint32_t end
= mTokenOffset
;
409 while (end
< mCount
&& !IsVertSpace(mBuffer
[end
])) {
412 return nsDependentSubstring(mBuffer
+ mTokenLineOffset
,
417 * Return the raw UTF-16 code unit at position |mOffset + n| within
418 * the read buffer. If that is beyond the end of the buffer, returns
419 * -1 to indicate end of input.
422 nsCSSScanner::Peek(uint32_t n
)
424 if (mOffset
+ n
>= mCount
) {
427 return mBuffer
[mOffset
+ n
];
431 * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
432 * If |n| is greater than the distance to end of input, will silently
433 * stop at the end. May not be used to advance over a line boundary;
434 * AdvanceLine() must be used instead.
437 nsCSSScanner::Advance(uint32_t n
)
440 while (mOffset
< mCount
&& n
> 0) {
441 MOZ_ASSERT(!IsVertSpace(mBuffer
[mOffset
]),
442 "may not Advance() over a line boundary");
447 if (mOffset
+ n
>= mCount
|| mOffset
+ n
< mOffset
)
455 * Advance |mOffset| over a line boundary.
458 nsCSSScanner::AdvanceLine()
460 MOZ_ASSERT(IsVertSpace(mBuffer
[mOffset
]),
461 "may not AdvanceLine() over a horizontal character");
462 // Advance over \r\n as a unit.
463 if (mBuffer
[mOffset
] == '\r' && mOffset
+ 1 < mCount
&&
464 mBuffer
[mOffset
+1] == '\n')
468 // 0 is a magical line number meaning that we don't know (i.e., script)
469 if (mLineNumber
!= 0)
471 mLineOffset
= mOffset
;
475 * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
476 * If |n| is greater than the distance to beginning of input, will
477 * silently stop at the beginning. May not be used to back up over a
481 nsCSSScanner::Backup(uint32_t n
)
484 while (mOffset
> 0 && n
> 0) {
485 MOZ_ASSERT(!IsVertSpace(mBuffer
[mOffset
-1]),
486 "may not Backup() over a line boundary");
499 nsCSSScanner::SavePosition(nsCSSScannerPosition
& aState
)
501 aState
.mOffset
= mOffset
;
502 aState
.mLineNumber
= mLineNumber
;
503 aState
.mLineOffset
= mLineOffset
;
504 aState
.mTokenLineNumber
= mTokenLineNumber
;
505 aState
.mTokenLineOffset
= mTokenLineOffset
;
506 aState
.mTokenOffset
= mTokenOffset
;
507 aState
.mInitialized
= true;
511 nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition
& aState
)
513 MOZ_ASSERT(aState
.mInitialized
, "have not saved state");
514 if (aState
.mInitialized
) {
515 mOffset
= aState
.mOffset
;
516 mLineNumber
= aState
.mLineNumber
;
517 mLineOffset
= aState
.mLineOffset
;
518 mTokenLineNumber
= aState
.mTokenLineNumber
;
519 mTokenLineOffset
= aState
.mTokenLineOffset
;
520 mTokenOffset
= aState
.mTokenOffset
;
525 * Skip over a sequence of whitespace characters (vertical or
526 * horizontal) starting at the current read position.
529 nsCSSScanner::SkipWhitespace()
533 if (!IsWhitespace(ch
)) { // EOF counts as non-whitespace
536 if (IsVertSpace(ch
)) {
545 * Skip over one CSS comment starting at the current read position.
548 nsCSSScanner::SkipComment()
550 MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
555 mReporter
->ReportUnexpectedEOF("PECommentEOF");
556 SetEOFCharacters(eEOFCharacters_Asterisk
| eEOFCharacters_Slash
);
563 mReporter
->ReportUnexpectedEOF("PECommentEOF");
564 SetEOFCharacters(eEOFCharacters_Slash
);
571 } else if (IsVertSpace(ch
)) {
580 * If there is a valid escape sequence starting at the current read
581 * position, consume it, decode it, append the result to |aOutput|,
582 * and return true. Otherwise, consume nothing, leave |aOutput|
583 * unmodified, and return false. If |aInString| is true, accept the
584 * additional form of escape sequence allowed within string-like tokens.
587 nsCSSScanner::GatherEscape(nsString
& aOutput
, bool aInString
)
589 MOZ_ASSERT(Peek() == '\\', "should not have been called");
590 int32_t ch
= Peek(1);
592 // If we are in a string (or a url() containing a string), we want to drop
593 // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
597 SetEOFCharacters(eEOFCharacters_DropBackslash
);
599 aOutput
.Append(UCS2_REPLACEMENT_CHAR
);
600 SetEOFCharacters(eEOFCharacters_ReplacementChar
);
604 if (IsVertSpace(ch
)) {
606 // In strings (and in url() containing a string), escaped
607 // newlines are completely removed, to allow splitting over
613 // Outside of strings, backslash followed by a newline is not an escape.
617 if (!IsHexDigit(ch
)) {
618 // "Any character (except a hexadecimal digit, linefeed, carriage
619 // return, or form feed) can be escaped with a backslash to remove
620 // its special meaning." -- CSS2.1 section 4.1.3
623 aOutput
.Append(UCS2_REPLACEMENT_CHAR
);
630 // "[at most six hexadecimal digits following a backslash] stand
631 // for the ISO 10646 character with that number, which must not be
632 // zero. (It is undefined in CSS 2.1 what happens if a style sheet
633 // does contain a character with Unicode codepoint zero.)"
634 // -- CSS2.1 section 4.1.3
636 // At this point we know we have \ followed by at least one
637 // hexadecimal digit, therefore the escape sequence is valid and we
638 // can go ahead and consume the backslash.
643 val
= val
* 16 + HexDigitValue(ch
);
647 } while (i
< 6 && IsHexDigit(ch
));
649 // "Interpret the hex digits as a hexadecimal number. If this number is zero,
650 // or is greater than the maximum allowed codepoint, return U+FFFD
651 // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
652 if (MOZ_UNLIKELY(val
== 0)) {
653 aOutput
.Append(UCS2_REPLACEMENT_CHAR
);
655 AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val
), aOutput
);
658 // Consume exactly one whitespace character after a
659 // hexadecimal escape sequence.
660 if (IsVertSpace(ch
)) {
662 } else if (IsHorzSpace(ch
)) {
669 * Consume a run of "text" beginning with the current read position,
670 * consisting of characters in the class |aClass| (which must be a
671 * suitable argument to IsOpenCharClass) plus escape sequences.
672 * Append the text to |aText|, after decoding escape sequences.
674 * Returns true if at least one character was appended to |aText|,
678 nsCSSScanner::GatherText(uint8_t aClass
, nsString
& aText
)
680 // This is all of the character classes currently used with
681 // GatherText. If you have a need to use this function with a
682 // different class, go ahead and add it.
683 MOZ_ASSERT(aClass
== IS_STRING
||
684 aClass
== IS_IDCHAR
||
685 aClass
== IS_URL_CHAR
,
686 "possibly-inappropriate character class");
688 uint32_t start
= mOffset
;
689 bool inString
= aClass
== IS_STRING
;
692 // Consume runs of unescaped characters in one go.
693 uint32_t n
= mOffset
;
694 while (n
< mCount
&& IsOpenCharClass(mBuffer
[n
], aClass
)) {
698 aText
.Append(&mBuffer
[mOffset
], n
- mOffset
);
706 MOZ_ASSERT(!IsOpenCharClass(ch
, aClass
),
707 "should not have exited the inner loop");
710 aText
.Append(UCS2_REPLACEMENT_CHAR
);
717 if (!GatherEscape(aText
, inString
)) {
722 return mOffset
> start
;
726 * Scan an Ident token. This also handles Function and URL tokens,
727 * both of which begin indistinguishably from an identifier. It can
728 * produce a Symbol token when an apparent identifier actually led
729 * into an invalid escape sequence.
732 nsCSSScanner::ScanIdent(nsCSSToken
& aToken
)
734 if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR
, aToken
.mIdent
))) {
735 MOZ_ASSERT(Peek() == '\\',
736 "unexpected IsIdentStart character that did not begin an ident");
737 aToken
.mSymbol
= Peek();
742 if (MOZ_LIKELY(Peek() != '(')) {
743 aToken
.mType
= eCSSToken_Ident
;
748 aToken
.mType
= eCSSToken_Function
;
749 if (aToken
.mIdent
.LowerCaseEqualsLiteral("url")) {
751 } else if (aToken
.mIdent
.LowerCaseEqualsLiteral("var")) {
752 mSeenVariableReference
= true;
758 * Scan an AtKeyword token. Also handles production of Symbol when
759 * an '@' is not followed by an identifier.
762 nsCSSScanner::ScanAtKeyword(nsCSSToken
& aToken
)
764 MOZ_ASSERT(Peek() == '@', "should not have been called");
766 // Fall back for when '@' isn't followed by an identifier.
767 aToken
.mSymbol
= '@';
771 if (StartsIdent(ch
, Peek(1))) {
772 if (GatherText(IS_IDCHAR
, aToken
.mIdent
)) {
773 aToken
.mType
= eCSSToken_AtKeyword
;
780 * Scan a Hash token. Handles the distinction between eCSSToken_ID
781 * and eCSSToken_Hash, and handles production of Symbol when a '#'
782 * is not followed by identifier characters.
785 nsCSSScanner::ScanHash(nsCSSToken
& aToken
)
787 MOZ_ASSERT(Peek() == '#', "should not have been called");
789 // Fall back for when '#' isn't followed by identifier characters.
790 aToken
.mSymbol
= '#';
794 if (IsIdentChar(ch
) || ch
== '\\') {
795 nsCSSTokenType type
=
796 StartsIdent(ch
, Peek(1)) ? eCSSToken_ID
: eCSSToken_Hash
;
797 aToken
.mIdent
.SetLength(0);
798 if (GatherText(IS_IDCHAR
, aToken
.mIdent
)) {
807 * Scan a Number, Percentage, or Dimension token (all of which begin
808 * like a Number). Can produce a Symbol when a '.' is not followed by
809 * digits, or when '+' or '-' are not followed by either a digit or a
810 * '.' and then a digit. Can also produce a HTMLComment when it
814 nsCSSScanner::ScanNumber(nsCSSToken
& aToken
)
819 int32_t c2
= Peek(1);
820 int32_t c3
= Peek(2);
821 MOZ_ASSERT(IsDigit(c
) ||
822 (IsDigit(c2
) && (c
== '.' || c
== '+' || c
== '-')) ||
823 (IsDigit(c3
) && (c
== '+' || c
== '-') && c2
== '.'),
824 "should not have been called");
828 // Sign of the mantissa (-1 or 1).
829 int32_t sign
= c
== '-' ? -1 : 1;
830 // Absolute value of the integer part of the mantissa. This is a double so
831 // we don't run into overflow issues for consumers that only care about our
832 // floating-point value while still being able to express the full int32_t
833 // range for consumers who want integers.
835 // Fractional part of the mantissa. This is a double so that when we convert
836 // to float at the end we'll end up rounding to nearest float instead of
837 // truncating down (as we would if fracPart were a float and we just
838 // effectively lost the last several digits).
840 // Absolute value of the power of 10 that we should multiply by (only
841 // relevant for numbers in scientific notation). Has to be a signed integer,
842 // because multiplication of signed by unsigned converts the unsigned to
843 // signed, so if we plan to actually multiply by expSign...
844 int32_t exponent
= 0;
845 // Sign of the exponent.
848 aToken
.mHasSign
= (c
== '+' || c
== '-');
849 if (aToken
.mHasSign
) {
854 bool gotDot
= (c
== '.');
857 // Scan the integer part of the mantissa.
858 MOZ_ASSERT(IsDigit(c
), "should have been excluded by logic above");
860 intPart
= 10*intPart
+ DecimalDigitValue(c
);
863 } while (IsDigit(c
));
865 gotDot
= (c
== '.') && IsDigit(Peek(1));
869 // Scan the fractional part of the mantissa.
872 MOZ_ASSERT(IsDigit(c
), "should have been excluded by logic above");
873 // Power of ten by which we need to divide our next digit
876 fracPart
+= DecimalDigitValue(c
) / divisor
;
880 } while (IsDigit(c
));
884 if (c
== 'e' || c
== 'E') {
885 int32_t expSignChar
= Peek(1);
886 int32_t nextChar
= Peek(2);
887 if (IsDigit(expSignChar
) ||
888 ((expSignChar
== '-' || expSignChar
== '+') && IsDigit(nextChar
))) {
890 if (expSignChar
== '-') {
893 Advance(); // consumes the E
894 if (expSignChar
== '-' || expSignChar
== '+') {
900 MOZ_ASSERT(IsDigit(c
), "should have been excluded by logic above");
902 exponent
= 10*exponent
+ DecimalDigitValue(c
);
905 } while (IsDigit(c
));
909 nsCSSTokenType type
= eCSSToken_Number
;
911 // Set mIntegerValid for all cases (except %, below) because we need
912 // it for the "2n" in :nth-child(2n).
913 aToken
.mIntegerValid
= false;
915 // Time to reassemble our number.
916 // Do all the math in double precision so it's truncated only once.
917 double value
= sign
* (intPart
+ fracPart
);
919 // Explicitly cast expSign*exponent to double to avoid issues with
920 // overloaded pow() on Windows.
921 value
*= pow(10.0, double(expSign
* exponent
));
922 } else if (!gotDot
) {
923 // Clamp values outside of integer range.
925 aToken
.mInteger
= int32_t(std::min(intPart
, double(INT32_MAX
)));
927 aToken
.mInteger
= int32_t(std::max(-intPart
, double(INT32_MIN
)));
929 aToken
.mIntegerValid
= true;
932 nsString
& ident
= aToken
.mIdent
;
934 // Check for Dimension and Percentage tokens.
936 if (StartsIdent(c
, Peek(1))) {
937 if (GatherText(IS_IDCHAR
, ident
)) {
938 type
= eCSSToken_Dimension
;
940 } else if (c
== '%') {
942 type
= eCSSToken_Percentage
;
943 value
= value
/ 100.0f
;
944 aToken
.mIntegerValid
= false;
947 aToken
.mNumber
= value
;
953 * Scan a string constant ('foo' or "foo"). Will always produce
954 * either a String or a Bad_String token; the latter occurs when the
955 * close quote is missing. Always returns true (for convenience in Next()).
958 nsCSSScanner::ScanString(nsCSSToken
& aToken
)
960 int32_t aStop
= Peek();
961 MOZ_ASSERT(aStop
== '"' || aStop
== '\'', "should not have been called");
962 aToken
.mType
= eCSSToken_String
;
963 aToken
.mSymbol
= char16_t(aStop
); // Remember how it's quoted.
967 GatherText(IS_STRING
, aToken
.mIdent
);
971 AddEOFCharacters(aStop
== '"' ? eEOFCharacters_DoubleQuote
:
972 eEOFCharacters_SingleQuote
);
973 break; // EOF ends a string token with no error.
979 // Both " and ' are excluded from IS_STRING.
980 if (ch
== '"' || ch
== '\'') {
981 aToken
.mIdent
.Append(ch
);
986 mSeenBadToken
= true;
987 aToken
.mType
= eCSSToken_Bad_String
;
988 mReporter
->ReportUnexpected("SEUnterminatedString", aToken
);
995 * Scan a unicode-range token. These match the regular expression
997 * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
999 * However, some such tokens are "invalid". There are three valid forms:
1001 * u+[0-9a-f]{x} 1 <= x <= 6
1002 * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
1003 * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
1005 * All unicode-range tokens have their text recorded in mIdent; valid ones
1006 * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
1007 * Note that this does not validate the numeric range, only the syntactic
1011 nsCSSScanner::ScanURange(nsCSSToken
& aResult
)
1013 int32_t intro1
= Peek();
1014 int32_t intro2
= Peek(1);
1015 int32_t ch
= Peek(2);
1017 MOZ_ASSERT((intro1
== 'u' || intro1
== 'U') &&
1019 (IsHexDigit(ch
) || ch
== '?'),
1020 "should not have been called");
1022 aResult
.mIdent
.Append(intro1
);
1023 aResult
.mIdent
.Append(intro2
);
1027 bool haveQues
= false;
1033 aResult
.mIdent
.Append(ch
);
1034 if (IsHexDigit(ch
)) {
1036 valid
= false; // All question marks should be at the end.
1038 low
= low
*16 + HexDigitValue(ch
);
1039 high
= high
*16 + HexDigitValue(ch
);
1043 high
= high
*16 + 0xF;
1049 } while (i
< 6 && (IsHexDigit(ch
) || ch
== '?'));
1051 if (ch
== '-' && IsHexDigit(Peek(1))) {
1056 aResult
.mIdent
.Append(ch
);
1062 aResult
.mIdent
.Append(ch
);
1063 high
= high
*16 + HexDigitValue(ch
);
1068 } while (i
< 6 && IsHexDigit(ch
));
1071 aResult
.mInteger
= low
;
1072 aResult
.mInteger2
= high
;
1073 aResult
.mIntegerValid
= valid
;
1074 aResult
.mType
= eCSSToken_URange
;
1080 nsCSSScanner::AssertEOFCharactersValid(uint32_t c
)
1082 MOZ_ASSERT(c
== eEOFCharacters_None
||
1083 c
== eEOFCharacters_ReplacementChar
||
1084 c
== eEOFCharacters_Slash
||
1085 c
== (eEOFCharacters_Asterisk
|
1086 eEOFCharacters_Slash
) ||
1087 c
== eEOFCharacters_DoubleQuote
||
1088 c
== eEOFCharacters_SingleQuote
||
1089 c
== (eEOFCharacters_DropBackslash
|
1090 eEOFCharacters_DoubleQuote
) ||
1091 c
== (eEOFCharacters_DropBackslash
|
1092 eEOFCharacters_SingleQuote
) ||
1093 c
== eEOFCharacters_CloseParen
||
1094 c
== (eEOFCharacters_ReplacementChar
|
1095 eEOFCharacters_CloseParen
) ||
1096 c
== (eEOFCharacters_DoubleQuote
|
1097 eEOFCharacters_CloseParen
) ||
1098 c
== (eEOFCharacters_SingleQuote
|
1099 eEOFCharacters_CloseParen
) ||
1100 c
== (eEOFCharacters_DropBackslash
|
1101 eEOFCharacters_DoubleQuote
|
1102 eEOFCharacters_CloseParen
) ||
1103 c
== (eEOFCharacters_DropBackslash
|
1104 eEOFCharacters_SingleQuote
|
1105 eEOFCharacters_CloseParen
),
1106 "invalid EOFCharacters value");
1111 nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters
)
1113 mEOFCharacters
= EOFCharacters(aEOFCharacters
);
1117 nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters
)
1119 mEOFCharacters
= EOFCharacters(mEOFCharacters
| aEOFCharacters
);
1122 static const char16_t kImpliedEOFCharacters
[] = {
1123 UCS2_REPLACEMENT_CHAR
, '*', '/', '"', '\'', ')', 0
1127 nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters
,
1130 // First, ignore eEOFCharacters_DropBackslash.
1131 uint32_t c
= aEOFCharacters
>> 1;
1133 // All of the remaining EOFCharacters bits represent appended characters,
1134 // and the bits are in the order that they need appending.
1135 for (const char16_t
* p
= kImpliedEOFCharacters
; *p
&& c
; p
++, c
>>= 1) {
1141 MOZ_ASSERT(c
== 0, "too many bits in mEOFCharacters");
1145 * Consume the part of an URL token after the initial 'url('. Caller
1146 * is assumed to have consumed 'url(' already. Will always produce
1147 * either an URL or a Bad_URL token.
1149 * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
1150 * the special lexical rules for URL tokens in a nonstandard context.
1153 nsCSSScanner::NextURL(nsCSSToken
& aToken
)
1157 // aToken.mIdent may be "url" at this point; clear that out
1158 aToken
.mIdent
.Truncate();
1160 int32_t ch
= Peek();
1161 // Do we have a string?
1162 if (ch
== '"' || ch
== '\'') {
1164 if (MOZ_UNLIKELY(aToken
.mType
== eCSSToken_Bad_String
)) {
1165 aToken
.mType
= eCSSToken_Bad_URL
;
1168 MOZ_ASSERT(aToken
.mType
== eCSSToken_String
, "unexpected token type");
1171 // Otherwise, this is the start of a non-quoted url (which may be empty).
1172 aToken
.mSymbol
= char16_t(0);
1173 GatherText(IS_URL_CHAR
, aToken
.mIdent
);
1176 // Consume trailing whitespace and then look for a close parenthesis.
1179 // ch can be less than zero indicating EOF
1180 if (MOZ_LIKELY(ch
< 0 || ch
== ')')) {
1182 aToken
.mType
= eCSSToken_URL
;
1184 AddEOFCharacters(eEOFCharacters_CloseParen
);
1187 mSeenBadToken
= true;
1188 aToken
.mType
= eCSSToken_Bad_URL
;
1193 * Primary scanner entry point. Consume one token and fill in
1194 * |aToken| accordingly. Will skip over any number of comments first,
1195 * and will also skip over rather than return whitespace tokens if
1196 * |aSkipWS| is true.
1198 * Returns true if it successfully consumed a token, false if EOF has
1199 * been reached. Will always advance the current read position by at
1200 * least one character unless called when already at EOF.
1203 nsCSSScanner::Next(nsCSSToken
& aToken
, bool aSkipWS
)
1207 // do this here so we don't have to do it in dozens of other places
1208 aToken
.mIdent
.Truncate();
1209 aToken
.mType
= eCSSToken_Symbol
;
1212 // Consume any number of comments, and possibly also whitespace tokens,
1213 // in between other tokens.
1214 mTokenOffset
= mOffset
;
1215 mTokenLineOffset
= mLineOffset
;
1216 mTokenLineNumber
= mLineNumber
;
1219 if (IsWhitespace(ch
)) {
1222 aToken
.mType
= eCSSToken_Whitespace
;
1225 continue; // start again at the beginning
1227 if (ch
== '/' && !IsSVGMode() && Peek(1) == '*') {
1228 // FIXME: Editor wants comments to be preserved (bug 60290).
1230 continue; // start again at the beginning
1240 // 'u' could be UNICODE-RANGE or an identifier-family token
1241 if (ch
== 'u' || ch
== 'U') {
1242 int32_t c2
= Peek(1);
1243 int32_t c3
= Peek(2);
1244 if (c2
== '+' && (IsHexDigit(c3
) || c3
== '?')) {
1245 return ScanURange(aToken
);
1247 return ScanIdent(aToken
);
1250 // identifier family
1251 if (IsIdentStart(ch
)) {
1252 return ScanIdent(aToken
);
1257 return ScanNumber(aToken
);
1260 if (ch
== '.' && IsDigit(Peek(1))) {
1261 return ScanNumber(aToken
);
1265 int32_t c2
= Peek(1);
1266 if (IsDigit(c2
) || (c2
== '.' && IsDigit(Peek(2)))) {
1267 return ScanNumber(aToken
);
1271 // '-' can start an identifier-family token, a number-family token,
1272 // or an HTML-comment
1274 int32_t c2
= Peek(1);
1275 int32_t c3
= Peek(2);
1276 if (IsIdentStart(c2
) || (c2
== '-' && c3
!= '>')) {
1277 return ScanIdent(aToken
);
1279 if (IsDigit(c2
) || (c2
== '.' && IsDigit(c3
))) {
1280 return ScanNumber(aToken
);
1282 if (c2
== '-' && c3
== '>') {
1284 aToken
.mType
= eCSSToken_HTMLComment
;
1285 aToken
.mIdent
.AssignLiteral("-->");
1290 // the other HTML-comment token
1291 if (ch
== '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
1293 aToken
.mType
= eCSSToken_HTMLComment
;
1294 aToken
.mIdent
.AssignLiteral("<!--");
1300 return ScanAtKeyword(aToken
);
1305 return ScanHash(aToken
);
1309 if (ch
== '"' || ch
== '\'') {
1310 return ScanString(aToken
);
1313 // Match operators: ~= |= ^= $= *=
1314 nsCSSTokenType opType
= MatchOperatorType(ch
);
1315 if (opType
!= eCSSToken_Symbol
&& Peek(1) == '=') {
1316 aToken
.mType
= opType
;
1321 // Otherwise, a symbol (DELIM).
1322 aToken
.mSymbol
= ch
;
1327 /* nsCSSGridTemplateAreaScanner methods. */
1329 nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString
& aBuffer
)
1330 : mBuffer(aBuffer
.BeginReading())
1332 , mCount(aBuffer
.Length())
1337 nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken
& aTokenResult
)
1342 if (mOffset
>= mCount
) {
1345 ch
= mBuffer
[mOffset
];
1347 } while (IsWhitespace(ch
));
1349 if (IsOpenCharClass(ch
, IS_IDCHAR
)) {
1351 uint32_t start
= mOffset
- 1; // offset of |ch|
1352 while (mOffset
< mCount
&& IsOpenCharClass(mBuffer
[mOffset
], IS_IDCHAR
)) {
1355 aTokenResult
.mName
.Assign(&mBuffer
[start
], mOffset
- start
);
1356 aTokenResult
.isTrash
= false;
1357 } else if (ch
== '.') {
1359 aTokenResult
.mName
.Truncate();
1360 aTokenResult
.isTrash
= false;
1363 aTokenResult
.isTrash
= true;