1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * L. David Baron <dbaron@dbaron.org>
24 * Daniel Glazman <glazman@netscape.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
41 /* tokenization of CSS style sheets */
43 #include "nsCSSScanner.h"
44 #include "nsIFactory.h"
45 #include "nsIInputStream.h"
46 #include "nsIUnicharInputStream.h"
50 // for #ifdef CSS_REPORT_PARSE_ERRORS
52 #include "nsIServiceManager.h"
53 #include "nsIComponentManager.h"
54 #include "nsReadableUtils.h"
56 #include "nsIConsoleService.h"
57 #include "nsIScriptError.h"
58 #include "nsIStringBundle.h"
59 #include "nsContentUtils.h"
60 #include "mozilla/Services.h"
61 #include "mozilla/css/Loader.h"
62 #include "nsCSSStyleSheet.h"
64 #ifdef CSS_REPORT_PARSE_ERRORS
65 static PRBool gReportErrors
= PR_TRUE
;
66 static nsIConsoleService
*gConsoleService
;
67 static nsIFactory
*gScriptErrorFactory
;
68 static nsIStringBundle
*gStringBundle
;
71 // Don't bother collecting whitespace characters in token's mIdent buffer
72 #undef COLLECT_WHITESPACE
74 // Table of character classes
// The backslash character; the scanner compares input characters against
// this to detect the start of an escape sequence.
75 static const PRUnichar CSS_ESCAPE
= PRUnichar('\\');
// Bit flags OR-ed together in each gLexTable entry. Each flag is tested by
// the matching helper: IS_HEX_DIGIT by IsHexDigit().
77 static const PRUint8 IS_HEX_DIGIT
= 0x01;
// Tested by IsIdentStart().
78 static const PRUint8 START_IDENT
= 0x02;
// Tested by IsIdent().
79 static const PRUint8 IS_IDENT
= 0x04;
// Tested by IsWhitespace().
80 static const PRUint8 IS_WHITESPACE
= 0x08;
82 #define W IS_WHITESPACE
85 #define SI IS_IDENT|START_IDENT
86 #define XI IS_IDENT |IS_HEX_DIGIT
87 #define XSI IS_IDENT|START_IDENT|IS_HEX_DIGIT
89 static const PRUint8 gLexTable
[256] = {
91 0, 0, 0, 0, 0, 0, 0, 0, 0, W
, W
, 0, W
, W
, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 // SPC ! " # $ % & ' ( ) * + , - . /
95 W
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, I
, 0, 0,
96 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
97 XI
, XI
, XI
, XI
, XI
, XI
, XI
, XI
, XI
, XI
, 0, 0, 0, 0, 0, 0,
98 // @ A B C D E F G H I J K L M N O
99 0, XSI
,XSI
,XSI
,XSI
,XSI
,XSI
,SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
100 // P Q R S T U V W X Y Z [ \ ] ^ _
101 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, 0, S
, 0, 0, SI
,
102 // ` a b c d e f g h i j k l m n o
103 0, XSI
,XSI
,XSI
,XSI
,XSI
,XSI
,SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
104 // p q r s t u v w x y z { | } ~
105 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, 0, 0, 0, 0, 0,
107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110 // NBSP¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯
111 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
112 // ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
113 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
114 // À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
115 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
116 // Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
117 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
118 // à á â ã ä å æ ç è é ê ë ì í î ï
119 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
120 // ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
121 SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
, SI
,
132 IsIdentStart(PRInt32 aChar
)
135 (aChar
>= 256 || (gLexTable
[aChar
] & START_IDENT
) != 0);
139 StartsIdent(PRInt32 aFirstChar
, PRInt32 aSecondChar
)
141 return IsIdentStart(aFirstChar
) ||
142 (aFirstChar
== '-' && IsIdentStart(aSecondChar
));
146 IsWhitespace(PRInt32 ch
) {
147 return PRUint32(ch
) < 256 && (gLexTable
[ch
] & IS_WHITESPACE
) != 0;
151 IsDigit(PRInt32 ch
) {
152 return (ch
>= '0') && (ch
<= '9');
156 IsHexDigit(PRInt32 ch
) {
157 return PRUint32(ch
) < 256 && (gLexTable
[ch
] & IS_HEX_DIGIT
) != 0;
161 IsIdent(PRInt32 ch
) {
162 return ch
>= 0 && (ch
>= 256 || (gLexTable
[ch
] & IS_IDENT
) != 0);
165 static inline PRUint32
166 DecimalDigitValue(PRInt32 ch
)
171 static inline PRUint32
172 HexDigitValue(PRInt32 ch
)
175 return DecimalDigitValue(ch
);
177 // Note: ch & 0x7 keeps only the low three bits, which makes
178 // upper- and lower-case letters both yield their
179 // "relative to 10" value for computing the hex value.
180 return (ch
& 0x7) + 9;
184 nsCSSToken::nsCSSToken()
186 mType
= eCSSToken_Symbol
;
190 nsCSSToken::AppendToString(nsString
& aBuffer
)
193 case eCSSToken_AtKeyword
:
194 aBuffer
.Append(PRUnichar('@')); // fall through intentional
195 case eCSSToken_Ident
:
196 case eCSSToken_WhiteSpace
:
197 case eCSSToken_Function
:
199 case eCSSToken_InvalidURL
:
200 case eCSSToken_HTMLComment
:
201 case eCSSToken_URange
:
202 aBuffer
.Append(mIdent
);
203 if (mType
== eCSSToken_Function
)
204 aBuffer
.Append(PRUnichar('('));
206 case eCSSToken_Number
:
208 aBuffer
.AppendInt(mInteger
, 10);
211 aBuffer
.AppendFloat(mNumber
);
214 case eCSSToken_Percentage
:
215 NS_ASSERTION(!mIntegerValid
, "How did a percentage token get this set?");
216 aBuffer
.AppendFloat(mNumber
* 100.0f
);
217 aBuffer
.Append(PRUnichar('%')); // STRING USE WARNING: technically, this should be |AppendWithConversion|
219 case eCSSToken_Dimension
:
221 aBuffer
.AppendInt(mInteger
, 10);
224 aBuffer
.AppendFloat(mNumber
);
226 aBuffer
.Append(mIdent
);
228 case eCSSToken_String
:
229 aBuffer
.Append(mSymbol
);
230 aBuffer
.Append(mIdent
); // fall through intentional
231 case eCSSToken_Symbol
:
232 aBuffer
.Append(mSymbol
);
236 aBuffer
.Append(PRUnichar('#'));
237 aBuffer
.Append(mIdent
);
239 case eCSSToken_Includes
:
240 aBuffer
.AppendLiteral("~=");
242 case eCSSToken_Dashmatch
:
243 aBuffer
.AppendLiteral("|=");
245 case eCSSToken_Beginsmatch
:
246 aBuffer
.AppendLiteral("^=");
248 case eCSSToken_Endsmatch
:
249 aBuffer
.AppendLiteral("$=");
251 case eCSSToken_Containsmatch
:
252 aBuffer
.AppendLiteral("*=");
254 case eCSSToken_Error
:
255 aBuffer
.Append(mSymbol
);
256 aBuffer
.Append(mIdent
);
259 NS_ERROR("invalid token type");
264 nsCSSScanner::nsCSSScanner()
265 : mInputStream(nsnull
)
266 , mReadPointer(nsnull
)
267 , mLowLevelError(NS_OK
)
269 #ifdef CSS_REPORT_PARSE_ERRORS
270 , mError(mErrorBuf
, NS_ARRAY_LENGTH(mErrorBuf
), 0)
272 , mWindowIDCached(PR_FALSE
)
277 MOZ_COUNT_CTOR(nsCSSScanner
);
278 mPushback
= mLocalPushback
;
279 mPushbackSize
= NS_ARRAY_LENGTH(mLocalPushback
);
280 // No need to init the other members, since they represent state
281 // which can get cleared. We'll init them every time Init() is
285 nsCSSScanner::~nsCSSScanner()
287 MOZ_COUNT_DTOR(nsCSSScanner
);
289 if (mLocalPushback
!= mPushback
) {
295 nsCSSScanner::GetLowLevelError()
297 return mLowLevelError
;
301 nsCSSScanner::SetLowLevelError(nsresult aErrorCode
)
303 NS_ASSERTION(aErrorCode
!= NS_OK
, "SetLowLevelError() used to clear error");
304 NS_ASSERTION(mLowLevelError
== NS_OK
, "there is already a low-level error");
305 mLowLevelError
= aErrorCode
;
308 #ifdef CSS_REPORT_PARSE_ERRORS
309 #define CSS_ERRORS_PREF "layout.css.report_errors"
312 CSSErrorsPrefChanged(const char *aPref
, void *aClosure
)
314 gReportErrors
= nsContentUtils::GetBoolPref(CSS_ERRORS_PREF
, PR_TRUE
);
320 nsCSSScanner::InitGlobals()
322 #ifdef CSS_REPORT_PARSE_ERRORS
323 if (gConsoleService
&& gScriptErrorFactory
)
326 nsresult rv
= CallGetService(NS_CONSOLESERVICE_CONTRACTID
, &gConsoleService
);
327 NS_ENSURE_SUCCESS(rv
, PR_FALSE
);
329 rv
= CallGetClassObject(NS_SCRIPTERROR_CONTRACTID
, &gScriptErrorFactory
);
330 NS_ENSURE_SUCCESS(rv
, PR_FALSE
);
331 NS_ASSERTION(gConsoleService
&& gScriptErrorFactory
,
332 "unexpected null pointer without failure");
334 nsContentUtils::RegisterPrefCallback(CSS_ERRORS_PREF
, CSSErrorsPrefChanged
, nsnull
);
335 CSSErrorsPrefChanged(CSS_ERRORS_PREF
, nsnull
);
341 nsCSSScanner::ReleaseGlobals()
343 #ifdef CSS_REPORT_PARSE_ERRORS
344 nsContentUtils::UnregisterPrefCallback(CSS_ERRORS_PREF
, CSSErrorsPrefChanged
, nsnull
);
345 NS_IF_RELEASE(gConsoleService
);
346 NS_IF_RELEASE(gScriptErrorFactory
);
347 NS_IF_RELEASE(gStringBundle
);
352 nsCSSScanner::Init(nsIUnicharInputStream
* aInput
,
353 const PRUnichar
* aBuffer
, PRUint32 aCount
,
354 nsIURI
* aURI
, PRUint32 aLineNumber
,
355 nsCSSStyleSheet
* aSheet
, mozilla::css::Loader
* aLoader
)
357 NS_PRECONDITION(!mInputStream
, "Should not have an existing input stream!");
358 NS_PRECONDITION(!mReadPointer
, "Should not have an existing input buffer!");
360 // Read from stream via my own buffer
362 NS_PRECONDITION(!aBuffer
, "Shouldn't have both input and buffer!");
363 NS_PRECONDITION(aCount
== 0, "Shouldn't have count with a stream");
364 mInputStream
= aInput
;
365 mReadPointer
= mBuffer
;
368 NS_PRECONDITION(aBuffer
, "Either aInput or aBuffer must be set");
369 // Read directly from the provided buffer
370 mInputStream
= nsnull
;
371 mReadPointer
= aBuffer
;
375 #ifdef CSS_REPORT_PARSE_ERRORS
376 // If aURI is the same as mURI, no need to reget mFileName -- it
377 // shouldn't have changed.
381 aURI
->GetSpec(mFileName
);
383 mFileName
.Adopt(NS_strdup("from DOM"));
386 #endif // CSS_REPORT_PARSE_ERRORS
387 mLineNumber
= aLineNumber
;
389 // Reset variables that we use to keep track of our progress through the input
392 mLowLevelError
= NS_OK
;
394 #ifdef CSS_REPORT_PARSE_ERRORS
401 #ifdef CSS_REPORT_PARSE_ERRORS
403 // @see REPORT_UNEXPECTED_EOF in nsCSSParser.cpp
404 #define REPORT_UNEXPECTED_EOF(lf_) \
405 ReportUnexpectedEOF(#lf_)
408 nsCSSScanner::AddToError(const nsSubstring
& aErrorText
)
410 if (mError
.IsEmpty()) {
411 mErrorLineNumber
= mLineNumber
;
412 mErrorColNumber
= mColNumber
;
415 mError
.Append(NS_LITERAL_STRING(" ") + aErrorText
);
420 nsCSSScanner::ClearError()
426 nsCSSScanner::OutputError()
428 if (mError
.IsEmpty()) return;
430 // Log it to the Error console
432 if (InitGlobals() && gReportErrors
) {
433 if (!mWindowIDCached
) {
435 mWindowID
= mSheet
->FindOwningWindowID();
437 if (mWindowID
== 0 && mLoader
) {
438 nsIDocument
* doc
= mLoader
->GetDocument();
440 mWindowID
= doc
->OuterWindowID();
443 mWindowIDCached
= PR_TRUE
;
447 nsCOMPtr
<nsIScriptError2
> errorObject
=
448 do_CreateInstance(gScriptErrorFactory
, &rv
);
450 if (NS_SUCCEEDED(rv
)) {
451 rv
= errorObject
->InitWithWindowID(mError
.get(),
452 NS_ConvertUTF8toUTF16(mFileName
).get(),
456 nsIScriptError::warningFlag
,
457 "CSS Parser", mWindowID
);
458 if (NS_SUCCEEDED(rv
)) {
459 nsCOMPtr
<nsIScriptError
> logError
= do_QueryInterface(errorObject
);
460 gConsoleService
->LogMessage(logError
);
473 nsCOMPtr
<nsIStringBundleService
> sbs
=
474 mozilla::services::GetStringBundleService();
479 sbs
->CreateBundle("chrome://global/locale/css.properties", &gStringBundle
);
481 gStringBundle
= nsnull
;
488 #define ENSURE_STRINGBUNDLE \
489 PR_BEGIN_MACRO if (!InitStringBundle()) return; PR_END_MACRO
491 // aMessage must take no parameters
492 void nsCSSScanner::ReportUnexpected(const char* aMessage
)
497 gStringBundle
->GetStringFromName(NS_ConvertASCIItoUTF16(aMessage
).get(),
503 nsCSSScanner::ReportUnexpectedParams(const char* aMessage
,
504 const PRUnichar
**aParams
,
505 PRUint32 aParamsLength
)
507 NS_PRECONDITION(aParamsLength
> 0, "use the non-params version");
511 gStringBundle
->FormatStringFromName(NS_ConvertASCIItoUTF16(aMessage
).get(),
512 aParams
, aParamsLength
,
517 // aLookingFor is a plain string, not a format string
519 nsCSSScanner::ReportUnexpectedEOF(const char* aLookingFor
)
523 nsXPIDLString innerStr
;
524 gStringBundle
->GetStringFromName(NS_ConvertASCIItoUTF16(aLookingFor
).get(),
525 getter_Copies(innerStr
));
527 const PRUnichar
*params
[] = {
531 gStringBundle
->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
532 params
, NS_ARRAY_LENGTH(params
),
537 // aLookingFor is a single character
539 nsCSSScanner::ReportUnexpectedEOF(PRUnichar aLookingFor
)
543 const PRUnichar lookingForStr
[] = {
544 PRUnichar('\''), aLookingFor
, PRUnichar('\''), PRUnichar(0)
546 const PRUnichar
*params
[] = { lookingForStr
};
548 gStringBundle
->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
549 params
, NS_ARRAY_LENGTH(params
),
554 // aMessage must take 1 parameter (for the string representation of the
557 nsCSSScanner::ReportUnexpectedToken(nsCSSToken
& tok
,
558 const char *aMessage
)
562 nsAutoString tokenString
;
563 tok
.AppendToString(tokenString
);
565 const PRUnichar
*params
[] = {
569 ReportUnexpectedParams(aMessage
, params
, NS_ARRAY_LENGTH(params
));
572 // aParams's first entry must be null, and we'll fill in the token
574 nsCSSScanner::ReportUnexpectedTokenParams(nsCSSToken
& tok
,
575 const char* aMessage
,
576 const PRUnichar
**aParams
,
577 PRUint32 aParamsLength
)
579 NS_PRECONDITION(aParamsLength
> 1, "use the non-params version");
580 NS_PRECONDITION(aParams
[0] == nsnull
, "first param should be empty");
584 nsAutoString tokenString
;
585 tok
.AppendToString(tokenString
);
586 aParams
[0] = tokenString
.get();
588 ReportUnexpectedParams(aMessage
, aParams
, aParamsLength
);
593 #define REPORT_UNEXPECTED_EOF(lf_)
595 #endif // CSS_REPORT_PARSE_ERRORS
598 nsCSSScanner::Close()
600 mInputStream
= nsnull
;
601 mReadPointer
= nsnull
;
603 // Clean things up so we don't hold on to memory if our parser gets recycled.
604 #ifdef CSS_REPORT_PARSE_ERRORS
605 mFileName
.Truncate();
609 mWindowIDCached
= PR_FALSE
;
613 if (mPushback
!= mLocalPushback
) {
615 mPushback
= mLocalPushback
;
616 mPushbackSize
= NS_ARRAY_LENGTH(mLocalPushback
);
620 #ifdef CSS_REPORT_PARSE_ERRORS
621 #define TAB_STOP_WIDTH 8
625 nsCSSScanner::EnsureData()
627 if (mOffset
< mCount
)
634 nsresult rv
= mInputStream
->Read(mBuffer
, CSS_BUFFER_SIZE
, &mCount
);
638 SetLowLevelError(rv
);
645 // Returns -1 on error or eof
650 if (0 < mPushbackCount
) {
651 rv
= PRInt32(mPushback
[--mPushbackCount
]);
653 if (mOffset
== mCount
&& !EnsureData()) {
656 rv
= PRInt32(mReadPointer
[mOffset
++]);
657 // There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
658 // To simplify dealing with newlines, they are all normalized to "\n" here
660 if (EnsureData() && mReadPointer
[mOffset
] == '\n') {
664 } else if (rv
== '\f') {
668 // 0 is a magical line number meaning that we don't know (i.e., script)
669 if (mLineNumber
!= 0)
671 #ifdef CSS_REPORT_PARSE_ERRORS
675 #ifdef CSS_REPORT_PARSE_ERRORS
676 else if (rv
== '\t') {
677 mColNumber
= ((mColNumber
- 1 + TAB_STOP_WIDTH
) / TAB_STOP_WIDTH
)
679 } else if (rv
!= '\n') {
684 //printf("Read => %x\n", rv);
691 if (0 == mPushbackCount
) {
696 mPushback
[0] = PRUnichar(ch
);
699 //printf("Peek => %x\n", mLookAhead);
700 return PRInt32(mPushback
[mPushbackCount
- 1]);
704 nsCSSScanner::Pushback(PRUnichar aChar
)
706 if (mPushbackCount
== mPushbackSize
) { // grow buffer
707 PRUnichar
* newPushback
= new PRUnichar
[mPushbackSize
+ 4];
708 if (nsnull
== newPushback
) {
712 memcpy(newPushback
, mPushback
, sizeof(PRUnichar
) * mPushbackCount
);
713 if (mPushback
!= mLocalPushback
) {
716 mPushback
= newPushback
;
718 mPushback
[mPushbackCount
++] = aChar
;
722 nsCSSScanner::LookAhead(PRUnichar aChar
)
736 nsCSSScanner::EatWhiteSpace()
743 if ((ch
!= ' ') && (ch
!= '\n') && (ch
!= '\t')) {
751 nsCSSScanner::Next(nsCSSToken
& aToken
)
753 for (;;) { // Infinite loop so we can restart after comments.
760 if ((ch
== 'u' || ch
== 'U') && Peek() == '+')
761 return ParseURange(ch
, aToken
);
764 if (StartsIdent(ch
, Peek()))
765 return ParseIdent(ch
, aToken
);
769 PRInt32 nextChar
= Read();
771 PRInt32 followingChar
= Peek();
773 if (StartsIdent(nextChar
, followingChar
))
774 return ParseAtKeyword(ch
, aToken
);
779 if ((ch
== '.') || (ch
== '+') || (ch
== '-')) {
780 PRInt32 nextChar
= Peek();
781 if (IsDigit(nextChar
)) {
782 return ParseNumber(ch
, aToken
);
784 else if (('.' == nextChar
) && ('.' != ch
)) {
786 PRInt32 followingChar
= Peek();
788 if (IsDigit(followingChar
))
789 return ParseNumber(ch
, aToken
);
793 return ParseNumber(ch
, aToken
);
798 return ParseRef(ch
, aToken
);
802 if ((ch
== '"') || (ch
== '\'')) {
803 return ParseString(ch
, aToken
);
807 if (IsWhitespace(ch
)) {
808 aToken
.mType
= eCSSToken_WhiteSpace
;
809 aToken
.mIdent
.Assign(PRUnichar(ch
));
813 if (ch
== '/' && !IsSVGMode()) {
814 PRInt32 nextChar
= Peek();
815 if (nextChar
== '*') {
818 // If we change our storage data structures such that comments are
819 // stored (for Editor), we should reenable this code, condition it
820 // on being in editor mode, and apply glazou's patch from bug
822 aToken
.mIdent
.SetCapacity(2);
823 aToken
.mIdent
.Assign(PRUnichar(ch
));
824 aToken
.mIdent
.Append(PRUnichar(nextChar
));
825 return ParseCComment(aToken
);
827 if (!SkipCComment()) {
830 continue; // start again at the beginning
833 if (ch
== '<') { // consume HTML comment tags
834 if (LookAhead('!')) {
835 if (LookAhead('-')) {
836 if (LookAhead('-')) {
837 aToken
.mType
= eCSSToken_HTMLComment
;
838 aToken
.mIdent
.AssignLiteral("<!--");
846 if (ch
== '-') { // check for HTML comment end
847 if (LookAhead('-')) {
848 if (LookAhead('>')) {
849 aToken
.mType
= eCSSToken_HTMLComment
;
850 aToken
.mIdent
.AssignLiteral("-->");
857 // INCLUDES ("~="), DASHMATCH ("|="), BEGINSMATCH ("^="), ENDSMATCH ("$=") and CONTAINSMATCH ("*=")
858 if (( ch
== '|' ) || ( ch
== '~' ) || ( ch
== '^' ) ||
859 ( ch
== '$' ) || ( ch
== '*' )) {
860 PRInt32 nextChar
= Read();
861 if ( nextChar
== '=' ) {
863 aToken
.mType
= eCSSToken_Includes
;
865 else if (ch
== '|') {
866 aToken
.mType
= eCSSToken_Dashmatch
;
868 else if (ch
== '^') {
869 aToken
.mType
= eCSSToken_Beginsmatch
;
871 else if (ch
== '$') {
872 aToken
.mType
= eCSSToken_Endsmatch
;
874 else if (ch
== '*') {
875 aToken
.mType
= eCSSToken_Containsmatch
;
878 } else if (nextChar
>= 0) {
882 aToken
.mType
= eCSSToken_Symbol
;
889 nsCSSScanner::NextURL(nsCSSToken
& aToken
)
897 if ((ch
== '"') || (ch
== '\'')) {
898 return ParseString(ch
, aToken
);
902 if (IsWhitespace(ch
)) {
903 aToken
.mType
= eCSSToken_WhiteSpace
;
904 aToken
.mIdent
.Assign(PRUnichar(ch
));
909 // Process a url lexical token. A CSS1 url token can contain
910 // characters beyond identifier characters (e.g. '/', ':', etc.)
911 // Because of this the normal rules for tokenizing the input don't
912 // apply very well. To simplify the parser and relax some of the
913 // requirements on the scanner we parse url's here. If we find a
914 // malformed URL then we emit a token of type "InvalidURL" so that
915 // the CSS1 parser can ignore the invalid input. The parser must
916 // treat an InvalidURL token like a Function token, and process
917 // tokens until a matching parenthesis.
919 aToken
.mType
= eCSSToken_InvalidURL
;
920 nsString
& ident
= aToken
.mIdent
;
925 // start of a non-quoted url (which may be empty)
930 if (ch
== CSS_ESCAPE
) {
931 ParseAndAppendEscape(ident
);
932 } else if ((ch
== '"') || (ch
== '\'') || (ch
== '(')) {
933 // This is an invalid URL spec
935 Pushback(ch
); // push it back so the parser can match tokens and
936 // then closing parenthesis
938 } else if (IsWhitespace(ch
)) {
939 // Whitespace is allowed at the end of the URL
941 if (LookAhead(')')) {
942 Pushback(')'); // leave the closing symbol
946 // Whitespace is followed by something other than a
947 // ")". This is an invalid url spec.
950 } else if (ch
== ')') {
955 // A regular url character.
956 ident
.Append(PRUnichar(ch
));
960 // If the result of the above scanning is ok then change the token
961 // type to a useful one.
963 aToken
.mType
= eCSSToken_URL
;
970 nsCSSScanner::ParseAndAppendEscape(nsString
& aOutput
)
974 aOutput
.Append(CSS_ESCAPE
);
977 if (IsHexDigit(ch
)) {
980 for (i
= 0; i
< 6; i
++) { // up to six digits
983 // Whoops: error or premature eof
986 if (!IsHexDigit(ch
) && !IsWhitespace(ch
)) {
989 } else if (IsHexDigit(ch
)) {
990 rv
= rv
* 16 + HexDigitValue(ch
);
992 NS_ASSERTION(IsWhitespace(ch
), "bad control flow");
993 // single space ends escape
997 if (6 == i
) { // look for trailing whitespace and eat it
999 if (IsWhitespace(ch
)) {
1003 NS_ASSERTION(rv
>= 0, "How did rv become negative?");
1004 // "[at most six hexadecimal digits following a backslash] stand
1005 // for the ISO 10646 character with that number, which must not be
1006 // zero. (It is undefined in CSS 2.1 what happens if a style sheet
1007 // does contain a character with Unicode codepoint zero.)"
1008 // -- CSS2.1 section 4.1.3
1010 // Silently deleting \0 opens a content-filtration loophole (see
1011 // bug 228856), so what we do instead is pretend the "cancels the
1012 // meaning of special characters" rule applied.
1014 AppendUCS4ToUTF16(ENSURE_VALID_CHAR(rv
), aOutput
);
1017 aOutput
.Append('0');
1018 if (IsWhitespace(ch
))
1023 // "Any character except a hexidecimal digit can be escaped to
1024 // remove its special meaning by putting a backslash in front"
1025 // -- CSS1 spec section 7.1
1026 ch
= Read(); // Consume the escaped character
1027 if ((ch
> 0) && (ch
!= '\n')) {
1033 * Gather up the characters in an identifier. The identifier was
1034 * started by "aChar" which will be appended to aIdent. The result
1035 * will be aIdent with all of the identifier characters appended
1036 * until the first non-identifier character is seen. The terminating
1037 * character is unread so that it can be re-read later.
1040 nsCSSScanner::GatherIdent(PRInt32 aChar
, nsString
& aIdent
)
1042 if (aChar
== CSS_ESCAPE
) {
1043 ParseAndAppendEscape(aIdent
);
1045 else if (0 < aChar
) {
1046 aIdent
.Append(aChar
);
1049 // If nothing in pushback, first try to get as much as possible in one go
1050 if (!mPushbackCount
&& EnsureData()) {
1051 // See how much we can consume and append in one go
1052 PRUint32 n
= mOffset
;
1053 // Count number of Ident characters that can be processed
1054 while (n
< mCount
&& IsIdent(mReadPointer
[n
])) {
1057 // Add to the token what we have so far
1059 #ifdef CSS_REPORT_PARSE_ERRORS
1060 mColNumber
+= n
- mOffset
;
1062 aIdent
.Append(&mReadPointer
[mOffset
], n
- mOffset
);
1068 if (aChar
< 0) break;
1069 if (aChar
== CSS_ESCAPE
) {
1070 ParseAndAppendEscape(aIdent
);
1071 } else if (IsIdent(aChar
)) {
1072 aIdent
.Append(PRUnichar(aChar
));
1082 nsCSSScanner::ParseRef(PRInt32 aChar
, nsCSSToken
& aToken
)
1084 aToken
.mIdent
.SetLength(0);
1085 aToken
.mType
= eCSSToken_Ref
;
1086 PRInt32 ch
= Read();
1090 if (IsIdent(ch
) || ch
== CSS_ESCAPE
) {
1091 // First char after the '#' is a valid ident char (or an escape),
1092 // so it makes sense to keep going
1093 if (StartsIdent(ch
, Peek())) {
1094 aToken
.mType
= eCSSToken_ID
;
1096 return GatherIdent(ch
, aToken
.mIdent
);
1099 // No ident chars after the '#'. Just unread |ch| and get out of here.
1105 nsCSSScanner::ParseIdent(PRInt32 aChar
, nsCSSToken
& aToken
)
1107 nsString
& ident
= aToken
.mIdent
;
1109 if (!GatherIdent(aChar
, ident
)) {
1113 nsCSSTokenType tokenType
= eCSSToken_Ident
;
1114 // look for functions (ie: "ident(")
1115 if (Peek() == PRUnichar('(')) {
1117 tokenType
= eCSSToken_Function
;
1120 aToken
.mType
= tokenType
;
1125 nsCSSScanner::ParseAtKeyword(PRInt32 aChar
, nsCSSToken
& aToken
)
1127 aToken
.mIdent
.SetLength(0);
1128 aToken
.mType
= eCSSToken_AtKeyword
;
1129 return GatherIdent(0, aToken
.mIdent
);
1133 nsCSSScanner::ParseNumber(PRInt32 c
, nsCSSToken
& aToken
)
1135 NS_PRECONDITION(c
== '.' || c
== '+' || c
== '-' || IsDigit(c
),
1136 "Why did we get called?");
1137 aToken
.mHasSign
= (c
== '+' || c
== '-');
1140 PRInt32 sign
= c
== '-' ? -1 : 1;
1141 // Absolute value of the integer part of the mantissa. This is a double so
1142 // we don't run into overflow issues for consumers that only care about our
1143 // floating-point value while still being able to express the full PRInt32
1144 // range for consumers who want integers.
1146 // Fractional part of the mantissa. This is a double so that when we convert
1147 // to float at the end we'll end up rounding to nearest float instead of
1148 // truncating down (as we would if fracPart were a float and we just
1149 // effectively lost the last several digits).
1150 double fracPart
= 0;
1151 // Absolute value of the power of 10 that we should multiply by (only
1152 // relevant for numbers in scientific notation). Has to be a signed integer,
1153 // because multiplication of signed by unsigned converts the unsigned to
1154 // signed, so if we plan to actually multiply by expSign...
1155 PRInt32 exponent
= 0;
1156 // Sign of the exponent.
1157 PRInt32 expSign
= 1;
1159 if (aToken
.mHasSign
) {
1160 NS_ASSERTION(c
!= '.', "How did that happen?");
1164 PRBool gotDot
= (c
== '.');
1167 // Parse the integer part of the mantissa
1168 NS_ASSERTION(IsDigit(c
), "Why did we get called?");
1170 intPart
= 10*intPart
+ DecimalDigitValue(c
);
1172 // The IsDigit check will do the right thing even if Read() returns < 0
1173 } while (IsDigit(c
));
1175 gotDot
= (c
== '.') && IsDigit(Peek());
1179 // Parse the fractional part of the mantissa.
1181 NS_ASSERTION(IsDigit(c
), "How did we get here?");
1182 // Power of ten by which we need to divide our next digit
1185 fracPart
+= DecimalDigitValue(c
) / divisor
;
1188 // The IsDigit check will do the right thing even if Read() returns < 0
1189 } while (IsDigit(c
));
1192 PRBool gotE
= PR_FALSE
;
1193 if (IsSVGMode() && (c
== 'e' || c
== 'E')) {
1194 PRInt32 nextChar
= Peek();
1195 PRInt32 expSignChar
= 0;
1196 if (nextChar
== '-' || nextChar
== '+') {
1197 expSignChar
= Read();
1200 if (IsDigit(nextChar
)) {
1202 if (expSignChar
== '-') {
1207 NS_ASSERTION(IsDigit(c
), "Peek() must have lied");
1209 exponent
= 10*exponent
+ DecimalDigitValue(c
);
1211 // The IsDigit check will do the right thing even if Read() returns < 0
1212 } while (IsDigit(c
));
1215 Pushback(expSignChar
);
1220 nsCSSTokenType type
= eCSSToken_Number
;
1222 // Set mIntegerValid for all cases (except %, below) because we need
1223 // it for the "2n" in :nth-child(2n).
1224 aToken
.mIntegerValid
= PR_FALSE
;
1226 // Time to reassemble our number.
1227 float value
= float(sign
* (intPart
+ fracPart
));
1229 // pow(), not powf(), because at least wince doesn't have the latter.
1230 // And explicitly cast everything to doubles to avoid issues with
1231 // overloaded pow() on Windows.
1232 value
*= pow(10.0, double(expSign
* exponent
));
1233 } else if (!gotDot
) {
1234 // Clamp values outside of integer range.
1236 aToken
.mInteger
= PRInt32(NS_MIN(intPart
, double(PR_INT32_MAX
)));
1238 aToken
.mInteger
= PRInt32(NS_MAX(-intPart
, double(PR_INT32_MIN
)));
1240 aToken
.mIntegerValid
= PR_TRUE
;
1243 nsString
& ident
= aToken
.mIdent
;
1246 // Look at character that terminated the number
1248 if (StartsIdent(c
, Peek())) {
1249 if (!GatherIdent(c
, ident
)) {
1252 type
= eCSSToken_Dimension
;
1253 } else if ('%' == c
) {
1254 type
= eCSSToken_Percentage
;
1255 value
= value
/ 100.0f
;
1256 aToken
.mIntegerValid
= PR_FALSE
;
1258 // Put back character that stopped numeric scan
1262 aToken
.mNumber
= value
;
1263 aToken
.mType
= type
;
1268 nsCSSScanner::SkipCComment()
1271 PRInt32 ch
= Read();
1274 if (LookAhead('/')) {
1280 REPORT_UNEXPECTED_EOF(PECommentEOF
);
1285 nsCSSScanner::ParseString(PRInt32 aStop
, nsCSSToken
& aToken
)
1287 aToken
.mIdent
.SetLength(0);
1288 aToken
.mType
= eCSSToken_String
;
1289 aToken
.mSymbol
= PRUnichar(aStop
); // remember how it's quoted
1291 // If nothing in pushback, first try to get as much as possible in one go
1292 if (!mPushbackCount
&& EnsureData()) {
1293 // See how much we can consume and append in one go
1294 PRUint32 n
= mOffset
;
1295 // Count number of characters that can be processed
1296 for (;n
< mCount
; ++n
) {
1297 PRUnichar nextChar
= mReadPointer
[n
];
1298 if ((nextChar
== aStop
) || (nextChar
== CSS_ESCAPE
) ||
1299 (nextChar
== '\n') || (nextChar
== '\r') || (nextChar
== '\f')) {
1302 #ifdef CSS_REPORT_PARSE_ERRORS
1303 if (nextChar
== '\t') {
1304 mColNumber
= ((mColNumber
- 1 + TAB_STOP_WIDTH
) / TAB_STOP_WIDTH
)
1311 // Add to the token what we have so far
1313 aToken
.mIdent
.Append(&mReadPointer
[mOffset
], n
- mOffset
);
1317 PRInt32 ch
= Read();
1318 if (ch
< 0 || ch
== aStop
) {
1322 aToken
.mType
= eCSSToken_Error
;
1323 #ifdef CSS_REPORT_PARSE_ERRORS
1324 ReportUnexpectedToken(aToken
, "SEUnterminatedString");
1328 if (ch
== CSS_ESCAPE
) {
1329 ParseAndAppendEscape(aToken
.mIdent
);
1331 aToken
.mIdent
.Append(ch
);
1337 // UNICODE-RANGE tokens match the regular expression
1339 // u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
1341 // However, some such tokens are "invalid". There are three valid forms:
1343 // u+[0-9a-f]{x} 1 <= x <= 6
1344 // u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
1345 // u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
1347 // All unicode-range tokens have their text recorded in mIdent; valid ones
1348 // are also decoded into mInteger and mInteger2, and mIntegerValid is set.
1351 nsCSSScanner::ParseURange(PRInt32 aChar
, nsCSSToken
& aResult
)
1353 PRInt32 intro2
= Read();
1354 PRInt32 ch
= Peek();
1356 // We should only ever be called if these things are true.
1357 NS_ASSERTION(aChar
== 'u' || aChar
== 'U',
1358 "unicode-range called with improper introducer (U)");
1359 NS_ASSERTION(intro2
== '+',
1360 "unicode-range called with improper introducer (+)");
1362 // If the character immediately after the '+' is not a hex digit or
1363 // '?', this is not really a unicode-range token; push everything
1364 // back and scan the U as an ident.
1365 if (!IsHexDigit(ch
) && ch
!= '?') {
1368 return ParseIdent(aChar
, aResult
);
1371 aResult
.mIdent
.Truncate();
1372 aResult
.mIdent
.Append(aChar
);
1373 aResult
.mIdent
.Append(intro2
);
1375 PRBool valid
= PR_TRUE
;
1376 PRBool haveQues
= PR_FALSE
;
1384 if (i
== 7 || !(IsHexDigit(ch
) || ch
== '?')) {
1388 aResult
.mIdent
.Append(ch
);
1389 if (IsHexDigit(ch
)) {
1391 valid
= PR_FALSE
; // all question marks should be at the end
1393 low
= low
*16 + HexDigitValue(ch
);
1394 high
= high
*16 + HexDigitValue(ch
);
1398 high
= high
*16 + 0xF;
1402 if (ch
== '-' && IsHexDigit(Peek())) {
1407 aResult
.mIdent
.Append(ch
);
1413 if (i
== 7 || !IsHexDigit(ch
)) {
1416 aResult
.mIdent
.Append(ch
);
1417 high
= high
*16 + HexDigitValue(ch
);
1422 aResult
.mInteger
= low
;
1423 aResult
.mInteger2
= high
;
1424 aResult
.mIntegerValid
= valid
;
1425 aResult
.mType
= eCSSToken_URange
;