CLOSED TREE: TraceMonkey merge head. (a=blockers)
[mozilla-central.git] / layout / style / nsCSSScanner.cpp
blob1b9c7dea205d6e9af0688136dac98ee80249e3b2
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * L. David Baron <dbaron@dbaron.org>
24 * Daniel Glazman <glazman@netscape.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
39 #include <math.h>
41 /* tokenization of CSS style sheets */
43 #include "nsCSSScanner.h"
44 #include "nsIFactory.h"
45 #include "nsIInputStream.h"
46 #include "nsIUnicharInputStream.h"
47 #include "nsString.h"
48 #include "nsCRT.h"
50 // for #ifdef CSS_REPORT_PARSE_ERRORS
51 #include "nsCOMPtr.h"
52 #include "nsIServiceManager.h"
53 #include "nsIComponentManager.h"
54 #include "nsReadableUtils.h"
55 #include "nsIURI.h"
56 #include "nsIConsoleService.h"
57 #include "nsIScriptError.h"
58 #include "nsIStringBundle.h"
59 #include "nsContentUtils.h"
60 #include "mozilla/Services.h"
61 #include "mozilla/css/Loader.h"
62 #include "nsCSSStyleSheet.h"
64 #ifdef CSS_REPORT_PARSE_ERRORS
65 static PRBool gReportErrors = PR_TRUE;
66 static nsIConsoleService *gConsoleService;
67 static nsIFactory *gScriptErrorFactory;
68 static nsIStringBundle *gStringBundle;
69 #endif
71 // Don't bother collecting whitespace characters in token's mIdent buffer
72 #undef COLLECT_WHITESPACE
74 // Table of character classes
75 static const PRUnichar CSS_ESCAPE = PRUnichar('\\');
77 static const PRUint8 IS_HEX_DIGIT = 0x01;
78 static const PRUint8 START_IDENT = 0x02;
79 static const PRUint8 IS_IDENT = 0x04;
80 static const PRUint8 IS_WHITESPACE = 0x08;
82 #define W IS_WHITESPACE
83 #define I IS_IDENT
84 #define S START_IDENT
85 #define SI IS_IDENT|START_IDENT
86 #define XI IS_IDENT |IS_HEX_DIGIT
87 #define XSI IS_IDENT|START_IDENT|IS_HEX_DIGIT
89 static const PRUint8 gLexTable[256] = {
90 // TAB LF FF CR
91 0, 0, 0, 0, 0, 0, 0, 0, 0, W, W, 0, W, W, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 // SPC ! " # $ % & ' ( ) * + , - . /
95 W, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, I, 0, 0,
96 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
97 XI, XI, XI, XI, XI, XI, XI, XI, XI, XI, 0, 0, 0, 0, 0, 0,
98 // @ A B C D E F G H I J K L M N O
99 0, XSI,XSI,XSI,XSI,XSI,XSI,SI, SI, SI, SI, SI, SI, SI, SI, SI,
100 // P Q R S T U V W X Y Z [ \ ] ^ _
101 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, 0, S, 0, 0, SI,
102 // ` a b c d e f g h i j k l m n o
103 0, XSI,XSI,XSI,XSI,XSI,XSI,SI, SI, SI, SI, SI, SI, SI, SI, SI,
104 // p q r s t u v w x y z { | } ~
105 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, 0, 0, 0, 0, 0,
107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110 // NBSP¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
111 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
112 // ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
113 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
114 // À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
115 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
116 // Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
117 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
118 // à á â ã ä å æ ç è é ê ë ì í î ï
119 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
120 // ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
121 SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI, SI,
124 #undef W
125 #undef S
126 #undef I
127 #undef XI
128 #undef SI
129 #undef XSI
131 static inline PRBool
132 IsIdentStart(PRInt32 aChar)
134 return aChar >= 0 &&
135 (aChar >= 256 || (gLexTable[aChar] & START_IDENT) != 0);
138 static inline PRBool
139 StartsIdent(PRInt32 aFirstChar, PRInt32 aSecondChar)
141 return IsIdentStart(aFirstChar) ||
142 (aFirstChar == '-' && IsIdentStart(aSecondChar));
145 static inline PRBool
146 IsWhitespace(PRInt32 ch) {
147 return PRUint32(ch) < 256 && (gLexTable[ch] & IS_WHITESPACE) != 0;
150 static inline PRBool
151 IsDigit(PRInt32 ch) {
152 return (ch >= '0') && (ch <= '9');
155 static inline PRBool
156 IsHexDigit(PRInt32 ch) {
157 return PRUint32(ch) < 256 && (gLexTable[ch] & IS_HEX_DIGIT) != 0;
160 static inline PRBool
161 IsIdent(PRInt32 ch) {
162 return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_IDENT) != 0);
165 static inline PRUint32
166 DecimalDigitValue(PRInt32 ch)
168 return ch - '0';
171 static inline PRUint32
172 HexDigitValue(PRInt32 ch)
174 if (IsDigit(ch)) {
175 return DecimalDigitValue(ch);
176 } else {
177 // Note: c&7 just keeps the low three bits which causes
178 // upper and lower case alphabetics to both yield their
179 // "relative to 10" value for computing the hex value.
180 return (ch & 0x7) + 9;
184 nsCSSToken::nsCSSToken()
186 mType = eCSSToken_Symbol;
189 void
190 nsCSSToken::AppendToString(nsString& aBuffer)
192 switch (mType) {
193 case eCSSToken_AtKeyword:
194 aBuffer.Append(PRUnichar('@')); // fall through intentional
195 case eCSSToken_Ident:
196 case eCSSToken_WhiteSpace:
197 case eCSSToken_Function:
198 case eCSSToken_URL:
199 case eCSSToken_InvalidURL:
200 case eCSSToken_HTMLComment:
201 case eCSSToken_URange:
202 aBuffer.Append(mIdent);
203 if (mType == eCSSToken_Function)
204 aBuffer.Append(PRUnichar('('));
205 break;
206 case eCSSToken_Number:
207 if (mIntegerValid) {
208 aBuffer.AppendInt(mInteger, 10);
210 else {
211 aBuffer.AppendFloat(mNumber);
213 break;
214 case eCSSToken_Percentage:
215 NS_ASSERTION(!mIntegerValid, "How did a percentage token get this set?");
216 aBuffer.AppendFloat(mNumber * 100.0f);
217 aBuffer.Append(PRUnichar('%')); // STRING USE WARNING: technically, this should be |AppendWithConversion|
218 break;
219 case eCSSToken_Dimension:
220 if (mIntegerValid) {
221 aBuffer.AppendInt(mInteger, 10);
223 else {
224 aBuffer.AppendFloat(mNumber);
226 aBuffer.Append(mIdent);
227 break;
228 case eCSSToken_String:
229 aBuffer.Append(mSymbol);
230 aBuffer.Append(mIdent); // fall through intentional
231 case eCSSToken_Symbol:
232 aBuffer.Append(mSymbol);
233 break;
234 case eCSSToken_ID:
235 case eCSSToken_Ref:
236 aBuffer.Append(PRUnichar('#'));
237 aBuffer.Append(mIdent);
238 break;
239 case eCSSToken_Includes:
240 aBuffer.AppendLiteral("~=");
241 break;
242 case eCSSToken_Dashmatch:
243 aBuffer.AppendLiteral("|=");
244 break;
245 case eCSSToken_Beginsmatch:
246 aBuffer.AppendLiteral("^=");
247 break;
248 case eCSSToken_Endsmatch:
249 aBuffer.AppendLiteral("$=");
250 break;
251 case eCSSToken_Containsmatch:
252 aBuffer.AppendLiteral("*=");
253 break;
254 case eCSSToken_Error:
255 aBuffer.Append(mSymbol);
256 aBuffer.Append(mIdent);
257 break;
258 default:
259 NS_ERROR("invalid token type");
260 break;
264 nsCSSScanner::nsCSSScanner()
265 : mInputStream(nsnull)
266 , mReadPointer(nsnull)
267 , mLowLevelError(NS_OK)
268 , mSVGMode(PR_FALSE)
269 #ifdef CSS_REPORT_PARSE_ERRORS
270 , mError(mErrorBuf, NS_ARRAY_LENGTH(mErrorBuf), 0)
271 , mWindowID(0)
272 , mWindowIDCached(PR_FALSE)
273 , mSheet(nsnull)
274 , mLoader(nsnull)
275 #endif
277 MOZ_COUNT_CTOR(nsCSSScanner);
278 mPushback = mLocalPushback;
279 mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
280 // No need to init the other members, since they represent state
281 // which can get cleared. We'll init them every time Init() is
282 // called.
285 nsCSSScanner::~nsCSSScanner()
287 MOZ_COUNT_DTOR(nsCSSScanner);
288 Close();
289 if (mLocalPushback != mPushback) {
290 delete [] mPushback;
294 nsresult
295 nsCSSScanner::GetLowLevelError()
297 return mLowLevelError;
300 void
301 nsCSSScanner::SetLowLevelError(nsresult aErrorCode)
303 NS_ASSERTION(aErrorCode != NS_OK, "SetLowLevelError() used to clear error");
304 NS_ASSERTION(mLowLevelError == NS_OK, "there is already a low-level error");
305 mLowLevelError = aErrorCode;
308 #ifdef CSS_REPORT_PARSE_ERRORS
309 #define CSS_ERRORS_PREF "layout.css.report_errors"
311 static int
312 CSSErrorsPrefChanged(const char *aPref, void *aClosure)
314 gReportErrors = nsContentUtils::GetBoolPref(CSS_ERRORS_PREF, PR_TRUE);
315 return NS_OK;
317 #endif
319 /* static */ PRBool
320 nsCSSScanner::InitGlobals()
322 #ifdef CSS_REPORT_PARSE_ERRORS
323 if (gConsoleService && gScriptErrorFactory)
324 return PR_TRUE;
326 nsresult rv = CallGetService(NS_CONSOLESERVICE_CONTRACTID, &gConsoleService);
327 NS_ENSURE_SUCCESS(rv, PR_FALSE);
329 rv = CallGetClassObject(NS_SCRIPTERROR_CONTRACTID, &gScriptErrorFactory);
330 NS_ENSURE_SUCCESS(rv, PR_FALSE);
331 NS_ASSERTION(gConsoleService && gScriptErrorFactory,
332 "unexpected null pointer without failure");
334 nsContentUtils::RegisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
335 CSSErrorsPrefChanged(CSS_ERRORS_PREF, nsnull);
336 #endif
337 return PR_TRUE;
340 /* static */ void
341 nsCSSScanner::ReleaseGlobals()
343 #ifdef CSS_REPORT_PARSE_ERRORS
344 nsContentUtils::UnregisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
345 NS_IF_RELEASE(gConsoleService);
346 NS_IF_RELEASE(gScriptErrorFactory);
347 NS_IF_RELEASE(gStringBundle);
348 #endif
351 void
352 nsCSSScanner::Init(nsIUnicharInputStream* aInput,
353 const PRUnichar * aBuffer, PRUint32 aCount,
354 nsIURI* aURI, PRUint32 aLineNumber,
355 nsCSSStyleSheet* aSheet, mozilla::css::Loader* aLoader)
357 NS_PRECONDITION(!mInputStream, "Should not have an existing input stream!");
358 NS_PRECONDITION(!mReadPointer, "Should not have an existing input buffer!");
360 // Read from stream via my own buffer
361 if (aInput) {
362 NS_PRECONDITION(!aBuffer, "Shouldn't have both input and buffer!");
363 NS_PRECONDITION(aCount == 0, "Shouldn't have count with a stream");
364 mInputStream = aInput;
365 mReadPointer = mBuffer;
366 mCount = 0;
367 } else {
368 NS_PRECONDITION(aBuffer, "Either aInput or aBuffer must be set");
369 // Read directly from the provided buffer
370 mInputStream = nsnull;
371 mReadPointer = aBuffer;
372 mCount = aCount;
375 #ifdef CSS_REPORT_PARSE_ERRORS
376 // If aURI is the same as mURI, no need to reget mFileName -- it
377 // shouldn't have changed.
378 if (aURI != mURI) {
379 mURI = aURI;
380 if (aURI) {
381 aURI->GetSpec(mFileName);
382 } else {
383 mFileName.Adopt(NS_strdup("from DOM"));
386 #endif // CSS_REPORT_PARSE_ERRORS
387 mLineNumber = aLineNumber;
389 // Reset variables that we use to keep track of our progress through the input
390 mOffset = 0;
391 mPushbackCount = 0;
392 mLowLevelError = NS_OK;
394 #ifdef CSS_REPORT_PARSE_ERRORS
395 mColNumber = 0;
396 mSheet = aSheet;
397 mLoader = aLoader;
398 #endif
401 #ifdef CSS_REPORT_PARSE_ERRORS
403 // @see REPORT_UNEXPECTED_EOF in nsCSSParser.cpp
404 #define REPORT_UNEXPECTED_EOF(lf_) \
405 ReportUnexpectedEOF(#lf_)
407 void
408 nsCSSScanner::AddToError(const nsSubstring& aErrorText)
410 if (mError.IsEmpty()) {
411 mErrorLineNumber = mLineNumber;
412 mErrorColNumber = mColNumber;
413 mError = aErrorText;
414 } else {
415 mError.Append(NS_LITERAL_STRING(" ") + aErrorText);
419 void
420 nsCSSScanner::ClearError()
422 mError.Truncate();
425 void
426 nsCSSScanner::OutputError()
428 if (mError.IsEmpty()) return;
430 // Log it to the Error console
432 if (InitGlobals() && gReportErrors) {
433 if (!mWindowIDCached) {
434 if (mSheet) {
435 mWindowID = mSheet->FindOwningWindowID();
437 if (mWindowID == 0 && mLoader) {
438 nsIDocument* doc = mLoader->GetDocument();
439 if (doc) {
440 mWindowID = doc->OuterWindowID();
443 mWindowIDCached = PR_TRUE;
446 nsresult rv;
447 nsCOMPtr<nsIScriptError2> errorObject =
448 do_CreateInstance(gScriptErrorFactory, &rv);
450 if (NS_SUCCEEDED(rv)) {
451 rv = errorObject->InitWithWindowID(mError.get(),
452 NS_ConvertUTF8toUTF16(mFileName).get(),
453 EmptyString().get(),
454 mErrorLineNumber,
455 mErrorColNumber,
456 nsIScriptError::warningFlag,
457 "CSS Parser", mWindowID);
458 if (NS_SUCCEEDED(rv)) {
459 nsCOMPtr<nsIScriptError> logError = do_QueryInterface(errorObject);
460 gConsoleService->LogMessage(logError);
464 ClearError();
467 static PRBool
468 InitStringBundle()
470 if (gStringBundle)
471 return PR_TRUE;
473 nsCOMPtr<nsIStringBundleService> sbs =
474 mozilla::services::GetStringBundleService();
475 if (!sbs)
476 return PR_FALSE;
478 nsresult rv =
479 sbs->CreateBundle("chrome://global/locale/css.properties", &gStringBundle);
480 if (NS_FAILED(rv)) {
481 gStringBundle = nsnull;
482 return PR_FALSE;
485 return PR_TRUE;
488 #define ENSURE_STRINGBUNDLE \
489 PR_BEGIN_MACRO if (!InitStringBundle()) return; PR_END_MACRO
491 // aMessage must take no parameters
492 void nsCSSScanner::ReportUnexpected(const char* aMessage)
494 ENSURE_STRINGBUNDLE;
496 nsXPIDLString str;
497 gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
498 getter_Copies(str));
499 AddToError(str);
502 void
503 nsCSSScanner::ReportUnexpectedParams(const char* aMessage,
504 const PRUnichar **aParams,
505 PRUint32 aParamsLength)
507 NS_PRECONDITION(aParamsLength > 0, "use the non-params version");
508 ENSURE_STRINGBUNDLE;
510 nsXPIDLString str;
511 gStringBundle->FormatStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
512 aParams, aParamsLength,
513 getter_Copies(str));
514 AddToError(str);
517 // aLookingFor is a plain string, not a format string
518 void
519 nsCSSScanner::ReportUnexpectedEOF(const char* aLookingFor)
521 ENSURE_STRINGBUNDLE;
523 nsXPIDLString innerStr;
524 gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aLookingFor).get(),
525 getter_Copies(innerStr));
527 const PRUnichar *params[] = {
528 innerStr.get()
530 nsXPIDLString str;
531 gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
532 params, NS_ARRAY_LENGTH(params),
533 getter_Copies(str));
534 AddToError(str);
537 // aLookingFor is a single character
538 void
539 nsCSSScanner::ReportUnexpectedEOF(PRUnichar aLookingFor)
541 ENSURE_STRINGBUNDLE;
543 const PRUnichar lookingForStr[] = {
544 PRUnichar('\''), aLookingFor, PRUnichar('\''), PRUnichar(0)
546 const PRUnichar *params[] = { lookingForStr };
547 nsXPIDLString str;
548 gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
549 params, NS_ARRAY_LENGTH(params),
550 getter_Copies(str));
551 AddToError(str);
554 // aMessage must take 1 parameter (for the string representation of the
555 // unexpected token)
556 void
557 nsCSSScanner::ReportUnexpectedToken(nsCSSToken& tok,
558 const char *aMessage)
560 ENSURE_STRINGBUNDLE;
562 nsAutoString tokenString;
563 tok.AppendToString(tokenString);
565 const PRUnichar *params[] = {
566 tokenString.get()
569 ReportUnexpectedParams(aMessage, params, NS_ARRAY_LENGTH(params));
572 // aParams's first entry must be null, and we'll fill in the token
573 void
574 nsCSSScanner::ReportUnexpectedTokenParams(nsCSSToken& tok,
575 const char* aMessage,
576 const PRUnichar **aParams,
577 PRUint32 aParamsLength)
579 NS_PRECONDITION(aParamsLength > 1, "use the non-params version");
580 NS_PRECONDITION(aParams[0] == nsnull, "first param should be empty");
582 ENSURE_STRINGBUNDLE;
584 nsAutoString tokenString;
585 tok.AppendToString(tokenString);
586 aParams[0] = tokenString.get();
588 ReportUnexpectedParams(aMessage, aParams, aParamsLength);
591 #else
593 #define REPORT_UNEXPECTED_EOF(lf_)
595 #endif // CSS_REPORT_PARSE_ERRORS
597 void
598 nsCSSScanner::Close()
600 mInputStream = nsnull;
601 mReadPointer = nsnull;
603 // Clean things up so we don't hold on to memory if our parser gets recycled.
604 #ifdef CSS_REPORT_PARSE_ERRORS
605 mFileName.Truncate();
606 mURI = nsnull;
607 mError.Truncate();
608 mWindowID = 0;
609 mWindowIDCached = PR_FALSE;
610 mSheet = nsnull;
611 mLoader = nsnull;
612 #endif
613 if (mPushback != mLocalPushback) {
614 delete [] mPushback;
615 mPushback = mLocalPushback;
616 mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
620 #ifdef CSS_REPORT_PARSE_ERRORS
621 #define TAB_STOP_WIDTH 8
622 #endif
624 PRBool
625 nsCSSScanner::EnsureData()
627 if (mOffset < mCount)
628 return PR_TRUE;
630 if (!mInputStream)
631 return PR_FALSE;
633 mOffset = 0;
634 nsresult rv = mInputStream->Read(mBuffer, CSS_BUFFER_SIZE, &mCount);
636 if (NS_FAILED(rv)) {
637 mCount = 0;
638 SetLowLevelError(rv);
639 return PR_FALSE;
642 return mCount > 0;
645 // Returns -1 on error or eof
646 PRInt32
647 nsCSSScanner::Read()
649 PRInt32 rv;
650 if (0 < mPushbackCount) {
651 rv = PRInt32(mPushback[--mPushbackCount]);
652 } else {
653 if (mOffset == mCount && !EnsureData()) {
654 return -1;
656 rv = PRInt32(mReadPointer[mOffset++]);
657 // There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
658 // To simplify dealing with newlines, they are all normalized to "\n" here
659 if (rv == '\r') {
660 if (EnsureData() && mReadPointer[mOffset] == '\n') {
661 mOffset++;
663 rv = '\n';
664 } else if (rv == '\f') {
665 rv = '\n';
667 if (rv == '\n') {
668 // 0 is a magical line number meaning that we don't know (i.e., script)
669 if (mLineNumber != 0)
670 ++mLineNumber;
671 #ifdef CSS_REPORT_PARSE_ERRORS
672 mColNumber = 0;
673 #endif
675 #ifdef CSS_REPORT_PARSE_ERRORS
676 else if (rv == '\t') {
677 mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
678 * TAB_STOP_WIDTH;
679 } else if (rv != '\n') {
680 mColNumber++;
682 #endif
684 //printf("Read => %x\n", rv);
685 return rv;
688 PRInt32
689 nsCSSScanner::Peek()
691 if (0 == mPushbackCount) {
692 PRInt32 ch = Read();
693 if (ch < 0) {
694 return -1;
696 mPushback[0] = PRUnichar(ch);
697 mPushbackCount++;
699 //printf("Peek => %x\n", mLookAhead);
700 return PRInt32(mPushback[mPushbackCount - 1]);
703 void
704 nsCSSScanner::Pushback(PRUnichar aChar)
706 if (mPushbackCount == mPushbackSize) { // grow buffer
707 PRUnichar* newPushback = new PRUnichar[mPushbackSize + 4];
708 if (nsnull == newPushback) {
709 return;
711 mPushbackSize += 4;
712 memcpy(newPushback, mPushback, sizeof(PRUnichar) * mPushbackCount);
713 if (mPushback != mLocalPushback) {
714 delete [] mPushback;
716 mPushback = newPushback;
718 mPushback[mPushbackCount++] = aChar;
721 PRBool
722 nsCSSScanner::LookAhead(PRUnichar aChar)
724 PRInt32 ch = Read();
725 if (ch < 0) {
726 return PR_FALSE;
728 if (ch == aChar) {
729 return PR_TRUE;
731 Pushback(ch);
732 return PR_FALSE;
735 void
736 nsCSSScanner::EatWhiteSpace()
738 for (;;) {
739 PRInt32 ch = Read();
740 if (ch < 0) {
741 break;
743 if ((ch != ' ') && (ch != '\n') && (ch != '\t')) {
744 Pushback(ch);
745 break;
750 PRBool
751 nsCSSScanner::Next(nsCSSToken& aToken)
753 for (;;) { // Infinite loop so we can restart after comments.
754 PRInt32 ch = Read();
755 if (ch < 0) {
756 return PR_FALSE;
759 // UNICODE-RANGE
760 if ((ch == 'u' || ch == 'U') && Peek() == '+')
761 return ParseURange(ch, aToken);
763 // IDENT
764 if (StartsIdent(ch, Peek()))
765 return ParseIdent(ch, aToken);
767 // AT_KEYWORD
768 if (ch == '@') {
769 PRInt32 nextChar = Read();
770 if (nextChar >= 0) {
771 PRInt32 followingChar = Peek();
772 Pushback(nextChar);
773 if (StartsIdent(nextChar, followingChar))
774 return ParseAtKeyword(ch, aToken);
778 // NUMBER or DIM
779 if ((ch == '.') || (ch == '+') || (ch == '-')) {
780 PRInt32 nextChar = Peek();
781 if (IsDigit(nextChar)) {
782 return ParseNumber(ch, aToken);
784 else if (('.' == nextChar) && ('.' != ch)) {
785 nextChar = Read();
786 PRInt32 followingChar = Peek();
787 Pushback(nextChar);
788 if (IsDigit(followingChar))
789 return ParseNumber(ch, aToken);
792 if (IsDigit(ch)) {
793 return ParseNumber(ch, aToken);
796 // ID
797 if (ch == '#') {
798 return ParseRef(ch, aToken);
801 // STRING
802 if ((ch == '"') || (ch == '\'')) {
803 return ParseString(ch, aToken);
806 // WS
807 if (IsWhitespace(ch)) {
808 aToken.mType = eCSSToken_WhiteSpace;
809 aToken.mIdent.Assign(PRUnichar(ch));
810 EatWhiteSpace();
811 return PR_TRUE;
813 if (ch == '/' && !IsSVGMode()) {
814 PRInt32 nextChar = Peek();
815 if (nextChar == '*') {
816 (void) Read();
817 #if 0
818 // If we change our storage data structures such that comments are
819 // stored (for Editor), we should reenable this code, condition it
820 // on being in editor mode, and apply glazou's patch from bug
821 // 60290.
822 aToken.mIdent.SetCapacity(2);
823 aToken.mIdent.Assign(PRUnichar(ch));
824 aToken.mIdent.Append(PRUnichar(nextChar));
825 return ParseCComment(aToken);
826 #endif
827 if (!SkipCComment()) {
828 return PR_FALSE;
830 continue; // start again at the beginning
833 if (ch == '<') { // consume HTML comment tags
834 if (LookAhead('!')) {
835 if (LookAhead('-')) {
836 if (LookAhead('-')) {
837 aToken.mType = eCSSToken_HTMLComment;
838 aToken.mIdent.AssignLiteral("<!--");
839 return PR_TRUE;
841 Pushback('-');
843 Pushback('!');
846 if (ch == '-') { // check for HTML comment end
847 if (LookAhead('-')) {
848 if (LookAhead('>')) {
849 aToken.mType = eCSSToken_HTMLComment;
850 aToken.mIdent.AssignLiteral("-->");
851 return PR_TRUE;
853 Pushback('-');
857 // INCLUDES ("~=") and DASHMATCH ("|=")
858 if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
859 ( ch == '$' ) || ( ch == '*' )) {
860 PRInt32 nextChar = Read();
861 if ( nextChar == '=' ) {
862 if (ch == '~') {
863 aToken.mType = eCSSToken_Includes;
865 else if (ch == '|') {
866 aToken.mType = eCSSToken_Dashmatch;
868 else if (ch == '^') {
869 aToken.mType = eCSSToken_Beginsmatch;
871 else if (ch == '$') {
872 aToken.mType = eCSSToken_Endsmatch;
874 else if (ch == '*') {
875 aToken.mType = eCSSToken_Containsmatch;
877 return PR_TRUE;
878 } else if (nextChar >= 0) {
879 Pushback(nextChar);
882 aToken.mType = eCSSToken_Symbol;
883 aToken.mSymbol = ch;
884 return PR_TRUE;
888 PRBool
889 nsCSSScanner::NextURL(nsCSSToken& aToken)
891 PRInt32 ch = Read();
892 if (ch < 0) {
893 return PR_FALSE;
896 // STRING
897 if ((ch == '"') || (ch == '\'')) {
898 return ParseString(ch, aToken);
901 // WS
902 if (IsWhitespace(ch)) {
903 aToken.mType = eCSSToken_WhiteSpace;
904 aToken.mIdent.Assign(PRUnichar(ch));
905 EatWhiteSpace();
906 return PR_TRUE;
909 // Process a url lexical token. A CSS1 url token can contain
910 // characters beyond identifier characters (e.g. '/', ':', etc.)
911 // Because of this the normal rules for tokenizing the input don't
912 // apply very well. To simplify the parser and relax some of the
913 // requirements on the scanner we parse url's here. If we find a
914 // malformed URL then we emit a token of type "InvalidURL" so that
915 // the CSS1 parser can ignore the invalid input. The parser must
916 // treat an InvalidURL token like a Function token, and process
917 // tokens until a matching parenthesis.
919 aToken.mType = eCSSToken_InvalidURL;
920 nsString& ident = aToken.mIdent;
921 ident.SetLength(0);
923 Pushback(ch);
925 // start of a non-quoted url (which may be empty)
926 PRBool ok = PR_TRUE;
927 for (;;) {
928 ch = Read();
929 if (ch < 0) break;
930 if (ch == CSS_ESCAPE) {
931 ParseAndAppendEscape(ident);
932 } else if ((ch == '"') || (ch == '\'') || (ch == '(')) {
933 // This is an invalid URL spec
934 ok = PR_FALSE;
935 Pushback(ch); // push it back so the parser can match tokens and
936 // then closing parenthesis
937 break;
938 } else if (IsWhitespace(ch)) {
939 // Whitespace is allowed at the end of the URL
940 EatWhiteSpace();
941 if (LookAhead(')')) {
942 Pushback(')'); // leave the closing symbol
943 // done!
944 break;
946 // Whitespace is followed by something other than a
947 // ")". This is an invalid url spec.
948 ok = PR_FALSE;
949 break;
950 } else if (ch == ')') {
951 Pushback(ch);
952 // All done
953 break;
954 } else {
955 // A regular url character.
956 ident.Append(PRUnichar(ch));
960 // If the result of the above scanning is ok then change the token
961 // type to a useful one.
962 if (ok) {
963 aToken.mType = eCSSToken_URL;
965 return PR_TRUE;
969 void
970 nsCSSScanner::ParseAndAppendEscape(nsString& aOutput)
972 PRInt32 ch = Peek();
973 if (ch < 0) {
974 aOutput.Append(CSS_ESCAPE);
975 return;
977 if (IsHexDigit(ch)) {
978 PRInt32 rv = 0;
979 int i;
980 for (i = 0; i < 6; i++) { // up to six digits
981 ch = Read();
982 if (ch < 0) {
983 // Whoops: error or premature eof
984 break;
986 if (!IsHexDigit(ch) && !IsWhitespace(ch)) {
987 Pushback(ch);
988 break;
989 } else if (IsHexDigit(ch)) {
990 rv = rv * 16 + HexDigitValue(ch);
991 } else {
992 NS_ASSERTION(IsWhitespace(ch), "bad control flow");
993 // single space ends escape
994 break;
997 if (6 == i) { // look for trailing whitespace and eat it
998 ch = Peek();
999 if (IsWhitespace(ch)) {
1000 (void) Read();
1003 NS_ASSERTION(rv >= 0, "How did rv become negative?");
1004 // "[at most six hexadecimal digits following a backslash] stand
1005 // for the ISO 10646 character with that number, which must not be
1006 // zero. (It is undefined in CSS 2.1 what happens if a style sheet
1007 // does contain a character with Unicode codepoint zero.)"
1008 // -- CSS2.1 section 4.1.3
1010 // Silently deleting \0 opens a content-filtration loophole (see
1011 // bug 228856), so what we do instead is pretend the "cancels the
1012 // meaning of special characters" rule applied.
1013 if (rv > 0) {
1014 AppendUCS4ToUTF16(ENSURE_VALID_CHAR(rv), aOutput);
1015 } else {
1016 while (i--)
1017 aOutput.Append('0');
1018 if (IsWhitespace(ch))
1019 Pushback(ch);
1021 return;
1023 // "Any character except a hexidecimal digit can be escaped to
1024 // remove its special meaning by putting a backslash in front"
1025 // -- CSS1 spec section 7.1
1026 ch = Read(); // Consume the escaped character
1027 if ((ch > 0) && (ch != '\n')) {
1028 aOutput.Append(ch);
1033 * Gather up the characters in an identifier. The identfier was
1034 * started by "aChar" which will be appended to aIdent. The result
1035 * will be aIdent with all of the identifier characters appended
1036 * until the first non-identifier character is seen. The termination
1037 * character is unread for the future re-reading.
1039 PRBool
1040 nsCSSScanner::GatherIdent(PRInt32 aChar, nsString& aIdent)
1042 if (aChar == CSS_ESCAPE) {
1043 ParseAndAppendEscape(aIdent);
1045 else if (0 < aChar) {
1046 aIdent.Append(aChar);
1048 for (;;) {
1049 // If nothing in pushback, first try to get as much as possible in one go
1050 if (!mPushbackCount && EnsureData()) {
1051 // See how much we can consume and append in one go
1052 PRUint32 n = mOffset;
1053 // Count number of Ident characters that can be processed
1054 while (n < mCount && IsIdent(mReadPointer[n])) {
1055 ++n;
1057 // Add to the token what we have so far
1058 if (n > mOffset) {
1059 #ifdef CSS_REPORT_PARSE_ERRORS
1060 mColNumber += n - mOffset;
1061 #endif
1062 aIdent.Append(&mReadPointer[mOffset], n - mOffset);
1063 mOffset = n;
1067 aChar = Read();
1068 if (aChar < 0) break;
1069 if (aChar == CSS_ESCAPE) {
1070 ParseAndAppendEscape(aIdent);
1071 } else if (IsIdent(aChar)) {
1072 aIdent.Append(PRUnichar(aChar));
1073 } else {
1074 Pushback(aChar);
1075 break;
1078 return PR_TRUE;
1081 PRBool
1082 nsCSSScanner::ParseRef(PRInt32 aChar, nsCSSToken& aToken)
1084 aToken.mIdent.SetLength(0);
1085 aToken.mType = eCSSToken_Ref;
1086 PRInt32 ch = Read();
1087 if (ch < 0) {
1088 return PR_FALSE;
1090 if (IsIdent(ch) || ch == CSS_ESCAPE) {
1091 // First char after the '#' is a valid ident char (or an escape),
1092 // so it makes sense to keep going
1093 if (StartsIdent(ch, Peek())) {
1094 aToken.mType = eCSSToken_ID;
1096 return GatherIdent(ch, aToken.mIdent);
1099 // No ident chars after the '#'. Just unread |ch| and get out of here.
1100 Pushback(ch);
1101 return PR_TRUE;
1104 PRBool
1105 nsCSSScanner::ParseIdent(PRInt32 aChar, nsCSSToken& aToken)
1107 nsString& ident = aToken.mIdent;
1108 ident.SetLength(0);
1109 if (!GatherIdent(aChar, ident)) {
1110 return PR_FALSE;
1113 nsCSSTokenType tokenType = eCSSToken_Ident;
1114 // look for functions (ie: "ident(")
1115 if (Peek() == PRUnichar('(')) {
1116 Read();
1117 tokenType = eCSSToken_Function;
1120 aToken.mType = tokenType;
1121 return PR_TRUE;
1124 PRBool
1125 nsCSSScanner::ParseAtKeyword(PRInt32 aChar, nsCSSToken& aToken)
1127 aToken.mIdent.SetLength(0);
1128 aToken.mType = eCSSToken_AtKeyword;
1129 return GatherIdent(0, aToken.mIdent);
1132 PRBool
1133 nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
1135 NS_PRECONDITION(c == '.' || c == '+' || c == '-' || IsDigit(c),
1136 "Why did we get called?");
1137 aToken.mHasSign = (c == '+' || c == '-');
1139 // Our sign.
1140 PRInt32 sign = c == '-' ? -1 : 1;
1141 // Absolute value of the integer part of the mantissa. This is a double so
1142 // we don't run into overflow issues for consumers that only care about our
1143 // floating-point value while still being able to express the full PRInt32
1144 // range for consumers who want integers.
1145 double intPart = 0;
1146 // Fractional part of the mantissa. This is a double so that when we convert
1147 // to float at the end we'll end up rounding to nearest float instead of
1148 // truncating down (as we would if fracPart were a float and we just
1149 // effectively lost the last several digits).
1150 double fracPart = 0;
1151 // Absolute value of the power of 10 that we should multiply by (only
1152 // relevant for numbers in scientific notation). Has to be a signed integer,
1153 // because multiplication of signed by unsigned converts the unsigned to
1154 // signed, so if we plan to actually multiply by expSign...
1155 PRInt32 exponent = 0;
1156 // Sign of the exponent.
1157 PRInt32 expSign = 1;
1159 if (aToken.mHasSign) {
1160 NS_ASSERTION(c != '.', "How did that happen?");
1161 c = Read();
1164 PRBool gotDot = (c == '.');
1166 if (!gotDot) {
1167 // Parse the integer part of the mantisssa
1168 NS_ASSERTION(IsDigit(c), "Why did we get called?");
1169 do {
1170 intPart = 10*intPart + DecimalDigitValue(c);
1171 c = Read();
1172 // The IsDigit check will do the right thing even if Read() returns < 0
1173 } while (IsDigit(c));
1175 gotDot = (c == '.') && IsDigit(Peek());
1178 if (gotDot) {
1179 // Parse the fractional part of the mantissa.
1180 c = Read();
1181 NS_ASSERTION(IsDigit(c), "How did we get here?");
1182 // Power of ten by which we need to divide our next digit
1183 float divisor = 10;
1184 do {
1185 fracPart += DecimalDigitValue(c) / divisor;
1186 divisor *= 10;
1187 c = Read();
1188 // The IsDigit check will do the right thing even if Read() returns < 0
1189 } while (IsDigit(c));
1192 PRBool gotE = PR_FALSE;
1193 if (IsSVGMode() && (c == 'e' || c == 'E')) {
1194 PRInt32 nextChar = Peek();
1195 PRInt32 expSignChar = 0;
1196 if (nextChar == '-' || nextChar == '+') {
1197 expSignChar = Read();
1198 nextChar = Peek();
1200 if (IsDigit(nextChar)) {
1201 gotE = PR_TRUE;
1202 if (expSignChar == '-') {
1203 expSign = -1;
1206 c = Read();
1207 NS_ASSERTION(IsDigit(c), "Peek() must have lied");
1208 do {
1209 exponent = 10*exponent + DecimalDigitValue(c);
1210 c = Read();
1211 // The IsDigit check will do the right thing even if Read() returns < 0
1212 } while (IsDigit(c));
1213 } else {
1214 if (expSignChar) {
1215 Pushback(expSignChar);
1220 nsCSSTokenType type = eCSSToken_Number;
1222 // Set mIntegerValid for all cases (except %, below) because we need
1223 // it for the "2n" in :nth-child(2n).
1224 aToken.mIntegerValid = PR_FALSE;
1226 // Time to reassemble our number.
1227 float value = float(sign * (intPart + fracPart));
1228 if (gotE) {
1229 // pow(), not powf(), because at least wince doesn't have the latter.
1230 // And explicitly cast everything to doubles to avoid issues with
1231 // overloaded pow() on Windows.
1232 value *= pow(10.0, double(expSign * exponent));
1233 } else if (!gotDot) {
1234 // Clamp values outside of integer range.
1235 if (sign > 0) {
1236 aToken.mInteger = PRInt32(NS_MIN(intPart, double(PR_INT32_MAX)));
1237 } else {
1238 aToken.mInteger = PRInt32(NS_MAX(-intPart, double(PR_INT32_MIN)));
1240 aToken.mIntegerValid = PR_TRUE;
1243 nsString& ident = aToken.mIdent;
1244 ident.Truncate();
1246 // Look at character that terminated the number
1247 if (c >= 0) {
1248 if (StartsIdent(c, Peek())) {
1249 if (!GatherIdent(c, ident)) {
1250 return PR_FALSE;
1252 type = eCSSToken_Dimension;
1253 } else if ('%' == c) {
1254 type = eCSSToken_Percentage;
1255 value = value / 100.0f;
1256 aToken.mIntegerValid = PR_FALSE;
1257 } else {
1258 // Put back character that stopped numeric scan
1259 Pushback(c);
1262 aToken.mNumber = value;
1263 aToken.mType = type;
1264 return PR_TRUE;
1267 PRBool
1268 nsCSSScanner::SkipCComment()
1270 for (;;) {
1271 PRInt32 ch = Read();
1272 if (ch < 0) break;
1273 if (ch == '*') {
1274 if (LookAhead('/')) {
1275 return PR_TRUE;
1280 REPORT_UNEXPECTED_EOF(PECommentEOF);
1281 return PR_FALSE;
// Scan the body of a quoted string into aToken.
//
//   aStop  - the character that ends the string; it is also stored in
//            aToken.mSymbol so the quoting style is remembered.
//   aToken - receives the string text in mIdent.  Its type is set to
//            eCSSToken_String, and changed to eCSSToken_Error if a newline
//            is read before aStop.
//
// Always returns PR_TRUE.  A negative Read() result (presumably end of
// input; confirm against Read()) simply ends the string and is not flagged
// as an error here.
1284 PRBool
1285 nsCSSScanner::ParseString(PRInt32 aStop, nsCSSToken& aToken)
1287 aToken.mIdent.SetLength(0);
1288 aToken.mType = eCSSToken_String;
1289 aToken.mSymbol = PRUnichar(aStop); // remember how it's quoted
1290 for (;;) {
1291 // If nothing in pushback, first try to get as much as possible in one go
1292 if (!mPushbackCount && EnsureData()) {
1293 // See how much we can consume and append in one go
1294 PRUint32 n = mOffset;
1295 // Count number of characters that can be processed
1296 for (;n < mCount; ++n) {
1297 PRUnichar nextChar = mReadPointer[n];
// Stop the bulk scan at the terminator, an escape, or any newline-like
// character.  The stopping character is not appended; it stays at
// mReadPointer[n] for the single-character handling further down.
1298 if ((nextChar == aStop) || (nextChar == CSS_ESCAPE) ||
1299 (nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
1300 break;
// Column bookkeeping, only compiled in when parse errors are reported:
// a tab moves mColNumber to the value computed from TAB_STOP_WIDTH below,
// any other character advances it by one.
1302 #ifdef CSS_REPORT_PARSE_ERRORS
1303 if (nextChar == '\t') {
1304 mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
1305 * TAB_STOP_WIDTH;
1306 } else {
1307 ++mColNumber;
1309 #endif
1311 // Add to the token what we have so far
1312 if (n > mOffset) {
1313 aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
// Mark the appended run as consumed.
1314 mOffset = n;
// Handle one character through Read().  This covers whatever stopped the
// bulk scan as well as the cases where the bulk scan was skipped (pushback
// not empty, or EnsureData() returned false).
1317 PRInt32 ch = Read();
1318 if (ch < 0 || ch == aStop) {
1319 break;
// NOTE(review): only a line feed is tested here although the bulk scan also
// stops at carriage return and form feed; presumably Read() maps those to a
// line feed.  Confirm against Read().
1321 if (ch == '\n') {
1322 aToken.mType = eCSSToken_Error;
1323 #ifdef CSS_REPORT_PARSE_ERRORS
1324 ReportUnexpectedToken(aToken, "SEUnterminatedString");
1325 #endif
1326 break;
// An escape is expanded into mIdent by ParseAndAppendEscape; any other
// character is appended as is.
1328 if (ch == CSS_ESCAPE) {
1329 ParseAndAppendEscape(aToken.mIdent);
1330 } else {
1331 aToken.mIdent.Append(ch);
1334 return PR_TRUE;
1337 // UNICODE-RANGE tokens match the regular expression
1339 // u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
1341 // However, some such tokens are "invalid". There are three valid forms:
1343 // u+[0-9a-f]{x} 1 <= x <= 6
1344 // u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
1345 // u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
1347 // All unicode-range tokens have their text recorded in mIdent; valid ones
1348 // are also decoded into mInteger and mInteger2, and mIntegerValid is set.
1350 PRBool
1351 nsCSSScanner::ParseURange(PRInt32 aChar, nsCSSToken& aResult)
1353 PRInt32 intro2 = Read();
1354 PRInt32 ch = Peek();
1356 // We should only ever be called if these things are true.
1357 NS_ASSERTION(aChar == 'u' || aChar == 'U',
1358 "unicode-range called with improper introducer (U)");
1359 NS_ASSERTION(intro2 == '+',
1360 "unicode-range called with improper introducer (+)");
1362 // If the character immediately after the '+' is not a hex digit or
1363 // '?', this is not really a unicode-range token; push everything
1364 // back and scan the U as an ident.
1365 if (!IsHexDigit(ch) && ch != '?') {
1366 Pushback(intro2);
1367 Pushback(aChar);
1368 return ParseIdent(aChar, aResult);
1371 aResult.mIdent.Truncate();
1372 aResult.mIdent.Append(aChar);
1373 aResult.mIdent.Append(intro2);
1375 PRBool valid = PR_TRUE;
1376 PRBool haveQues = PR_FALSE;
1377 PRUint32 low = 0;
1378 PRUint32 high = 0;
1379 int i = 0;
1381 for (;;) {
1382 ch = Read();
1383 i++;
1384 if (i == 7 || !(IsHexDigit(ch) || ch == '?')) {
1385 break;
1388 aResult.mIdent.Append(ch);
1389 if (IsHexDigit(ch)) {
1390 if (haveQues) {
1391 valid = PR_FALSE; // all question marks should be at the end
1393 low = low*16 + HexDigitValue(ch);
1394 high = high*16 + HexDigitValue(ch);
1395 } else {
1396 haveQues = PR_TRUE;
1397 low = low*16 + 0x0;
1398 high = high*16 + 0xF;
1402 if (ch == '-' && IsHexDigit(Peek())) {
1403 if (haveQues) {
1404 valid = PR_FALSE;
1407 aResult.mIdent.Append(ch);
1408 high = 0;
1409 i = 0;
1410 for (;;) {
1411 ch = Read();
1412 i++;
1413 if (i == 7 || !IsHexDigit(ch)) {
1414 break;
1416 aResult.mIdent.Append(ch);
1417 high = high*16 + HexDigitValue(ch);
1420 Pushback(ch);
1422 aResult.mInteger = low;
1423 aResult.mInteger2 = high;
1424 aResult.mIntegerValid = valid;
1425 aResult.mType = eCSSToken_URange;
1426 return PR_TRUE;