Bumping manifests a=b2g-bump
[gecko.git] / layout / style / nsCSSScanner.h
blobdc39f7d061953116d21d2a6cce000f18cef02258
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /* tokenization of CSS style sheets */
8 #ifndef nsCSSScanner_h___
9 #define nsCSSScanner_h___
11 #include "nsString.h"
// Forward declaration only: this header refers to ErrorReporter solely
// through a pointer, so the full class definition is not needed here.
namespace mozilla {
namespace css {
class ErrorReporter;
} // namespace css
} // namespace mozilla
// Token types; in close but not perfect correspondence to the token
// categorization in section 4.1.1 of CSS2.1.  (The deviations are all
// the fault of css3-selectors, which has requirements that can only be
// met by changing the generic tokenization.)  The trailing comment on
// each enumerator illustrates the lexical form of that token.
//
// The "value fields" mentioned below are the data members of nsCSSToken.
enum nsCSSTokenType {
  // White space of any kind.  No value fields are used.  Note that
  // comments do *not* count as white space; comments separate tokens
  // but are not themselves tokens.
  eCSSToken_Whitespace,     //

  // Identifier-like tokens.  mIdent is the text of the identifier.
  // The difference between ID and Hash is: if the text after the #
  // would have been a valid Ident if the # hadn't been there, the
  // scanner produces an ID token.  Otherwise it produces a Hash token.
  // (This distinction is required by css3-selectors.)
  eCSSToken_Ident,          // word
  eCSSToken_Function,       // word(
  eCSSToken_AtKeyword,      // @word
  eCSSToken_ID,             // #word
  eCSSToken_Hash,           // #0word

  // Numeric tokens.  mNumber is the floating-point value of the
  // number, and mHasSign indicates whether there was an explicit sign
  // (+ or -) in front of the number.  If mIntegerValid is true, the
  // number had the lexical form of an integer, and mInteger is its
  // integer value.  Lexically integer values outside the range of a
  // 32-bit signed number are clamped to the maximum values; mNumber
  // will indicate a 'truer' value in that case.  Percentage tokens
  // are always considered not to be integers, even if their numeric
  // value is integral (100% => mNumber = 1.0).  For Dimension
  // tokens, mIdent holds the text of the unit.
  eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
  eCSSToken_Dimension,      // 24px 8.5in
  eCSSToken_Percentage,     // 85% 1280.4%

  // String-like tokens.  In all cases, mIdent holds the text
  // belonging to the string, and mSymbol holds the delimiter
  // character, which may be ', ", or zero (only for unquoted URLs).
  // Bad_String and Bad_URL tokens are emitted when the closing
  // delimiter or parenthesis was missing.
  eCSSToken_String,         // 'foo bar' "foo bar"
  eCSSToken_Bad_String,     // 'foo bar
  eCSSToken_URL,            // url(foobar) url("foo bar")
  eCSSToken_Bad_URL,        // url(foo

  // Any one-character symbol.  mSymbol holds the character.
  eCSSToken_Symbol,         // . ; { } ! *

  // Match operators.  These are single tokens rather than pairs of
  // Symbol tokens because css3-selectors forbids the presence of
  // comments between the two characters.  No value fields are used;
  // the token type indicates which operator.
  eCSSToken_Includes,       // ~=
  eCSSToken_Dashmatch,      // |=
  eCSSToken_Beginsmatch,    // ^=
  eCSSToken_Endsmatch,      // $=
  eCSSToken_Containsmatch,  // *=

  // Unicode-range token: currently used only in @font-face.
  // The lexical rule for this token includes several forms that are
  // semantically invalid.  Therefore, mIdent always holds the
  // complete original text of the token (so we can print it
  // accurately in diagnostics), and mIntegerValid is true iff the
  // token is semantically valid.  In that case, mInteger holds the
  // lowest value included in the range, and mInteger2 holds the
  // highest value included in the range.
  eCSSToken_URange,         // U+007e U+01?? U+2000-206F

  // HTML comment delimiters, ignored as a unit when they appear at
  // the top level of a style sheet, for compatibility with websites
  // that were written to keep working in pre-CSS browsers.  This token
  // type subsumes the css2.1 CDO and CDC tokens, which are always
  // treated the same by the parser.  mIdent holds the text of the
  // token, for diagnostics.
  eCSSToken_HTMLComment,    // <!-- -->
};
// Classification of tokens used to determine if a "/**/" string must be
// inserted if pasting token streams together when serializing.  We include
// values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
// as css-syntax does not treat these as whole tokens, but we will still
// need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
// and between a '/' delim and a '*=' containsmatch.
//
// https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
enum nsCSSTokenSerializationType {
  eCSSTokenSerialization_Nothing,
  eCSSTokenSerialization_Whitespace,
  eCSSTokenSerialization_AtKeyword_or_Hash,
  eCSSTokenSerialization_Number,
  eCSSTokenSerialization_Dimension,
  eCSSTokenSerialization_Percentage,
  eCSSTokenSerialization_URange,
  eCSSTokenSerialization_URL_or_BadURL,
  eCSSTokenSerialization_Function,
  eCSSTokenSerialization_Ident,
  eCSSTokenSerialization_CDC,
  eCSSTokenSerialization_DashMatch,
  eCSSTokenSerialization_ContainsMatch,
  // The remaining classes distinguish individual one-character symbols.
  eCSSTokenSerialization_Symbol_Hash,         // '#'
  eCSSTokenSerialization_Symbol_At,           // '@'
  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
  eCSSTokenSerialization_Symbol_Minus,        // '-'
  eCSSTokenSerialization_Symbol_OpenParen,    // '('
  eCSSTokenSerialization_Symbol_Question,     // '?'
  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
  eCSSTokenSerialization_Symbol_Equals,       // '='
  eCSSTokenSerialization_Symbol_Bar,          // '|'
  eCSSTokenSerialization_Symbol_Slash,        // '/'
  eCSSTokenSerialization_Symbol_Asterisk,     // '*'
  eCSSTokenSerialization_Other                // anything else
};
134 // A single token returned from the scanner. mType is always
135 // meaningful; comments above describe which other fields are
136 // meaningful for which token types.
137 struct nsCSSToken {
138 nsAutoString mIdent;
139 float mNumber;
140 int32_t mInteger;
141 int32_t mInteger2;
142 nsCSSTokenType mType;
143 char16_t mSymbol;
144 bool mIntegerValid;
145 bool mHasSign;
147 nsCSSToken()
148 : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
149 mSymbol('\0'), mIntegerValid(false), mHasSign(false)
152 bool IsSymbol(char16_t aSymbol) const {
153 return mType == eCSSToken_Symbol && mSymbol == aSymbol;
156 void AppendToString(nsString& aBuffer) const;
159 // Represents an nsCSSScanner's saved position in the input buffer.
160 class nsCSSScannerPosition {
161 friend class nsCSSScanner;
162 public:
163 nsCSSScannerPosition() : mInitialized(false) { }
165 uint32_t LineNumber() {
166 MOZ_ASSERT(mInitialized);
167 return mLineNumber;
170 uint32_t LineOffset() {
171 MOZ_ASSERT(mInitialized);
172 return mLineOffset;
175 private:
176 uint32_t mOffset;
177 uint32_t mLineNumber;
178 uint32_t mLineOffset;
179 uint32_t mTokenLineNumber;
180 uint32_t mTokenLineOffset;
181 uint32_t mTokenOffset;
182 bool mInitialized;
185 // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
186 // compatible tokenization rules. Used internally by nsCSSParser;
187 // not available for use by other code.
188 class nsCSSScanner {
189 public:
190 // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
191 // when the line number is unknown.
192 nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
193 ~nsCSSScanner();
195 void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
196 mReporter = aReporter;
198 // Set whether or not we are processing SVG
199 void SetSVGMode(bool aSVGMode) {
200 mSVGMode = aSVGMode;
202 bool IsSVGMode() const {
203 return mSVGMode;
206 // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
207 void ClearSeenBadToken() { mSeenBadToken = false; }
208 bool SeenBadToken() const { return mSeenBadToken; }
210 // Reset or check whether a "var(" FUNCTION token has been seen.
211 void ClearSeenVariableReference() { mSeenVariableReference = false; }
212 bool SeenVariableReference() const { return mSeenVariableReference; }
214 // Get the 1-based line number of the last character of
215 // the most recently processed token.
216 uint32_t GetLineNumber() const { return mTokenLineNumber; }
218 // Get the 0-based column number of the first character of
219 // the most recently processed token.
220 uint32_t GetColumnNumber() const
221 { return mTokenOffset - mTokenLineOffset; }
223 // Get the text of the line containing the first character of
224 // the most recently processed token.
225 nsDependentSubstring GetCurrentLine() const;
227 // Get the next token. Return false on EOF. aTokenResult is filled
228 // in with the data for the token. If aSkipWS is true, skip over
229 // eCSSToken_Whitespace tokens rather than returning them.
230 bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
232 // Get the body of an URL token (everything after the 'url(').
233 // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
234 // which, for historical reasons, must make additional function
235 // tokens behave like url(). Please do not add new uses to the
236 // parser.
237 void NextURL(nsCSSToken& aTokenResult);
239 // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
240 // because "2n-1" is a single DIMENSION token, and "n-1" is a single
241 // IDENT token, but the :nth() selector syntax wants to interpret
242 // them the same as "2n -1" and "n -1" respectively. Please do not
243 // add new uses to the parser.
245 // Note: this function may not be used to back up over a line boundary.
246 void Backup(uint32_t n);
248 // Starts recording the input stream from the current position.
249 void StartRecording();
251 // Abandons recording of the input stream.
252 void StopRecording();
254 // Stops recording of the input stream and appends the recorded
255 // input to aBuffer.
256 void StopRecording(nsString& aBuffer);
258 // Returns the length of the current recording.
259 uint32_t RecordingLength() const;
261 #ifdef DEBUG
262 bool IsRecording() const;
263 #endif
265 // Stores the current scanner offset into the specified object.
266 void SavePosition(nsCSSScannerPosition& aState);
268 // Resets the scanner offset to a position saved by SavePosition.
269 void RestoreSavedPosition(const nsCSSScannerPosition& aState);
271 enum EOFCharacters {
272 eEOFCharacters_None = 0x0000,
274 // to handle \<EOF> inside strings
275 eEOFCharacters_DropBackslash = 0x0001,
277 // to handle \<EOF> outside strings
278 eEOFCharacters_ReplacementChar = 0x0002,
280 // to close comments
281 eEOFCharacters_Asterisk = 0x0004,
282 eEOFCharacters_Slash = 0x0008,
284 // to close double-quoted strings
285 eEOFCharacters_DoubleQuote = 0x0010,
287 // to close single-quoted strings
288 eEOFCharacters_SingleQuote = 0x0020,
290 // to close URLs
291 eEOFCharacters_CloseParen = 0x0040,
294 // Appends any characters to the specified string the input stream to make the
295 // last token not rely on special EOF handling behavior.
297 // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
298 static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
299 nsAString& aString);
301 EOFCharacters GetEOFCharacters() const {
302 #ifdef DEBUG
303 AssertEOFCharactersValid(mEOFCharacters);
304 #endif
305 return mEOFCharacters;
308 #ifdef DEBUG
309 static void AssertEOFCharactersValid(uint32_t c);
310 #endif
312 protected:
313 int32_t Peek(uint32_t n = 0);
314 void Advance(uint32_t n = 1);
315 void AdvanceLine();
317 void SkipWhitespace();
318 void SkipComment();
320 bool GatherEscape(nsString& aOutput, bool aInString);
321 bool GatherText(uint8_t aClass, nsString& aIdent);
323 bool ScanIdent(nsCSSToken& aResult);
324 bool ScanAtKeyword(nsCSSToken& aResult);
325 bool ScanHash(nsCSSToken& aResult);
326 bool ScanNumber(nsCSSToken& aResult);
327 bool ScanString(nsCSSToken& aResult);
328 bool ScanURange(nsCSSToken& aResult);
330 void SetEOFCharacters(uint32_t aEOFCharacters);
331 void AddEOFCharacters(uint32_t aEOFCharacters);
333 const char16_t *mBuffer;
334 uint32_t mOffset;
335 uint32_t mCount;
337 uint32_t mLineNumber;
338 uint32_t mLineOffset;
340 uint32_t mTokenLineNumber;
341 uint32_t mTokenLineOffset;
342 uint32_t mTokenOffset;
344 uint32_t mRecordStartOffset;
345 EOFCharacters mEOFCharacters;
347 mozilla::css::ErrorReporter *mReporter;
349 // True if we are in SVG mode; false in "normal" CSS
350 bool mSVGMode;
351 bool mRecording;
352 bool mSeenBadToken;
353 bool mSeenVariableReference;
356 // Token for the grid-template-areas micro-syntax
357 // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
358 struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
359 nsAutoString mName; // Empty for a null cell, non-empty for a named cell
360 bool isTrash; // True for a trash token, mName is ignored in this case.
363 // Scanner for the grid-template-areas micro-syntax
364 class nsCSSGridTemplateAreaScanner {
365 public:
366 explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
368 // Get the next token. Return false on EOF.
369 // aTokenResult is filled in with the data for the token.
370 bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
372 private:
373 const char16_t *mBuffer;
374 uint32_t mOffset;
375 uint32_t mCount;
378 #endif /* nsCSSScanner_h___ */