2 * Copyright (c) 2005-2007 Henri Sivonen
3 * Copyright (c) 2007-2010 Mozilla Foundation
4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 * Foundation, and Opera Software ASA.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 * Please edit Tokenizer.java instead and regenerate.
31 #ifndef nsHtml5Tokenizer_h__
32 #define nsHtml5Tokenizer_h__
36 #include "nsHtml5AtomTable.h"
38 #include "nsINameSpaceManager.h"
39 #include "nsIContent.h"
40 #include "nsIDocument.h"
41 #include "nsTraceRefcnt.h"
43 #include "nsHtml5DocumentMode.h"
44 #include "nsHtml5ArrayCopy.h"
45 #include "nsHtml5NamedCharacters.h"
46 #include "nsHtml5NamedCharactersAccel.h"
47 #include "nsHtml5Atoms.h"
48 #include "nsHtml5ByteReadable.h"
49 #include "nsIUnicodeDecoder.h"
50 #include "nsAHtml5TreeBuilderState.h"
51 #include "nsHtml5Macros.h"
// Forward declarations of collaborating parser classes. Only the names are
// introduced here; several of them are used as pointer types in the
// nsHtml5Tokenizer declaration that follows.
class nsHtml5StreamParser;
class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5HtmlAttributes;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
// Declaration of nsHtml5Tokenizer. The file header states that this file is
// generated from Tokenizer.java and should not be edited by hand.
// NOTE(review): in this view the class braces, the access specifiers and the
// bodies of most inline members are not visible, and some members used below
// (e.g. strBufLen) are not declared in the visible text. The comments describe
// only what the visible declarations show; anything else is marked as an
// assumption.
65 class nsHtml5Tokenizer
// Static PRUnichar arrays declared without a bound; their sizes and contents
// are defined elsewhere (not visible here).
68 static PRUnichar LT_GT
[];
69 static PRUnichar LT_SOLIDUS
[];
70 static PRUnichar RSQB_RSQB
[];
71 static PRUnichar REPLACEMENT_CHARACTER
[];
72 static PRUnichar LF
[];
73 static PRUnichar CDATA_LSQB
[];
// NOTE(review): OCTYPE / UBLIC / YSTEM look like "DOCTYPE" / "PUBLIC" /
// "SYSTEM" without their first letter -- confirm against the definitions.
74 static PRUnichar OCTYPE
[];
75 static PRUnichar UBLIC
[];
76 static PRUnichar YSTEM
[];
// Static staticJArray<PRUnichar,PRInt32> members whose names match HTML
// element names (title, script, style, plaintext, xmp, textarea, iframe,
// noembed, noscript, noframes).
// NOTE(review): presumably set up by initializeStatics() and torn down by
// releaseStatics() -- confirm in the implementation file.
77 static staticJArray
<PRUnichar
,PRInt32
> TITLE_ARR
;
78 static staticJArray
<PRUnichar
,PRInt32
> SCRIPT_ARR
;
79 static staticJArray
<PRUnichar
,PRInt32
> STYLE_ARR
;
80 static staticJArray
<PRUnichar
,PRInt32
> PLAINTEXT_ARR
;
81 static staticJArray
<PRUnichar
,PRInt32
> XMP_ARR
;
82 static staticJArray
<PRUnichar
,PRInt32
> TEXTAREA_ARR
;
83 static staticJArray
<PRUnichar
,PRInt32
> IFRAME_ARR
;
84 static staticJArray
<PRUnichar
,PRInt32
> NOEMBED_ARR
;
85 static staticJArray
<PRUnichar
,PRInt32
> NOSCRIPT_ARR
;
86 static staticJArray
<PRUnichar
,PRInt32
> NOFRAMES_ARR
;
// Collaborator pointers. The constructor and setEncodingDeclarationHandler()
// below take parameters with these same names and types.
88 nsHtml5TreeBuilder
* tokenHandler
;
89 nsHtml5StreamParser
* encodingDeclarationHandler
;
// Integer state fields.
// NOTE(review): returnStateSave presumably preserves the returnState argument
// of stateLoop() between calls -- confirm in the implementation file.
93 PRInt32 returnStateSave
;
100 PRInt32 firstCharKey
;
// Character buffers (autoJArray<PRUnichar,PRInt32>). longStrBufLen is the only
// length counter declared in the visible text; strBufLen, which is passed to
// appendLongStrBuf() further down, is not declared here.
115 autoJArray
<PRUnichar
,PRInt32
> strBuf
;
117 autoJArray
<PRUnichar
,PRInt32
> longStrBuf
;
118 PRInt32 longStrBufLen
;
119 autoJArray
<PRUnichar
,PRInt32
> bmpChar
;
120 autoJArray
<PRUnichar
,PRInt32
> astralChar
;
// Expected end tag, held both as an element-name pointer and as a
// jArray<PRUnichar,PRInt32>; see endTagExpectationToArray() below.
122 nsHtml5ElementName
* endTagExpectation
;
124 jArray
<PRUnichar
,PRInt32
> endTagExpectationAsArray
;
// Pointers to the tag/attribute/doctype pieces currently being assembled.
// NOTE(review): ownership of these pointers is not visible in this header.
128 nsHtml5ElementName
* tagName
;
130 nsHtml5AttributeName
* attributeName
;
132 nsIAtom
* doctypeName
;
133 nsString
* publicIdentifier
;
134 nsString
* systemIdentifier
;
135 nsHtml5HtmlAttributes
* attributes
;
136 PRInt32 mappingLangToXmlLang
;
// NOTE(review): presumably set by requestSuspension() -- confirm.
137 PRBool shouldSuspend
;
// Atom table pointer; setInterner() below takes a parameter of the same name.
142 nsHtml5AtomTable
* interner
;
// Constructor taking the nsHtml5TreeBuilder token handler.
144 nsHtml5Tokenizer(nsHtml5TreeBuilder
* tokenHandler
);
145 void setInterner(nsHtml5AtomTable
* interner
);
146 void initLocation(nsString
* newPublicId
, nsString
* newSystemId
);
// Two overloads: the end tag expectation may be given as an nsIAtom* or as an
// nsHtml5ElementName*, together with an integer tokenizer state.
147 void setStateAndEndTagExpectation(PRInt32 specialTokenizerState
, nsIAtom
* endTagExpectation
);
148 void setStateAndEndTagExpectation(PRInt32 specialTokenizerState
, nsHtml5ElementName
* endTagExpectation
);
150 void endTagExpectationToArray();
152 void setLineNumber(PRInt32 line
);
// Inline getter returning PRInt32; its body is not visible in this view.
153 inline PRInt32
getLineNumber()
158 nsHtml5HtmlAttributes
* emptyAttributes();
// strBuf helpers. The bodies of the two inline members are not visible here.
160 inline void clearStrBufAndAppend(PRUnichar c
)
166 inline void clearStrBuf()
171 void appendStrBuf(PRUnichar c
);
173 nsString
* strBufToString();
175 void strBufToDoctypeName();
// longStrBuf helpers. The bodies of clearLongStrBuf() and
// clearLongStrBufAndAppend() are not visible here.
177 inline void clearLongStrBuf()
182 inline void clearLongStrBufAndAppend(PRUnichar c
)
188 void appendLongStrBuf(PRUnichar c
);
// Visible body: appends a single '-' through appendLongStrBuf(PRUnichar).
189 inline void appendSecondHyphenToBogusComment()
191 appendLongStrBuf('-');
// Inline; body not visible in this view.
194 inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c
)
// Range overload: appends `length` code units of `buffer` starting at `offset`
// (per the parameter names).
200 void appendLongStrBuf(PRUnichar
* buffer
, PRInt32 offset
, PRInt32 length
);
// Visible body: passes strBuf, offset 0 and length strBufLen to the range
// overload of appendLongStrBuf().
201 inline void appendStrBufToLongStrBuf()
203 appendLongStrBuf(strBuf
, 0, strBufLen
);
206 nsString
* longStrBufToString();
// Token emission and attribute bookkeeping; implementations are elsewhere.
207 void emitComment(PRInt32 provisionalHyphens
, PRInt32 pos
);
209 void flushChars(PRUnichar
* buf
, PRInt32 pos
);
211 void resetAttributes();
212 void strBufToElementNameString();
213 PRInt32
emitCurrentTagToken(PRBool selfClosing
, PRInt32 pos
);
214 void attributeNameComplete();
215 void addAttributeWithoutValue();
216 void addAttributeWithValue();
// Entry point taking an nsHtml5UTF16Buffer; returns PRBool.
// NOTE(review): the meaning of the return value is not visible here.
219 PRBool
tokenizeBuffer(nsHtml5UTF16Buffer
* buffer
);
// State-machine loop; returns PRInt32.
// NOTE(review): `state` / `returnState` presumably take the
// NS_HTML5TOKENIZER_* constants defined at the end of this header -- confirm.
221 PRInt32
stateLoop(PRInt32 state
, PRUnichar c
, PRInt32 pos
, PRUnichar
* buf
, PRBool reconsume
, PRInt32 returnState
, PRInt32 endPos
);
222 void initDoctypeFields();
// Visible body: calls silentCarriageReturn(), then appends '\n' (not '\r')
// through adjustDoubleHyphenAndAppendToLongStrBufAndErr().
223 inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
225 silentCarriageReturn();
226 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
// Only the '\n' append below is visible for this member.
229 inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
232 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
// Only the '\n' append below is visible for this member.
235 inline void appendLongStrBufLineFeed()
238 appendLongStrBuf('\n');
// Visible body: calls silentCarriageReturn(), then appends '\n' (not '\r').
241 inline void appendLongStrBufCarriageReturn()
243 silentCarriageReturn();
244 appendLongStrBuf('\n');
// Inline; bodies of the two silent* members are not visible in this view.
248 inline void silentCarriageReturn()
254 inline void silentLineFeed()
260 void emitCarriageReturn(PRUnichar
* buf
, PRInt32 pos
);
261 void emitReplacementCharacter(PRUnichar
* buf
, PRInt32 pos
);
262 void setAdditionalAndRememberAmpersandLocation(PRUnichar add
);
264 void bogusDoctypeWithoutQuirks();
265 void emitOrAppendStrBuf(PRInt32 returnState
);
266 void handleNcrValue(PRInt32 returnState
);
270 void emitDoctypeToken(PRInt32 pos
);
// Inline, returns PRUnichar; body not visible in this view.
272 inline PRUnichar
checkChar(PRUnichar
* buf
, PRInt32 pos
)
278 void internalEncodingDeclaration(nsString
* internalCharset
);
// NOTE(review): by their names these take one or two code units from `val`;
// the required length of `val` is not visible here -- confirm.
280 void emitOrAppendTwo(const PRUnichar
* val
, PRInt32 returnState
);
281 void emitOrAppendOne(const PRUnichar
* val
, PRInt32 returnState
);
// Suspension, state reset/copy and initialisation entry points.
284 void requestSuspension();
285 PRBool
isInDataState();
286 void resetToDataState();
287 void loadState(nsHtml5Tokenizer
* other
);
288 void initializeWithoutStarting();
289 void setEncodingDeclarationHandler(nsHtml5StreamParser
* encodingDeclarationHandler
);
// Static setup/teardown hooks for the static members declared above.
291 static void initializeStatics();
292 static void releaseStatics();
/*
 * Integer constants for nsHtml5Tokenizer.
 *
 * NOTE(review): the values 0..72 are presumably the tokenizer state codes
 * passed as `state` / `returnState` to nsHtml5Tokenizer::stateLoop() --
 * confirm against the implementation file.
 *
 * Names and values are unchanged. In particular the spelling
 * NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP ("NRC", not "NCR") is kept so existing
 * users of the macro still compile.
 */

/*
 * Mask that clears bit 0, so DATA (0) and RCDATA (1) both map to 0 while every
 * other constant below stays non-zero. Parenthesised so the expansion is a
 * single primary expression wherever it is used.
 */
#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK (~1)

#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72

/*
 * 0xD800 - (0x10000 >> 10) evaluates to 0xD7C0.
 * NOTE(review): presumably the offset added to (codePoint >> 10) to obtain a
 * UTF-16 lead surrogate -- confirm at the point of use.
 */
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))

/* NOTE(review): presumably the growth increment for the tokenizer's buffers. */
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024