Merge mozilla-central and tracemonkey. (a=blockers)
[mozilla-central.git] / parser / html / nsHtml5Tokenizer.h
blobb35a39f5a6ee0d59fc6686663d4b983480cb836e
1 /*
2 * Copyright (c) 2005-2007 Henri Sivonen
3 * Copyright (c) 2007-2010 Mozilla Foundation
4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 * Foundation, and Opera Software ASA.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 * Please edit Tokenizer.java instead and regenerate.
31 #ifndef nsHtml5Tokenizer_h__
32 #define nsHtml5Tokenizer_h__
34 #include "prtypes.h"
35 #include "nsIAtom.h"
36 #include "nsHtml5AtomTable.h"
37 #include "nsString.h"
38 #include "nsINameSpaceManager.h"
39 #include "nsIContent.h"
40 #include "nsIDocument.h"
41 #include "nsTraceRefcnt.h"
42 #include "jArray.h"
43 #include "nsHtml5DocumentMode.h"
44 #include "nsHtml5ArrayCopy.h"
45 #include "nsHtml5NamedCharacters.h"
46 #include "nsHtml5NamedCharactersAccel.h"
47 #include "nsHtml5Atoms.h"
48 #include "nsHtml5ByteReadable.h"
49 #include "nsIUnicodeDecoder.h"
50 #include "nsAHtml5TreeBuilderState.h"
51 #include "nsHtml5Macros.h"
// Forward declarations of project classes, so this header can name them without
// including their definitions.
// In the class declared below, nsHtml5StreamParser, nsHtml5TreeBuilder,
// nsHtml5AttributeName, nsHtml5ElementName, nsHtml5HtmlAttributes and
// nsHtml5UTF16Buffer appear only as pointer types. nsHtml5MetaScanner,
// nsHtml5StateSnapshot and nsHtml5Portability are not referenced anywhere in the
// visible part of this header.
53 class nsHtml5StreamParser;
55 class nsHtml5TreeBuilder;
56 class nsHtml5MetaScanner;
57 class nsHtml5AttributeName;
58 class nsHtml5ElementName;
59 class nsHtml5HtmlAttributes;
60 class nsHtml5UTF16Buffer;
61 class nsHtml5StateSnapshot;
62 class nsHtml5Portability;
// Declaration of nsHtml5Tokenizer: static character tables, per-instance
// tokenizer state, and the member functions that operate on that state.
// Only the small inline helpers have bodies in this header; every other member
// function is declared here and defined elsewhere.
// NOTE(review): the number that leads each line skips values (66, 154, 156-157,
// ...), so brace-only and blank lines appear to be missing from this listing.
// Confirm against the regenerated header before relying on it.
65 class nsHtml5Tokenizer
67 private:
// Static character arrays and static jArray wrappers. Their contents are not
// defined in this header.
// NOTE(review): presumably populated by initializeStatics() - confirm.
68 static PRUnichar LT_GT[];
69 static PRUnichar LT_SOLIDUS[];
70 static PRUnichar RSQB_RSQB[];
71 static PRUnichar REPLACEMENT_CHARACTER[];
72 static PRUnichar LF[];
73 static PRUnichar CDATA_LSQB[];
74 static PRUnichar OCTYPE[];
75 static PRUnichar UBLIC[];
76 static PRUnichar YSTEM[];
77 static staticJArray<PRUnichar,PRInt32> TITLE_ARR;
78 static staticJArray<PRUnichar,PRInt32> SCRIPT_ARR;
79 static staticJArray<PRUnichar,PRInt32> STYLE_ARR;
80 static staticJArray<PRUnichar,PRInt32> PLAINTEXT_ARR;
81 static staticJArray<PRUnichar,PRInt32> XMP_ARR;
82 static staticJArray<PRUnichar,PRInt32> TEXTAREA_ARR;
83 static staticJArray<PRUnichar,PRInt32> IFRAME_ARR;
84 static staticJArray<PRUnichar,PRInt32> NOEMBED_ARR;
85 static staticJArray<PRUnichar,PRInt32> NOSCRIPT_ARR;
86 static staticJArray<PRUnichar,PRInt32> NOFRAMES_ARR;
// Instance state. Access switches between protected and private member by
// member, so the access specifiers below are significant.
87 protected:
88 nsHtml5TreeBuilder* tokenHandler;
89 nsHtml5StreamParser* encodingDeclarationHandler;
// lastCR is set to PR_TRUE by silentCarriageReturn() below.
90 PRBool lastCR;
91 PRInt32 stateSave;
92 private:
93 PRInt32 returnStateSave;
94 protected:
95 PRInt32 index;
96 private:
97 PRBool forceQuirks;
98 PRUnichar additional;
99 PRInt32 entCol;
100 PRInt32 firstCharKey;
101 PRInt32 lo;
102 PRInt32 hi;
103 PRInt32 candidate;
104 PRInt32 strBufMark;
105 PRInt32 prevValue;
106 protected:
107 PRInt32 value;
108 private:
109 PRBool seenDigits;
110 protected:
111 PRInt32 cstart;
112 private:
113 nsString* publicId;
114 nsString* systemId;
// strBuf / longStrBuf are character buffers; strBufLen / longStrBufLen hold the
// number of entries currently in use (the clear* helpers below reset them to 0
// or 1 without touching the rest of the buffer).
115 autoJArray<PRUnichar,PRInt32> strBuf;
116 PRInt32 strBufLen;
117 autoJArray<PRUnichar,PRInt32> longStrBuf;
118 PRInt32 longStrBufLen;
119 autoJArray<PRUnichar,PRInt32> bmpChar;
120 autoJArray<PRUnichar,PRInt32> astralChar;
121 protected:
122 nsHtml5ElementName* endTagExpectation;
123 private:
124 jArray<PRUnichar,PRInt32> endTagExpectationAsArray;
125 protected:
126 PRBool endTag;
127 private:
128 nsHtml5ElementName* tagName;
129 protected:
130 nsHtml5AttributeName* attributeName;
131 private:
132 nsIAtom* doctypeName;
133 nsString* publicIdentifier;
134 nsString* systemIdentifier;
135 nsHtml5HtmlAttributes* attributes;
136 PRInt32 mappingLangToXmlLang;
137 PRBool shouldSuspend;
138 protected:
139 PRBool confident;
140 private:
// line is incremented by silentCarriageReturn() and silentLineFeed() and is
// returned by getLineNumber().
141 PRInt32 line;
142 nsHtml5AtomTable* interner;
143 public:
// Construction and configuration entry points (defined elsewhere).
144 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler);
145 void setInterner(nsHtml5AtomTable* interner);
146 void initLocation(nsString* newPublicId, nsString* newSystemId);
// Two overloads that differ only in how the expected end tag is supplied
// (nsIAtom* versus nsHtml5ElementName*).
// NOTE(review): specialTokenizerState is presumably one of the
// NS_HTML5TOKENIZER_* state values - confirm against the definition.
147 void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation);
148 void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation);
149 private:
150 void endTagExpectationToArray();
151 public:
152 void setLineNumber(PRInt32 line);
// Returns the current value of the line member.
153 inline PRInt32 getLineNumber()
155 return line;
158 nsHtml5HtmlAttributes* emptyAttributes();
159 private:
// Replaces the contents of strBuf with the single character c: writes c at
// index 0 and sets the used length to 1.
160 inline void clearStrBufAndAppend(PRUnichar c)
162 strBuf[0] = c;
163 strBufLen = 1;
// Marks strBuf as empty by resetting its used length; stored data is untouched.
166 inline void clearStrBuf()
168 strBufLen = 0;
171 void appendStrBuf(PRUnichar c);
172 protected:
173 nsString* strBufToString();
174 private:
175 void strBufToDoctypeName();
176 void emitStrBuf();
// Marks longStrBuf as empty by resetting its used length; stored data is untouched.
177 inline void clearLongStrBuf()
179 longStrBufLen = 0;
// Replaces the contents of longStrBuf with the single character c: writes c at
// index 0 and sets the used length to 1.
182 inline void clearLongStrBufAndAppend(PRUnichar c)
184 longStrBuf[0] = c;
185 longStrBufLen = 1;
188 void appendLongStrBuf(PRUnichar c);
// Appends a '-' character to longStrBuf.
189 inline void appendSecondHyphenToBogusComment()
191 appendLongStrBuf('-');
// The only statement visible in this listing appends c to longStrBuf.
// NOTE(review): the name mentions a double-hyphen adjustment and an error
// report, neither of which is visible here (listing line 196 is absent) -
// confirm against the generator output.
194 inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
197 appendLongStrBuf(c);
200 void appendLongStrBuf(PRUnichar* buffer, PRInt32 offset, PRInt32 length);
// Appends the first strBufLen characters of strBuf to longStrBuf.
201 inline void appendStrBufToLongStrBuf()
203 appendLongStrBuf(strBuf, 0, strBufLen);
206 nsString* longStrBufToString();
207 void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
208 protected:
209 void flushChars(PRUnichar* buf, PRInt32 pos);
210 private:
211 void resetAttributes();
212 void strBufToElementNameString();
213 PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos);
214 void attributeNameComplete();
215 void addAttributeWithoutValue();
216 void addAttributeWithValue();
217 public:
218 void start();
219 PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
220 private:
221 PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos);
222 void initDoctypeFields();
// The four helpers below do line bookkeeping for a CR or LF and then append a
// line feed to longStrBuf. Note that the carriage-return variants also append
// '\n', not '\r'.
// Carriage return: counts the line and flags lastCR via silentCarriageReturn(),
// then appends '\n' through adjustDoubleHyphenAndAppendToLongStrBufAndErr().
223 inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
225 silentCarriageReturn();
226 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
// Line feed: counts the line via silentLineFeed(), then appends '\n' through
// adjustDoubleHyphenAndAppendToLongStrBufAndErr().
229 inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
231 silentLineFeed();
232 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
// Line feed: counts the line via silentLineFeed(), then appends '\n' to longStrBuf.
235 inline void appendLongStrBufLineFeed()
237 silentLineFeed();
238 appendLongStrBuf('\n');
// Carriage return: counts the line and flags lastCR via silentCarriageReturn(),
// then appends '\n' to longStrBuf.
241 inline void appendLongStrBufCarriageReturn()
243 silentCarriageReturn();
244 appendLongStrBuf('\n');
247 protected:
// Records a carriage return: increments line and sets lastCR to PR_TRUE.
// NOTE(review): lastCR presumably lets a following LF be recognised as part of
// a CRLF pair so the line is not counted twice - the code that reads lastCR is
// not visible here; confirm.
248 inline void silentCarriageReturn()
250 ++line;
251 lastCR = PR_TRUE;
// Records a line feed: increments line. Does not modify lastCR.
254 inline void silentLineFeed()
256 ++line;
259 private:
260 void emitCarriageReturn(PRUnichar* buf, PRInt32 pos);
261 void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos);
262 void emitPlaintextReplacementCharacter(PRUnichar* buf, PRInt32 pos);
263 void setAdditionalAndRememberAmpersandLocation(PRUnichar add);
264 void bogusDoctype();
265 void bogusDoctypeWithoutQuirks();
266 void emitOrAppendStrBuf(PRInt32 returnState);
267 void handleNcrValue(PRInt32 returnState);
268 public:
269 void eof();
270 private:
271 void emitDoctypeToken(PRInt32 pos);
272 protected:
// Returns buf[pos] unchanged; no bounds or validity check is performed here.
273 inline PRUnichar checkChar(PRUnichar* buf, PRInt32 pos)
275 return buf[pos];
278 public:
279 PRBool internalEncodingDeclaration(nsString* internalCharset);
280 private:
281 void emitOrAppendTwo(const PRUnichar* val, PRInt32 returnState);
282 void emitOrAppendOne(const PRUnichar* val, PRInt32 returnState);
283 public:
// Lifecycle and state-management entry points (defined elsewhere).
284 void end();
285 void requestSuspension();
286 PRBool isInDataState();
287 void resetToDataState();
288 void loadState(nsHtml5Tokenizer* other);
289 void initializeWithoutStarting();
290 void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
291 ~nsHtml5Tokenizer();
// Static setup/teardown hooks.
// NOTE(review): presumably these manage the static tables declared at the top
// of the class - confirm against the definitions.
292 static void initializeStatics();
293 static void releaseStatics();
// Preprocessor constants for the tokenizer: one bit mask, the integer state
// identifiers numbered consecutively from 0 to 72, and two numeric tuning
// constants at the end.
//
// NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK is ~1, i.e. every bit set except bit 0.
// Among the non-negative state values below, (state & MASK) is therefore 0 only
// for DATA (0) and RCDATA (1).
// NOTE(review): the expansion is an unparenthesized `~1`; this is harmless next
// to binary operators such as `&`, but the generator could emit `(~1)` to be safe.
296 #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
// Tokenizer state identifiers. The numeric values matter: DATA and RCDATA must
// stay at 0 and 1 for the mask above to single them out.
297 #define NS_HTML5TOKENIZER_DATA 0
298 #define NS_HTML5TOKENIZER_RCDATA 1
299 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2
300 #define NS_HTML5TOKENIZER_RAWTEXT 3
301 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
302 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
303 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
304 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
305 #define NS_HTML5TOKENIZER_PLAINTEXT 8
306 #define NS_HTML5TOKENIZER_TAG_OPEN 9
307 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
308 #define NS_HTML5TOKENIZER_TAG_NAME 11
309 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
310 #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
311 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
312 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
313 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
314 #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
315 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
316 #define NS_HTML5TOKENIZER_DOCTYPE 19
317 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
318 #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
319 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
320 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
321 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
322 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
323 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
324 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
325 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
326 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
327 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
328 #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
329 #define NS_HTML5TOKENIZER_COMMENT_START 32
330 #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
331 #define NS_HTML5TOKENIZER_COMMENT 34
332 #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
333 #define NS_HTML5TOKENIZER_COMMENT_END 36
334 #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
335 #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
336 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
337 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
338 #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
339 #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
340 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
341 #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
342 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
343 #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
344 #define NS_HTML5TOKENIZER_CONSUME_NCR 47
345 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
346 #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
// NOTE(review): "NRC" here differs from the "NCR" spelling used by the
// neighbouring state names and looks like a transposition. The identifier is
// left unchanged because code elsewhere may reference it; fix it in
// Tokenizer.java and regenerate if desired.
347 #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
348 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
349 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
350 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
351 #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
352 #define NS_HTML5TOKENIZER_CDATA_START 55
353 #define NS_HTML5TOKENIZER_CDATA_SECTION 56
354 #define NS_HTML5TOKENIZER_CDATA_RSQB 57
355 #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
356 #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
357 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
358 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
359 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
360 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
361 #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
362 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
363 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
364 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
365 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
366 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
367 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
368 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
369 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
// Evaluates to 0xD7C0 (0xD800 - 0x40).
// NOTE(review): this matches the constant conventionally used to derive a UTF-16
// lead surrogate as LEAD_OFFSET + (codepoint >> 10); the use site is not visible
// in this header - confirm.
370 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
// NOTE(review): presumably the increment, in characters, by which the string
// buffers are enlarged; the use site is not visible in this header - confirm.
371 #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
374 #endif