Bug 1468361 [wpt PR 11478] - Add test run time to wptreport.json format, a=testonly
[gecko.git] / parser / html / nsHtml5Tokenizer.h
blob32b002fab45399d44f015aa055ae297b66bdb9ce
/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2015 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */
31 #ifndef nsHtml5Tokenizer_h
32 #define nsHtml5Tokenizer_h
34 #include "nsAtom.h"
35 #include "nsHtml5AtomTable.h"
36 #include "nsHtml5String.h"
37 #include "nsIContent.h"
38 #include "nsTraceRefcnt.h"
39 #include "jArray.h"
40 #include "nsHtml5DocumentMode.h"
41 #include "nsHtml5ArrayCopy.h"
42 #include "nsHtml5NamedCharacters.h"
43 #include "nsHtml5NamedCharactersAccel.h"
44 #include "nsGkAtoms.h"
45 #include "nsAHtml5TreeBuilderState.h"
46 #include "nsHtml5Macros.h"
47 #include "nsHtml5Highlighter.h"
48 #include "nsHtml5TokenizerLoopPolicies.h"
50 class nsHtml5StreamParser;
52 class nsHtml5AttributeName;
53 class nsHtml5ElementName;
54 class nsHtml5TreeBuilder;
55 class nsHtml5MetaScanner;
56 class nsHtml5UTF16Buffer;
57 class nsHtml5StateSnapshot;
58 class nsHtml5Portability;
60 class nsHtml5Tokenizer
62 private:
63 static const int32_t DATA_AND_RCDATA_MASK = ~1;
65 public:
66 static const int32_t DATA = 0;
68 static const int32_t RCDATA = 1;
70 static const int32_t SCRIPT_DATA = 2;
72 static const int32_t RAWTEXT = 3;
74 static const int32_t SCRIPT_DATA_ESCAPED = 4;
76 static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
78 static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
80 static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
82 static const int32_t PLAINTEXT = 8;
84 static const int32_t TAG_OPEN = 9;
86 static const int32_t CLOSE_TAG_OPEN = 10;
88 static const int32_t TAG_NAME = 11;
90 static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
92 static const int32_t ATTRIBUTE_NAME = 13;
94 static const int32_t AFTER_ATTRIBUTE_NAME = 14;
96 static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
98 static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
100 static const int32_t BOGUS_COMMENT = 17;
102 static const int32_t MARKUP_DECLARATION_OPEN = 18;
104 static const int32_t DOCTYPE = 19;
106 static const int32_t BEFORE_DOCTYPE_NAME = 20;
108 static const int32_t DOCTYPE_NAME = 21;
110 static const int32_t AFTER_DOCTYPE_NAME = 22;
112 static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
114 static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
116 static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
118 static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
120 static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
122 static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
124 static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
126 static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
128 static const int32_t BOGUS_DOCTYPE = 31;
130 static const int32_t COMMENT_START = 32;
132 static const int32_t COMMENT_START_DASH = 33;
134 static const int32_t COMMENT = 34;
136 static const int32_t COMMENT_END_DASH = 35;
138 static const int32_t COMMENT_END = 36;
140 static const int32_t COMMENT_END_BANG = 37;
142 static const int32_t NON_DATA_END_TAG_NAME = 38;
144 static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
146 static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
148 static const int32_t DOCTYPE_UBLIC = 41;
150 static const int32_t DOCTYPE_YSTEM = 42;
152 static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
154 static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
156 static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
158 static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
160 static const int32_t CONSUME_NCR = 47;
162 static const int32_t CHARACTER_REFERENCE_TAIL = 48;
164 static const int32_t HEX_NCR_LOOP = 49;
166 static const int32_t DECIMAL_NRC_LOOP = 50;
168 static const int32_t HANDLE_NCR_VALUE = 51;
170 static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
172 static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
174 static const int32_t SELF_CLOSING_START_TAG = 54;
176 static const int32_t CDATA_START = 55;
178 static const int32_t CDATA_SECTION = 56;
180 static const int32_t CDATA_RSQB = 57;
182 static const int32_t CDATA_RSQB_RSQB = 58;
184 static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
186 static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
188 static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
190 static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
192 static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
194 static const int32_t BOGUS_COMMENT_HYPHEN = 64;
196 static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
198 static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
200 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
202 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
204 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
206 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
208 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
210 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
212 static const int32_t PROCESSING_INSTRUCTION = 73;
214 static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
216 private:
217 static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
219 static char16_t LT_GT[];
220 static char16_t LT_SOLIDUS[];
221 static char16_t RSQB_RSQB[];
222 static char16_t REPLACEMENT_CHARACTER[];
223 static char16_t LF[];
224 static char16_t CDATA_LSQB[];
225 static char16_t OCTYPE[];
226 static char16_t UBLIC[];
227 static char16_t YSTEM[];
228 static staticJArray<char16_t, int32_t> TITLE_ARR;
229 static staticJArray<char16_t, int32_t> SCRIPT_ARR;
230 static staticJArray<char16_t, int32_t> STYLE_ARR;
231 static staticJArray<char16_t, int32_t> PLAINTEXT_ARR;
232 static staticJArray<char16_t, int32_t> XMP_ARR;
233 static staticJArray<char16_t, int32_t> TEXTAREA_ARR;
234 static staticJArray<char16_t, int32_t> IFRAME_ARR;
235 static staticJArray<char16_t, int32_t> NOEMBED_ARR;
236 static staticJArray<char16_t, int32_t> NOSCRIPT_ARR;
237 static staticJArray<char16_t, int32_t> NOFRAMES_ARR;
239 protected:
240 nsHtml5TreeBuilder* tokenHandler;
241 nsHtml5StreamParser* encodingDeclarationHandler;
242 bool lastCR;
243 int32_t stateSave;
245 private:
246 int32_t returnStateSave;
248 protected:
249 int32_t index;
251 private:
252 bool forceQuirks;
253 char16_t additional;
254 int32_t entCol;
255 int32_t firstCharKey;
256 int32_t lo;
257 int32_t hi;
258 int32_t candidate;
259 int32_t charRefBufMark;
261 protected:
262 int32_t value;
264 private:
265 bool seenDigits;
267 protected:
268 int32_t cstart;
270 private:
271 nsHtml5String publicId;
272 nsHtml5String systemId;
273 autoJArray<char16_t, int32_t> strBuf;
274 int32_t strBufLen;
275 autoJArray<char16_t, int32_t> charRefBuf;
276 int32_t charRefBufLen;
277 autoJArray<char16_t, int32_t> bmpChar;
278 autoJArray<char16_t, int32_t> astralChar;
280 protected:
281 nsHtml5ElementName* endTagExpectation;
283 private:
284 jArray<char16_t, int32_t> endTagExpectationAsArray;
286 protected:
287 bool endTag;
289 private:
290 bool containsHyphen;
291 nsHtml5ElementName* tagName;
292 nsHtml5ElementName* nonInternedTagName;
294 protected:
295 nsHtml5AttributeName* attributeName;
297 private:
298 nsHtml5AttributeName* nonInternedAttributeName;
299 nsAtom* doctypeName;
300 nsHtml5String publicIdentifier;
301 nsHtml5String systemIdentifier;
302 nsHtml5HtmlAttributes* attributes;
303 bool newAttributesEachTime;
304 bool shouldSuspend;
306 protected:
307 bool confident;
309 private:
310 int32_t line;
311 int32_t attributeLine;
312 nsHtml5AtomTable* interner;
313 bool viewingXmlSource;
315 public:
316 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
317 void setInterner(nsHtml5AtomTable* interner);
318 void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
319 bool isViewingXmlSource();
320 void setStateAndEndTagExpectation(int32_t specialTokenizerState,
321 nsAtom* endTagExpectation);
322 void setStateAndEndTagExpectation(int32_t specialTokenizerState,
323 nsHtml5ElementName* endTagExpectation);
325 private:
326 void endTagExpectationToArray();
328 public:
329 void setLineNumber(int32_t line);
330 inline int32_t getLineNumber() { return line; }
332 nsHtml5HtmlAttributes* emptyAttributes();
334 private:
335 inline void appendCharRefBuf(char16_t c)
337 MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
338 "Attempted to overrun charRefBuf!");
339 charRefBuf[charRefBufLen++] = c;
342 void emitOrAppendCharRefBuf(int32_t returnState);
343 inline void clearStrBufAfterUse() { strBufLen = 0; }
345 inline void clearStrBufBeforeUse()
347 MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
348 strBufLen = 0;
351 inline void clearStrBufAfterOneHyphen()
353 MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
354 MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
355 strBufLen = 0;
358 inline void appendStrBuf(char16_t c)
360 MOZ_ASSERT(strBufLen < strBuf.length,
361 "Previous buffer length insufficient.");
362 if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
363 if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
364 MOZ_CRASH("Unable to recover from buffer reallocation failure");
367 strBuf[strBufLen++] = c;
370 protected:
371 nsHtml5String strBufToString();
373 private:
374 void strBufToDoctypeName();
375 void emitStrBuf();
376 inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }
378 inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
380 errConsecutiveHyphens();
381 appendStrBuf(c);
384 void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
385 inline void appendCharRefBufToStrBuf()
387 appendStrBuf(charRefBuf, 0, charRefBufLen);
388 charRefBufLen = 0;
391 void emitComment(int32_t provisionalHyphens, int32_t pos);
393 protected:
394 void flushChars(char16_t* buf, int32_t pos);
396 private:
397 void strBufToElementNameString();
398 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
399 void attributeNameComplete();
400 void addAttributeWithoutValue();
401 void addAttributeWithValue();
403 public:
404 void start();
405 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
407 private:
408 template<class P>
409 int32_t stateLoop(int32_t state,
410 char16_t c,
411 int32_t pos,
412 char16_t* buf,
413 bool reconsume,
414 int32_t returnState,
415 int32_t endPos);
416 void initDoctypeFields();
417 inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
419 silentCarriageReturn();
420 adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
423 inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
425 silentLineFeed();
426 adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
429 inline void appendStrBufLineFeed()
431 silentLineFeed();
432 appendStrBuf('\n');
435 inline void appendStrBufCarriageReturn()
437 silentCarriageReturn();
438 appendStrBuf('\n');
441 protected:
442 inline void silentCarriageReturn()
444 ++line;
445 lastCR = true;
448 inline void silentLineFeed() { ++line; }
450 private:
451 void emitCarriageReturn(char16_t* buf, int32_t pos);
452 void emitReplacementCharacter(char16_t* buf, int32_t pos);
453 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
454 void setAdditionalAndRememberAmpersandLocation(char16_t add);
455 void bogusDoctype();
456 void bogusDoctypeWithoutQuirks();
457 void handleNcrValue(int32_t returnState);
459 public:
460 void eof();
462 private:
463 void emitDoctypeToken(int32_t pos);
465 protected:
466 inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }
468 public:
469 bool internalEncodingDeclaration(nsHtml5String internalCharset);
471 private:
472 void emitOrAppendTwo(const char16_t* val, int32_t returnState);
473 void emitOrAppendOne(const char16_t* val, int32_t returnState);
475 public:
476 void end();
477 void requestSuspension();
478 bool isInDataState();
479 void resetToDataState();
480 void loadState(nsHtml5Tokenizer* other);
481 void initializeWithoutStarting();
482 void setEncodingDeclarationHandler(
483 nsHtml5StreamParser* encodingDeclarationHandler);
484 ~nsHtml5Tokenizer();
485 static void initializeStatics();
486 static void releaseStatics();
488 #include "nsHtml5TokenizerHSupplement.h"
491 #endif