Backed out changeset bcbab342eed8 (bug 1889658) for causing wpt reftest failures...
[gecko.git] / parser / html / nsHtml5Tokenizer.h
blob e19570ca3f9ab332c10d730b88fb403c657b9e6b
1 /*
2 * Copyright (c) 2005-2007 Henri Sivonen
3 * Copyright (c) 2007-2017 Mozilla Foundation
4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 * Foundation, and Opera Software ASA.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 * Please edit Tokenizer.java instead and regenerate.
31 #ifndef nsHtml5Tokenizer_h
32 #define nsHtml5Tokenizer_h
34 #include "jArray.h"
35 #include "nsAHtml5TreeBuilderState.h"
36 #include "nsAtom.h"
37 #include "nsGkAtoms.h"
38 #include "nsHtml5ArrayCopy.h"
39 #include "nsHtml5AtomTable.h"
40 #include "nsHtml5DocumentMode.h"
41 #include "nsHtml5Highlighter.h"
42 #include "nsHtml5Macros.h"
43 #include "nsHtml5NamedCharacters.h"
44 #include "nsHtml5NamedCharactersAccel.h"
45 #include "nsHtml5String.h"
46 #include "nsIContent.h"
47 #include "nsTraceRefcnt.h"
49 class nsHtml5StreamParser;
51 class nsHtml5AttributeName;
52 class nsHtml5ElementName;
53 class nsHtml5TreeBuilder;
54 class nsHtml5UTF16Buffer;
55 class nsHtml5StateSnapshot;
56 class nsHtml5Portability;
// Declaration of the nsHtml5Tokenizer class. The file banner states that this
// header is generated from Tokenizer.java, so lasting changes belong in that
// Java source rather than here.
58 class nsHtml5Tokenizer {
59 private:
// ~1 clears only the lowest bit, so `x & DATA_AND_RCDATA_MASK` is zero exactly
// when x is DATA (0) or RCDATA (1).
// NOTE(review): the use site is not in this header - confirm in the .cpp.
60 static const int32_t DATA_AND_RCDATA_MASK = ~1;
62 public:
// Public int32_t constants numbered 0..74 and 76..79 (75 is not assigned in
// this list). NOTE(review): presumably these are the values carried by
// setState()'s argument and by stateLoop()'s `state` / `returnState`
// parameters - confirm against the implementation. The numeric values are
// part of the public interface; do not renumber by hand.
63 static const int32_t DATA = 0;
65 static const int32_t RCDATA = 1;
67 static const int32_t SCRIPT_DATA = 2;
69 static const int32_t RAWTEXT = 3;
71 static const int32_t SCRIPT_DATA_ESCAPED = 4;
73 static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
75 static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
77 static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
79 static const int32_t PLAINTEXT = 8;
81 static const int32_t TAG_OPEN = 9;
83 static const int32_t CLOSE_TAG_OPEN = 10;
85 static const int32_t TAG_NAME = 11;
87 static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
89 static const int32_t ATTRIBUTE_NAME = 13;
91 static const int32_t AFTER_ATTRIBUTE_NAME = 14;
93 static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
95 static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
97 static const int32_t BOGUS_COMMENT = 17;
99 static const int32_t MARKUP_DECLARATION_OPEN = 18;
101 static const int32_t DOCTYPE = 19;
103 static const int32_t BEFORE_DOCTYPE_NAME = 20;
105 static const int32_t DOCTYPE_NAME = 21;
107 static const int32_t AFTER_DOCTYPE_NAME = 22;
109 static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
111 static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
113 static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
115 static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
117 static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
119 static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
121 static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
123 static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
125 static const int32_t BOGUS_DOCTYPE = 31;
127 static const int32_t COMMENT_START = 32;
129 static const int32_t COMMENT_START_DASH = 33;
131 static const int32_t COMMENT = 34;
133 static const int32_t COMMENT_END_DASH = 35;
135 static const int32_t COMMENT_END = 36;
137 static const int32_t COMMENT_END_BANG = 37;
139 static const int32_t NON_DATA_END_TAG_NAME = 38;
141 static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
// The truncated suffixes OCTYPE / UBLIC / YSTEM below mirror the static arrays
// of the same names declared later in this class.
// NOTE(review): presumably each denotes "first letter of the keyword already
// consumed" - confirm in the .cpp.
143 static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
145 static const int32_t DOCTYPE_UBLIC = 41;
147 static const int32_t DOCTYPE_YSTEM = 42;
149 static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
151 static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
153 static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
155 static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
157 static const int32_t CONSUME_NCR = 47;
159 static const int32_t CHARACTER_REFERENCE_TAIL = 48;
161 static const int32_t HEX_NCR_LOOP = 49;
// NOTE(review): "NRC" appears to be a transposition of "NCR" (compare
// CONSUME_NCR and HEX_NCR_LOOP); the identifier is public, so it is left as is.
163 static const int32_t DECIMAL_NRC_LOOP = 50;
165 static const int32_t HANDLE_NCR_VALUE = 51;
167 static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
169 static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
171 static const int32_t SELF_CLOSING_START_TAG = 54;
173 static const int32_t CDATA_START = 55;
175 static const int32_t CDATA_SECTION = 56;
177 static const int32_t CDATA_RSQB = 57;
179 static const int32_t CDATA_RSQB_RSQB = 58;
181 static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
183 static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
185 static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
187 static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
189 static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
191 static const int32_t BOGUS_COMMENT_HYPHEN = 64;
193 static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
195 static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
197 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
199 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
201 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
203 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
205 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
207 static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
209 static const int32_t PROCESSING_INSTRUCTION = 73;
211 static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
// Value 75 is skipped here; numbering resumes at 76.
213 static const int32_t COMMENT_LESSTHAN = 76;
215 static const int32_t COMMENT_LESSTHAN_BANG = 77;
217 static const int32_t COMMENT_LESSTHAN_BANG_DASH = 78;
219 static const int32_t COMMENT_LESSTHAN_BANG_DASH_DASH = 79;
221 private:
// 0xD800 - (0x10000 >> 10) evaluates to 0xD7C0.
// NOTE(review): this matches the usual UTF-16 lead-surrogate offset
// (lead = LEAD_OFFSET + (codepoint >> 10)) - confirm at the use site.
222 static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
// Static char16_t arrays. They are only declared here, so their contents and
// sizes come from out-of-class definitions that are not visible in this header.
224 static char16_t LT_GT[];
225 static char16_t LT_SOLIDUS[];
226 static char16_t RSQB_RSQB[];
227 static char16_t REPLACEMENT_CHARACTER[];
228 static char16_t LF[];
229 static char16_t CDATA_LSQB[];
230 static char16_t OCTYPE[];
231 static char16_t UBLIC[];
232 static char16_t YSTEM[];
// Static staticJArray<char16_t, int32_t> members named after HTML element
// names. NOTE(review): presumably these hold the element-name characters that
// end tags are matched against - confirm where they are initialized.
233 static staticJArray<char16_t, int32_t> TITLE_ARR;
234 static staticJArray<char16_t, int32_t> SCRIPT_ARR;
235 static staticJArray<char16_t, int32_t> STYLE_ARR;
236 static staticJArray<char16_t, int32_t> PLAINTEXT_ARR;
237 static staticJArray<char16_t, int32_t> XMP_ARR;
238 static staticJArray<char16_t, int32_t> TEXTAREA_ARR;
239 static staticJArray<char16_t, int32_t> IFRAME_ARR;
240 static staticJArray<char16_t, int32_t> NOEMBED_ARR;
241 static staticJArray<char16_t, int32_t> NOSCRIPT_ARR;
242 static staticJArray<char16_t, int32_t> NOFRAMES_ARR;
// Per-instance tokenizer fields. Access switches between protected and private
// member by member. NOTE(review): presumably the protected ones are those
// needed outside the generated code - confirm before changing any access level.
244 protected:
245 nsHtml5TreeBuilder* tokenHandler;
246 nsHtml5StreamParser* encodingDeclarationHandler;
247 bool lastCR;
248 int32_t stateSave;
250 private:
251 int32_t returnStateSave;
253 protected:
254 int32_t index;
256 private:
257 bool forceQuirks;
258 char16_t additional;
259 int32_t entCol;
260 int32_t firstCharKey;
261 int32_t lo;
262 int32_t hi;
263 int32_t candidate;
264 int32_t charRefBufMark;
266 protected:
267 int32_t value;
269 private:
270 bool seenDigits;
271 bool suspendAfterCurrentNonTextToken;
273 protected:
274 int32_t cstart;
276 private:
// NOTE(review): publicId/systemId here and publicIdentifier/systemIdentifier
// further down are distinct members; presumably the former pair is set by
// initLocation() and the latter pair belongs to the doctype - confirm.
277 nsHtml5String publicId;
278 nsHtml5String systemId;
// strBuf is written at index strBufLen by appendStrBuf(), and charRefBuf at
// index charRefBufLen by appendCharRefBuf(); each *Len member is the count of
// characters currently stored in its buffer.
279 autoJArray<char16_t, int32_t> strBuf;
280 int32_t strBufLen;
281 autoJArray<char16_t, int32_t> charRefBuf;
282 int32_t charRefBufLen;
283 autoJArray<char16_t, int32_t> bmpChar;
284 autoJArray<char16_t, int32_t> astralChar;
286 protected:
287 nsHtml5ElementName* endTagExpectation;
289 private:
290 jArray<char16_t, int32_t> endTagExpectationAsArray;
292 protected:
293 bool endTag;
295 private:
296 bool containsHyphen;
297 nsHtml5ElementName* tagName;
298 nsHtml5ElementName* nonInternedTagName;
300 protected:
301 nsHtml5AttributeName* attributeName;
303 private:
304 nsHtml5AttributeName* nonInternedAttributeName;
305 RefPtr<nsAtom> doctypeName;
306 nsHtml5String publicIdentifier;
307 nsHtml5String systemIdentifier;
308 nsHtml5HtmlAttributes* attributes;
309 bool newAttributesEachTime;
310 bool shouldSuspend;
311 bool keepBuffer;
313 protected:
314 bool confident;
316 private:
// `line` is the value returned by the inline getLineNumber() accessor.
317 int32_t line;
318 int32_t attributeLine;
319 nsHtml5AtomTable* interner;
320 bool viewingXmlSource;
// Construction and configuration entry points. Apart from getLineNumber(),
// these are declarations only; their bodies are not in this header.
322 public:
323 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
324 void setInterner(nsHtml5AtomTable* interner);
325 void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
326 bool isViewingXmlSource();
327 void setKeepBuffer(bool keepBuffer);
328 bool dropBufferIfLongerThan(int32_t length);
// NOTE(review): presumably specialTokenizerState takes one of the public
// int32_t constants declared at the top of the class - confirm in the .cpp.
329 void setState(int32_t specialTokenizerState);
330 void setStateAndEndTagExpectation(int32_t specialTokenizerState,
331 nsHtml5ElementName* endTagExpectation);
333 private:
334 void endTagExpectationToArray();
336 public:
337 void setLineNumber(int32_t line);
// Returns the current value of the `line` member.
338 inline int32_t getLineNumber() { return line; }
340 nsHtml5HtmlAttributes* emptyAttributes();
342 private:
// Stores `c` at charRefBuf[charRefBufLen] and then increments charRefBufLen.
// The bound is checked with MOZ_RELEASE_ASSERT before the write; per MFBT that
// macro is active in all build types, so a full buffer stops execution rather
// than being overrun.
// NOTE(review): this extract shows no closing brace for the inline body.
343 inline void appendCharRefBuf(char16_t c) {
344 MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
345 "Attempted to overrun charRefBuf!");
346 charRefBuf[charRefBufLen++] = c;
// Declaration only; the body is not in this header.
349 void emitOrAppendCharRefBuf(int32_t returnState);
// Three ways of resetting strBufLen to 0; they differ only in what they assert
// first. NOTE(review): this extract shows no closing braces for the two
// multi-line inline bodies below.

// Unconditionally marks strBuf as empty.
350 inline void clearStrBufAfterUse() { strBufLen = 0; }
// Asserts that strBufLen is already 0, then sets it to 0 regardless.
// NOTE(review): MOZ_ASSERT is presumably compiled out of non-debug builds, in
// which case the assignment is what guarantees the reset there - confirm.
352 inline void clearStrBufBeforeUse() {
353 MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
354 strBufLen = 0;
// Asserts that strBuf holds exactly one character and that it is '-', then
// sets strBufLen to 0.
357 inline void clearStrBufAfterOneHyphen() {
358 MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
359 MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
360 strBufLen = 0;
// Appends `c` to strBuf: stores it at strBuf[strBufLen] and increments
// strBufLen.
// The MOZ_ASSERT states the expectation that there is already room. The `if`
// below then handles the exactly-full case by calling EnsureBufferSpace(1)
// and calling MOZ_CRASH if that returns false.
// NOTE(review): MOZ_ASSERT is presumably debug-only, which would make the `if`
// the non-debug fallback - confirm.
// NOTE(review): EnsureBufferSpace is not declared in the visible part of this
// header; presumably it comes from nsHtml5TokenizerHSupplement.h.
// NOTE(review): this extract shows no closing braces for the `if` blocks or
// the function body.
363 inline void appendStrBuf(char16_t c) {
364 MOZ_ASSERT(strBufLen < strBuf.length,
365 "Previous buffer length insufficient.");
366 if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
367 if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
368 MOZ_CRASH("Unable to recover from buffer reallocation failure");
371 strBuf[strBufLen++] = c;
// Helpers that consume or extend strBuf. Members without an inline body are
// declarations only; their implementations are not in this header.
374 protected:
375 nsHtml5String strBufToString();
377 private:
378 void strBufToDoctypeName();
379 void emitStrBuf();
// Appends a single '-' to strBuf via appendStrBuf(char16_t).
380 inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }
// Appends `c` to strBuf. The reportedConsecutiveHyphens argument is not used
// by the body shown here.
// NOTE(review): this extract shows no closing brace for the inline body.
382 inline void adjustDoubleHyphenAndAppendToStrBufAndErr(
383 char16_t c, bool reportedConsecutiveHyphens) {
384 appendStrBuf(c);
// Range overload of appendStrBuf; declaration only.
387 void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
// Passes charRefBuf with offset 0 and length charRefBufLen to the range
// overload of appendStrBuf, then resets charRefBufLen to 0.
// NOTE(review): this extract shows no closing brace for the inline body.
388 inline void appendCharRefBufToStrBuf() {
389 appendStrBuf(charRefBuf, 0, charRefBufLen);
390 charRefBufLen = 0;
393 void emitComment(int32_t provisionalHyphens, int32_t pos);
// Token emission helpers and the public tokenization entry points. All are
// declarations only; the bodies are not in this header.
395 protected:
396 void flushChars(char16_t* buf, int32_t pos);
398 private:
399 void strBufToElementNameString();
400 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
401 void attributeNameComplete();
402 void addAttributeWithoutValue();
403 void addAttributeWithValue();
405 public:
406 void start();
407 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
// Private helpers, several of them member templates over a type parameter P.
// This header neither constrains nor describes P.
// NOTE(review): presumably P is a policy type chosen by the caller - confirm
// in the .cpp. All members here are declarations only.
409 private:
// Takes a state, the current character, a position, a buffer pointer, a
// reconsume flag, a return state and an end position; returns int32_t.
// NOTE(review): the meaning of the return value is not visible here.
410 template <class P>
411 int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf,
412 bool reconsume, int32_t returnState, int32_t endPos);
413 void initDoctypeFields();
414 template <class P>
415 void adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
416 template <class P>
417 void adjustDoubleHyphenAndAppendToStrBufLineFeed();
418 template <class P>
419 void appendStrBufLineFeed();
420 template <class P>
421 void appendStrBufCarriageReturn();
422 template <class P>
423 void emitCarriageReturn(char16_t* buf, int32_t pos);
424 void emitReplacementCharacter(char16_t* buf, int32_t pos);
425 void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
426 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
427 void setAdditionalAndRememberAmpersandLocation(char16_t add);
428 void bogusDoctype();
429 void bogusDoctypeWithoutQuirks();
430 void handleNcrValue(int32_t returnState);
// End-of-stream handling, suspension control, state reset/copy and static
// lifecycle hooks. All are declarations only; the bodies are not in this
// header.
432 public:
433 void eof();
435 private:
436 void emitDoctypeToken(int32_t pos);
437 void suspendIfRequestedAfterCurrentNonTextToken();
438 void suspendAfterCurrentTokenIfNotInText();
439 bool suspensionAfterCurrentNonTextTokenPending();
441 public:
442 bool internalEncodingDeclaration(nsHtml5String internalCharset);
444 private:
445 void emitOrAppendTwo(const char16_t* val, int32_t returnState);
446 void emitOrAppendOne(const char16_t* val, int32_t returnState);
448 public:
449 void end();
450 void requestSuspension();
451 bool isInDataState();
452 void resetToDataState();
453 void loadState(nsHtml5Tokenizer* other);
454 void initializeWithoutStarting();
455 void setEncodingDeclarationHandler(
456 nsHtml5StreamParser* encodingDeclarationHandler);
457 ~nsHtml5Tokenizer();
458 static void initializeStatics();
459 static void releaseStatics();
// NOTE(review): no class-closing brace precedes this include in the extract,
// so the supplement appears to be included inside the class body and
// presumably adds further member declarations - confirm against the real file.
461 #include "nsHtml5TokenizerHSupplement.h"
464 #endif