1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "mozilla/CheckedInt.h"
6 #include "mozilla/Likely.h"
8 // INT32_MAX is (2^31)-1. Therefore, the highest power-of-two that fits
9 // is 2^30. Note that this is counting char16_t units. The underlying
10 // bytes will be twice that, but they fit even in 32-bit size_t even
11 // if a contiguous chunk of memory of that size is pretty unlikely to
12 // be available on a 32-bit system.
// 0x40000000 == 2^30. nsHtml5Tokenizer::EnsureBufferSpace() compares both the
// requested length and its computed worst-case size against this limit.
13 #define MAX_POWER_OF_TWO_IN_INT32 0x40000000
// Prepares the tokenizer's string buffer (strBuf) before more input is
// processed. aLength is presumably the number of additional char16_t units
// about to be tokenized -- TODO confirm against the caller.
//
// Steps visible in this copy:
//  - aLength is release-asserted to be non-negative and compared against
//    MAX_POWER_OF_TWO_IN_INT32.
//  - A CheckedInt<int32_t> worst case is built from strBufLen and
//    charRefBufLen, then checked for overflow (isValid()) and against the
//    same limit.
//  - The worst case is forwarded to tokenHandler->EnsureBufferSpace().
//  - strBuf is assigned a fallible allocation of RoundUpPow2(worst case)
//    units; alternatively, when the worst case exceeds strBuf.length, a new
//    fallible buffer of that rounded size is allocated and the first
//    strBufLen char16_t units of strBuf are memcpy'd into it.
//
// NOTE(review): this copy appears to be missing lines -- the statements
// executed inside each failing check, the "add 2" step that the comment below
// describes, the condition that chooses between the two allocation branches,
// any allocation-failure handling, the final return and the closing braces.
// Confirm against the complete file before relying on this summary.
15 bool nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength
) {
16 MOZ_RELEASE_ASSERT(aLength
>= 0, "Negative length.");
17 if (aLength
> MAX_POWER_OF_TWO_IN_INT32
) {
18 // Can't happen when loading from network.
21 mozilla::CheckedInt
<int32_t> worstCase(strBufLen
);
23 worstCase
+= charRefBufLen
;
24 // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
25 // Adding to the general worst case instead of only the
26 // TreeBuilder-exposed worst case to avoid re-introducing a bug when
27 // unifying the tokenizer and tree builder buffers in the future.
29 if (!worstCase
.isValid()) {
32 if (worstCase
.value() > MAX_POWER_OF_TWO_IN_INT32
) {
35 // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
36 // so that the call below becomes unnecessary.
37 if (!tokenHandler
->EnsureBufferSpace(worstCase
.value())) {
41 if (worstCase
.value() < MAX_POWER_OF_TWO_IN_INT32
) {
42 // Add one to round to the next power of two to avoid immediate
43 // reallocation once there are a few characters in the buffer.
46 strBuf
= jArray
<char16_t
, int32_t>::newFallibleJArray(
47 mozilla::RoundUpPow2(worstCase
.value()));
51 } else if (worstCase
.value() > strBuf
.length
) {
52 jArray
<char16_t
, int32_t> newBuf
=
53 jArray
<char16_t
, int32_t>::newFallibleJArray(
54 mozilla::RoundUpPow2(worstCase
.value()));
58 memcpy(newBuf
, strBuf
, sizeof(char16_t
) * size_t(strBufLen
));
// When an encodingDeclarationHandler is set, forwards to its
// TemplatePushedOrHeadPopped() and returns that result.
// NOTE(review): the value returned when no handler is set, and the closing
// braces, are not present in this copy -- confirm against the full file.
64 bool nsHtml5Tokenizer::TemplatePushedOrHeadPopped() {
65 if (encodingDeclarationHandler
) {
66 return encodingDeclarationHandler
->TemplatePushedOrHeadPopped();
71 void nsHtml5Tokenizer::RememberGt(int32_t aPos
) {
72 if (encodingDeclarationHandler
) {
73 return encodingDeclarationHandler
->RememberGt(aPos
);
77 void nsHtml5Tokenizer::StartPlainText() {
78 stateSave
= nsHtml5Tokenizer::PLAINTEXT
;
81 void nsHtml5Tokenizer::EnableViewSource(nsHtml5Highlighter
* aHighlighter
) {
82 mViewSource
= mozilla::WrapUnique(aHighlighter
);
85 bool nsHtml5Tokenizer::ShouldFlushViewSource() {
86 return mViewSource
->ShouldFlushOps();
89 mozilla::Result
<bool, nsresult
> nsHtml5Tokenizer::FlushViewSource() {
90 return mViewSource
->FlushOps();
93 void nsHtml5Tokenizer::StartViewSource(const nsAutoString
& aTitle
) {
94 mViewSource
->Start(aTitle
);
97 void nsHtml5Tokenizer::StartViewSourceCharacters() {
98 mViewSource
->StartCharacters();
101 [[nodiscard
]] bool nsHtml5Tokenizer::EndViewSource() {
102 return mViewSource
->End();
105 void nsHtml5Tokenizer::SetViewSourceOpSink(nsAHtml5TreeOpSink
* aOpSink
) {
106 mViewSource
->SetOpSink(aOpSink
);
109 void nsHtml5Tokenizer::RewindViewSource() { mViewSource
->Rewind(); }
111 void nsHtml5Tokenizer::errWarnLtSlashInRcdata() {}
113 // The null checks below annotated MOZ_LIKELY are not actually necessary.
// Reports a bad character in an unquoted attribute value through
// mViewSource->AddErrorToCurrentNode(). The visible calls pass
// "errUnquotedAttributeLt", "errUnquotedAttributeGrave",
// "errUnquotedAttributeQuote" and "errUnquotedAttributeEquals", all after a
// MOZ_LIKELY(mViewSource) check.
// NOTE(review): the code that picks one of these strings based on `c`, and
// the closing braces, are missing from this copy -- confirm the mapping
// against the full file.
115 void nsHtml5Tokenizer::errUnquotedAttributeValOrNull(char16_t c
) {
116 if (MOZ_LIKELY(mViewSource
)) {
119 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeLt");
122 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeGrave");
126 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeQuote");
129 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeEquals");
// Reports a bad character at the start of an unquoted attribute value through
// mViewSource->AddErrorToCurrentNode(). The visible calls pass
// "errUnquotedAttributeStartEquals", "errUnquotedAttributeStartLt" and
// "errUnquotedAttributeStartGrave", all after a MOZ_LIKELY(mViewSource) check.
// NOTE(review): the parameter list, the code that selects among the three
// strings, and the closing braces are missing from this copy -- confirm
// against the full file.
135 void nsHtml5Tokenizer::errLtOrEqualsOrGraveInUnquotedAttributeOrNull(
137 if (MOZ_LIKELY(mViewSource
)) {
140 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartEquals");
143 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartLt");
146 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartGrave");
// Reports a bad character found before an attribute name.
// Visible branches: one passes "errBadCharBeforeAttributeNameLt" to
// mViewSource->AddErrorToCurrentNode(); c == '=' delegates to
// errEqualsSignBeforeAttributeName(); any remaining character other than
// 0xFFFD delegates to errQuoteBeforeAttributeName(c).
// NOTE(review): the condition that guards the "...Lt" report (between the
// mViewSource check and the call) and the closing braces are missing from
// this copy -- confirm how the else-if chain nests.
152 void nsHtml5Tokenizer::errBadCharBeforeAttributeNameOrNull(char16_t c
) {
153 if (MOZ_LIKELY(mViewSource
)) {
155 mViewSource
->AddErrorToCurrentNode("errBadCharBeforeAttributeNameLt");
156 } else if (c
== '=') {
157 errEqualsSignBeforeAttributeName();
158 } else if (c
!= 0xFFFD) {
159 errQuoteBeforeAttributeName(c
);
164 void nsHtml5Tokenizer::errBadCharAfterLt(char16_t c
) {
165 if (MOZ_LIKELY(mViewSource
)) {
166 mViewSource
->AddErrorToCurrentNode("errBadCharAfterLt");
// Reports a bad character inside an attribute name through
// mViewSource->AddErrorToCurrentNode(): one branch passes
// "errLtInAttributeName"; the `c != 0xFFFD` branch passes
// "errQuoteInAttributeName".
// NOTE(review): the condition that guards the "errLtInAttributeName" report
// and the closing braces are missing from this copy -- confirm how the
// else-if nests relative to the mViewSource check.
170 void nsHtml5Tokenizer::errQuoteOrLtInAttributeNameOrNull(char16_t c
) {
171 if (MOZ_LIKELY(mViewSource
)) {
173 mViewSource
->AddErrorToCurrentNode("errLtInAttributeName");
174 } else if (c
!= 0xFFFD) {
175 mViewSource
->AddErrorToCurrentNode("errQuoteInAttributeName");
180 void nsHtml5Tokenizer::maybeErrAttributesOnEndTag(
181 nsHtml5HtmlAttributes
* attrs
) {
182 if (mViewSource
&& attrs
->getLength() != 0) {
184 * When an end tag token is emitted with attributes, that is a parse
187 mViewSource
->AddErrorToCurrentRun("maybeErrAttributesOnEndTag");
191 void nsHtml5Tokenizer::maybeErrSlashInEndTag(bool selfClosing
) {
192 if (mViewSource
&& selfClosing
&& endTag
) {
193 mViewSource
->AddErrorToCurrentSlash("maybeErrSlashInEndTag");
// When mViewSource is set, passes "errNcrNonCharacter" to
// mViewSource->AddErrorToCurrentNode().
// NOTE(review): the function is declared to return char16_t but no return
// statement (nor the closing braces) is present in this copy -- presumably it
// returns `ch`; confirm against the full file.
197 char16_t
nsHtml5Tokenizer::errNcrNonCharacter(char16_t ch
) {
198 if (MOZ_UNLIKELY(mViewSource
)) {
199 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
204 void nsHtml5Tokenizer::errAstralNonCharacter(int32_t ch
) {
205 if (MOZ_UNLIKELY(mViewSource
)) {
206 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
// When mViewSource is set, passes "errNcrControlChar" to
// mViewSource->AddErrorToCurrentNode().
// NOTE(review): the function is declared to return char16_t but no return
// statement (nor the closing braces) is present in this copy -- presumably it
// returns `ch`; confirm against the full file.
210 char16_t
nsHtml5Tokenizer::errNcrControlChar(char16_t ch
) {
211 if (MOZ_UNLIKELY(mViewSource
)) {
212 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
217 void nsHtml5Tokenizer::errGarbageAfterLtSlash() {
218 if (MOZ_LIKELY(mViewSource
)) {
219 mViewSource
->AddErrorToCurrentNode("errGarbageAfterLtSlash");
223 void nsHtml5Tokenizer::errLtSlashGt() {
224 if (MOZ_LIKELY(mViewSource
)) {
225 mViewSource
->AddErrorToCurrentNode("errLtSlashGt");
229 void nsHtml5Tokenizer::errCharRefLacksSemicolon() {
230 if (MOZ_UNLIKELY(mViewSource
)) {
231 mViewSource
->AddErrorToCurrentNode("errCharRefLacksSemicolon");
235 void nsHtml5Tokenizer::errNoDigitsInNCR() {
236 if (MOZ_UNLIKELY(mViewSource
)) {
237 mViewSource
->AddErrorToCurrentNode("errNoDigitsInNCR");
241 void nsHtml5Tokenizer::errGtInSystemId() {
242 if (MOZ_LIKELY(mViewSource
)) {
243 mViewSource
->AddErrorToCurrentNode("errGtInSystemId");
247 void nsHtml5Tokenizer::errGtInPublicId() {
248 if (MOZ_LIKELY(mViewSource
)) {
249 mViewSource
->AddErrorToCurrentNode("errGtInPublicId");
253 void nsHtml5Tokenizer::errNamelessDoctype() {
254 if (MOZ_LIKELY(mViewSource
)) {
255 mViewSource
->AddErrorToCurrentNode("errNamelessDoctype");
259 void nsHtml5Tokenizer::errConsecutiveHyphens() {
260 if (MOZ_UNLIKELY(mViewSource
)) {
261 mViewSource
->AddErrorToCurrentNode("errConsecutiveHyphens");
265 void nsHtml5Tokenizer::errPrematureEndOfComment() {
266 if (MOZ_LIKELY(mViewSource
)) {
267 mViewSource
->AddErrorToCurrentNode("errPrematureEndOfComment");
271 void nsHtml5Tokenizer::errBogusComment() {
272 if (MOZ_UNLIKELY(mViewSource
)) {
273 mViewSource
->AddErrorToCurrentNode("errBogusComment");
277 void nsHtml5Tokenizer::errSlashNotFollowedByGt() {
278 if (MOZ_LIKELY(mViewSource
)) {
279 mViewSource
->AddErrorToCurrentSlash("errSlashNotFollowedByGt");
283 void nsHtml5Tokenizer::errNoSpaceBetweenAttributes() {
284 if (MOZ_LIKELY(mViewSource
)) {
285 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenAttributes");
289 void nsHtml5Tokenizer::errAttributeValueMissing() {
290 if (MOZ_LIKELY(mViewSource
)) {
291 mViewSource
->AddErrorToCurrentNode("errAttributeValueMissing");
295 void nsHtml5Tokenizer::errEqualsSignBeforeAttributeName() {
296 if (MOZ_LIKELY(mViewSource
)) {
297 mViewSource
->AddErrorToCurrentNode("errEqualsSignBeforeAttributeName");
301 void nsHtml5Tokenizer::errLtGt() {
302 if (MOZ_LIKELY(mViewSource
)) {
303 mViewSource
->AddErrorToCurrentNode("errLtGt");
307 void nsHtml5Tokenizer::errProcessingInstruction() {
308 if (MOZ_LIKELY(mViewSource
)) {
309 mViewSource
->AddErrorToCurrentNode("errProcessingInstruction");
313 void nsHtml5Tokenizer::errUnescapedAmpersandInterpretedAsCharacterReference() {
314 if (MOZ_UNLIKELY(mViewSource
)) {
315 mViewSource
->AddErrorToCurrentAmpersand(
316 "errUnescapedAmpersandInterpretedAsCharacterReference");
320 void nsHtml5Tokenizer::errNotSemicolonTerminated() {
321 if (MOZ_UNLIKELY(mViewSource
)) {
322 mViewSource
->AddErrorToCurrentNode("errNotSemicolonTerminated");
326 void nsHtml5Tokenizer::errNoNamedCharacterMatch() {
327 if (MOZ_UNLIKELY(mViewSource
)) {
328 mViewSource
->AddErrorToCurrentAmpersand("errNoNamedCharacterMatch");
332 void nsHtml5Tokenizer::errQuoteBeforeAttributeName(char16_t c
) {
333 if (MOZ_LIKELY(mViewSource
)) {
334 mViewSource
->AddErrorToCurrentNode("errQuoteBeforeAttributeName");
338 void nsHtml5Tokenizer::errExpectedPublicId() {
339 if (MOZ_LIKELY(mViewSource
)) {
340 mViewSource
->AddErrorToCurrentNode("errExpectedPublicId");
344 void nsHtml5Tokenizer::errBogusDoctype() {
345 if (MOZ_UNLIKELY(mViewSource
)) {
346 mViewSource
->AddErrorToCurrentNode("errBogusDoctype");
350 void nsHtml5Tokenizer::errNcrSurrogate() {
351 if (MOZ_UNLIKELY(mViewSource
)) {
352 mViewSource
->AddErrorToCurrentNode("errNcrSurrogate");
356 void nsHtml5Tokenizer::errNcrCr() {
357 if (MOZ_UNLIKELY(mViewSource
)) {
358 mViewSource
->AddErrorToCurrentNode("errNcrCr");
362 void nsHtml5Tokenizer::errNcrInC1Range() {
363 if (MOZ_UNLIKELY(mViewSource
)) {
364 mViewSource
->AddErrorToCurrentNode("errNcrInC1Range");
368 void nsHtml5Tokenizer::errEofInPublicId() {
369 if (MOZ_UNLIKELY(mViewSource
)) {
370 mViewSource
->AddErrorToCurrentRun("errEofInPublicId");
374 void nsHtml5Tokenizer::errEofInComment() {
375 if (MOZ_UNLIKELY(mViewSource
)) {
376 mViewSource
->AddErrorToCurrentRun("errEofInComment");
380 void nsHtml5Tokenizer::errEofInDoctype() {
381 if (MOZ_UNLIKELY(mViewSource
)) {
382 mViewSource
->AddErrorToCurrentRun("errEofInDoctype");
386 void nsHtml5Tokenizer::errEofInAttributeValue() {
387 if (MOZ_UNLIKELY(mViewSource
)) {
388 mViewSource
->AddErrorToCurrentRun("errEofInAttributeValue");
392 void nsHtml5Tokenizer::errEofInAttributeName() {
393 if (MOZ_UNLIKELY(mViewSource
)) {
394 mViewSource
->AddErrorToCurrentRun("errEofInAttributeName");
398 void nsHtml5Tokenizer::errEofWithoutGt() {
399 if (MOZ_UNLIKELY(mViewSource
)) {
400 mViewSource
->AddErrorToCurrentRun("errEofWithoutGt");
404 void nsHtml5Tokenizer::errEofInTagName() {
405 if (MOZ_UNLIKELY(mViewSource
)) {
406 mViewSource
->AddErrorToCurrentRun("errEofInTagName");
410 void nsHtml5Tokenizer::errEofInEndTag() {
411 if (MOZ_UNLIKELY(mViewSource
)) {
412 mViewSource
->AddErrorToCurrentRun("errEofInEndTag");
416 void nsHtml5Tokenizer::errEofAfterLt() {
417 if (MOZ_UNLIKELY(mViewSource
)) {
418 mViewSource
->AddErrorToCurrentRun("errEofAfterLt");
422 void nsHtml5Tokenizer::errNcrOutOfRange() {
423 if (MOZ_UNLIKELY(mViewSource
)) {
424 mViewSource
->AddErrorToCurrentNode("errNcrOutOfRange");
428 void nsHtml5Tokenizer::errNcrUnassigned() {
429 if (MOZ_UNLIKELY(mViewSource
)) {
430 mViewSource
->AddErrorToCurrentNode("errNcrUnassigned");
// Handles a duplicate attribute: calls
// attributes->setDuplicateAttributeError(), then, when mViewSource is set,
// passes "errDuplicateAttribute" to mViewSource->AddErrorToCurrentNode().
// NOTE(review): a line appears to be missing between the signature and the
// comment below (possibly a guard around the `attributes` call), along with
// the closing braces -- confirm against the full file.
434 void nsHtml5Tokenizer::errDuplicateAttribute() {
436 // There is an open issue for properly specifying this:
437 // https://github.com/whatwg/html/issues/3257
438 attributes
->setDuplicateAttributeError();
441 if (MOZ_UNLIKELY(mViewSource
)) {
442 mViewSource
->AddErrorToCurrentNode("errDuplicateAttribute");
446 void nsHtml5Tokenizer::errEofInSystemId() {
447 if (MOZ_UNLIKELY(mViewSource
)) {
448 mViewSource
->AddErrorToCurrentRun("errEofInSystemId");
452 void nsHtml5Tokenizer::errExpectedSystemId() {
453 if (MOZ_LIKELY(mViewSource
)) {
454 mViewSource
->AddErrorToCurrentNode("errExpectedSystemId");
458 void nsHtml5Tokenizer::errMissingSpaceBeforeDoctypeName() {
459 if (MOZ_LIKELY(mViewSource
)) {
460 mViewSource
->AddErrorToCurrentNode("errMissingSpaceBeforeDoctypeName");
464 void nsHtml5Tokenizer::errNestedComment() {
465 if (MOZ_LIKELY(mViewSource
)) {
466 mViewSource
->AddErrorToCurrentNode("errNestedComment");
470 void nsHtml5Tokenizer::errNcrControlChar() {
471 if (MOZ_UNLIKELY(mViewSource
)) {
472 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
476 void nsHtml5Tokenizer::errNcrZero() {
477 if (MOZ_UNLIKELY(mViewSource
)) {
478 mViewSource
->AddErrorToCurrentNode("errNcrZero");
482 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypeSystemKeywordAndQuote() {
483 if (MOZ_LIKELY(mViewSource
)) {
484 mViewSource
->AddErrorToCurrentNode(
485 "errNoSpaceBetweenDoctypeSystemKeywordAndQuote");
489 void nsHtml5Tokenizer::errNoSpaceBetweenPublicAndSystemIds() {
490 if (MOZ_LIKELY(mViewSource
)) {
491 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenPublicAndSystemIds");
// When mViewSource is set, passes
// "errNoSpaceBetweenDoctypePublicKeywordAndQuote" to
// mViewSource->AddErrorToCurrentNode().
// NOTE(review): the closing braces fall outside the visible part of the
// file -- confirm nothing else follows the call.
495 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypePublicKeywordAndQuote() {
496 if (MOZ_LIKELY(mViewSource
)) {
497 mViewSource
->AddErrorToCurrentNode(
498 "errNoSpaceBetweenDoctypePublicKeywordAndQuote");