1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "mozilla/CheckedInt.h"
6 #include "mozilla/Likely.h"
8 // INT32_MAX is (2^31)-1. Therefore, the highest power-of-two that fits
9 // is 2^30. Note that this is counting char16_t units. The underlying
10 // bytes will be twice that, but they fit even in 32-bit size_t even
11 // if a contiguous chunk of memory of that size is pretty unlikely to
12 // be available on a 32-bit system.
// 0x40000000 == 2^30. EnsureBufferSpace() compares both the requested
// length and its computed worst case against this limit.
13 #define MAX_POWER_OF_TWO_IN_INT32 0x40000000
// Grows the tokenizer's strBuf (and asks tokenHandler to grow its own
// buffer) so that a worst-case number of char16_t units fits.
//
// Visible steps:
//  - release-asserts that aLength is non-negative and checks it against
//    MAX_POWER_OF_TWO_IN_INT32;
//  - builds an overflow-checked int32_t worst case starting from strBufLen
//    and adding charRefBufLen;
//  - checks that the sum is valid and not above MAX_POWER_OF_TWO_IN_INT32;
//  - forwards the worst case to tokenHandler->EnsureBufferSpace();
//  - assigns strBuf a fallible array of RoundUpPow2(worst case) units, or,
//    when the worst case exceeds strBuf.length, allocates a larger fallible
//    array and copies the first strBufLen units of strBuf into it.
//
// NOTE(review): the return type, the statements executed when a check fails,
// the additions of aLength / 2 / 1 mentioned by the comments, and the
// condition guarding the first allocation are not visible in this excerpt.
// Presumably the function returns false on any failure and true otherwise;
// confirm against the full file.
16 nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength
)
18 MOZ_RELEASE_ASSERT(aLength
>= 0, "Negative length.");
19 if (aLength
> MAX_POWER_OF_TWO_IN_INT32
) {
20 // Can't happen when loading from network.
// CheckedInt tracks overflow so that isValid() can be tested below.
23 mozilla::CheckedInt
<int32_t> worstCase(strBufLen
);
25 worstCase
+= charRefBufLen
;
26 // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
27 // Adding to the general worst case instead of only the
28 // TreeBuilder-exposed worst case to avoid re-introducing a bug when
29 // unifying the tokenizer and tree builder buffers in the future.
31 if (!worstCase
.isValid()) {
34 if (worstCase
.value() > MAX_POWER_OF_TWO_IN_INT32
) {
37 // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
38 // so that the call below becomes unnecessary.
39 if (!tokenHandler
->EnsureBufferSpace(worstCase
.value())) {
43 if (worstCase
.value() < MAX_POWER_OF_TWO_IN_INT32
) {
44 // Add one to round to the next power of two to avoid immediate
45 // reallocation once there are a few characters in the buffer.
// The allocation size is the worst case rounded up to a power of two.
48 strBuf
= jArray
<char16_t
, int32_t>::newFallibleJArray(
49 mozilla::RoundUpPow2(worstCase
.value()));
// Existing buffer too small: allocate a replacement and copy the content.
53 } else if (worstCase
.value() > strBuf
.length
) {
54 jArray
<char16_t
, int32_t> newBuf
=
55 jArray
<char16_t
, int32_t>::newFallibleJArray(
56 mozilla::RoundUpPow2(worstCase
.value()));
// Only the strBufLen units currently in use are copied.
60 memcpy(newBuf
, strBuf
, sizeof(char16_t
) * size_t(strBufLen
));
// Sets stateSave to nsHtml5Tokenizer::PLAINTEXT.
// NOTE(review): presumably this makes the tokenizer start in the plaintext
// state; how stateSave is consumed is not visible in this excerpt.
67 nsHtml5Tokenizer::StartPlainText()
69 stateSave
= nsHtml5Tokenizer::PLAINTEXT
;
// Stores aHighlighter in mViewSource. The err*/maybeErr* methods in this file
// test mViewSource for null before reporting through it.
73 nsHtml5Tokenizer::EnableViewSource(nsHtml5Highlighter
* aHighlighter
)
75 mViewSource
= aHighlighter
;
// Returns the result of mViewSource->FlushOps().
// No null check on mViewSource is visible here, so callers presumably only
// invoke this after EnableViewSource() -- confirm.
79 nsHtml5Tokenizer::FlushViewSource()
81 return mViewSource
->FlushOps();
// Calls mViewSource->Start(aTitle).
// No null check on mViewSource is visible here, so callers presumably only
// invoke this after EnableViewSource() -- confirm.
85 nsHtml5Tokenizer::StartViewSource(const nsAutoString
& aTitle
)
87 mViewSource
->Start(aTitle
);
// NOTE(review): only the signature line is visible in this excerpt; the body
// is not shown, so its behavior is not documented here. Consult the full file.
91 nsHtml5Tokenizer::EndViewSource()
// NOTE(review): only the signature line is visible in this excerpt; the body
// is not shown, so its behavior is not documented here. Consult the full file.
97 nsHtml5Tokenizer::errWarnLtSlashInRcdata()
101 // The null checks below annotated MOZ_LIKELY are not actually necessary.
// When mViewSource is non-null, passes one of "errUnquotedAttributeLt",
// "errUnquotedAttributeGrave", "errUnquotedAttributeQuote" or
// "errUnquotedAttributeEquals" to mViewSource->AddErrorToCurrentNode().
// NOTE(review): the conditions that select among the four strings are not
// visible in this excerpt; presumably they switch on c -- confirm.
104 nsHtml5Tokenizer::errUnquotedAttributeValOrNull(char16_t c
)
106 if (MOZ_LIKELY(mViewSource
)) {
109 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeLt");
112 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeGrave");
116 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeQuote");
119 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeEquals");
// When mViewSource is non-null, passes one of
// "errUnquotedAttributeStartEquals", "errUnquotedAttributeStartLt" or
// "errUnquotedAttributeStartGrave" to mViewSource->AddErrorToCurrentNode().
// NOTE(review): the conditions that select among the three strings are not
// visible in this excerpt; presumably they switch on c -- confirm.
126 nsHtml5Tokenizer::errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char16_t c
)
128 if (MOZ_LIKELY(mViewSource
)) {
131 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartEquals");
134 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartLt");
137 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartGrave");
// When mViewSource is non-null, dispatches on c:
//  - first branch: passes "errBadCharBeforeAttributeNameLt" to
//    mViewSource->AddErrorToCurrentNode();
//  - c == '=': delegates to errEqualsSignBeforeAttributeName();
//  - any other c except 0xFFFD: delegates to errQuoteBeforeAttributeName(c).
// NOTE(review): the condition of the first branch is not visible in this
// excerpt; presumably it is c == '<' -- confirm.
144 nsHtml5Tokenizer::errBadCharBeforeAttributeNameOrNull(char16_t c
)
146 if (MOZ_LIKELY(mViewSource
)) {
148 mViewSource
->AddErrorToCurrentNode("errBadCharBeforeAttributeNameLt");
149 } else if (c
== '=') {
150 errEqualsSignBeforeAttributeName();
// 0xFFFD is deliberately excluded: no error is reported for it here.
151 } else if (c
!= 0xFFFD) {
152 errQuoteBeforeAttributeName(c
);
// Passes "errBadCharAfterLt" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null. c is not referenced in the lines visible here.
158 nsHtml5Tokenizer::errBadCharAfterLt(char16_t c
)
160 if (MOZ_LIKELY(mViewSource
)) {
161 mViewSource
->AddErrorToCurrentNode("errBadCharAfterLt");
// When mViewSource is non-null, passes "errLtInAttributeName" (first branch)
// or, for any other c except 0xFFFD, "errQuoteInAttributeName" to
// mViewSource->AddErrorToCurrentNode().
// NOTE(review): the condition of the first branch is not visible in this
// excerpt; presumably it is c == '<' -- confirm.
166 nsHtml5Tokenizer::errQuoteOrLtInAttributeNameOrNull(char16_t c
)
168 if (MOZ_LIKELY(mViewSource
)) {
170 mViewSource
->AddErrorToCurrentNode("errLtInAttributeName");
171 } else if (c
!= 0xFFFD) {
172 mViewSource
->AddErrorToCurrentNode("errQuoteInAttributeName");
// Passes "maybeErrAttributesOnEndTag" to mViewSource->AddErrorToCurrentRun()
// when mViewSource is non-null and attrs reports a non-zero length.
// attrs is dereferenced without a null check whenever mViewSource is set.
178 nsHtml5Tokenizer::maybeErrAttributesOnEndTag(nsHtml5HtmlAttributes
* attrs
)
180 if (mViewSource
&& attrs
->getLength() != 0) {
182 * When an end tag token is emitted with attributes, that is a parse
185 mViewSource
->AddErrorToCurrentRun("maybeErrAttributesOnEndTag");
// Passes "maybeErrSlashInEndTag" to mViewSource->AddErrorToCurrentSlash()
// when mViewSource is non-null and both selfClosing and the endTag member
// are true.
190 nsHtml5Tokenizer::maybeErrSlashInEndTag(bool selfClosing
)
192 if (mViewSource
&& selfClosing
&& endTag
) {
193 mViewSource
->AddErrorToCurrentSlash("maybeErrSlashInEndTag");
// Passes "errNcrNonCharacter" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY). ch is not
// referenced in the lines visible here.
198 nsHtml5Tokenizer::errNcrNonCharacter(char16_t ch
)
200 if (MOZ_UNLIKELY(mViewSource
)) {
201 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
// Passes "errNcrNonCharacter" -- the same string errNcrNonCharacter() uses --
// to mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
// ch is not referenced in the lines visible here.
207 nsHtml5Tokenizer::errAstralNonCharacter(int32_t ch
)
209 if (MOZ_UNLIKELY(mViewSource
)) {
210 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
// One-argument overload: passes "errNcrControlChar" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
// ch is not referenced in the lines visible here.
215 nsHtml5Tokenizer::errNcrControlChar(char16_t ch
)
217 if (MOZ_UNLIKELY(mViewSource
)) {
218 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
// Passes "errGarbageAfterLtSlash" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null.
224 nsHtml5Tokenizer::errGarbageAfterLtSlash()
226 if (MOZ_LIKELY(mViewSource
)) {
227 mViewSource
->AddErrorToCurrentNode("errGarbageAfterLtSlash");
// Passes "errLtSlashGt" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
232 nsHtml5Tokenizer::errLtSlashGt()
234 if (MOZ_LIKELY(mViewSource
)) {
235 mViewSource
->AddErrorToCurrentNode("errLtSlashGt");
// Passes "errCharRefLacksSemicolon" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null (check annotated MOZ_UNLIKELY).
240 nsHtml5Tokenizer::errCharRefLacksSemicolon()
242 if (MOZ_UNLIKELY(mViewSource
)) {
243 mViewSource
->AddErrorToCurrentNode("errCharRefLacksSemicolon");
// Passes "errNoDigitsInNCR" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
248 nsHtml5Tokenizer::errNoDigitsInNCR()
250 if (MOZ_UNLIKELY(mViewSource
)) {
251 mViewSource
->AddErrorToCurrentNode("errNoDigitsInNCR");
// Passes "errGtInSystemId" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
256 nsHtml5Tokenizer::errGtInSystemId()
258 if (MOZ_LIKELY(mViewSource
)) {
259 mViewSource
->AddErrorToCurrentNode("errGtInSystemId");
// Passes "errGtInPublicId" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
264 nsHtml5Tokenizer::errGtInPublicId()
266 if (MOZ_LIKELY(mViewSource
)) {
267 mViewSource
->AddErrorToCurrentNode("errGtInPublicId");
// Passes "errNamelessDoctype" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
272 nsHtml5Tokenizer::errNamelessDoctype()
274 if (MOZ_LIKELY(mViewSource
)) {
275 mViewSource
->AddErrorToCurrentNode("errNamelessDoctype");
// Passes "errConsecutiveHyphens" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null (check annotated MOZ_UNLIKELY).
280 nsHtml5Tokenizer::errConsecutiveHyphens()
282 if (MOZ_UNLIKELY(mViewSource
)) {
283 mViewSource
->AddErrorToCurrentNode("errConsecutiveHyphens");
// Passes "errPrematureEndOfComment" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null.
288 nsHtml5Tokenizer::errPrematureEndOfComment()
290 if (MOZ_LIKELY(mViewSource
)) {
291 mViewSource
->AddErrorToCurrentNode("errPrematureEndOfComment");
// Passes "errBogusComment" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
296 nsHtml5Tokenizer::errBogusComment()
298 if (MOZ_UNLIKELY(mViewSource
)) {
299 mViewSource
->AddErrorToCurrentNode("errBogusComment");
// Passes "errSlashNotFollowedByGt" to mViewSource->AddErrorToCurrentSlash()
// (not ...ToCurrentNode) when mViewSource is non-null.
304 nsHtml5Tokenizer::errSlashNotFollowedByGt()
306 if (MOZ_LIKELY(mViewSource
)) {
307 mViewSource
->AddErrorToCurrentSlash("errSlashNotFollowedByGt");
// Passes "errNoSpaceBetweenAttributes" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
312 nsHtml5Tokenizer::errNoSpaceBetweenAttributes()
314 if (MOZ_LIKELY(mViewSource
)) {
315 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenAttributes");
// Passes "errAttributeValueMissing" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null.
320 nsHtml5Tokenizer::errAttributeValueMissing()
322 if (MOZ_LIKELY(mViewSource
)) {
323 mViewSource
->AddErrorToCurrentNode("errAttributeValueMissing");
// Passes "errEqualsSignBeforeAttributeName" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
// Also invoked from errBadCharBeforeAttributeNameOrNull() for c == '='.
328 nsHtml5Tokenizer::errEqualsSignBeforeAttributeName()
330 if (MOZ_LIKELY(mViewSource
)) {
331 mViewSource
->AddErrorToCurrentNode("errEqualsSignBeforeAttributeName");
// Passes "errLtGt" to mViewSource->AddErrorToCurrentNode() when mViewSource
// is non-null.
336 nsHtml5Tokenizer::errLtGt()
338 if (MOZ_LIKELY(mViewSource
)) {
339 mViewSource
->AddErrorToCurrentNode("errLtGt");
// Passes "errProcessingInstruction" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null.
344 nsHtml5Tokenizer::errProcessingInstruction()
346 if (MOZ_LIKELY(mViewSource
)) {
347 mViewSource
->AddErrorToCurrentNode("errProcessingInstruction");
// Passes "errUnescapedAmpersandInterpretedAsCharacterReference" to
// mViewSource->AddErrorToCurrentAmpersand() (not ...ToCurrentNode) when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
352 nsHtml5Tokenizer::errUnescapedAmpersandInterpretedAsCharacterReference()
354 if (MOZ_UNLIKELY(mViewSource
)) {
355 mViewSource
->AddErrorToCurrentAmpersand(
356 "errUnescapedAmpersandInterpretedAsCharacterReference");
// Passes "errNotSemicolonTerminated" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null (check annotated MOZ_UNLIKELY).
361 nsHtml5Tokenizer::errNotSemicolonTerminated()
363 if (MOZ_UNLIKELY(mViewSource
)) {
364 mViewSource
->AddErrorToCurrentNode("errNotSemicolonTerminated");
// Passes "errNoNamedCharacterMatch" to
// mViewSource->AddErrorToCurrentAmpersand() (not ...ToCurrentNode) when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
369 nsHtml5Tokenizer::errNoNamedCharacterMatch()
371 if (MOZ_UNLIKELY(mViewSource
)) {
372 mViewSource
->AddErrorToCurrentAmpersand("errNoNamedCharacterMatch");
// Passes "errQuoteBeforeAttributeName" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
// c is not referenced in the lines visible here. Also invoked from
// errBadCharBeforeAttributeNameOrNull().
377 nsHtml5Tokenizer::errQuoteBeforeAttributeName(char16_t c
)
379 if (MOZ_LIKELY(mViewSource
)) {
380 mViewSource
->AddErrorToCurrentNode("errQuoteBeforeAttributeName");
// Passes "errExpectedPublicId" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
385 nsHtml5Tokenizer::errExpectedPublicId()
387 if (MOZ_LIKELY(mViewSource
)) {
388 mViewSource
->AddErrorToCurrentNode("errExpectedPublicId");
// Passes "errBogusDoctype" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
393 nsHtml5Tokenizer::errBogusDoctype()
395 if (MOZ_UNLIKELY(mViewSource
)) {
396 mViewSource
->AddErrorToCurrentNode("errBogusDoctype");
// Passes "errNcrSurrogate" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
401 nsHtml5Tokenizer::errNcrSurrogate()
403 if (MOZ_UNLIKELY(mViewSource
)) {
404 mViewSource
->AddErrorToCurrentNode("errNcrSurrogate");
// Passes "errNcrCr" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
409 nsHtml5Tokenizer::errNcrCr()
411 if (MOZ_UNLIKELY(mViewSource
)) {
412 mViewSource
->AddErrorToCurrentNode("errNcrCr");
// Passes "errNcrInC1Range" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
417 nsHtml5Tokenizer::errNcrInC1Range()
419 if (MOZ_UNLIKELY(mViewSource
)) {
420 mViewSource
->AddErrorToCurrentNode("errNcrInC1Range");
// Passes "errEofInPublicId" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
425 nsHtml5Tokenizer::errEofInPublicId()
427 if (MOZ_UNLIKELY(mViewSource
)) {
428 mViewSource
->AddErrorToCurrentRun("errEofInPublicId");
// Passes "errEofInComment" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
433 nsHtml5Tokenizer::errEofInComment()
435 if (MOZ_UNLIKELY(mViewSource
)) {
436 mViewSource
->AddErrorToCurrentRun("errEofInComment");
// Passes "errEofInDoctype" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
441 nsHtml5Tokenizer::errEofInDoctype()
443 if (MOZ_UNLIKELY(mViewSource
)) {
444 mViewSource
->AddErrorToCurrentRun("errEofInDoctype");
// Passes "errEofInAttributeValue" to mViewSource->AddErrorToCurrentRun()
// (not ...ToCurrentNode) when mViewSource is non-null.
449 nsHtml5Tokenizer::errEofInAttributeValue()
451 if (MOZ_UNLIKELY(mViewSource
)) {
452 mViewSource
->AddErrorToCurrentRun("errEofInAttributeValue");
// Passes "errEofInAttributeName" to mViewSource->AddErrorToCurrentRun()
// (not ...ToCurrentNode) when mViewSource is non-null.
457 nsHtml5Tokenizer::errEofInAttributeName()
459 if (MOZ_UNLIKELY(mViewSource
)) {
460 mViewSource
->AddErrorToCurrentRun("errEofInAttributeName");
// Passes "errEofWithoutGt" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
465 nsHtml5Tokenizer::errEofWithoutGt()
467 if (MOZ_UNLIKELY(mViewSource
)) {
468 mViewSource
->AddErrorToCurrentRun("errEofWithoutGt");
// Passes "errEofInTagName" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
473 nsHtml5Tokenizer::errEofInTagName()
475 if (MOZ_UNLIKELY(mViewSource
)) {
476 mViewSource
->AddErrorToCurrentRun("errEofInTagName");
// Passes "errEofInEndTag" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
481 nsHtml5Tokenizer::errEofInEndTag()
483 if (MOZ_UNLIKELY(mViewSource
)) {
484 mViewSource
->AddErrorToCurrentRun("errEofInEndTag");
// Passes "errEofAfterLt" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
489 nsHtml5Tokenizer::errEofAfterLt()
491 if (MOZ_UNLIKELY(mViewSource
)) {
492 mViewSource
->AddErrorToCurrentRun("errEofAfterLt");
// Passes "errNcrOutOfRange" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
497 nsHtml5Tokenizer::errNcrOutOfRange()
499 if (MOZ_UNLIKELY(mViewSource
)) {
500 mViewSource
->AddErrorToCurrentNode("errNcrOutOfRange");
// Passes "errNcrUnassigned" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
505 nsHtml5Tokenizer::errNcrUnassigned()
507 if (MOZ_UNLIKELY(mViewSource
)) {
508 mViewSource
->AddErrorToCurrentNode("errNcrUnassigned");
// Passes "errDuplicateAttribute" to mViewSource->AddErrorToCurrentNode()
// when mViewSource is non-null (check annotated MOZ_UNLIKELY).
513 nsHtml5Tokenizer::errDuplicateAttribute()
515 if (MOZ_UNLIKELY(mViewSource
)) {
516 mViewSource
->AddErrorToCurrentNode("errDuplicateAttribute");
// Passes "errEofInSystemId" to mViewSource->AddErrorToCurrentRun() (not
// ...ToCurrentNode) when mViewSource is non-null.
521 nsHtml5Tokenizer::errEofInSystemId()
523 if (MOZ_UNLIKELY(mViewSource
)) {
524 mViewSource
->AddErrorToCurrentRun("errEofInSystemId");
// Passes "errExpectedSystemId" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
529 nsHtml5Tokenizer::errExpectedSystemId()
531 if (MOZ_LIKELY(mViewSource
)) {
532 mViewSource
->AddErrorToCurrentNode("errExpectedSystemId");
// Passes "errMissingSpaceBeforeDoctypeName" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
537 nsHtml5Tokenizer::errMissingSpaceBeforeDoctypeName()
539 if (MOZ_LIKELY(mViewSource
)) {
540 mViewSource
->AddErrorToCurrentNode("errMissingSpaceBeforeDoctypeName");
// Passes "errHyphenHyphenBang" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null.
545 nsHtml5Tokenizer::errHyphenHyphenBang()
547 if (MOZ_LIKELY(mViewSource
)) {
548 mViewSource
->AddErrorToCurrentNode("errHyphenHyphenBang");
// Zero-argument overload: passes "errNcrControlChar" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null, the
// same string the char16_t overload reports.
553 nsHtml5Tokenizer::errNcrControlChar()
555 if (MOZ_UNLIKELY(mViewSource
)) {
556 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
// Passes "errNcrZero" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is non-null (check annotated MOZ_UNLIKELY).
561 nsHtml5Tokenizer::errNcrZero()
563 if (MOZ_UNLIKELY(mViewSource
)) {
564 mViewSource
->AddErrorToCurrentNode("errNcrZero");
// Passes "errNoSpaceBetweenDoctypeSystemKeywordAndQuote" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
569 nsHtml5Tokenizer::errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
571 if (MOZ_LIKELY(mViewSource
)) {
572 mViewSource
->AddErrorToCurrentNode(
573 "errNoSpaceBetweenDoctypeSystemKeywordAndQuote");
// Passes "errNoSpaceBetweenPublicAndSystemIds" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
578 nsHtml5Tokenizer::errNoSpaceBetweenPublicAndSystemIds()
580 if (MOZ_LIKELY(mViewSource
)) {
581 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenPublicAndSystemIds");
// Passes "errNoSpaceBetweenDoctypePublicKeywordAndQuote" to
// mViewSource->AddErrorToCurrentNode() when mViewSource is non-null.
586 nsHtml5Tokenizer::errNoSpaceBetweenDoctypePublicKeywordAndQuote()
588 if (MOZ_LIKELY(mViewSource
)) {
589 mViewSource
->AddErrorToCurrentNode(
590 "errNoSpaceBetweenDoctypePublicKeywordAndQuote");