1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "mozilla/CheckedInt.h"
6 #include "mozilla/Likely.h"
8 // INT32_MAX is (2^31)-1. Therefore, the highest power-of-two that fits
9 // is 2^30. Note that this is counting char16_t units. The underlying
10 // bytes will be twice that, but they fit even in 32-bit size_t even
11 // if a contiguous chunk of memory of that size is pretty unlikely to
12 // be available on a 32-bit system.
// 0x40000000 == 2^30, the largest power of two that an int32_t can hold.
13 #define MAX_POWER_OF_TWO_IN_INT32 0x40000000
// Makes sure the tokenizer's strBuf is large enough for a worst-case size
// computed from strBufLen and charRefBufLen (plus the slack described in the
// comments below), and asks tokenHandler to ensure the same amount of space.
//
// aLength: must be non-negative (release-asserted).
//
// NOTE(review): this excerpt does not show every line of the body. The
// statements inside most branches, the step that folds aLength into
// worstCase, and everything after the memcpy are not visible here, so the
// exact return values must be confirmed against the complete file.
15 bool nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength
) {
// A negative length aborts via release assertion.
16 MOZ_RELEASE_ASSERT(aLength
>= 0, "Negative length.");
// Guard for lengths above MAX_POWER_OF_TWO_IN_INT32 (branch body not visible
// in this excerpt).
17 if (aLength
> MAX_POWER_OF_TWO_IN_INT32
) {
18 // Can't happen when loading from network.
// Overflow-checked int32_t accumulator, seeded with strBufLen.
21 mozilla::CheckedInt
<int32_t> worstCase(strBufLen
);
23 worstCase
+= charRefBufLen
;
24 // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
25 // Adding to the general worst case instead of only the
26 // TreeBuilder-exposed worst case to avoid re-introducing a bug when
27 // unifying the tokenizer and tree builder buffers in the future.
// isValid() turns false once any of the additions above overflowed int32_t.
29 if (!worstCase
.isValid()) {
// Checks whether the combined size exceeds MAX_POWER_OF_TWO_IN_INT32.
32 if (worstCase
.value() > MAX_POWER_OF_TWO_IN_INT32
) {
35 // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
36 // so that the call below becomes unnecessary.
// Ask the token handler for the same amount of space; a false result is
// handled by this branch.
37 if (!tokenHandler
->EnsureBufferSpace(worstCase
.value())) {
41 if (worstCase
.value() < MAX_POWER_OF_TWO_IN_INT32
) {
42 // Add one to round to the next power of two to avoid immediate
43 // reallocation once there are a few characters in the buffer.
// Allocate strBuf with its capacity rounded up to a power of two.
// NOTE(review): newFallibleJArray presumably yields a null array on
// allocation failure — confirm against jArray.
46 strBuf
= jArray
<char16_t
, int32_t>::newFallibleJArray(
47 mozilla::RoundUpPow2(worstCase
.value()));
// Existing buffer is smaller than the worst case: allocate a larger one.
51 } else if (worstCase
.value() > strBuf
.length
) {
52 jArray
<char16_t
, int32_t> newBuf
=
53 jArray
<char16_t
, int32_t>::newFallibleJArray(
54 mozilla::RoundUpPow2(worstCase
.value()));
// Carry over the strBufLen char16_t units that are already buffered.
58 memcpy(newBuf
, strBuf
, sizeof(char16_t
) * size_t(strBufLen
));
64 void nsHtml5Tokenizer::StartPlainText() {
65 stateSave
= nsHtml5Tokenizer::PLAINTEXT
;
68 void nsHtml5Tokenizer::EnableViewSource(nsHtml5Highlighter
* aHighlighter
) {
69 mViewSource
= WrapUnique(aHighlighter
);
72 bool nsHtml5Tokenizer::FlushViewSource() { return mViewSource
->FlushOps(); }
74 void nsHtml5Tokenizer::StartViewSource(const nsAutoString
& aTitle
) {
75 mViewSource
->Start(aTitle
);
78 void nsHtml5Tokenizer::EndViewSource() { mViewSource
->End(); }
80 void nsHtml5Tokenizer::errWarnLtSlashInRcdata() {}
82 // The null checks below annotated MOZ_LIKELY are not actually necessary.
// Reports a problem with character c through
// mViewSource->AddErrorToCurrentNode(), using one of four strings
// (errUnquotedAttributeLt / Grave / Quote / Equals).
// NOTE(review): the code that picks a string based on c is not visible in
// this excerpt — presumably a switch over c; confirm against the full file.
84 void nsHtml5Tokenizer::errUnquotedAttributeValOrNull(char16_t c
) {
// Nothing is reported when mViewSource is null.
85 if (MOZ_LIKELY(mViewSource
)) {
88 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeLt");
91 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeGrave");
95 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeQuote");
98 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeEquals");
// Reports one of three strings (errUnquotedAttributeStartEquals / StartLt /
// StartGrave) through mViewSource->AddErrorToCurrentNode().
// NOTE(review): the parameter list and the code that picks a string are not
// visible in this excerpt; confirm both against the full file.
104 void nsHtml5Tokenizer::errLtOrEqualsOrGraveInUnquotedAttributeOrNull(
// Nothing is reported when mViewSource is null.
106 if (MOZ_LIKELY(mViewSource
)) {
109 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartEquals");
112 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartLt");
115 mViewSource
->AddErrorToCurrentNode("errUnquotedAttributeStartGrave");
// Reports an unexpected character c, choosing the report by the value of c.
// NOTE(review): the first condition of the if/else-if chain (the one guarding
// the "errBadCharBeforeAttributeNameLt" report) is not visible in this
// excerpt; confirm it against the full file.
121 void nsHtml5Tokenizer::errBadCharBeforeAttributeNameOrNull(char16_t c
) {
// Nothing is reported when mViewSource is null.
122 if (MOZ_LIKELY(mViewSource
)) {
124 mViewSource
->AddErrorToCurrentNode("errBadCharBeforeAttributeNameLt");
// '=' is delegated to the dedicated equals-sign reporter.
125 } else if (c
== '=') {
126 errEqualsSignBeforeAttributeName();
// Any remaining character except 0xFFFD is delegated to the quote reporter;
// 0xFFFD produces no report here.
127 } else if (c
!= 0xFFFD) {
128 errQuoteBeforeAttributeName(c
);
133 void nsHtml5Tokenizer::errBadCharAfterLt(char16_t c
) {
134 if (MOZ_LIKELY(mViewSource
)) {
135 mViewSource
->AddErrorToCurrentNode("errBadCharAfterLt");
// Reports either "errLtInAttributeName" or "errQuoteInAttributeName" through
// mViewSource->AddErrorToCurrentNode(), depending on c.
// NOTE(review): the first condition of the if/else-if chain (the one guarding
// the "errLtInAttributeName" report) is not visible in this excerpt; confirm
// it against the full file.
139 void nsHtml5Tokenizer::errQuoteOrLtInAttributeNameOrNull(char16_t c
) {
// Nothing is reported when mViewSource is null.
140 if (MOZ_LIKELY(mViewSource
)) {
142 mViewSource
->AddErrorToCurrentNode("errLtInAttributeName");
// Any remaining character except 0xFFFD gets the quote report; 0xFFFD
// produces no report here.
143 } else if (c
!= 0xFFFD) {
144 mViewSource
->AddErrorToCurrentNode("errQuoteInAttributeName");
149 void nsHtml5Tokenizer::maybeErrAttributesOnEndTag(
150 nsHtml5HtmlAttributes
* attrs
) {
151 if (mViewSource
&& attrs
->getLength() != 0) {
153 * When an end tag token is emitted with attributes, that is a parse
156 mViewSource
->AddErrorToCurrentRun("maybeErrAttributesOnEndTag");
160 void nsHtml5Tokenizer::maybeErrSlashInEndTag(bool selfClosing
) {
161 if (mViewSource
&& selfClosing
&& endTag
) {
162 mViewSource
->AddErrorToCurrentSlash("maybeErrSlashInEndTag");
// Passes "errNcrNonCharacter" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is set.
// NOTE(review): the function returns char16_t, but the return statement is
// not visible in this excerpt — presumably it returns ch; confirm against the
// full file.
166 char16_t
nsHtml5Tokenizer::errNcrNonCharacter(char16_t ch
) {
167 if (MOZ_UNLIKELY(mViewSource
)) {
168 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
173 void nsHtml5Tokenizer::errAstralNonCharacter(int32_t ch
) {
174 if (MOZ_UNLIKELY(mViewSource
)) {
175 mViewSource
->AddErrorToCurrentNode("errNcrNonCharacter");
// Passes "errNcrControlChar" to mViewSource->AddErrorToCurrentNode() when
// mViewSource is set.
// NOTE(review): the function returns char16_t, but the return statement is
// not visible in this excerpt — presumably it returns ch; confirm against the
// full file.
179 char16_t
nsHtml5Tokenizer::errNcrControlChar(char16_t ch
) {
180 if (MOZ_UNLIKELY(mViewSource
)) {
181 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
186 void nsHtml5Tokenizer::errGarbageAfterLtSlash() {
187 if (MOZ_LIKELY(mViewSource
)) {
188 mViewSource
->AddErrorToCurrentNode("errGarbageAfterLtSlash");
192 void nsHtml5Tokenizer::errLtSlashGt() {
193 if (MOZ_LIKELY(mViewSource
)) {
194 mViewSource
->AddErrorToCurrentNode("errLtSlashGt");
198 void nsHtml5Tokenizer::errCharRefLacksSemicolon() {
199 if (MOZ_UNLIKELY(mViewSource
)) {
200 mViewSource
->AddErrorToCurrentNode("errCharRefLacksSemicolon");
204 void nsHtml5Tokenizer::errNoDigitsInNCR() {
205 if (MOZ_UNLIKELY(mViewSource
)) {
206 mViewSource
->AddErrorToCurrentNode("errNoDigitsInNCR");
210 void nsHtml5Tokenizer::errGtInSystemId() {
211 if (MOZ_LIKELY(mViewSource
)) {
212 mViewSource
->AddErrorToCurrentNode("errGtInSystemId");
216 void nsHtml5Tokenizer::errGtInPublicId() {
217 if (MOZ_LIKELY(mViewSource
)) {
218 mViewSource
->AddErrorToCurrentNode("errGtInPublicId");
222 void nsHtml5Tokenizer::errNamelessDoctype() {
223 if (MOZ_LIKELY(mViewSource
)) {
224 mViewSource
->AddErrorToCurrentNode("errNamelessDoctype");
228 void nsHtml5Tokenizer::errConsecutiveHyphens() {
229 if (MOZ_UNLIKELY(mViewSource
)) {
230 mViewSource
->AddErrorToCurrentNode("errConsecutiveHyphens");
234 void nsHtml5Tokenizer::errPrematureEndOfComment() {
235 if (MOZ_LIKELY(mViewSource
)) {
236 mViewSource
->AddErrorToCurrentNode("errPrematureEndOfComment");
240 void nsHtml5Tokenizer::errBogusComment() {
241 if (MOZ_UNLIKELY(mViewSource
)) {
242 mViewSource
->AddErrorToCurrentNode("errBogusComment");
246 void nsHtml5Tokenizer::errSlashNotFollowedByGt() {
247 if (MOZ_LIKELY(mViewSource
)) {
248 mViewSource
->AddErrorToCurrentSlash("errSlashNotFollowedByGt");
252 void nsHtml5Tokenizer::errNoSpaceBetweenAttributes() {
253 if (MOZ_LIKELY(mViewSource
)) {
254 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenAttributes");
258 void nsHtml5Tokenizer::errAttributeValueMissing() {
259 if (MOZ_LIKELY(mViewSource
)) {
260 mViewSource
->AddErrorToCurrentNode("errAttributeValueMissing");
264 void nsHtml5Tokenizer::errEqualsSignBeforeAttributeName() {
265 if (MOZ_LIKELY(mViewSource
)) {
266 mViewSource
->AddErrorToCurrentNode("errEqualsSignBeforeAttributeName");
270 void nsHtml5Tokenizer::errLtGt() {
271 if (MOZ_LIKELY(mViewSource
)) {
272 mViewSource
->AddErrorToCurrentNode("errLtGt");
276 void nsHtml5Tokenizer::errProcessingInstruction() {
277 if (MOZ_LIKELY(mViewSource
)) {
278 mViewSource
->AddErrorToCurrentNode("errProcessingInstruction");
282 void nsHtml5Tokenizer::errUnescapedAmpersandInterpretedAsCharacterReference() {
283 if (MOZ_UNLIKELY(mViewSource
)) {
284 mViewSource
->AddErrorToCurrentAmpersand(
285 "errUnescapedAmpersandInterpretedAsCharacterReference");
289 void nsHtml5Tokenizer::errNotSemicolonTerminated() {
290 if (MOZ_UNLIKELY(mViewSource
)) {
291 mViewSource
->AddErrorToCurrentNode("errNotSemicolonTerminated");
295 void nsHtml5Tokenizer::errNoNamedCharacterMatch() {
296 if (MOZ_UNLIKELY(mViewSource
)) {
297 mViewSource
->AddErrorToCurrentAmpersand("errNoNamedCharacterMatch");
301 void nsHtml5Tokenizer::errQuoteBeforeAttributeName(char16_t c
) {
302 if (MOZ_LIKELY(mViewSource
)) {
303 mViewSource
->AddErrorToCurrentNode("errQuoteBeforeAttributeName");
307 void nsHtml5Tokenizer::errExpectedPublicId() {
308 if (MOZ_LIKELY(mViewSource
)) {
309 mViewSource
->AddErrorToCurrentNode("errExpectedPublicId");
313 void nsHtml5Tokenizer::errBogusDoctype() {
314 if (MOZ_UNLIKELY(mViewSource
)) {
315 mViewSource
->AddErrorToCurrentNode("errBogusDoctype");
319 void nsHtml5Tokenizer::errNcrSurrogate() {
320 if (MOZ_UNLIKELY(mViewSource
)) {
321 mViewSource
->AddErrorToCurrentNode("errNcrSurrogate");
325 void nsHtml5Tokenizer::errNcrCr() {
326 if (MOZ_UNLIKELY(mViewSource
)) {
327 mViewSource
->AddErrorToCurrentNode("errNcrCr");
331 void nsHtml5Tokenizer::errNcrInC1Range() {
332 if (MOZ_UNLIKELY(mViewSource
)) {
333 mViewSource
->AddErrorToCurrentNode("errNcrInC1Range");
337 void nsHtml5Tokenizer::errEofInPublicId() {
338 if (MOZ_UNLIKELY(mViewSource
)) {
339 mViewSource
->AddErrorToCurrentRun("errEofInPublicId");
343 void nsHtml5Tokenizer::errEofInComment() {
344 if (MOZ_UNLIKELY(mViewSource
)) {
345 mViewSource
->AddErrorToCurrentRun("errEofInComment");
349 void nsHtml5Tokenizer::errEofInDoctype() {
350 if (MOZ_UNLIKELY(mViewSource
)) {
351 mViewSource
->AddErrorToCurrentRun("errEofInDoctype");
355 void nsHtml5Tokenizer::errEofInAttributeValue() {
356 if (MOZ_UNLIKELY(mViewSource
)) {
357 mViewSource
->AddErrorToCurrentRun("errEofInAttributeValue");
361 void nsHtml5Tokenizer::errEofInAttributeName() {
362 if (MOZ_UNLIKELY(mViewSource
)) {
363 mViewSource
->AddErrorToCurrentRun("errEofInAttributeName");
367 void nsHtml5Tokenizer::errEofWithoutGt() {
368 if (MOZ_UNLIKELY(mViewSource
)) {
369 mViewSource
->AddErrorToCurrentRun("errEofWithoutGt");
373 void nsHtml5Tokenizer::errEofInTagName() {
374 if (MOZ_UNLIKELY(mViewSource
)) {
375 mViewSource
->AddErrorToCurrentRun("errEofInTagName");
379 void nsHtml5Tokenizer::errEofInEndTag() {
380 if (MOZ_UNLIKELY(mViewSource
)) {
381 mViewSource
->AddErrorToCurrentRun("errEofInEndTag");
385 void nsHtml5Tokenizer::errEofAfterLt() {
386 if (MOZ_UNLIKELY(mViewSource
)) {
387 mViewSource
->AddErrorToCurrentRun("errEofAfterLt");
391 void nsHtml5Tokenizer::errNcrOutOfRange() {
392 if (MOZ_UNLIKELY(mViewSource
)) {
393 mViewSource
->AddErrorToCurrentNode("errNcrOutOfRange");
397 void nsHtml5Tokenizer::errNcrUnassigned() {
398 if (MOZ_UNLIKELY(mViewSource
)) {
399 mViewSource
->AddErrorToCurrentNode("errNcrUnassigned");
403 void nsHtml5Tokenizer::errDuplicateAttribute() {
404 if (MOZ_UNLIKELY(mViewSource
)) {
405 mViewSource
->AddErrorToCurrentNode("errDuplicateAttribute");
409 void nsHtml5Tokenizer::errEofInSystemId() {
410 if (MOZ_UNLIKELY(mViewSource
)) {
411 mViewSource
->AddErrorToCurrentRun("errEofInSystemId");
415 void nsHtml5Tokenizer::errExpectedSystemId() {
416 if (MOZ_LIKELY(mViewSource
)) {
417 mViewSource
->AddErrorToCurrentNode("errExpectedSystemId");
421 void nsHtml5Tokenizer::errMissingSpaceBeforeDoctypeName() {
422 if (MOZ_LIKELY(mViewSource
)) {
423 mViewSource
->AddErrorToCurrentNode("errMissingSpaceBeforeDoctypeName");
427 void nsHtml5Tokenizer::errHyphenHyphenBang() {
428 if (MOZ_LIKELY(mViewSource
)) {
429 mViewSource
->AddErrorToCurrentNode("errHyphenHyphenBang");
433 void nsHtml5Tokenizer::errNcrControlChar() {
434 if (MOZ_UNLIKELY(mViewSource
)) {
435 mViewSource
->AddErrorToCurrentNode("errNcrControlChar");
439 void nsHtml5Tokenizer::errNcrZero() {
440 if (MOZ_UNLIKELY(mViewSource
)) {
441 mViewSource
->AddErrorToCurrentNode("errNcrZero");
445 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypeSystemKeywordAndQuote() {
446 if (MOZ_LIKELY(mViewSource
)) {
447 mViewSource
->AddErrorToCurrentNode(
448 "errNoSpaceBetweenDoctypeSystemKeywordAndQuote");
452 void nsHtml5Tokenizer::errNoSpaceBetweenPublicAndSystemIds() {
453 if (MOZ_LIKELY(mViewSource
)) {
454 mViewSource
->AddErrorToCurrentNode("errNoSpaceBetweenPublicAndSystemIds");
458 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypePublicKeywordAndQuote() {
459 if (MOZ_LIKELY(mViewSource
)) {
460 mViewSource
->AddErrorToCurrentNode(
461 "errNoSpaceBetweenDoctypePublicKeywordAndQuote");