1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=79: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "mozilla/DebugOnly.h"
9 #include "nsHtml5StreamParser.h"
10 #include "nsContentUtils.h"
11 #include "nsHtml5Tokenizer.h"
12 #include "nsIHttpChannel.h"
13 #include "nsHtml5Parser.h"
14 #include "nsHtml5TreeBuilder.h"
15 #include "nsHtml5AtomTable.h"
16 #include "nsHtml5Module.h"
17 #include "nsHtml5RefPtr.h"
18 #include "nsIScriptError.h"
19 #include "mozilla/Preferences.h"
20 #include "nsHtml5Highlighter.h"
21 #include "expat_config.h"
23 #include "nsINestedURI.h"
24 #include "nsCharsetSource.h"
25 #include "nsIWyciwygChannel.h"
26 #include "nsIThreadRetargetableRequest.h"
27 #include "nsPrintfCString.h"
28 #include "nsNetUtil.h"
29 #include "nsXULAppAPI.h"
31 #include "mozilla/dom/EncodingUtils.h"
// File-wide using declarations, followed by the definitions of the two
// static flush-timer delay members. Both start out as 120 (presumably
// milliseconds -- TODO confirm against the timer code that consumes them).
33 using namespace mozilla
;
34 using mozilla::dom::EncodingUtils
;
36 int32_t nsHtml5StreamParser::sTimerInitialDelay
= 120;
37 int32_t nsHtml5StreamParser::sTimerSubsequentDelay
= 120;
// Registers the two static flush-timer delay members as integer pref var
// caches for "html5.flushtimer.initialdelay" and
// "html5.flushtimer.subsequentdelay".
41 nsHtml5StreamParser::InitializeStatics()
43 Preferences::AddIntVarCache(&sTimerInitialDelay
,
44 "html5.flushtimer.initialdelay");
45 Preferences::AddIntVarCache(&sTimerSubsequentDelay
,
46 "html5.flushtimer.subsequentdelay");
50 * Note that nsHtml5StreamParser implements cycle collecting AddRef and
51 * Release. Therefore, nsHtml5StreamParser must never be refcounted from
54 * To work around this limitation, runnables posted by the main thread to the
55 * parser thread hold their reference to the stream parser in an
56 * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds
57 * just like a regular nsRefPtr. This is OK, since the creation of the
58 * runnable and the nsHtml5RefPtr happens on the main thread.
60 * When the runnable is done on the parser thread, the destructor of
61 * nsHtml5RefPtr runs there. It doesn't call Release on the held object
62 * directly. Instead, it posts another runnable back to the main thread where
63 * that runnable calls Release on the wrapped object.
65 * When posting runnables in the other direction, the runnables have to be
66 * created on the main thread when nsHtml5StreamParser is instantiated and
67 * held for the lifetime of the nsHtml5StreamParser. This works, because the
68 * same runnable can be dispatched multiple times and currently runnables
69 * posted from the parser thread to main thread don't need to wrap any
70 * runnable-specific data. (In the other direction, the runnables most notably
71 * wrap the byte data of the stream.)
// XPCOM boilerplate for nsHtml5StreamParser: cycle-collecting AddRef/Release
// and an interface table that exposes nsICharsetDetectionObserver before
// handing off to the cycle-collection map.
73 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser
)
74 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser
)
76 NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser
)
77 NS_INTERFACE_TABLE(nsHtml5StreamParser
,
78 nsICharsetDetectionObserver
)
79 NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser
)
// Cycle-collection unlink for nsHtml5StreamParser. mObserver, mRequest,
// mOwner and mChardet go through the standard UNLINK macro; the two flusher
// runnables and mExecutor are cleared by direct assignment instead.
82 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser
)
84 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser
)
86 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver
)
87 NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest
)
88 NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner
)
// Dropped by hand rather than with the UNLINK macro.
89 tmp
->mExecutorFlusher
= nullptr;
90 tmp
->mLoadFlusher
= nullptr;
91 tmp
->mExecutor
= nullptr;
92 NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet
)
93 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
// Cycle-collection traverse for nsHtml5StreamParser. Besides the directly
// held mObserver, mRequest and mOwner, it reports edges the collector cannot
// see by itself: mExecutor once for each flusher runnable that is set, and
// an edge back to this object labelled "mChardet->mObserver".
95 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser
)
96 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver
)
97 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest
)
98 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner
)
99 // hack: count the strongly owned edge wrapped in the runnable
100 if (tmp
->mExecutorFlusher
) {
101 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb
, "mExecutorFlusher->mExecutor");
102 cb
.NoteXPCOMChild(static_cast<nsIContentSink
*> (tmp
->mExecutor
));
104 // hack: count the strongly owned edge wrapped in the runnable
105 if (tmp
->mLoadFlusher
) {
106 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb
, "mLoadFlusher->mExecutor");
107 cb
.NoteXPCOMChild(static_cast<nsIContentSink
*> (tmp
->mExecutor
));
109 // hack: count self if held by mChardet
// NOTE(review): the guard for this edge is not visible in this listing;
// presumably it tests tmp->mChardet -- confirm.
111 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb
, "mChardet->mObserver");
112 cb
.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver
*>(tmp
));
114 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
// Runnable that keeps a strong reference to a tree op executor and, when
// run, calls RunFlushLoop() on it unless the executor reports isInList().
116 class nsHtml5ExecutorFlusher
: public nsRunnable
119 nsRefPtr
<nsHtml5TreeOpExecutor
> mExecutor
;
121 explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor
* aExecutor
)
122 : mExecutor(aExecutor
)
// Skip the flush when the executor is already in a list (presumably a list
// of executors with a pending flush -- TODO confirm).
126 if (!mExecutor
->isInList()) {
127 mExecutor
->RunFlushLoop();
// Runnable that keeps a strong reference to a tree op executor and calls
// FlushSpeculativeLoads() on it when run.
133 class nsHtml5LoadFlusher
: public nsRunnable
136 nsRefPtr
<nsHtml5TreeOpExecutor
> mExecutor
;
138 explicit nsHtml5LoadFlusher(nsHtml5TreeOpExecutor
* aExecutor
)
139 : mExecutor(aExecutor
)
143 mExecutor
->FlushSpeculativeLoads();
// Constructor; asserts it runs on the main thread.
//
// aExecutor is stored in mExecutor and wrapped in the two flusher runnables.
// aOwner is the owning nsHtml5Parser (its storage is on a line not visible
// here). aMode selects view-source behaviour: it decides which stage
// arguments the tree builder receives, whether the tokenizer is built with
// its second argument true (only for VIEW_SOURCE_XML), and whether a
// highlighter is installed.
//
// The body retargets the flush timer and the atom table to the parser
// thread, wires the tokenizer to the atom table and to this object as its
// encoding declaration handler, and instantiates the charset detector named
// by the "intl.charset.detector" pref when that pref is non-empty.
148 nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor
* aExecutor
,
149 nsHtml5Parser
* aOwner
,
151 : mFirstBuffer(nullptr) // Will be filled when starting
152 , mLastBuffer(nullptr) // Will be filled when starting
153 , mExecutor(aExecutor
)
154 , mTreeBuilder(new nsHtml5TreeBuilder((aMode
== VIEW_SOURCE_HTML
||
155 aMode
== VIEW_SOURCE_XML
) ?
156 nullptr : mExecutor
->GetStage(),
158 mExecutor
->GetStage() : nullptr))
159 , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder
, aMode
== VIEW_SOURCE_XML
))
160 , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
162 , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
163 , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
164 , mThread(nsHtml5Module::GetStreamParserThread())
165 , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor
))
166 , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor
))
167 , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
170 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
// The flush timer targets the parser thread, not the main thread.
171 mFlushTimer
->SetTarget(mThread
);
173 mAtomTable
.SetPermittedLookupThread(mThread
);
175 mTokenizer
->setInterner(&mAtomTable
);
176 mTokenizer
->setEncodingDeclarationHandler(this);
// View-source modes share one highlighter between tokenizer and tree builder.
178 if (aMode
== VIEW_SOURCE_HTML
|| aMode
== VIEW_SOURCE_XML
) {
179 nsHtml5Highlighter
* highlighter
=
180 new nsHtml5Highlighter(mExecutor
->GetStage());
181 mTokenizer
->EnableViewSource(highlighter
); // takes ownership
182 mTreeBuilder
->EnableViewSource(highlighter
); // doesn't own
185 // Chardet instantiation adapted from File.
186 // Chardet is initialized here even if it turns out to be useless
187 // to make the chardet refcount its observer (nsHtml5StreamParser)
188 // on the main thread.
189 const nsAdoptingCString
& detectorName
=
190 Preferences::GetLocalizedCString("intl.charset.detector");
191 if (!detectorName
.IsEmpty()) {
192 nsAutoCString detectorContractID
;
193 detectorContractID
.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE
);
194 detectorContractID
+= detectorName
;
// Init() failure is deliberately ignored (result cast to void).
195 if ((mChardet
= do_CreateInstance(detectorContractID
.get()))) {
196 (void) mChardet
->Init(this);
201 // There's a zeroing operator new for everything else
// Destructor. Asserts that it runs on the main thread and that the flush
// timer has already been dropped, then clears the decoder, sniffing buffer,
// meta scanner, first buffer, tree builder and tokenizer members.
// NOTE(review): some original lines around the assignments are not visible
// in this listing (possibly a debug-only guard) -- confirm before relying on
// these assignments running in all builds.
204 nsHtml5StreamParser::~nsHtml5StreamParser()
206 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
208 NS_ASSERTION(!mFlushTimer
, "Flush timer was not dropped before dtor!");
212 mUnicodeDecoder
= nullptr;
213 mSniffingBuffer
= nullptr;
214 mMetaScanner
= nullptr;
215 mFirstBuffer
= nullptr;
217 mTreeBuilder
= nullptr;
218 mTokenizer
= nullptr;
// Main-thread only. Queries mRequest for nsIChannel and stores the result in
// *aChannel. Returns the QueryInterface result, or NS_ERROR_NOT_AVAILABLE
// when there is no request.
224 nsHtml5StreamParser::GetChannel(nsIChannel
** aChannel
)
226 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
227 return mRequest
? CallQueryInterface(mRequest
, aChannel
) :
228 NS_ERROR_NOT_AVAILABLE
;
// Charset detector callback; must run on the parser thread.
//
// aCharset is the label reported by the detector and aConf its confidence.
// Only eBestAnswer and eSureAnswer are acted upon. The label is resolved
// with FindEncodingForLabelNoReplacement. The visible code then either
// raises mCharsetSource to kCharsetFromAutoDetection (detected encoding
// equals mCharset), asks the tree builder for a charset switch and flushes,
// or adopts the detected encoding as mCharset.
// NOTE(review): the conditions separating the last two cases, and what
// happens when the label lookup fails, are on lines not visible in this
// listing.
232 nsHtml5StreamParser::Notify(const char* aCharset
, nsDetectionConfident aConf
)
234 NS_ASSERTION(IsParserThread(), "Wrong thread!");
235 if (aConf
== eBestAnswer
|| aConf
== eSureAnswer
) {
236 mFeedChardet
= false; // just in case
237 nsAutoCString encoding
;
238 if (!EncodingUtils::FindEncodingForLabelNoReplacement(
239 nsDependentCString(aCharset
), encoding
)) {
// Detector agrees with the current encoding: only the source changes.
243 if (mCharset
.Equals(encoding
)) {
244 NS_ASSERTION(mCharsetSource
< kCharsetFromAutoDetection
,
245 "Why are we running chardet at all?");
246 mCharsetSource
= kCharsetFromAutoDetection
;
247 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
249 // We've already committed to a decoder. Request a reload from the
251 mTreeBuilder
->NeedsCharsetSwitchTo(encoding
,
252 kCharsetFromAutoDetection
,
254 FlushTreeOpsAndDisarmTimer();
258 // Got a confident answer from the sniffing buffer. That code will
259 // take care of setting up the decoder.
260 mCharset
.Assign(encoding
);
261 mCharsetSource
= kCharsetFromAutoDetection
;
262 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
// Derives mViewSourceTitle from aURL. A "view-source" URI is unwrapped to
// its inner URI through nsINestedURI. For the resulting URI, a "data" scheme
// yields the fixed abbreviated title "data:" followed by an ellipsis, and
// otherwise the URI spec is written into mViewSourceTitle.
// NOTE(review): the branch structure (including any null check on aURL or
// on the nsINestedURI query result) is on lines not visible in this listing.
269 nsHtml5StreamParser::SetViewSourceTitle(nsIURI
* aURL
)
272 nsCOMPtr
<nsIURI
> temp
;
274 aURL
->SchemeIs("view-source", &isViewSource
);
276 nsCOMPtr
<nsINestedURI
> nested
= do_QueryInterface(aURL
);
277 nested
->GetInnerURI(getter_AddRefs(temp
));
282 temp
->SchemeIs("data", &isData
);
284 // Avoid showing potentially huge data: URLs. The three last bytes are
285 // UTF-8 for an ellipsis.
286 mViewSourceTitle
.AssignLiteral("data:\xE2\x80\xA6");
288 temp
->GetSpec(mViewSourceTitle
);
// Parser-thread only. Commits to mCharset by creating the decoder for it,
// then feeds the decoder the bytes buffered during sniffing (if any) and
// finally the current segment.
//
// aFromSegment is the current network segment and may be null. aWriteCount
// receives the count written for the current segment by WriteStreamBytes.
// Failure to write the sniffing buffer is propagated with NS_ENSURE_SUCCESS.
// The sniffing buffer and the meta scanner are released once no longer
// needed.
294 nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment
, // can be null
296 uint32_t* aWriteCount
)
298 NS_ASSERTION(IsParserThread(), "Wrong thread!");
300 mUnicodeDecoder
= EncodingUtils::DecoderForEncoding(mCharset
);
// Replay what was buffered before the encoding decision was made.
301 if (mSniffingBuffer
) {
303 rv
= WriteStreamBytes(mSniffingBuffer
, mSniffingLength
, &writeCount
);
304 NS_ENSURE_SUCCESS(rv
, rv
);
305 mSniffingBuffer
= nullptr;
307 mMetaScanner
= nullptr;
309 rv
= WriteStreamBytes(aFromSegment
, aCount
, aWriteCount
);
// Parser-thread only. Commits to the encoding named by aDecoderCharsetName
// because a byte order mark was found: stores it in mCharset, creates the
// decoder, records kCharsetFromByteOrderMark as the source, stops feeding
// chardet, informs the tree builder, releases the sniffing buffer and meta
// scanner, and marks BOM sniffing as over.
315 nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName
)
317 NS_ASSERTION(IsParserThread(), "Wrong thread!");
318 mCharset
.Assign(aDecoderCharsetName
);
319 mUnicodeDecoder
= EncodingUtils::DecoderForEncoding(mCharset
);
320 mCharsetSource
= kCharsetFromByteOrderMark
;
321 mFeedChardet
= false;
322 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
323 mSniffingBuffer
= nullptr;
324 mMetaScanner
= nullptr;
325 mBomState
= BOM_SNIFFING_OVER
;
// Heuristic for UTF-16 text that has no BOM and contains only Basic Latin.
//
// It walks the bytes of mSniffingBuffer and then the first
// aCountToSniffingLimit bytes of aFromSegment, recording per byte-position
// parity (even / odd) whether a zero byte and whether a non-zero byte has
// been seen. When the scan completes, a non-zero byte at an even position
// selects "UTF-16LE" and otherwise "UTF-16BE"; the source becomes
// kCharsetFromIrreversibleAutoDetection and chardet feeding stops.
//
// The heuristic is skipped for LOAD_AS_DATA and when fewer than 30 bytes are
// available.
// NOTE(review): the bodies of the early-exit checks inside the loops are not
// visible in this listing; presumably they return as soon as the pattern is
// contradicted -- confirm.
330 nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment
,
331 uint32_t aCountToSniffingLimit
)
333 // Avoid underspecified heuristic craziness for XHR
334 if (mMode
== LOAD_AS_DATA
) {
337 // Make sure there's enough data. Require room for "<title></title>"
338 if (mSniffingLength
+ aCountToSniffingLimit
< 30) {
341 // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
342 bool byteZero
[2] = { false, false };
343 bool byteNonZero
[2] = { false, false };
// First pass: bytes buffered from earlier network events.
345 if (mSniffingBuffer
) {
346 for (; i
< mSniffingLength
; ++i
) {
347 if (mSniffingBuffer
[i
]) {
348 if (byteNonZero
[1 - (i
% 2)]) {
351 byteNonZero
[i
% 2] = true;
353 if (byteZero
[1 - (i
% 2)]) {
356 byteZero
[i
% 2] = true;
// Second pass: the current segment. i carries over from the first pass so
// that parity is computed over the whole stream position (i + j).
361 for (uint32_t j
= 0; j
< aCountToSniffingLimit
; ++j
) {
362 if (aFromSegment
[j
]) {
363 if (byteNonZero
[1 - ((i
+ j
) % 2)]) {
366 byteNonZero
[(i
+ j
) % 2] = true;
368 if (byteZero
[1 - ((i
+ j
) % 2)]) {
371 byteZero
[(i
+ j
) % 2] = true;
376 if (byteNonZero
[0]) {
377 mCharset
.AssignLiteral("UTF-16LE");
379 mCharset
.AssignLiteral("UTF-16BE");
381 mCharsetSource
= kCharsetFromIrreversibleAutoDetection
;
382 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
383 mFeedChardet
= false;
384 mTreeBuilder
->MaybeComplainAboutCharset("EncBomlessUtf16",
// Applies the encoding name that expat extracted from an XML declaration.
//
// aEncoding is a UTF-16 string. It is converted to UTF-8 and, when
// PreferredForInternalEncodingDecl accepts it, becomes mCharset with source
// kCharsetFromMetaTag. The trailing statements set mCharset to "UTF-8" with
// the same source.
// NOTE(review): the guard on aEncoding and the early return after the
// accepted case are on lines not visible in this listing -- confirm that
// the UTF-8 assignment is only reached as a fallback.
391 nsHtml5StreamParser::SetEncodingFromExpat(const char16_t
* aEncoding
)
394 nsDependentString
utf16(aEncoding
);
396 CopyUTF16toUTF8(utf16
, utf8
);
397 if (PreferredForInternalEncodingDecl(utf8
)) {
398 mCharset
.Assign(utf8
);
399 mCharsetSource
= kCharsetFromMetaTag
; // closest for XML
402 // else the page declared an encoding Gecko doesn't support and we'd
403 // end up defaulting to UTF-8 anyway. Might as well fall through here
404 // right away and let the encoding be set to UTF-8 which we'd default to
407 mCharset
.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
408 mCharsetSource
= kCharsetFromMetaTag
; // means confident
// User data handed to the expat callbacks below. The visible member is a
// pointer back to the stream parser; the callbacks also read an mExpat
// member whose declaration is not visible in this listing.
411 // A separate user data struct is used instead of passing the
412 // nsHtml5StreamParser instance as user data in order to avoid including
413 // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
414 // Using a separate user data struct also avoids bloating nsHtml5StreamParser
418 nsHtml5StreamParser
* mStreamParser
;
421 // Using no-namespace handler callbacks to avoid including expat.h in
422 // nsHtml5StreamParser.h, since doing so would cause naming conflicts.
// expat XML declaration callback. aUserData is the UserData set up by
// FinalizeSniffing. The declared encoding (aEncoding, reinterpreted as
// char16_t text) is forwarded to the stream parser, after which parsing is
// stopped with XML_StopParser's second argument false: nothing past the
// declaration is needed. aVersion is not used in the visible code.
424 HandleXMLDeclaration(void* aUserData
,
425 const XML_Char
* aVersion
,
426 const XML_Char
* aEncoding
,
429 UserData
* ud
= static_cast<UserData
*>(aUserData
);
430 ud
->mStreamParser
->SetEncodingFromExpat(
431 reinterpret_cast<const char16_t
*>(aEncoding
));
432 XML_StopParser(ud
->mExpat
, false);
// expat start-element callback. Reaching an element means the XML
// declaration (if any) is already behind us, so the parser is simply
// stopped. aName and aAtts are not used.
436 HandleStartElement(void* aUserData
,
437 const XML_Char
* aName
,
438 const XML_Char
**aAtts
)
440 UserData
* ud
= static_cast<UserData
*>(aUserData
);
441 XML_StopParser(ud
->mExpat
, false);
// expat end-element callback. Stops the parser; aName is not used.
445 HandleEndElement(void* aUserData
,
446 const XML_Char
* aName
)
448 UserData
* ud
= static_cast<UserData
*>(aUserData
);
449 XML_StopParser(ud
->mExpat
, false);
// expat comment callback. Stops the parser; the second parameter (the
// comment text as delivered by expat) is not used.
453 HandleComment(void* aUserData
,
454 const XML_Char
* aName
)
456 UserData
* ud
= static_cast<UserData
*>(aUserData
);
457 XML_StopParser(ud
->mExpat
, false);
// expat processing-instruction callback. Stops the parser; aTarget and
// aData are not used.
461 HandleProcessingInstruction(void* aUserData
,
462 const XML_Char
* aTarget
,
463 const XML_Char
* aData
)
465 UserData
* ud
= static_cast<UserData
*>(aUserData
);
466 XML_StopParser(ud
->mExpat
, false);
// Parser-thread only. Makes the final encoding decision once sniffing can
// no longer be deferred, then starts decoding through
// SetupDecodingAndWriteSniffingBufferAndCurrentSegment.
//
// aFromSegment is the current segment and may be null. aWriteCount is
// forwarded to the write helper. aCountToSniffingLimit is how many bytes of
// aFromSegment still fall inside the sniffing window.
//
// VIEW_SOURCE_XML: a throw-away expat parser created as ISO-8859-1 reads
// the XML declaration from the buffered and current bytes; if that yields
// no encoding, UTF-8 is used.
//
// Other modes: a charset source of at least kCharsetFromHintPrevDoc is
// trusted as is. Otherwise the BOM-less UTF-16 heuristic runs, then chardet
// is fed the buffered and current bytes, and finally fallbacks are applied
// (windows-1252 when nothing is set at all; the XHR UTF-8 default is
// promoted to a non-weak source).
// NOTE(review): several guard conditions around the chardet calls are on
// lines not visible in this listing.
470 nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment
, // can be null
472 uint32_t* aWriteCount
,
473 uint32_t aCountToSniffingLimit
)
475 NS_ASSERTION(IsParserThread(), "Wrong thread!");
476 NS_ASSERTION(mCharsetSource
< kCharsetFromParentForced
,
477 "Should not finalize sniffing when using forced charset.");
478 if (mMode
== VIEW_SOURCE_XML
) {
479 static const XML_Memory_Handling_Suite memsuite
=
481 (void *(*)(size_t))moz_xmalloc
,
482 (void *(*)(void *, size_t))moz_xrealloc
,
486 static const char16_t kExpatSeparator
[] = { 0xFFFF, '\0' };
488 static const char16_t kISO88591
[] =
489 { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
492 ud
.mStreamParser
= this;
494 // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML
495 // documents MUST begin with a BOM. We don't support EBCDIC and such.
496 // Thus, at this point, what we have is garbage or something encoded using
497 // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes
498 // without throwing errors when bytes have the most significant bit set
499 // and without triggering expat's unknown encoding code paths. This is
500 // enough to be able to use expat to parse the XML declaration in order
501 // to extract the encoding name from it.
502 ud
.mExpat
= XML_ParserCreate_MM(kISO88591
, &memsuite
, kExpatSeparator
);
503 XML_SetXmlDeclHandler(ud
.mExpat
, HandleXMLDeclaration
);
504 XML_SetElementHandler(ud
.mExpat
, HandleStartElement
, HandleEndElement
);
505 XML_SetCommentHandler(ud
.mExpat
, HandleComment
);
506 XML_SetProcessingInstructionHandler(ud
.mExpat
, HandleProcessingInstruction
);
507 XML_SetUserData(ud
.mExpat
, static_cast<void*>(&ud
));
509 XML_Status status
= XML_STATUS_OK
;
511 // aFromSegment points to the data obtained from the current network
512 // event. mSniffingBuffer (if it exists) contains the data obtained before
513 // the current event. Thus, mSniffingLength bytes of mSniffingBuffer
514 // followed by aCountToSniffingLimit bytes from aFromSegment are the
515 // first 1024 bytes of the file (or the file as a whole if the file is
516 // 1024 bytes long or shorter). Thus, we parse both buffers, but if the
517 // first call succeeds already, we skip parsing the second buffer.
518 if (mSniffingBuffer
) {
519 status
= XML_Parse(ud
.mExpat
,
520 reinterpret_cast<const char*>(mSniffingBuffer
.get()),
524 if (status
== XML_STATUS_OK
&&
525 mCharsetSource
< kCharsetFromMetaTag
&&
527 status
= XML_Parse(ud
.mExpat
,
528 reinterpret_cast<const char*>(aFromSegment
),
529 aCountToSniffingLimit
,
532 XML_ParserFree(ud
.mExpat
);
534 if (mCharsetSource
< kCharsetFromMetaTag
) {
535 // Failed to get an encoding from the XML declaration. XML defaults
536 // confidently to UTF-8 in this case.
537 // It is also possible that the document has an XML declaration that is
538 // longer than 1024 bytes, but that case is not worth worrying about.
539 mCharset
.AssignLiteral("UTF-8");
540 mCharsetSource
= kCharsetFromMetaTag
; // means confident
543 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
,
// Non-XML modes from here on.
549 if (mCharsetSource
>= kCharsetFromHintPrevDoc
) {
550 mFeedChardet
= false;
551 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
, aCount
, aWriteCount
);
553 // Check for BOMless UTF-16 with Basic
554 // Latin content for compat with IE. See bug 631751.
555 SniffBOMlessUTF16BasicLatin(aFromSegment
, aCountToSniffingLimit
);
556 // the charset may have been set now
557 // maybe try chardet now;
561 if (mSniffingBuffer
) {
562 rv
= mChardet
->DoIt((const char*)mSniffingBuffer
.get(), mSniffingLength
, &dontFeed
);
563 mFeedChardet
= !dontFeed
;
564 NS_ENSURE_SUCCESS(rv
, rv
);
566 if (mFeedChardet
&& aFromSegment
) {
567 rv
= mChardet
->DoIt((const char*)aFromSegment
,
568 // Avoid buffer boundary-dependent behavior when
569 // reparsing is forbidden. If reparse is forbidden,
570 // act as if we only saw the first 1024 bytes.
571 // When reparsing isn't forbidden, buffer boundaries
572 // can have an effect on whether the page is loaded
573 // once or twice. :-(
574 mReparseForbidden
? aCountToSniffingLimit
: aCount
,
576 mFeedChardet
= !dontFeed
;
577 NS_ENSURE_SUCCESS(rv
, rv
);
579 if (mFeedChardet
&& (!aFromSegment
|| mReparseForbidden
)) {
580 // mReparseForbidden is checked so that we get to use the sniffing
581 // buffer with the best guess so far if we aren't allowed to guess
583 mFeedChardet
= false;
584 rv
= mChardet
->Done();
585 NS_ENSURE_SUCCESS(rv
, rv
);
587 // fall thru; callback may have changed charset
589 if (mCharsetSource
== kCharsetUninitialized
) {
590 // Hopefully this case is never needed, but dealing with it anyway
591 mCharset
.AssignLiteral("windows-1252");
592 mCharsetSource
= kCharsetFromFallback
;
593 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
594 } else if (mMode
== LOAD_AS_DATA
&&
595 mCharsetSource
== kCharsetFromFallback
) {
596 NS_ASSERTION(mReparseForbidden
, "Reparse should be forbidden for XHR");
597 NS_ASSERTION(!mFeedChardet
, "Should not feed chardet for XHR");
598 NS_ASSERTION(mCharset
.EqualsLiteral("UTF-8"),
599 "XHR should default to UTF-8");
600 // Now mark charset source as non-weak to signal that we have a decision
601 mCharsetSource
= kCharsetFromDocTypeDefault
;
602 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
604 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
, aCount
, aWriteCount
);
// Parser-thread only. Consumes incoming bytes while the encoding is still
// undecided.
//
// aFromSegment is the current segment; aWriteCount receives the number of
// bytes accounted for.
//
// Steps, in order:
//  1. A small state machine (mBomState) looks for a UTF-16LE, UTF-16BE or
//     UTF-8 byte order mark. On a match, decoding is set up from the BOM and
//     the rest of the segment is written straight away.
//  2. Without a BOM, a charset that came from the channel is committed to.
//  3. In NORMAL, VIEW_SOURCE_HTML and LOAD_AS_DATA modes a meta prescan runs
//     over the bytes. A hit is adopted unless the charset was forced by the
//     parent or the user and the hit is ASCII-compatible, in which case the
//     forced charset stays.
//  4. If this segment reaches NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE,
//     sniffing is finalized; otherwise the bytes are appended to
//     mSniffingBuffer (allocated fallibly on first use) for a later call.
// NOTE(review): the byte values matched for the first BOM byte and a few
// branch keywords are on lines not visible in this listing.
608 nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment
,
610 uint32_t* aWriteCount
)
612 NS_ASSERTION(IsParserThread(), "Wrong thread!");
616 // mCharset and mCharsetSource potentially have come from channel or higher
617 // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
618 // If we don't find a BOM, the previously set values of mCharset and
619 // mCharsetSource are not modified by the BOM sniffing here.
620 for (uint32_t i
= 0; i
< aCount
&& mBomState
!= BOM_SNIFFING_OVER
; i
++) {
622 case BOM_SNIFFING_NOT_STARTED
:
623 NS_ASSERTION(i
== 0, "Bad BOM sniffing state.");
624 switch (*aFromSegment
) {
626 mBomState
= SEEN_UTF_8_FIRST_BYTE
;
629 mBomState
= SEEN_UTF_16_LE_FIRST_BYTE
;
632 mBomState
= SEEN_UTF_16_BE_FIRST_BYTE
;
635 mBomState
= BOM_SNIFFING_OVER
;
639 case SEEN_UTF_16_LE_FIRST_BYTE
:
640 if (aFromSegment
[i
] == 0xFE) {
641 rv
= SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
642 NS_ENSURE_SUCCESS(rv
, rv
);
643 uint32_t count
= aCount
- (i
+ 1);
644 rv
= WriteStreamBytes(aFromSegment
+ (i
+ 1), count
, &writeCount
);
645 NS_ENSURE_SUCCESS(rv
, rv
);
// The BOM bytes themselves count as consumed.
646 *aWriteCount
= writeCount
+ (i
+ 1);
649 mBomState
= BOM_SNIFFING_OVER
;
651 case SEEN_UTF_16_BE_FIRST_BYTE
:
652 if (aFromSegment
[i
] == 0xFF) {
653 rv
= SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
654 NS_ENSURE_SUCCESS(rv
, rv
);
655 uint32_t count
= aCount
- (i
+ 1);
656 rv
= WriteStreamBytes(aFromSegment
+ (i
+ 1), count
, &writeCount
);
657 NS_ENSURE_SUCCESS(rv
, rv
);
658 *aWriteCount
= writeCount
+ (i
+ 1);
661 mBomState
= BOM_SNIFFING_OVER
;
663 case SEEN_UTF_8_FIRST_BYTE
:
664 if (aFromSegment
[i
] == 0xBB) {
665 mBomState
= SEEN_UTF_8_SECOND_BYTE
;
667 mBomState
= BOM_SNIFFING_OVER
;
670 case SEEN_UTF_8_SECOND_BYTE
:
671 if (aFromSegment
[i
] == 0xBF) {
672 rv
= SetupDecodingFromBom("UTF-8"); // upper case is the raw form
673 NS_ENSURE_SUCCESS(rv
, rv
);
674 uint32_t count
= aCount
- (i
+ 1);
675 rv
= WriteStreamBytes(aFromSegment
+ (i
+ 1), count
, &writeCount
);
676 NS_ENSURE_SUCCESS(rv
, rv
);
677 *aWriteCount
= writeCount
+ (i
+ 1);
680 mBomState
= BOM_SNIFFING_OVER
;
683 mBomState
= BOM_SNIFFING_OVER
;
687 // if we get here, there either was no BOM or the BOM sniffing isn't complete
690 MOZ_ASSERT(mCharsetSource
!= kCharsetFromByteOrderMark
,
691 "Should not come here if BOM was found.");
692 MOZ_ASSERT(mCharsetSource
!= kCharsetFromOtherComponent
,
693 "kCharsetFromOtherComponent is for XSLT.");
695 if (mBomState
== BOM_SNIFFING_OVER
&&
696 mCharsetSource
== kCharsetFromChannel
) {
697 // There was no BOM and the charset came from channel. mCharset
698 // still contains the charset from the channel as set by an
699 // earlier call to SetDocumentCharset(), since we didn't find a BOM and
700 // overwrite mCharset. (Note that if the user has overridden the charset,
701 // we don't come here but check <meta> for XSS-dangerous charsets first.)
702 mFeedChardet
= false;
703 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
704 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
,
705 aCount
, aWriteCount
);
// The meta scanner is created lazily, and only in modes that prescan.
708 if (!mMetaScanner
&& (mMode
== NORMAL
||
709 mMode
== VIEW_SOURCE_HTML
||
710 mMode
== LOAD_AS_DATA
)) {
711 mMetaScanner
= new nsHtml5MetaScanner();
714 if (mSniffingLength
+ aCount
>= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE
) {
715 // this is the last buffer
716 uint32_t countToSniffingLimit
=
717 NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE
- mSniffingLength
;
718 if (mMode
== NORMAL
|| mMode
== VIEW_SOURCE_HTML
|| mMode
== LOAD_AS_DATA
) {
719 nsHtml5ByteReadable
readable(aFromSegment
, aFromSegment
+
720 countToSniffingLimit
);
721 nsAutoCString encoding
;
722 mMetaScanner
->sniff(&readable
, encoding
);
723 if (!encoding
.IsEmpty()) {
724 // meta scan successful; honor overrides unless meta is XSS-dangerous
725 if ((mCharsetSource
== kCharsetFromParentForced
||
726 mCharsetSource
== kCharsetFromUserForced
) &&
727 EncodingUtils::IsAsciiCompatible(encoding
)) {
729 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
730 aFromSegment
, aCount
, aWriteCount
);
732 mCharset
.Assign(encoding
);
733 mCharsetSource
= kCharsetFromMetaPrescan
;
734 mFeedChardet
= false;
735 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
736 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
737 aFromSegment
, aCount
, aWriteCount
);
740 if (mCharsetSource
== kCharsetFromParentForced
||
741 mCharsetSource
== kCharsetFromUserForced
) {
742 // meta not found, honor override
743 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
744 aFromSegment
, aCount
, aWriteCount
);
746 return FinalizeSniffing(aFromSegment
, aCount
, aWriteCount
,
747 countToSniffingLimit
);
750 // not the last buffer
751 if (mMode
== NORMAL
|| mMode
== VIEW_SOURCE_HTML
|| mMode
== LOAD_AS_DATA
) {
752 nsHtml5ByteReadable
readable(aFromSegment
, aFromSegment
+ aCount
);
753 nsAutoCString encoding
;
754 mMetaScanner
->sniff(&readable
, encoding
);
755 if (!encoding
.IsEmpty()) {
756 // meta scan successful; honor overrides unless meta is XSS-dangerous
757 if ((mCharsetSource
== kCharsetFromParentForced
||
758 mCharsetSource
== kCharsetFromUserForced
) &&
759 EncodingUtils::IsAsciiCompatible(encoding
)) {
761 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
,
762 aCount
, aWriteCount
);
764 mCharset
.Assign(encoding
);
765 mCharsetSource
= kCharsetFromMetaPrescan
;
766 mFeedChardet
= false;
767 mTreeBuilder
->SetDocumentCharset(mCharset
, mCharsetSource
);
768 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment
,
769 aCount
, aWriteCount
);
// No decision yet: stash the bytes for the next call.
773 if (!mSniffingBuffer
) {
774 const mozilla::fallible_t fallible
= mozilla::fallible_t();
775 mSniffingBuffer
= new (fallible
)
776 uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE
];
777 if (!mSniffingBuffer
) {
778 return NS_ERROR_OUT_OF_MEMORY
;
781 memcpy(mSniffingBuffer
+ mSniffingLength
, aFromSegment
, aCount
);
782 mSniffingLength
+= aCount
;
783 *aWriteCount
= aCount
;
// Parser-thread only. Decodes bytes from aFromSegment with mUnicodeDecoder
// into the chain of UTF-16 buffers ending at mLastBuffer, appending a new
// NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE buffer whenever the current one is
// full or the decoder reports NS_PARTIAL_MORE_OUTPUT.
//
// On completion *aWriteCount is set to the total number of bytes consumed,
// which is asserted to equal aCount. Returns NS_ERROR_NULL_POINTER (after
// marking the parser broken) on the path that warns about a null
// mLastBuffer, and NS_ERROR_OUT_OF_MEMORY on the visible allocation-failure
// paths.
// NOTE(review): the loop header and the conditions guarding the error
// returns are on lines not visible in this listing.
788 nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment
,
790 uint32_t* aWriteCount
)
792 NS_ASSERTION(IsParserThread(), "Wrong thread!");
793 // mLastBuffer should always point to a buffer of the size
794 // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
796 NS_WARNING("mLastBuffer should not be null!");
797 MarkAsBroken(NS_ERROR_NULL_POINTER
);
798 return NS_ERROR_NULL_POINTER
;
// Start with a fresh buffer if the last one is already full.
800 if (mLastBuffer
->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
) {
801 nsRefPtr
<nsHtml5OwningUTF16Buffer
> newBuf
=
802 nsHtml5OwningUTF16Buffer::FalliblyCreate(
803 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
);
805 return NS_ERROR_OUT_OF_MEMORY
;
807 mLastBuffer
= (mLastBuffer
->next
= newBuf
.forget());
809 int32_t totalByteCount
= 0;
811 int32_t end
= mLastBuffer
->getEnd();
812 int32_t byteCount
= aCount
- totalByteCount
;
813 int32_t utf16Count
= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
- end
;
815 NS_ASSERTION(utf16Count
, "Trying to convert into a buffer with no free space!");
816 // byteCount may be zero to force the decoder to output a pending surrogate
// Convert() receives byteCount and utf16Count by pointer, and the code below
// advances by byteCount afterwards, so Convert() presumably updates both to
// the amounts actually consumed and produced -- TODO confirm.
819 nsresult convResult
= mUnicodeDecoder
->Convert((const char*)aFromSegment
, &byteCount
, mLastBuffer
->getBuffer() + end
, &utf16Count
);
820 MOZ_ASSERT(NS_SUCCEEDED(convResult
));
823 mLastBuffer
->setEnd(end
);
824 totalByteCount
+= byteCount
;
825 aFromSegment
+= byteCount
;
827 NS_ASSERTION(end
<= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
,
828 "The Unicode decoder wrote too much data.");
829 NS_ASSERTION(byteCount
>= -1, "The decoder consumed fewer than -1 bytes.");
831 if (convResult
== NS_PARTIAL_MORE_OUTPUT
) {
832 nsRefPtr
<nsHtml5OwningUTF16Buffer
> newBuf
=
833 nsHtml5OwningUTF16Buffer::FalliblyCreate(
834 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
);
836 return NS_ERROR_OUT_OF_MEMORY
;
838 mLastBuffer
= (mLastBuffer
->next
= newBuf
.forget());
839 // All input may have been consumed if there is a pending surrogate pair
840 // that doesn't fit in the output buffer. Loop back to push a zero-length
841 // input to the decoder in that case.
843 NS_ASSERTION(totalByteCount
== (int32_t)aCount
,
844 "The Unicode decoder consumed the wrong number of bytes.");
845 *aWriteCount
= (uint32_t)totalByteCount
;
// Stream listener start notification; main-thread only, and expected while
// the stream state is STREAM_NOT_STARTED and the executor has not started.
//
// aRequest and aContext are passed on to mObserver. The visible body then:
//  - marks the stream as being read;
//  - starts view-source output on the tokenizer for the view-source modes;
//  - starts the tokenizer with scripting disabled for LOAD_AS_DATA and
//    otherwise as reported by the executor, and configures the tree builder
//    (srcdoc flag, scripting flag, script-execution prevention);
//  - switches tokenizer / tree builder to plain text for the plain-text
//    modes;
//  - calls WillBuildModel on the executor and allocates the first UTF-16
//    buffer, marking the executor broken if that allocation fails;
//  - forbids reparsing outside NORMAL / PLAIN_TEXT, for non-GET HTTP
//    requests and for wyciwyg channels;
//  - tries to retarget data delivery to the parser thread;
//  - for wyciwyg channels, instantiates the decoder right away.
// NOTE(review): several conditions and return statements are on lines not
// visible in this listing.
852 nsHtml5StreamParser::OnStartRequest(nsIRequest
* aRequest
, nsISupports
* aContext
)
854 NS_PRECONDITION(STREAM_NOT_STARTED
== mStreamState
,
855 "Got OnStartRequest when the stream had already started.");
856 NS_PRECONDITION(!mExecutor
->HasStarted(),
857 "Got OnStartRequest at the wrong stage in the executor life cycle.");
858 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
860 mObserver
->OnStartRequest(aRequest
, aContext
);
864 mStreamState
= STREAM_BEING_READ
;
866 if (mMode
== VIEW_SOURCE_HTML
|| mMode
== VIEW_SOURCE_XML
) {
867 mTokenizer
->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle
));
870 // For View Source, the parser should run with scripts "enabled" if a normal
871 // load would have scripts enabled.
872 bool scriptingEnabled
= mMode
== LOAD_AS_DATA
?
873 false : mExecutor
->IsScriptEnabled();
874 mOwner
->StartTokenizer(scriptingEnabled
);
876 bool isSrcdoc
= false;
877 nsCOMPtr
<nsIChannel
> channel
;
878 nsresult rv
= GetChannel(getter_AddRefs(channel
));
879 if (NS_SUCCEEDED(rv
)) {
880 isSrcdoc
= NS_IsSrcdocChannel(channel
);
882 mTreeBuilder
->setIsSrcdocDocument(isSrcdoc
);
883 mTreeBuilder
->setScriptingEnabled(scriptingEnabled
);
884 mTreeBuilder
->SetPreventScriptExecution(!((mMode
== NORMAL
) &&
888 mExecutor
->StartReadingFromStage();
890 if (mMode
== PLAIN_TEXT
) {
891 mTreeBuilder
->StartPlainText();
892 mTokenizer
->StartPlainText();
893 } else if (mMode
== VIEW_SOURCE_PLAIN
) {
894 mTreeBuilder
->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle
));
895 mTokenizer
->StartPlainText();
899 * If you move the following line, be very careful not to cause
900 * WillBuildModel to be called before the document has had its
901 * script global object set.
903 rv
= mExecutor
->WillBuildModel(eDTDMode_unknown
);
904 NS_ENSURE_SUCCESS(rv
, rv
);
906 nsRefPtr
<nsHtml5OwningUTF16Buffer
> newBuf
=
907 nsHtml5OwningUTF16Buffer::FalliblyCreate(
908 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE
);
910 // marks this stream parser as terminated,
911 // which prevents entry to code paths that
912 // would use mFirstBuffer or mLastBuffer.
913 return mExecutor
->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY
);
915 NS_ASSERTION(!mFirstBuffer
, "How come we have the first buffer set?");
916 NS_ASSERTION(!mLastBuffer
, "How come we have the last buffer set?");
917 mFirstBuffer
= mLastBuffer
= newBuf
;
921 // The line below means that the encoding can end up being wrong if
922 // a view-source URL is loaded without having the encoding hint from a
923 // previous normal load in the history.
924 mReparseForbidden
= !(mMode
== NORMAL
|| mMode
== PLAIN_TEXT
);
926 nsCOMPtr
<nsIHttpChannel
> httpChannel(do_QueryInterface(mRequest
, &rv
));
927 if (NS_SUCCEEDED(rv
)) {
928 nsAutoCString method
;
929 httpChannel
->GetRequestMethod(method
);
930 // XXX does Necko have a way to renavigate POST, etc. without hitting
932 if (!method
.EqualsLiteral("GET")) {
933 // This is the old Gecko behavior but the HTML5 spec disagrees.
934 // Don't reparse on POST.
935 mReparseForbidden
= true;
936 mFeedChardet
= false; // can't restart anyway
940 // Attempt to retarget delivery of data (via OnDataAvailable) to the parser
941 // thread, rather than through the main thread.
942 nsCOMPtr
<nsIThreadRetargetableRequest
> threadRetargetableRequest
=
943 do_QueryInterface(mRequest
, &rv
);
944 if (threadRetargetableRequest
) {
945 rv
= threadRetargetableRequest
->RetargetDeliveryTo(mThread
);
949 // for now skip warning if we're on child process, since we don't support
950 // off-main thread delivery there yet. This will change with bug 1015466
951 if (XRE_GetProcessType() != GeckoProcessType_Content
) {
952 NS_WARNING("Failed to retarget HTML data delivery to the parser thread.");
956 if (mCharsetSource
== kCharsetFromParentFrame
) {
957 // Remember this in case chardet overwrites mCharsetSource
958 mInitialEncodingWasFromParentFrame
= true;
// A source at least this strong makes detection pointless.
961 if (mCharsetSource
>= kCharsetFromAutoDetection
) {
962 mFeedChardet
= false;
965 nsCOMPtr
<nsIWyciwygChannel
> wyciwygChannel(do_QueryInterface(mRequest
));
966 if (!wyciwygChannel
) {
967 // we aren't ready to commit to an encoding yet
968 // leave converter uninstantiated for now
972 // We are reloading a document.open()ed doc.
973 mReparseForbidden
= true;
974 mFeedChardet
= false;
976 // Instantiate the converter here to avoid BOM sniffing.
977 mUnicodeDecoder
= EncodingUtils::DecoderForEncoding(mCharset
);
// Main-thread only. Queries mObserver for
// nsIThreadRetargetableStreamListener and, when it is available, forwards
// the CheckListenerChain() call to it.
// NOTE(review): the result returned when the query fails (and any guard on
// mObserver) is on lines not visible in this listing.
982 nsHtml5StreamParser::CheckListenerChain()
984 NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
989 nsCOMPtr
<nsIThreadRetargetableStreamListener
> retargetable
=
990 do_QueryInterface(mObserver
, &rv
);
991 if (NS_SUCCEEDED(rv
) && retargetable
) {
992 rv
= retargetable
->CheckListenerChain();
// End-of-stream handling. Asserts the parser thread, that the stream state is
// STREAM_BEING_READ, and that the current thread owns mTokenizerMutex. Sets
// mStreamState to STREAM_ENDED and finally calls ParseAvailableData().
// NOTE(review): the bodies of the IsTerminated, mFeedChardet and
// IsTerminatedOrInterrupted branches are not visible in this listing.
998 nsHtml5StreamParser::DoStopRequest()
1000 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1001 NS_PRECONDITION(STREAM_BEING_READ
== mStreamState
,
1002 "Stream ended without being open.");
1003 mTokenizerMutex
.AssertCurrentThreadOwns();
1005 if (IsTerminated()) {
1009 mStreamState
= STREAM_ENDED
;
// No decoder has been instantiated yet: finish sniffing with an empty buffer
// (null pointer, zero length).
1011 if (!mUnicodeDecoder
) {
1012 uint32_t writeCount
;
1014 if (NS_FAILED(rv
= FinalizeSniffing(nullptr, 0, &writeCount
, 0))) {
1018 } else if (mFeedChardet
) {
1022 if (IsTerminatedOrInterrupted()) {
1026 ParseAvailableData();
// Runnable that holds the stream parser in an nsHtml5RefPtr. The visible
// statements acquire the parser's mTokenizerMutex through a MutexAutoLock and
// then call DoStopRequest() on it. OnStopRequest dispatches an instance of
// this class to mThread.
1029 class nsHtml5RequestStopper
: public nsRunnable
1032 nsHtml5RefPtr
<nsHtml5StreamParser
> mStreamParser
;
1034 explicit nsHtml5RequestStopper(nsHtml5StreamParser
* aStreamParser
)
1035 : mStreamParser(aStreamParser
)
// DoStopRequest asserts that the caller owns the tokenizer mutex, so take it
// first.
1039 mozilla::MutexAutoLock
autoLock(mStreamParser
->mTokenizerMutex
);
1040 mStreamParser
->DoStopRequest();
// Stop notification for the request. Asserts that aRequest is mRequest and
// that this runs on the main thread, forwards the notification to mObserver,
// then dispatches an nsHtml5RequestStopper to mThread. A failed dispatch only
// produces a warning.
// NOTE(review): the remaining parameter (status) and the return statement are
// on lines that are not visible in this listing.
1046 nsHtml5StreamParser::OnStopRequest(nsIRequest
* aRequest
,
1047 nsISupports
* aContext
,
1050 NS_ASSERTION(mRequest
== aRequest
, "Got Stop on wrong stream.");
1051 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1053 mObserver
->OnStopRequest(aRequest
, aContext
, status
);
// Hand the end-of-stream work over to mThread.
1055 nsCOMPtr
<nsIRunnable
> stopper
= new nsHtml5RequestStopper(this);
1056 if (NS_FAILED(mThread
->Dispatch(stopper
, nsIThread::DISPATCH_NORMAL
))) {
1057 NS_WARNING("Dispatching StopRequest event failed.");
// Feeds aLength bytes from aBuffer into the parser. Asserts the parser thread,
// that the stream state is STREAM_BEING_READ, and that the current thread owns
// mTokenizerMutex. The bytes go to WriteStreamBytes or SniffStreamBytes, then
// ParseAvailableData() runs, and finally the one-shot flush timer is armed.
// NOTE(review): short lines (declarations of rv and dontFeed, the conditions
// selecting between the write and sniff paths, early returns) are missing from
// this listing; only the visible statements are described.
1063 nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer
, uint32_t aLength
)
1065 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1066 NS_PRECONDITION(STREAM_BEING_READ
== mStreamState
,
1067 "DoDataAvailable called when stream not open.");
1068 mTokenizerMutex
.AssertCurrentThreadOwns();
1070 if (IsTerminated()) {
1074 uint32_t writeCount
;
// Let the detector see the bytes; it reports through dontFeed whether it wants
// further input.
1079 mChardet
->DoIt((const char*)aBuffer
, aLength
, &dontFeed
);
1080 mFeedChardet
= !dontFeed
;
1082 rv
= WriteStreamBytes(aBuffer
, aLength
, &writeCount
);
1084 rv
= SniffStreamBytes(aBuffer
, aLength
, &writeCount
);
1086 if (NS_FAILED(rv
)) {
1090 NS_ASSERTION(writeCount
== aLength
, "Wrong number of stream bytes written/sniffed.");
1092 if (IsTerminatedOrInterrupted()) {
1096 ParseAvailableData();
1098 if (mFlushTimerArmed
|| mSpeculating
) {
// Arm the one-shot flush timer. Once the timer has fired at least once
// (TimerFlush sets mFlushTimerEverFired) use the subsequent delay; before
// that use the initial delay. The two operands used to be the other way
// around, which applied each preference to the opposite case.
1102 mFlushTimer
->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback
,
1103 static_cast<void*> (this),
1104 mFlushTimerEverFired
?
1105 sTimerSubsequentDelay
:
1106 sTimerInitialDelay
,
1107 nsITimer::TYPE_ONE_SHOT
);
1108 mFlushTimerArmed
= true;
// Runnable that carries a byte buffer (mData) for the stream parser, which it
// holds in an nsHtml5RefPtr. The visible statements acquire the parser's
// mTokenizerMutex and call DoDataAvailable(mData, mLength).
// NOTE(review): the declaration of mLength and the remaining constructor
// parameters are on lines that are not visible in this listing.
1111 class nsHtml5DataAvailable
: public nsRunnable
1114 nsHtml5RefPtr
<nsHtml5StreamParser
> mStreamParser
;
1115 nsAutoArrayPtr
<uint8_t> mData
;
1118 nsHtml5DataAvailable(nsHtml5StreamParser
* aStreamParser
,
1121 : mStreamParser(aStreamParser
)
// DoDataAvailable asserts that the caller owns the tokenizer mutex, so take it
// first.
1127 mozilla::MutexAutoLock
autoLock(mStreamParser
->mTokenizerMutex
);
1128 mStreamParser
->DoDataAvailable(mData
, mLength
);
// Data notification for the request. First checks mExecutor->IsBroken().
// Then there are two delivery paths:
//  - on the main thread, the data is read from aInStream into a fallibly
//    allocated buffer of aLength bytes and an nsHtml5DataAvailable runnable is
//    dispatched to mThread;
//  - otherwise (asserted to be the parser thread), mTokenizerMutex is taken and
//    the data is read with ReadSegments using CopySegmentsToParser.
// NOTE(review): the last parameter, several declarations, the allocation
// failure test and the return statements are on lines missing from this
// listing.
1134 nsHtml5StreamParser::OnDataAvailable(nsIRequest
* aRequest
,
1135 nsISupports
* aContext
,
1136 nsIInputStream
* aInStream
,
1137 uint64_t aSourceOffset
,
1141 if (NS_FAILED(rv
= mExecutor
->IsBroken())) {
1145 NS_ASSERTION(mRequest
== aRequest
, "Got data on wrong stream.");
1147 // Main thread to parser thread dispatch requires copying to buffer first.
1148 if (NS_IsMainThread()) {
1149 const mozilla::fallible_t fallible
= mozilla::fallible_t();
1150 nsAutoArrayPtr
<uint8_t> data(new (fallible
) uint8_t[aLength
]);
// Out-of-memory path: mark the executor as broken and return its result.
1152 return mExecutor
->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY
);
1154 rv
= aInStream
->Read(reinterpret_cast<char*>(data
.get()),
1155 aLength
, &totalRead
);
1156 NS_ENSURE_SUCCESS(rv
, rv
);
1157 NS_ASSERTION(totalRead
<= aLength
, "Read more bytes than were available?");
1159 nsCOMPtr
<nsIRunnable
> dataAvailable
= new nsHtml5DataAvailable(this,
1162 if (NS_FAILED(mThread
->Dispatch(dataAvailable
, nsIThread::DISPATCH_NORMAL
))) {
1163 NS_WARNING("Dispatching DataAvailable event failed.");
1167 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1168 mozilla::MutexAutoLock
autoLock(mTokenizerMutex
);
1170 // Read directly from response buffer.
1171 rv
= aInStream
->ReadSegments(CopySegmentsToParser
, this, aLength
,
1173 if (NS_FAILED(rv
)) {
1174 NS_WARNING("Failed reading response data to parser");
// Segment callback passed to ReadSegments by OnDataAvailable. Casts aClosure
// back to the stream parser, hands the segment to DoDataAvailable and reports
// the whole segment (aCount bytes) as consumed through aWriteCount.
// NOTE(review): the aClosure, aCount and any other parameters are declared on
// lines that are not visible in this listing.
1183 nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream
*aInStream
,
1185 const char *aFromSegment
,
1188 uint32_t *aWriteCount
)
1190 nsHtml5StreamParser
* parser
= static_cast<nsHtml5StreamParser
*>(aClosure
);
1192 parser
->DoDataAvailable((const uint8_t*)aFromSegment
, aCount
);
1193 // Assume DoDataAvailable consumed all available bytes.
1194 *aWriteCount
= aCount
;
// Maps the encoding label in aEncoding to the encoding that should be used
// for an internal (meta) encoding declaration.
//  - Unknown labels produce the EncMetaUnsupported complaint.
//  - UTF-16BE and UTF-16LE are replaced by UTF-8 (EncMetaUtf16 complaint).
//  - x-user-defined is replaced by windows-1252 (EncMetaUserDefined
//    complaint).
//  - If the result equals mCharset, mCharsetSource is raised to
//    kCharsetFromMetaTag and chardet feeding is turned off.
//  - The resolved name is assigned to aEncoding at the end.
// NOTE(review): the return statements are on lines missing from this listing.
// The caller (internalEncodingDeclaration) tests the result with operator!.
1199 nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString
& aEncoding
)
1201 nsAutoCString newEncoding
;
1202 if (!EncodingUtils::FindEncodingForLabel(aEncoding
, newEncoding
)) {
1203 // the encoding name is bogus
1204 mTreeBuilder
->MaybeComplainAboutCharset("EncMetaUnsupported",
1206 mTokenizer
->getLineNumber());
1210 if (newEncoding
.EqualsLiteral("UTF-16BE") ||
1211 newEncoding
.EqualsLiteral("UTF-16LE")) {
1212 mTreeBuilder
->MaybeComplainAboutCharset("EncMetaUtf16",
1214 mTokenizer
->getLineNumber());
1215 newEncoding
.AssignLiteral("UTF-8");
1218 if (newEncoding
.EqualsLiteral("x-user-defined")) {
1219 // WebKit/Blink hack for Indian and Armenian legacy sites
1220 mTreeBuilder
->MaybeComplainAboutCharset("EncMetaUserDefined",
1222 mTokenizer
->getLineNumber());
1223 newEncoding
.AssignLiteral("windows-1252");
// The declared encoding is the one already in use.
1226 if (newEncoding
.Equals(mCharset
)) {
1227 if (mCharsetSource
< kCharsetFromMetaPrescan
) {
1228 if (mInitialEncodingWasFromParentFrame
) {
1229 mTreeBuilder
->MaybeComplainAboutCharset("EncLateMetaFrame",
1231 mTokenizer
->getLineNumber());
1233 mTreeBuilder
->MaybeComplainAboutCharset("EncLateMeta",
1235 mTokenizer
->getLineNumber());
1238 mCharsetSource
= kCharsetFromMetaTag
; // become confident
1239 mFeedChardet
= false; // don't feed chardet when confident
1243 aEncoding
.Assign(newEncoding
);
// Handles an encoding declaration found while tokenizing (parser thread).
// Checks whether mCharsetSource is already at or above kCharsetFromMetaTag,
// converts the label to UTF-8 and runs it through
// PreferredForInternalEncodingDecl. If reparsing is forbidden it complains
// with EncLateMetaTooLate and returns false. Otherwise it stops feeding
// chardet, asks the tree builder for a charset switch to the new encoding and
// flushes the tree ops.
// NOTE(review): the early-exit bodies and the final return are on lines
// missing from this listing.
1248 nsHtml5StreamParser::internalEncodingDeclaration(nsString
* aEncoding
)
1250 // This code needs to stay in sync with
1251 // nsHtml5MetaScanner::tryCharset. Unfortunately, the
1252 // trickery with member fields there leads to some copy-paste reuse. :-(
1253 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1254 if (mCharsetSource
>= kCharsetFromMetaTag
) { // this threshold corresponds to "confident" in the HTML5 spec
1258 nsAutoCString newEncoding
;
1259 CopyUTF16toUTF8(*aEncoding
, newEncoding
);
1261 if (!PreferredForInternalEncodingDecl(newEncoding
)) {
1265 if (mReparseForbidden
) {
1266 // This mReparseForbidden check happens after the call to
1267 // PreferredForInternalEncodingDecl so that if that method calls
1268 // MaybeComplainAboutCharset, its charset complaint wins over the one
1270 mTreeBuilder
->MaybeComplainAboutCharset("EncLateMetaTooLate",
1272 mTokenizer
->getLineNumber());
1273 return false; // not reparsing even if we wanted to
1276 // Avoid having the chardet ask for another restart after this restart
1278 mFeedChardet
= false;
1279 mTreeBuilder
->NeedsCharsetSwitchTo(newEncoding
,
1280 kCharsetFromMetaTag
,
1281 mTokenizer
->getLineNumber());
1282 FlushTreeOpsAndDisarmTimer();
1284 // the tree op executor will cause the stream parser to terminate
1285 // if the charset switch request is accepted or it'll uninterrupt
1286 // if the request failed. Note that if the restart request fails,
1287 // we don't bother trying to make chardet resume. Might as well
1288 // assume that chardet-requested restarts would fail, too.
// Parser-thread helper. Cancels the flush timer if it is armed and marks it
// as not armed, flushes the view-source output in the two view-source modes,
// flushes the tree builder and dispatches mExecutorFlusher to the main thread.
// A failed dispatch only produces a warning.
1293 nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer()
1295 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1296 if (mFlushTimerArmed
) {
1297 // avoid calling Cancel if the flush timer isn't armed to avoid acquiring
1299 mFlushTimer
->Cancel();
1300 mFlushTimerArmed
= false;
1302 if (mMode
== VIEW_SOURCE_HTML
|| mMode
== VIEW_SOURCE_XML
) {
1303 mTokenizer
->FlushViewSource();
1305 mTreeBuilder
->Flush();
1306 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher
))) {
1307 NS_WARNING("failed to dispatch executor flush event");
// Tokenizes the buffered data on the parser thread. The caller must own
// mTokenizerMutex (asserted).
//  - When the current buffer is exhausted and it is also the last buffer, the
//    action depends on mStreamState. While the stream is still being read,
//    the buffer is reset for reuse (unless speculating), loads are flushed
//    and the function returns. Once the stream has ended, a missing-encoding
//    complaint may be issued, the tree builder is told the stream ended, tree
//    ops are flushed and the function returns.
//  - When the buffer is exhausted but more buffers exist, mFirstBuffer
//    advances to the next one.
//  - Otherwise the buffer is tokenized. If the tree builder then reports a
//    script, a new speculation is started under mSpeculationMutex.
// NOTE(review): loop headers, the second case label, closing braces and some
// returns are on lines missing from this listing.
1312 nsHtml5StreamParser::ParseAvailableData()
1314 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1315 mTokenizerMutex
.AssertCurrentThreadOwns();
1317 if (IsTerminatedOrInterrupted()) {
1322 if (!mFirstBuffer
->hasMore()) {
1323 if (mFirstBuffer
== mLastBuffer
) {
1324 switch (mStreamState
) {
1325 case STREAM_BEING_READ
:
1326 // never release the last buffer.
1327 if (!mSpeculating
) {
1328 // reuse buffer space if not speculating
1329 mFirstBuffer
->setStart(0);
1330 mFirstBuffer
->setEnd(0);
1332 mTreeBuilder
->FlushLoads();
1333 // Dispatch this runnable unconditionally, because the loads
1334 // that need flushing may have been flushed earlier even if the
1335 // flush right above here did nothing.
1336 if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher
))) {
1337 NS_WARNING("failed to dispatch load flush event");
1339 return; // no more data for now but expecting more
// End of stream without a confident encoding: pick the complaint that matches
// how the document was loaded.
1345 if (mCharsetSource
< kCharsetFromMetaTag
) {
1346 if (mInitialEncodingWasFromParentFrame
) {
1347 // Unfortunately, this check doesn't take effect for
1348 // cross-origin frames, so cross-origin ad frames that have
1349 // no text and only an image or a Flash embed get the more
1350 // severe message from the next if block. The message is
1351 // technically accurate, though.
1352 mTreeBuilder
->MaybeComplainAboutCharset("EncNoDeclarationFrame",
1355 } else if (mMode
== NORMAL
) {
1356 mTreeBuilder
->MaybeComplainAboutCharset("EncNoDeclaration",
1359 } else if (mMode
== PLAIN_TEXT
) {
1360 mTreeBuilder
->MaybeComplainAboutCharset("EncNoDeclarationPlain",
1366 mTreeBuilder
->StreamEnded();
1367 if (mMode
== VIEW_SOURCE_HTML
|| mMode
== VIEW_SOURCE_XML
) {
1368 mTokenizer
->EndViewSource();
1370 FlushTreeOpsAndDisarmTimer();
1371 return; // no more data and not expecting more
1373 NS_NOTREACHED("It should be impossible to reach this.");
// Current buffer is exhausted but it is not the last one: move on.
1377 mFirstBuffer
= mFirstBuffer
->next
;
1381 // now we have a non-empty buffer
1382 mFirstBuffer
->adjust(mLastWasCR
);
1384 if (mFirstBuffer
->hasMore()) {
1385 mLastWasCR
= mTokenizer
->tokenizeBuffer(mFirstBuffer
);
1386 // At this point, internalEncodingDeclaration() may have called
1387 // Terminate, but that never happens together with script.
1388 // Can't assert that here, though, because it's possible that the main
1389 // thread has called Terminate() while this thread was parsing.
1390 if (mTreeBuilder
->HasScript()) {
1391 // HasScript() cannot return true if the tree builder is preventing
1392 // script execution.
1393 MOZ_ASSERT(mMode
== NORMAL
);
// Start a speculation: record the buffer position, line number and a tree
// builder snapshot, flush what has been built so far, then redirect further
// tree ops into the speculation.
1394 mozilla::MutexAutoLock
speculationAutoLock(mSpeculationMutex
);
1395 nsHtml5Speculation
* speculation
=
1396 new nsHtml5Speculation(mFirstBuffer
,
1397 mFirstBuffer
->getStart(),
1398 mTokenizer
->getLineNumber(),
1399 mTreeBuilder
->newSnapshot());
1400 mTreeBuilder
->AddSnapshotToScript(speculation
->GetSnapshot(),
1401 speculation
->GetStartLineNumber());
1402 FlushTreeOpsAndDisarmTimer();
1403 mTreeBuilder
->SetOpSink(speculation
);
1404 mSpeculations
.AppendElement(speculation
); // adopts the pointer
1405 mSpeculating
= true;
1407 if (IsTerminatedOrInterrupted()) {
// Runnable that resumes parsing. It holds the stream parser in an
// nsHtml5RefPtr; the visible statements acquire the parser's mTokenizerMutex,
// call Uninterrupt() and then ParseAvailableData(). ContinueAfterScripts and
// ContinueAfterFailedCharsetSwitch dispatch instances of it to mThread.
1415 class nsHtml5StreamParserContinuation
: public nsRunnable
1418 nsHtml5RefPtr
<nsHtml5StreamParser
> mStreamParser
;
1420 explicit nsHtml5StreamParserContinuation(nsHtml5StreamParser
* aStreamParser
)
1421 : mStreamParser(aStreamParser
)
// ParseAvailableData asserts that the caller owns the tokenizer mutex, so
// take it first.
1425 mozilla::MutexAutoLock
autoLock(mStreamParser
->mTokenizerMutex
);
1426 mStreamParser
->Uninterrupt();
1427 mStreamParser
->ParseAvailableData();
// Main-thread entry point that reconciles the oldest speculation with the
// tokenizer and tree builder state passed in.
//  - Failed speculation (the visible conditions are: tokenizer not in the
//    data state, or the tree builder snapshot does not match): the parser
//    thread is interrupted. Then, under mTokenizerMutex, the stream is
//    rewound to the start of the speculation, SpeculationFailed is reported
//    to the console, all speculations and pending ops are discarded, the op
//    sink is pointed back at the executor stage and the tokenizer and tree
//    builder state is loaded from the arguments.
//  - Successful speculation that is not the current one (more than one
//    speculation queued): it is flushed to the executor and removed without
//    taking the tokenizer mutex.
//  - Successful speculation that is the current one: the parser thread is
//    interrupted and, under mTokenizerMutex, the speculation is flushed and
//    removed. If none remain, pending ops are flushed straight to the
//    executor and speculating stops.
// A continuation runnable is then dispatched to mThread. The atom table
// lookup thread is set to the main thread while the tokenizer mutex is held
// here and set back to mThread afterwards.
// NOTE(review): the remaining parameter (aLastWasCR), scope braces, else
// lines, the first line of the failure condition and the loop header around
// the buffer reset are on lines missing from this listing.
1433 nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer
* aTokenizer
,
1434 nsHtml5TreeBuilder
* aTreeBuilder
,
1437 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1438 NS_ASSERTION(!(mMode
== VIEW_SOURCE_HTML
|| mMode
== VIEW_SOURCE_XML
),
1439 "ContinueAfterScripts called in view source mode!");
1440 if (NS_FAILED(mExecutor
->IsBroken())) {
1444 mExecutor
->AssertStageEmpty();
1446 bool speculationFailed
= false;
1448 mozilla::MutexAutoLock
speculationAutoLock(mSpeculationMutex
);
1449 if (mSpeculations
.IsEmpty()) {
1450 NS_NOTREACHED("ContinueAfterScripts called without speculations.");
1453 nsHtml5Speculation
* speculation
= mSpeculations
.ElementAt(0);
1455 !aTokenizer
->isInDataState() ||
1456 !aTreeBuilder
->snapshotMatches(speculation
->GetSnapshot())) {
1457 speculationFailed
= true;
1458 // We've got a failed speculation :-(
1459 Interrupt(); // Make the parser thread release the tokenizer mutex sooner
1460 // now fall out of the speculationAutoLock into the tokenizerAutoLock block
1462 // We've got a successful speculation!
1463 if (mSpeculations
.Length() > 1) {
1464 // the first speculation isn't the current speculation, so there's
1465 // no need to bother the parser thread.
1466 speculation
->FlushToSink(mExecutor
);
1467 NS_ASSERTION(!mExecutor
->IsScriptExecuting(),
1468 "ParseUntilBlocked() was supposed to ensure we don't come "
1469 "here when scripts are executing.");
1470 NS_ASSERTION(mExecutor
->IsInFlushLoop(), "How are we here if "
1471 "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
1472 "only caller of this method?");
1473 mSpeculations
.RemoveElementAt(0);
1477 Interrupt(); // Make the parser thread release the tokenizer mutex sooner
1480 // the first speculation is the current speculation. Need to
1481 // release the speculation mutex and acquire the tokenizer
1482 // mutex. (Just acquiring the other mutex here would deadlock)
1486 mozilla::MutexAutoLock
tokenizerAutoLock(mTokenizerMutex
);
// While this thread holds the tokenizer mutex, atom table lookups are
// permitted from the main thread.
1489 nsCOMPtr
<nsIThread
> mainThread
;
1490 NS_GetMainThread(getter_AddRefs(mainThread
));
1491 mAtomTable
.SetPermittedLookupThread(mainThread
);
1494 // In principle, the speculation mutex should be acquired here,
1495 // but there's no point, because the parser thread only acquires it
1496 // when it has also acquired the tokenizer mutex and we are already
1497 // holding the tokenizer mutex.
1498 if (speculationFailed
) {
1499 // Rewind the stream
1501 nsHtml5Speculation
* speculation
= mSpeculations
.ElementAt(0);
1502 mFirstBuffer
= speculation
->GetBuffer();
1503 mFirstBuffer
->setStart(speculation
->GetStart());
1504 mTokenizer
->setLineNumber(speculation
->GetStartLineNumber());
1506 nsContentUtils::ReportToConsole(nsIScriptError::warningFlag
,
1507 NS_LITERAL_CSTRING("DOM Events"),
1508 mExecutor
->GetDocument(),
1509 nsContentUtils::eDOM_PROPERTIES
,
1510 "SpeculationFailed",
1514 speculation
->GetStartLineNumber());
// Reset the start offset of the buffers that follow the rewound one.
1516 nsHtml5OwningUTF16Buffer
* buffer
= mFirstBuffer
->next
;
1518 buffer
->setStart(0);
1519 buffer
= buffer
->next
;
1522 mSpeculations
.Clear(); // potentially a huge number of destructors
1523 // run here synchronously on the main thread...
1525 mTreeBuilder
->flushCharacters(); // empty the pending buffer
1526 mTreeBuilder
->ClearOps(); // now get rid of the failed ops
1528 mTreeBuilder
->SetOpSink(mExecutor
->GetStage());
1529 mExecutor
->StartReadingFromStage();
1530 mSpeculating
= false;
// Adopt the state of the tokenizer and tree builder passed in by the caller.
1533 mLastWasCR
= aLastWasCR
;
1534 mTokenizer
->loadState(aTokenizer
);
1535 mTreeBuilder
->loadState(aTreeBuilder
, &mAtomTable
);
1537 // We've got a successful speculation and at least a moment ago it was
1538 // the current speculation
1539 mSpeculations
.ElementAt(0)->FlushToSink(mExecutor
);
1540 NS_ASSERTION(!mExecutor
->IsScriptExecuting(),
1541 "ParseUntilBlocked() was supposed to ensure we don't come "
1542 "here when scripts are executing.");
1543 NS_ASSERTION(mExecutor
->IsInFlushLoop(), "How are we here if "
1544 "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
1545 "only caller of this method?");
1546 mSpeculations
.RemoveElementAt(0);
1547 if (mSpeculations
.IsEmpty()) {
1548 // yes, it was still the only speculation. Now stop speculating
1549 // However, before telling the executor to read from stage, flush
1550 // any pending ops straight to the executor, because otherwise
1551 // they remain unflushed until we get more data from the network.
1552 mTreeBuilder
->SetOpSink(mExecutor
);
1553 mTreeBuilder
->Flush(true);
1554 mTreeBuilder
->SetOpSink(mExecutor
->GetStage());
1555 mExecutor
->StartReadingFromStage();
1556 mSpeculating
= false;
// Schedule the parser thread to uninterrupt and continue parsing.
1559 nsCOMPtr
<nsIRunnable
> event
= new nsHtml5StreamParserContinuation(this);
1560 if (NS_FAILED(mThread
->Dispatch(event
, nsIThread::DISPATCH_NORMAL
))) {
1561 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
1563 // A stream event might run before this event runs, but that's harmless.
1565 mAtomTable
.SetPermittedLookupThread(mThread
);
// Main-thread entry point used after a charset switch request failed.
// Dispatches an nsHtml5StreamParserContinuation to mThread so that parsing is
// uninterrupted and resumed there. A failed dispatch only produces a warning.
1571 nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
1573 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1574 nsCOMPtr
<nsIRunnable
> event
= new nsHtml5StreamParserContinuation(this);
1575 if (NS_FAILED(mThread
->Dispatch(event
, nsIThread::DISPATCH_NORMAL
))) {
1576 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
// Runnable that holds the stream parser in an nsHtml5RefPtr. If the parser
// still has a flush timer, the visible statements cancel it and null out
// mFlushTimer. DropTimer dispatches an instance of this class to mThread.
1580 class nsHtml5TimerKungFu
: public nsRunnable
1583 nsHtml5RefPtr
<nsHtml5StreamParser
> mStreamParser
;
1585 explicit nsHtml5TimerKungFu(nsHtml5StreamParser
* aStreamParser
)
1586 : mStreamParser(aStreamParser
)
1590 if (mStreamParser
->mFlushTimer
) {
1591 mStreamParser
->mFlushTimer
->Cancel();
1592 mStreamParser
->mFlushTimer
= nullptr;
// Main-thread method that dispatches an nsHtml5TimerKungFu runnable to
// mThread; that runnable cancels and drops the flush timer. A failed dispatch
// only produces a warning. The rationale is given in the text below.
// NOTE(review): the opening and closing delimiters of the block comment below
// are on lines missing from this listing.
1599 nsHtml5StreamParser::DropTimer()
1601 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1603 * Simply nulling out the timer wouldn't work, because if the timer is
1604 * armed, it needs to be canceled first. Simply canceling it first wouldn't
1605 * work, because nsTimerImpl::Cancel is not safe for calling from outside
1606 * the thread where nsTimerImpl::Fire would run. It's not safe to
1607 * dispatch a runnable to cancel the timer from the destructor of this
1608 * class, because the timer has a weak (void*) pointer back to this instance
1609 * of the stream parser and having the timer fire before the runnable
1610 * cancels it would make the timer access a deleted object.
1612 * This DropTimer method addresses these issues. This method must be called
1613 * on the main thread before the destructor of this class is reached.
1614 * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this
1615 * stream parser object to keep it alive until the runnable is done.
1616 * The runnable cancels the timer on the parser thread, drops the timer
1617 * and lets nsHtml5RefPtr send a runnable back to the main thread to
1618 * release the stream parser.
1621 nsCOMPtr
<nsIRunnable
> event
= new nsHtml5TimerKungFu(this);
1622 if (NS_FAILED(mThread
->Dispatch(event
, nsIThread::DISPATCH_NORMAL
))) {
1623 NS_WARNING("Failed to dispatch TimerKungFu event");
// Timer callback in the function-pointer form passed to InitWithFuncCallback
// by DoDataAvailable. aClosure is the stream parser that armed the timer; the
// call is forwarded to its TimerFlush().
// NOTE(review): the existing comment below appears to be cut short in this
// listing (its continuation line is missing).
1628 // Using a static, because the method name Notify is taken by the chardet
1631 nsHtml5StreamParser::TimerCallback(nsITimer
* aTimer
, void* aClosure
)
1633 (static_cast<nsHtml5StreamParser
*> (aClosure
))->TimerFlush();
// Runs when the flush timer fires (parser thread). Takes mTokenizerMutex,
// marks the timer as not armed and records that it has fired at least once.
// In the two view-source modes it flushes the tree builder and the
// view-source output; otherwise it flushes the tree builder. In both cases
// mExecutorFlusher is dispatched to the main thread only when the flush call
// used as the condition returns true.
// NOTE(review): the body of the IsTerminatedOrInterrupted branch and the else
// line are missing from this listing.
1637 nsHtml5StreamParser::TimerFlush()
1639 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1640 mozilla::MutexAutoLock
autoLock(mTokenizerMutex
);
1642 NS_ASSERTION(!mSpeculating
, "Flush timer fired while speculating.");
1644 // The timer fired if we got here. No need to cancel it. Mark it as
1645 // not armed, though.
1646 mFlushTimerArmed
= false;
// Remembered so that DoDataAvailable can choose the delay for later arming.
1648 mFlushTimerEverFired
= true;
1650 if (IsTerminatedOrInterrupted()) {
1654 if (mMode
== VIEW_SOURCE_HTML
|| mMode
== VIEW_SOURCE_XML
) {
1655 mTreeBuilder
->Flush(); // delete useless ops
1656 if (mTokenizer
->FlushViewSource()) {
1657 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher
))) {
1658 NS_WARNING("failed to dispatch executor flush event");
1662 // we aren't speculating and we don't know when new data is
1663 // going to arrive. Send data to the main thread.
1664 if (mTreeBuilder
->Flush(true)) {
1665 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher
))) {
1666 NS_WARNING("failed to dispatch executor flush event");
// Marks the parse as broken with the error code aRv (parser thread, caller
// owns mTokenizerMutex). The tree builder is told about the failure, its ops
// are flushed (the debug-only hadOps asserts that there was something to
// flush) and mExecutorFlusher is dispatched to the main thread. A failed
// dispatch only produces a warning.
// NOTE(review): at least one statement between the mutex assertion and the
// tree builder call is missing from this listing (numbering skips 1677-1678),
// and the end of the function is not visible.
1673 nsHtml5StreamParser::MarkAsBroken(nsresult aRv
)
1675 NS_ASSERTION(IsParserThread(), "Wrong thread!");
1676 mTokenizerMutex
.AssertCurrentThreadOwns();
1679 mTreeBuilder
->MarkAsBroken(aRv
);
1680 mozilla::DebugOnly
<bool> hadOps
= mTreeBuilder
->Flush(false);
1681 NS_ASSERTION(hadOps
, "Should have had the markAsBroken op!");
1682 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher
))) {
1683 NS_WARNING("failed to dispatch executor flush event");