Bug 1855360 - Fix the skip-if syntax. a=bustage-fix
[gecko.git] / parser / htmlparser / nsParser.cpp
blob04d02e2084a8516a95d29c78a616c01a4e5b7af2
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsAtom.h"
8 #include "nsParser.h"
9 #include "nsString.h"
10 #include "nsCRT.h"
11 #include "nsScanner.h"
12 #include "plstr.h"
13 #include "nsIChannel.h"
14 #include "nsIInputStream.h"
15 #include "CNavDTD.h"
16 #include "prenv.h"
17 #include "prlock.h"
18 #include "prcvar.h"
19 #include "nsReadableUtils.h"
20 #include "nsCOMPtr.h"
21 #include "nsExpatDriver.h"
22 #include "nsIFragmentContentSink.h"
23 #include "nsStreamUtils.h"
24 #include "nsXPCOMCIDInternal.h"
25 #include "nsMimeTypes.h"
26 #include "nsCharsetSource.h"
27 #include "nsThreadUtils.h"
28 #include "nsIHTMLContentSink.h"
30 #include "mozilla/BinarySearch.h"
31 #include "mozilla/CondVar.h"
32 #include "mozilla/dom/ScriptLoader.h"
33 #include "mozilla/Encoding.h"
34 #include "mozilla/Mutex.h"
36 using namespace mozilla;
// Bit flags stored in nsParser::mFlags.

// Set by PostContinueEvent() after a nsParserContinueEvent was dispatched;
// cleared again by HandleParserContinueEvent() or Terminate().
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001

// Set by Initialize(); cleared by ResumeParse() when WillBuildModel() fails.
#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002
41 //-------------- Begin ParseContinue Event Definition ------------------------
43 The parser can be explicitly interrupted by passing a return value of
44 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
45 the parser to stop processing and allow the application to return to the event
46 loop. The data which was left at the time of interruption will be processed
47 the next time OnDataAvailable is called. If the parser has received its final
48 chunk of data then OnDataAvailable will no longer be called by the networking
49 module, so the parser will schedule a nsParserContinueEvent which will call
50 the parser to process the remaining data after returning to the event loop.
51 If the parser is interrupted while processing the remaining data it will
52 schedule another ParseContinueEvent. The processing of data followed by
53 scheduling of the continue events will proceed until either:
55 1) All of the remaining data can be processed without interrupting
56 2) The parser has been cancelled.
59 This capability is currently used in CNavDTD and nsHTMLContentSink. The
60 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
61 processed and when each token is processed. The nsHTML content sink records
62 the time when the chunk has started processing and will return
63 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
64 threshold called max tokenizing processing time. This allows the content sink
65 to limit how much data is processed in a single chunk which in turn gates how
66 much time is spent away from the event loop. Processing smaller chunks of data
67 also reduces the time spent in subsequent reflows.
69 This capability is most apparent when loading large documents. If the maximum
70 token processing time is set small enough the application will remain
71 responsive during document load.
73 A side-effect of this capability is that document load is not complete when
74 the last chunk of data is passed to OnDataAvailable since the parser may have
75 been interrupted when the last chunk of data arrived. The document is complete
76 when all of the document has been tokenized and there aren't any pending
77 nsParserContinueEvents. This can cause problems if the application assumes
78 that it can monitor the load requests to determine when the document load has
79 been completed. This is what happens in Mozilla. The document is considered
80 completely loaded when all of the load requests have been satisfied. To delay
81 the document load until all of the parsing has been completed the
82 nsHTMLContentSink adds a dummy parser load request which is not removed until
83 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
84 DidBuildModel until the final chunk of data has been passed to the parser
85 through the OnDataAvailable and there aren't any pending
86 nsParserContinueEvents.
88 Currently the parser ignores requests to be interrupted during the
89 processing of script. This is because a document.write followed by JavaScript
90 calls to manipulate the DOM may fail if the parser was interrupted during the
91 document.write.
93 For more details @see bugzilla bug 76722
96 class nsParserContinueEvent : public Runnable {
97 public:
98 RefPtr<nsParser> mParser;
100 explicit nsParserContinueEvent(nsParser* aParser)
101 : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
103 NS_IMETHOD Run() override {
104 mParser->HandleParserContinueEvent(this);
105 return NS_OK;
109 //-------------- End ParseContinue Event Definition ------------------------
112 * default constructor
114 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); }
116 nsParser::~nsParser() { Cleanup(); }
118 void nsParser::Initialize() {
119 mContinueEvent = nullptr;
120 mCharsetSource = kCharsetUninitialized;
121 mCharset = WINDOWS_1252_ENCODING;
122 mInternalState = NS_OK;
123 mStreamStatus = NS_OK;
124 mCommand = eViewNormal;
125 mBlocked = 0;
126 mFlags = NS_PARSER_FLAG_CAN_TOKENIZE;
128 mProcessingNetworkData = false;
129 mIsAboutBlank = false;
132 void nsParser::Cleanup() {
133 // It should not be possible for this flag to be set when we are getting
134 // destroyed since this flag implies a pending nsParserContinueEvent, which
135 // has an owning reference to |this|.
136 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
// Cycle-collection boilerplate for nsParser. The unlink and traverse lists
// both name mDTD and mSink; the unlink list additionally contains the
// weak-reference unlink step.
139 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
141 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
142 NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
143 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
144 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
145 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
147 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
148 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
149 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
150 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
// Reference counting and the interface map. The map lists
// nsIStreamListener, nsIParser, nsIRequestObserver and
// nsISupportsWeakReference; nsISupports is resolved through nsIParser.
152 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
153 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
154 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
155 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
156 NS_INTERFACE_MAP_ENTRY(nsIParser)
157 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
158 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
159 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
160 NS_INTERFACE_MAP_END
162 // The parser continue event is posted only if
163 // all of the data to parse has been passed to ::OnDataAvailable
164 // and the parser has been interrupted by the content sink
165 // because the processing of tokens took too long.
167 nsresult nsParser::PostContinueEvent() {
168 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
169 // If this flag isn't set, then there shouldn't be a live continue event!
170 NS_ASSERTION(!mContinueEvent, "bad");
172 // This creates a reference cycle between this and the event that is
173 // broken when the event fires.
174 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
175 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
176 NS_WARNING("failed to dispatch parser continuation event");
177 } else {
178 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
179 mContinueEvent = event;
182 return NS_OK;
185 NS_IMETHODIMP_(void)
186 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
189 * Call this method once you've created a parser, and want to instruct it
190 * about the command which caused the parser to be constructed. For example,
191 * this allows us to select a DTD which can do, say, view-source.
193 * @param aCommand the command string to set
195 NS_IMETHODIMP_(void)
196 nsParser::SetCommand(const char* aCommand) {
197 mCommandStr.Assign(aCommand);
198 if (mCommandStr.EqualsLiteral("view-source")) {
199 mCommand = eViewSource;
200 } else if (mCommandStr.EqualsLiteral("view-fragment")) {
201 mCommand = eViewFragment;
202 } else {
203 mCommand = eViewNormal;
208 * Call this method once you've created a parser, and want to instruct it
209 * about the command which caused the parser to be constructed. For example,
210 * this allows us to select a DTD which can do, say, view-source.
212 * @param aParserCommand the command to set
214 NS_IMETHODIMP_(void)
215 nsParser::SetCommand(eParserCommands aParserCommand) {
216 mCommand = aParserCommand;
220 * Call this method once you've created a parser, and want to instruct it
221 * about what charset to load
223 * @param aCharset- the charset of a document
224 * @param aCharsetSource- the source of the charset
226 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
227 int32_t aCharsetSource,
228 bool aForceAutoDetection) {
229 mCharset = aCharset;
230 mCharsetSource = aCharsetSource;
231 if (mParserContext) {
232 mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
236 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
237 if (mSink) {
238 mSink->SetDocumentCharset(aCharset);
243 * This method gets called in order to set the content
244 * sink for this parser to dump nodes to.
246 * @param nsIContentSink interface for node receiver
248 NS_IMETHODIMP_(void)
249 nsParser::SetContentSink(nsIContentSink* aSink) {
250 MOZ_ASSERT(aSink, "sink cannot be null!");
251 mSink = aSink;
253 if (mSink) {
254 mSink->SetParser(this);
255 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
256 if (htmlSink) {
257 mIsAboutBlank = true;
263 * retrieve the sink set into the parser
264 * @return current sink
266 NS_IMETHODIMP_(nsIContentSink*)
267 nsParser::GetContentSink() { return mSink; }
269 ////////////////////////////////////////////////////////////////////////
272 * This gets called just prior to the model actually
273 * being constructed. It's important to make this the
274 * last thing that happens right before parsing, so we
275 * can delay until the last moment the resolution of
276 * which DTD to use (unless of course we're assigned one).
278 nsresult nsParser::WillBuildModel() {
279 if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
281 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
282 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
283 // to avoid introducing unintentional changes to behavior.
284 return mInternalState;
287 if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
289 if (eDTDMode_autodetect == mParserContext->mDTDMode) {
290 if (mIsAboutBlank) {
291 mParserContext->mDTDMode = eDTDMode_quirks;
292 mParserContext->mDocType = eHTML_Quirks;
293 } else {
294 mParserContext->mDTDMode = eDTDMode_full_standards;
295 mParserContext->mDocType = eXML;
297 } // else XML fragment with nested parser context
299 // We always find a DTD.
300 mParserContext->mAutoDetectStatus = ePrimaryDetect;
302 // Quick check for view source.
303 MOZ_ASSERT(mParserContext->mParserCommand != eViewSource,
304 "The old parser is not supposed to be used for View Source "
305 "anymore.");
307 // Now see if we're parsing XML or HTML (which, as far as we're concerned,
308 // simply means "not XML").
309 if (mParserContext->mDocType == eXML) {
310 RefPtr<nsExpatDriver> expat = new nsExpatDriver();
311 nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink);
312 NS_ENSURE_SUCCESS(rv, rv);
314 mDTD = expat.forget();
315 } else {
316 mDTD = new CNavDTD();
319 return mSink->WillBuildModel(mParserContext->mDTDMode);
323 * This gets called when the parser is done with its input.
325 void nsParser::DidBuildModel() {
326 if (IsComplete() && mParserContext) {
327 // Let sink know if we're about to end load because we've been terminated.
328 // In that case we don't want it to run deferred scripts.
329 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
330 if (mDTD && mSink) {
331 mDTD->DidBuildModel();
332 mSink->DidBuildModel(terminated);
335 // Ref. to bug 61462.
336 mParserContext->mRequest = nullptr;
341 * Call this when you want to *force* the parser to terminate the
342 * parsing process altogether. This is binary -- so once you terminate
343 * you can't resume without restarting altogether.
345 NS_IMETHODIMP
346 nsParser::Terminate(void) {
347 // We should only call DidBuildModel once, so don't do anything if this is
348 // the second time that Terminate has been called.
349 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
350 return NS_OK;
353 nsresult result = NS_OK;
354 // XXX - [ until we figure out a way to break parser-sink circularity ]
355 // Hack - Hold a reference until we are completely done...
356 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
357 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
359 // @see bug 108049
360 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so
361 // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete()
362 // call inside of DidBuildModel looks at the pendingContinueEvents flag.
363 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
364 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
365 // Revoke the pending continue parsing event
366 mContinueEvent = nullptr;
367 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
370 if (mDTD) {
371 mDTD->Terminate();
372 DidBuildModel();
373 } else if (mSink) {
374 // We have no parser context or no DTD yet (so we got terminated before we
375 // got any data). Manually break the reference cycle with the sink.
376 result = mSink->DidBuildModel(true);
377 NS_ENSURE_SUCCESS(result, result);
380 return NS_OK;
383 NS_IMETHODIMP
384 nsParser::ContinueInterruptedParsing() {
385 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
386 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
387 // to avoid introducing unintentional changes to behavior.
388 return mInternalState;
391 // If there are scripts executing, then the content sink is jumping the gun
392 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
393 // later, see bug 460706.
394 if (!IsOkToProcessNetworkData()) {
395 return NS_OK;
398 // If the stream has already finished, there's a good chance
399 // that we might start closing things down when the parser
400 // is reenabled. To make sure that we're not deleted across
401 // the reenabling process, hold a reference to ourselves.
402 nsresult result = NS_OK;
403 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
404 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
406 #ifdef DEBUG
407 if (mBlocked) {
408 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
410 #endif
412 bool isFinalChunk =
413 mParserContext && mParserContext->mStreamListenerState == eOnStop;
415 mProcessingNetworkData = true;
416 if (sinkDeathGrip) {
417 sinkDeathGrip->WillParse();
419 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
420 mProcessingNetworkData = false;
422 if (result != NS_OK) {
423 result = mInternalState;
426 return result;
430 * Stops parsing temporarily. That is, it will prevent the
431 * parser from building up content model while scripts
432 * are being loaded (either an external script from a web
433 * page, or any number of extension content scripts).
435 NS_IMETHODIMP_(void)
436 nsParser::BlockParser() { mBlocked++; }
439 * Open up the parser for tokenization, building up content
440 * model..etc. However, this method does not resume parsing
441 * automatically. It's the callers' responsibility to restart
442 * the parsing engine.
444 NS_IMETHODIMP_(void)
445 nsParser::UnblockParser() {
446 MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
447 if (MOZ_LIKELY(mBlocked > 0)) {
448 mBlocked--;
452 NS_IMETHODIMP_(void)
453 nsParser::ContinueInterruptedParsingAsync() {
454 MOZ_ASSERT(mSink);
455 if (MOZ_LIKELY(mSink)) {
456 mSink->ContinueInterruptedParsingAsync();
461 * Call this to query whether the parser is enabled or not.
463 NS_IMETHODIMP_(bool)
464 nsParser::IsParserEnabled() { return !mBlocked; }
467 * Call this to query whether the parser thinks it's done with parsing.
469 NS_IMETHODIMP_(bool)
470 nsParser::IsComplete() {
471 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
474 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
475 // Ignore any revoked continue events...
476 if (mContinueEvent != ev) return;
478 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
479 mContinueEvent = nullptr;
481 NS_ASSERTION(IsOkToProcessNetworkData(),
482 "Interrupted in the middle of a script?");
483 ContinueInterruptedParsing();
486 bool nsParser::IsInsertionPointDefined() { return false; }
488 void nsParser::IncrementScriptNestingLevel() {}
490 void nsParser::DecrementScriptNestingLevel() {}
492 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
494 bool nsParser::IsScriptCreated() { return false; }
497 * This is the main controlling routine in the parsing process.
498 * Note that it may get called multiple times for the same scanner,
499 * since this is a pushed based system, and all the tokens may
500 * not have been consumed by the scanner during a given invocation
501 * of this method.
503 NS_IMETHODIMP
504 nsParser::Parse(nsIURI* aURL) {
505 MOZ_ASSERT(aURL, "Error: Null URL given");
507 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
508 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
509 // to avoid introducing unintentional changes to behavior.
510 return mInternalState;
513 if (!aURL) {
514 return NS_ERROR_HTMLPARSER_BADURL;
517 MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");
519 mParserContext = MakeUnique<CParserContext>(aURL, mCommand);
521 return NS_OK;
525 * Used by XML fragment parsing below.
527 * @param aSourceBuffer contains a string-full of real content
529 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
530 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
531 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
532 // to avoid introducing unintentional changes to behavior.
533 return mInternalState;
536 // Don't bother if we're never going to parse this.
537 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
538 return NS_OK;
541 if (!aLastCall && aSourceBuffer.IsEmpty()) {
542 // Nothing is being passed to the parser so return
543 // immediately. mUnusedInput will get processed when
544 // some data is actually passed in.
545 // But if this is the last call, make sure to finish up
546 // stuff correctly.
547 return NS_OK;
550 // Maintain a reference to ourselves so we don't go away
551 // till we're completely done.
552 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
554 if (!mParserContext) {
555 // Only make a new context if we don't have one.
556 mParserContext =
557 MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);
559 mUnusedInput.Truncate();
560 } else if (aLastCall) {
561 // Set stream listener state to eOnStop, on the final context - Fix
562 // 68160, to guarantee DidBuildModel() call - Fix 36148
563 mParserContext->mStreamListenerState = eOnStop;
564 mParserContext->mScanner.SetIncremental(false);
567 mParserContext->mScanner.Append(aSourceBuffer);
568 return ResumeParse(false, false, false);
571 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer,
572 nsTArray<nsString>& aTagStack) {
573 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
574 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
575 // to avoid introducing unintentional changes to behavior.
576 return mInternalState;
579 nsresult result = NS_OK;
580 nsAutoString theContext;
581 uint32_t theCount = aTagStack.Length();
582 uint32_t theIndex = 0;
584 for (theIndex = 0; theIndex < theCount; theIndex++) {
585 theContext.Append('<');
586 theContext.Append(aTagStack[theCount - theIndex - 1]);
587 theContext.Append('>');
590 if (theCount == 0) {
591 // Ensure that the buffer is not empty. Because none of the DTDs care
592 // about leading whitespace, this doesn't change the result.
593 theContext.Assign(' ');
596 // First, parse the context to build up the DTD's tag stack. Note that we
597 // pass false for the aLastCall parameter.
598 result = Parse(theContext, false);
599 if (NS_FAILED(result)) {
600 return result;
603 if (!mSink) {
604 // Parse must have failed in the XML case and so the sink was killed.
605 return NS_ERROR_HTMLPARSER_STOPPARSING;
608 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
609 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
611 fragSink->WillBuildContent();
612 // Now, parse the actual content. Note that this is the last call
613 // for HTML content, but for XML, we will want to build and parse
614 // the end tags. However, if tagStack is empty, it's the last call
615 // for XML as well.
616 if (theCount == 0) {
617 result = Parse(aSourceBuffer, true);
618 fragSink->DidBuildContent();
619 } else {
620 // Add an end tag chunk, so expat will read the whole source buffer,
621 // and not worry about ']]' etc.
622 result = Parse(aSourceBuffer + u"</"_ns, false);
623 fragSink->DidBuildContent();
625 if (NS_SUCCEEDED(result)) {
626 nsAutoString endContext;
627 for (theIndex = 0; theIndex < theCount; theIndex++) {
628 // we already added an end tag chunk above
629 if (theIndex > 0) {
630 endContext.AppendLiteral("</");
633 nsString& thisTag = aTagStack[theIndex];
634 // was there an xmlns=?
635 int32_t endOfTag = thisTag.FindChar(char16_t(' '));
636 if (endOfTag == -1) {
637 endContext.Append(thisTag);
638 } else {
639 endContext.Append(Substring(thisTag, 0, endOfTag));
642 endContext.Append('>');
645 result = Parse(endContext, true);
649 mParserContext.reset();
651 return result;
655 * This routine is called to cause the parser to continue parsing its
656 * underlying stream. This call allows the parse process to happen in
657 * chunks, such as when the content is push based, and we need to parse in
658 * pieces.
660 * An interesting change in how the parser gets used has led us to add extra
661 * processing to this method. The case occurs when the parser is blocked in
662 * one context, and gets a parse(string) call in another context. In this
663 * case, the parserContexts are linked. No problem.
665 * The problem is that Parse(string) assumes that it can proceed unabated,
666 * but if the parser is already blocked that assumption is false. So we
667 * needed to add a mechanism here to allow the parser to continue to process
668 * (the pop and free) contexts until 1) it get's blocked again; 2) it runs
669 * out of contexts.
672 * @param allowItertion : set to true if non-script resumption is requested
673 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
674 * @return error code -- 0 if ok, non-zero if error.
676 nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
677 bool aCanInterrupt) {
678 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
679 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
680 // to avoid introducing unintentional changes to behavior.
681 return mInternalState;
684 nsresult result = NS_OK;
686 if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
687 result = WillBuildModel();
688 if (NS_FAILED(result)) {
689 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
690 return result;
693 if (mDTD) {
694 mSink->WillResume();
695 bool theIterationIsOk = true;
697 while (result == NS_OK && theIterationIsOk) {
698 if (!mUnusedInput.IsEmpty()) {
699 // -- Ref: Bug# 22485 --
700 // Insert the unused input into the source buffer
701 // as if it was read from the input stream.
702 // Adding UngetReadable() per vidur!!
703 mParserContext->mScanner.UngetReadable(mUnusedInput);
704 mUnusedInput.Truncate(0);
707 // Only allow parsing to be interrupted in the subsequent call to
708 // build model.
709 nsresult theTokenizerResult;
710 if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
711 mParserContext->mScanner.Mark();
712 if (mParserContext->mDocType == eXML &&
713 mParserContext->mParserCommand != eViewSource) {
714 nsExpatDriver* expat = static_cast<nsExpatDriver*>(mDTD.get());
715 theTokenizerResult =
716 expat->ResumeParse(mParserContext->mScanner, aIsFinalChunk);
717 if (NS_FAILED(theTokenizerResult)) {
718 mParserContext->mScanner.RewindToMark();
719 if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
720 theTokenizerResult = Terminate();
721 mSink = nullptr;
724 } else {
725 // Nothing to do for non-XML. Note that this should only be
726 // about:blank at this point, we're also checking for view-source
727 // above, but that shouldn't end up here anymore.
728 theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
730 } else {
731 theTokenizerResult = NS_OK;
734 result = mDTD->BuildModel(mSink);
735 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
736 PostContinueEvent();
739 theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
740 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
742 // Make sure not to stop parsing too early. Therefore, before shutting
743 // down the parser, it's important to check whether the input buffer
744 // has been scanned to completion (theTokenizerResult should be kEOF).
745 // kEOF -> End of buffer.
747 // If we're told the parser has been blocked, we disable all further
748 // parsing (and cache any data coming in) until the parser is
749 // re-enabled.
750 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
751 mSink->WillInterrupt();
752 return NS_OK;
754 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
755 // Note: Parser Terminate() calls DidBuildModel.
756 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
757 DidBuildModel();
758 mInternalState = result;
761 return NS_OK;
763 if (((NS_OK == result &&
764 theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
765 result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
766 mParserContext->mStreamListenerState == eOnStop) {
767 DidBuildModel();
768 return NS_OK;
771 if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
772 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
773 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
774 mSink->WillInterrupt();
777 } else {
778 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
782 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
785 /*******************************************************************
786 These methods are used to talk to the netlib system...
787 *******************************************************************/
789 nsresult nsParser::OnStartRequest(nsIRequest* request) {
790 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
791 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
792 // to avoid introducing unintentional changes to behavior.
793 return mInternalState;
796 MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
797 "Parser's nsIStreamListener API was not setup "
798 "correctly in constructor.");
800 mParserContext->mStreamListenerState = eOnStart;
801 mParserContext->mAutoDetectStatus = eUnknownDetect;
802 mParserContext->mRequest = request;
804 mDTD = nullptr;
806 nsresult rv;
807 nsAutoCString contentType;
808 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
809 if (channel) {
810 rv = channel->GetContentType(contentType);
811 if (NS_SUCCEEDED(rv)) {
812 mParserContext->SetMimeType(contentType);
816 rv = NS_OK;
818 return rv;
821 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
822 int32_t aLen,
823 nsCString& oCharset) {
824 // This code is rather pointless to have. Might as well reuse expat as
825 // seen in nsHtml5StreamParser. -- hsivonen
826 oCharset.Truncate();
827 if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
828 ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
829 int32_t i;
830 bool versionFound = false, encodingFound = false;
831 for (i = 6; i < aLen && !encodingFound; ++i) {
832 // end of XML declaration?
833 if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
834 (((char*)aBytes)[i + 1] == '>')) {
835 break;
837 // Version is required.
838 if (!versionFound) {
839 // Want to avoid string comparisons, hence looking for 'n'
840 // and only if found check the string leading to it. Not
841 // foolproof, but fast.
842 // The shortest string allowed before this is (strlen==13):
843 // <?xml version
844 if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
845 (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
846 // Fast forward through version
847 char q = 0;
848 for (++i; i < aLen; ++i) {
849 char qi = ((char*)aBytes)[i];
850 if (qi == '\'' || qi == '"') {
851 if (q && q == qi) {
852 // ending quote
853 versionFound = true;
854 break;
855 } else {
856 // Starting quote
857 q = qi;
862 } else {
863 // encoding must follow version
864 // Want to avoid string comparisons, hence looking for 'g'
865 // and only if found check the string leading to it. Not
866 // foolproof, but fast.
867 // The shortest allowed string before this (strlen==26):
868 // <?xml version="1" encoding
869 if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
870 (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
871 int32_t encStart = 0;
872 char q = 0;
873 for (++i; i < aLen; ++i) {
874 char qi = ((char*)aBytes)[i];
875 if (qi == '\'' || qi == '"') {
876 if (q && q == qi) {
877 int32_t count = i - encStart;
878 // encoding value is invalid if it is UTF-16
879 if (count > 0 &&
880 PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
881 count)) {
882 oCharset.Assign((char*)(aBytes + encStart), count);
884 encodingFound = true;
885 break;
886 } else {
887 encStart = i + 1;
888 q = qi;
893 } // if (!versionFound)
894 } // for
896 return !oCharset.IsEmpty();
899 inline char GetNextChar(nsACString::const_iterator& aStart,
900 nsACString::const_iterator& aEnd) {
901 NS_ASSERTION(aStart != aEnd, "end of buffer");
902 return (++aStart != aEnd) ? *aStart : '\0';
905 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
906 const char* fromRawSegment,
907 uint32_t toOffset, uint32_t count,
908 uint32_t* writeCount) {
909 *writeCount = count;
910 return NS_OK;
913 typedef struct {
914 bool mNeedCharsetCheck;
915 nsParser* mParser;
916 nsScanner* mScanner;
917 nsIRequest* mRequest;
918 } ParserWriteStruct;
921 * This function is invoked as a result of a call to a stream's
922 * ReadSegments() method. It is called for each contiguous buffer
923 * of data in the underlying stream or pipe. Using ReadSegments
924 * allows us to avoid copying data to read out of the stream.
926 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
927 const char* fromRawSegment, uint32_t toOffset,
928 uint32_t count, uint32_t* writeCount) {
929 nsresult result;
930 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
931 const unsigned char* buf =
932 reinterpret_cast<const unsigned char*>(fromRawSegment);
933 uint32_t theNumRead = count;
935 if (!pws) {
936 return NS_ERROR_FAILURE;
939 if (pws->mNeedCharsetCheck) {
940 pws->mNeedCharsetCheck = false;
941 int32_t source;
942 auto preferred = pws->mParser->GetDocumentCharset(source);
944 // This code was bogus when I found it. It expects the BOM or the XML
945 // declaration to be entirely in the first network buffer. -- hsivonen
946 const Encoding* encoding;
947 std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
948 if (encoding) {
949 // The decoder will swallow the BOM. The UTF-16 will re-sniff for
950 // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
951 // or "UTF-16BE".
952 preferred = WrapNotNull(encoding);
953 source = kCharsetFromByteOrderMark;
954 } else if (source < kCharsetFromChannel) {
955 nsAutoCString declCharset;
957 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
958 encoding = Encoding::ForLabel(declCharset);
959 if (encoding) {
960 preferred = WrapNotNull(encoding);
961 source = kCharsetFromMetaTag;
966 pws->mParser->SetDocumentCharset(preferred, source, false);
967 pws->mParser->SetSinkCharset(preferred);
970 result = pws->mScanner->Append(fromRawSegment, theNumRead);
971 if (NS_SUCCEEDED(result)) {
972 *writeCount = count;
975 return result;
978 nsresult nsParser::OnDataAvailable(nsIRequest* request,
979 nsIInputStream* pIStream,
980 uint64_t sourceOffset, uint32_t aLength) {
981 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
982 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
983 // to avoid introducing unintentional changes to behavior.
984 return mInternalState;
987 MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
988 eOnDataAvail == mParserContext->mStreamListenerState),
989 "Error: OnStartRequest() must be called before OnDataAvailable()");
990 MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
991 "Must have a buffered input stream");
993 nsresult rv = NS_OK;
995 if (mIsAboutBlank) {
996 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
997 // ... but if an extension tries to feed us data for about:blank in a
998 // release build, silently ignore the data.
999 uint32_t totalRead;
1000 rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1001 &totalRead);
1002 return rv;
1005 if (mParserContext->mRequest == request) {
1006 mParserContext->mStreamListenerState = eOnDataAvail;
1008 uint32_t totalRead;
1009 ParserWriteStruct pws;
1010 pws.mNeedCharsetCheck = true;
1011 pws.mParser = this;
1012 pws.mScanner = &mParserContext->mScanner;
1013 pws.mRequest = request;
1015 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1016 if (NS_FAILED(rv)) {
1017 return rv;
1020 if (IsOkToProcessNetworkData()) {
1021 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1022 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1023 mProcessingNetworkData = true;
1024 if (sinkDeathGrip) {
1025 sinkDeathGrip->WillParse();
1027 rv = ResumeParse();
1028 mProcessingNetworkData = false;
1030 } else {
1031 rv = NS_ERROR_UNEXPECTED;
1034 return rv;
1038 * This is called by the networking library once the last block of data
1039 * has been collected from the net.
1041 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1042 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
1043 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
1044 // to avoid introducing unintentional changes to behavior.
1045 return mInternalState;
1048 nsresult rv = NS_OK;
1050 if (mParserContext->mRequest == request) {
1051 mParserContext->mStreamListenerState = eOnStop;
1052 mParserContext->mScanner.SetIncremental(false);
1055 mStreamStatus = status;
1057 if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1058 mProcessingNetworkData = true;
1059 if (mSink) {
1060 mSink->WillParse();
1062 rv = ResumeParse(true, true);
1063 mProcessingNetworkData = false;
1066 // If the parser isn't enabled, we don't finish parsing till
1067 // it is reenabled.
1069 return rv;
1073 * Get this as nsIStreamListener
1075 nsIStreamListener* nsParser::GetStreamListener() { return this; }