Bug 1700051: part 33) Move `AdjustSoftBeginAndBuildSoftText` to `SoftText`. r=smaug
[gecko.git] / parser / htmlparser / nsParser.cpp
blobd877085c3f7676ee4a4057c53a4afb06cf8d95fe
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsAtom.h"
8 #include "nsParser.h"
9 #include "nsString.h"
10 #include "nsCRT.h"
11 #include "nsScanner.h"
12 #include "plstr.h"
13 #include "nsIChannel.h"
14 #include "nsIInputStream.h"
15 #include "CNavDTD.h"
16 #include "prenv.h"
17 #include "prlock.h"
18 #include "prcvar.h"
19 #include "nsParserCIID.h"
20 #include "nsReadableUtils.h"
21 #include "nsCOMPtr.h"
22 #include "nsExpatDriver.h"
23 #include "nsIFragmentContentSink.h"
24 #include "nsStreamUtils.h"
25 #include "nsHTMLTokenizer.h"
26 #include "nsXPCOMCIDInternal.h"
27 #include "nsMimeTypes.h"
28 #include "mozilla/CondVar.h"
29 #include "mozilla/Mutex.h"
30 #include "nsCharsetSource.h"
31 #include "nsThreadUtils.h"
32 #include "nsIHTMLContentSink.h"
34 #include "mozilla/BinarySearch.h"
35 #include "mozilla/dom/ScriptLoader.h"
36 #include "mozilla/Encoding.h"
38 using namespace mozilla;
// Bit flags stored in nsParser::mFlags.
//
// OBSERVERS_ENABLED: set by Initialize(); cleared for the duration of
// ParseFragment() and set again when it finishes.
40 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
// PENDING_CONTINUE_EVENT: set by PostContinueEvent() once a continue event has
// been dispatched; cleared by CancelParsingEvents() and
// HandleParserContinueEvent(). IsComplete() returns false while it is set.
41 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
// FLUSH_TOKENS: NOTE(review): not referenced in the visible part of this file.
42 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
// CAN_TOKENIZE: set by Initialize(); cleared by ResumeParse() when
// WillBuildModel() fails. ResumeParse() only calls Tokenize() while it is set.
43 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
45 //-------------- Begin ParseContinue Event Definition ------------------------
47 The parser can be explicitly interrupted by passing a return value of
48 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
49 the parser to stop processing and allow the application to return to the event
50 loop. The data which was left at the time of interruption will be processed
51 the next time OnDataAvailable is called. If the parser has received its final
52 chunk of data then OnDataAvailable will no longer be called by the networking
53 module, so the parser will schedule a nsParserContinueEvent which will call
54 the parser to process the remaining data after returning to the event loop.
55 If the parser is interrupted while processing the remaining data it will
56 schedule another ParseContinueEvent. The processing of data followed by
57 scheduling of the continue events will proceed until either:
59 1) All of the remaining data can be processed without interrupting
60 2) The parser has been cancelled.
63 This capability is currently used in CNavDTD and nsHTMLContentSink. The
64 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
65 processed and when each token is processed. The nsHTML content sink records
66 the time when the chunk has started processing and will return
67 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
68 threshold called max tokenizing processing time. This allows the content sink
69 to limit how much data is processed in a single chunk which in turn gates how
70 much time is spent away from the event loop. Processing smaller chunks of data
71 also reduces the time spent in subsequent reflows.
73 This capability is most apparent when loading large documents. If the maximum
74 token processing time is set small enough the application will remain
75 responsive during document load.
77 A side-effect of this capability is that document load is not complete when
78 the last chunk of data is passed to OnDataAvailable since the parser may have
79 been interrupted when the last chunk of data arrived. The document is complete
80 when all of the document has been tokenized and there aren't any pending
81 nsParserContinueEvents. This can cause problems if the application assumes
82 that it can monitor the load requests to determine when the document load has
83 been completed. This is what happens in Mozilla. The document is considered
84 completely loaded when all of the load requests have been satisfied. To delay
85 the document load until all of the parsing has been completed the
86 nsHTMLContentSink adds a dummy parser load request which is not removed until
87 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
88 DidBuildModel until the final chunk of data has been passed to the parser
89 through the OnDataAvailable and there aren't any pending
90 nsParserContinueEvents.
92 Currently the parser ignores requests to be interrupted during the
93 processing of script. This is because a document.write followed by JavaScript
94 calls to manipulate the DOM may fail if the parser was interrupted during the
95 document.write.
97 For more details @see bugzilla bug 76722
100 class nsParserContinueEvent : public Runnable {
101 public:
102 RefPtr<nsParser> mParser;
104 explicit nsParserContinueEvent(nsParser* aParser)
105 : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
107 NS_IMETHOD Run() override {
108 mParser->HandleParserContinueEvent(this);
109 return NS_OK;
113 //-------------- End ParseContinue Event Definition ------------------------
116 * default constructor
118 nsParser::nsParser()
119 : mParserContext(nullptr), mCharset(WINDOWS_1252_ENCODING) {
120 Initialize(true);
123 nsParser::~nsParser() { Cleanup(); }
125 void nsParser::Initialize(bool aConstructor) {
126 if (aConstructor) {
127 // Raw pointer
128 mParserContext = 0;
129 } else {
130 // nsCOMPtrs
131 mObserver = nullptr;
132 mUnusedInput.Truncate();
135 mContinueEvent = nullptr;
136 mCharsetSource = kCharsetUninitialized;
137 mCharset = WINDOWS_1252_ENCODING;
138 mInternalState = NS_OK;
139 mStreamStatus = NS_OK;
140 mCommand = eViewNormal;
141 mBlocked = 0;
142 mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE;
144 mProcessingNetworkData = false;
145 mIsAboutBlank = false;
148 void nsParser::Cleanup() {
149 #ifdef DEBUG
150 if (mParserContext && mParserContext->mPrevContext) {
151 NS_WARNING("Extra parser contexts still on the parser stack");
153 #endif
155 while (mParserContext) {
156 CParserContext* pc = mParserContext->mPrevContext;
157 delete mParserContext;
158 mParserContext = pc;
161 // It should not be possible for this flag to be set when we are getting
162 // destroyed since this flag implies a pending nsParserContinueEvent, which
163 // has an owning reference to |this|.
164 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
// Cycle collection: declare the participant for nsParser.
167 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
// Unlink: drop the DTD, sink and observer references and the weak reference.
169 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
170 NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
171 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
172 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
173 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
174 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
176 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
177 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
178 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
179 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
180 CParserContext* pc = tmp->mParserContext;
181 while (pc) {
182 cb.NoteXPCOMChild(pc->mTokenizer);
183 pc = pc->mPrevContext;
185 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
// Reference counting and the QueryInterface table for nsParser. nsISupports
// is reachable through more than one base, so it is resolved via nsIParser.
187 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
188 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
189 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
190 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
191 NS_INTERFACE_MAP_ENTRY(nsIParser)
192 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
193 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
194 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
195 NS_INTERFACE_MAP_END
197 // The parser continue event is posted only if
198 // all of the data to parse has been passed to ::OnDataAvailable
199 // and the parser has been interrupted by the content sink
200 // because the processing of tokens took too long.
202 nsresult nsParser::PostContinueEvent() {
203 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
204 // If this flag isn't set, then there shouldn't be a live continue event!
205 NS_ASSERTION(!mContinueEvent, "bad");
207 // This creates a reference cycle between this and the event that is
208 // broken when the event fires.
209 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
210 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
211 NS_WARNING("failed to dispatch parser continuation event");
212 } else {
213 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
214 mContinueEvent = event;
217 return NS_OK;
220 NS_IMETHODIMP_(void)
221 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
224 * Call this method once you've created a parser, and want to instruct it
225 * about the command which caused the parser to be constructed. For example,
226 * this allows us to select a DTD which can do, say, view-source.
228 * @param aCommand the command string to set
230 NS_IMETHODIMP_(void)
231 nsParser::SetCommand(const char* aCommand) {
232 mCommandStr.Assign(aCommand);
233 if (mCommandStr.EqualsLiteral("view-source")) {
234 mCommand = eViewSource;
235 } else if (mCommandStr.EqualsLiteral("view-fragment")) {
236 mCommand = eViewFragment;
237 } else {
238 mCommand = eViewNormal;
243 * Call this method once you've created a parser, and want to instruct it
244 * about the command which caused the parser to be constructed. For example,
245 * this allows us to select a DTD which can do, say, view-source.
247 * @param aParserCommand the command to set
249 NS_IMETHODIMP_(void)
250 nsParser::SetCommand(eParserCommands aParserCommand) {
251 mCommand = aParserCommand;
255 * Call this method once you've created a parser, and want to instruct it
256 * about what charset to load
258 * @param aCharset- the charset of a document
259 * @param aCharsetSource- the source of the charset
261 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
262 int32_t aCharsetSource,
263 bool aChannelHadCharset) {
264 mCharset = aCharset;
265 mCharsetSource = aCharsetSource;
266 if (mParserContext && mParserContext->mScanner) {
267 mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
271 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
272 if (mSink) {
273 mSink->SetDocumentCharset(aCharset);
278 * This method gets called in order to set the content
279 * sink for this parser to dump nodes to.
281 * @param nsIContentSink interface for node receiver
283 NS_IMETHODIMP_(void)
284 nsParser::SetContentSink(nsIContentSink* aSink) {
285 MOZ_ASSERT(aSink, "sink cannot be null!");
286 mSink = aSink;
288 if (mSink) {
289 mSink->SetParser(this);
290 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
291 if (htmlSink) {
292 mIsAboutBlank = true;
298 * retrieve the sink set into the parser
299 * @return current sink
301 NS_IMETHODIMP_(nsIContentSink*)
302 nsParser::GetContentSink() { return mSink; }
304 static nsIDTD* FindSuitableDTD(CParserContext& aParserContext) {
305 // We always find a DTD.
306 aParserContext.mAutoDetectStatus = ePrimaryDetect;
308 // Quick check for view source.
309 MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
310 "The old parser is not supposed to be used for View Source "
311 "anymore.");
313 // Now see if we're parsing HTML (which, as far as we're concerned, simply
314 // means "not XML").
315 if (aParserContext.mDocType != eXML) {
316 return new CNavDTD();
319 // If we're here, then we'd better be parsing XML.
320 NS_ASSERTION(aParserContext.mDocType == eXML,
321 "What are you trying to send me, here?");
322 return new nsExpatDriver();
325 NS_IMETHODIMP
326 nsParser::CancelParsingEvents() {
327 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
328 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
329 // Revoke the pending continue parsing event
330 mContinueEvent = nullptr;
331 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
333 return NS_OK;
336 ////////////////////////////////////////////////////////////////////////
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order. Stores the value
 * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
 * (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Implementation note: RV first receives EXPR2's result and is overwritten
 * with EXPR1's result only when EXPR2 succeeded, so a failure of the latter
 * expression always wins.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) \
  {                                                \
    nsresult RV##__temp = EXPR1;                   \
    RV = EXPR2;                                    \
    if (NS_SUCCEEDED(RV)) {                        \
      RV = RV##__temp;                             \
    }                                              \
  }
392 * This gets called just prior to the model actually
393 * being constructed. It's important to make this the
394 * last thing that happens right before parsing, so we
395 * can delay until the last moment the resolution of
396 * which DTD to use (unless of course we're assigned one).
398 nsresult nsParser::WillBuildModel(nsString& aFilename) {
399 if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
401 if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
403 if (eDTDMode_unknown == mParserContext->mDTDMode ||
404 eDTDMode_autodetect == mParserContext->mDTDMode) {
405 if (mIsAboutBlank) {
406 mParserContext->mDTDMode = eDTDMode_quirks;
407 mParserContext->mDocType = eHTML_Quirks;
408 } else {
409 mParserContext->mDTDMode = eDTDMode_full_standards;
410 mParserContext->mDocType = eXML;
412 } // else XML fragment with nested parser context
414 NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
415 "Clobbering DTD for non-root parser context!");
416 mDTD = FindSuitableDTD(*mParserContext);
417 NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
419 nsITokenizer* tokenizer;
420 nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
421 NS_ENSURE_SUCCESS(rv, rv);
423 rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
424 nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
425 // nsIDTD::WillBuildModel used to be responsible for calling
426 // nsIContentSink::WillBuildModel, but that obligation isn't expressible
427 // in the nsIDTD interface itself, so it's sounder and simpler to give that
428 // responsibility back to the parser. The former behavior of the DTD was to
429 // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
430 // failure we should use sinkResult instead of rv, to preserve the old error
431 // handling behavior of the DTD:
432 return NS_FAILED(sinkResult) ? sinkResult : rv;
436 * This gets called when the parser is done with its input.
437 * Note that the parser may have been called recursively, so we
438 * have to check for a prev. context before closing out the DTD/sink.
440 nsresult nsParser::DidBuildModel(nsresult anErrorCode) {
441 nsresult result = anErrorCode;
443 if (IsComplete()) {
444 if (mParserContext && !mParserContext->mPrevContext) {
445 // Let sink know if we're about to end load because we've been terminated.
446 // In that case we don't want it to run deferred scripts.
447 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
448 if (mDTD && mSink) {
449 nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
450 sinkResult = mSink->DidBuildModel(terminated);
451 // nsIDTD::DidBuildModel used to be responsible for calling
452 // nsIContentSink::DidBuildModel, but that obligation isn't expressible
453 // in the nsIDTD interface itself, so it's sounder and simpler to give
454 // that responsibility back to the parser. The former behavior of the
455 // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
456 // sink returns failure we should use sinkResult instead of dtdResult,
457 // to preserve the old error handling behavior of the DTD:
458 result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
461 // Ref. to bug 61462.
462 mParserContext->mRequest = nullptr;
466 return result;
470 * This method adds a new parser context to the list,
471 * pushing the current one to the next position.
473 * @param ptr to new context
475 void nsParser::PushContext(CParserContext& aContext) {
476 NS_ASSERTION(aContext.mPrevContext == mParserContext,
477 "Trying to push a context whose previous context differs from "
478 "the current parser context.");
479 mParserContext = &aContext;
483 * This method pops the topmost context off the stack,
484 * returning it to the user. The next context (if any)
485 * becomes the current context.
486 * @update gess7/22/98
487 * @return prev. context
489 CParserContext* nsParser::PopContext() {
490 CParserContext* oldContext = mParserContext;
491 if (oldContext) {
492 mParserContext = oldContext->mPrevContext;
493 if (mParserContext) {
494 // If the old context was blocked, propagate the blocked state
495 // back to the new one. Also, propagate the stream listener state
496 // but don't override onStop state to guarantee the call to
497 // DidBuildModel().
498 if (mParserContext->mStreamListenerState != eOnStop) {
499 mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
503 return oldContext;
507 * Call this when you want control whether or not the parser will parse
508 * and tokenize input (TRUE), or whether it just caches input to be
509 * parsed later (FALSE).
511 * @param aState determines whether we parse/tokenize or just cache.
512 * @return current state
514 void nsParser::SetUnusedInput(nsString& aBuffer) { mUnusedInput = aBuffer; }
517 * Call this when you want to *force* the parser to terminate the
518 * parsing process altogether. This is binary -- so once you terminate
519 * you can't resume without restarting altogether.
521 NS_IMETHODIMP
522 nsParser::Terminate(void) {
523 // We should only call DidBuildModel once, so don't do anything if this is
524 // the second time that Terminate has been called.
525 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
526 return NS_OK;
529 nsresult result = NS_OK;
530 // XXX - [ until we figure out a way to break parser-sink circularity ]
531 // Hack - Hold a reference until we are completely done...
532 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
533 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
535 // CancelParsingEvents must be called to avoid leaking the nsParser object
536 // @see bug 108049
537 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
538 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
539 // The IsComplete() call inside of DidBuildModel looks at the
540 // pendingContinueEvents flag.
541 CancelParsingEvents();
543 // If we got interrupted in the middle of a document.write, then we might
544 // have more than one parser context on our parsercontext stack. This has
545 // the effect of making DidBuildModel a no-op, meaning that we never call
546 // our sink's DidBuildModel and break the reference cycle, causing a leak.
547 // Since we're getting terminated, we manually clean up our context stack.
548 while (mParserContext && mParserContext->mPrevContext) {
549 CParserContext* prev = mParserContext->mPrevContext;
550 delete mParserContext;
551 mParserContext = prev;
554 if (mDTD) {
555 mDTD->Terminate();
556 DidBuildModel(result);
557 } else if (mSink) {
558 // We have no parser context or no DTD yet (so we got terminated before we
559 // got any data). Manually break the reference cycle with the sink.
560 result = mSink->DidBuildModel(true);
561 NS_ENSURE_SUCCESS(result, result);
564 return NS_OK;
567 NS_IMETHODIMP
568 nsParser::ContinueInterruptedParsing() {
569 // If there are scripts executing, then the content sink is jumping the gun
570 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
571 // later, see bug 460706.
572 if (!IsOkToProcessNetworkData()) {
573 return NS_OK;
576 // If the stream has already finished, there's a good chance
577 // that we might start closing things down when the parser
578 // is reenabled. To make sure that we're not deleted across
579 // the reenabling process, hold a reference to ourselves.
580 nsresult result = NS_OK;
581 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
582 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
584 #ifdef DEBUG
585 if (mBlocked) {
586 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
588 #endif
590 bool isFinalChunk =
591 mParserContext && mParserContext->mStreamListenerState == eOnStop;
593 mProcessingNetworkData = true;
594 if (sinkDeathGrip) {
595 sinkDeathGrip->WillParse();
597 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
598 mProcessingNetworkData = false;
600 if (result != NS_OK) {
601 result = mInternalState;
604 return result;
608 * Stops parsing temporarily. That is, it will prevent the
609 * parser from building up content model while scripts
610 * are being loaded (either an external script from a web
611 * page, or any number of extension content scripts).
613 NS_IMETHODIMP_(void)
614 nsParser::BlockParser() { mBlocked++; }
617 * Open up the parser for tokenization, building up content
618 * model..etc. However, this method does not resume parsing
619 * automatically. It's the callers' responsibility to restart
620 * the parsing engine.
622 NS_IMETHODIMP_(void)
623 nsParser::UnblockParser() {
624 MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
625 if (MOZ_LIKELY(mBlocked > 0)) {
626 mBlocked--;
630 NS_IMETHODIMP_(void)
631 nsParser::ContinueInterruptedParsingAsync() {
632 MOZ_ASSERT(mSink);
633 if (MOZ_LIKELY(mSink)) {
634 mSink->ContinueInterruptedParsingAsync();
639 * Call this to query whether the parser is enabled or not.
641 NS_IMETHODIMP_(bool)
642 nsParser::IsParserEnabled() { return !mBlocked; }
645 * Call this to query whether the parser thinks it's done with parsing.
647 NS_IMETHODIMP_(bool)
648 nsParser::IsComplete() {
649 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
652 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
653 // Ignore any revoked continue events...
654 if (mContinueEvent != ev) return;
656 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
657 mContinueEvent = nullptr;
659 NS_ASSERTION(IsOkToProcessNetworkData(),
660 "Interrupted in the middle of a script?");
661 ContinueInterruptedParsing();
664 bool nsParser::IsInsertionPointDefined() { return false; }
666 void nsParser::IncrementScriptNestingLevel() {}
668 void nsParser::DecrementScriptNestingLevel() {}
670 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
672 void nsParser::MarkAsNotScriptCreated(const char* aCommand) {}
674 bool nsParser::IsScriptCreated() { return false; }
677 * This is the main controlling routine in the parsing process.
678 * Note that it may get called multiple times for the same scanner,
679 * since this is a pushed based system, and all the tokens may
680 * not have been consumed by the scanner during a given invocation
681 * of this method.
683 NS_IMETHODIMP
684 nsParser::Parse(nsIURI* aURL, nsIRequestObserver* aListener, void* aKey,
685 nsDTDMode aMode) {
686 MOZ_ASSERT(aURL, "Error: Null URL given");
688 nsresult result = NS_ERROR_HTMLPARSER_BADURL;
689 mObserver = aListener;
691 if (aURL) {
692 nsAutoCString spec;
693 nsresult rv = aURL->GetSpec(spec);
694 if (rv != NS_OK) {
695 return rv;
697 NS_ConvertUTF8toUTF16 theName(spec);
699 nsScanner* theScanner = new nsScanner(theName, false);
700 CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
701 mCommand, aListener);
702 if (pc && theScanner) {
703 pc->mMultipart = true;
704 pc->mContextType = CParserContext::eCTURL;
705 pc->mDTDMode = aMode;
706 PushContext(*pc);
708 result = NS_OK;
709 } else {
710 result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
713 return result;
717 * Used by XML fragment parsing below.
719 * @param aSourceBuffer contains a string-full of real content
721 nsresult nsParser::Parse(const nsAString& aSourceBuffer, void* aKey,
722 bool aLastCall) {
723 nsresult result = NS_OK;
725 // Don't bother if we're never going to parse this.
726 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
727 return result;
730 if (!aLastCall && aSourceBuffer.IsEmpty()) {
731 // Nothing is being passed to the parser so return
732 // immediately. mUnusedInput will get processed when
733 // some data is actually passed in.
734 // But if this is the last call, make sure to finish up
735 // stuff correctly.
736 return result;
739 // Maintain a reference to ourselves so we don't go away
740 // till we're completely done.
741 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
743 if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
744 // Note: The following code will always find the parser context associated
745 // with the given key, even if that context has been suspended (e.g., for
746 // another document.write call). This doesn't appear to be exactly what IE
747 // does in the case where this happens, but this makes more sense.
748 CParserContext* pc = mParserContext;
749 while (pc && pc->mKey != aKey) {
750 pc = pc->mPrevContext;
753 if (!pc) {
754 // Only make a new context if we don't have one, OR if we do, but has a
755 // different context key.
756 nsScanner* theScanner = new nsScanner(mUnusedInput);
757 NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
759 eAutoDetectResult theStatus = eUnknownDetect;
761 if (mParserContext &&
762 mParserContext->mMimeType.EqualsLiteral("application/xml")) {
763 // Ref. Bug 90379
764 NS_ASSERTION(mDTD, "How come the DTD is null?");
766 if (mParserContext) {
767 theStatus = mParserContext->mAutoDetectStatus;
768 // Added this to fix bug 32022.
772 pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, 0,
773 theStatus, aLastCall);
774 NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
776 PushContext(*pc);
778 pc->mMultipart = !aLastCall; // By default
779 if (pc->mPrevContext) {
780 pc->mMultipart |= pc->mPrevContext->mMultipart;
783 // Start fix bug 40143
784 if (pc->mMultipart) {
785 pc->mStreamListenerState = eOnDataAvail;
786 if (pc->mScanner) {
787 pc->mScanner->SetIncremental(true);
789 } else {
790 pc->mStreamListenerState = eOnStop;
791 if (pc->mScanner) {
792 pc->mScanner->SetIncremental(false);
795 // end fix for 40143
797 pc->mContextType = CParserContext::eCTString;
798 pc->SetMimeType("application/xml"_ns);
799 pc->mDTDMode = eDTDMode_full_standards;
801 mUnusedInput.Truncate();
803 pc->mScanner->Append(aSourceBuffer);
804 // Do not interrupt document.write() - bug 95487
805 result = ResumeParse(false, false, false);
806 } else {
807 pc->mScanner->Append(aSourceBuffer);
808 if (!pc->mPrevContext) {
809 // Set stream listener state to eOnStop, on the final context - Fix
810 // 68160, to guarantee DidBuildModel() call - Fix 36148
811 if (aLastCall) {
812 pc->mStreamListenerState = eOnStop;
813 pc->mScanner->SetIncremental(false);
816 if (pc == mParserContext) {
817 // If pc is not mParserContext, then this call to ResumeParse would
818 // do the wrong thing and try to continue parsing using
819 // mParserContext. We need to wait to actually resume parsing on pc.
820 ResumeParse(false, false, false);
826 return result;
829 NS_IMETHODIMP
830 nsParser::ParseFragment(const nsAString& aSourceBuffer,
831 nsTArray<nsString>& aTagStack) {
832 nsresult result = NS_OK;
833 nsAutoString theContext;
834 uint32_t theCount = aTagStack.Length();
835 uint32_t theIndex = 0;
837 // Disable observers for fragments
838 mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
840 for (theIndex = 0; theIndex < theCount; theIndex++) {
841 theContext.Append('<');
842 theContext.Append(aTagStack[theCount - theIndex - 1]);
843 theContext.Append('>');
846 if (theCount == 0) {
847 // Ensure that the buffer is not empty. Because none of the DTDs care
848 // about leading whitespace, this doesn't change the result.
849 theContext.Assign(' ');
852 // First, parse the context to build up the DTD's tag stack. Note that we
853 // pass false for the aLastCall parameter.
854 result = Parse(theContext, (void*)&theContext, false);
855 if (NS_FAILED(result)) {
856 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
857 return result;
860 if (!mSink) {
861 // Parse must have failed in the XML case and so the sink was killed.
862 return NS_ERROR_HTMLPARSER_STOPPARSING;
865 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
866 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
868 fragSink->WillBuildContent();
869 // Now, parse the actual content. Note that this is the last call
870 // for HTML content, but for XML, we will want to build and parse
871 // the end tags. However, if tagStack is empty, it's the last call
872 // for XML as well.
873 if (theCount == 0) {
874 result = Parse(aSourceBuffer, &theContext, true);
875 fragSink->DidBuildContent();
876 } else {
877 // Add an end tag chunk, so expat will read the whole source buffer,
878 // and not worry about ']]' etc.
879 result = Parse(aSourceBuffer + u"</"_ns, &theContext, false);
880 fragSink->DidBuildContent();
882 if (NS_SUCCEEDED(result)) {
883 nsAutoString endContext;
884 for (theIndex = 0; theIndex < theCount; theIndex++) {
885 // we already added an end tag chunk above
886 if (theIndex > 0) {
887 endContext.AppendLiteral("</");
890 nsString& thisTag = aTagStack[theIndex];
891 // was there an xmlns=?
892 int32_t endOfTag = thisTag.FindChar(char16_t(' '));
893 if (endOfTag == -1) {
894 endContext.Append(thisTag);
895 } else {
896 endContext.Append(Substring(thisTag, 0, endOfTag));
899 endContext.Append('>');
902 result = Parse(endContext, &theContext, true);
906 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
908 return result;
912 * This routine is called to cause the parser to continue parsing its
913 * underlying stream. This call allows the parse process to happen in
914 * chunks, such as when the content is push based, and we need to parse in
915 * pieces.
917 * An interesting change in how the parser gets used has led us to add extra
918 * processing to this method. The case occurs when the parser is blocked in
919 * one context, and gets a parse(string) call in another context. In this
920 * case, the parserContexts are linked. No problem.
922 * The problem is that Parse(string) assumes that it can proceed unabated,
923 * but if the parser is already blocked that assumption is false. So we
924 * needed to add a mechanism here to allow the parser to continue to process
925 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
926 * out of contexts.
929 * @param allowIteration : set to true if non-script resumption is requested
930 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
931 * @return error code -- 0 if ok, non-zero if error.
933 nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
934 bool aCanInterrupt) {
935 nsresult result = NS_OK;
937 if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
938 result = WillBuildModel(mParserContext->mScanner->GetFilename());
939 if (NS_FAILED(result)) {
940 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
941 return result;
944 if (mDTD) {
945 mSink->WillResume();
946 bool theIterationIsOk = true;
948 while (result == NS_OK && theIterationIsOk) {
949 if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
950 // -- Ref: Bug# 22485 --
951 // Insert the unused input into the source buffer
952 // as if it was read from the input stream.
953 // Adding UngetReadable() per vidur!!
954 mParserContext->mScanner->UngetReadable(mUnusedInput);
955 mUnusedInput.Truncate(0);
958 // Only allow parsing to be interrupted in the subsequent call to
959 // build model.
960 nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
961 ? Tokenize(aIsFinalChunk)
962 : NS_OK;
963 result = BuildModel();
965 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
966 PostContinueEvent();
969 theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
970 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
972 // Make sure not to stop parsing too early. Therefore, before shutting
973 // down the parser, it's important to check whether the input buffer
974 // has been scanned to completion (theTokenizerResult should be kEOF).
975 // kEOF -> End of buffer.
977 // If we're told to block the parser, we disable all further parsing
978 // (and cache any data coming in) until the parser is re-enabled.
979 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
980 mSink->WillInterrupt();
981 if (!mBlocked) {
982 // If we were blocked by a recursive invocation, don't re-block.
983 BlockParser();
985 return NS_OK;
987 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
988 // Note: Parser Terminate() calls DidBuildModel.
989 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
990 DidBuildModel(mStreamStatus);
991 mInternalState = result;
994 return NS_OK;
996 if ((NS_OK == result &&
997 theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
998 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
999 bool theContextIsStringBased =
1000 CParserContext::eCTString == mParserContext->mContextType;
1002 if (mParserContext->mStreamListenerState == eOnStop ||
1003 !mParserContext->mMultipart || theContextIsStringBased) {
1004 if (!mParserContext->mPrevContext) {
1005 if (mParserContext->mStreamListenerState == eOnStop) {
1006 DidBuildModel(mStreamStatus);
1007 return NS_OK;
1009 } else {
1010 CParserContext* theContext = PopContext();
1011 if (theContext) {
1012 theIterationIsOk = allowIteration && theContextIsStringBased;
1013 if (theContext->mCopyUnused) {
1014 if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
1015 mInternalState = NS_ERROR_OUT_OF_MEMORY;
1019 delete theContext;
1022 result = mInternalState;
1023 aIsFinalChunk = mParserContext &&
1024 mParserContext->mStreamListenerState == eOnStop;
1025 // ...then intentionally fall through to mSink->WillInterrupt()...
1030 if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
1031 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
1032 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1033 mSink->WillInterrupt();
1036 } else {
1037 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
1041 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1045 * This is where we loop over the tokens created in the
1046 * tokenization phase, and try to make sense out of them.
1048 nsresult nsParser::BuildModel() {
1049 nsITokenizer* theTokenizer = nullptr;
1051 nsresult result = NS_OK;
1052 if (mParserContext) {
1053 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1056 if (NS_SUCCEEDED(result)) {
1057 if (mDTD) {
1058 result = mDTD->BuildModel(theTokenizer, mSink);
1060 } else {
1061 mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1063 return result;
/*******************************************************************
  These methods are used to talk to the netlib system...
 *******************************************************************/
1070 nsresult nsParser::OnStartRequest(nsIRequest* request) {
1071 MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
1072 "Parser's nsIStreamListener API was not setup "
1073 "correctly in constructor.");
1075 if (mObserver) {
1076 mObserver->OnStartRequest(request);
1078 mParserContext->mStreamListenerState = eOnStart;
1079 mParserContext->mAutoDetectStatus = eUnknownDetect;
1080 mParserContext->mRequest = request;
1082 NS_ASSERTION(!mParserContext->mPrevContext,
1083 "Clobbering DTD for non-root parser context!");
1084 mDTD = nullptr;
1086 nsresult rv;
1087 nsAutoCString contentType;
1088 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
1089 if (channel) {
1090 rv = channel->GetContentType(contentType);
1091 if (NS_SUCCEEDED(rv)) {
1092 mParserContext->SetMimeType(contentType);
1096 rv = NS_OK;
1098 return rv;
1101 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
1102 int32_t aLen,
1103 nsCString& oCharset) {
1104 // This code is rather pointless to have. Might as well reuse expat as
1105 // seen in nsHtml5StreamParser. -- hsivonen
1106 oCharset.Truncate();
1107 if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
1108 ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
1109 int32_t i;
1110 bool versionFound = false, encodingFound = false;
1111 for (i = 6; i < aLen && !encodingFound; ++i) {
1112 // end of XML declaration?
1113 if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
1114 (((char*)aBytes)[i + 1] == '>')) {
1115 break;
1117 // Version is required.
1118 if (!versionFound) {
1119 // Want to avoid string comparisons, hence looking for 'n'
1120 // and only if found check the string leading to it. Not
1121 // foolproof, but fast.
1122 // The shortest string allowed before this is (strlen==13):
1123 // <?xml version
1124 if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
1125 (0 == PL_strncmp("versio", (char*)(aBytes + i - 6), 6))) {
1126 // Fast forward through version
1127 char q = 0;
1128 for (++i; i < aLen; ++i) {
1129 char qi = ((char*)aBytes)[i];
1130 if (qi == '\'' || qi == '"') {
1131 if (q && q == qi) {
1132 // ending quote
1133 versionFound = true;
1134 break;
1135 } else {
1136 // Starting quote
1137 q = qi;
1142 } else {
1143 // encoding must follow version
1144 // Want to avoid string comparisons, hence looking for 'g'
1145 // and only if found check the string leading to it. Not
1146 // foolproof, but fast.
1147 // The shortest allowed string before this (strlen==26):
1148 // <?xml version="1" encoding
1149 if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
1150 (0 == PL_strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
1151 int32_t encStart = 0;
1152 char q = 0;
1153 for (++i; i < aLen; ++i) {
1154 char qi = ((char*)aBytes)[i];
1155 if (qi == '\'' || qi == '"') {
1156 if (q && q == qi) {
1157 int32_t count = i - encStart;
1158 // encoding value is invalid if it is UTF-16
1159 if (count > 0 &&
1160 PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
1161 count)) {
1162 oCharset.Assign((char*)(aBytes + encStart), count);
1164 encodingFound = true;
1165 break;
1166 } else {
1167 encStart = i + 1;
1168 q = qi;
1173 } // if (!versionFound)
1174 } // for
1176 return !oCharset.IsEmpty();
1179 inline char GetNextChar(nsACString::const_iterator& aStart,
1180 nsACString::const_iterator& aEnd) {
1181 NS_ASSERTION(aStart != aEnd, "end of buffer");
1182 return (++aStart != aEnd) ? *aStart : '\0';
1185 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
1186 const char* fromRawSegment,
1187 uint32_t toOffset, uint32_t count,
1188 uint32_t* writeCount) {
1189 *writeCount = count;
1190 return NS_OK;
1193 typedef struct {
1194 bool mNeedCharsetCheck;
1195 nsParser* mParser;
1196 nsScanner* mScanner;
1197 nsIRequest* mRequest;
1198 } ParserWriteStruct;
1201 * This function is invoked as a result of a call to a stream's
1202 * ReadSegments() method. It is called for each contiguous buffer
1203 * of data in the underlying stream or pipe. Using ReadSegments
1204 * allows us to avoid copying data to read out of the stream.
1206 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
1207 const char* fromRawSegment, uint32_t toOffset,
1208 uint32_t count, uint32_t* writeCount) {
1209 nsresult result;
1210 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
1211 const unsigned char* buf =
1212 reinterpret_cast<const unsigned char*>(fromRawSegment);
1213 uint32_t theNumRead = count;
1215 if (!pws) {
1216 return NS_ERROR_FAILURE;
1219 if (pws->mNeedCharsetCheck) {
1220 pws->mNeedCharsetCheck = false;
1221 int32_t source;
1222 auto preferred = pws->mParser->GetDocumentCharset(source);
1224 // This code was bogus when I found it. It expects the BOM or the XML
1225 // declaration to be entirely in the first network buffer. -- hsivonen
1226 const Encoding* encoding;
1227 size_t bomLength;
1228 Tie(encoding, bomLength) = Encoding::ForBOM(Span(buf, count));
1229 Unused << bomLength;
1230 if (encoding) {
1231 // The decoder will swallow the BOM. The UTF-16 will re-sniff for
1232 // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
1233 // or "UTF-16BE".
1234 preferred = WrapNotNull(encoding);
1235 source = kCharsetFromByteOrderMark;
1236 } else if (source < kCharsetFromChannel) {
1237 nsAutoCString declCharset;
1239 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
1240 encoding = Encoding::ForLabel(declCharset);
1241 if (encoding) {
1242 preferred = WrapNotNull(encoding);
1243 source = kCharsetFromMetaTag;
1248 pws->mParser->SetDocumentCharset(preferred, source, false);
1249 pws->mParser->SetSinkCharset(preferred);
1252 result = pws->mScanner->Append(fromRawSegment, theNumRead);
1253 if (NS_SUCCEEDED(result)) {
1254 *writeCount = count;
1257 return result;
1260 nsresult nsParser::OnDataAvailable(nsIRequest* request,
1261 nsIInputStream* pIStream,
1262 uint64_t sourceOffset, uint32_t aLength) {
1263 MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
1264 eOnDataAvail == mParserContext->mStreamListenerState),
1265 "Error: OnStartRequest() must be called before OnDataAvailable()");
1266 MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
1267 "Must have a buffered input stream");
1269 nsresult rv = NS_OK;
1271 if (mIsAboutBlank) {
1272 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
1273 // ... but if an extension tries to feed us data for about:blank in a
1274 // release build, silently ignore the data.
1275 uint32_t totalRead;
1276 rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1277 &totalRead);
1278 return rv;
1281 CParserContext* theContext = mParserContext;
1283 while (theContext && theContext->mRequest != request) {
1284 theContext = theContext->mPrevContext;
1287 if (theContext) {
1288 theContext->mStreamListenerState = eOnDataAvail;
1290 if (eInvalidDetect == theContext->mAutoDetectStatus) {
1291 if (theContext->mScanner) {
1292 nsScannerIterator iter;
1293 theContext->mScanner->EndReading(iter);
1294 theContext->mScanner->SetPosition(iter, true);
1298 uint32_t totalRead;
1299 ParserWriteStruct pws;
1300 pws.mNeedCharsetCheck = true;
1301 pws.mParser = this;
1302 pws.mScanner = theContext->mScanner.get();
1303 pws.mRequest = request;
1305 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1306 if (NS_FAILED(rv)) {
1307 return rv;
1310 if (IsOkToProcessNetworkData()) {
1311 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1312 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1313 mProcessingNetworkData = true;
1314 if (sinkDeathGrip) {
1315 sinkDeathGrip->WillParse();
1317 rv = ResumeParse();
1318 mProcessingNetworkData = false;
1320 } else {
1321 rv = NS_ERROR_UNEXPECTED;
1324 return rv;
1328 * This is called by the networking library once the last block of data
1329 * has been collected from the net.
1331 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1332 nsresult rv = NS_OK;
1334 CParserContext* pc = mParserContext;
1335 while (pc) {
1336 if (pc->mRequest == request) {
1337 pc->mStreamListenerState = eOnStop;
1338 pc->mScanner->SetIncremental(false);
1339 break;
1342 pc = pc->mPrevContext;
1345 mStreamStatus = status;
1347 if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1348 mProcessingNetworkData = true;
1349 if (mSink) {
1350 mSink->WillParse();
1352 rv = ResumeParse(true, true);
1353 mProcessingNetworkData = false;
1356 // If the parser isn't enabled, we don't finish parsing till
1357 // it is reenabled.
1359 // XXX Should we wait to notify our observers as well if the
1360 // parser isn't yet enabled?
1361 if (mObserver) {
1362 mObserver->OnStopRequest(request, status);
1365 return rv;
/*******************************************************************
  Here come the tokenization methods...
 *******************************************************************/
1373 * Part of the code sandwich, this gets called right before
1374 * the tokenization process begins. The main reason for
1375 * this call is to allow the delegate to do initialization.
1377 bool nsParser::WillTokenize(bool aIsFinalChunk) {
1378 if (!mParserContext) {
1379 return true;
1382 nsITokenizer* theTokenizer;
1383 nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1384 NS_ENSURE_SUCCESS(result, false);
1385 return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
1389 * This is the primary control routine to consume tokens.
1390 * It iteratively consumes tokens until an error occurs or
1391 * you run out of data.
1393 nsresult nsParser::Tokenize(bool aIsFinalChunk) {
1394 nsITokenizer* theTokenizer;
1396 nsresult result = NS_ERROR_NOT_AVAILABLE;
1397 if (mParserContext) {
1398 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1401 if (NS_SUCCEEDED(result)) {
1402 bool flushTokens = false;
1404 bool killSink = false;
1406 WillTokenize(aIsFinalChunk);
1407 while (NS_SUCCEEDED(result)) {
1408 mParserContext->mScanner->Mark();
1409 result =
1410 theTokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens);
1411 if (NS_FAILED(result)) {
1412 mParserContext->mScanner->RewindToMark();
1413 if (NS_ERROR_HTMLPARSER_EOF == result) {
1414 break;
1416 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1417 killSink = true;
1418 result = Terminate();
1419 break;
1421 } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
1422 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix
1423 // Bug# 23931. Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
1424 // Also remember to update the marked position.
1425 mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
1426 mParserContext->mScanner->Mark();
1427 break;
1431 if (killSink) {
1432 mSink = nullptr;
1434 } else {
1435 result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1438 return result;
1442 * Get the channel associated with this parser
1444 * @param aChannel out param that will contain the result
1445 * @return NS_OK if successful
1447 NS_IMETHODIMP
1448 nsParser::GetChannel(nsIChannel** aChannel) {
1449 nsresult result = NS_ERROR_NOT_AVAILABLE;
1450 if (mParserContext && mParserContext->mRequest) {
1451 result = CallQueryInterface(mParserContext->mRequest, aChannel);
1453 return result;
1457 * Get the DTD associated with this parser
1459 NS_IMETHODIMP
1460 nsParser::GetDTD(nsIDTD** aDTD) {
1461 if (mParserContext) {
1462 NS_IF_ADDREF(*aDTD = mDTD);
1465 return NS_OK;
1469 * Get this as nsIStreamListener
1471 nsIStreamListener* nsParser::GetStreamListener() { return this; }