parser/htmlparser/nsParser.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set sw=2 ts=2 et tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsAtom.h"
   8 #include "nsParser.h"
   9 #include "nsString.h"
  10 #include "nsCRT.h"
  11 #include "nsScanner.h"
  12 #include "plstr.h"
  13 #include "nsIChannel.h"
  14 #include "nsIInputStream.h"
  15 #include "CNavDTD.h"
  16 #include "prenv.h"
  17 #include "prlock.h"
  18 #include "prcvar.h"
  19 #include "nsReadableUtils.h"
  20 #include "nsCOMPtr.h"
  21 #include "nsExpatDriver.h"
  22 #include "nsIFragmentContentSink.h"
  23 #include "nsStreamUtils.h"
  24 #include "nsXPCOMCIDInternal.h"
  25 #include "nsMimeTypes.h"
  26 #include "nsCharsetSource.h"
  27 #include "nsThreadUtils.h"
  28 #include "nsIHTMLContentSink.h"
  29
  30 #include "mozilla/BinarySearch.h"
  31 #include "mozilla/CondVar.h"
  32 #include "mozilla/dom/ScriptLoader.h"
  33 #include "mozilla/Encoding.h"
  34 #include "mozilla/Mutex.h"
  35
  36 using namespace mozilla;
  37
// Bit values kept in nsParser::mFlags.

// Set by PostContinueEvent() once a continue event has been dispatched, and
// cleared again when that event is handled or revoked by Terminate().
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001
// Set by Initialize(); cleared by ResumeParse() when WillBuildModel() fails.
// ResumeParse() only runs the tokenizer step while this bit is set.
#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002
  40
  41 //-------------- Begin ParseContinue Event Definition ------------------------
  42 /*
  43 The parser can be explicitly interrupted by passing a return value of
  44 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
  45 the parser to stop processing and allow the application to return to the event
  46 loop. The data which was left at the time of interruption will be processed
  47 the next time OnDataAvailable is called. If the parser has received its final
  48 chunk of data then OnDataAvailable will no longer be called by the networking
  49 module, so the parser will schedule a nsParserContinueEvent which will call
  50 the parser to process the remaining data after returning to the event loop.
  51 If the parser is interrupted while processing the remaining data it will
  52 schedule another ParseContinueEvent. The processing of data followed by
  53 scheduling of the continue events will proceed until either:
  54
  55   1) All of the remaining data can be processed without interrupting
  56   2) The parser has been cancelled.
  57
  58
  59 This capability is currently used in CNavDTD and nsHTMLContentSink. The
  60 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
  61 processed and when each token is processed. The nsHTML content sink records
  62 the time when the chunk has started processing and will return
  63 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
  64 threshold called max tokenizing processing time. This allows the content sink
  65 to limit how much data is processed in a single chunk which in turn gates how
  66 much time is spent away from the event loop. Processing smaller chunks of data
  67 also reduces the time spent in subsequent reflows.
  68
  69 This capability is most apparent when loading large documents. If the maximum
  70 token processing time is set small enough the application will remain
  71 responsive during document load.
  72
  73 A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
  75 been interrupted when the last chunk of data arrived. The document is complete
  76 when all of the document has been tokenized and there aren't any pending
  77 nsParserContinueEvents. This can cause problems if the application assumes
  78 that it can monitor the load requests to determine when the document load has
  79 been completed. This is what happens in Mozilla. The document is considered
  80 completely loaded when all of the load requests have been satisfied. To delay
  81 the document load until all of the parsing has been completed the
  82 nsHTMLContentSink adds a dummy parser load request which is not removed until
  83 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
  84 DidBuildModel until the final chunk of data has been passed to the parser
  85 through the OnDataAvailable and there aren't any pending
nsParserContinueEvents.
  87
Currently the parser ignores requests to be interrupted during the
  89 processing of script.  This is because a document.write followed by JavaScript
  90 calls to manipulate the DOM may fail if the parser was interrupted during the
  91 document.write.
  92
  93 For more details @see bugzilla bug 76722
  94 */
  95
  96 class nsParserContinueEvent : public Runnable {
  97  public:
  98   RefPtr<nsParser> mParser;
  99
 100   explicit nsParserContinueEvent(nsParser* aParser)
 101       : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
 102
 103   NS_IMETHOD Run() override {
 104     mParser->HandleParserContinueEvent(this);
 105     return NS_OK;
 106   }
 107 };
 108
 109 //-------------- End ParseContinue Event Definition ------------------------
 110
 111 /**
 112  *  default constructor
 113  */
 114 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); }
 115
 116 nsParser::~nsParser() { Cleanup(); }
 117
 118 void nsParser::Initialize() {
 119   mContinueEvent = nullptr;
 120   mCharsetSource = kCharsetUninitialized;
 121   mCharset = WINDOWS_1252_ENCODING;
 122   mInternalState = NS_OK;
 123   mStreamStatus = NS_OK;
 124   mCommand = eViewNormal;
 125   mBlocked = 0;
 126   mFlags = NS_PARSER_FLAG_CAN_TOKENIZE;
 127
 128   mProcessingNetworkData = false;
 129   mIsAboutBlank = false;
 130 }
 131
 132 void nsParser::Cleanup() {
 133   // It should not be possible for this flag to be set when we are getting
 134   // destroyed since this flag implies a pending nsParserContinueEvent, which
 135   // has an owning reference to |this|.
 136   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
 137 }
 138
// Cycle collection and interface map boilerplate for nsParser.
NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)

// Members named in the unlink list: mDTD and mSink, plus the weak-reference
// unlink macro.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
  NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Members named in the traverse list: the same two, mDTD and mSink.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Reference counting and the list of interfaces nsParser answers to.
// nsISupports is declared ambiguous and resolved through nsIParser.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
  NS_INTERFACE_MAP_ENTRY(nsIParser)
  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
NS_INTERFACE_MAP_END
 161
 162 // The parser continue event is posted only if
 163 // all of the data to parse has been passed to ::OnDataAvailable
 164 // and the parser has been interrupted by the content sink
 165 // because the processing of tokens took too long.
 166
 167 nsresult nsParser::PostContinueEvent() {
 168   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
 169     // If this flag isn't set, then there shouldn't be a live continue event!
 170     NS_ASSERTION(!mContinueEvent, "bad");
 171
 172     // This creates a reference cycle between this and the event that is
 173     // broken when the event fires.
 174     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
 175     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
 176       NS_WARNING("failed to dispatch parser continuation event");
 177     } else {
 178       mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 179       mContinueEvent = event;
 180     }
 181   }
 182   return NS_OK;
 183 }
 184
 185 NS_IMETHODIMP_(void)
 186 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
 187
 188 /**
 189  *  Call this method once you've created a parser, and want to instruct it
 190  *  about the command which caused the parser to be constructed. For example,
 191  *  this allows us to select a DTD which can do, say, view-source.
 192  *
 193  *  @param   aCommand the command string to set
 194  */
 195 NS_IMETHODIMP_(void)
 196 nsParser::SetCommand(const char* aCommand) {
 197   mCommandStr.Assign(aCommand);
 198   if (mCommandStr.EqualsLiteral("view-source")) {
 199     mCommand = eViewSource;
 200   } else if (mCommandStr.EqualsLiteral("view-fragment")) {
 201     mCommand = eViewFragment;
 202   } else {
 203     mCommand = eViewNormal;
 204   }
 205 }
 206
 207 /**
 208  *  Call this method once you've created a parser, and want to instruct it
 209  *  about the command which caused the parser to be constructed. For example,
 210  *  this allows us to select a DTD which can do, say, view-source.
 211  *
 212  *  @param   aParserCommand the command to set
 213  */
 214 NS_IMETHODIMP_(void)
 215 nsParser::SetCommand(eParserCommands aParserCommand) {
 216   mCommand = aParserCommand;
 217 }
 218
 219 /**
 220  *  Call this method once you've created a parser, and want to instruct it
 221  *  about what charset to load
 222  *
 223  *  @param   aCharset- the charset of a document
 224  *  @param   aCharsetSource- the source of the charset
 225  */
 226 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
 227                                   int32_t aCharsetSource,
 228                                   bool aForceAutoDetection) {
 229   mCharset = aCharset;
 230   mCharsetSource = aCharsetSource;
 231   if (mParserContext) {
 232     mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
 233   }
 234 }
 235
 236 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
 237   if (mSink) {
 238     mSink->SetDocumentCharset(aCharset);
 239   }
 240 }
 241
 242 /**
 243  *  This method gets called in order to set the content
 244  *  sink for this parser to dump nodes to.
 245  *
 246  *  @param   nsIContentSink interface for node receiver
 247  */
 248 NS_IMETHODIMP_(void)
 249 nsParser::SetContentSink(nsIContentSink* aSink) {
 250   MOZ_ASSERT(aSink, "sink cannot be null!");
 251   mSink = aSink;
 252
 253   if (mSink) {
 254     mSink->SetParser(this);
 255     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
 256     if (htmlSink) {
 257       mIsAboutBlank = true;
 258     }
 259   }
 260 }
 261
 262 /**
 263  * retrieve the sink set into the parser
 264  * @return  current sink
 265  */
 266 NS_IMETHODIMP_(nsIContentSink*)
 267 nsParser::GetContentSink() { return mSink; }
 268
 269 ////////////////////////////////////////////////////////////////////////
 270
 271 /**
 272  * This gets called just prior to the model actually
 273  * being constructed. It's important to make this the
 274  * last thing that happens right before parsing, so we
 275  * can delay until the last moment the resolution of
 276  * which DTD to use (unless of course we're assigned one).
 277  */
 278 nsresult nsParser::WillBuildModel() {
 279   if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
 280
 281   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 282     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 283     // to avoid introducing unintentional changes to behavior.
 284     return mInternalState;
 285   }
 286
 287   if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
 288
 289   if (eDTDMode_autodetect == mParserContext->mDTDMode) {
 290     if (mIsAboutBlank) {
 291       mParserContext->mDTDMode = eDTDMode_quirks;
 292       mParserContext->mDocType = eHTML_Quirks;
 293     } else {
 294       mParserContext->mDTDMode = eDTDMode_full_standards;
 295       mParserContext->mDocType = eXML;
 296     }
 297   }  // else XML fragment with nested parser context
 298
 299   // We always find a DTD.
 300   mParserContext->mAutoDetectStatus = ePrimaryDetect;
 301
 302   // Quick check for view source.
 303   MOZ_ASSERT(mParserContext->mParserCommand != eViewSource,
 304              "The old parser is not supposed to be used for View Source "
 305              "anymore.");
 306
 307   // Now see if we're parsing XML or HTML (which, as far as we're concerned,
 308   // simply means "not XML").
 309   if (mParserContext->mDocType == eXML) {
 310     RefPtr<nsExpatDriver> expat = new nsExpatDriver();
 311     nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink);
 312     NS_ENSURE_SUCCESS(rv, rv);
 313
 314     mDTD = expat.forget();
 315   } else {
 316     mDTD = new CNavDTD();
 317   }
 318
 319   return mSink->WillBuildModel(mParserContext->mDTDMode);
 320 }
 321
 322 /**
 323  * This gets called when the parser is done with its input.
 324  */
 325 void nsParser::DidBuildModel() {
 326   if (IsComplete() && mParserContext) {
 327     // Let sink know if we're about to end load because we've been terminated.
 328     // In that case we don't want it to run deferred scripts.
 329     bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
 330     if (mDTD && mSink) {
 331       mDTD->DidBuildModel();
 332       mSink->DidBuildModel(terminated);
 333     }
 334
 335     // Ref. to bug 61462.
 336     mParserContext->mRequest = nullptr;
 337   }
 338 }
 339
 340 /**
 341  *  Call this when you want to *force* the parser to terminate the
 342  *  parsing process altogether. This is binary -- so once you terminate
 343  *  you can't resume without restarting altogether.
 344  */
 345 NS_IMETHODIMP
 346 nsParser::Terminate(void) {
 347   // We should only call DidBuildModel once, so don't do anything if this is
 348   // the second time that Terminate has been called.
 349   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 350     return NS_OK;
 351   }
 352
 353   nsresult result = NS_OK;
 354   // XXX - [ until we figure out a way to break parser-sink circularity ]
 355   // Hack - Hold a reference until we are completely done...
 356   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 357   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
 358
 359   // @see bug 108049
 360   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so
 361   // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete()
 362   // call inside of DidBuildModel looks at the pendingContinueEvents flag.
 363   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
 364     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
 365     // Revoke the pending continue parsing event
 366     mContinueEvent = nullptr;
 367     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 368   }
 369
 370   if (mDTD) {
 371     mDTD->Terminate();
 372     DidBuildModel();
 373   } else if (mSink) {
 374     // We have no parser context or no DTD yet (so we got terminated before we
 375     // got any data).  Manually break the reference cycle with the sink.
 376     result = mSink->DidBuildModel(true);
 377     NS_ENSURE_SUCCESS(result, result);
 378   }
 379
 380   return NS_OK;
 381 }
 382
 383 NS_IMETHODIMP
 384 nsParser::ContinueInterruptedParsing() {
 385   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 386     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 387     // to avoid introducing unintentional changes to behavior.
 388     return mInternalState;
 389   }
 390
 391   // If there are scripts executing, then the content sink is jumping the gun
 392   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
 393   // later, see bug 460706.
 394   if (!IsOkToProcessNetworkData()) {
 395     return NS_OK;
 396   }
 397
 398   // If the stream has already finished, there's a good chance
 399   // that we might start closing things down when the parser
 400   // is reenabled. To make sure that we're not deleted across
 401   // the reenabling process, hold a reference to ourselves.
 402   nsresult result = NS_OK;
 403   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 404   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
 405
 406 #ifdef DEBUG
 407   if (mBlocked) {
 408     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
 409   }
 410 #endif
 411
 412   bool isFinalChunk =
 413       mParserContext && mParserContext->mStreamListenerState == eOnStop;
 414
 415   mProcessingNetworkData = true;
 416   if (sinkDeathGrip) {
 417     sinkDeathGrip->WillParse();
 418   }
 419   result = ResumeParse(true, isFinalChunk);  // Ref. bug 57999
 420   mProcessingNetworkData = false;
 421
 422   if (result != NS_OK) {
 423     result = mInternalState;
 424   }
 425
 426   return result;
 427 }
 428
 429 /**
 430  *  Stops parsing temporarily. That is, it will prevent the
 431  *  parser from building up content model while scripts
 432  *  are being loaded (either an external script from a web
 433  *  page, or any number of extension content scripts).
 434  */
 435 NS_IMETHODIMP_(void)
 436 nsParser::BlockParser() { mBlocked++; }
 437
 438 /**
 439  *  Open up the parser for tokenization, building up content
 440  *  model..etc. However, this method does not resume parsing
 441  *  automatically. It's the callers' responsibility to restart
 442  *  the parsing engine.
 443  */
 444 NS_IMETHODIMP_(void)
 445 nsParser::UnblockParser() {
 446   MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
 447   if (MOZ_LIKELY(mBlocked > 0)) {
 448     mBlocked--;
 449   }
 450 }
 451
 452 NS_IMETHODIMP_(void)
 453 nsParser::ContinueInterruptedParsingAsync() {
 454   MOZ_ASSERT(mSink);
 455   if (MOZ_LIKELY(mSink)) {
 456     mSink->ContinueInterruptedParsingAsync();
 457   }
 458 }
 459
 460 /**
 461  * Call this to query whether the parser is enabled or not.
 462  */
 463 NS_IMETHODIMP_(bool)
 464 nsParser::IsParserEnabled() { return !mBlocked; }
 465
 466 /**
 467  * Call this to query whether the parser thinks it's done with parsing.
 468  */
 469 NS_IMETHODIMP_(bool)
 470 nsParser::IsComplete() {
 471   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
 472 }
 473
 474 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
 475   // Ignore any revoked continue events...
 476   if (mContinueEvent != ev) return;
 477
 478   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 479   mContinueEvent = nullptr;
 480
 481   NS_ASSERTION(IsOkToProcessNetworkData(),
 482                "Interrupted in the middle of a script?");
 483   ContinueInterruptedParsing();
 484 }
 485
 486 bool nsParser::IsInsertionPointDefined() { return false; }
 487
 488 void nsParser::IncrementScriptNestingLevel() {}
 489
 490 void nsParser::DecrementScriptNestingLevel() {}
 491
 492 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
 493
 494 bool nsParser::IsScriptCreated() { return false; }
 495
 496 /**
 497  *  This is the main controlling routine in the parsing process.
 498  *  Note that it may get called multiple times for the same scanner,
 499  *  since this is a pushed based system, and all the tokens may
 500  *  not have been consumed by the scanner during a given invocation
 501  *  of this method.
 502  */
 503 NS_IMETHODIMP
 504 nsParser::Parse(nsIURI* aURL) {
 505   MOZ_ASSERT(aURL, "Error: Null URL given");
 506
 507   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 508     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 509     // to avoid introducing unintentional changes to behavior.
 510     return mInternalState;
 511   }
 512
 513   if (!aURL) {
 514     return NS_ERROR_HTMLPARSER_BADURL;
 515   }
 516
 517   MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");
 518
 519   mParserContext = MakeUnique<CParserContext>(aURL, mCommand);
 520
 521   return NS_OK;
 522 }
 523
 524 /**
 525  * Used by XML fragment parsing below.
 526  *
 527  * @param   aSourceBuffer contains a string-full of real content
 528  */
 529 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
 530   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 531     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 532     // to avoid introducing unintentional changes to behavior.
 533     return mInternalState;
 534   }
 535
 536   // Don't bother if we're never going to parse this.
 537   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 538     return NS_OK;
 539   }
 540
 541   if (!aLastCall && aSourceBuffer.IsEmpty()) {
 542     // Nothing is being passed to the parser so return
 543     // immediately. mUnusedInput will get processed when
 544     // some data is actually passed in.
 545     // But if this is the last call, make sure to finish up
 546     // stuff correctly.
 547     return NS_OK;
 548   }
 549
 550   // Maintain a reference to ourselves so we don't go away
 551   // till we're completely done.
 552   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 553
 554   if (!mParserContext) {
 555     // Only make a new context if we don't have one.
 556     mParserContext =
 557         MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);
 558
 559     mUnusedInput.Truncate();
 560   } else if (aLastCall) {
 561     // Set stream listener state to eOnStop, on the final context - Fix
 562     // 68160, to guarantee DidBuildModel() call - Fix 36148
 563     mParserContext->mStreamListenerState = eOnStop;
 564     mParserContext->mScanner.SetIncremental(false);
 565   }
 566
 567   mParserContext->mScanner.Append(aSourceBuffer);
 568   return ResumeParse(false, false, false);
 569 }
 570
 571 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer,
 572                                  nsTArray<nsString>& aTagStack) {
 573   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 574     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 575     // to avoid introducing unintentional changes to behavior.
 576     return mInternalState;
 577   }
 578
 579   nsresult result = NS_OK;
 580   nsAutoString theContext;
 581   uint32_t theCount = aTagStack.Length();
 582   uint32_t theIndex = 0;
 583
 584   for (theIndex = 0; theIndex < theCount; theIndex++) {
 585     theContext.Append('<');
 586     theContext.Append(aTagStack[theCount - theIndex - 1]);
 587     theContext.Append('>');
 588   }
 589
 590   if (theCount == 0) {
 591     // Ensure that the buffer is not empty. Because none of the DTDs care
 592     // about leading whitespace, this doesn't change the result.
 593     theContext.Assign(' ');
 594   }
 595
 596   // First, parse the context to build up the DTD's tag stack. Note that we
 597   // pass false for the aLastCall parameter.
 598   result = Parse(theContext, false);
 599   if (NS_FAILED(result)) {
 600     return result;
 601   }
 602
 603   if (!mSink) {
 604     // Parse must have failed in the XML case and so the sink was killed.
 605     return NS_ERROR_HTMLPARSER_STOPPARSING;
 606   }
 607
 608   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
 609   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
 610
 611   fragSink->WillBuildContent();
 612   // Now, parse the actual content. Note that this is the last call
 613   // for HTML content, but for XML, we will want to build and parse
 614   // the end tags.  However, if tagStack is empty, it's the last call
 615   // for XML as well.
 616   if (theCount == 0) {
 617     result = Parse(aSourceBuffer, true);
 618     fragSink->DidBuildContent();
 619   } else {
 620     // Add an end tag chunk, so expat will read the whole source buffer,
 621     // and not worry about ']]' etc.
 622     result = Parse(aSourceBuffer + u"</"_ns, false);
 623     fragSink->DidBuildContent();
 624
 625     if (NS_SUCCEEDED(result)) {
 626       nsAutoString endContext;
 627       for (theIndex = 0; theIndex < theCount; theIndex++) {
 628         // we already added an end tag chunk above
 629         if (theIndex > 0) {
 630           endContext.AppendLiteral("</");
 631         }
 632
 633         nsString& thisTag = aTagStack[theIndex];
 634         // was there an xmlns=?
 635         int32_t endOfTag = thisTag.FindChar(char16_t(' '));
 636         if (endOfTag == -1) {
 637           endContext.Append(thisTag);
 638         } else {
 639           endContext.Append(Substring(thisTag, 0, endOfTag));
 640         }
 641
 642         endContext.Append('>');
 643       }
 644
 645       result = Parse(endContext, true);
 646     }
 647   }
 648
 649   mParserContext.reset();
 650
 651   return result;
 652 }
 653
/**
 *  This routine is called to cause the parser to continue parsing its
 *  underlying stream.  This call allows the parse process to happen in
 *  chunks, such as when the content is push based, and we need to parse in
 *  pieces.
 *
 *  An interesting change in how the parser gets used has led us to add extra
 *  processing to this method.  The case occurs when the parser is blocked in
 *  one context, and gets a parse(string) call in another context.  In this
 *  case, the parserContexts are linked. No problem.
 *
 *  The problem is that Parse(string) assumes that it can proceed unabated,
 *  but if the parser is already blocked that assumption is false. So we
 *  needed to add a mechanism here to allow the parser to continue to process
 *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 *  out of contexts.
 *
 *  Nothing is done (and NS_OK is returned) while the parser is blocked or
 *  has been stopped.
 *
 *  @param   allowIteration : set to true if non-script resumption is
 *           requested. Not read anywhere in this function body.
 *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
 *           Passed on to the expat driver, and required for a continue event
 *           to be posted after an interruption.
 *  @param   aCanInterrupt : not read anywhere in this function body.
 *  @return  error code -- 0 if ok, non-zero if error. Blocking, stopping and
 *           interruption are all reported as NS_OK.
 */
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                               bool aCanInterrupt) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  nsresult result = NS_OK;

  if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    result = WillBuildModel();
    if (NS_FAILED(result)) {
      // Without a usable model there is nothing to tokenize from now on.
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mDTD) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // Each pass runs the tokenizer step and then BuildModel; the loop
      // ends on any non-OK result, on end of buffer, or on interruption.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty()) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner.UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult;
        if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
          // Remember the scanner position so a failed expat run can be
          // rewound.
          mParserContext->mScanner.Mark();
          if (mParserContext->mDocType == eXML &&
              mParserContext->mParserCommand != eViewSource) {
            // For XML documents WillBuildModel() installed an nsExpatDriver
            // as the DTD, which is what this cast relies on.
            nsExpatDriver* expat = static_cast<nsExpatDriver*>(mDTD.get());
            theTokenizerResult =
                expat->ResumeParse(mParserContext->mScanner, aIsFinalChunk);
            if (NS_FAILED(theTokenizerResult)) {
              mParserContext->mScanner.RewindToMark();
              if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
                // The driver asked to stop: terminate and drop the sink.
                // Note that mSink is null from here on.
                theTokenizerResult = Terminate();
                mSink = nullptr;
              }
            }
          } else {
            // Nothing to do for non-XML. Note that this should only be
            // about:blank at this point, we're also checking for view-source
            // above, but that shouldn't end up here anymore.
            theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
          }
        } else {
          theTokenizerResult = NS_OK;
        }

        result = mDTD->BuildModel(mSink);
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          // No more OnDataAvailable calls will come, so schedule our own
          // wake-up to process the remaining data.
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told the parser has been blocked, we disable all further
        // parsing (and cache any data coming in) until the parser is
        // re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          // So only call it here if Terminate() has not already run.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel();
            mInternalState = result;
          }

          return NS_OK;
        }
        // Buffer exhausted (or interrupted) and the stream has ended:
        // finish up. DidBuildModel() itself does nothing while a continue
        // event is pending.
        if (((NS_OK == result &&
              theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
             result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
            mParserContext->mStreamListenerState == eOnStop) {
          DidBuildModel();
          return NS_OK;
        }

        // Buffer exhausted or interrupted with more data still to come:
        // report interruption as success and tell the sink we are pausing.
        if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      // WillBuildModel() succeeded but left us without a DTD.
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
 784
 785 /*******************************************************************
 786   These methods are used to talk to the netlib system...
 787  *******************************************************************/
 788
 789 nsresult nsParser::OnStartRequest(nsIRequest* request) {
 790   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 791     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 792     // to avoid introducing unintentional changes to behavior.
 793     return mInternalState;
 794   }
 795
 796   MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
 797              "Parser's nsIStreamListener API was not setup "
 798              "correctly in constructor.");
 799
 800   mParserContext->mStreamListenerState = eOnStart;
 801   mParserContext->mAutoDetectStatus = eUnknownDetect;
 802   mParserContext->mRequest = request;
 803
 804   mDTD = nullptr;
 805
 806   nsresult rv;
 807   nsAutoCString contentType;
 808   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
 809   if (channel) {
 810     rv = channel->GetContentType(contentType);
 811     if (NS_SUCCEEDED(rv)) {
 812       mParserContext->SetMimeType(contentType);
 813     }
 814   }
 815
 816   rv = NS_OK;
 817
 818   return rv;
 819 }
 820
 821 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
 822                                              int32_t aLen,
 823                                              nsCString& oCharset) {
 824   // This code is rather pointless to have. Might as well reuse expat as
 825   // seen in nsHtml5StreamParser. -- hsivonen
 826   oCharset.Truncate();
 827   if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
 828       ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
 829     int32_t i;
 830     bool versionFound = false, encodingFound = false;
 831     for (i = 6; i < aLen && !encodingFound; ++i) {
 832       // end of XML declaration?
 833       if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
 834           (((char*)aBytes)[i + 1] == '>')) {
 835         break;
 836       }
 837       // Version is required.
 838       if (!versionFound) {
 839         // Want to avoid string comparisons, hence looking for 'n'
 840         // and only if found check the string leading to it. Not
 841         // foolproof, but fast.
 842         // The shortest string allowed before this is  (strlen==13):
 843         // <?xml version
 844         if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
 845             (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
 846           // Fast forward through version
 847           char q = 0;
 848           for (++i; i < aLen; ++i) {
 849             char qi = ((char*)aBytes)[i];
 850             if (qi == '\'' || qi == '"') {
 851               if (q && q == qi) {
 852                 //  ending quote
 853                 versionFound = true;
 854                 break;
 855               } else {
 856                 // Starting quote
 857                 q = qi;
 858               }
 859             }
 860           }
 861         }
 862       } else {
 863         // encoding must follow version
 864         // Want to avoid string comparisons, hence looking for 'g'
 865         // and only if found check the string leading to it. Not
 866         // foolproof, but fast.
 867         // The shortest allowed string before this (strlen==26):
 868         // <?xml version="1" encoding
 869         if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
 870             (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
 871           int32_t encStart = 0;
 872           char q = 0;
 873           for (++i; i < aLen; ++i) {
 874             char qi = ((char*)aBytes)[i];
 875             if (qi == '\'' || qi == '"') {
 876               if (q && q == qi) {
 877                 int32_t count = i - encStart;
 878                 // encoding value is invalid if it is UTF-16
 879                 if (count > 0 &&
 880                     PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
 881                                    count)) {
 882                   oCharset.Assign((char*)(aBytes + encStart), count);
 883                 }
 884                 encodingFound = true;
 885                 break;
 886               } else {
 887                 encStart = i + 1;
 888                 q = qi;
 889               }
 890             }
 891           }
 892         }
 893       }  // if (!versionFound)
 894     }    // for
 895   }
 896   return !oCharset.IsEmpty();
 897 }
 898
 899 inline char GetNextChar(nsACString::const_iterator& aStart,
 900                         nsACString::const_iterator& aEnd) {
 901   NS_ASSERTION(aStart != aEnd, "end of buffer");
 902   return (++aStart != aEnd) ? *aStart : '\0';
 903 }
 904
 905 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
 906                                     const char* fromRawSegment,
 907                                     uint32_t toOffset, uint32_t count,
 908                                     uint32_t* writeCount) {
 909   *writeCount = count;
 910   return NS_OK;
 911 }
 912
 913 typedef struct {
 914   bool mNeedCharsetCheck;
 915   nsParser* mParser;
 916   nsScanner* mScanner;
 917   nsIRequest* mRequest;
 918 } ParserWriteStruct;
 919
 920 /*
 921  * This function is invoked as a result of a call to a stream's
 922  * ReadSegments() method. It is called for each contiguous buffer
 923  * of data in the underlying stream or pipe. Using ReadSegments
 924  * allows us to avoid copying data to read out of the stream.
 925  */
 926 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
 927                                 const char* fromRawSegment, uint32_t toOffset,
 928                                 uint32_t count, uint32_t* writeCount) {
 929   nsresult result;
 930   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
 931   const unsigned char* buf =
 932       reinterpret_cast<const unsigned char*>(fromRawSegment);
 933   uint32_t theNumRead = count;
 934
 935   if (!pws) {
 936     return NS_ERROR_FAILURE;
 937   }
 938
 939   if (pws->mNeedCharsetCheck) {
 940     pws->mNeedCharsetCheck = false;
 941     int32_t source;
 942     auto preferred = pws->mParser->GetDocumentCharset(source);
 943
 944     // This code was bogus when I found it. It expects the BOM or the XML
 945     // declaration to be entirely in the first network buffer. -- hsivonen
 946     const Encoding* encoding;
 947     std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
 948     if (encoding) {
 949       // The decoder will swallow the BOM. The UTF-16 will re-sniff for
 950       // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
 951       // or "UTF-16BE".
 952       preferred = WrapNotNull(encoding);
 953       source = kCharsetFromByteOrderMark;
 954     } else if (source < kCharsetFromChannel) {
 955       nsAutoCString declCharset;
 956
 957       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
 958         encoding = Encoding::ForLabel(declCharset);
 959         if (encoding) {
 960           preferred = WrapNotNull(encoding);
 961           source = kCharsetFromMetaTag;
 962         }
 963       }
 964     }
 965
 966     pws->mParser->SetDocumentCharset(preferred, source, false);
 967     pws->mParser->SetSinkCharset(preferred);
 968   }
 969
 970   result = pws->mScanner->Append(fromRawSegment, theNumRead);
 971   if (NS_SUCCEEDED(result)) {
 972     *writeCount = count;
 973   }
 974
 975   return result;
 976 }
 977
 978 nsresult nsParser::OnDataAvailable(nsIRequest* request,
 979                                    nsIInputStream* pIStream,
 980                                    uint64_t sourceOffset, uint32_t aLength) {
 981   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
 982     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
 983     // to avoid introducing unintentional changes to behavior.
 984     return mInternalState;
 985   }
 986
 987   MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
 988               eOnDataAvail == mParserContext->mStreamListenerState),
 989              "Error: OnStartRequest() must be called before OnDataAvailable()");
 990   MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
 991              "Must have a buffered input stream");
 992
 993   nsresult rv = NS_OK;
 994
 995   if (mIsAboutBlank) {
 996     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
 997     // ... but if an extension tries to feed us data for about:blank in a
 998     // release build, silently ignore the data.
 999     uint32_t totalRead;
1000     rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1001                                 &totalRead);
1002     return rv;
1003   }
1004
1005   if (mParserContext->mRequest == request) {
1006     mParserContext->mStreamListenerState = eOnDataAvail;
1007
1008     uint32_t totalRead;
1009     ParserWriteStruct pws;
1010     pws.mNeedCharsetCheck = true;
1011     pws.mParser = this;
1012     pws.mScanner = &mParserContext->mScanner;
1013     pws.mRequest = request;
1014
1015     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1016     if (NS_FAILED(rv)) {
1017       return rv;
1018     }
1019
1020     if (IsOkToProcessNetworkData()) {
1021       nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1022       nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1023       mProcessingNetworkData = true;
1024       if (sinkDeathGrip) {
1025         sinkDeathGrip->WillParse();
1026       }
1027       rv = ResumeParse();
1028       mProcessingNetworkData = false;
1029     }
1030   } else {
1031     rv = NS_ERROR_UNEXPECTED;
1032   }
1033
1034   return rv;
1035 }
1036
1037 /**
1038  *  This is called by the networking library once the last block of data
1039  *  has been collected from the net.
1040  */
1041 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1042   if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
1043     // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
1044     // to avoid introducing unintentional changes to behavior.
1045     return mInternalState;
1046   }
1047
1048   nsresult rv = NS_OK;
1049
1050   if (mParserContext->mRequest == request) {
1051     mParserContext->mStreamListenerState = eOnStop;
1052     mParserContext->mScanner.SetIncremental(false);
1053   }
1054
1055   mStreamStatus = status;
1056
1057   if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1058     mProcessingNetworkData = true;
1059     if (mSink) {
1060       mSink->WillParse();
1061     }
1062     rv = ResumeParse(true, true);
1063     mProcessingNetworkData = false;
1064   }
1065
1066   // If the parser isn't enabled, we don't finish parsing till
1067   // it is reenabled.
1068
1069   return rv;
1070 }
1071
1072 /**
1073  * Get this as nsIStreamListener
1074  */
1075 nsIStreamListener* nsParser::GetStreamListener() { return this; }