parser/htmlparser/nsParser.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set sw=2 ts=2 et tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsAtom.h"
   8 #include "nsParser.h"
   9 #include "nsString.h"
  10 #include "nsCRT.h"
  11 #include "nsScanner.h"
  12 #include "plstr.h"
  13 #include "nsIChannel.h"
  14 #include "nsIInputStream.h"
  15 #include "CNavDTD.h"
  16 #include "prenv.h"
  17 #include "prlock.h"
  18 #include "prcvar.h"
  19 #include "nsParserCIID.h"
  20 #include "nsReadableUtils.h"
  21 #include "nsCOMPtr.h"
  22 #include "nsExpatDriver.h"
  23 #include "nsIFragmentContentSink.h"
  24 #include "nsStreamUtils.h"
  25 #include "nsHTMLTokenizer.h"
  26 #include "nsDataHashtable.h"
  27 #include "nsXPCOMCIDInternal.h"
  28 #include "nsMimeTypes.h"
  29 #include "mozilla/CondVar.h"
  30 #include "mozilla/Mutex.h"
  31 #include "nsCharsetSource.h"
  32 #include "nsThreadUtils.h"
  33 #include "nsIHTMLContentSink.h"
  34
  35 #include "mozilla/BinarySearch.h"
  36 #include "mozilla/dom/ScriptLoader.h"
  37 #include "mozilla/Encoding.h"
  38
  39 using namespace mozilla;
  40
  41 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
  42 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
  43 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
  44 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
  45
  46 //-------------- Begin ParseContinue Event Definition ------------------------
  47 /*
  48 The parser can be explicitly interrupted by passing a return value of
  49 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
  50 the parser to stop processing and allow the application to return to the event
  51 loop. The data which was left at the time of interruption will be processed
  52 the next time OnDataAvailable is called. If the parser has received its final
  53 chunk of data then OnDataAvailable will no longer be called by the networking
  54 module, so the parser will schedule a nsParserContinueEvent which will call
  55 the parser to process the remaining data after returning to the event loop.
  56 If the parser is interrupted while processing the remaining data it will
  57 schedule another ParseContinueEvent. The processing of data followed by
  58 scheduling of the continue events will proceed until either:
  59
  60   1) All of the remaining data can be processed without interrupting
  61   2) The parser has been cancelled.
  62
  63
  64 This capability is currently used in CNavDTD and nsHTMLContentSink. The
  65 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
  66 processed and when each token is processed. The nsHTML content sink records
  67 the time when the chunk has started processing and will return
  68 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
  69 threshold called max tokenizing processing time. This allows the content sink
  70 to limit how much data is processed in a single chunk which in turn gates how
  71 much time is spent away from the event loop. Processing smaller chunks of data
  72 also reduces the time spent in subsequent reflows.
  73
  74 This capability is most apparent when loading large documents. If the maximum
  75 token processing time is set small enough the application will remain
  76 responsive during document load.
  77
  78 A side-effect of this capability is that document load is not complete when
  79 the last chunk of data is passed to OnDataAvailable since  the parser may have
  80 been interrupted when the last chunk of data arrived. The document is complete
  81 when all of the document has been tokenized and there aren't any pending
  82 nsParserContinueEvents. This can cause problems if the application assumes
  83 that it can monitor the load requests to determine when the document load has
  84 been completed. This is what happens in Mozilla. The document is considered
  85 completely loaded when all of the load requests have been satisfied. To delay
  86 the document load until all of the parsing has been completed the
  87 nsHTMLContentSink adds a dummy parser load request which is not removed until
  88 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
  89 DidBuildModel until the final chunk of data has been passed to the parser
  90 through the OnDataAvailable and there aren't any pending
   nsParserContinueEvents.
  92
   Currently the parser ignores requests to be interrupted during the
  94 processing of script.  This is because a document.write followed by JavaScript
  95 calls to manipulate the DOM may fail if the parser was interrupted during the
  96 document.write.
  97
  98 For more details @see bugzilla bug 76722
  99 */
 100
 101 class nsParserContinueEvent : public Runnable {
 102  public:
 103   RefPtr<nsParser> mParser;
 104
 105   explicit nsParserContinueEvent(nsParser* aParser)
 106       : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
 107
 108   NS_IMETHOD Run() override {
 109     mParser->HandleParserContinueEvent(this);
 110     return NS_OK;
 111   }
 112 };
 113
 114 //-------------- End ParseContinue Event Definition ------------------------
 115
 116 /**
 117  *  default constructor
 118  */
 119 nsParser::nsParser()
 120     : mParserContext(nullptr), mCharset(WINDOWS_1252_ENCODING) {
 121   Initialize(true);
 122 }
 123
 124 nsParser::~nsParser() { Cleanup(); }
 125
 126 void nsParser::Initialize(bool aConstructor) {
 127   if (aConstructor) {
 128     // Raw pointer
 129     mParserContext = 0;
 130   } else {
 131     // nsCOMPtrs
 132     mObserver = nullptr;
 133     mUnusedInput.Truncate();
 134   }
 135
 136   mContinueEvent = nullptr;
 137   mCharsetSource = kCharsetUninitialized;
 138   mCharset = WINDOWS_1252_ENCODING;
 139   mInternalState = NS_OK;
 140   mStreamStatus = NS_OK;
 141   mCommand = eViewNormal;
 142   mBlocked = 0;
 143   mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE;
 144
 145   mProcessingNetworkData = false;
 146   mIsAboutBlank = false;
 147 }
 148
 149 void nsParser::Cleanup() {
 150 #ifdef DEBUG
 151   if (mParserContext && mParserContext->mPrevContext) {
 152     NS_WARNING("Extra parser contexts still on the parser stack");
 153   }
 154 #endif
 155
 156   while (mParserContext) {
 157     CParserContext* pc = mParserContext->mPrevContext;
 158     delete mParserContext;
 159     mParserContext = pc;
 160   }
 161
 162   // It should not be possible for this flag to be set when we are getting
 163   // destroyed since this flag implies a pending nsParserContinueEvent, which
 164   // has an owning reference to |this|.
 165   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
 166 }
 167
 168 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
 169
 170 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
 171   NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
 172   NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
 173   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
 174   NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
 175 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
 176
 177 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
 178   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
 179   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
 180   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
 181   CParserContext* pc = tmp->mParserContext;
 182   while (pc) {
 183     cb.NoteXPCOMChild(pc->mTokenizer);
 184     pc = pc->mPrevContext;
 185   }
 186 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
 187
 188 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
 189 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
 190 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
 191   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
 192   NS_INTERFACE_MAP_ENTRY(nsIParser)
 193   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
 194   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
 195   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
 196 NS_INTERFACE_MAP_END
 197
 198 // The parser continue event is posted only if
 199 // all of the data to parse has been passed to ::OnDataAvailable
 200 // and the parser has been interrupted by the content sink
 201 // because the processing of tokens took too long.
 202
 203 nsresult nsParser::PostContinueEvent() {
 204   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
 205     // If this flag isn't set, then there shouldn't be a live continue event!
 206     NS_ASSERTION(!mContinueEvent, "bad");
 207
 208     // This creates a reference cycle between this and the event that is
 209     // broken when the event fires.
 210     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
 211     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
 212       NS_WARNING("failed to dispatch parser continuation event");
 213     } else {
 214       mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 215       mContinueEvent = event;
 216     }
 217   }
 218   return NS_OK;
 219 }
 220
 221 NS_IMETHODIMP_(void)
 222 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
 223
 224 /**
 225  *  Call this method once you've created a parser, and want to instruct it
 226  *  about the command which caused the parser to be constructed. For example,
 227  *  this allows us to select a DTD which can do, say, view-source.
 228  *
 229  *  @param   aCommand the command string to set
 230  */
 231 NS_IMETHODIMP_(void)
 232 nsParser::SetCommand(const char* aCommand) {
 233   mCommandStr.Assign(aCommand);
 234   if (mCommandStr.EqualsLiteral("view-source")) {
 235     mCommand = eViewSource;
 236   } else if (mCommandStr.EqualsLiteral("view-fragment")) {
 237     mCommand = eViewFragment;
 238   } else {
 239     mCommand = eViewNormal;
 240   }
 241 }
 242
 243 /**
 244  *  Call this method once you've created a parser, and want to instruct it
 245  *  about the command which caused the parser to be constructed. For example,
 246  *  this allows us to select a DTD which can do, say, view-source.
 247  *
 248  *  @param   aParserCommand the command to set
 249  */
 250 NS_IMETHODIMP_(void)
 251 nsParser::SetCommand(eParserCommands aParserCommand) {
 252   mCommand = aParserCommand;
 253 }
 254
 255 /**
 256  *  Call this method once you've created a parser, and want to instruct it
 257  *  about what charset to load
 258  *
 259  *  @param   aCharset- the charset of a document
 260  *  @param   aCharsetSource- the source of the charset
 261  */
 262 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
 263                                   int32_t aCharsetSource) {
 264   mCharset = aCharset;
 265   mCharsetSource = aCharsetSource;
 266   if (mParserContext && mParserContext->mScanner) {
 267     mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
 268   }
 269 }
 270
 271 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
 272   if (mSink) {
 273     mSink->SetDocumentCharset(aCharset);
 274   }
 275 }
 276
 277 /**
 278  *  This method gets called in order to set the content
 279  *  sink for this parser to dump nodes to.
 280  *
 281  *  @param   nsIContentSink interface for node receiver
 282  */
 283 NS_IMETHODIMP_(void)
 284 nsParser::SetContentSink(nsIContentSink* aSink) {
 285   MOZ_ASSERT(aSink, "sink cannot be null!");
 286   mSink = aSink;
 287
 288   if (mSink) {
 289     mSink->SetParser(this);
 290     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
 291     if (htmlSink) {
 292       mIsAboutBlank = true;
 293     }
 294   }
 295 }
 296
 297 /**
 298  * retrieve the sink set into the parser
 299  * @return  current sink
 300  */
 301 NS_IMETHODIMP_(nsIContentSink*)
 302 nsParser::GetContentSink() { return mSink; }
 303
 304 static nsIDTD* FindSuitableDTD(CParserContext& aParserContext) {
 305   // We always find a DTD.
 306   aParserContext.mAutoDetectStatus = ePrimaryDetect;
 307
 308   // Quick check for view source.
 309   MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
 310              "The old parser is not supposed to be used for View Source "
 311              "anymore.");
 312
 313   // Now see if we're parsing HTML (which, as far as we're concerned, simply
 314   // means "not XML").
 315   if (aParserContext.mDocType != eXML) {
 316     return new CNavDTD();
 317   }
 318
 319   // If we're here, then we'd better be parsing XML.
 320   NS_ASSERTION(aParserContext.mDocType == eXML,
 321                "What are you trying to send me, here?");
 322   return new nsExpatDriver();
 323 }
 324
 325 NS_IMETHODIMP
 326 nsParser::CancelParsingEvents() {
 327   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
 328     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
 329     // Revoke the pending continue parsing event
 330     mContinueEvent = nullptr;
 331     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 332   }
 333   return NS_OK;
 334 }
 335
 336 ////////////////////////////////////////////////////////////////////////
 337
 338 /**
 339  * Evalutes EXPR1 and EXPR2 exactly once each, in that order.  Stores the value
 340  * of EXPR2 in RV is EXPR2 fails, otherwise RV contains the result of EXPR1
 341  * (which could be success or failure).
 342  *
 343  * To understand the motivation for this construct, consider these example
 344  * methods:
 345  *
 346  *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 347  *     nsresult rv = NS_OK;
 348  *     ...
 349  *     return obj->DoThatThing();
 350  *     NS_ENSURE_SUCCESS(rv, rv);
 351  *     ...
 352  *     return rv;
 353  *   }
 354  *
 355  *   void nsCaller::MakeThingsHappen() {
 356  *     return mSomething->DoThatThing(mWhatever);
 357  *   }
 358  *
 359  * Suppose, for whatever reason*, we want to shift responsibility for calling
 360  * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 361  * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 362  *
 363  *   nsresult nsSomething::DoThatThing() {
 364  *     nsresult rv = NS_OK;
 365  *     ...
 366  *     ...
 367  *     return rv;
 368  *   }
 369  *
 370  *   void nsCaller::MakeThingsHappen() {
 371  *     nsresult rv;
 372  *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 373  *                              mWhatever->DoThatThing(),
 374  *                              rv);
 375  *     return rv;
 376  *   }
 377  *
 378  * *Possible reasons include: nsCaller doesn't want to give mSomething access
 379  * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 380  * be called regardless of how nsSomething::DoThatThing behaves, &c.
 381  */
 382 #define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) \
 383   {                                                \
 384     nsresult RV##__temp = EXPR1;                   \
 385     RV = EXPR2;                                    \
 386     if (NS_FAILED(RV)) {                           \
 387       RV = RV##__temp;                             \
 388     }                                              \
 389   }
 390
 391 /**
 392  * This gets called just prior to the model actually
 393  * being constructed. It's important to make this the
 394  * last thing that happens right before parsing, so we
 395  * can delay until the last moment the resolution of
 396  * which DTD to use (unless of course we're assigned one).
 397  */
 398 nsresult nsParser::WillBuildModel(nsString& aFilename) {
 399   if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
 400
 401   if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
 402
 403   if (eDTDMode_unknown == mParserContext->mDTDMode ||
 404       eDTDMode_autodetect == mParserContext->mDTDMode) {
 405     if (mIsAboutBlank) {
 406       mParserContext->mDTDMode = eDTDMode_quirks;
 407       mParserContext->mDocType = eHTML_Quirks;
 408     } else {
 409       mParserContext->mDTDMode = eDTDMode_full_standards;
 410       mParserContext->mDocType = eXML;
 411     }
 412   }  // else XML fragment with nested parser context
 413
 414   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
 415                "Clobbering DTD for non-root parser context!");
 416   mDTD = FindSuitableDTD(*mParserContext);
 417   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
 418
 419   nsITokenizer* tokenizer;
 420   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
 421   NS_ENSURE_SUCCESS(rv, rv);
 422
 423   rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
 424   nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
 425   // nsIDTD::WillBuildModel used to be responsible for calling
 426   // nsIContentSink::WillBuildModel, but that obligation isn't expressible
 427   // in the nsIDTD interface itself, so it's sounder and simpler to give that
 428   // responsibility back to the parser. The former behavior of the DTD was to
 429   // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
 430   // failure we should use sinkResult instead of rv, to preserve the old error
 431   // handling behavior of the DTD:
 432   return NS_FAILED(sinkResult) ? sinkResult : rv;
 433 }
 434
 435 /**
 436  * This gets called when the parser is done with its input.
 437  * Note that the parser may have been called recursively, so we
 438  * have to check for a prev. context before closing out the DTD/sink.
 439  */
 440 nsresult nsParser::DidBuildModel(nsresult anErrorCode) {
 441   nsresult result = anErrorCode;
 442
 443   if (IsComplete()) {
 444     if (mParserContext && !mParserContext->mPrevContext) {
 445       // Let sink know if we're about to end load because we've been terminated.
 446       // In that case we don't want it to run deferred scripts.
 447       bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
 448       if (mDTD && mSink) {
 449         nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
 450                  sinkResult = mSink->DidBuildModel(terminated);
 451         // nsIDTD::DidBuildModel used to be responsible for calling
 452         // nsIContentSink::DidBuildModel, but that obligation isn't expressible
 453         // in the nsIDTD interface itself, so it's sounder and simpler to give
 454         // that responsibility back to the parser. The former behavior of the
 455         // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
 456         // sink returns failure we should use sinkResult instead of dtdResult,
 457         // to preserve the old error handling behavior of the DTD:
 458         result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
 459       }
 460
 461       // Ref. to bug 61462.
 462       mParserContext->mRequest = nullptr;
 463     }
 464   }
 465
 466   return result;
 467 }
 468
 469 /**
 470  * This method adds a new parser context to the list,
 471  * pushing the current one to the next position.
 472  *
 473  * @param   ptr to new context
 474  */
 475 void nsParser::PushContext(CParserContext& aContext) {
 476   NS_ASSERTION(aContext.mPrevContext == mParserContext,
 477                "Trying to push a context whose previous context differs from "
 478                "the current parser context.");
 479   mParserContext = &aContext;
 480 }
 481
 482 /**
 483  * This method pops the topmost context off the stack,
 484  * returning it to the user. The next context  (if any)
 485  * becomes the current context.
 486  * @update      gess7/22/98
 487  * @return  prev. context
 488  */
 489 CParserContext* nsParser::PopContext() {
 490   CParserContext* oldContext = mParserContext;
 491   if (oldContext) {
 492     mParserContext = oldContext->mPrevContext;
 493     if (mParserContext) {
 494       // If the old context was blocked, propagate the blocked state
 495       // back to the new one. Also, propagate the stream listener state
 496       // but don't override onStop state to guarantee the call to
 497       // DidBuildModel().
 498       if (mParserContext->mStreamListenerState != eOnStop) {
 499         mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
 500       }
 501     }
 502   }
 503   return oldContext;
 504 }
 505
 506 /**
 507  *  Call this when you want control whether or not the parser will parse
 508  *  and tokenize input (TRUE), or whether it just caches input to be
 509  *  parsed later (FALSE).
 510  *
 511  *  @param   aState determines whether we parse/tokenize or just cache.
 512  *  @return  current state
 513  */
 514 void nsParser::SetUnusedInput(nsString& aBuffer) { mUnusedInput = aBuffer; }
 515
 516 /**
 517  *  Call this when you want to *force* the parser to terminate the
 518  *  parsing process altogether. This is binary -- so once you terminate
 519  *  you can't resume without restarting altogether.
 520  */
 521 NS_IMETHODIMP
 522 nsParser::Terminate(void) {
 523   // We should only call DidBuildModel once, so don't do anything if this is
 524   // the second time that Terminate has been called.
 525   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 526     return NS_OK;
 527   }
 528
 529   nsresult result = NS_OK;
 530   // XXX - [ until we figure out a way to break parser-sink circularity ]
 531   // Hack - Hold a reference until we are completely done...
 532   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 533   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
 534
 535   // CancelParsingEvents must be called to avoid leaking the nsParser object
 536   // @see bug 108049
 537   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
 538   // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
 539   // The IsComplete() call inside of DidBuildModel looks at the
 540   // pendingContinueEvents flag.
 541   CancelParsingEvents();
 542
 543   // If we got interrupted in the middle of a document.write, then we might
 544   // have more than one parser context on our parsercontext stack. This has
 545   // the effect of making DidBuildModel a no-op, meaning that we never call
 546   // our sink's DidBuildModel and break the reference cycle, causing a leak.
 547   // Since we're getting terminated, we manually clean up our context stack.
 548   while (mParserContext && mParserContext->mPrevContext) {
 549     CParserContext* prev = mParserContext->mPrevContext;
 550     delete mParserContext;
 551     mParserContext = prev;
 552   }
 553
 554   if (mDTD) {
 555     mDTD->Terminate();
 556     DidBuildModel(result);
 557   } else if (mSink) {
 558     // We have no parser context or no DTD yet (so we got terminated before we
 559     // got any data).  Manually break the reference cycle with the sink.
 560     result = mSink->DidBuildModel(true);
 561     NS_ENSURE_SUCCESS(result, result);
 562   }
 563
 564   return NS_OK;
 565 }
 566
 567 NS_IMETHODIMP
 568 nsParser::ContinueInterruptedParsing() {
 569   // If there are scripts executing, then the content sink is jumping the gun
 570   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
 571   // later, see bug 460706.
 572   if (!IsOkToProcessNetworkData()) {
 573     return NS_OK;
 574   }
 575
 576   // If the stream has already finished, there's a good chance
 577   // that we might start closing things down when the parser
 578   // is reenabled. To make sure that we're not deleted across
 579   // the reenabling process, hold a reference to ourselves.
 580   nsresult result = NS_OK;
 581   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 582   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
 583
 584 #ifdef DEBUG
 585   if (mBlocked) {
 586     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
 587   }
 588 #endif
 589
 590   bool isFinalChunk =
 591       mParserContext && mParserContext->mStreamListenerState == eOnStop;
 592
 593   mProcessingNetworkData = true;
 594   if (sinkDeathGrip) {
 595     sinkDeathGrip->WillParse();
 596   }
 597   result = ResumeParse(true, isFinalChunk);  // Ref. bug 57999
 598   mProcessingNetworkData = false;
 599
 600   if (result != NS_OK) {
 601     result = mInternalState;
 602   }
 603
 604   return result;
 605 }
 606
 607 /**
 608  *  Stops parsing temporarily. That is, it will prevent the
 609  *  parser from building up content model while scripts
 610  *  are being loaded (either an external script from a web
 611  *  page, or any number of extension content scripts).
 612  */
 613 NS_IMETHODIMP_(void)
 614 nsParser::BlockParser() { mBlocked++; }
 615
 616 /**
 617  *  Open up the parser for tokenization, building up content
 618  *  model..etc. However, this method does not resume parsing
 619  *  automatically. It's the callers' responsibility to restart
 620  *  the parsing engine.
 621  */
 622 NS_IMETHODIMP_(void)
 623 nsParser::UnblockParser() {
 624   MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
 625   if (MOZ_LIKELY(mBlocked > 0)) {
 626     mBlocked--;
 627   }
 628 }
 629
 630 NS_IMETHODIMP_(void)
 631 nsParser::ContinueInterruptedParsingAsync() {
 632   MOZ_ASSERT(mSink);
 633   if (MOZ_LIKELY(mSink)) {
 634     mSink->ContinueInterruptedParsingAsync();
 635   }
 636 }
 637
 638 /**
 639  * Call this to query whether the parser is enabled or not.
 640  */
 641 NS_IMETHODIMP_(bool)
 642 nsParser::IsParserEnabled() { return !mBlocked; }
 643
 644 /**
 645  * Call this to query whether the parser thinks it's done with parsing.
 646  */
 647 NS_IMETHODIMP_(bool)
 648 nsParser::IsComplete() {
 649   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
 650 }
 651
 652 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
 653   // Ignore any revoked continue events...
 654   if (mContinueEvent != ev) return;
 655
 656   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
 657   mContinueEvent = nullptr;
 658
 659   NS_ASSERTION(IsOkToProcessNetworkData(),
 660                "Interrupted in the middle of a script?");
 661   ContinueInterruptedParsing();
 662 }
 663
 664 bool nsParser::IsInsertionPointDefined() { return false; }
 665
 666 void nsParser::IncrementScriptNestingLevel() {}
 667
 668 void nsParser::DecrementScriptNestingLevel() {}
 669
 670 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }
 671
 672 void nsParser::MarkAsNotScriptCreated(const char* aCommand) {}
 673
 674 bool nsParser::IsScriptCreated() { return false; }
 675
 676 /**
 677  *  This is the main controlling routine in the parsing process.
 678  *  Note that it may get called multiple times for the same scanner,
 679  *  since this is a pushed based system, and all the tokens may
 680  *  not have been consumed by the scanner during a given invocation
 681  *  of this method.
 682  */
 683 NS_IMETHODIMP
 684 nsParser::Parse(nsIURI* aURL, nsIRequestObserver* aListener, void* aKey,
 685                 nsDTDMode aMode) {
 686   MOZ_ASSERT(aURL, "Error: Null URL given");
 687
 688   nsresult result = NS_ERROR_HTMLPARSER_BADURL;
 689   mObserver = aListener;
 690
 691   if (aURL) {
 692     nsAutoCString spec;
 693     nsresult rv = aURL->GetSpec(spec);
 694     if (rv != NS_OK) {
 695       return rv;
 696     }
 697     NS_ConvertUTF8toUTF16 theName(spec);
 698
 699     nsScanner* theScanner = new nsScanner(theName, false);
 700     CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
 701                                             mCommand, aListener);
 702     if (pc && theScanner) {
 703       pc->mMultipart = true;
 704       pc->mContextType = CParserContext::eCTURL;
 705       pc->mDTDMode = aMode;
 706       PushContext(*pc);
 707
 708       result = NS_OK;
 709     } else {
 710       result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
 711     }
 712   }
 713   return result;
 714 }
 715
 716 /**
 717  * Used by XML fragment parsing below.
 718  *
 719  * @param   aSourceBuffer contains a string-full of real content
 720  */
 721 nsresult nsParser::Parse(const nsAString& aSourceBuffer, void* aKey,
 722                          bool aLastCall) {
 723   nsresult result = NS_OK;
 724
 725   // Don't bother if we're never going to parse this.
 726   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
 727     return result;
 728   }
 729
 730   if (!aLastCall && aSourceBuffer.IsEmpty()) {
 731     // Nothing is being passed to the parser so return
 732     // immediately. mUnusedInput will get processed when
 733     // some data is actually passed in.
 734     // But if this is the last call, make sure to finish up
 735     // stuff correctly.
 736     return result;
 737   }
 738
 739   // Maintain a reference to ourselves so we don't go away
 740   // till we're completely done.
 741   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 742
 743   if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
 744     // Note: The following code will always find the parser context associated
 745     // with the given key, even if that context has been suspended (e.g., for
 746     // another document.write call). This doesn't appear to be exactly what IE
 747     // does in the case where this happens, but this makes more sense.
 748     CParserContext* pc = mParserContext;
 749     while (pc && pc->mKey != aKey) {
 750       pc = pc->mPrevContext;
 751     }
 752
 753     if (!pc) {
 754       // Only make a new context if we don't have one, OR if we do, but has a
 755       // different context key.
 756       nsScanner* theScanner = new nsScanner(mUnusedInput);
 757       NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
 758
 759       eAutoDetectResult theStatus = eUnknownDetect;
 760
 761       if (mParserContext &&
 762           mParserContext->mMimeType.EqualsLiteral("application/xml")) {
 763         // Ref. Bug 90379
 764         NS_ASSERTION(mDTD, "How come the DTD is null?");
 765
 766         if (mParserContext) {
 767           theStatus = mParserContext->mAutoDetectStatus;
 768           // Added this to fix bug 32022.
 769         }
 770       }
 771
 772       pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, 0,
 773                               theStatus, aLastCall);
 774       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
 775
 776       PushContext(*pc);
 777
 778       pc->mMultipart = !aLastCall;  // By default
 779       if (pc->mPrevContext) {
 780         pc->mMultipart |= pc->mPrevContext->mMultipart;
 781       }
 782
 783       // Start fix bug 40143
 784       if (pc->mMultipart) {
 785         pc->mStreamListenerState = eOnDataAvail;
 786         if (pc->mScanner) {
 787           pc->mScanner->SetIncremental(true);
 788         }
 789       } else {
 790         pc->mStreamListenerState = eOnStop;
 791         if (pc->mScanner) {
 792           pc->mScanner->SetIncremental(false);
 793         }
 794       }
 795       // end fix for 40143
 796
 797       pc->mContextType = CParserContext::eCTString;
 798       pc->SetMimeType("application/xml"_ns);
 799       pc->mDTDMode = eDTDMode_full_standards;
 800
 801       mUnusedInput.Truncate();
 802
 803       pc->mScanner->Append(aSourceBuffer);
 804       // Do not interrupt document.write() - bug 95487
 805       result = ResumeParse(false, false, false);
 806     } else {
 807       pc->mScanner->Append(aSourceBuffer);
 808       if (!pc->mPrevContext) {
 809         // Set stream listener state to eOnStop, on the final context - Fix
 810         // 68160, to guarantee DidBuildModel() call - Fix 36148
 811         if (aLastCall) {
 812           pc->mStreamListenerState = eOnStop;
 813           pc->mScanner->SetIncremental(false);
 814         }
 815
 816         if (pc == mParserContext) {
 817           // If pc is not mParserContext, then this call to ResumeParse would
 818           // do the wrong thing and try to continue parsing using
 819           // mParserContext. We need to wait to actually resume parsing on pc.
 820           ResumeParse(false, false, false);
 821         }
 822       }
 823     }
 824   }
 825
 826   return result;
 827 }
 828
 829 NS_IMETHODIMP
 830 nsParser::ParseFragment(const nsAString& aSourceBuffer,
 831                         nsTArray<nsString>& aTagStack) {
 832   nsresult result = NS_OK;
 833   nsAutoString theContext;
 834   uint32_t theCount = aTagStack.Length();
 835   uint32_t theIndex = 0;
 836
 837   // Disable observers for fragments
 838   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
 839
 840   for (theIndex = 0; theIndex < theCount; theIndex++) {
 841     theContext.Append('<');
 842     theContext.Append(aTagStack[theCount - theIndex - 1]);
 843     theContext.Append('>');
 844   }
 845
 846   if (theCount == 0) {
 847     // Ensure that the buffer is not empty. Because none of the DTDs care
 848     // about leading whitespace, this doesn't change the result.
 849     theContext.Assign(' ');
 850   }
 851
 852   // First, parse the context to build up the DTD's tag stack. Note that we
 853   // pass false for the aLastCall parameter.
 854   result = Parse(theContext, (void*)&theContext, false);
 855   if (NS_FAILED(result)) {
 856     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
 857     return result;
 858   }
 859
 860   if (!mSink) {
 861     // Parse must have failed in the XML case and so the sink was killed.
 862     return NS_ERROR_HTMLPARSER_STOPPARSING;
 863   }
 864
 865   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
 866   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
 867
 868   fragSink->WillBuildContent();
 869   // Now, parse the actual content. Note that this is the last call
 870   // for HTML content, but for XML, we will want to build and parse
 871   // the end tags.  However, if tagStack is empty, it's the last call
 872   // for XML as well.
 873   if (theCount == 0) {
 874     result = Parse(aSourceBuffer, &theContext, true);
 875     fragSink->DidBuildContent();
 876   } else {
 877     // Add an end tag chunk, so expat will read the whole source buffer,
 878     // and not worry about ']]' etc.
 879     result = Parse(aSourceBuffer + u"</"_ns, &theContext, false);
 880     fragSink->DidBuildContent();
 881
 882     if (NS_SUCCEEDED(result)) {
 883       nsAutoString endContext;
 884       for (theIndex = 0; theIndex < theCount; theIndex++) {
 885         // we already added an end tag chunk above
 886         if (theIndex > 0) {
 887           endContext.AppendLiteral("</");
 888         }
 889
 890         nsString& thisTag = aTagStack[theIndex];
 891         // was there an xmlns=?
 892         int32_t endOfTag = thisTag.FindChar(char16_t(' '));
 893         if (endOfTag == -1) {
 894           endContext.Append(thisTag);
 895         } else {
 896           endContext.Append(Substring(thisTag, 0, endOfTag));
 897         }
 898
 899         endContext.Append('>');
 900       }
 901
 902       result = Parse(endContext, &theContext, true);
 903     }
 904   }
 905
 906   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
 907
 908   return result;
 909 }
 910
/**
 *  This routine is called to cause the parser to continue parsing its
 *  underlying stream.  This call allows the parse process to happen in
 *  chunks, such as when the content is push based, and we need to parse in
 *  pieces.
 *
 *  An interesting change in how the parser gets used has led us to add extra
 *  processing to this method.  The case occurs when the parser is blocked in
 *  one context, and gets a parse(string) call in another context.  In this
 *  case, the parserContexts are linked. No problem.
 *
 *  The problem is that Parse(string) assumes that it can proceed unabated,
 *  but if the parser is already blocked that assumption is false. So we
 *  needed to add a mechanism here to allow the parser to continue to process
 *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 *  out of contexts.
 *
 *  Nothing is done (and NS_OK is returned) when the parser is blocked or its
 *  internal state is already NS_ERROR_HTMLPARSER_STOPPARSING.
 *
 *  @param   allowIteration : set to true if non-script resumption is
 *                            requested; only consulted after a context has
 *                            been popped, to decide whether to keep looping.
 *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
 *                           It is recomputed from the new current context
 *                           after a context is popped.
 *  @param   aCanInterrupt : not referenced anywhere in this function body.
 *  @return  error code -- 0 if ok, non-zero if error. The block, stop-parsing
 *           and interrupted conditions are all reported to the caller as
 *           NS_OK.
 */
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                               bool aCanInterrupt) {
  nsresult result = NS_OK;

  if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    result = WillBuildModel(mParserContext->mScanner->GetFilename());
    if (NS_FAILED(result)) {
      // Tokenizing is switched off for subsequent calls once WillBuildModel
      // has failed.
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mDTD) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // One pass = (optionally) tokenize, then build the model. The loop
      // keeps going only while both steps leave more work to do.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner->UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
                                          ? Tokenize(aIsFinalChunk)
                                          : NS_OK;
        result = BuildModel();

        // An interrupt on the final chunk means nobody else will call us
        // again, so schedule the continuation ourselves.
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told to block the parser, we disable all further parsing
        // (and cache any data coming in) until the parser is re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          if (!mBlocked) {
            // If we were blocked by a recursive invocation, don't re-block.
            BlockParser();
          }
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          // The state check avoids calling DidBuildModel a second time when
          // the internal state already records the stop.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel(mStreamStatus);
            mInternalState = result;
          }

          return NS_OK;
        }
        // Either the buffer was consumed cleanly or the model build was
        // interrupted: decide whether the current context is finished.
        if ((NS_OK == result &&
             theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          bool theContextIsStringBased =
              CParserContext::eCTString == mParserContext->mContextType;

          if (mParserContext->mStreamListenerState == eOnStop ||
              !mParserContext->mMultipart || theContextIsStringBased) {
            if (!mParserContext->mPrevContext) {
              // Root context: finish the document only once the stream has
              // reported its end.
              if (mParserContext->mStreamListenerState == eOnStop) {
                DidBuildModel(mStreamStatus);
                return NS_OK;
              }
            } else {
              // Nested context: pop it, salvage its unread data if it asked
              // for that, and destroy it.
              CParserContext* theContext = PopContext();
              if (theContext) {
                theIterationIsOk = allowIteration && theContextIsStringBased;
                if (theContext->mCopyUnused) {
                  if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
                    mInternalState = NS_ERROR_OUT_OF_MEMORY;
                  }
                }

                delete theContext;
              }

              // From here on the loop operates on the context that is now
              // current, so refresh result and the final-chunk flag.
              result = mInternalState;
              aIsFinalChunk = mParserContext &&
                              mParserContext->mStreamListenerState == eOnStop;
              // ...then intentionally fall through to mSink->WillInterrupt()...
            }
          }
        }

        if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          // An interrupt is not an error for the loop condition or for the
          // caller, so fold it into NS_OK before notifying the sink.
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      // WillBuildModel succeeded but left us without a DTD.
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
1043
1044 /**
1045  *  This is where we loop over the tokens created in the
1046  *  tokenization phase, and try to make sense out of them.
1047  */
1048 nsresult nsParser::BuildModel() {
1049   nsITokenizer* theTokenizer = nullptr;
1050
1051   nsresult result = NS_OK;
1052   if (mParserContext) {
1053     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1054   }
1055
1056   if (NS_SUCCEEDED(result)) {
1057     if (mDTD) {
1058       result = mDTD->BuildModel(theTokenizer, mSink);
1059     }
1060   } else {
1061     mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1062   }
1063   return result;
1064 }
1065
1066 /*******************************************************************
1067   These methods are used to talk to the netlib system...
1068  *******************************************************************/
1069
1070 nsresult nsParser::OnStartRequest(nsIRequest* request) {
1071   MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
1072              "Parser's nsIStreamListener API was not setup "
1073              "correctly in constructor.");
1074
1075   if (mObserver) {
1076     mObserver->OnStartRequest(request);
1077   }
1078   mParserContext->mStreamListenerState = eOnStart;
1079   mParserContext->mAutoDetectStatus = eUnknownDetect;
1080   mParserContext->mRequest = request;
1081
1082   NS_ASSERTION(!mParserContext->mPrevContext,
1083                "Clobbering DTD for non-root parser context!");
1084   mDTD = nullptr;
1085
1086   nsresult rv;
1087   nsAutoCString contentType;
1088   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
1089   if (channel) {
1090     rv = channel->GetContentType(contentType);
1091     if (NS_SUCCEEDED(rv)) {
1092       mParserContext->SetMimeType(contentType);
1093     }
1094   }
1095
1096   rv = NS_OK;
1097
1098   return rv;
1099 }
1100
1101 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
1102                                              int32_t aLen,
1103                                              nsCString& oCharset) {
1104   // This code is rather pointless to have. Might as well reuse expat as
1105   // seen in nsHtml5StreamParser. -- hsivonen
1106   oCharset.Truncate();
1107   if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
1108       ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
1109     int32_t i;
1110     bool versionFound = false, encodingFound = false;
1111     for (i = 6; i < aLen && !encodingFound; ++i) {
1112       // end of XML declaration?
1113       if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
1114           (((char*)aBytes)[i + 1] == '>')) {
1115         break;
1116       }
1117       // Version is required.
1118       if (!versionFound) {
1119         // Want to avoid string comparisons, hence looking for 'n'
1120         // and only if found check the string leading to it. Not
1121         // foolproof, but fast.
1122         // The shortest string allowed before this is  (strlen==13):
1123         // <?xml version
1124         if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
1125             (0 == PL_strncmp("versio", (char*)(aBytes + i - 6), 6))) {
1126           // Fast forward through version
1127           char q = 0;
1128           for (++i; i < aLen; ++i) {
1129             char qi = ((char*)aBytes)[i];
1130             if (qi == '\'' || qi == '"') {
1131               if (q && q == qi) {
1132                 //  ending quote
1133                 versionFound = true;
1134                 break;
1135               } else {
1136                 // Starting quote
1137                 q = qi;
1138               }
1139             }
1140           }
1141         }
1142       } else {
1143         // encoding must follow version
1144         // Want to avoid string comparisons, hence looking for 'g'
1145         // and only if found check the string leading to it. Not
1146         // foolproof, but fast.
1147         // The shortest allowed string before this (strlen==26):
1148         // <?xml version="1" encoding
1149         if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
1150             (0 == PL_strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
1151           int32_t encStart = 0;
1152           char q = 0;
1153           for (++i; i < aLen; ++i) {
1154             char qi = ((char*)aBytes)[i];
1155             if (qi == '\'' || qi == '"') {
1156               if (q && q == qi) {
1157                 int32_t count = i - encStart;
1158                 // encoding value is invalid if it is UTF-16
1159                 if (count > 0 &&
1160                     PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
1161                                    count)) {
1162                   oCharset.Assign((char*)(aBytes + encStart), count);
1163                 }
1164                 encodingFound = true;
1165                 break;
1166               } else {
1167                 encStart = i + 1;
1168                 q = qi;
1169               }
1170             }
1171           }
1172         }
1173       }  // if (!versionFound)
1174     }    // for
1175   }
1176   return !oCharset.IsEmpty();
1177 }
1178
1179 inline char GetNextChar(nsACString::const_iterator& aStart,
1180                         nsACString::const_iterator& aEnd) {
1181   NS_ASSERTION(aStart != aEnd, "end of buffer");
1182   return (++aStart != aEnd) ? *aStart : '\0';
1183 }
1184
1185 static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure,
1186                                     const char* fromRawSegment,
1187                                     uint32_t toOffset, uint32_t count,
1188                                     uint32_t* writeCount) {
1189   *writeCount = count;
1190   return NS_OK;
1191 }
1192
1193 typedef struct {
1194   bool mNeedCharsetCheck;
1195   nsParser* mParser;
1196   nsScanner* mScanner;
1197   nsIRequest* mRequest;
1198 } ParserWriteStruct;
1199
1200 /*
1201  * This function is invoked as a result of a call to a stream's
1202  * ReadSegments() method. It is called for each contiguous buffer
1203  * of data in the underlying stream or pipe. Using ReadSegments
1204  * allows us to avoid copying data to read out of the stream.
1205  */
1206 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
1207                                 const char* fromRawSegment, uint32_t toOffset,
1208                                 uint32_t count, uint32_t* writeCount) {
1209   nsresult result;
1210   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
1211   const unsigned char* buf =
1212       reinterpret_cast<const unsigned char*>(fromRawSegment);
1213   uint32_t theNumRead = count;
1214
1215   if (!pws) {
1216     return NS_ERROR_FAILURE;
1217   }
1218
1219   if (pws->mNeedCharsetCheck) {
1220     pws->mNeedCharsetCheck = false;
1221     int32_t source;
1222     auto preferred = pws->mParser->GetDocumentCharset(source);
1223
1224     // This code was bogus when I found it. It expects the BOM or the XML
1225     // declaration to be entirely in the first network buffer. -- hsivonen
1226     const Encoding* encoding;
1227     size_t bomLength;
1228     Tie(encoding, bomLength) = Encoding::ForBOM(Span(buf, count));
1229     Unused << bomLength;
1230     if (encoding) {
1231       // The decoder will swallow the BOM. The UTF-16 will re-sniff for
1232       // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
1233       // or "UTF-16BE".
1234       preferred = WrapNotNull(encoding);
1235       source = kCharsetFromByteOrderMark;
1236     } else if (source < kCharsetFromChannel) {
1237       nsAutoCString declCharset;
1238
1239       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
1240         encoding = Encoding::ForLabel(declCharset);
1241         if (encoding) {
1242           preferred = WrapNotNull(encoding);
1243           source = kCharsetFromMetaTag;
1244         }
1245       }
1246     }
1247
1248     pws->mParser->SetDocumentCharset(preferred, source);
1249     pws->mParser->SetSinkCharset(preferred);
1250   }
1251
1252   result = pws->mScanner->Append(fromRawSegment, theNumRead);
1253   if (NS_SUCCEEDED(result)) {
1254     *writeCount = count;
1255   }
1256
1257   return result;
1258 }
1259
1260 nsresult nsParser::OnDataAvailable(nsIRequest* request,
1261                                    nsIInputStream* pIStream,
1262                                    uint64_t sourceOffset, uint32_t aLength) {
1263   MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
1264               eOnDataAvail == mParserContext->mStreamListenerState),
1265              "Error: OnStartRequest() must be called before OnDataAvailable()");
1266   MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
1267              "Must have a buffered input stream");
1268
1269   nsresult rv = NS_OK;
1270
1271   if (mIsAboutBlank) {
1272     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
1273     // ... but if an extension tries to feed us data for about:blank in a
1274     // release build, silently ignore the data.
1275     uint32_t totalRead;
1276     rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength,
1277                                 &totalRead);
1278     return rv;
1279   }
1280
1281   CParserContext* theContext = mParserContext;
1282
1283   while (theContext && theContext->mRequest != request) {
1284     theContext = theContext->mPrevContext;
1285   }
1286
1287   if (theContext) {
1288     theContext->mStreamListenerState = eOnDataAvail;
1289
1290     if (eInvalidDetect == theContext->mAutoDetectStatus) {
1291       if (theContext->mScanner) {
1292         nsScannerIterator iter;
1293         theContext->mScanner->EndReading(iter);
1294         theContext->mScanner->SetPosition(iter, true);
1295       }
1296     }
1297
1298     uint32_t totalRead;
1299     ParserWriteStruct pws;
1300     pws.mNeedCharsetCheck = true;
1301     pws.mParser = this;
1302     pws.mScanner = theContext->mScanner.get();
1303     pws.mRequest = request;
1304
1305     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1306     if (NS_FAILED(rv)) {
1307       return rv;
1308     }
1309
1310     if (IsOkToProcessNetworkData()) {
1311       nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1312       nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1313       mProcessingNetworkData = true;
1314       if (sinkDeathGrip) {
1315         sinkDeathGrip->WillParse();
1316       }
1317       rv = ResumeParse();
1318       mProcessingNetworkData = false;
1319     }
1320   } else {
1321     rv = NS_ERROR_UNEXPECTED;
1322   }
1323
1324   return rv;
1325 }
1326
1327 /**
1328  *  This is called by the networking library once the last block of data
1329  *  has been collected from the net.
1330  */
1331 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
1332   nsresult rv = NS_OK;
1333
1334   CParserContext* pc = mParserContext;
1335   while (pc) {
1336     if (pc->mRequest == request) {
1337       pc->mStreamListenerState = eOnStop;
1338       pc->mScanner->SetIncremental(false);
1339       break;
1340     }
1341
1342     pc = pc->mPrevContext;
1343   }
1344
1345   mStreamStatus = status;
1346
1347   if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1348     mProcessingNetworkData = true;
1349     if (mSink) {
1350       mSink->WillParse();
1351     }
1352     rv = ResumeParse(true, true);
1353     mProcessingNetworkData = false;
1354   }
1355
1356   // If the parser isn't enabled, we don't finish parsing till
1357   // it is reenabled.
1358
1359   // XXX Should we wait to notify our observers as well if the
1360   // parser isn't yet enabled?
1361   if (mObserver) {
1362     mObserver->OnStopRequest(request, status);
1363   }
1364
1365   return rv;
1366 }
1367
1368 /*******************************************************************
1369   Here come the tokenization methods...
1370  *******************************************************************/
1371
1372 /**
1373  *  Part of the code sandwich, this gets called right before
1374  *  the tokenization process begins. The main reason for
1375  *  this call is to allow the delegate to do initialization.
1376  */
1377 bool nsParser::WillTokenize(bool aIsFinalChunk) {
1378   if (!mParserContext) {
1379     return true;
1380   }
1381
1382   nsITokenizer* theTokenizer;
1383   nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1384   NS_ENSURE_SUCCESS(result, false);
1385   return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
1386 }
1387
1388 /**
1389  * This is the primary control routine to consume tokens.
1390  * It iteratively consumes tokens until an error occurs or
1391  * you run out of data.
1392  */
1393 nsresult nsParser::Tokenize(bool aIsFinalChunk) {
1394   nsITokenizer* theTokenizer;
1395
1396   nsresult result = NS_ERROR_NOT_AVAILABLE;
1397   if (mParserContext) {
1398     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1399   }
1400
1401   if (NS_SUCCEEDED(result)) {
1402     bool flushTokens = false;
1403
1404     bool killSink = false;
1405
1406     WillTokenize(aIsFinalChunk);
1407     while (NS_SUCCEEDED(result)) {
1408       mParserContext->mScanner->Mark();
1409       result =
1410           theTokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens);
1411       if (NS_FAILED(result)) {
1412         mParserContext->mScanner->RewindToMark();
1413         if (NS_ERROR_HTMLPARSER_EOF == result) {
1414           break;
1415         }
1416         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1417           killSink = true;
1418           result = Terminate();
1419           break;
1420         }
1421       } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
1422         // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix
1423         // Bug# 23931. Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
1424         // Also remember to update the marked position.
1425         mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
1426         mParserContext->mScanner->Mark();
1427         break;
1428       }
1429     }
1430
1431     if (killSink) {
1432       mSink = nullptr;
1433     }
1434   } else {
1435     result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1436   }
1437
1438   return result;
1439 }
1440
1441 /**
1442  * Get the channel associated with this parser
1443  *
1444  * @param aChannel out param that will contain the result
1445  * @return NS_OK if successful
1446  */
1447 NS_IMETHODIMP
1448 nsParser::GetChannel(nsIChannel** aChannel) {
1449   nsresult result = NS_ERROR_NOT_AVAILABLE;
1450   if (mParserContext && mParserContext->mRequest) {
1451     result = CallQueryInterface(mParserContext->mRequest, aChannel);
1452   }
1453   return result;
1454 }
1455
1456 /**
1457  * Get the DTD associated with this parser
1458  */
1459 NS_IMETHODIMP
1460 nsParser::GetDTD(nsIDTD** aDTD) {
1461   if (mParserContext) {
1462     NS_IF_ADDREF(*aDTD = mDTD);
1463   }
1464
1465   return NS_OK;
1466 }
1467
1468 /**
1469  * Get this as nsIStreamListener
1470  */
1471 nsIStreamListener* nsParser::GetStreamListener() { return this; }