parser/htmlparser/nsParser.h

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 /**
   7  * MODULE NOTES:
   8  *
   9  *  This class does two primary jobs:
  10  *    1) It iterates the tokens provided during the
   11  *       tokenization process, identifying where elements
  12  *       begin and end (doing validation and normalization).
  13  *    2) It controls and coordinates with an instance of
   14  *       the IContentSink interface, to coordinate the
   15  *       production of the content model.
  16  *
  17  *  The basic operation of this class assumes that an HTML
  18  *  document is non-normalized. Therefore, we don't process
  19  *  the document in a normalized way. Don't bother to look
  20  *  for methods like: doHead() or doBody().
  21  *
  22  *  Instead, in order to be backward compatible, we must
  23  *  scan the set of tokens and perform this basic set of
  24  *  operations:
  25  *    1)  Determine the token type (easy, since the tokens know)
  26  *    2)  Determine the appropriate section of the HTML document
  27  *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
  28  *    3)  Insert content into our document (via the sink) into
  29  *        the correct section.
  30  *    4)  In the case of tags that belong in the BODY, we must
  31  *        ensure that our underlying document state reflects
  32  *        the appropriate context for our tag.
  33  *
   34  *        For example, if we see a <TR>, we must ensure our
  35  *        document contains a table into which the row can
  36  *        be placed. This may result in "implicit containers"
  37  *        created to ensure a well-formed document.
  38  *
  39  */
  40
  41 #ifndef NS_PARSER__
  42 #define NS_PARSER__
  43
  44 #include "nsIParser.h"
  45 #include "nsDeque.h"
  46 #include "nsIURL.h"
  47 #include "CParserContext.h"
  48 #include "nsParserCIID.h"
  49 #include "nsITokenizer.h"
  50 #include "nsHTMLTags.h"
  51 #include "nsIContentSink.h"
  52 #include "nsCOMArray.h"
  53 #include "nsCycleCollectionParticipant.h"
  54 #include "nsWeakReference.h"
  55
  56 class nsIDTD;
  57 class nsScanner;
  58 class nsIRunnable;
  59
  60 #ifdef _MSC_VER
  61 #pragma warning( disable : 4275 )
  62 #endif
  63
  64
   65 class nsParser MOZ_FINAL : public nsIParser,
   66                            public nsIStreamListener,
   67                            public nsSupportsWeakReference
   68 {
   69     /**
   70      * Destructor (private: declared before any access specifier)
   71      * @update  gess5/11/98
   72      */
   73     virtual ~nsParser();
   74
   75   public:
   76     /**
   77      * Called on module init
   78      */
   79     static nsresult Init();
   80
   81     /**
   82      * Called on module shutdown
   83      */
   84     static void Shutdown();
   85
   86     NS_DECL_CYCLE_COLLECTING_ISUPPORTS
   87     NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
   88
   89     /**
   90      * Default constructor
   91      * @update  gess5/11/98
   92      */
   93     nsParser();
   94
   95     /**
   96      * Select given content sink into parser for parser output
   97      * @update  gess5/11/98
   98      * @param   aSink is the new sink to be used by parser
   99      * @return  nothing (the method is declared void)
  100      */
  101     NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink);
  102
  103     /**
  104      * Retrieve the sink set into the parser
  105      * @update  gess5/11/98
  106      * (takes no parameters)
  107      * @return  the sink pointer (nsIContentSink*)
  108      */
  109     NS_IMETHOD_(nsIContentSink*) GetContentSink(void);
  110
  111     /**
  112      *  Call this method once you've created a parser, and want to instruct it
  113      *  about the command which caused the parser to be constructed. For example,
  114      *  this allows us to select a DTD which can do, say, view-source.
  115      *
  116      *  @update  gess 3/25/98
  117      *  @param   aCommand -- the command, as a string or an eParserCommands value
  118      *  @return  nothing
  119      */
  120     NS_IMETHOD_(void) GetCommand(nsCString& aCommand);
  121     NS_IMETHOD_(void) SetCommand(const char* aCommand);
  122     NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand);
  123
  124     /**
  125      *  Call this method once you've created a parser, and want to instruct it
  126      *  about what charset to load
  127      *
  128      *  @update  ftang 4/23/99
  129      *  @param   aCharset - the charset of a document
  130      *  @param   aSource - the source of the charset
  131      *  @return  nothing
  132      */
  133     NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
  134     // Copies the stored charset and charset source into the out-params.
  135     NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource)
  136     {
  137          aCharset = mCharset;
  138          aSource = mCharsetSource;
  139     }
  140
  141     /**
  142      * Cause parser to parse input from given URL
  143      * @update  gess5/11/98
  144      * @param   aURL is a descriptor for source document
  145      * @param   aListener is a listener to forward notifications to
  146      * @return  nsresult status code (not a boolean)
  147      */
  148     NS_IMETHOD Parse(nsIURI* aURL,
  149                      nsIRequestObserver* aListener = nullptr,
  150                      void* aKey = 0,
  151                      nsDTDMode aMode = eDTDMode_autodetect);
  152
  153     /**
  154      * NOTE(review): undocumented; presumably parses aSourceBuffer as a fragment in the context of aTagStack -- confirm.
  155      */
  156     NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
  157                              nsTArray<nsString>& aTagStack);
  158
  159     /**
  160      * This method gets called when the tokens have been consumed, and it's time
  161      * to build the model via the content sink.
  162      * @update  gess5/11/98
  163      * @return  nsresult status code (not a boolean)
  164      */
  165     NS_IMETHOD BuildModel(void);
  166
  167     NS_IMETHOD        ContinueInterruptedParsing();
  168     NS_IMETHOD_(void) BlockParser();
  169     NS_IMETHOD_(void) UnblockParser();
  170     NS_IMETHOD_(void) ContinueInterruptedParsingAsync();
  171     NS_IMETHOD        Terminate(void);
  172
  173     /**
  174      * Call this to query whether the parser is enabled or not.
  175      *
  176      *  @update  vidur 4/12/99
  177      *  @return  current state
  178      */
  179     NS_IMETHOD_(bool) IsParserEnabled();
  180
  181     /**
  182      * Call this to query whether the parser thinks it's done with parsing.
  183      *
  184      *  @update  rickg 5/12/01
  185      *  @return  complete state
  186      */
  187     NS_IMETHOD_(bool) IsComplete();
  188
  189     /**
  190      *  This rather arcane method (hack) is used as a signal between the
  191      *  DTD and the parser. It allows the DTD to tell the parser that content
  192      *  that comes through (parser::parser(string)) but not consumed should
  193      *  propagate into the next string based parse call.
  194      *
  195      *  @update  gess 9/1/98
  196      *  @param   aBuffer NOTE(review): presumably the unconsumed input to carry over -- confirm
  197      *  @return  nothing (the method is declared void)
  198      */
  199     void SetUnusedInput(nsString& aBuffer);
  200
  201     /**
  202      * This method gets called (automatically) during incremental parsing
  203      * @update  gess5/11/98
  204      * @return  nsresult status code (not a boolean)
  205      */
  206     virtual nsresult ResumeParse(bool allowIteration = true,
  207                                  bool aIsFinalChunk = false,
  208                                  bool aCanInterrupt = true);
  209
  210      //*********************************************
  211       // These methods are callback methods used by
  212       // net lib to let us know about our inputstream.
  213       //*********************************************
  214     // nsIRequestObserver methods:
  215     NS_DECL_NSIREQUESTOBSERVER
  216
  217     // nsIStreamListener methods:
  218     NS_DECL_NSISTREAMLISTENER
  219     // PeekContext() returns the current mParserContext pointer as-is.
  220     void              PushContext(CParserContext& aContext);
  221     CParserContext*   PopContext();
  222     CParserContext*   PeekContext() {return mParserContext;}
  223
  224     /**
  225      * Get the channel associated with this parser
  226      * @update harishd,gagan 07/17/01
  227      * @param aChannel out param that will contain the result
  228      * @return NS_OK if successful
  229      */
  230     NS_IMETHOD GetChannel(nsIChannel** aChannel);
  231
  232     /**
  233      * Get the DTD associated with this parser
  234      * @update vidur 9/29/99
  235      * @param aDTD out param that will contain the result
  236      * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
  237      */
  238     NS_IMETHOD GetDTD(nsIDTD** aDTD);
  239
  240     /**
  241      * Get the nsIStreamListener for this parser
  242      */
  243     virtual nsIStreamListener* GetStreamListener();
  244
  245     void SetSinkCharset(nsACString& aCharset);
  246
  247     /**
  248      *  Removes continue parsing events
  249      *  @update  kmcclusk 5/18/98
  250      */
  251     // NOTE(review): the only declaration here using NS_IMETHODIMP instead of NS_IMETHOD -- confirm intent.
  252     NS_IMETHODIMP CancelParsingEvents();
  253
  254     /**
  255      * Return true.
  256      */
  257     virtual bool IsInsertionPointDefined();
  258
  259     /**
  260      * No-op.
  261      */
  262     virtual void BeginEvaluatingParserInsertedScript();
  263
  264     /**
  265      * No-op.
  266      */
  267     virtual void EndEvaluatingParserInsertedScript();
  268
  269     /**
  270      * No-op.
  271      */
  272     virtual void MarkAsNotScriptCreated(const char* aCommand);
  273
  274     /**
  275      * Always false.
  276      */
  277     virtual bool IsScriptCreated();
  278
  279     /**
  280      *  Set the parser state to indicate whether parsing tokens can be interrupted
  281      *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
  282      *  @update  kmcclusk 5/18/98
  283      */
  284     void SetCanInterrupt(bool aCanInterrupt);
  285
  286     /**
  287      * This is called when the final chunk has been
  288      * passed to the parser and the content sink has
  289      * interrupted token processing. It schedules
  290      * a ParserContinue PL_Event which will ask the parser
  291      * to HandleParserContinueEvent when it is handled.
  292      * @update  kmcclusk6/1/2001
  293      */
  294     nsresult PostContinueEvent();
  295
  296     /**
  297      *  Fired when the continue parse event is triggered.
  298      *  @update  kmcclusk 5/18/98
  299      */
  300     void HandleParserContinueEvent(class nsParserContinueEvent *);
  301     // Runs Cleanup() and then Initialize() with aConstructor left at its default (false).
  302     virtual void Reset() {
  303       Cleanup();
  304       Initialize();
  305     }
  306     // True when a sink is set and that sink reports a script is executing.
  307     bool IsScriptExecuting() {
  308       return mSink && mSink->IsScriptExecuting();
  309     }
  310     // True only when no script is executing and mProcessingNetworkData is false.
  311     bool IsOkToProcessNetworkData() {
  312       return !IsScriptExecuting() && !mProcessingNetworkData;
  313     }
  314
  315  protected:
  316
  317     void Initialize(bool aConstructor = false);
  318     void Cleanup();
  319
  320     /**
  321      * NOTE(review): purpose undocumented in this header.
  322      * @update  gess5/18/98
  323      * @param   aFilename (undocumented)
  324      * @return  nsresult status code
  325      */
  326     nsresult WillBuildModel(nsString& aFilename);
  327
  328     /**
  329      * NOTE(review): purpose undocumented in this header.
  330      * @update  gess5/18/98
  331      * @param   anErrorCode (undocumented)
  332      * @return  nsresult status code
  333      */
  334     nsresult DidBuildModel(nsresult anErrorCode);
  335
  336 private:
  337
  338     /*******************************************
  339       These are the tokenization methods...
  340      *******************************************/
  341
  342     /**
  343      *  Part of the code sandwich, this gets called right before
  344      *  the tokenization process begins. The main reason for
  345      *  this call is to allow the delegate to do initialization.
  346      *
  347      *  @update  gess 3/25/98
  348      *  @param   aIsFinalChunk defaults to false
  349      *  @return  TRUE if it's ok to proceed
  350      */
  351     bool WillTokenize(bool aIsFinalChunk = false);
  352
  353
  354     /**
  355      *  This is the primary control routine. It iteratively
  356      *  consumes tokens until an error occurs or you run out
  357      *  of data.
  358      *
  359      *  @update  gess 3/25/98
  360      *  @return  error code
  361      */
  362     nsresult Tokenize(bool aIsFinalChunk = false);
  363
  364     /**
  365      * Pushes XML fragment parsing data to expat without an input stream.
  366      */
  367     nsresult Parse(const nsAString& aSourceBuffer,
  368                    void* aKey,
  369                    bool aLastCall);
  370
  371 protected:
  372     //*********************************************
  373     // And now, some data members...
  374     //*********************************************
  375
  376
  377     CParserContext*              mParserContext;
  378     nsCOMPtr<nsIDTD>             mDTD;
  379     nsCOMPtr<nsIRequestObserver> mObserver;
  380     nsCOMPtr<nsIContentSink>     mSink;
  381     nsIRunnable*                 mContinueEvent;  // weak ref
  382
  383     eParserCommands     mCommand;
  384     nsresult            mInternalState;
  385     nsresult            mStreamStatus;
  386     int32_t             mCharsetSource;
  387
  388     uint16_t            mFlags;
  389
  390     nsString            mUnusedInput;
  391     nsCString           mCharset;
  392     nsCString           mCommandStr;
  393
  394     bool                mProcessingNetworkData;
  395     bool                mIsAboutBlank;
  396 };
 397
 398 #endif
 399