1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_HTML5_PARSER
7 #define NS_HTML5_PARSER
10 #include "nsIParser.h"
13 #include "nsParserCIID.h"
14 #include "nsITokenizer.h"
15 #include "nsIContentSink.h"
16 #include "nsIRequest.h"
17 #include "nsIChannel.h"
18 #include "nsCOMArray.h"
19 #include "nsContentSink.h"
20 #include "nsCycleCollectionParticipant.h"
21 #include "nsIInputStream.h"
22 #include "nsDetectionConfident.h"
23 #include "nsHtml5OwningUTF16Buffer.h"
24 #include "nsHtml5TreeOpExecutor.h"
25 #include "nsHtml5StreamParser.h"
26 #include "nsHtml5AtomTable.h"
27 #include "nsWeakReference.h"
28 #include "nsHtml5StreamListener.h"
30 class nsHtml5Parser final
32 , public nsSupportsWeakReference
35 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
37 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser
, nsIParser
)
43 * No-op for backwards compat.
45 NS_IMETHOD_(void) SetContentSink(nsIContentSink
* aSink
) override
;
48 * Returns the tree op executor for backwards compat.
50 NS_IMETHOD_(nsIContentSink
*) GetContentSink() override
;
53 * Always returns "view" for backwards compat.
55 NS_IMETHOD_(void) GetCommand(nsCString
& aCommand
) override
;
58 * No-op for backwards compat.
60 NS_IMETHOD_(void) SetCommand(const char* aCommand
) override
;
63 * No-op for backwards compat.
65 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand
) override
;
68 * Call this method once you've created a parser, and want to instruct it
69 * about what charset to load
71 * @param aEncoding the charset of a document
72 * @param aCharsetSource the source of the charset
74 virtual void SetDocumentCharset(NotNull
<const Encoding
*> aEncoding
,
75 int32_t aSource
) override
;
78 * Get the channel associated with this parser
79 * @param aChannel out param that will contain the result
80 * @return NS_OK if successful or NS_NOT_AVAILABLE if not
82 NS_IMETHOD
GetChannel(nsIChannel
** aChannel
) override
;
85 * Return |this| for backwards compat.
87 NS_IMETHOD
GetDTD(nsIDTD
** aDTD
) override
;
90 * Get the stream parser for this parser
92 virtual nsIStreamListener
* GetStreamListener() override
;
95 * Don't call. For interface compat only.
97 NS_IMETHOD
ContinueInterruptedParsing() override
;
102 NS_IMETHOD_(void) BlockParser() override
;
105 * Unblocks the parser.
107 NS_IMETHOD_(void) UnblockParser() override
;
110 * Asynchronously continues parsing.
112 NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override
;
115 * Query whether the parser is enabled (i.e. not blocked) or not.
117 NS_IMETHOD_(bool) IsParserEnabled() override
;
120 * Query whether the parser thinks it's done with parsing.
122 NS_IMETHOD_(bool) IsComplete() override
;
125 * Set up request observer.
127 * @param aURL used for View Source title
128 * @param aListener a listener to forward notifications to
129 * @param aKey the root context key (used for document.write)
130 * @param aMode ignored (for interface compat only)
132 NS_IMETHOD
Parse(nsIURI
* aURL
,
133 nsIRequestObserver
* aListener
= nullptr,
135 nsDTDMode aMode
= eDTDMode_autodetect
) override
;
138 * document.write and document.close
140 * @param aSourceBuffer the argument of document.write (empty for .close())
141 * @param aKey a key unique to the script element that caused this call
142 * @param aContentType "text/html" for HTML mode, else text/plain mode
143 * @param aLastCall true if .close() false if .write()
144 * @param aMode ignored (for interface compat only)
146 nsresult
Parse(const nsAString
& aSourceBuffer
,
148 const nsACString
& aContentType
,
150 nsDTDMode aMode
= eDTDMode_autodetect
);
153 * Stops the parser prematurely
155 NS_IMETHOD
Terminate() override
;
158 * Don't call. For interface backwards compat only.
160 NS_IMETHOD
ParseFragment(const nsAString
& aSourceBuffer
,
161 nsTArray
<nsString
>& aTagStack
) override
;
164 * Don't call. For interface compat only.
166 NS_IMETHOD
BuildModel() override
;
169 * Don't call. For interface compat only.
171 NS_IMETHOD
CancelParsingEvents() override
;
174 * Don't call. For interface compat only.
176 virtual void Reset() override
;
179 * True if the insertion point (per HTML5) is defined.
181 virtual bool IsInsertionPointDefined() override
;
184 * Call immediately before starting to evaluate a parser-inserted script or
185 * in general when the spec says to define an insertion point.
187 virtual void PushDefinedInsertionPoint() override
;
190 * Call immediately after having evaluated a parser-inserted script or
191 * generally want to restore to the state before the last
192 * PushDefinedInsertionPoint call.
194 virtual void PopDefinedInsertionPoint() override
;
197 * Marks the HTML5 parser as not a script-created parser: Prepares the
198 * parser to be able to read a stream.
200 * @param aCommand the parser command (Yeah, this is bad API design. Let's
201 * make this better when retiring nsIParser)
203 virtual void MarkAsNotScriptCreated(const char* aCommand
) override
;
206 * True if this is a script-created HTML5 parser.
208 virtual bool IsScriptCreated() override
;
212 // Not from an external interface
213 // Non-inherited methods
217 * Initializes the parser to load from a channel.
219 virtual nsresult
Initialize(nsIDocument
* aDoc
,
221 nsISupports
* aContainer
,
222 nsIChannel
* aChannel
);
224 inline nsHtml5Tokenizer
* GetTokenizer() { return mTokenizer
; }
226 void InitializeDocWriteParserState(nsAHtml5TreeBuilderState
* aState
,
229 void DropStreamParser()
231 if (GetStreamParser()) {
232 GetStreamParser()->DropTimer();
233 mStreamListener
->DropDelegate();
234 mStreamListener
= nullptr;
/**
 * Starts the tokenizer.
 *
 * @param aScriptingEnabled NOTE(review): presumably whether scripting is
 *                          enabled for the document being parsed - confirm
 */
void StartTokenizer(bool aScriptingEnabled);
// NOTE(review): undocumented in the original; by its name, resumes parsing
// after an attempted charset switch did not succeed - confirm.
void ContinueAfterFailedCharsetSwitch();
242 nsHtml5StreamParser
* GetStreamParser()
244 if (!mStreamListener
) {
247 return mStreamListener
->GetDelegate();
250 void PermanentlyUndefineInsertionPoint()
252 mInsertionPointPermanentlyUndefined
= true;
256 * Parse until pending data is exhausted or a script blocks the parser
258 nsresult
ParseUntilBlocked();
261 virtual ~nsHtml5Parser();
266 * Whether the last character tokenized was a carriage return (for CRLF)
/**
 * Whether the last character tokenized was a carriage return (for CRLF)
 * when preparsing document.write.
 */
bool mDocWriteSpeculativeLastWasCR;
277 * The parser is blocking on the load of an external script from a web
278 * page, or any number of extension content scripts.
283 * Whether the document.write() speculator is already active.
285 bool mDocWriteSpeculatorActive
;
288 * The number of PushDefinedInsertionPoint calls we've seen without a
289 * matching PopDefinedInsertionPoint.
291 int32_t mInsertionPointPushLevel
;
294 * True if document.close() has been called.
296 bool mDocumentClosed
;
298 bool mInDocumentWrite
;
301 * This is set when the tokenizer has seen EOF. The purpose is to
302 * keep the insertion point undefined between the time the
303 * parser has reached the point where it can't accept more input
304 * and the time the document's mParser is set to nullptr.
305 * Scripts can run during this time period due to an update
306 * batch ending and due to various end-of-parse events firing.
307 * (Setting mParser on the document to nullptr at the point
308 * where this flag gets set to true would break things that for
309 * legacy reasons assume that mParser on the document stays
310 * non-null though the end-of-parse events.)
312 bool mInsertionPointPermanentlyUndefined
;
314 // Portable parser objects
316 * The first buffer in the pending UTF-16 buffer queue
318 RefPtr
<nsHtml5OwningUTF16Buffer
> mFirstBuffer
;
321 * The last buffer in the pending UTF-16 buffer queue. Always points
322 * to a sentinel object with nullptr as its parser key.
324 nsHtml5OwningUTF16Buffer
* mLastBuffer
; // weak ref;
327 * The tree operation executor
329 RefPtr
<nsHtml5TreeOpExecutor
> mExecutor
;
332 * The HTML5 tree builder
334 const nsAutoPtr
<nsHtml5TreeBuilder
> mTreeBuilder
;
337 * The HTML5 tokenizer
339 const nsAutoPtr
<nsHtml5Tokenizer
> mTokenizer
;
342 * Another HTML5 tree builder for preloading document.written content.
344 nsAutoPtr
<nsHtml5TreeBuilder
> mDocWriteSpeculativeTreeBuilder
;
347 * Another HTML5 tokenizer for preloading document.written content.
349 nsAutoPtr
<nsHtml5Tokenizer
> mDocWriteSpeculativeTokenizer
;
352 * The stream listener holding the stream parser.
354 RefPtr
<nsHtml5StreamListener
> mStreamListener
;
359 int32_t mRootContextLineNumber
;
362 * Whether it's OK to transfer parsing back to the stream parser
364 bool mReturnToStreamParserPermitted
;
367 * The scoped atom table
369 nsHtml5AtomTable mAtomTable
;