1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_HTML5_PARSER
7 #define NS_HTML5_PARSER
9 #include "mozilla/UniquePtr.h"
10 #include "nsIParser.h"
12 #include "nsParserCIID.h"
13 #include "nsITokenizer.h"
14 #include "nsIContentSink.h"
15 #include "nsIRequest.h"
16 #include "nsIChannel.h"
17 #include "nsCOMArray.h"
18 #include "nsContentSink.h"
19 #include "nsCycleCollectionParticipant.h"
20 #include "nsHtml5OwningUTF16Buffer.h"
21 #include "nsHtml5TreeOpExecutor.h"
22 #include "nsHtml5StreamParser.h"
23 #include "nsHtml5AtomTable.h"
24 #include "nsWeakReference.h"
25 #include "nsHtml5StreamListener.h"
27 class nsHtml5Parser final
: public nsIParser
, public nsSupportsWeakReference
{
29 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
31 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser
, nsIParser
)
37 * No-op for backwards compat.
39 NS_IMETHOD_(void) SetContentSink(nsIContentSink
* aSink
) override
;
42 * Returns the tree op executor for backwards compat.
44 NS_IMETHOD_(nsIContentSink
*) GetContentSink() override
;
47 * Always returns "view" for backwards compat.
49 NS_IMETHOD_(void) GetCommand(nsCString
& aCommand
) override
;
52 * No-op for backwards compat.
54 NS_IMETHOD_(void) SetCommand(const char* aCommand
) override
;
57 * No-op for backwards compat.
59 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand
) override
;
62 * Call this method once you've created a parser, and want to instruct it
63 * about what charset to load
65 * @param aEncoding the charset of a document
66 * @param aSource the source of the charset
68 virtual void SetDocumentCharset(NotNull
<const Encoding
*> aEncoding
,
69 int32_t aSource
) override
;
72 * Get the channel associated with this parser
73 * @param aChannel out param that will contain the result
74 * @return NS_OK if successful or NS_NOT_AVAILABLE if not
76 NS_IMETHOD
GetChannel(nsIChannel
** aChannel
) override
;
79 * Return |this| for backwards compat.
81 NS_IMETHOD
GetDTD(nsIDTD
** aDTD
) override
;
84 * Get the stream listener (which holds the stream parser) for this parser
86 virtual nsIStreamListener
* GetStreamListener() override
;
89 * Don't call. For interface compat only.
91 NS_IMETHOD
ContinueInterruptedParsing() override
;
96 NS_IMETHOD_(void) BlockParser() override
;
99 * Unblocks the parser.
101 NS_IMETHOD_(void) UnblockParser() override
;
104 * Asynchronously continues parsing.
106 NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override
;
109 * Query whether the parser is enabled (i.e. not blocked) or not.
111 NS_IMETHOD_(bool) IsParserEnabled() override
;
114 * Query whether the parser thinks it's done with parsing.
116 NS_IMETHOD_(bool) IsComplete() override
;
119 * Set up request observer.
121 * @param aURL used for View Source title
122 * @param aListener a listener to forward notifications to
123 * @param aKey the root context key (used for document.write)
124 * @param aMode ignored (for interface compat only)
126 NS_IMETHOD
Parse(nsIURI
* aURL
, nsIRequestObserver
* aListener
= nullptr,
128 nsDTDMode aMode
= eDTDMode_autodetect
) override
;
131 * document.write and document.close
133 * @param aSourceBuffer the argument of document.write (empty for .close())
134 * @param aKey a key unique to the script element that caused this call
135 * @param aLastCall true if .close() false if .write()
137 nsresult
Parse(const nsAString
& aSourceBuffer
, void* aKey
, bool aLastCall
);
140 * Stops the parser prematurely
142 NS_IMETHOD
Terminate() override
;
145 * Don't call. For interface backwards compat only.
147 NS_IMETHOD
ParseFragment(const nsAString
& aSourceBuffer
,
148 nsTArray
<nsString
>& aTagStack
) override
;
151 * Don't call. For interface compat only.
153 NS_IMETHOD
BuildModel() override
;
156 * Don't call. For interface compat only.
158 NS_IMETHOD
CancelParsingEvents() override
;
161 * Don't call. For interface compat only.
163 virtual void Reset() override
;
166 * True if the insertion point (per HTML5) is defined.
168 virtual bool IsInsertionPointDefined() override
;
171 * Call immediately before starting to evaluate a parser-inserted script or
172 * in general when the spec says to increment the script nesting level.
174 void IncrementScriptNestingLevel() final
;
177 * Call immediately after having evaluated a parser-inserted script, or
178 * more generally whenever you want to restore the state that was in effect
179 * before the last IncrementScriptNestingLevel call.
181 void DecrementScriptNestingLevel() final
;
184 * True if this is an HTML5 parser whose script nesting level (as defined in
186 * <https://html.spec.whatwg.org/multipage/parsing.html#script-nesting-level>)
186 * is nonzero.
189 bool HasNonzeroScriptNestingLevel() const final
;
192 * Marks the HTML5 parser as not a script-created parser: Prepares the
193 * parser to be able to read a stream.
195 * @param aCommand the parser command (Yeah, this is bad API design. Let's
196 * make this better when retiring nsIParser)
198 virtual void MarkAsNotScriptCreated(const char* aCommand
) override
;
201 * True if this is a script-created HTML5 parser.
203 virtual bool IsScriptCreated() override
;
207 // Not from an external interface
208 // Non-inherited methods
212 * Initializes the parser to load from a channel.
214 virtual nsresult
Initialize(mozilla::dom::Document
* aDoc
, nsIURI
* aURI
,
215 nsISupports
* aContainer
, nsIChannel
* aChannel
);
217 inline nsHtml5Tokenizer
* GetTokenizer() { return mTokenizer
.get(); }
219 void InitializeDocWriteParserState(nsAHtml5TreeBuilderState
* aState
,
222 void DropStreamParser() {
223 if (GetStreamParser()) {
224 GetStreamParser()->DropTimer();
225 mStreamListener
->DropDelegate();
226 mStreamListener
= nullptr;
230 void StartTokenizer(bool aScriptingEnabled
);
232 void ContinueAfterFailedCharsetSwitch();
234 nsHtml5StreamParser
* GetStreamParser() {
235 if (!mStreamListener
) {
238 return mStreamListener
->GetDelegate();
241 void PermanentlyUndefineInsertionPoint() {
242 mInsertionPointPermanentlyUndefined
= true;
246 * Parse until pending data is exhausted or a script blocks the parser
248 nsresult
ParseUntilBlocked();
251 * Start our executor. This is meant to be used from document.open() _only_
252 * and does some work similar to what nsHtml5StreamParser::OnStartRequest does
255 nsresult
StartExecutor();
258 virtual ~nsHtml5Parser();
263 * Whether the last character tokenized was a carriage return (for CRLF)
268 * Whether the last character tokenized was a carriage return (for CRLF)
269 * when preparsing document.write.
271 bool mDocWriteSpeculativeLastWasCR
;
274 * The parser is blocking on the load of an external script from a web
275 * page, or any number of extension content scripts.
280 * Whether the document.write() speculator is already active.
282 bool mDocWriteSpeculatorActive
;
285 * The number of IncrementScriptNestingLevel calls we've seen without a
286 * matching DecrementScriptNestingLevel.
288 int32_t mScriptNestingLevel
;
291 * True if document.close() has been called.
293 bool mDocumentClosed
;
295 bool mInDocumentWrite
;
298 * This is set when the tokenizer has seen EOF. The purpose is to
299 * keep the insertion point undefined between the time the
300 * parser has reached the point where it can't accept more input
301 * and the time the document's mParser is set to nullptr.
302 * Scripts can run during this time period due to an update
303 * batch ending and due to various end-of-parse events firing.
304 * (Setting mParser on the document to nullptr at the point
305 * where this flag gets set to true would break things that for
306 * legacy reasons assume that mParser on the document stays
307 * non-null through the end-of-parse events.)
309 bool mInsertionPointPermanentlyUndefined
;
311 // Portable parser objects
313 * The first buffer in the pending UTF-16 buffer queue
315 RefPtr
<nsHtml5OwningUTF16Buffer
> mFirstBuffer
;
318 * The last buffer in the pending UTF-16 buffer queue. Always points
319 * to a sentinel object with nullptr as its parser key.
321 nsHtml5OwningUTF16Buffer
* mLastBuffer
; // weak ref;
324 * The tree operation executor
326 RefPtr
<nsHtml5TreeOpExecutor
> mExecutor
;
329 * The HTML5 tree builder
331 const mozilla::UniquePtr
<nsHtml5TreeBuilder
> mTreeBuilder
;
334 * The HTML5 tokenizer
336 const mozilla::UniquePtr
<nsHtml5Tokenizer
> mTokenizer
;
339 * Another HTML5 tree builder for preloading document.written content.
341 mozilla::UniquePtr
<nsHtml5TreeBuilder
> mDocWriteSpeculativeTreeBuilder
;
344 * Another HTML5 tokenizer for preloading document.written content.
346 mozilla::UniquePtr
<nsHtml5Tokenizer
> mDocWriteSpeculativeTokenizer
;
349 * The stream listener holding the stream parser.
351 RefPtr
<nsHtml5StreamListener
> mStreamListener
;
356 int32_t mRootContextLineNumber
;
359 * Whether it's OK to transfer parsing back to the stream parser
361 bool mReturnToStreamParserPermitted
;
364 * The scoped atom table
366 nsHtml5AtomTable mAtomTable
;