Bug 1802897 - Part 12: Make gray unmarking work with parallel marking r=sfink
[gecko.git] / parser / htmlparser / nsExpatDriver.h
blob78f9caf343078dfa4b86428742b68b49d2118778
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_EXPAT_DRIVER__
7 #define NS_EXPAT_DRIVER__
9 #include "expat_config.h"
10 #include "expat.h"
11 #include "nsCOMPtr.h"
12 #include "nsString.h"
13 #include "nsIDTD.h"
14 #include "nsIInputStream.h"
15 #include "nsIParser.h"
16 #include "nsCycleCollectionParticipant.h"
18 #include "rlbox_expat.h"
19 #include "nsRLBoxExpatDriver.h"
20 #include "mozilla/UniquePtr.h"
// Forward declarations. Within this header these types are only named in
// declarations: nsIExpatSink as the nsCOMPtr target of mSink, nsCatalogData
// through the mCatalogData pointer, RLBoxExpatSandboxData as the return type
// of SandboxData() (the class itself is defined further down), and
// mozilla::Array through the ExpatBaseURI alias.
22 class nsIExpatSink;
23 struct nsCatalogData;
24 class RLBoxExpatSandboxData;
25 namespace mozilla {
26 template <typename, size_t>
27 class Array;
// nsExpatDriver is the nsIDTD implementation that drives Expat. The parser
// handle (mExpatParser) and the element-callback arguments are RLBox
// tainted_expat values, i.e. Expat is reached through an RLBox sandbox.
30 class nsExpatDriver : public nsIDTD {
// Declared before `public:`, so the destructor is private.
// NOTE(review): presumably so that instances are only destroyed through
// reference counting -- confirm against the ISUPPORTS macro below.
31 virtual ~nsExpatDriver();
33 public:
// Macro-provided declarations (judging by the macro names: cycle-collecting
// nsISupports, the nsIDTD interface methods, and the cycle-collection
// participant class) -- see the macro definitions for the exact expansion.
34 NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
35 NS_DECL_NSIDTD
36 NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
38 nsExpatDriver();
40 nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
42 nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
// External entity reference handler. Unlike the element handlers below it is
// a non-static member taking plain char16_t pointers. aBase is the 2-character
// buffer described above ExpatBaseURI, not a real URI string.
44 int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
45 const char16_t* aBase, const char16_t* aSystemId,
46 const char16_t* aPublicId);
// Start/end element callbacks. These are static and receive the sandbox plus
// tainted arguments straight from it.
// NOTE(review): aUserData presumably identifies the driver that was handed to
// RLBoxExpatSandboxData::AttachDriver, and the ForSystemPrincipal variants are
// presumably the ones registered when AttachDriver's IsSystemPrincipal flag is
// true -- confirm against the implementation.
47 static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
48 tainted_expat<void*> aUserData,
49 tainted_expat<const char16_t*> aName,
50 tainted_expat<const char16_t**> aAtts);
51 static void HandleStartElementForSystemPrincipal(
52 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
53 tainted_expat<const char16_t*> aName,
54 tainted_expat<const char16_t**> aAtts);
55 static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
56 tainted_expat<void*> aUserData,
57 tainted_expat<const char16_t*> aName);
58 static void HandleEndElementForSystemPrincipal(
59 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
60 tainted_expat<const char16_t*> aName);
// The remaining handlers are non-static members that take plain char16_t
// pointers (where they take data at all) and report their outcome as nsresult.
61 nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
62 nsresult HandleComment(const char16_t* aName);
63 nsresult HandleProcessingInstruction(const char16_t* aTarget,
64 const char16_t* aData);
65 nsresult HandleXMLDeclaration(const char16_t* aVersion,
66 const char16_t* aEncoding, int32_t aStandalone);
67 nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
68 nsresult HandleStartCdataSection();
69 nsresult HandleEndCdataSection();
70 nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
71 const char16_t* aSysid,
72 const char16_t* aPubid,
73 bool aHasInternalSubset);
74 nsresult HandleEndDoctypeDecl();
76 private:
77 // Load up an external stream to get external entity information
78 nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
79 const char16_t* aURLStr,
80 nsIURI* aBaseURI,
81 nsIInputStream** aStream,
82 nsIURI** aAbsURI);
// Value of ParseChunk's aIsFinal argument: tells whether the chunk is the last
// one in a row passed to ParseChunk and, if so, whether it is also the last
// chunk and buffer for the document being parsed.
84 enum class ChunkOrBufferIsFinal {
85 None,
86 FinalChunk,
87 FinalChunkAndBuffer,
90 /**
91 * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
92 * aLength should be 0. The result of the call will be stored in
93 * mInternalState. Expat will parse as much of the buffer as it can and store
94 * the rest in its internal buffer.
96 * @param aBuffer the buffer to pass to Expat. May be null.
97 * @param aLength the length of the buffer to pass to Expat (in number of
98 * char16_t's). Must be 0 if aBuffer is null and > 0 if
99 * aBuffer is not null.
100 * @param aIsFinal whether this is the last chunk in a row passed to
101 * ParseChunk, and if so whether it's the last chunk and
102 * buffer passed to ParseChunk (meaning there will be no more
103 * calls to ParseChunk for the document being parsed).
104 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
105 * doesn't include the char16_t's that Expat stored in
106 * its buffer but didn't parse yet.
107 * @param aLastLineLength [out] the length of the last line that Expat has
108 * consumed. This will only be computed if
109 * aIsFinal is not None or mInternalState is set
110 * to a failure.
112 void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
113 ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
114 XML_Size* aLastLineLength);
116 * Wrapper for ParseChunk. If the buffer is too large to be copied into the
117 * sandbox all at once, splits it into chunks and invokes ParseChunk in a
118 * loop.
* NOTE(review): this comment used to name ParseBuffer, which this class does
* not declare; ParseChunk is assumed to be the function meant -- confirm
* against the implementation.
120 * @param aBuffer the buffer to pass to Expat. May be null.
121 * @param aLength the length of the buffer to pass to Expat (in number of
122 * char16_t's). Must be 0 if aBuffer is null and > 0 if
123 * aBuffer is not null.
124 * @param aIsFinal whether there will definitely not be any more new buffers
125 * passed in for parsing
* @param aPassedToExpat [out] NOTE(review): previously undocumented;
* presumably the number of char16_t's that were
* handed to Expat -- confirm against the
* implementation.
126 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
127 * doesn't include the char16_t's that Expat stored in
128 * its buffer but didn't parse yet.
129 * @param aLastLineLength [out] the length of the last line that Expat has
130 * consumed.
132 void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
133 bool aIsFinal, uint32_t* aPassedToExpat,
134 uint32_t* aConsumed, XML_Size* aLastLineLength);
136 nsresult HandleError();
138 void MaybeStopParser(nsresult aState);
// True when mInternalState is NS_ERROR_HTMLPARSER_BLOCK or
// NS_ERROR_HTMLPARSER_INTERRUPTED.
140 bool BlockedOrInterrupted() {
141 return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
142 mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
145 // Expat allows us to set the base URI for entities. It doesn't use the base
146 // URI itself, but just passes it along to all the entity handlers (just the
147 // external entity reference handler for us). It does expect the base URI as a
148 // null-terminated string, with the same character type as the parsed buffers
149 // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
150 // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
151 // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
152 // Most of the time this base URI is unused (the external entity handler is
153 // rarely called), but when it is we also convert it back to a nsIURI, so we
154 // convert the string back to UTF-8.
156 // We'd rather not do any of these conversions and copies, so we use a (hacky)
157 // workaround. We store all base URIs in an array of nsIURIs. Instead of
158 // passing the real URI to Expat as a string, we pass it a null-terminated
159 // 2-character buffer. The first character of that buffer stores the index of
160 // the corresponding nsIURI in the array (incremented by 1 because 0 is used
161 // to terminate a string). The entity handler can then use the index from the
162 // base URI that Expat passes it to look up the right nsIURI from the array.
164 // GetExpatBaseURI pushes the nsIURI to the array, and creates the
165 // two-character buffer for it.
167 // GetBaseURI looks up the right nsIURI in the array, based on the index from
168 // the two-character buffer.
169 using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
170 ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
171 nsIURI* GetBaseURI(const XML_Char* aBase) const;
173 RLBoxExpatSandboxData* SandboxData() const;
174 rlbox_sandbox_expat* Sandbox() const;
176 // Destroy expat parser and return sandbox to pool
177 void Destroy();
179 mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
// Tainted handle to the Expat parser.
180 tainted_expat<XML_Parser> mExpatParser;
182 nsString mLastLine;
183 nsString mCDataText;
184 // Various parts of a doctype
185 nsString mDoctypeName;
186 nsString mSystemID;
187 nsString mPublicID;
188 nsString mInternalSubset;
189 bool mInCData;
190 bool mInInternalSubset;
191 bool mInExternalDTD;
192 bool mMadeFinalCallToExpat;
194 // Used to track if we're in the parser.
195 bool mInParser;
// Holds the result of the last ParseChunk call (see ParseChunk's comment) and
// is what BlockedOrInterrupted() inspects.
197 nsresult mInternalState;
199 // The length of the data in Expat's buffer (in number of char16_t's).
200 uint32_t mExpatBuffered;
202 uint16_t mTagDepth;
204 // These sinks all refer to the same conceptual object. mOriginalSink is
205 // identical with the nsIContentSink* passed to WillBuildModel, and exists
206 // only to avoid QI-ing back to nsIContentSink*.
207 nsCOMPtr<nsIContentSink> mOriginalSink;
208 nsCOMPtr<nsIExpatSink> mSink;
210 const nsCatalogData* mCatalogData; // weak
// NOTE(review): presumably the array of base URIs described in the comment
// above ExpatBaseURI (filled by GetExpatBaseURI, read by GetBaseURI) -- confirm.
211 nsTArray<nsCOMPtr<nsIURI>> mURIs;
213 // Used for error reporting.
214 uint64_t mInnerWindowID;
// Per-sandbox data for the Expat RLBox sandbox: owns the rlbox_sandbox_expat
// instance (mSandbox) and holds the sandbox callback handles registered for
// the Expat handlers.
217 class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
// Both friends get access to the private sandbox and callback members below.
218 friend class RLBoxExpatSandboxPool;
219 friend class nsExpatDriver;
221 public:
// aSize is forwarded unchanged to RLBoxSandboxDataBase, which defines its
// meaning; the body only records the construction via MOZ_COUNT_CTOR.
222 explicit RLBoxExpatSandboxData(uint64_t aSize)
223 : mozilla::RLBoxSandboxDataBase(aSize) {
224 MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
226 ~RLBoxExpatSandboxData();
// Returns the raw pointer held by mSandbox; ownership stays with this object.
227 rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
228 // After getting a sandbox from the pool we need to register the
229 // Handle{Start,End}Element callbacks and associate the driver with the
230 // sandbox.
// NOTE(review): aDriver is taken as void*; presumably it is the nsExpatDriver
// to associate, and IsSystemPrincipal presumably selects the
// ForSystemPrincipal element handlers -- confirm against the implementation.
231 void AttachDriver(bool IsSystemPrincipal, void* aDriver);
232 void DetachDriver();
234 private:
235 mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
236 // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
237 // and consequently across sandbox reuses.
238 sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
239 sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
240 sandbox_callback_expat<XML_ProcessingInstructionHandler>
241 mHandleProcessingInstruction;
242 sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
243 sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
244 sandbox_callback_expat<XML_CommentHandler> mHandleComment;
245 sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
246 sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
247 sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
248 sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
249 // Expat callbacks specific to each driver, and thus (re)set across sandbox
250 // reuses.
251 sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
252 sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
255 #endif