Backed out changeset ddccd40117a0 (bug 1853271) for causing bug 1854769. CLOSED TREE
[gecko.git] / parser / htmlparser / nsExpatDriver.h
blobb07ba72b6b4626e780a587d2e192d513e015521a
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_EXPAT_DRIVER__
7 #define NS_EXPAT_DRIVER__
9 #include "expat_config.h"
10 #include "expat.h"
11 #include "nsCOMPtr.h"
12 #include "nsString.h"
13 #include "nsIDTD.h"
14 #include "nsIInputStream.h"
15 #include "nsIParser.h"
16 #include "nsCycleCollectionParticipant.h"
17 #include "nsScanner.h"
19 #include "rlbox_expat.h"
20 #include "nsRLBoxExpatDriver.h"
21 #include "mozilla/UniquePtr.h"
23 class nsIExpatSink;
24 struct nsCatalogData;
25 class RLBoxExpatSandboxData;
26 namespace mozilla {
27 template <typename, size_t>
28 class Array;
31 class nsExpatDriver : public nsIDTD {
32 virtual ~nsExpatDriver();
34 public:
35 NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
36 NS_DECL_NSIDTD
37 NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
39 nsExpatDriver();
41 nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
43 nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
45 int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
46 const char16_t* aBase, const char16_t* aSystemId,
47 const char16_t* aPublicId);
48 static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
49 tainted_expat<void*> aUserData,
50 tainted_expat<const char16_t*> aName,
51 tainted_expat<const char16_t**> aAtts);
52 static void HandleStartElementForSystemPrincipal(
53 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
54 tainted_expat<const char16_t*> aName,
55 tainted_expat<const char16_t**> aAtts);
56 static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
57 tainted_expat<void*> aUserData,
58 tainted_expat<const char16_t*> aName);
59 static void HandleEndElementForSystemPrincipal(
60 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
61 tainted_expat<const char16_t*> aName);
62 nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
63 nsresult HandleComment(const char16_t* aName);
64 nsresult HandleProcessingInstruction(const char16_t* aTarget,
65 const char16_t* aData);
66 nsresult HandleXMLDeclaration(const char16_t* aVersion,
67 const char16_t* aEncoding, int32_t aStandalone);
68 nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
69 nsresult HandleStartCdataSection();
70 nsresult HandleEndCdataSection();
71 nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
72 const char16_t* aSysid,
73 const char16_t* aPubid,
74 bool aHasInternalSubset);
75 nsresult HandleEndDoctypeDecl();
77 private:
78 // Load up an external stream to get external entity information
79 nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
80 const char16_t* aURLStr,
81 nsIURI* aBaseURI,
82 nsIInputStream** aStream,
83 nsIURI** aAbsURI);
85 enum class ChunkOrBufferIsFinal {
86 None,
87 FinalChunk,
88 FinalChunkAndBuffer,
91 /**
92 * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
93 * aLength should be 0. The result of the call will be stored in
94 * mInternalState. Expat will parse as much of the buffer as it can and store
95 * the rest in its internal buffer.
97 * @param aBuffer the buffer to pass to Expat. May be null.
98 * @param aLength the length of the buffer to pass to Expat (in number of
99 * char16_t's). Must be 0 if aBuffer is null and > 0 if
100 * aBuffer is not null.
101 * @param aIsFinal whether this is the last chunk in a row passed to
102 * ParseChunk, and if so whether it's the last chunk and
103 * buffer passed to ParseChunk (meaning there will be no more
104 * calls to ParseChunk for the document being parsed).
105 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
106 * doesn't include the char16_t's that Expat stored in
107 * its buffer but didn't parse yet.
108 * @param aLastLineLength [out] the length of the last line that Expat has
109 * consumed. This will only be computed if
110 * aIsFinal is not None or mInternalState is set
111 * to a failure.
113 void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
114 ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
115 XML_Size* aLastLineLength);
117 * Wrapper for ParseChunk. If the buffer is too large to be copied into the
118 * sandbox all at once, splits it into chunks and invokes ParseChunk in a
119 * loop.
121 * @param aBuffer the buffer to pass to Expat. May be null.
122 * @param aLength the length of the buffer to pass to Expat (in number of
123 * char16_t's). Must be 0 if aBuffer is null and > 0 if
124 * aBuffer is not null.
125 * @param aIsFinal whether there will definitely not be any more new buffers
126 * passed in to ChunkAndParseBuffer
127 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
128 * doesn't include the char16_t's that Expat stored in
129 * its buffer but didn't parse yet.
130 * @param aLastLineLength [out] the length of the last line that Expat has
131 * consumed.
133 void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
134 bool aIsFinal, uint32_t* aPassedToExpat,
135 uint32_t* aConsumed, XML_Size* aLastLineLength);
137 nsresult HandleError();
139 void MaybeStopParser(nsresult aState);
// Returns true when mInternalState is either NS_ERROR_HTMLPARSER_BLOCK or
// NS_ERROR_HTMLPARSER_INTERRUPTED, and false for every other state.
141 bool BlockedOrInterrupted() {
142 return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
143 mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
146 // Expat allows us to set the base URI for entities. It doesn't use the base
147 // URI itself, but just passes it along to all the entity handlers (just the
148 // external entity reference handler for us). It does expect the base URI as a
149 // null-terminated string, with the same character type as the parsed buffers
150 // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
151 // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
152 // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
153 // Most of the time this base URI is unused (the external entity handler is
154 // rarely called), but when it is we also convert it back to a nsIURI, so we
155 // convert the string back to UTF-8.
157 // We'd rather not do any of these conversions and copies, so we use a (hacky)
158 // workaround. We store all base URIs in an array of nsIURIs. Instead of
159 // passing the real URI to Expat as a string, we pass it a null-terminated
160 // 2-character buffer. The first character of that buffer stores the index of
161 // the corresponding nsIURI in the array (incremented by 1 because 0 is used
162 // to terminate a string). The entity handler can then use the index from the
163 // base URI that Expat passes it to look up the right nsIURI from the array.
165 // GetExpatBaseURI pushes the nsIURI to the array, and creates the
166 // two-character buffer for it.
168 // GetBaseURI looks up the right nsIURI in the array, based on the index from
169 // the two-character buffer.
170 using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
171 ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
172 nsIURI* GetBaseURI(const XML_Char* aBase) const;
174 RLBoxExpatSandboxData* SandboxData() const;
175 rlbox_sandbox_expat* Sandbox() const;
177 // Destroy expat parser and return sandbox to pool
178 void Destroy();
180 mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
181 tainted_expat<XML_Parser> mExpatParser;
183 nsString mLastLine;
184 nsString mCDataText;
185 // Various parts of a doctype
186 nsString mDoctypeName;
187 nsString mSystemID;
188 nsString mPublicID;
189 nsString mInternalSubset;
190 bool mInCData;
191 bool mInInternalSubset;
192 bool mInExternalDTD;
193 bool mMadeFinalCallToExpat;
195 // Used to track if we're in the parser.
196 bool mInParser;
198 nsresult mInternalState;
200 // The length of the data in Expat's buffer (in number of char16_t's).
201 uint32_t mExpatBuffered;
203 uint16_t mTagDepth;
205 // These sinks all refer to the same conceptual object. mOriginalSink is
206 // identical with the nsIContentSink* passed to WillBuildModel, and exists
207 // only to avoid QI-ing back to nsIContentSink*.
208 nsCOMPtr<nsIContentSink> mOriginalSink;
209 nsCOMPtr<nsIExpatSink> mSink;
211 const nsCatalogData* mCatalogData; // weak
212 nsTArray<nsCOMPtr<nsIURI>> mURIs;
214 // Used for error reporting.
215 uint64_t mInnerWindowID;
// Data attached to an Expat sandbox taken from the sandbox pool. It owns the
// rlbox_sandbox_expat instance (mSandbox) and holds the sandbox callbacks for
// the Expat handlers declared below.
218 class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
// RLBoxExpatSandboxPool and nsExpatDriver are granted access to the private
// members (the sandbox and the callback handles).
219 friend class RLBoxExpatSandboxPool;
220 friend class nsExpatDriver;
222 public:
// aSize is forwarded unchanged to the RLBoxSandboxDataBase constructor.
223 explicit RLBoxExpatSandboxData(uint64_t aSize)
224 : mozilla::RLBoxSandboxDataBase(aSize) {
225 MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
227 ~RLBoxExpatSandboxData();
// Returns the raw pointer held by mSandbox; ownership stays with this object.
228 rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
229 // After getting a sandbox from the pool we need to register the
230 // Handle{Start,End}Element callbacks and associate the driver with the
231 // sandbox.
// NOTE(review): IsSystemPrincipal presumably selects between the regular and
// the *ForSystemPrincipal element handlers declared on nsExpatDriver, and
// aDriver is presumably the nsExpatDriver instance -- confirm against the
// implementation.
232 void AttachDriver(bool IsSystemPrincipal, void* aDriver);
// NOTE(review): presumably the counterpart of AttachDriver, called before the
// sandbox is reused -- confirm against the implementation.
233 void DetachDriver();
235 private:
236 mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
237 // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
238 // and consequently across sandbox reuses.
239 sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
240 sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
241 sandbox_callback_expat<XML_ProcessingInstructionHandler>
242 mHandleProcessingInstruction;
243 sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
244 sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
245 sandbox_callback_expat<XML_CommentHandler> mHandleComment;
246 sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
247 sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
248 sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
249 sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
250 // Expat callbacks specific to each driver, and thus (re)set across sandbox
251 // reuses.
252 sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
253 sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
256 #endif