1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_EXPAT_DRIVER__
7 #define NS_EXPAT_DRIVER__
9 #include "expat_config.h"
14 #include "nsIInputStream.h"
15 #include "nsIParser.h"
16 #include "nsCycleCollectionParticipant.h"
18 #include "rlbox_expat.h"
19 #include "nsRLBoxExpatDriver.h"
20 #include "mozilla/UniquePtr.h"
// Forward declaration; the full class definition appears later in this header.
24 class RLBoxExpatSandboxData
;
26 template <typename
, size_t>
// XML parser driver built on Expat. Derives from nsIDTD, declares the Handle*
// members that correspond to Expat's callbacks, and holds the state of the
// RLBox-sandboxed parser instance (mSandboxPoolData, mExpatParser).
30 class nsExpatDriver
: public nsIDTD
{
// Virtual destructor; declared here only, the definition is not in this header.
31 virtual ~nsExpatDriver();
// XPCOM boilerplate: cycle-collecting nsISupports declarations (the _FINAL
// flavour) followed by the cycle-collection participant for nsExpatDriver.
34 NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
36 NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver
)
// Prepares the driver for parsing. Only declared here.
// @param aURI  URI for the document being parsed -- presumably the one
//              registered as Expat's base URI; confirm in the definition.
// @param aSink content sink for the parse output -- presumably what
//              mOriginalSink / mSink end up referring to; confirm.
// @return nsresult status code.
40 nsresult
Initialize(nsIURI
* aURI
, nsIContentSink
* aSink
);
// Continues parsing with input taken from aScanner. Only declared here.
// @param aScanner      scanner that supplies the input.
// @param aIsFinalChunk presumably true when no further input will follow
//                      this call (inferred from the name -- confirm).
// @return nsresult status code.
42 nsresult
ResumeParse(nsScanner
& aScanner
, bool aIsFinalChunk
);
// Driver-side handler for Expat's external entity reference event (the
// sandbox data keeps the matching XML_ExternalEntityRefHandler callback).
// @param aOpenEntityNames names of the currently open entities, as passed by
//                         Expat.
// @param aBase     base URI as passed by Expat. Per the ExpatBaseURI notes in
//                  this class this is the two-character index buffer, not a
//                  real URI string; GetBaseURI maps it back to an nsIURI.
// @param aSystemId system identifier of the entity.
// @param aPublicId public identifier of the entity.
// @return int status for Expat -- NOTE(review): the exact values are not
//         visible in this header; confirm against Expat's handler contract.
44 int HandleExternalEntityRef(const char16_t
* aOpenEntityNames
,
45 const char16_t
* aBase
, const char16_t
* aSystemId
,
46 const char16_t
* aPublicId
);
// Static start-element callback. It receives the sandbox plus tainted_expat
// arguments, i.e. values that are wrapped because they come from the sandboxed
// Expat.
// @param aSandbox  the RLBox sandbox the callback is invoked from.
// @param aUserData tainted user-data pointer -- presumably identifies the
//                  driver that AttachDriver associated with the sandbox;
//                  confirm in the definition.
// @param aName     tainted element name.
// @param aAtts     tainted attribute array.
47 static void HandleStartElement(rlbox_sandbox_expat
& aSandbox
,
48 tainted_expat
<void*> aUserData
,
49 tainted_expat
<const char16_t
*> aName
,
50 tainted_expat
<const char16_t
**> aAtts
);
// System-principal flavour of the start-element callback; same parameters as
// HandleStartElement. NOTE(review): presumably selected when AttachDriver is
// called with IsSystemPrincipal == true -- confirm in the definition.
51 static void HandleStartElementForSystemPrincipal(
52 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
53 tainted_expat
<const char16_t
*> aName
,
54 tainted_expat
<const char16_t
**> aAtts
);
// Static end-element callback, the counterpart of HandleStartElement.
// @param aSandbox  the RLBox sandbox the callback is invoked from.
// @param aUserData tainted user-data pointer (see HandleStartElement).
// @param aName     tainted element name.
55 static void HandleEndElement(rlbox_sandbox_expat
& aSandbox
,
56 tainted_expat
<void*> aUserData
,
57 tainted_expat
<const char16_t
*> aName
);
// System-principal flavour of the end-element callback; same parameters as
// HandleEndElement. NOTE(review): presumably selected together with
// HandleStartElementForSystemPrincipal -- confirm in the definition.
58 static void HandleEndElementForSystemPrincipal(
59 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
60 tainted_expat
<const char16_t
*> aName
);
// Handler for character data reported by Expat (the sandbox data keeps a
// matching XML_CharacterDataHandler callback).
// @param aCData  the character data.
// @param aLength length of aCData -- presumably in char16_t units; confirm.
// @return nsresult status code.
61 nsresult
HandleCharacterData(const char16_t
* aCData
, const uint32_t aLength
);
// Handler for a comment reported by Expat (matching callback type:
// XML_CommentHandler).
// @param aName NOTE(review): despite the name this is presumably the comment
//              text -- confirm in the definition.
// @return nsresult status code.
62 nsresult
HandleComment(const char16_t
* aName
);
// Handler for a processing instruction reported by Expat (matching callback
// type: XML_ProcessingInstructionHandler).
// @param aTarget target of the processing instruction.
// @param aData   data of the processing instruction.
// @return nsresult status code.
63 nsresult
HandleProcessingInstruction(const char16_t
* aTarget
,
64 const char16_t
* aData
);
// Handler for the XML declaration reported by Expat (matching callback type:
// XML_XmlDeclHandler).
// @param aVersion    version string from the declaration.
// @param aEncoding   encoding string from the declaration.
// @param aStandalone standalone indicator as passed by Expat -- the value
//                    encoding is not visible in this header; see Expat's
//                    documentation.
// @return nsresult status code.
65 nsresult
HandleXMLDeclaration(const char16_t
* aVersion
,
66 const char16_t
* aEncoding
, int32_t aStandalone
);
// Handler for Expat's default event (matching callback type:
// XML_DefaultHandler).
// @param aData   the data passed by Expat.
// @param aLength length of aData -- presumably in char16_t units; confirm.
// @return nsresult status code.
67 nsresult
HandleDefault(const char16_t
* aData
, const uint32_t aLength
);
// Handlers for the start and end of a CDATA section (matching callback types:
// XML_StartCdataSectionHandler / XML_EndCdataSectionHandler). Both take no
// arguments and return an nsresult status code.
68 nsresult
HandleStartCdataSection();
69 nsresult
HandleEndCdataSection();
// Handler for the start of a doctype declaration (matching callback type:
// XML_StartDoctypeDeclHandler).
// @param aDoctypeName       name of the doctype.
// @param aSysid             system identifier of the doctype.
// @param aPubid             public identifier of the doctype.
// @param aHasInternalSubset whether the declaration has an internal subset.
// @return nsresult status code.
70 nsresult
HandleStartDoctypeDecl(const char16_t
* aDoctypeName
,
71 const char16_t
* aSysid
,
72 const char16_t
* aPubid
,
73 bool aHasInternalSubset
);
// Handler for the end of a doctype declaration (matching callback type:
// XML_EndDoctypeDeclHandler). Returns an nsresult status code.
74 nsresult
HandleEndDoctypeDecl();
77 // Load up an external stream to get external entity information
78 nsresult
OpenInputStreamFromExternalDTD(const char16_t
* aFPIStr
,
79 const char16_t
* aURLStr
,
81 nsIInputStream
** aStream
,
84 enum class ChunkOrBufferIsFinal
{
91 * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
92 * aLength should be 0. The result of the call will be stored in
93 * mInternalState. Expat will parse as much of the buffer as it can and store
94 * the rest in its internal buffer.
96 * @param aBuffer the buffer to pass to Expat. May be null.
97 * @param aLength the length of the buffer to pass to Expat (in number of
98 * char16_t's). Must be 0 if aBuffer is null and > 0 if
99 * aBuffer is not null.
100 * @param aIsFinal whether this is the last chunk in a row passed to
101 * ParseChunk, and if so whether it's the last chunk and
102 * buffer passed to ParseChunk (meaning there will be no more
103 * calls to ParseChunk for the document being parsed).
104 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
105 * doesn't include the char16_t's that Expat stored in
106 * its buffer but didn't parse yet.
107 * @param aLastLineLength [out] the length of the last line that Expat has
108 * consumed. This will only be computed if
109 * aIsFinal is not None or mInternalState is set
// Declaration of ParseChunk; the parameter contract is spelled out in the
// comment block directly above. There is no return value: per that comment
// the outcome is stored in mInternalState, and progress is reported through
// the aConsumed / aLastLineLength out-parameters.
112 void ParseChunk(const char16_t
* aBuffer
, uint32_t aLength
,
113 ChunkOrBufferIsFinal aIsFinal
, uint32_t* aConsumed
,
114 XML_Size
* aLastLineLength
);
116 * Wrapper for ParseBuffer. If the buffer is too large to be copied into the
117 * sandbox all at once, splits it into chunks and invokes ParseBuffer in a
120 * @param aBuffer the buffer to pass to Expat. May be null.
121 * @param aLength the length of the buffer to pass to Expat (in number of
122 * char16_t's). Must be 0 if aBuffer is null and > 0 if
123 * aBuffer is not null.
124 * @param aIsFinal whether there will definitely not be any more new buffers
125 * passed in to ParseBuffer
126 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
127 * doesn't include the char16_t's that Expat stored in
128 * its buffer but didn't parse yet.
129 * @param aLastLineLength [out] the length of the last line that Expat has
// Declaration of ChunkAndParseBuffer. Unlike ParseChunk, aIsFinal is a plain
// bool here.
// NOTE(review): aPassedToExpat is not described in the comment block above;
// it is presumably an [out] count of the char16_t's handed to Expat -- confirm
// against the definition and document it there.
// NOTE(review): the comment above refers to "ParseBuffer", but no member of
// that name is visible in this header; it may mean ParseChunk -- confirm.
132 void ChunkAndParseBuffer(const char16_t
* aBuffer
, uint32_t aLength
,
133 bool aIsFinal
, uint32_t* aPassedToExpat
,
134 uint32_t* aConsumed
, XML_Size
* aLastLineLength
);
// Error-handling entry point; takes no arguments and returns an nsresult.
// Only declared here -- what is reported, and to whom, is not visible in this
// header.
136 nsresult
HandleError();
// Only declared here.
// @param aState nsresult state -- presumably the condition that decides
//               whether (and how) the parser is stopped; confirm in the
//               definition.
138 void MaybeStopParser(nsresult aState
);
// Returns true when mInternalState is either NS_ERROR_HTMLPARSER_BLOCK or
// NS_ERROR_HTMLPARSER_INTERRUPTED, false for any other state.
140 bool BlockedOrInterrupted() {
141 return mInternalState
== NS_ERROR_HTMLPARSER_BLOCK
||
142 mInternalState
== NS_ERROR_HTMLPARSER_INTERRUPTED
;
145 // Expat allows us to set the base URI for entities. It doesn't use the base
146 // URI itself, but just passes it along to all the entity handlers (just the
147 // external entity reference handler for us). It does expect the base URI as a
148 // null-terminated string, with the same character type as the parsed buffers
149 // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
150 // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
151 // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
152 // Most of the time this base URI is unused (the external entity handler is
153 // rarely called), but when it is we also convert it back to a nsIURI, so we
154 // convert the string back to UTF-8.
156 // We'd rather not do any of these conversions and copies, so we use a (hacky)
157 // workaround. We store all base URIs in an array of nsIURIs. Instead of
158 // passing the real URI to Expat as a string, we pass it a null-terminated
159 // 2-character buffer. The first character of that buffer stores the index of
160 // the corresponding nsIURI in the array (incremented by 1 because 0 is used
161 // to terminate a string). The entity handler can then use the index from the
162 // base URI that Expat passes it to look up the right nsIURI from the array.
164 // GetExpatBaseURI pushes the nsIURI to the array, and creates the
165 // two-character buffer for it.
167 // GetBaseURI looks up the right nsIURI in the array, based on the index from
168 // the two-character buffer.
// The two-XML_Char buffer handed to Expat in place of a real base URI:
// element 0 carries the (index + 1) into mURIs, element 1 is the terminator.
169 using ExpatBaseURI
= mozilla::Array
<XML_Char
, 2>;
// Registers aURI in the array of base URIs and returns the two-character
// buffer that stands in for it when talking to Expat.
// @param aURI the base URI to register.
170 ExpatBaseURI
GetExpatBaseURI(nsIURI
* aURI
);
// Inverse of GetExpatBaseURI: maps the two-character buffer that Expat passes
// back to the nsIURI stored in the array.
// @param aBase the base "URI" buffer as received from Expat.
// @return the corresponding nsIURI. NOTE(review): behaviour for an
//         out-of-range index is not visible here -- confirm in the definition.
171 nsIURI
* GetBaseURI(const XML_Char
* aBase
) const;
// Const accessors for the sandbox state used by this driver: SandboxData()
// yields the RLBoxExpatSandboxData, Sandbox() the rlbox_sandbox_expat itself.
// Both are only declared here -- presumably they go through mSandboxPoolData;
// confirm in the definitions.
173 RLBoxExpatSandboxData
* SandboxData() const;
174 rlbox_sandbox_expat
* Sandbox() const;
176 // Destroy expat parser and return sandbox to pool
// Uniquely-owned handle to this driver's sandbox pool data.
179 mozilla::UniquePtr
<mozilla::RLBoxSandboxPoolData
> mSandboxPoolData
;
// The Expat parser handle, kept as a tainted value because it belongs to the
// sandbox.
180 tainted_expat
<XML_Parser
> mExpatParser
;
184 // Various parts of a doctype
185 nsString mDoctypeName
;
188 nsString mInternalSubset
;
// Presumably true while the doctype's internal subset is being processed --
// confirm in the implementation.
190 bool mInInternalSubset
;
// Presumably set once the final call into Expat has been made -- confirm in
// the implementation.
192 bool mMadeFinalCallToExpat
;
194 // Used to track if we're in the parser.
// NOTE(review): the line above reads like the description of a different
// member than the one that follows. mInternalState holds the result of the
// last ParseChunk call and is what BlockedOrInterrupted() inspects.
197 nsresult mInternalState
;
199 // The length of the data in Expat's buffer (in number of char16_t's).
200 uint32_t mExpatBuffered
;
204 // These sinks all refer to the same conceptual object. mOriginalSink is
205 // identical with the nsIContentSink* passed to WillBuildModel, and exists
206 // only to avoid QI-ing back to nsIContentSink*.
207 nsCOMPtr
<nsIContentSink
> mOriginalSink
;
208 nsCOMPtr
<nsIExpatSink
> mSink
;
// Non-owning pointer to catalog data (see the "weak" marker below).
210 const nsCatalogData
* mCatalogData
; // weak
// The array of base URIs that the ExpatBaseURI index buffers refer to.
211 nsTArray
<nsCOMPtr
<nsIURI
>> mURIs
;
213 // Used for error reporting.
214 uint64_t mInnerWindowID
;
// Per-sandbox state for the Expat sandbox: owns the rlbox_sandbox_expat
// instance (mSandbox) and the callbacks registered with it. Derives from
// mozilla::RLBoxSandboxDataBase.
217 class RLBoxExpatSandboxData
: public mozilla::RLBoxSandboxDataBase
{
// The sandbox pool and the driver are granted access to this class's
// non-public members.
218 friend class RLBoxExpatSandboxPool
;
219 friend class nsExpatDriver
;
// Forwards aSize to the RLBoxSandboxDataBase constructor and records the
// construction with MOZ_COUNT_CTOR.
// @param aSize passed through unchanged to the base class -- its meaning
//              (presumably the sandbox size) is defined there; confirm.
222 explicit RLBoxExpatSandboxData(uint64_t aSize
)
223 : mozilla::RLBoxSandboxDataBase(aSize
) {
224 MOZ_COUNT_CTOR(RLBoxExpatSandboxData
);
// Destructor; declared here only, the definition is not in this header.
226 ~RLBoxExpatSandboxData();
// Returns the raw, non-owning pointer to the sandbox held by mSandbox.
227 rlbox_sandbox_expat
* Sandbox() const { return mSandbox
.get(); }
228 // After getting a sandbox from the pool we need to register the
229 // Handle{Start,End}Element callbacks and associate the driver with the
// @param IsSystemPrincipal presumably selects the ...ForSystemPrincipal
//                          element handlers -- confirm in the definition.
// @param aDriver           the driver to associate, passed as an untyped
//                          pointer.
231 void AttachDriver(bool IsSystemPrincipal
, void* aDriver
);
// The uniquely-owned sandbox instance; Sandbox() exposes it as a raw pointer.
235 mozilla::UniquePtr
<rlbox_sandbox_expat
> mSandbox
;
236 // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
237 // and consequently across sandbox reuses.
// One sandbox_callback_expat handle per Expat handler type; each name matches
// the nsExpatDriver::Handle* member of the same name.
238 sandbox_callback_expat
<XML_XmlDeclHandler
> mHandleXMLDeclaration
;
239 sandbox_callback_expat
<XML_CharacterDataHandler
> mHandleCharacterData
;
240 sandbox_callback_expat
<XML_ProcessingInstructionHandler
>
241 mHandleProcessingInstruction
;
242 sandbox_callback_expat
<XML_DefaultHandler
> mHandleDefault
;
243 sandbox_callback_expat
<XML_ExternalEntityRefHandler
> mHandleExternalEntityRef
;
244 sandbox_callback_expat
<XML_CommentHandler
> mHandleComment
;
245 sandbox_callback_expat
<XML_StartCdataSectionHandler
> mHandleStartCdataSection
;
246 sandbox_callback_expat
<XML_EndCdataSectionHandler
> mHandleEndCdataSection
;
247 sandbox_callback_expat
<XML_StartDoctypeDeclHandler
> mHandleStartDoctypeDecl
;
248 sandbox_callback_expat
<XML_EndDoctypeDeclHandler
> mHandleEndDoctypeDecl
;
// Callback handles for the element handlers; these are the
// Handle{Start,End}Element callbacks that AttachDriver's comment says must be
// registered after taking a sandbox from the pool.
249 // Expat callbacks specific to each driver, and thus (re)set across sandbox
251 sandbox_callback_expat
<XML_StartElementHandler
> mHandleStartElement
;
252 sandbox_callback_expat
<XML_EndElementHandler
> mHandleEndElement
;