1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef NS_EXPAT_DRIVER__
7 #define NS_EXPAT_DRIVER__
9 #include "expat_config.h"
14 #include "nsIInputStream.h"
15 #include "nsIParser.h"
16 #include "nsCycleCollectionParticipant.h"
17 #include "nsScanner.h"
19 #include "rlbox_expat.h"
20 #include "nsRLBoxExpatDriver.h"
21 #include "mozilla/UniquePtr.h"
25 class RLBoxExpatSandboxData
;
27 template <typename
, size_t>
31 class nsExpatDriver
: public nsIDTD
{
// Virtual destructor; declaration only, the definition is not in this excerpt.
32 virtual ~nsExpatDriver();
// XPCOM boilerplate macros. Going by their names they declare the
// cycle-collecting nsISupports methods and the cycle-collection participant
// for nsExpatDriver — see nsCycleCollectionParticipant.h to confirm.
35 NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
37 NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver
)
// Declaration only; the body is not part of this header excerpt.
// @param aURI   URI handed to the driver (presumably the URI of the document
//               being parsed — confirm against the implementation).
// @param aSink  content sink handed to the driver.
// @return an nsresult status code.
41 nsresult
Initialize(nsIURI
* aURI
, nsIContentSink
* aSink
);
// Declaration only; the body is not part of this header excerpt.
// @param aScanner       scanner object, taken by non-const reference.
// @param aIsFinalChunk  presumably true when no further data will follow —
//                       confirm against the implementation.
// @return an nsresult status code.
43 nsresult
ResumeParse(nsScanner
& aScanner
, bool aIsFinalChunk
);
// External entity reference handler (non-static member, returns int).
// The base-URI comment further down in this class explains that Expat passes
// the base it was given to the external entity reference handler; aBase is
// presumably that value and is decoded with GetBaseURI() — confirm in the .cpp.
// All four arguments are char16_t strings.
45 int HandleExternalEntityRef(const char16_t
* aOpenEntityNames
,
46 const char16_t
* aBase
, const char16_t
* aSystemId
,
47 const char16_t
* aPublicId
);
// Static start-element callback. It receives the sandbox by reference and all
// other arguments wrapped in tainted_expat<>. The comment on
// RLBoxExpatSandboxData::AttachDriver says the Handle{Start,End}Element
// callbacks are registered after a sandbox is obtained from the pool.
// @param aSandbox   the Expat sandbox the callback is invoked for.
// @param aUserData  opaque user-data pointer (presumably the driver — confirm).
// @param aName      element name.
// @param aAtts      attribute array (pointer to pointers to char16_t).
48 static void HandleStartElement(rlbox_sandbox_expat
& aSandbox
,
49 tainted_expat
<void*> aUserData
,
50 tainted_expat
<const char16_t
*> aName
,
51 tainted_expat
<const char16_t
**> aAtts
);
// Variant of HandleStartElement with an identical parameter list.
// NOTE(review): presumably the one registered when AttachDriver is called
// with IsSystemPrincipal == true — confirm in the implementation.
52 static void HandleStartElementForSystemPrincipal(
53 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
54 tainted_expat
<const char16_t
*> aName
,
55 tainted_expat
<const char16_t
**> aAtts
);
// Static end-element callback; same shape as HandleStartElement but without
// the attribute array.
// @param aSandbox   the Expat sandbox the callback is invoked for.
// @param aUserData  opaque user-data pointer (presumably the driver — confirm).
// @param aName      element name.
56 static void HandleEndElement(rlbox_sandbox_expat
& aSandbox
,
57 tainted_expat
<void*> aUserData
,
58 tainted_expat
<const char16_t
*> aName
);
// Variant of HandleEndElement with an identical parameter list.
// NOTE(review): presumably the one registered when AttachDriver is called
// with IsSystemPrincipal == true — confirm in the implementation.
59 static void HandleEndElementForSystemPrincipal(
60 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
61 tainted_expat
<const char16_t
*> aName
);
// Member handlers for the remaining parse events. Each returns an nsresult
// and takes plain char16_t data. RLBoxExpatSandboxData holds
// sandbox_callback_expat<> members with matching names (mHandleCharacterData,
// mHandleComment, ...); presumably those callbacks forward to these methods —
// confirm in the implementation.

// Character data: aCData with aLength code units.
62 nsresult
HandleCharacterData(const char16_t
* aCData
, const uint32_t aLength
);
// Comment node; the comment text is passed in the parameter named aName.
63 nsresult
HandleComment(const char16_t
* aName
);
// Processing instruction: target and data strings.
64 nsresult
HandleProcessingInstruction(const char16_t
* aTarget
,
65 const char16_t
* aData
);
// XML declaration: version string, encoding string and a standalone value
// passed as int32_t.
66 nsresult
HandleXMLDeclaration(const char16_t
* aVersion
,
67 const char16_t
* aEncoding
, int32_t aStandalone
);
// Default handler: aData with aLength code units.
68 nsresult
HandleDefault(const char16_t
* aData
, const uint32_t aLength
);
// Start / end of a CDATA section; no arguments.
69 nsresult
HandleStartCdataSection();
70 nsresult
HandleEndCdataSection();
// Start of a doctype declaration.
// @param aDoctypeName        doctype name string.
// @param aSysid              system identifier string.
// @param aPubid              public identifier string.
// @param aHasInternalSubset  whether the declaration has an internal subset.
// @return an nsresult status code.
71 nsresult
HandleStartDoctypeDecl(const char16_t
* aDoctypeName
,
72 const char16_t
* aSysid
,
73 const char16_t
* aPubid
,
74 bool aHasInternalSubset
);
// End of a doctype declaration; no arguments.
75 nsresult
HandleEndDoctypeDecl();
78 // Load up an external stream to get external entity information
79 nsresult
OpenInputStreamFromExternalDTD(const char16_t
* aFPIStr
,
80 const char16_t
* aURLStr
,
82 nsIInputStream
** aStream
,
85 enum class ChunkOrBufferIsFinal
{
92 * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
93 * aLength should be 0. The result of the call will be stored in
94 * mInternalState. Expat will parse as much of the buffer as it can and store
95 * the rest in its internal buffer.
97 * @param aBuffer the buffer to pass to Expat. May be null.
98 * @param aLength the length of the buffer to pass to Expat (in number of
99 * char16_t's). Must be 0 if aBuffer is null and > 0 if
100 * aBuffer is not null.
101 * @param aIsFinal whether this is the last chunk in a row passed to
102 * ParseChunk, and if so whether it's the last chunk and
103 * buffer passed to ParseChunk (meaning there will be no more
104 * calls to ParseChunk for the document being parsed).
105 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
106 * doesn't include the char16_t's that Expat stored in
107 * its buffer but didn't parse yet.
108 * @param aLastLineLength [out] the length of the last line that Expat has
109 * consumed. This will only be computed if
110 * aIsFinal is not None or mInternalState is set
// Returns void: per the documentation above, the outcome of the call is
// stored in mInternalState, and aConsumed / aLastLineLength are
// out-parameters.
113 void ParseChunk(const char16_t
* aBuffer
, uint32_t aLength
,
114 ChunkOrBufferIsFinal aIsFinal
, uint32_t* aConsumed
,
115 XML_Size
* aLastLineLength
);
117 * Wrapper for ParseBuffer. If the buffer is too large to be copied into the
118 * sandbox all at once, splits it into chunks and invokes ParseBuffer in a
121 * @param aBuffer the buffer to pass to Expat. May be null.
122 * @param aLength the length of the buffer to pass to Expat (in number of
123 * char16_t's). Must be 0 if aBuffer is null and > 0 if
124 * aBuffer is not null.
125 * @param aIsFinal whether there will definitely not be any more new buffers
126 * passed in to ParseBuffer
127 * @param aConsumed [out] the number of char16_t's that Expat consumed. This
128 * doesn't include the char16_t's that Expat stored in
129 * its buffer but didn't parse yet.
130 * @param aLastLineLength [out] the length of the last line that Expat has
// Unlike ParseChunk, aIsFinal is a plain bool here.
// NOTE(review): aPassedToExpat has no @param entry in the documentation
// above; presumably an out-parameter reporting how much of aBuffer was handed
// to Expat — confirm against the implementation and document it.
133 void ChunkAndParseBuffer(const char16_t
* aBuffer
, uint32_t aLength
,
134 bool aIsFinal
, uint32_t* aPassedToExpat
,
135 uint32_t* aConsumed
, XML_Size
* aLastLineLength
);
// Error handling entry point; takes no arguments and returns an nsresult.
// Declaration only — the body is not in this excerpt.
137 nsresult
HandleError();
// @param aState  an nsresult state value.
// NOTE(review): presumably decides from aState whether the parser has to be
// stopped — confirm against the implementation.
139 void MaybeStopParser(nsresult aState
);
// Returns true when mInternalState equals NS_ERROR_HTMLPARSER_BLOCK or
// NS_ERROR_HTMLPARSER_INTERRUPTED, false otherwise. Does not modify any state.
141 bool BlockedOrInterrupted() {
142 return mInternalState
== NS_ERROR_HTMLPARSER_BLOCK
||
143 mInternalState
== NS_ERROR_HTMLPARSER_INTERRUPTED
;
146 // Expat allows us to set the base URI for entities. It doesn't use the base
147 // URI itself, but just passes it along to all the entity handlers (just the
148 // external entity reference handler for us). It does expect the base URI as a
149 // null-terminated string, with the same character type as the parsed buffers
150 // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
151 // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
152 // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
153 // Most of the time this base URI is unused (the external entity handler is
154 // rarely called), but when it is we also convert it back to a nsIURI, so we
155 // convert the string back to UTF-8.
157 // We'd rather not do any of these conversions and copies, so we use a (hacky)
158 // workaround. We store all base URIs in an array of nsIURIs. Instead of
159 // passing the real URI to Expat as a string, we pass it a null-terminated
160 // 2-character buffer. The first character of that buffer stores the index of
161 // the corresponding nsIURI in the array (incremented by 1 because 0 is used
162 // to terminate a string). The entity handler can then use the index from the
163 // base URI that Expat passes it to look up the right nsIURI from the array.
165 // GetExpatBaseURI pushes the nsIURI to the array, and creates the
166 // two-character buffer for it.
168 // GetBaseURI looks up the right nsIURI in the array, based on the index from
169 // the two-character buffer.
// Fixed-size array of two XML_Chars: the index character followed by the
// null terminator, as described in the comment above.
170 using ExpatBaseURI
= mozilla::Array
<XML_Char
, 2>;
// Takes an nsIURI* and returns the two-character buffer by value.
171 ExpatBaseURI
GetExpatBaseURI(nsIURI
* aURI
);
// Takes the XML_Char buffer that Expat passes back and returns a raw nsIURI*.
// Const member: does not modify the driver.
172 nsIURI
* GetBaseURI(const XML_Char
* aBase
) const;
// Const accessors returning raw (non-owning) pointers to the sandbox data
// object and to the sandbox itself. Declarations only.
// NOTE(review): presumably both are derived from mSandboxPoolData — confirm.
174 RLBoxExpatSandboxData
* SandboxData() const;
175 rlbox_sandbox_expat
* Sandbox() const;
177 // Destroy expat parser and return sandbox to pool
// Uniquely-owned (UniquePtr) sandbox pool data for this driver.
180 mozilla::UniquePtr
<mozilla::RLBoxSandboxPoolData
> mSandboxPoolData
;
// The Expat parser handle, stored wrapped in tainted_expat<>.
181 tainted_expat
<XML_Parser
> mExpatParser
;
185 // Various parts of a doctype
186 nsString mDoctypeName
;
189 nsString mInternalSubset
;
// NOTE(review): presumably true while the parser is inside the doctype's
// internal subset — confirm against the implementation.
191 bool mInInternalSubset
;
// NOTE(review): presumably set once the final parse call has been made to
// Expat — confirm against the implementation.
193 bool mMadeFinalCallToExpat
;
195 // Used to track if we're in the parser.
// NOTE(review): the comment above does not obviously describe mInternalState;
// it may belong to a member that is not visible in this excerpt — confirm.
// mInternalState is the value BlockedOrInterrupted() compares against
// NS_ERROR_HTMLPARSER_BLOCK and NS_ERROR_HTMLPARSER_INTERRUPTED, and the
// ParseChunk documentation says the result of that call is stored in it.
198 nsresult mInternalState
;
200 // The length of the data in Expat's buffer (in number of char16_t's).
201 uint32_t mExpatBuffered
;
205 // These sinks all refer to the same conceptual object. mOriginalSink is
206 // identical with the nsIContentSink* passed to WillBuildModel, and exists
207 // only to avoid QI-ing back to nsIContentSink*.
// NOTE(review): no WillBuildModel is declared in this excerpt; Initialize()
// is the visible method that takes an nsIContentSink* — confirm that the
// reference above is still current.
208 nsCOMPtr
<nsIContentSink
> mOriginalSink
;
209 nsCOMPtr
<nsIExpatSink
> mSink
;
// Non-owning pointer to catalog data (see the "weak" marker).
211 const nsCatalogData
* mCatalogData
; // weak
// Array of strong nsIURI references.
// NOTE(review): presumably the array of base URIs that the base-URI comment
// above says GetExpatBaseURI pushes to and GetBaseURI indexes — confirm.
212 nsTArray
<nsCOMPtr
<nsIURI
>> mURIs
;
214 // Used for error reporting.
215 uint64_t mInnerWindowID
;
218 class RLBoxExpatSandboxData
: public mozilla::RLBoxSandboxDataBase
{
219 friend class RLBoxExpatSandboxPool
;
220 friend class nsExpatDriver
;
// Explicit constructor: forwards aSize to the RLBoxSandboxDataBase base class
// and invokes MOZ_COUNT_CTOR for this class.
// @param aSize  size value passed through unchanged to the base class.
223 explicit RLBoxExpatSandboxData(uint64_t aSize
)
224 : mozilla::RLBoxSandboxDataBase(aSize
) {
225 MOZ_COUNT_CTOR(RLBoxExpatSandboxData
);
// Destructor; declaration only, the definition is not in this excerpt.
227 ~RLBoxExpatSandboxData();
// Returns the raw pointer held by mSandbox (mSandbox.get()); ownership stays
// with the UniquePtr member.
228 rlbox_sandbox_expat
* Sandbox() const { return mSandbox
.get(); }
229 // After getting a sandbox from the pool we need to register the
230 // Handle{Start,End}Element callbacks and associate the driver with the
// (the rest of the sentence above is not visible in this excerpt)
// @param IsSystemPrincipal  NOTE(review): presumably selects the
//        ...ForSystemPrincipal element handlers declared on nsExpatDriver —
//        confirm against the implementation.
// @param aDriver  the driver to associate, passed as an untyped pointer.
232 void AttachDriver(bool IsSystemPrincipal
, void* aDriver
);
// Uniquely-owned sandbox instance; Sandbox() hands out its raw pointer.
236 mozilla::UniquePtr
<rlbox_sandbox_expat
> mSandbox
;
237 // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
238 // and consequently across sandbox reuses.
// One sandbox_callback_expat<> member per Expat handler type; each member is
// named after the nsExpatDriver method with the same Handle... name.
239 sandbox_callback_expat
<XML_XmlDeclHandler
> mHandleXMLDeclaration
;
240 sandbox_callback_expat
<XML_CharacterDataHandler
> mHandleCharacterData
;
241 sandbox_callback_expat
<XML_ProcessingInstructionHandler
>
242 mHandleProcessingInstruction
;
243 sandbox_callback_expat
<XML_DefaultHandler
> mHandleDefault
;
244 sandbox_callback_expat
<XML_ExternalEntityRefHandler
> mHandleExternalEntityRef
;
245 sandbox_callback_expat
<XML_CommentHandler
> mHandleComment
;
246 sandbox_callback_expat
<XML_StartCdataSectionHandler
> mHandleStartCdataSection
;
247 sandbox_callback_expat
<XML_EndCdataSectionHandler
> mHandleEndCdataSection
;
248 sandbox_callback_expat
<XML_StartDoctypeDeclHandler
> mHandleStartDoctypeDecl
;
249 sandbox_callback_expat
<XML_EndDoctypeDeclHandler
> mHandleEndDoctypeDecl
;
250 // Expat callbacks specific to each driver, and thus (re)set across sandbox
// (the rest of the sentence above is not visible in this excerpt)
// These are the start/end element callbacks that the AttachDriver comment
// says are registered after a sandbox is obtained from the pool.
252 sandbox_callback_expat
<XML_StartElementHandler
> mHandleStartElement
;
253 sandbox_callback_expat
<XML_EndElementHandler
> mHandleEndElement
;