1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsExpatDriver.h"
7 #include "mozilla/fallible.h"
9 #include "CParserContext.h"
10 #include "nsIExpatSink.h"
11 #include "nsIContentSink.h"
12 #include "nsIDocShell.h"
13 #include "nsParserMsgUtils.h"
15 #include "nsIUnicharInputStream.h"
16 #include "nsIProtocolHandler.h"
17 #include "nsNetUtil.h"
19 #include "nsTextFormatter.h"
20 #include "nsDirectoryServiceDefs.h"
22 #include "nsIConsoleService.h"
23 #include "nsIScriptError.h"
24 #include "nsIScriptGlobalObject.h"
25 #include "nsIContentPolicy.h"
26 #include "nsComponentManagerUtils.h"
27 #include "nsContentPolicyUtils.h"
29 #include "nsXPCOMCIDInternal.h"
30 #include "nsUnicharInputStream.h"
31 #include "nsContentUtils.h"
32 #include "mozilla/Array.h"
33 #include "mozilla/ArrayUtils.h"
34 #include "mozilla/BasePrincipal.h"
35 #include "mozilla/IntegerTypeTraits.h"
36 #include "mozilla/NullPrincipal.h"
37 #include "mozilla/Telemetry.h"
38 #include "mozilla/TelemetryComms.h"
40 #include "nsThreadUtils.h"
41 #include "mozilla/ClearOnShutdown.h"
42 #include "mozilla/RLBoxUtils.h"
43 #include "mozilla/UniquePtr.h"
45 #include "mozilla/Logging.h"
47 using mozilla::fallible
;
48 using mozilla::LogLevel
;
49 using mozilla::MakeStringSpan
;
50 using mozilla::Unused
;
51 using mozilla::dom::Document
;
53 // We only pass chunks of length sMaxChunkLength to Expat in the RLBOX sandbox.
54 // The RLBOX sandbox has a limited amount of memory, and we have to account for
55 // other memory use by Expat (including the buffering it does).
56 // Note that sMaxChunkLength is in number of characters.
58 // On debug builds we set a much lower limit (1kB) to try to hit boundary
59 // conditions more frequently.
60 static const uint32_t sMaxChunkLength
= 1024 / sizeof(char16_t
);
62 static const uint32_t sMaxChunkLength
= (128 * 1024) / sizeof(char16_t
);
// Separator character that Expat places between the namespace URI, the local
// name and the prefix of an expanded name (see the tag-mismatch handling in
// HandleError, which scans for it).
65 #define kExpatSeparatorChar 0xFFFF
// NUL-terminated UTF-16 spelling of the encoding name "UTF-16". It is copied
// into the sandbox and handed to Expat when an external entity parser is
// created.
67 static const char16_t kUTF16
[] = {'U', 'T', 'F', '-', '1', '6', '\0'};
// Log module for this file, registered under the name "expatdriver".
69 static mozilla::LazyLogModule
gExpatDriverLog("expatdriver");
71 // Use the same maximum tree depth as Chromium (see
72 // https://chromium.googlesource.com/chromium/src/+/f464165c1dedff1c955d3c051c5a9a1c6a0e8f6b/third_party/WebKit/Source/core/xml/parser/XMLDocumentParser.cpp#85).
// HandleStartElement stops the parser with
// NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP once mTagDepth exceeds this value.
73 static const uint16_t sMaxXMLTreeDepth
= 5000;
75 /***************************** RLBOX HELPERS ********************************/
76 // Helpers for calling sandboxed expat functions in handlers
// Calls the sandboxed expat function `foo` with self->mExpatParser as its
// first argument (followed by any extra arguments) and copies the tainted
// result out of the sandbox through `verifier`.
// Expects `aSandbox` (the sandbox reference) and `self` (the driver) to be in
// scope at the point of use, i.e. inside the static expat handlers.
78 #define RLBOX_EXPAT_SAFE_CALL(foo, verifier, ...) \
79 aSandbox.invoke_sandbox_function(foo, self->mExpatParser, ##__VA_ARGS__) \
80 .copy_and_verify(verifier)
82 #define RLBOX_EXPAT_SAFE_MCALL(foo, verifier, ...) \
84 ->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__) \
85 .copy_and_verify(verifier)
// Member-function variant: calls the sandboxed expat function `foo` through
// Sandbox() with this driver's mExpatParser as its first argument. No verifier
// is applied here; checking the returned value is left to the caller.
87 #define RLBOX_EXPAT_MCALL(foo, ...) \
88 Sandbox()->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__)
90 /* safe_unverified is used whenever it's safe to not use a validator */
92 static T
safe_unverified(T val
) {
96 /* status_verifier is a type validator for XML_Status */
97 inline enum XML_Status
status_verifier(enum XML_Status s
) {
98 MOZ_RELEASE_ASSERT(s
>= XML_STATUS_ERROR
&& s
<= XML_STATUS_SUSPENDED
,
99 "unexpected status code");
103 /* error_verifier is a type validator for XML_Error */
104 inline enum XML_Error
error_verifier(enum XML_Error code
) {
106 code
>= XML_ERROR_NONE
&& code
<= XML_ERROR_INVALID_ARGUMENT
,
107 "unexpected XML error code");
111 /* We use unverified_xml_string to just expose sandbox expat strings to Firefox
112 * without any validation. On 64-bit we have guard pages at the sandbox
113 * boundary; on 32-bit we don't and a string could be used to read beyond the
114 * sandbox boundary. In our attacker model this is okay (the attacker can just
117 * Nevertheless, we should try to add string validators to the consumer code
118 * of expat whenever we have some semantics. At the very least we should make
119 * sure that the strings are never written to. Bug 1693991 tracks this.
121 static const XML_Char
* unverified_xml_string(uintptr_t ptr
) {
122 return reinterpret_cast<const XML_Char
*>(ptr
);
125 /* The TransferBuffer class is used to copy (or directly expose in the
126 * noop-sandbox case) buffers into the expat sandbox (and automatically
127 * release them when out of scope).
129 template <typename T
>
130 using TransferBuffer
=
131 mozilla::RLBoxTransferBufferToSandbox
<T
, rlbox_expat_sandbox_type
>;
133 /*************************** END RLBOX HELPERS ******************************/
135 /***************************** EXPAT CALL BACKS ******************************/
136 // The callback handlers that get called from the expat parser.
138 static void Driver_HandleXMLDeclaration(
139 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> /* aUserData */,
140 tainted_expat
<const XML_Char
*> aVersion
,
141 tainted_expat
<const XML_Char
*> aEncoding
, tainted_expat
<int> aStandalone
) {
142 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
145 int standalone
= aStandalone
.copy_and_verify([&](auto a
) {
146 // Standalone argument can be -1, 0, or 1 (see
147 // /parser/expat/lib/expat.h#185)
148 MOZ_RELEASE_ASSERT(a
>= -1 && a
<= 1, "Unexpected standalone parameter");
152 const auto* version
= aVersion
.copy_and_verify_address(unverified_xml_string
);
153 const auto* encoding
=
154 aEncoding
.copy_and_verify_address(unverified_xml_string
);
155 driver
->HandleXMLDeclaration(version
, encoding
, standalone
);
158 static void Driver_HandleCharacterData(rlbox_sandbox_expat
& aSandbox
,
159 tainted_expat
<void*> /* aUserData */,
160 tainted_expat
<const XML_Char
*> aData
,
161 tainted_expat
<int> aLength
) {
162 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
164 // aData is not null terminated; even with bad length we will not span beyond
167 static_cast<uint32_t>(aLength
.copy_and_verify(safe_unverified
<int>));
168 const auto* data
= aData
.unverified_safe_pointer_because(
169 length
, "Only care that the data is within sandbox boundary.");
170 driver
->HandleCharacterData(data
, length
);
173 static void Driver_HandleComment(rlbox_sandbox_expat
& aSandbox
,
174 tainted_expat
<void*> /* aUserData */,
175 tainted_expat
<const XML_Char
*> aName
) {
176 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
178 const auto* name
= aName
.copy_and_verify_address(unverified_xml_string
);
179 driver
->HandleComment(name
);
182 static void Driver_HandleProcessingInstruction(
183 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> /* aUserData */,
184 tainted_expat
<const XML_Char
*> aTarget
,
185 tainted_expat
<const XML_Char
*> aData
) {
186 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
188 const auto* target
= aTarget
.copy_and_verify_address(unverified_xml_string
);
189 const auto* data
= aData
.copy_and_verify_address(unverified_xml_string
);
190 driver
->HandleProcessingInstruction(target
, data
);
193 static void Driver_HandleDefault(rlbox_sandbox_expat
& aSandbox
,
194 tainted_expat
<void*> /* aUserData */,
195 tainted_expat
<const XML_Char
*> aData
,
196 tainted_expat
<int> aLength
) {
197 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
199 // aData is not null terminated; even with bad length we will not span
200 // beyond sandbox boundary
202 static_cast<uint32_t>(aLength
.copy_and_verify(safe_unverified
<int>));
203 const auto* data
= aData
.unverified_safe_pointer_because(
204 length
, "Only care that the data is within sandbox boundary.");
205 driver
->HandleDefault(data
, length
);
208 static void Driver_HandleStartCdataSection(
209 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> /* aUserData */) {
210 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
212 driver
->HandleStartCdataSection();
215 static void Driver_HandleEndCdataSection(rlbox_sandbox_expat
& aSandbox
,
216 tainted_expat
<void*> /* aUserData */) {
217 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
219 driver
->HandleEndCdataSection();
222 static void Driver_HandleStartDoctypeDecl(
223 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> /* aUserData */,
224 tainted_expat
<const XML_Char
*> aDoctypeName
,
225 tainted_expat
<const XML_Char
*> aSysid
,
226 tainted_expat
<const XML_Char
*> aPubid
,
227 tainted_expat
<int> aHasInternalSubset
) {
228 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
230 const auto* doctypeName
=
231 aDoctypeName
.copy_and_verify_address(unverified_xml_string
);
232 const auto* sysid
= aSysid
.copy_and_verify_address(unverified_xml_string
);
233 const auto* pubid
= aPubid
.copy_and_verify_address(unverified_xml_string
);
234 bool hasInternalSubset
=
235 !!(aHasInternalSubset
.copy_and_verify(safe_unverified
<int>));
236 driver
->HandleStartDoctypeDecl(doctypeName
, sysid
, pubid
, hasInternalSubset
);
239 static void Driver_HandleEndDoctypeDecl(rlbox_sandbox_expat
& aSandbox
,
240 tainted_expat
<void*> /* aUserData */) {
241 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
243 driver
->HandleEndDoctypeDecl();
246 static tainted_expat
<int> Driver_HandleExternalEntityRef(
247 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<XML_Parser
> /* aParser */,
248 tainted_expat
<const XML_Char
*> aOpenEntityNames
,
249 tainted_expat
<const XML_Char
*> aBase
,
250 tainted_expat
<const XML_Char
*> aSystemId
,
251 tainted_expat
<const XML_Char
*> aPublicId
) {
252 nsExpatDriver
* driver
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
255 const auto* openEntityNames
=
256 aOpenEntityNames
.copy_and_verify_address(unverified_xml_string
);
257 const auto* base
= aBase
.copy_and_verify_address(unverified_xml_string
);
258 const auto* systemId
=
259 aSystemId
.copy_and_verify_address(unverified_xml_string
);
260 const auto* publicId
=
261 aPublicId
.copy_and_verify_address(unverified_xml_string
);
262 return driver
->HandleExternalEntityRef(openEntityNames
, base
, systemId
,
266 /***************************** END CALL BACKS ********************************/
268 /***************************** CATALOG UTILS *********************************/
270 // Initially added for bug 113400 to switch from the remote "XHTML 1.0 plus
271 // MathML 2.0" DTD to the lightweight customized version that Mozilla uses.
272 // Since Mozilla is not validating, no need to fetch a *huge* file at each
274 // XXX The cleanest solution here would be to fix Bug 98413: Implement XML
276 struct nsCatalogData
{
277 const char* mPublicID
;
278 const char* mLocalDTD
;
279 const char* mAgentSheet
;
// Catalog of known public identifiers. Each entry is
// {mPublicID, mLocalDTD, mAgentSheet}: the public ID to match, the local DTD
// file name to use instead, and an agent sheet (nullptr for every entry
// here). The final all-nullptr entry is a sentinel: LookupCatalogData walks
// the table until it reaches an entry whose mPublicID is null.
282 // The order of this table is guesstimated to be the optimum order
283 static const nsCatalogData kCatalogTable
[] = {
284 {"-//W3C//DTD XHTML 1.0 Transitional//EN", "htmlmathml-f.ent", nullptr},
285 {"-//W3C//DTD XHTML 1.1//EN", "htmlmathml-f.ent", nullptr},
286 {"-//W3C//DTD XHTML 1.0 Strict//EN", "htmlmathml-f.ent", nullptr},
287 {"-//W3C//DTD XHTML 1.0 Frameset//EN", "htmlmathml-f.ent", nullptr},
288 {"-//W3C//DTD XHTML Basic 1.0//EN", "htmlmathml-f.ent", nullptr},
289 {"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
290 {"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
291 "htmlmathml-f.ent", nullptr},
292 {"-//W3C//DTD MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
293 {"-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "htmlmathml-f.ent", nullptr},
294 {nullptr, nullptr, nullptr}};
296 static const nsCatalogData
* LookupCatalogData(const char16_t
* aPublicID
) {
297 nsDependentString
publicID(aPublicID
);
299 // linear search for now since the number of entries is going to
300 // be negligible, and the fix for bug 98413 would get rid of this
302 const nsCatalogData
* data
= kCatalogTable
;
303 while (data
->mPublicID
) {
304 if (publicID
.EqualsASCII(data
->mPublicID
)) {
313 // This function provides a resource URI to a local DTD
314 // in resource://gre/res/dtd/ which may or may not exist.
315 // If aCatalogData is provided, it is used to remap the
316 // DTD instead of taking the filename from the URI. aDTD
317 // may be null in some cases that are relying on
318 // aCatalogData working for them.
319 static void GetLocalDTDURI(const nsCatalogData
* aCatalogData
, nsIURI
* aDTD
,
321 nsAutoCString fileName
;
323 // remap the DTD to a known local DTD
324 fileName
.Assign(aCatalogData
->mLocalDTD
);
327 if (fileName
.IsEmpty()) {
328 // Try to see if the user has installed the DTD file -- we extract the
329 // filename.ext of the DTD here. Hence, for any DTD for which we have
330 // no predefined mapping, users just have to copy the DTD file to our
331 // special DTD directory and it will be picked.
332 nsCOMPtr
<nsIURL
> dtdURL
= do_QueryInterface(aDTD
);
334 // Not a URL with a filename, or maybe it was null. Either way, nothing
335 // else we can do here.
339 dtdURL
->GetFileName(fileName
);
340 if (fileName
.IsEmpty()) {
345 nsAutoCString
respath("resource://gre/res/dtd/");
347 NS_NewURI(aResult
, respath
);
350 /***************************** END CATALOG UTILS *****************************/
352 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsExpatDriver
)
353 NS_INTERFACE_MAP_ENTRY(nsIDTD
)
354 NS_INTERFACE_MAP_ENTRY(nsISupports
)
// Reference-counting and cycle-collection boilerplate for nsExpatDriver.
// mSink is the only member listed in the cycle-collection macro.
357 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsExpatDriver
)
358 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsExpatDriver
)
360 NS_IMPL_CYCLE_COLLECTION(nsExpatDriver
, mSink
)
362 nsExpatDriver::nsExpatDriver()
363 : mExpatParser(nullptr),
365 mInInternalSubset(false),
366 mInExternalDTD(false),
367 mMadeFinalCallToExpat(false),
369 mInternalState(NS_OK
),
372 mCatalogData(nullptr),
// Destructor: all teardown is delegated to Destroy().
375 nsExpatDriver::~nsExpatDriver() { Destroy(); }
377 void nsExpatDriver::Destroy() {
378 if (mSandboxPoolData
) {
379 SandboxData()->DetachDriver();
381 RLBOX_EXPAT_MCALL(MOZ_XML_ParserFree
);
384 mSandboxPoolData
.reset();
386 mExpatParser
= nullptr;
389 // The AllocAttrs class is used to speed up copying attributes from the
390 // sandboxed expat by fast allocating attributes on the stack and only falling
391 // back to malloc when we need to allocate lots of attributes.
392 class MOZ_STACK_CLASS AllocAttrs
{
393 #define NUM_STACK_SLOTS 16
395 const char16_t
** Init(size_t size
) {
396 if (size
<= NUM_STACK_SLOTS
) {
399 mHeapPtr
= mozilla::MakeUnique
<const char16_t
*[]>(size
);
400 return mHeapPtr
.get();
404 const char16_t
* mInlineArr
[NUM_STACK_SLOTS
];
405 mozilla::UniquePtr
<const char16_t
*[]> mHeapPtr
;
406 #undef NUM_STACK_SLOTS
410 void nsExpatDriver::HandleStartElement(rlbox_sandbox_expat
& aSandbox
,
411 tainted_expat
<void*> /* aUserData */,
412 tainted_expat
<const char16_t
*> aName
,
413 tainted_expat
<const char16_t
**> aAttrs
) {
414 nsExpatDriver
* self
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
415 MOZ_ASSERT(self
&& self
->mSink
);
417 const auto* name
= aName
.copy_and_verify_address(unverified_xml_string
);
419 // Calculate the total number of elements in aAttrs.
420 // XML_GetSpecifiedAttributeCount will only give us the number of specified
421 // attrs (twice that number, actually), so we have to check for default
423 int count
= RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetSpecifiedAttributeCount
,
424 safe_unverified
<int>);
425 MOZ_RELEASE_ASSERT(count
>= 0, "Unexpected attribute count");
426 uint32_t attrArrayLength
;
427 for (attrArrayLength
= count
;
428 (aAttrs
[attrArrayLength
] != nullptr)
429 .unverified_safe_because("Bad length is checked later");
430 attrArrayLength
+= 2) {
431 // Just looping till we find out what the length is
433 // A malicious length could result in an overflow when we allocate aAttrs
434 // and then access elements of the array.
435 MOZ_RELEASE_ASSERT(attrArrayLength
< UINT32_MAX
, "Overflow attempt");
437 // Copy tainted aAttrs from sandbox
438 AllocAttrs allocAttrs
;
439 const char16_t
** attrs
= allocAttrs
.Init(attrArrayLength
+ 1);
440 if (NS_WARN_IF(!aAttrs
|| !attrs
)) {
441 self
->MaybeStopParser(NS_ERROR_OUT_OF_MEMORY
);
445 for (uint32_t i
= 0; i
< attrArrayLength
; i
++) {
446 attrs
[i
] = aAttrs
[i
].copy_and_verify_address(unverified_xml_string
);
448 attrs
[attrArrayLength
] = nullptr;
451 // We store the tagdepth in a PRUint16, so make sure the limit fits in a
455 std::numeric_limits
<decltype(nsExpatDriver::mTagDepth
)>::max());
457 if (++self
->mTagDepth
> sMaxXMLTreeDepth
) {
458 self
->MaybeStopParser(NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP
);
462 nsresult rv
= self
->mSink
->HandleStartElement(
463 name
, attrs
, attrArrayLength
,
464 RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentLineNumber
,
465 safe_unverified
<XML_Size
>),
466 RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentColumnNumber
,
467 safe_unverified
<XML_Size
>));
468 self
->MaybeStopParser(rv
);
473 void nsExpatDriver::HandleStartElementForSystemPrincipal(
474 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
475 tainted_expat
<const char16_t
*> aName
,
476 tainted_expat
<const char16_t
**> aAttrs
) {
477 nsExpatDriver
* self
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
479 if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue
,
480 safe_unverified
<XML_Bool
>)) {
481 HandleStartElement(aSandbox
, aUserData
, aName
, aAttrs
);
483 nsCOMPtr
<Document
> doc
=
484 do_QueryInterface(self
->mOriginalSink
->GetTarget());
486 // Adjust the column number so that it is one based rather than zero
488 uint32_t colNumber
= RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentColumnNumber
,
489 safe_unverified
<XML_Size
>) +
491 uint32_t lineNumber
= RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentLineNumber
,
492 safe_unverified
<XML_Size
>);
495 RefPtr
<nsAtom
> prefix
, localName
;
496 const auto* name
= aName
.copy_and_verify_address(unverified_xml_string
);
497 nsContentUtils::SplitExpatName(name
, getter_AddRefs(prefix
),
498 getter_AddRefs(localName
), &nameSpaceID
);
501 error
.AppendLiteral("Ignoring element <");
503 error
.Append(prefix
->GetUTF16String());
506 error
.Append(localName
->GetUTF16String());
507 error
.AppendLiteral("> created from entity value.");
509 nsContentUtils::ReportToConsoleNonLocalized(
510 error
, nsIScriptError::warningFlag
, "XML Document"_ns
, doc
, nullptr,
511 u
""_ns
, lineNumber
, colNumber
);
516 void nsExpatDriver::HandleEndElement(rlbox_sandbox_expat
& aSandbox
,
517 tainted_expat
<void*> aUserData
,
518 tainted_expat
<const char16_t
*> aName
) {
519 nsExpatDriver
* self
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
521 const auto* name
= aName
.copy_and_verify_address(unverified_xml_string
);
523 NS_ASSERTION(self
->mSink
, "content sink not found!");
524 NS_ASSERTION(self
->mInternalState
!= NS_ERROR_HTMLPARSER_BLOCK
,
525 "Shouldn't block from HandleStartElement.");
527 if (self
->mSink
&& self
->mInternalState
!= NS_ERROR_HTMLPARSER_STOPPARSING
) {
528 nsresult rv
= self
->mSink
->HandleEndElement(name
);
530 self
->MaybeStopParser(rv
);
535 void nsExpatDriver::HandleEndElementForSystemPrincipal(
536 rlbox_sandbox_expat
& aSandbox
, tainted_expat
<void*> aUserData
,
537 tainted_expat
<const char16_t
*> aName
) {
538 nsExpatDriver
* self
= static_cast<nsExpatDriver
*>(aSandbox
.sandbox_storage
);
540 if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue
,
541 safe_unverified
<XML_Bool
>)) {
542 HandleEndElement(aSandbox
, aUserData
, aName
);
546 nsresult
nsExpatDriver::HandleCharacterData(const char16_t
* aValue
,
547 const uint32_t aLength
) {
548 NS_ASSERTION(mSink
, "content sink not found!");
551 if (!mCDataText
.Append(aValue
, aLength
, fallible
)) {
552 MaybeStopParser(NS_ERROR_OUT_OF_MEMORY
);
555 nsresult rv
= mSink
->HandleCharacterData(aValue
, aLength
);
562 nsresult
nsExpatDriver::HandleComment(const char16_t
* aValue
) {
563 NS_ASSERTION(mSink
, "content sink not found!");
565 if (mInExternalDTD
) {
566 // Ignore comments from external DTDs
570 if (mInInternalSubset
) {
571 mInternalSubset
.AppendLiteral("<!--");
572 mInternalSubset
.Append(aValue
);
573 mInternalSubset
.AppendLiteral("-->");
575 nsresult rv
= mSink
->HandleComment(aValue
);
582 nsresult
nsExpatDriver::HandleProcessingInstruction(const char16_t
* aTarget
,
583 const char16_t
* aData
) {
584 NS_ASSERTION(mSink
, "content sink not found!");
586 if (mInExternalDTD
) {
587 // Ignore PIs in external DTDs for now. Eventually we want to
588 // pass them to the sink in a way that doesn't put them in the DOM
592 if (mInInternalSubset
) {
593 mInternalSubset
.AppendLiteral("<?");
594 mInternalSubset
.Append(aTarget
);
595 mInternalSubset
.Append(' ');
596 mInternalSubset
.Append(aData
);
597 mInternalSubset
.AppendLiteral("?>");
599 nsresult rv
= mSink
->HandleProcessingInstruction(aTarget
, aData
);
606 nsresult
nsExpatDriver::HandleXMLDeclaration(const char16_t
* aVersion
,
607 const char16_t
* aEncoding
,
608 int32_t aStandalone
) {
610 nsresult rv
= mSink
->HandleXMLDeclaration(aVersion
, aEncoding
, aStandalone
);
617 nsresult
nsExpatDriver::HandleDefault(const char16_t
* aValue
,
618 const uint32_t aLength
) {
619 NS_ASSERTION(mSink
, "content sink not found!");
621 if (mInExternalDTD
) {
622 // Ignore newlines in external DTDs
626 if (mInInternalSubset
) {
627 mInternalSubset
.Append(aValue
, aLength
);
630 nsresult rv
= mInternalState
;
631 for (i
= 0; i
< aLength
&& NS_SUCCEEDED(rv
); ++i
) {
632 if (aValue
[i
] == '\n' || aValue
[i
] == '\r') {
633 rv
= mSink
->HandleCharacterData(&aValue
[i
], 1);
642 nsresult
nsExpatDriver::HandleStartCdataSection() {
648 nsresult
nsExpatDriver::HandleEndCdataSection() {
649 NS_ASSERTION(mSink
, "content sink not found!");
654 mSink
->HandleCDataSection(mCDataText
.get(), mCDataText
.Length());
657 mCDataText
.Truncate();
662 nsresult
nsExpatDriver::HandleStartDoctypeDecl(const char16_t
* aDoctypeName
,
663 const char16_t
* aSysid
,
664 const char16_t
* aPubid
,
665 bool aHasInternalSubset
) {
666 mDoctypeName
= aDoctypeName
;
670 if (aHasInternalSubset
) {
671 // Consuming a huge internal subset translates to numerous
672 // allocations. In an effort to avoid too many allocations, we set
673 // mInternalSubset's capacity to 1K (just a guesstimate!).
674 mInInternalSubset
= true;
675 mInternalSubset
.SetCapacity(1024);
677 // Distinguish missing internal subset from an empty one
678 mInternalSubset
.SetIsVoid(true);
684 nsresult
nsExpatDriver::HandleEndDoctypeDecl() {
685 NS_ASSERTION(mSink
, "content sink not found!");
687 mInInternalSubset
= false;
690 // let the sink know any additional knowledge that we have about the
691 // document (currently, from bug 124570, we only expect to pass additional
692 // agent sheets needed to layout the XML vocabulary of the document)
693 nsCOMPtr
<nsIURI
> data
;
695 if (mCatalogData
&& mCatalogData
->mAgentSheet
) {
696 NS_NewURI(getter_AddRefs(data
), mCatalogData
->mAgentSheet
);
700 // The unused support for "catalog style sheets" was removed. It doesn't
701 // look like we'll ever fix bug 98413 either.
702 MOZ_ASSERT(!mCatalogData
|| !mCatalogData
->mAgentSheet
,
703 "Need to add back support for catalog style sheets");
705 // Note: mInternalSubset already doesn't include the [] around it.
706 nsresult rv
= mSink
->HandleDoctypeDecl(mInternalSubset
, mDoctypeName
,
707 mSystemID
, mPublicID
, data
);
711 mInternalSubset
.Truncate();
716 // Wrapper class for passing the sandbox data and parser as a closure to
717 // ExternalDTDStreamReaderFunc.
718 class RLBoxExpatClosure
{
720 RLBoxExpatClosure(RLBoxExpatSandboxData
* aSbxData
,
721 tainted_expat
<XML_Parser
> aExpatParser
)
722 : mSbxData(aSbxData
), mExpatParser(aExpatParser
){};
723 inline rlbox_sandbox_expat
* Sandbox() const { return mSbxData
->Sandbox(); };
724 inline tainted_expat
<XML_Parser
> Parser() const { return mExpatParser
; };
727 RLBoxExpatSandboxData
* mSbxData
;
728 tainted_expat
<XML_Parser
> mExpatParser
;
731 static nsresult
ExternalDTDStreamReaderFunc(nsIUnicharInputStream
* aIn
,
733 const char16_t
* aFromSegment
,
734 uint32_t aToOffset
, uint32_t aCount
,
735 uint32_t* aWriteCount
) {
736 MOZ_ASSERT(aClosure
&& aFromSegment
&& aWriteCount
);
740 // Get sandbox and parser
741 auto* closure
= reinterpret_cast<RLBoxExpatClosure
*>(aClosure
);
744 // Transfer segment into the sandbox
746 TransferBuffer
<char16_t
>(closure
->Sandbox(), aFromSegment
, aCount
);
747 NS_ENSURE_TRUE(*fromSegment
, NS_ERROR_OUT_OF_MEMORY
);
749 // Pass the buffer to expat for parsing.
750 if (closure
->Sandbox()
751 ->invoke_sandbox_function(
752 MOZ_XML_Parse
, closure
->Parser(),
753 rlbox::sandbox_reinterpret_cast
<const char*>(*fromSegment
),
754 aCount
* sizeof(char16_t
), 0)
755 .copy_and_verify(status_verifier
) == XML_STATUS_OK
) {
756 *aWriteCount
= aCount
;
760 return NS_ERROR_FAILURE
;
763 int nsExpatDriver::HandleExternalEntityRef(const char16_t
* openEntityNames
,
764 const char16_t
* base
,
765 const char16_t
* systemId
,
766 const char16_t
* publicId
) {
767 if (mInInternalSubset
&& !mInExternalDTD
&& openEntityNames
) {
768 mInternalSubset
.Append(char16_t('%'));
769 mInternalSubset
.Append(nsDependentString(openEntityNames
));
770 mInternalSubset
.Append(char16_t(';'));
773 nsCOMPtr
<nsIURI
> baseURI
= GetBaseURI(base
);
774 NS_ENSURE_TRUE(baseURI
, 1);
776 // Load the external entity into a buffer.
777 nsCOMPtr
<nsIInputStream
> in
;
778 nsCOMPtr
<nsIURI
> absURI
;
779 nsresult rv
= OpenInputStreamFromExternalDTD(
780 publicId
, systemId
, baseURI
, getter_AddRefs(in
), getter_AddRefs(absURI
));
783 nsCString
message("Failed to open external DTD: publicId \"");
784 AppendUTF16toUTF8(MakeStringSpan(publicId
), message
);
785 message
+= "\" systemId \"";
786 AppendUTF16toUTF8(MakeStringSpan(systemId
), message
);
787 message
+= "\" base \"";
788 message
.Append(baseURI
->GetSpecOrDefault());
789 message
+= "\" URL \"";
791 message
.Append(absURI
->GetSpecOrDefault());
794 NS_WARNING(message
.get());
799 nsCOMPtr
<nsIUnicharInputStream
> uniIn
;
800 rv
= NS_NewUnicharInputStream(in
, getter_AddRefs(uniIn
));
801 NS_ENSURE_SUCCESS(rv
, 1);
805 auto utf16
= TransferBuffer
<char16_t
>(
806 Sandbox(), kUTF16
, nsCharTraits
<char16_t
>::length(kUTF16
) + 1);
807 NS_ENSURE_TRUE(*utf16
, 1);
808 tainted_expat
<XML_Parser
> entParser
;
810 RLBOX_EXPAT_MCALL(MOZ_XML_ExternalEntityParserCreate
, nullptr, *utf16
);
812 auto baseURI
= GetExpatBaseURI(absURI
);
813 auto url
= TransferBuffer
<XML_Char
>(Sandbox(), &baseURI
[0],
814 ArrayLength(baseURI
));
815 NS_ENSURE_TRUE(*url
, 1);
816 Sandbox()->invoke_sandbox_function(MOZ_XML_SetBase
, entParser
, *url
);
818 mInExternalDTD
= true;
820 bool inParser
= mInParser
; // Save in-parser status
823 RLBoxExpatClosure
closure(SandboxData(), entParser
);
826 rv
= uniIn
->ReadSegments(ExternalDTDStreamReaderFunc
, &closure
,
827 uint32_t(-1), &totalRead
);
828 } while (NS_SUCCEEDED(rv
) && totalRead
> 0);
832 ->invoke_sandbox_function(MOZ_XML_Parse
, entParser
, nullptr, 0, 1)
833 .copy_and_verify(status_verifier
);
835 mInParser
= inParser
; // Restore in-parser status
836 mInExternalDTD
= false;
838 Sandbox()->invoke_sandbox_function(MOZ_XML_ParserFree
, entParser
);
845 nsresult
nsExpatDriver::OpenInputStreamFromExternalDTD(const char16_t
* aFPIStr
,
846 const char16_t
* aURLStr
,
848 nsIInputStream
** aStream
,
850 nsCOMPtr
<nsIURI
> uri
;
851 nsresult rv
= NS_NewURI(getter_AddRefs(uri
), NS_ConvertUTF16toUTF8(aURLStr
),
853 // Even if the URI is malformed (most likely because we have a
854 // non-hierarchical base URI and a relative DTD URI, with the latter
855 // being the normal XHTML DTD case), we can try to see whether we
856 // have catalog data for aFPIStr.
857 if (NS_WARN_IF(NS_FAILED(rv
) && rv
!= NS_ERROR_MALFORMED_URI
)) {
861 // make sure the URI, if we have one, is allowed to be loaded in sync
862 bool isUIResource
= false;
864 rv
= NS_URIChainHasFlags(uri
, nsIProtocolHandler::URI_IS_UI_RESOURCE
,
866 NS_ENSURE_SUCCESS(rv
, rv
);
869 nsCOMPtr
<nsIURI
> localURI
;
871 // Check to see if we can map the DTD to a known local DTD, or if a DTD
872 // file of the same name exists in the special DTD directory
874 // see if the Formal Public Identifier (FPI) maps to a catalog entry
875 mCatalogData
= LookupCatalogData(aFPIStr
);
876 GetLocalDTDURI(mCatalogData
, uri
, getter_AddRefs(localURI
));
879 return NS_ERROR_NOT_IMPLEMENTED
;
883 nsCOMPtr
<nsIChannel
> channel
;
886 rv
= NS_NewChannel(getter_AddRefs(channel
), uri
,
887 nsContentUtils::GetSystemPrincipal(),
888 nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL
,
889 nsIContentPolicy::TYPE_DTD
);
890 NS_ENSURE_SUCCESS(rv
, rv
);
893 mSink
== nsCOMPtr
<nsIExpatSink
>(do_QueryInterface(mOriginalSink
)),
894 "In nsExpatDriver::OpenInputStreamFromExternalDTD: "
895 "mOriginalSink not the same object as mSink?");
896 nsContentPolicyType policyType
= nsIContentPolicy::TYPE_INTERNAL_DTD
;
898 nsCOMPtr
<Document
> doc
;
899 doc
= do_QueryInterface(mOriginalSink
->GetTarget());
901 if (doc
->SkipDTDSecurityChecks()) {
902 policyType
= nsIContentPolicy::TYPE_INTERNAL_FORCE_ALLOWED_DTD
;
905 getter_AddRefs(channel
), uri
, doc
,
906 nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT
|
907 nsILoadInfo::SEC_ALLOW_CHROME
,
909 NS_ENSURE_SUCCESS(rv
, rv
);
913 nsCOMPtr
<nsIPrincipal
> nullPrincipal
=
914 mozilla::NullPrincipal::CreateWithoutOriginAttributes();
916 getter_AddRefs(channel
), uri
, nullPrincipal
,
917 nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT
|
918 nsILoadInfo::SEC_ALLOW_CHROME
,
920 NS_ENSURE_SUCCESS(rv
, rv
);
926 channel
->SetContentType("application/xml"_ns
);
927 return channel
->Open(aStream
);
930 static nsresult
CreateErrorText(const char16_t
* aDescription
,
931 const char16_t
* aSourceURL
,
932 const uint32_t aLineNumber
,
933 const uint32_t aColNumber
,
934 nsString
& aErrorString
, bool spoofEnglish
) {
935 aErrorString
.Truncate();
938 nsresult rv
= nsParserMsgUtils::GetLocalizedStringByName(
939 spoofEnglish
? XMLPARSER_PROPERTIES_en_US
: XMLPARSER_PROPERTIES
,
940 "XMLParsingError", msg
);
941 NS_ENSURE_SUCCESS(rv
, rv
);
943 // XML Parsing Error: %1$S\nLocation: %2$S\nLine Number %3$u, Column %4$u:
944 nsTextFormatter::ssprintf(aErrorString
, msg
.get(), aDescription
, aSourceURL
,
945 aLineNumber
, aColNumber
);
949 static nsresult
AppendErrorPointer(const int32_t aColNumber
,
950 const char16_t
* aSourceLine
,
951 nsString
& aSourceString
) {
952 aSourceString
.Append(char16_t('\n'));
954 // Last character will be '^'.
955 int32_t last
= aColNumber
- 1;
957 uint32_t minuses
= 0;
958 for (i
= 0; i
< last
; ++i
) {
959 if (aSourceLine
[i
] == '\t') {
960 // Since this uses |white-space: pre;| a tab stop equals 8 spaces.
961 uint32_t add
= 8 - (minuses
% 8);
962 aSourceString
.AppendASCII("--------", add
);
965 aSourceString
.Append(char16_t('-'));
969 aSourceString
.Append(char16_t('^'));
974 nsresult
nsExpatDriver::HandleError() {
975 int32_t code
= RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetErrorCode
, error_verifier
);
977 // Map Expat error code to an error string
978 // XXX Deal with error returns.
979 nsAutoString description
;
980 nsCOMPtr
<Document
> doc
;
982 doc
= do_QueryInterface(mOriginalSink
->GetTarget());
986 nsContentUtils::SpoofLocaleEnglish() && (!doc
|| !doc
->AllowsL10n());
987 nsParserMsgUtils::GetLocalizedStringByID(
988 spoofEnglish
? XMLPARSER_PROPERTIES_en_US
: XMLPARSER_PROPERTIES
, code
,
991 if (code
== XML_ERROR_TAG_MISMATCH
) {
993 * Expat can send the following:
995 * namespaceURI<separator>localName
996 * namespaceURI<separator>localName<separator>prefix
998 * and we use 0xFFFF for the <separator>.
1002 const char16_t
* mismatch
=
1003 RLBOX_EXPAT_MCALL(MOZ_XML_GetMismatchedTag
)
1004 .copy_and_verify_address(unverified_xml_string
);
1005 const char16_t
* uriEnd
= nullptr;
1006 const char16_t
* nameEnd
= nullptr;
1007 const char16_t
* pos
;
1008 for (pos
= mismatch
; *pos
; ++pos
) {
1009 if (*pos
== kExpatSeparatorChar
) {
1018 nsAutoString tagName
;
1019 if (uriEnd
&& nameEnd
) {
1020 // We have a prefix.
1021 tagName
.Append(nameEnd
+ 1, pos
- nameEnd
- 1);
1022 tagName
.Append(char16_t(':'));
1024 const char16_t
* nameStart
= uriEnd
? uriEnd
+ 1 : mismatch
;
1025 tagName
.Append(nameStart
, (nameEnd
? nameEnd
: pos
) - nameStart
);
1028 nsParserMsgUtils::GetLocalizedStringByName(
1029 spoofEnglish
? XMLPARSER_PROPERTIES_en_US
: XMLPARSER_PROPERTIES
,
1032 // . Expected: </%S>.
1033 nsAutoString message
;
1034 nsTextFormatter::ssprintf(message
, msg
.get(), tagName
.get());
1035 description
.Append(message
);
1038 // Adjust the column number so that it is one based rather than zero based.
1039 uint32_t colNumber
= RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetCurrentColumnNumber
,
1040 safe_unverified
<XML_Size
>) +
1042 uint32_t lineNumber
= RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetCurrentLineNumber
,
1043 safe_unverified
<XML_Size
>);
1044 const XML_Char
* expatBase
=
1045 RLBOX_EXPAT_MCALL(MOZ_XML_GetBase
)
1046 .copy_and_verify_address(unverified_xml_string
);
1048 nsCOMPtr
<nsIURI
> baseURI
;
1049 if (expatBase
&& (baseURI
= GetBaseURI(expatBase
))) {
1050 // Let's ignore if this fails, we're already reporting a parse error.
1051 Unused
<< CopyUTF8toUTF16(baseURI
->GetSpecOrDefault(), uri
, fallible
);
1053 nsAutoString errorText
;
1054 CreateErrorText(description
.get(), uri
.get(), lineNumber
, colNumber
,
1055 errorText
, spoofEnglish
);
1057 nsAutoString
sourceText(mLastLine
);
1058 AppendErrorPointer(colNumber
, mLastLine
.get(), sourceText
);
1060 if (doc
&& nsContentUtils::IsChromeDoc(doc
)) {
1061 nsCString path
= doc
->GetDocumentURI()->GetSpecOrDefault();
1062 nsCOMPtr
<nsISupports
> container
= doc
->GetContainer();
1063 nsCOMPtr
<nsIDocShell
> docShell
= do_QueryInterface(container
);
1064 nsCString
docShellDestroyed("unknown"_ns
);
1066 bool destroyed
= false;
1067 docShell
->IsBeingDestroyed(&destroyed
);
1068 docShellDestroyed
.Assign(destroyed
? "true"_ns
: "false"_ns
);
1071 mozilla::Maybe
<nsTArray
<mozilla::Telemetry::EventExtraEntry
>> extra
=
1072 mozilla::Some
<nsTArray
<mozilla::Telemetry::EventExtraEntry
>>({
1073 mozilla::Telemetry::EventExtraEntry
{"error_code"_ns
,
1074 nsPrintfCString("%u", code
)},
1075 mozilla::Telemetry::EventExtraEntry
{
1076 "location"_ns
, nsPrintfCString("%u:%u", lineNumber
, colNumber
)},
1077 mozilla::Telemetry::EventExtraEntry
{
1078 "last_line"_ns
, NS_ConvertUTF16toUTF8(mLastLine
)},
1079 mozilla::Telemetry::EventExtraEntry
{
1080 "last_line_len"_ns
, nsPrintfCString("%zu", mLastLine
.Length())},
1081 mozilla::Telemetry::EventExtraEntry
{
1082 "hidden"_ns
, doc
->Hidden() ? "true"_ns
: "false"_ns
},
1083 mozilla::Telemetry::EventExtraEntry
{"destroyed"_ns
,
1087 mozilla::Telemetry::SetEventRecordingEnabled("ysod"_ns
, true);
1088 mozilla::Telemetry::RecordEvent(
1089 mozilla::Telemetry::EventID::Ysod_Shown_Ysod
, mozilla::Some(path
),
1093 // Try to create and initialize the script error.
1094 nsCOMPtr
<nsIScriptError
> serr(do_CreateInstance(NS_SCRIPTERROR_CONTRACTID
));
1095 nsresult rv
= NS_ERROR_FAILURE
;
1097 rv
= serr
->InitWithSourceURI(
1098 errorText
, mURIs
.SafeElementAt(0), mLastLine
, lineNumber
, colNumber
,
1099 nsIScriptError::errorFlag
, "malformed-xml", mInnerWindowID
);
1102 // If it didn't initialize, we can't do any logging.
1103 bool shouldReportError
= NS_SUCCEEDED(rv
);
1105 // mSink might be null here if our parser was terminated.
1106 if (mSink
&& shouldReportError
) {
1107 rv
= mSink
->ReportError(errorText
.get(), sourceText
.get(), serr
,
1108 &shouldReportError
);
1109 if (NS_FAILED(rv
)) {
1110 shouldReportError
= true;
1114 // mOriginalSink might be null here if our parser was terminated.
1115 if (mOriginalSink
) {
1116 nsCOMPtr
<Document
> doc
= do_QueryInterface(mOriginalSink
->GetTarget());
1117 if (doc
&& doc
->SuppressParserErrorConsoleMessages()) {
1118 shouldReportError
= false;
1122 if (shouldReportError
) {
1123 nsCOMPtr
<nsIConsoleService
> cs(do_GetService(NS_CONSOLESERVICE_CONTRACTID
));
1125 cs
->LogMessage(serr
);
1129 return NS_ERROR_HTMLPARSER_STOPPARSING
;
1132 // Because we need to allocate a buffer in the RLBOX sandbox, and copy the data
1133 // to it for Expat to parse, we are limited in size by the memory available in
1134 // the RLBOX sandbox. nsExpatDriver::ChunkAndParseBuffer divides the buffer into
1135 // chunks of sMaxChunkLength characters or less, and passes them to
1136 // nsExpatDriver::ParseBuffer. That should ensure that we almost never run out
1137 // of memory in the sandbox.
1138 void nsExpatDriver::ChunkAndParseBuffer(const char16_t
* aBuffer
,
1139 uint32_t aLength
, bool aIsFinal
,
1140 uint32_t* aPassedToExpat
,
1141 uint32_t* aConsumed
,
1142 XML_Size
* aLastLineLength
) {
1144 *aLastLineLength
= 0;
1146 uint32_t remainder
= aLength
;
1147 while (remainder
> sMaxChunkLength
) {
1148 ParseChunk(aBuffer
, sMaxChunkLength
, ChunkOrBufferIsFinal::None
, aConsumed
,
1150 aBuffer
+= sMaxChunkLength
;
1151 remainder
-= sMaxChunkLength
;
1152 if (NS_FAILED(mInternalState
)) {
1153 // Stop parsing if there's an error (including if we're blocked or
1155 *aPassedToExpat
= aLength
- remainder
;
1160 ParseChunk(aBuffer
, remainder
,
1161 aIsFinal
? ChunkOrBufferIsFinal::FinalChunkAndBuffer
1162 : ChunkOrBufferIsFinal::FinalChunk
,
1163 aConsumed
, aLastLineLength
);
1164 *aPassedToExpat
= aLength
;
1167 void nsExpatDriver::ParseChunk(const char16_t
* aBuffer
, uint32_t aLength
,
1168 ChunkOrBufferIsFinal aIsFinal
,
1169 uint32_t* aConsumed
, XML_Size
* aLastLineLength
) {
1170 NS_ASSERTION((aBuffer
&& aLength
!= 0) || (!aBuffer
&& aLength
== 0), "?");
1171 NS_ASSERTION(mInternalState
!= NS_OK
||
1172 (aIsFinal
== ChunkOrBufferIsFinal::FinalChunkAndBuffer
) ||
1174 "Useless call, we won't call Expat");
1175 MOZ_ASSERT(!BlockedOrInterrupted() || !aBuffer
,
1176 "Non-null buffer when resuming");
1177 MOZ_ASSERT(mExpatParser
);
1179 auto parserBytesBefore_verifier
= [&](auto parserBytesBefore
) {
1180 MOZ_RELEASE_ASSERT(parserBytesBefore
>= 0, "Unexpected value");
1181 MOZ_RELEASE_ASSERT(parserBytesBefore
% sizeof(char16_t
) == 0,
1182 "Consumed part of a char16_t?");
1183 return parserBytesBefore
;
1185 int32_t parserBytesBefore
= RLBOX_EXPAT_SAFE_MCALL(
1186 XML_GetCurrentByteIndex
, parserBytesBefore_verifier
);
1188 if (mInternalState
!= NS_OK
&& !BlockedOrInterrupted()) {
1193 bool inParser
= mInParser
; // Save in-parser status
1195 Maybe
<TransferBuffer
<char16_t
>> buffer
;
1196 if (BlockedOrInterrupted()) {
1197 mInternalState
= NS_OK
; // Resume in case we're blocked.
1198 status
= RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_ResumeParser
, status_verifier
);
1200 buffer
.emplace(Sandbox(), aBuffer
, aLength
);
1201 MOZ_RELEASE_ASSERT(!aBuffer
|| !!*buffer
.ref(),
1202 "Chunking should avoid OOM in ParseBuffer");
1204 status
= RLBOX_EXPAT_SAFE_MCALL(
1205 MOZ_XML_Parse
, status_verifier
,
1206 rlbox::sandbox_reinterpret_cast
<const char*>(*buffer
.ref()),
1207 aLength
* sizeof(char16_t
),
1208 aIsFinal
== ChunkOrBufferIsFinal::FinalChunkAndBuffer
);
1210 mInParser
= inParser
; // Restore in-parser status
1212 auto parserBytesConsumed_verifier
= [&](auto parserBytesConsumed
) {
1213 MOZ_RELEASE_ASSERT(parserBytesConsumed
>= 0, "Unexpected value");
1214 MOZ_RELEASE_ASSERT(parserBytesConsumed
>= parserBytesBefore
,
1215 "How'd this happen?");
1216 MOZ_RELEASE_ASSERT(parserBytesConsumed
% sizeof(char16_t
) == 0,
1217 "Consumed part of a char16_t?");
1218 return parserBytesConsumed
;
1220 int32_t parserBytesConsumed
= RLBOX_EXPAT_SAFE_MCALL(
1221 XML_GetCurrentByteIndex
, parserBytesConsumed_verifier
);
1223 // Consumed something.
1224 *aConsumed
+= (parserBytesConsumed
- parserBytesBefore
) / sizeof(char16_t
);
1226 NS_ASSERTION(status
!= XML_STATUS_SUSPENDED
|| BlockedOrInterrupted(),
1227 "Inconsistent expat suspension state.");
1229 if (status
== XML_STATUS_ERROR
) {
1230 mInternalState
= NS_ERROR_HTMLPARSER_STOPPARSING
;
1233 if (*aConsumed
> 0 &&
1234 (aIsFinal
!= ChunkOrBufferIsFinal::None
|| NS_FAILED(mInternalState
))) {
1235 *aLastLineLength
= RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetCurrentColumnNumber
,
1236 safe_unverified
<XML_Size
>);
1240 nsresult
nsExpatDriver::ResumeParse(nsScanner
& aScanner
, bool aIsFinalChunk
) {
1241 // We keep the scanner pointing to the position where Expat will start
1243 nsScannerIterator currentExpatPosition
;
1244 aScanner
.CurrentPosition(currentExpatPosition
);
1246 // This is the start of the first buffer that we need to pass to Expat.
1247 nsScannerIterator start
= currentExpatPosition
;
1248 start
.advance(mExpatBuffered
);
1250 // This is the end of the last buffer (at this point, more data could come in
1252 nsScannerIterator end
;
1253 aScanner
.EndReading(end
);
1255 MOZ_LOG(gExpatDriverLog
, LogLevel::Debug
,
1256 ("Remaining in expat's buffer: %i, remaining in scanner: %zu.",
1257 mExpatBuffered
, Distance(start
, end
)));
1259 // We want to call Expat if we have more buffers, or if we know there won't
1260 // be more buffers (and so we want to flush the remaining data), or if we're
1261 // currently blocked and there's data in Expat's buffer.
1262 while (start
!= end
|| (aIsFinalChunk
&& !mMadeFinalCallToExpat
) ||
1263 (BlockedOrInterrupted() && mExpatBuffered
> 0)) {
1264 bool noMoreBuffers
= start
== end
&& aIsFinalChunk
;
1265 bool blocked
= BlockedOrInterrupted();
1267 const char16_t
* buffer
;
1269 if (blocked
|| noMoreBuffers
) {
1270 // If we're blocked we just resume Expat so we don't need a buffer, if
1271 // there aren't any more buffers we pass a null buffer to Expat.
1277 gExpatDriverLog
, LogLevel::Debug
,
1278 ("Resuming Expat, will parse data remaining in Expat's "
1279 "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
1280 NS_ConvertUTF16toUTF8(currentExpatPosition
.get(), mExpatBuffered
)
1283 NS_ASSERTION(mExpatBuffered
== Distance(currentExpatPosition
, end
),
1284 "Didn't pass all the data to Expat?");
1286 gExpatDriverLog
, LogLevel::Debug
,
1287 ("Last call to Expat, will parse data remaining in Expat's "
1288 "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
1289 NS_ConvertUTF16toUTF8(currentExpatPosition
.get(), mExpatBuffered
)
1293 buffer
= start
.get();
1294 length
= uint32_t(start
.size_forward());
1296 MOZ_LOG(gExpatDriverLog
, LogLevel::Debug
,
1297 ("Calling Expat, will parse data remaining in Expat's buffer and "
1298 "new data.\nContent of Expat's buffer:\n-----\n%s\n-----\nNew "
1299 "data:\n-----\n%s\n-----\n",
1300 NS_ConvertUTF16toUTF8(currentExpatPosition
.get(), mExpatBuffered
)
1302 NS_ConvertUTF16toUTF8(start
.get(), length
).get()));
1305 uint32_t passedToExpat
;
1307 XML_Size lastLineLength
;
1308 ChunkAndParseBuffer(buffer
, length
, noMoreBuffers
, &passedToExpat
,
1309 &consumed
, &lastLineLength
);
1310 MOZ_ASSERT_IF(passedToExpat
!= length
, NS_FAILED(mInternalState
));
1311 MOZ_ASSERT(consumed
<= passedToExpat
+ mExpatBuffered
);
1313 nsScannerIterator oldExpatPosition
= currentExpatPosition
;
1314 currentExpatPosition
.advance(consumed
);
1316 // We consumed some data, we want to store the last line of data that
1317 // was consumed in case we run into an error (to show the line in which
1318 // the error occurred).
1320 if (lastLineLength
<= consumed
) {
1321 // The length of the last line was less than what expat consumed, so
1322 // there was at least one line break in the consumed data. Store the
1323 // last line until the point where we stopped parsing.
1324 nsScannerIterator startLastLine
= currentExpatPosition
;
1325 startLastLine
.advance(-((ptrdiff_t)lastLineLength
));
1326 if (!CopyUnicodeTo(startLastLine
, currentExpatPosition
, mLastLine
)) {
1327 return (mInternalState
= NS_ERROR_OUT_OF_MEMORY
);
1330 // There was no line break in the consumed data, append the consumed
1332 if (!AppendUnicodeTo(oldExpatPosition
, currentExpatPosition
,
1334 return (mInternalState
= NS_ERROR_OUT_OF_MEMORY
);
1339 mExpatBuffered
+= passedToExpat
- consumed
;
1341 if (BlockedOrInterrupted()) {
1342 MOZ_LOG(gExpatDriverLog
, LogLevel::Debug
,
1343 ("Blocked or interrupted parser (probably for loading linked "
1344 "stylesheets or scripts)."));
1346 aScanner
.SetPosition(currentExpatPosition
, true);
1349 return mInternalState
;
1352 if (noMoreBuffers
&& mExpatBuffered
== 0) {
1353 mMadeFinalCallToExpat
= true;
1356 if (NS_FAILED(mInternalState
)) {
1357 if (RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetErrorCode
, error_verifier
) !=
1359 NS_ASSERTION(mInternalState
== NS_ERROR_HTMLPARSER_STOPPARSING
,
1360 "Unexpected error");
1362 // Look for the next newline after the last one we consumed
1363 nsScannerIterator lastLine
= currentExpatPosition
;
1364 while (lastLine
!= end
) {
1365 length
= uint32_t(lastLine
.size_forward());
1366 uint32_t endOffset
= 0;
1367 const char16_t
* buffer
= lastLine
.get();
1368 while (endOffset
< length
&& buffer
[endOffset
] != '\n' &&
1369 buffer
[endOffset
] != '\r') {
1372 mLastLine
.Append(Substring(buffer
, buffer
+ endOffset
));
1373 if (endOffset
< length
) {
1374 // We found a newline.
1378 lastLine
.advance(length
);
1384 return mInternalState
;
1387 // Either we have more buffers, or we were blocked (and we'll flush in the
1388 // next iteration), or we should have emptied Expat's buffer.
1389 NS_ASSERTION(!noMoreBuffers
|| blocked
||
1390 (mExpatBuffered
== 0 && currentExpatPosition
== end
),
1391 "Unreachable data left in Expat's buffer");
1393 start
.advance(length
);
1395 // It's possible for start to have passed end if we received more data
1396 // (e.g. if we spun the event loop in an inline script). Reload end now
1398 aScanner
.EndReading(end
);
1401 aScanner
.SetPosition(currentExpatPosition
, true);
1404 MOZ_LOG(gExpatDriverLog
, LogLevel::Debug
,
1405 ("Remaining in expat's buffer: %i, remaining in scanner: %zu.",
1406 mExpatBuffered
, Distance(currentExpatPosition
, end
)));
1408 return NS_SUCCEEDED(mInternalState
) ? NS_ERROR_HTMLPARSER_EOF
: NS_OK
;
1411 mozilla::UniquePtr
<mozilla::RLBoxSandboxDataBase
>
1412 RLBoxExpatSandboxPool::CreateSandboxData(uint64_t aSize
) {
1413 // Create expat sandbox
1414 auto sandbox
= mozilla::MakeUnique
<rlbox_sandbox_expat
>();
1416 #ifdef MOZ_WASM_SANDBOXING_EXPAT
1417 bool create_ok
= sandbox
->create_sandbox(/* infallible = */ false, aSize
);
1419 bool create_ok
= sandbox
->create_sandbox();
1422 NS_ENSURE_TRUE(create_ok
, nullptr);
1424 mozilla::UniquePtr
<RLBoxExpatSandboxData
> sbxData
=
1425 mozilla::MakeUnique
<RLBoxExpatSandboxData
>(aSize
);
1427 // Register callbacks common to both system and non-system principals
1428 sbxData
->mHandleXMLDeclaration
=
1429 sandbox
->register_callback(Driver_HandleXMLDeclaration
);
1430 sbxData
->mHandleCharacterData
=
1431 sandbox
->register_callback(Driver_HandleCharacterData
);
1432 sbxData
->mHandleProcessingInstruction
=
1433 sandbox
->register_callback(Driver_HandleProcessingInstruction
);
1434 sbxData
->mHandleDefault
= sandbox
->register_callback(Driver_HandleDefault
);
1435 sbxData
->mHandleExternalEntityRef
=
1436 sandbox
->register_callback(Driver_HandleExternalEntityRef
);
1437 sbxData
->mHandleComment
= sandbox
->register_callback(Driver_HandleComment
);
1438 sbxData
->mHandleStartCdataSection
=
1439 sandbox
->register_callback(Driver_HandleStartCdataSection
);
1440 sbxData
->mHandleEndCdataSection
=
1441 sandbox
->register_callback(Driver_HandleEndCdataSection
);
1442 sbxData
->mHandleStartDoctypeDecl
=
1443 sandbox
->register_callback(Driver_HandleStartDoctypeDecl
);
1444 sbxData
->mHandleEndDoctypeDecl
=
1445 sandbox
->register_callback(Driver_HandleEndDoctypeDecl
);
1447 sbxData
->mSandbox
= std::move(sandbox
);
1452 mozilla::StaticRefPtr
<RLBoxExpatSandboxPool
> RLBoxExpatSandboxPool::sSingleton
;
1454 void RLBoxExpatSandboxPool::Initialize(size_t aDelaySeconds
) {
1455 mozilla::AssertIsOnMainThread();
1456 RLBoxExpatSandboxPool::sSingleton
= new RLBoxExpatSandboxPool(aDelaySeconds
);
1457 ClearOnShutdown(&RLBoxExpatSandboxPool::sSingleton
);
1460 void RLBoxExpatSandboxData::AttachDriver(bool aIsSystemPrincipal
,
1462 MOZ_ASSERT(!mSandbox
->sandbox_storage
);
1463 MOZ_ASSERT(mHandleStartElement
.is_unregistered());
1464 MOZ_ASSERT(mHandleEndElement
.is_unregistered());
1466 if (aIsSystemPrincipal
) {
1467 mHandleStartElement
= mSandbox
->register_callback(
1468 nsExpatDriver::HandleStartElementForSystemPrincipal
);
1469 mHandleEndElement
= mSandbox
->register_callback(
1470 nsExpatDriver::HandleEndElementForSystemPrincipal
);
1472 mHandleStartElement
=
1473 mSandbox
->register_callback(nsExpatDriver::HandleStartElement
);
1475 mSandbox
->register_callback(nsExpatDriver::HandleEndElement
);
1478 mSandbox
->sandbox_storage
= aDriver
;
1481 void RLBoxExpatSandboxData::DetachDriver() {
1482 mSandbox
->sandbox_storage
= nullptr;
1483 mHandleStartElement
.unregister();
1484 mHandleEndElement
.unregister();
1487 RLBoxExpatSandboxData::~RLBoxExpatSandboxData() {
1488 MOZ_ASSERT(mSandbox
);
1490 // DetachDriver should always be called before a sandbox goes back into the
1491 // pool, and thus before it's freed.
1492 MOZ_ASSERT(!mSandbox
->sandbox_storage
);
1493 MOZ_ASSERT(mHandleStartElement
.is_unregistered());
1494 MOZ_ASSERT(mHandleEndElement
.is_unregistered());
1496 // Unregister callbacks
1497 mHandleXMLDeclaration
.unregister();
1498 mHandleCharacterData
.unregister();
1499 mHandleProcessingInstruction
.unregister();
1500 mHandleDefault
.unregister();
1501 mHandleExternalEntityRef
.unregister();
1502 mHandleComment
.unregister();
1503 mHandleStartCdataSection
.unregister();
1504 mHandleEndCdataSection
.unregister();
1505 mHandleStartDoctypeDecl
.unregister();
1506 mHandleEndDoctypeDecl
.unregister();
1508 mSandbox
->destroy_sandbox();
1509 MOZ_COUNT_DTOR(RLBoxExpatSandboxData
);
1512 nsresult
nsExpatDriver::Initialize(nsIURI
* aURI
, nsIContentSink
* aSink
) {
1513 mSink
= do_QueryInterface(aSink
);
1515 NS_ERROR("nsExpatDriver didn't get an nsIExpatSink");
1516 // Make sure future calls to us bail out as needed
1517 mInternalState
= NS_ERROR_UNEXPECTED
;
1518 return mInternalState
;
1521 mOriginalSink
= aSink
;
1523 static const char16_t kExpatSeparator
[] = {kExpatSeparatorChar
, '\0'};
1525 // Get the doc if any
1526 nsCOMPtr
<Document
> doc
= do_QueryInterface(mOriginalSink
->GetTarget());
1528 nsCOMPtr
<nsPIDOMWindowOuter
> win
= doc
->GetWindow();
1529 nsCOMPtr
<nsPIDOMWindowInner
> inner
;
1531 inner
= win
->GetCurrentInnerWindow();
1533 bool aHasHadScriptHandlingObject
;
1534 nsIScriptGlobalObject
* global
=
1535 doc
->GetScriptHandlingObject(aHasHadScriptHandlingObject
);
1537 inner
= do_QueryInterface(global
);
1541 mInnerWindowID
= inner
->WindowID();
1547 // We have to make sure the sandbox is large enough. We unscientifically
1548 // request two MB. Note that the parsing itself is chunked so as not to
1549 // require a large sandbox.
1550 static const uint64_t minSandboxSize
= 2 * 1024 * 1024;
1551 MOZ_ASSERT(!mSandboxPoolData
);
1553 RLBoxExpatSandboxPool::sSingleton
->PopOrCreate(minSandboxSize
);
1554 NS_ENSURE_TRUE(mSandboxPoolData
, NS_ERROR_OUT_OF_MEMORY
);
1556 MOZ_ASSERT(SandboxData());
1558 SandboxData()->AttachDriver(doc
&& doc
->NodePrincipal()->IsSystemPrincipal(),
1559 static_cast<void*>(this));
1561 // Create expat parser.
1562 // We need to copy the encoding and namespace separator into the sandbox.
1563 // For the noop sandbox we pass in the memsuite; for the Wasm sandbox, we
1564 // pass in nullptr to let expat use the standard library memory suite.
1565 auto expatSeparator
= TransferBuffer
<char16_t
>(
1566 Sandbox(), kExpatSeparator
,
1567 nsCharTraits
<char16_t
>::length(kExpatSeparator
) + 1);
1568 MOZ_RELEASE_ASSERT(*expatSeparator
);
1569 auto utf16
= TransferBuffer
<char16_t
>(
1570 Sandbox(), kUTF16
, nsCharTraits
<char16_t
>::length(kUTF16
) + 1);
1571 MOZ_RELEASE_ASSERT(*utf16
);
1572 mExpatParser
= Sandbox()->invoke_sandbox_function(
1573 MOZ_XML_ParserCreate_MM
, *utf16
, nullptr, *expatSeparator
);
1574 NS_ENSURE_TRUE(mExpatParser
, NS_ERROR_FAILURE
);
1576 RLBOX_EXPAT_MCALL(MOZ_XML_SetReturnNSTriplet
, XML_TRUE
);
1579 RLBOX_EXPAT_MCALL(MOZ_XML_SetParamEntityParsing
,
1580 XML_PARAM_ENTITY_PARSING_ALWAYS
);
1583 auto baseURI
= GetExpatBaseURI(aURI
);
1585 TransferBuffer
<XML_Char
>(Sandbox(), &baseURI
[0], ArrayLength(baseURI
));
1586 RLBOX_EXPAT_MCALL(MOZ_XML_SetBase
, *uri
);
1588 // Set up the callbacks
1589 RLBOX_EXPAT_MCALL(MOZ_XML_SetXmlDeclHandler
,
1590 SandboxData()->mHandleXMLDeclaration
);
1591 RLBOX_EXPAT_MCALL(MOZ_XML_SetElementHandler
,
1592 SandboxData()->mHandleStartElement
,
1593 SandboxData()->mHandleEndElement
);
1594 RLBOX_EXPAT_MCALL(MOZ_XML_SetCharacterDataHandler
,
1595 SandboxData()->mHandleCharacterData
);
1596 RLBOX_EXPAT_MCALL(MOZ_XML_SetProcessingInstructionHandler
,
1597 SandboxData()->mHandleProcessingInstruction
);
1598 RLBOX_EXPAT_MCALL(MOZ_XML_SetDefaultHandlerExpand
,
1599 SandboxData()->mHandleDefault
);
1600 RLBOX_EXPAT_MCALL(MOZ_XML_SetExternalEntityRefHandler
,
1601 SandboxData()->mHandleExternalEntityRef
);
1602 RLBOX_EXPAT_MCALL(MOZ_XML_SetCommentHandler
, SandboxData()->mHandleComment
);
1603 RLBOX_EXPAT_MCALL(MOZ_XML_SetCdataSectionHandler
,
1604 SandboxData()->mHandleStartCdataSection
,
1605 SandboxData()->mHandleEndCdataSection
);
1607 RLBOX_EXPAT_MCALL(MOZ_XML_SetParamEntityParsing
,
1608 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
);
1609 RLBOX_EXPAT_MCALL(MOZ_XML_SetDoctypeDeclHandler
,
1610 SandboxData()->mHandleStartDoctypeDecl
,
1611 SandboxData()->mHandleEndDoctypeDecl
);
1613 return mInternalState
;
1617 nsExpatDriver::BuildModel(nsIContentSink
* aSink
) { return mInternalState
; }
1619 void nsExpatDriver::DidBuildModel() {
1621 // Because nsExpatDriver is cycle-collected, it gets destroyed
1622 // asynchronously. We want to eagerly release the sandbox back into the
1623 // pool so that it can be reused immediately, unless this is a reentrant
1624 // call (which we track with mInParser).
1627 mOriginalSink
= nullptr;
1631 NS_IMETHODIMP_(void)
1632 nsExpatDriver::Terminate() {
1633 // XXX - not sure what happens to the unparsed data.
1635 RLBOX_EXPAT_MCALL(MOZ_XML_StopParser
, XML_FALSE
);
1637 mInternalState
= NS_ERROR_HTMLPARSER_STOPPARSING
;
1640 /*************************** Unused methods **********************************/
1642 void nsExpatDriver::MaybeStopParser(nsresult aState
) {
1643 if (NS_FAILED(aState
)) {
1644 // If we had a failure we want to override NS_ERROR_HTMLPARSER_INTERRUPTED
1645 // and we want to override NS_ERROR_HTMLPARSER_BLOCK but not with
1646 // NS_ERROR_HTMLPARSER_INTERRUPTED.
1647 if (NS_SUCCEEDED(mInternalState
) ||
1648 mInternalState
== NS_ERROR_HTMLPARSER_INTERRUPTED
||
1649 (mInternalState
== NS_ERROR_HTMLPARSER_BLOCK
&&
1650 aState
!= NS_ERROR_HTMLPARSER_INTERRUPTED
)) {
1651 mInternalState
= (aState
== NS_ERROR_HTMLPARSER_INTERRUPTED
||
1652 aState
== NS_ERROR_HTMLPARSER_BLOCK
)
1654 : NS_ERROR_HTMLPARSER_STOPPARSING
;
1657 // If we get an error then we need to stop Expat (by calling XML_StopParser
1658 // with false as the last argument). If the parser should be blocked or
1659 // interrupted we need to pause Expat (by calling XML_StopParser with
1660 // true as the last argument).
1662 // Note that due to Bug 1742913, we need to explicitly cast the parameter to
1663 // an int so that the value is correctly zero extended.
1664 int resumable
= BlockedOrInterrupted();
1665 RLBOX_EXPAT_MCALL(MOZ_XML_StopParser
, resumable
);
1666 } else if (NS_SUCCEEDED(mInternalState
)) {
1667 // Only clobber mInternalState with the success code if we didn't block or
1668 // interrupt before.
1669 mInternalState
= aState
;
1673 nsExpatDriver::ExpatBaseURI
nsExpatDriver::GetExpatBaseURI(nsIURI
* aURI
) {
1674 mURIs
.AppendElement(aURI
);
1676 MOZ_RELEASE_ASSERT(mURIs
.Length() <= std::numeric_limits
<XML_Char
>::max());
1678 return ExpatBaseURI(static_cast<XML_Char
>(mURIs
.Length()), XML_T('\0'));
1681 nsIURI
* nsExpatDriver::GetBaseURI(const XML_Char
* aBase
) const {
1682 MOZ_ASSERT(aBase
[0] != '\0' && aBase
[1] == '\0');
1684 if (aBase
[0] == '\0' || aBase
[1] != '\0') {
1688 uint32_t index
= aBase
[0] - 1;
1689 MOZ_ASSERT(index
< mURIs
.Length());
1691 return mURIs
.SafeElementAt(index
);
1694 inline RLBoxExpatSandboxData
* nsExpatDriver::SandboxData() const {
1695 return reinterpret_cast<RLBoxExpatSandboxData
*>(
1696 mSandboxPoolData
->SandboxData());
1699 inline rlbox_sandbox_expat
* nsExpatDriver::Sandbox() const {
1700 return SandboxData()->Sandbox();