Bug 1842773 - Part 5: Add ArrayBuffer.prototype.{maxByteLength,resizable} getters...
[gecko.git] / dom / serializers / nsHTMLContentSerializer.cpp
bloba0b8c8882c6897370325284f0a3ebce6623d26ac
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * nsIContentSerializer implementation that can be used with an
 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
 * string that could be parsed into more or less the original DOM.
 */
13 #include "nsHTMLContentSerializer.h"
15 #include "nsIContent.h"
16 #include "mozilla/dom/Document.h"
17 #include "nsElementTable.h"
18 #include "nsNameSpaceManager.h"
19 #include "nsString.h"
20 #include "nsUnicharUtils.h"
21 #include "nsIDocumentEncoder.h"
22 #include "nsGkAtoms.h"
23 #include "nsIURI.h"
24 #include "nsNetUtil.h"
25 #include "nsEscape.h"
26 #include "nsCRT.h"
27 #include "nsContentUtils.h"
28 #include "nsIScriptElement.h"
29 #include "nsAttrName.h"
30 #include "mozilla/dom/Element.h"
31 #include "nsParserConstants.h"
33 using namespace mozilla::dom;
35 nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer) {
36 RefPtr<nsHTMLContentSerializer> it = new nsHTMLContentSerializer();
37 it.forget(aSerializer);
38 return NS_OK;
41 nsHTMLContentSerializer::nsHTMLContentSerializer() { mIsHTMLSerializer = true; }
43 nsHTMLContentSerializer::~nsHTMLContentSerializer() = default;
45 NS_IMETHODIMP
46 nsHTMLContentSerializer::AppendDocumentStart(Document* aDocument) {
47 return NS_OK;
50 bool nsHTMLContentSerializer::SerializeHTMLAttributes(
51 Element* aElement, Element* aOriginalElement, nsAString& aTagPrefix,
52 const nsAString& aTagNamespaceURI, nsAtom* aTagName, int32_t aNamespace,
53 nsAString& aStr) {
54 MaybeSerializeIsValue(aElement, aStr);
56 int32_t count = aElement->GetAttrCount();
57 if (!count) return true;
59 nsresult rv;
60 nsAutoString valueStr;
62 for (int32_t index = 0; index < count; index++) {
63 const nsAttrName* name = aElement->GetAttrNameAt(index);
64 int32_t namespaceID = name->NamespaceID();
65 nsAtom* attrName = name->LocalName();
67 // Filter out any attribute starting with [-|_]moz
68 nsDependentAtomString attrNameStr(attrName);
69 if (StringBeginsWith(attrNameStr, u"_moz"_ns) ||
70 StringBeginsWith(attrNameStr, u"-moz"_ns)) {
71 continue;
73 aElement->GetAttr(namespaceID, attrName, valueStr);
75 if (mIsCopying && mIsFirstChildOfOL && aTagName == nsGkAtoms::li &&
76 aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::value &&
77 namespaceID == kNameSpaceID_None) {
78 // This is handled separately in SerializeLIValueAttribute()
79 continue;
81 bool isJS = IsJavaScript(aElement, attrName, namespaceID, valueStr);
83 if (((attrName == nsGkAtoms::href && (namespaceID == kNameSpaceID_None ||
84 namespaceID == kNameSpaceID_XLink)) ||
85 (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
86 // Make all links absolute when converting only the selection:
87 if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
88 // Would be nice to handle OBJECT tags, but that gets more complicated
89 // since we have to search the tag list for CODEBASE as well. For now,
90 // just leave them relative.
91 nsIURI* uri = aElement->GetBaseURI();
92 if (uri) {
93 nsAutoString absURI;
94 rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
95 if (NS_SUCCEEDED(rv)) {
96 valueStr = absURI;
102 if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
103 aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content &&
104 namespaceID == kNameSpaceID_None) {
105 // If we're serializing a <meta http-equiv="content-type">,
106 // use the proper value, rather than what's in the document.
107 nsAutoString header;
108 aElement->GetAttr(nsGkAtoms::httpEquiv, header);
109 if (header.LowerCaseEqualsLiteral("content-type")) {
110 valueStr = u"text/html; charset="_ns + NS_ConvertASCIItoUTF16(mCharset);
114 nsDependentAtomString nameStr(attrName);
115 nsAutoString prefix;
116 if (namespaceID == kNameSpaceID_XML) {
117 prefix.AssignLiteral(u"xml");
118 } else if (namespaceID == kNameSpaceID_XLink) {
119 prefix.AssignLiteral(u"xlink");
122 // Expand shorthand attribute.
123 if (aNamespace == kNameSpaceID_XHTML && namespaceID == kNameSpaceID_None &&
124 IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
125 valueStr = nameStr;
127 NS_ENSURE_TRUE(SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS),
128 false);
131 return true;
134 NS_IMETHODIMP
135 nsHTMLContentSerializer::AppendElementStart(Element* aElement,
136 Element* aOriginalElement) {
137 NS_ENSURE_ARG(aElement);
138 NS_ENSURE_STATE(mOutput);
140 bool forceFormat = false;
141 nsresult rv = NS_OK;
142 if (!CheckElementStart(aElement, forceFormat, *mOutput, rv)) {
143 // When we go to AppendElementEnd for this element, we're going to
144 // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent()
145 // now, so our PreLevel() doesn't get confused.
146 MaybeEnterInPreContent(aElement);
147 return rv;
150 NS_ENSURE_SUCCESS(rv, rv);
152 nsAtom* name = aElement->NodeInfo()->NameAtom();
153 int32_t ns = aElement->GetNameSpaceID();
155 bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
157 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
158 if (mColPos && lineBreakBeforeOpen) {
159 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
160 } else {
161 NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
162 NS_ERROR_OUT_OF_MEMORY);
164 if (!mColPos) {
165 NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
166 } else if (mAddSpace) {
167 bool result = AppendToString(char16_t(' '), *mOutput);
168 mAddSpace = false;
169 NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
171 } else if (mAddSpace) {
172 bool result = AppendToString(char16_t(' '), *mOutput);
173 mAddSpace = false;
174 NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
175 } else {
176 NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
177 NS_ERROR_OUT_OF_MEMORY);
179 // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode
180 // wasn't called
181 mAddNewlineForRootNode = false;
183 NS_ENSURE_TRUE(AppendToString(kLessThan, *mOutput), NS_ERROR_OUT_OF_MEMORY);
185 NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput),
186 NS_ERROR_OUT_OF_MEMORY);
188 MaybeEnterInPreContent(aElement);
190 // for block elements, we increase the indentation
191 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel())
192 NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY);
194 // Need to keep track of OL and LI elements in order to get ordinal number
195 // for the LI.
196 if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
197 // We are copying and current node is an OL;
198 // Store its start attribute value in olState->startVal.
199 nsAutoString start;
200 int32_t startAttrVal = 0;
202 aElement->GetAttr(nsGkAtoms::start, start);
203 if (!start.IsEmpty()) {
204 nsresult rv = NS_OK;
205 startAttrVal = start.ToInteger(&rv);
206 // If OL has "start" attribute, first LI element has to start with that
207 // value Therefore subtracting 1 as all the LI elements are incrementing
208 // it before using it; In failure of ToInteger(), default StartAttrValue
209 // to 0.
210 if (NS_SUCCEEDED(rv))
211 startAttrVal--;
212 else
213 startAttrVal = 0;
215 mOLStateStack.AppendElement(olState(startAttrVal, true));
218 if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
219 mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
220 if (mIsFirstChildOfOL) {
221 // If OL is parent of this LI, serialize attributes in different manner.
222 NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, *mOutput),
223 NS_ERROR_OUT_OF_MEMORY);
227 // Even LI passed above have to go through this
228 // for serializing attributes other than "value".
229 nsAutoString dummyPrefix;
230 NS_ENSURE_TRUE(
231 SerializeHTMLAttributes(aElement, aOriginalElement, dummyPrefix, u""_ns,
232 name, ns, *mOutput),
233 NS_ERROR_OUT_OF_MEMORY);
235 NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
236 NS_ERROR_OUT_OF_MEMORY);
238 if (ns == kNameSpaceID_XHTML &&
239 (name == nsGkAtoms::script || name == nsGkAtoms::style ||
240 (name == nsGkAtoms::noscript &&
241 aElement->OwnerDoc()->IsScriptEnabled()) ||
242 name == nsGkAtoms::noframes)) {
243 ++mDisableEntityEncoding;
246 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
247 LineBreakAfterOpen(ns, name)) {
248 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
251 NS_ENSURE_TRUE(AfterElementStart(aElement, aOriginalElement, *mOutput),
252 NS_ERROR_OUT_OF_MEMORY);
254 return NS_OK;
257 NS_IMETHODIMP
258 nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
259 Element* aOriginalElement) {
260 NS_ENSURE_ARG(aElement);
261 NS_ENSURE_STATE(mOutput);
263 nsAtom* name = aElement->NodeInfo()->NameAtom();
264 int32_t ns = aElement->GetNameSpaceID();
266 if (ns == kNameSpaceID_XHTML &&
267 (name == nsGkAtoms::script || name == nsGkAtoms::style ||
268 (name == nsGkAtoms::noscript &&
269 aElement->OwnerDoc()->IsScriptEnabled()) ||
270 name == nsGkAtoms::noframes)) {
271 --mDisableEntityEncoding;
274 bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
275 aElement->HasAttr(nsGkAtoms::mozdirty);
277 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
278 DecrIndentation(name);
281 if (name == nsGkAtoms::script) {
282 nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
284 if (ShouldMaintainPreLevel() && script && script->IsMalformed()) {
285 // We're looking at a malformed script tag. This means that the end tag
286 // was missing in the source. Imitate that here by not serializing the end
287 // tag.
288 --PreLevel();
289 return NS_OK;
291 } else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
292 NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
293 /* Though at this point we must always have an state to be deleted as all
294 the OL opening tags are supposed to push an olState object to the stack*/
295 if (!mOLStateStack.IsEmpty()) {
296 mOLStateStack.RemoveLastElement();
300 if (ns == kNameSpaceID_XHTML) {
301 bool isContainer =
302 nsHTMLElement::IsContainer(nsHTMLTags::CaseSensitiveAtomTagToId(name));
303 if (!isContainer) {
304 // Keep this in sync with the cleanup at the end of this method.
305 MOZ_ASSERT(name != nsGkAtoms::body);
306 MaybeLeaveFromPreContent(aElement);
307 return NS_OK;
311 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
312 bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
314 if (mColPos && lineBreakBeforeClose) {
315 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
317 if (!mColPos) {
318 NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
319 } else if (mAddSpace) {
320 bool result = AppendToString(char16_t(' '), *mOutput);
321 mAddSpace = false;
322 NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
324 } else if (mAddSpace) {
325 bool result = AppendToString(char16_t(' '), *mOutput);
326 mAddSpace = false;
327 NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
330 NS_ENSURE_TRUE(AppendToString(kEndTag, *mOutput), NS_ERROR_OUT_OF_MEMORY);
331 NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput),
332 NS_ERROR_OUT_OF_MEMORY);
333 NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
334 NS_ERROR_OUT_OF_MEMORY);
336 // Keep this cleanup in sync with the IsContainer() early return above.
337 MaybeLeaveFromPreContent(aElement);
339 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
340 LineBreakAfterClose(ns, name)) {
341 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
342 } else {
343 MaybeFlagNewlineForRootNode(aElement);
346 if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
347 --mInBody;
350 return NS_OK;
353 static const uint16_t kValNBSP = 160;
355 #define _ 0
357 // This table indexes into kEntityStrings[].
358 const uint8_t nsHTMLContentSerializer::kEntities[] = {
359 // clang-format off
360 _, _, _, _, _, _, _, _, _, _,
361 _, _, _, _, _, _, _, _, _, _,
362 _, _, _, _, _, _, _, _, _, _,
363 _, _, _, _, _, _, _, _, 2, _,
364 _, _, _, _, _, _, _, _, _, _,
365 _, _, _, _, _, _, _, _, _, _,
366 3, _, 4, _, _, _, _, _, _, _,
367 _, _, _, _, _, _, _, _, _, _,
368 _, _, _, _, _, _, _, _, _, _,
369 _, _, _, _, _, _, _, _, _, _,
370 _, _, _, _, _, _, _, _, _, _,
371 _, _, _, _, _, _, _, _, _, _,
372 _, _, _, _, _, _, _, _, _, _,
373 _, _, _, _, _, _, _, _, _, _,
374 _, _, _, _, _, _, _, _, _, _,
375 _, _, _, _, _, _, _, _, _, _,
377 // clang-format on
380 // This table indexes into kEntityStrings[].
381 const uint8_t nsHTMLContentSerializer::kAttrEntities[] = {
382 // clang-format off
383 _, _, _, _, _, _, _, _, _, _,
384 _, _, _, _, _, _, _, _, _, _,
385 _, _, _, _, _, _, _, _, _, _,
386 _, _, _, _, 1, _, _, _, 2, _,
387 _, _, _, _, _, _, _, _, _, _,
388 _, _, _, _, _, _, _, _, _, _,
389 3, _, 4, _, _, _, _, _, _, _,
390 _, _, _, _, _, _, _, _, _, _,
391 _, _, _, _, _, _, _, _, _, _,
392 _, _, _, _, _, _, _, _, _, _,
393 _, _, _, _, _, _, _, _, _, _,
394 _, _, _, _, _, _, _, _, _, _,
395 _, _, _, _, _, _, _, _, _, _,
396 _, _, _, _, _, _, _, _, _, _,
397 _, _, _, _, _, _, _, _, _, _,
398 _, _, _, _, _, _, _, _, _, _,
400 // clang-format on
403 #undef _
405 const char* const nsHTMLContentSerializer::kEntityStrings[] = {
406 /* 0 */ nullptr,
407 /* 1 */ "&quot;",
408 /* 2 */ "&amp;",
409 /* 3 */ "&lt;",
410 /* 4 */ "&gt;",
411 /* 5 */ "&nbsp;"};
413 bool nsHTMLContentSerializer::AppendAndTranslateEntities(
414 const nsAString& aStr, nsAString& aOutputStr) {
415 if (mBodyOnly && !mInBody) {
416 return true;
419 if (mDisableEntityEncoding) {
420 return aOutputStr.Append(aStr, mozilla::fallible);
423 if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities)) {
424 // Per the API documentation, encode &nbsp;, &amp;, &lt;, &gt;, and &quot;
425 if (mInAttribute) {
426 return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>(
427 aStr, aOutputStr, kAttrEntities, kEntityStrings);
430 return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>(
431 aStr, aOutputStr, kEntities, kEntityStrings);
434 // We don't want to call into our superclass 2-arg version of
435 // AppendAndTranslateEntities, because it wants to encode more characters
436 // than we do. Use our tables, but avoid encoding &nbsp; by passing in a
437 // smaller max index. This will only encode &amp;, &lt;, &gt;, and &quot;.
438 if (mInAttribute) {
439 return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>(
440 aStr, aOutputStr, kAttrEntities, kEntityStrings);
443 return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>(
444 aStr, aOutputStr, kEntities, kEntityStrings);