1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * nsIContentSerializer implementation that can be used with an
9 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
10 * string that could be parsed into more or less the original DOM.
13 #include "nsHTMLContentSerializer.h"
15 #include "nsIContent.h"
16 #include "mozilla/dom/Document.h"
17 #include "nsElementTable.h"
18 #include "nsNameSpaceManager.h"
20 #include "nsUnicharUtils.h"
21 #include "nsIDocumentEncoder.h"
22 #include "nsGkAtoms.h"
24 #include "nsNetUtil.h"
27 #include "nsContentUtils.h"
28 #include "nsIScriptElement.h"
29 #include "nsAttrName.h"
30 #include "mozilla/dom/Element.h"
31 #include "nsParserConstants.h"
33 using namespace mozilla::dom
;
// Factory function: allocates a new nsHTMLContentSerializer, holds it in a
// RefPtr, and transfers that reference to the caller through the
// aSerializer out-parameter via forget().
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt.
35 nsresult
NS_NewHTMLContentSerializer(nsIContentSerializer
** aSerializer
) {
36 RefPtr
<nsHTMLContentSerializer
> it
= new nsHTMLContentSerializer();
37 it
.forget(aSerializer
);
// Constructor: its only action is to set mIsHTMLSerializer to true.
// NOTE(review): mIsHTMLSerializer is not declared in this file; presumably it
// is inherited state that marks this serializer as the HTML flavour.
41 nsHTMLContentSerializer::nsHTMLContentSerializer() { mIsHTMLSerializer
= true; }
// Defaulted destructor: this class adds no explicit teardown of its own.
43 nsHTMLContentSerializer::~nsHTMLContentSerializer() = default;
// AppendDocumentStart: takes the Document being serialized.
// NOTE(review): only the signature is visible in this excerpt; the return
// type and the body are not shown, so their behavior is not documented here.
46 nsHTMLContentSerializer::AppendDocumentStart(Document
* aDocument
) {
// Serializes the attributes of aElement into aStr, one SerializeAttr() call
// per attribute.
//
// Visible processing, per attribute:
//   - attributes whose local name starts with "_moz" or "-moz" are filtered;
//   - the "value" attribute of an <li> that is the first child of an <ol>
//     is left to SerializeLIValueAttribute() while copying;
//   - href (no namespace or XLink) and src (no namespace) values are passed
//     to NS_MakeAbsoluteURI when OutputAbsoluteLinks is set;
//   - the content attribute of a <meta http-equiv="content-type"> is
//     replaced with "text/html; charset=" + mCharset when
//     mRewriteEncodingDeclaration is set;
//   - attributes in the XML / XLink namespaces get the "xml" / "xlink" prefix.
//
// Returns true right away when the element has no attributes.
// NOTE(review): the tail of the parameter list (including aStr), several
// branch bodies and the final return are not visible in this excerpt.
50 bool nsHTMLContentSerializer::SerializeHTMLAttributes(
51 Element
* aElement
, Element
* aOriginalElement
, nsAString
& aTagPrefix
,
52 const nsAString
& aTagNamespaceURI
, nsAtom
* aTagName
, int32_t aNamespace
,
54 MaybeSerializeIsValue(aElement
, aStr
);
// Nothing more to do for an element without attributes.
56 int32_t count
= aElement
->GetAttrCount();
57 if (!count
) return true;
// Scratch buffer reused for every attribute value in the loop below.
60 nsAutoString valueStr
;
62 for (int32_t index
= 0; index
< count
; index
++) {
63 const nsAttrName
* name
= aElement
->GetAttrNameAt(index
);
64 int32_t namespaceID
= name
->NamespaceID();
65 nsAtom
* attrName
= name
->LocalName();
67 // Filter out any attribute starting with [-|_]moz
68 nsDependentAtomString
attrNameStr(attrName
);
69 if (StringBeginsWith(attrNameStr
, u
"_moz"_ns
) ||
70 StringBeginsWith(attrNameStr
, u
"-moz"_ns
)) {
// Fetch the current value of this attribute into valueStr.
73 aElement
->GetAttr(namespaceID
, attrName
, valueStr
);
75 if (mIsCopying
&& mIsFirstChildOfOL
&& aTagName
== nsGkAtoms::li
&&
76 aNamespace
== kNameSpaceID_XHTML
&& attrName
== nsGkAtoms::value
&&
77 namespaceID
== kNameSpaceID_None
) {
78 // This is handled separately in SerializeLIValueAttribute()
// isJS is negated and passed as the last argument to SerializeAttr() below.
// NOTE(review): the meaning of that argument is not visible here — confirm
// against SerializeAttr()'s declaration.
81 bool isJS
= IsJavaScript(aElement
, attrName
, namespaceID
, valueStr
);
83 if (((attrName
== nsGkAtoms::href
&& (namespaceID
== kNameSpaceID_None
||
84 namespaceID
== kNameSpaceID_XLink
)) ||
85 (attrName
== nsGkAtoms::src
&& namespaceID
== kNameSpaceID_None
))) {
86 // Make all links absolute when converting only the selection:
87 if (mFlags
& nsIDocumentEncoder::OutputAbsoluteLinks
) {
88 // Would be nice to handle OBJECT tags, but that gets more complicated
89 // since we have to search the tag list for CODEBASE as well. For now,
90 // just leave them relative.
91 nsIURI
* uri
= aElement
->GetBaseURI();
// NOTE(review): the declarations of absURI and rv, and what happens on
// success, are not visible in this excerpt.
94 rv
= NS_MakeAbsoluteURI(absURI
, valueStr
, uri
);
95 if (NS_SUCCEEDED(rv
)) {
102 if (mRewriteEncodingDeclaration
&& aTagName
== nsGkAtoms::meta
&&
103 aNamespace
== kNameSpaceID_XHTML
&& attrName
== nsGkAtoms::content
&&
104 namespaceID
== kNameSpaceID_None
) {
105 // If we're serializing a <meta http-equiv="content-type">,
106 // use the proper value, rather than what's in the document.
108 aElement
->GetAttr(nsGkAtoms::httpEquiv
, header
);
// The http-equiv comparison is case-insensitive.
109 if (header
.LowerCaseEqualsLiteral("content-type")) {
110 valueStr
= u
"text/html; charset="_ns
+ NS_ConvertASCIItoUTF16(mCharset
);
114 nsDependentAtomString
nameStr(attrName
);
// Only the XML and XLink namespaces get a prefix assigned here.
116 if (namespaceID
== kNameSpaceID_XML
) {
117 prefix
.AssignLiteral(u
"xml");
118 } else if (namespaceID
== kNameSpaceID_XLink
) {
119 prefix
.AssignLiteral(u
"xlink");
122 // Expand shorthand attribute.
// NOTE(review): the body of this branch is not visible in this excerpt.
123 if (aNamespace
== kNameSpaceID_XHTML
&& namespaceID
== kNameSpaceID_None
&&
124 IsShorthandAttr(attrName
, aTagName
) && valueStr
.IsEmpty()) {
127 NS_ENSURE_TRUE(SerializeAttr(prefix
, nameStr
, valueStr
, aStr
, !isJS
),
// Writes the start tag of aElement to *mOutput: optional formatting
// whitespace, kLessThan, the element name, its attributes (through
// SerializeHTMLAttributes) and kGreaterThan.
//
// Additional bookkeeping visible here:
//   - enters pre-formatted content tracking via MaybeEnterInPreContent();
//   - while copying, pushes an olState for each <ol> and computes
//     mIsFirstChildOfOL for each <li>;
//   - increments mDisableEntityEncoding for script, style, noframes and
//     (when scripting is enabled on the owner document) noscript.
//
// Requires a non-null aElement and a set mOutput (NS_ENSURE_ARG /
// NS_ENSURE_STATE). Failed appends are reported as NS_ERROR_OUT_OF_MEMORY.
// NOTE(review): the declarations of rv and start, some branch bodies and the
// final return are not visible in this excerpt.
135 nsHTMLContentSerializer::AppendElementStart(Element
* aElement
,
136 Element
* aOriginalElement
) {
137 NS_ENSURE_ARG(aElement
);
138 NS_ENSURE_STATE(mOutput
);
// forceFormat is handed to CheckElementStart() below, which may update it.
140 bool forceFormat
= false;
142 if (!CheckElementStart(aElement
, forceFormat
, *mOutput
, rv
)) {
143 // When we go to AppendElementEnd for this element, we're going to
144 // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent()
145 // now, so our PreLevel() doesn't get confused.
146 MaybeEnterInPreContent(aElement
);
150 NS_ENSURE_SUCCESS(rv
, rv
);
152 nsAtom
* name
= aElement
->NodeInfo()->NameAtom();
153 int32_t ns
= aElement
->GetNameSpaceID();
155 bool lineBreakBeforeOpen
= LineBreakBeforeOpen(ns
, name
);
// Formatting whitespace is only emitted when formatting is on, raw output is
// off, and we are not inside pre-formatted content.
157 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel()) {
158 if (mColPos
&& lineBreakBeforeOpen
) {
159 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
161 NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput
),
162 NS_ERROR_OUT_OF_MEMORY
);
165 NS_ENSURE_TRUE(AppendIndentation(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
166 } else if (mAddSpace
) {
167 bool result
= AppendToString(char16_t(' '), *mOutput
);
169 NS_ENSURE_TRUE(result
, NS_ERROR_OUT_OF_MEMORY
);
171 } else if (mAddSpace
) {
172 bool result
= AppendToString(char16_t(' '), *mOutput
);
174 NS_ENSURE_TRUE(result
, NS_ERROR_OUT_OF_MEMORY
);
176 NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput
),
177 NS_ERROR_OUT_OF_MEMORY
);
179 // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode
181 mAddNewlineForRootNode
= false;
// Emit the opening delimiter followed by the element name.
183 NS_ENSURE_TRUE(AppendToString(kLessThan
, *mOutput
), NS_ERROR_OUT_OF_MEMORY
);
185 NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name
), *mOutput
),
186 NS_ERROR_OUT_OF_MEMORY
);
188 MaybeEnterInPreContent(aElement
);
190 // for block elements, we increase the indentation
191 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel())
192 NS_ENSURE_TRUE(IncrIndentation(name
), NS_ERROR_OUT_OF_MEMORY
);
194 // Need to keep track of OL and LI elements in order to get ordinal number
196 if (mIsCopying
&& name
== nsGkAtoms::ol
&& ns
== kNameSpaceID_XHTML
) {
197 // We are copying and current node is an OL;
198 // Store its start attribute value in olState->startVal.
200 int32_t startAttrVal
= 0;
202 aElement
->GetAttr(nsGkAtoms::start
, start
);
203 if (!start
.IsEmpty()) {
205 startAttrVal
= start
.ToInteger(&rv
);
206 // If OL has "start" attribute, first LI element has to start with that
207 // value. Therefore subtracting 1 as all the LI elements are incrementing
208 // it before using it; In failure of ToInteger(), default StartAttrValue
210 if (NS_SUCCEEDED(rv
))
// One olState is pushed per <ol>; AppendElementEnd() removes it again.
215 mOLStateStack
.AppendElement(olState(startAttrVal
, true));
218 if (mIsCopying
&& name
== nsGkAtoms::li
&& ns
== kNameSpaceID_XHTML
) {
219 mIsFirstChildOfOL
= IsFirstChildOfOL(aOriginalElement
);
220 if (mIsFirstChildOfOL
) {
221 // If OL is parent of this LI, serialize attributes in different manner.
222 NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement
, *mOutput
),
223 NS_ERROR_OUT_OF_MEMORY
);
227 // Even LI passed above have to go through this
228 // for serializing attributes other than "value".
229 nsAutoString dummyPrefix
;
231 SerializeHTMLAttributes(aElement
, aOriginalElement
, dummyPrefix
, u
""_ns
,
233 NS_ERROR_OUT_OF_MEMORY
);
235 NS_ENSURE_TRUE(AppendToString(kGreaterThan
, *mOutput
),
236 NS_ERROR_OUT_OF_MEMORY
);
// Entering one of these elements bumps mDisableEntityEncoding; the same
// condition in AppendElementEnd() decrements it again.
238 if (ns
== kNameSpaceID_XHTML
&&
239 (name
== nsGkAtoms::script
|| name
== nsGkAtoms::style
||
240 (name
== nsGkAtoms::noscript
&&
241 aElement
->OwnerDoc()->IsScriptEnabled()) ||
242 name
== nsGkAtoms::noframes
)) {
243 ++mDisableEntityEncoding
;
246 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel() &&
247 LineBreakAfterOpen(ns
, name
)) {
248 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
251 NS_ENSURE_TRUE(AfterElementStart(aElement
, aOriginalElement
, *mOutput
),
252 NS_ERROR_OUT_OF_MEMORY
);
// Writes the end tag of aElement to *mOutput: optional formatting
// whitespace, kEndTag, the element name and kGreaterThan.
//
// Before writing, it undoes state set up by AppendElementStart():
//   - decrements mDisableEntityEncoding for script, style, noframes and
//     (when scripting is enabled on the owner document) noscript;
//   - decreases indentation when formatting applies;
//   - while copying, removes the olState pushed for an <ol>.
// A malformed <script> (per nsIScriptElement::IsMalformed) and XHTML elements
// that are not containers get special handling that, per the comments below,
// skips the end tag.
//
// Requires a non-null aElement and a set mOutput. Failed appends are reported
// as NS_ERROR_OUT_OF_MEMORY.
// NOTE(review): the early returns, the <body> branch body and the final
// return are not visible in this excerpt.
258 nsHTMLContentSerializer::AppendElementEnd(Element
* aElement
,
259 Element
* aOriginalElement
) {
260 NS_ENSURE_ARG(aElement
);
261 NS_ENSURE_STATE(mOutput
);
263 nsAtom
* name
= aElement
->NodeInfo()->NameAtom();
264 int32_t ns
= aElement
->GetNameSpaceID();
// Mirror of the increment performed in AppendElementStart().
266 if (ns
== kNameSpaceID_XHTML
&&
267 (name
== nsGkAtoms::script
|| name
== nsGkAtoms::style
||
268 (name
== nsGkAtoms::noscript
&&
269 aElement
->OwnerDoc()->IsScriptEnabled()) ||
270 name
== nsGkAtoms::noframes
)) {
271 --mDisableEntityEncoding
;
// Formatting is forced when the element carries the mozdirty attribute,
// unless the OutputIgnoreMozDirty flag is set.
274 bool forceFormat
= !(mFlags
& nsIDocumentEncoder::OutputIgnoreMozDirty
) &&
275 aElement
->HasAttr(nsGkAtoms::mozdirty
);
277 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel()) {
278 DecrIndentation(name
);
281 if (name
== nsGkAtoms::script
) {
282 nsCOMPtr
<nsIScriptElement
> script
= do_QueryInterface(aElement
);
284 if (ShouldMaintainPreLevel() && script
&& script
->IsMalformed()) {
285 // We're looking at a malformed script tag. This means that the end tag
286 // was missing in the source. Imitate that here by not serializing the end
291 } else if (mIsCopying
&& name
== nsGkAtoms::ol
&& ns
== kNameSpaceID_XHTML
) {
292 NS_ASSERTION((!mOLStateStack
.IsEmpty()), "Cannot have an empty OL Stack");
293 /* Though at this point we must always have an state to be deleted as all
294 the OL opening tags are supposed to push an olState object to the stack*/
295 if (!mOLStateStack
.IsEmpty()) {
296 mOLStateStack
.RemoveLastElement();
300 if (ns
== kNameSpaceID_XHTML
) {
// NOTE(review): the variable receiving this result and the branch that uses
// it are not visible in this excerpt.
302 nsHTMLElement::IsContainer(nsHTMLTags::CaseSensitiveAtomTagToId(name
));
304 // Keep this in sync with the cleanup at the end of this method.
305 MOZ_ASSERT(name
!= nsGkAtoms::body
);
306 MaybeLeaveFromPreContent(aElement
);
// Same formatting gate as in AppendElementStart().
311 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel()) {
312 bool lineBreakBeforeClose
= LineBreakBeforeClose(ns
, name
);
314 if (mColPos
&& lineBreakBeforeClose
) {
315 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
318 NS_ENSURE_TRUE(AppendIndentation(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
319 } else if (mAddSpace
) {
320 bool result
= AppendToString(char16_t(' '), *mOutput
);
322 NS_ENSURE_TRUE(result
, NS_ERROR_OUT_OF_MEMORY
);
324 } else if (mAddSpace
) {
325 bool result
= AppendToString(char16_t(' '), *mOutput
);
327 NS_ENSURE_TRUE(result
, NS_ERROR_OUT_OF_MEMORY
);
// Emit the end tag itself: kEndTag, element name, kGreaterThan.
330 NS_ENSURE_TRUE(AppendToString(kEndTag
, *mOutput
), NS_ERROR_OUT_OF_MEMORY
);
331 NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name
), *mOutput
),
332 NS_ERROR_OUT_OF_MEMORY
);
333 NS_ENSURE_TRUE(AppendToString(kGreaterThan
, *mOutput
),
334 NS_ERROR_OUT_OF_MEMORY
);
336 // Keep this cleanup in sync with the IsContainer() early return above.
337 MaybeLeaveFromPreContent(aElement
);
339 if ((mDoFormat
|| forceFormat
) && !mDoRaw
&& !PreLevel() &&
340 LineBreakAfterClose(ns
, name
)) {
341 NS_ENSURE_TRUE(AppendNewLineToString(*mOutput
), NS_ERROR_OUT_OF_MEMORY
);
343 MaybeFlagNewlineForRootNode(aElement
);
// NOTE(review): the body of the <body> branch is not visible in this excerpt.
346 if (name
== nsGkAtoms::body
&& ns
== kNameSpaceID_XHTML
) {
// Code point 160 (U+00A0, no-break space). AppendAndTranslateEntities()
// passes it as the template argument to the nsXMLContentSerializer overload
// when the OutputEncodeBasicEntities flag is set.
353 static const uint16_t kValNBSP
= 160;
// Per-code-point lookup table used by AppendAndTranslateEntities() for text
// outside attribute values; each entry is an index into kEntityStrings[].
// Assuming the first visible row starts at code point 0 (ten entries per
// row), the non-placeholder entries are 38 ('&') -> 2, 60 ('<') -> 3 and
// 62 ('>') -> 4.
// NOTE(review): the definition of the `_` placeholder and the end of the
// table are not visible in this excerpt.
357 // This table indexes into kEntityStrings[].
358 const uint8_t nsHTMLContentSerializer::kEntities
[] = {
360 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
361 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
362 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
363 _
, _
, _
, _
, _
, _
, _
, _
, 2, _
,
364 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
365 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
366 3, _
, 4, _
, _
, _
, _
, _
, _
, _
,
367 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
368 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
369 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
370 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
371 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
372 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
373 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
374 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
375 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
// Per-code-point lookup table passed by AppendAndTranslateEntities() in place
// of kEntities (NOTE(review): presumably for text inside attribute values —
// the selecting condition is not visible in this excerpt); each entry is an
// index into kEntityStrings[].
// Assuming the first visible row starts at code point 0 (ten entries per
// row), the non-placeholder entries are 34 ('"') -> 1, 38 ('&') -> 2,
// 60 ('<') -> 3 and 62 ('>') -> 4. The '"' entry is the only visible
// difference from kEntities.
// NOTE(review): the definition of the `_` placeholder and the end of the
// table are not visible in this excerpt.
380 // This table indexes into kEntityStrings[].
381 const uint8_t nsHTMLContentSerializer::kAttrEntities
[] = {
383 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
384 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
385 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
386 _
, _
, _
, _
, 1, _
, _
, _
, 2, _
,
387 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
388 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
389 3, _
, 4, _
, _
, _
, _
, _
, _
, _
,
390 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
391 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
392 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
393 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
394 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
395 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
396 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
397 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
398 _
, _
, _
, _
, _
, _
, _
, _
, _
, _
,
// Table of entity strings that the kEntities and kAttrEntities tables index
// into.
// NOTE(review): the initializer entries are not visible in this excerpt.
405 const char* const nsHTMLContentSerializer::kEntityStrings
[] = {
// Appends aStr to aOutputStr, replacing selected characters with entities.
//
//   - When mDisableEntityEncoding is non-zero the text is appended verbatim
//     (fallible append).
//   - With the OutputEncodeBasicEntities flag, the templated
//     nsXMLContentSerializer overload is used with kValNBSP as its bound.
//   - Otherwise the same overload is used with kGTVal as its bound, which
//     per the comment below limits encoding to &, <, > and ".
// Each case picks between kAttrEntities and kEntities.
// NOTE(review): the conditions selecting between the two tables, the body of
// the mBodyOnly/mInBody branch and the definition of kGTVal are not visible
// in this excerpt.
413 bool nsHTMLContentSerializer::AppendAndTranslateEntities(
414 const nsAString
& aStr
, nsAString
& aOutputStr
) {
415 if (mBodyOnly
&& !mInBody
) {
419 if (mDisableEntityEncoding
) {
420 return aOutputStr
.Append(aStr
, mozilla::fallible
);
423 if (mFlags
& (nsIDocumentEncoder::OutputEncodeBasicEntities
)) {
424 // Per the API documentation, encode &nbsp;, &, <, >, and "
426 return nsXMLContentSerializer::AppendAndTranslateEntities
<kValNBSP
>(
427 aStr
, aOutputStr
, kAttrEntities
, kEntityStrings
);
430 return nsXMLContentSerializer::AppendAndTranslateEntities
<kValNBSP
>(
431 aStr
, aOutputStr
, kEntities
, kEntityStrings
);
434 // We don't want to call into our superclass 2-arg version of
435 // AppendAndTranslateEntities, because it wants to encode more characters
436 // than we do. Use our tables, but avoid encoding by passing in a
437 // smaller max index. This will only encode &, <, >, and ".
439 return nsXMLContentSerializer::AppendAndTranslateEntities
<kGTVal
>(
440 aStr
, aOutputStr
, kAttrEntities
, kEntityStrings
);
443 return nsXMLContentSerializer::AppendAndTranslateEntities
<kGTVal
>(
444 aStr
, aOutputStr
, kEntities
, kEntityStrings
);