Bumping gaia.json for 8 gaia revision(s) a=gaia-bump
[gecko.git] / dom / base / nsHTMLContentSerializer.cpp
blob6c14f5147f69632d89c98f0882c47d532c3119f6
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * nsIContentSerializer implementation that can be used with an
 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
 * string that could be parsed into more or less the original DOM.
 */
13 #include "nsHTMLContentSerializer.h"
15 #include "nsIDOMElement.h"
16 #include "nsIContent.h"
17 #include "nsIDocument.h"
18 #include "nsNameSpaceManager.h"
19 #include "nsString.h"
20 #include "nsUnicharUtils.h"
21 #include "nsXPIDLString.h"
22 #include "nsIServiceManager.h"
23 #include "nsIDocumentEncoder.h"
24 #include "nsGkAtoms.h"
25 #include "nsIURI.h"
26 #include "nsNetUtil.h"
27 #include "nsEscape.h"
28 #include "nsITextToSubURI.h"
29 #include "nsCRT.h"
30 #include "nsIParserService.h"
31 #include "nsContentUtils.h"
32 #include "nsLWBrkCIID.h"
33 #include "nsIScriptElement.h"
34 #include "nsAttrName.h"
35 #include "nsIDocShell.h"
36 #include "nsIEditor.h"
37 #include "nsIHTMLEditor.h"
38 #include "mozilla/dom/Element.h"
39 #include "nsParserConstants.h"
41 using namespace mozilla::dom;
43 nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
45 nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
46 if (!it) {
47 return NS_ERROR_OUT_OF_MEMORY;
50 return CallQueryInterface(it, aSerializer);
53 nsHTMLContentSerializer::nsHTMLContentSerializer()
55 mIsHTMLSerializer = true;
58 nsHTMLContentSerializer::~nsHTMLContentSerializer()
63 NS_IMETHODIMP
64 nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
65 nsAString& aStr)
67 return NS_OK;
70 void
71 nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
72 nsIContent *aOriginalElement,
73 nsAString& aTagPrefix,
74 const nsAString& aTagNamespaceURI,
75 nsIAtom* aTagName,
76 int32_t aNamespace,
77 nsAString& aStr)
79 int32_t count = aContent->GetAttrCount();
80 if (!count)
81 return;
83 nsresult rv;
84 nsAutoString valueStr;
85 NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
87 for (int32_t index = count; index > 0;) {
88 --index;
89 const nsAttrName* name = aContent->GetAttrNameAt(index);
90 int32_t namespaceID = name->NamespaceID();
91 nsIAtom* attrName = name->LocalName();
93 // Filter out any attribute starting with [-|_]moz
94 nsDependentAtomString attrNameStr(attrName);
95 if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
96 StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
97 continue;
99 aContent->GetAttr(namespaceID, attrName, valueStr);
102 // Filter out special case of <br type="_moz"> or <br _moz*>,
103 // used by the editor. Bug 16988. Yuck.
105 if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
106 attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
107 StringBeginsWith(valueStr, _mozStr)) {
108 continue;
111 if (mIsCopying && mIsFirstChildOfOL &&
112 aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
113 attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
114 // This is handled separately in SerializeLIValueAttribute()
115 continue;
117 bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
119 if (((attrName == nsGkAtoms::href &&
120 (namespaceID == kNameSpaceID_None ||
121 namespaceID == kNameSpaceID_XLink)) ||
122 (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
123 // Make all links absolute when converting only the selection:
124 if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
125 // Would be nice to handle OBJECT and APPLET tags,
126 // but that gets more complicated since we have to
127 // search the tag list for CODEBASE as well.
128 // For now, just leave them relative.
129 nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
130 if (uri) {
131 nsAutoString absURI;
132 rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
133 if (NS_SUCCEEDED(rv)) {
134 valueStr = absURI;
138 // Need to escape URI.
139 nsAutoString tempURI(valueStr);
140 if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
141 valueStr = tempURI;
144 if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
145 aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
146 && namespaceID == kNameSpaceID_None) {
147 // If we're serializing a <meta http-equiv="content-type">,
148 // use the proper value, rather than what's in the document.
149 nsAutoString header;
150 aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
151 if (header.LowerCaseEqualsLiteral("content-type")) {
152 valueStr = NS_LITERAL_STRING("text/html; charset=") +
153 NS_ConvertASCIItoUTF16(mCharset);
157 nsDependentAtomString nameStr(attrName);
158 nsAutoString prefix;
159 if (namespaceID == kNameSpaceID_XML) {
160 prefix.AssignLiteral(MOZ_UTF16("xml"));
161 } else if (namespaceID == kNameSpaceID_XLink) {
162 prefix.AssignLiteral(MOZ_UTF16("xlink"));
165 // Expand shorthand attribute.
166 if (aNamespace == kNameSpaceID_XHTML &&
167 namespaceID == kNameSpaceID_None &&
168 IsShorthandAttr(attrName, aTagName) &&
169 valueStr.IsEmpty()) {
170 valueStr = nameStr;
172 SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS);
176 NS_IMETHODIMP
177 nsHTMLContentSerializer::AppendElementStart(Element* aElement,
178 Element* aOriginalElement,
179 nsAString& aStr)
181 NS_ENSURE_ARG(aElement);
183 nsIContent* content = aElement;
185 bool forceFormat = false;
186 if (!CheckElementStart(content, forceFormat, aStr)) {
187 return NS_OK;
190 nsIAtom *name = content->Tag();
191 int32_t ns = content->GetNameSpaceID();
193 bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
195 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
196 if (mColPos && lineBreakBeforeOpen) {
197 AppendNewLineToString(aStr);
199 else {
200 MaybeAddNewlineForRootNode(aStr);
202 if (!mColPos) {
203 AppendIndentation(aStr);
205 else if (mAddSpace) {
206 AppendToString(char16_t(' '), aStr);
207 mAddSpace = false;
210 else if (mAddSpace) {
211 AppendToString(char16_t(' '), aStr);
212 mAddSpace = false;
214 else {
215 MaybeAddNewlineForRootNode(aStr);
217 // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't
218 // called
219 mAddNewlineForRootNode = false;
221 AppendToString(kLessThan, aStr);
223 AppendToString(nsDependentAtomString(name), aStr);
225 MaybeEnterInPreContent(content);
227 // for block elements, we increase the indentation
228 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel())
229 IncrIndentation(name);
231 // Need to keep track of OL and LI elements in order to get ordinal number
232 // for the LI.
233 if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){
234 // We are copying and current node is an OL;
235 // Store its start attribute value in olState->startVal.
236 nsAutoString start;
237 int32_t startAttrVal = 0;
239 aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
240 if (!start.IsEmpty()){
241 nsresult rv = NS_OK;
242 startAttrVal = start.ToInteger(&rv);
243 //If OL has "start" attribute, first LI element has to start with that value
244 //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
245 //In failure of ToInteger(), default StartAttrValue to 0.
246 if (NS_SUCCEEDED(rv))
247 startAttrVal--;
248 else
249 startAttrVal = 0;
251 mOLStateStack.AppendElement(olState(startAttrVal, true));
254 if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
255 mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
256 if (mIsFirstChildOfOL){
257 // If OL is parent of this LI, serialize attributes in different manner.
258 SerializeLIValueAttribute(aElement, aStr);
262 // Even LI passed above have to go through this
263 // for serializing attributes other than "value".
264 nsAutoString dummyPrefix;
265 SerializeHTMLAttributes(content,
266 aOriginalElement,
267 dummyPrefix,
268 EmptyString(),
269 name,
271 aStr);
273 AppendToString(kGreaterThan, aStr);
275 if (ns == kNameSpaceID_XHTML &&
276 (name == nsGkAtoms::script ||
277 name == nsGkAtoms::style ||
278 name == nsGkAtoms::noscript ||
279 name == nsGkAtoms::noframes)) {
280 ++mDisableEntityEncoding;
283 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
284 LineBreakAfterOpen(ns, name)) {
285 AppendNewLineToString(aStr);
288 AfterElementStart(content, aOriginalElement, aStr);
290 return NS_OK;
293 NS_IMETHODIMP
294 nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
295 nsAString& aStr)
297 NS_ENSURE_ARG(aElement);
299 nsIContent* content = aElement;
301 nsIAtom *name = content->Tag();
302 int32_t ns = content->GetNameSpaceID();
304 if (ns == kNameSpaceID_XHTML &&
305 (name == nsGkAtoms::script ||
306 name == nsGkAtoms::style ||
307 name == nsGkAtoms::noscript ||
308 name == nsGkAtoms::noframes)) {
309 --mDisableEntityEncoding;
312 bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
313 content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
315 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
316 DecrIndentation(name);
319 if (name == nsGkAtoms::script) {
320 nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
322 if (ShouldMaintainPreLevel() && script && script->IsMalformed()) {
323 // We're looking at a malformed script tag. This means that the end tag
324 // was missing in the source. Imitate that here by not serializing the end
325 // tag.
326 --PreLevel();
327 return NS_OK;
330 else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
331 NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
332 /* Though at this point we must always have an state to be deleted as all
333 the OL opening tags are supposed to push an olState object to the stack*/
334 if (!mOLStateStack.IsEmpty()) {
335 mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
339 if (ns == kNameSpaceID_XHTML) {
340 nsIParserService* parserService = nsContentUtils::GetParserService();
342 if (parserService) {
343 bool isContainer;
345 parserService->
346 IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
347 isContainer);
348 if (!isContainer) {
349 return NS_OK;
354 if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
356 bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
358 if (mColPos && lineBreakBeforeClose) {
359 AppendNewLineToString(aStr);
361 if (!mColPos) {
362 AppendIndentation(aStr);
364 else if (mAddSpace) {
365 AppendToString(char16_t(' '), aStr);
366 mAddSpace = false;
369 else if (mAddSpace) {
370 AppendToString(char16_t(' '), aStr);
371 mAddSpace = false;
374 AppendToString(kEndTag, aStr);
375 AppendToString(nsDependentAtomString(name), aStr);
376 AppendToString(kGreaterThan, aStr);
378 MaybeLeaveFromPreContent(content);
380 if ((mDoFormat || forceFormat)&& !mDoRaw && !PreLevel()
381 && LineBreakAfterClose(ns, name)) {
382 AppendNewLineToString(aStr);
384 else {
385 MaybeFlagNewlineForRootNode(aElement);
388 if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
389 --mInBody;
392 return NS_OK;
// Highest code unit that has a slot in the entity tables below (U+00A0,
// NO-BREAK SPACE).  Lookups are guarded by `val <= kValNBSP`, so each table
// must contain exactly kValNBSP + 1 entries.
static const uint16_t kValNBSP = 160;

// Entity replacements used for element text, indexed by code unit.
// A null entry means "emit the character unchanged".
static const char* kEntities[] = {
  /*   0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  10 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  20 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  30 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&amp;", nullptr,
  /*  40 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  50 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  60 */ "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  70 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  80 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  90 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 100 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 110 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 120 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 130 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 140 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 150 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 160 */ "&nbsp;"
};

static_assert(sizeof(kEntities) / sizeof(kEntities[0]) == kValNBSP + 1,
              "kEntities must have one slot for every value up to kValNBSP");

// Entity replacements used for attribute values, indexed by code unit.
// Same as kEntities, plus the double quote (index 34).
static const char* kAttrEntities[] = {
  /*   0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  10 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  20 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  30 */ nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr,
  /*  40 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  50 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  60 */ "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  70 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  80 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /*  90 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 100 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 110 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 120 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 130 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 140 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 150 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 160 */ "&nbsp;"
};

static_assert(sizeof(kAttrEntities) / sizeof(kAttrEntities[0]) == kValNBSP + 1,
              "kAttrEntities must have one slot for every value up to kValNBSP");
436 uint32_t FindNextBasicEntity(const nsAString& aStr,
437 const uint32_t aLen,
438 uint32_t aIndex,
439 const char** aEntityTable,
440 const char** aEntity)
442 for (; aIndex < aLen; ++aIndex) {
443 // for each character in this chunk, check if it
444 // needs to be replaced
445 char16_t val = aStr[aIndex];
446 if (val <= kValNBSP && aEntityTable[val]) {
447 *aEntity = aEntityTable[val];
448 return aIndex;
451 return aIndex;
454 void
455 nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
456 nsAString& aOutputStr)
458 if (mBodyOnly && !mInBody) {
459 return;
462 if (mDisableEntityEncoding) {
463 aOutputStr.Append(aStr);
464 return;
467 bool nonBasicEntities =
468 !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
469 nsIDocumentEncoder::OutputEncodeHTMLEntities |
470 nsIDocumentEncoder::OutputEncodeW3CEntities));
472 if (!nonBasicEntities &&
473 (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) {
474 const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
475 uint32_t start = 0;
476 const uint32_t len = aStr.Length();
477 for (uint32_t i = 0; i < len; ++i) {
478 const char* entity = nullptr;
479 i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
480 uint32_t normalTextLen = i - start;
481 if (normalTextLen) {
482 aOutputStr.Append(Substring(aStr, start, normalTextLen));
484 if (entity) {
485 aOutputStr.AppendASCII(entity);
486 start = i + 1;
489 return;
490 } else if (nonBasicEntities) {
491 nsIParserService* parserService = nsContentUtils::GetParserService();
493 if (!parserService) {
494 NS_ERROR("Can't get parser service");
495 return;
498 nsReadingIterator<char16_t> done_reading;
499 aStr.EndReading(done_reading);
501 // for each chunk of |aString|...
502 uint32_t advanceLength = 0;
503 nsReadingIterator<char16_t> iter;
505 const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
506 nsAutoCString entityReplacement;
508 for (aStr.BeginReading(iter);
509 iter != done_reading;
510 iter.advance(int32_t(advanceLength))) {
511 uint32_t fragmentLength = iter.size_forward();
512 uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints
513 // replaced by a particular entity
514 const char16_t* c = iter.get();
515 const char16_t* fragmentStart = c;
516 const char16_t* fragmentEnd = c + fragmentLength;
517 const char* entityText = nullptr;
518 const char* fullConstEntityText = nullptr;
519 char* fullEntityText = nullptr;
521 advanceLength = 0;
522 // for each character in this chunk, check if it
523 // needs to be replaced
524 for (; c < fragmentEnd; c++, advanceLength++) {
525 char16_t val = *c;
526 if (val <= kValNBSP && entityTable[val]) {
527 fullConstEntityText = entityTable[val];
528 break;
529 } else if (val > 127 &&
530 ((val < 256 &&
531 mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
532 mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
533 entityReplacement.Truncate();
534 parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
536 if (!entityReplacement.IsEmpty()) {
537 entityText = entityReplacement.get();
538 break;
541 else if (val > 127 &&
542 mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
543 mEntityConverter) {
544 if (NS_IS_HIGH_SURROGATE(val) &&
545 c + 1 < fragmentEnd &&
546 NS_IS_LOW_SURROGATE(*(c + 1))) {
547 uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
548 if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
549 nsIEntityConverter::entityW3C, &fullEntityText))) {
550 lengthReplaced = 2;
551 break;
553 else {
554 advanceLength++;
557 else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
558 nsIEntityConverter::entityW3C,
559 &fullEntityText))) {
560 lengthReplaced = 1;
561 break;
566 aOutputStr.Append(fragmentStart, advanceLength);
567 if (entityText) {
568 aOutputStr.Append(char16_t('&'));
569 AppendASCIItoUTF16(entityText, aOutputStr);
570 aOutputStr.Append(char16_t(';'));
571 advanceLength++;
573 else if (fullConstEntityText) {
574 aOutputStr.AppendASCII(fullConstEntityText);
575 ++advanceLength;
577 // if it comes from nsIEntityConverter, it already has '&' and ';'
578 else if (fullEntityText) {
579 AppendASCIItoUTF16(fullEntityText, aOutputStr);
580 nsMemory::Free(fullEntityText);
581 advanceLength += lengthReplaced;
584 } else {
585 nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);