dom/base/nsPlainTextSerializer.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * nsIContentSerializer implementation that can be used with an
   9  * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
  10  * (eg for copy/paste as plaintext).
  11  */
  12
  13 #include "nsPlainTextSerializer.h"
  14 #include "nsIServiceManager.h"
  15 #include "nsGkAtoms.h"
  16 #include "nsNameSpaceManager.h"
  17 #include "nsTextFragment.h"
  18 #include "nsContentUtils.h"
  19 #include "nsReadableUtils.h"
  20 #include "nsUnicharUtils.h"
  21 #include "nsCRT.h"
  22 #include "mozilla/dom/Element.h"
  23 #include "mozilla/Preferences.h"
  24 #include "mozilla/BinarySearch.h"
  25 #include "nsComputedDOMStyle.h"
  26
  27 namespace mozilla {
  28 class Encoding;
  29 }
  30
  31 using namespace mozilla;
  32 using namespace mozilla::dom;
  33
  34 #define PREF_STRUCTS "converter.html2txt.structs"
  35 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
  36 #define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby"
  37
  38 static const int32_t kTabSize = 4;
  39 static const int32_t kIndentSizeHeaders = 2;      /* Indention of h1, if
  40                                                    mHeaderStrategy = 1 or = 2.
  41                                                    Indention of other headers
  42                                                    is derived from that.
  43                                                    XXX center h1? */
  44 static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1,
  45                                               indent h(x+1) this many
  46                                               columns more than h(x) */
  47 static const int32_t kIndentSizeList = kTabSize;
  48 // Indention of non-first lines of ul and ol
  49 static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
  50 static const char16_t kNBSP = 160;
  51 static const char16_t kSPACE = ' ';
  52
  53 static int32_t HeaderLevel(nsAtom* aTag);
  54 static int32_t GetUnicharWidth(char16_t ucs);
  55 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
  56
  57 // Someday may want to make this non-const:
  58 static const uint32_t TagStackSize = 500;
  59 static const uint32_t OLStackSize = 100;
  60
  61 static bool gPreferenceInitialized = false;
  62 static bool gAlwaysIncludeRuby = false;
  63
     // XPCOM boilerplate for nsPlainTextSerializer.
     // NOTE(review): presumably these two macros expand to the cycle-collecting
     // AddRef/Release implementations -- confirm against nsISupportsImpl.h.
  64 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
  65 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
  66
     // Interface map: the class is exposed as nsIContentSerializer and
     // nsISupports.
  67 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  68   NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  69   NS_INTERFACE_MAP_ENTRY(nsISupports)
  70 NS_INTERFACE_MAP_END
  71
     // mElement is the only member named for cycle collection.
  72 NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
  73
  74 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
  75   RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
  76   it.forget(aSerializer);
  77   return NS_OK;
  78 }
  79
  80 nsPlainTextSerializer::nsPlainTextSerializer()
  81     : mFlags(0),
  82       mFloatingLines(-1),
  83       mLineBreakDue(false),
  84       kSpace(NS_LITERAL_STRING(" "))  // Init of "constant"
  85 {
  86   mOutputString = nullptr;
  87   mHeadLevel = 0;
  88   mAtFirstColumn = true;
  89   mIndent = 0;
  90   mCiteQuoteLevel = 0;
  91   mStructs = true;                              // will be read from prefs later
  92   mHeaderStrategy = 1 /*indent increasingly*/;  // ditto
  93   mHasWrittenCiteBlockquote = false;
  94   mSpanLevel = 0;
  95   for (int32_t i = 0; i <= 6; i++) {
  96     mHeaderCounter[i] = 0;
  97   }
  98
  99   // Line breaker
 100   mWrapColumn = 72;  // XXX magic number, we expect someone to reset this
 101   mCurrentLineWidth = 0;
 102
 103   // Flow
 104   mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
 105   mInWhitespace = false;
 106   mPreFormattedMail = false;
 107   mStartedOutput = false;
 108
 109   mPreformattedBlockBoundary = false;
 110   mWithRubyAnnotation = false;  // will be read from pref and flag later
 111
 112   // initialize the tag stack to zero:
 113   // The stack only ever contains pointers to static atoms, so they don't
 114   // need refcounting.
 115   mTagStack = new nsAtom*[TagStackSize];
 116   mTagStackIndex = 0;
 117   mIgnoreAboveIndex = (uint32_t)kNotFound;
 118
 119   // initialize the OL stack, where numbers for ordered lists are kept
 120   mOLStack = new int32_t[OLStackSize];
 121   mOLStackIndex = 0;
 122
 123   mULCount = 0;
 124
 125   mIgnoredChildNodeLevel = 0;
 126
 127   if (!gPreferenceInitialized) {
 128     Preferences::AddBoolVarCache(&gAlwaysIncludeRuby, PREF_ALWAYS_INCLUDE_RUBY,
 129                                  true);
 130     gPreferenceInitialized = true;
 131   }
 132 }
 133
 134 nsPlainTextSerializer::~nsPlainTextSerializer() {
 135   delete[] mTagStack;
 136   delete[] mOLStack;
 137   NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
 138 }
 139
 140 NS_IMETHODIMP
 141 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
 142                             const Encoding* aEncoding, bool aIsCopying,
 143                             bool aIsWholeDocument,
 144                             bool* aNeedsPreformatScanning) {
 145 #ifdef DEBUG
 146   // Check if the major control flags are set correctly.
 147   if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
 148     NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
 149                  "If you want format=flowed, you must combine it with "
 150                  "nsIDocumentEncoder::OutputFormatted");
 151   }
 152
 153   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
 154     NS_ASSERTION(
 155         !(aFlags & nsIDocumentEncoder::OutputPreformatted),
 156         "Can't do formatted and preformatted output at the same time!");
 157   }
 158 #endif
 159
 160   *aNeedsPreformatScanning = true;
 161   mFlags = aFlags;
 162   mWrapColumn = aWrapColumn;
 163
 164   // Only create a linebreaker if we will handle wrapping.
 165   if (MayWrap() && MayBreakLines()) {
 166     mLineBreaker = nsContentUtils::LineBreaker();
 167   }
 168
 169   // Set the line break character:
 170   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
 171       (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
 172     // Windows
 173     mLineBreak.AssignLiteral("\r\n");
 174   } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
 175     // Mac
 176     mLineBreak.Assign(char16_t('\r'));
 177   } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
 178     // Unix/DOM
 179     mLineBreak.Assign(char16_t('\n'));
 180   } else {
 181     // Platform/default
 182     mLineBreak.AssignLiteral(NS_LINEBREAK);
 183   }
 184
 185   mLineBreakDue = false;
 186   mFloatingLines = -1;
 187
 188   mPreformattedBlockBoundary = false;
 189
 190   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
 191     // Get some prefs that controls how we do formatted output
 192     mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
 193
 194     mHeaderStrategy =
 195         Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
 196   }
 197
 198   // The pref is default inited to false in libpref, but we use true
 199   // as fallback value because we don't want to affect behavior in
 200   // other places which use this serializer currently.
 201   mWithRubyAnnotation =
 202       gAlwaysIncludeRuby || (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
 203
 204   // XXX We should let the caller decide whether to do this or not
 205   mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
 206
 207   return NS_OK;
 208 }
 209
 210 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
 211   uint32_t size = aStack.Length();
 212   if (size == 0) {
 213     return false;
 214   }
 215   return aStack.ElementAt(size - 1);
 216 }
 217
 218 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
 219   uint32_t size = aStack.Length();
 220   if (size > 0) {
 221     aStack.ElementAt(size - 1) = aValue;
 222   } else {
 223     NS_ERROR("There is no \"Last\" value");
 224   }
 225 }
 226
 227 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
 228   aStack.AppendElement(bool(aValue));
 229 }
 230
 231 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
 232   bool returnValue = false;
 233   uint32_t size = aStack.Length();
 234   if (size > 0) {
 235     returnValue = aStack.ElementAt(size - 1);
 236     aStack.RemoveElementAt(size - 1);
 237   }
 238   return returnValue;
 239 }
 240
 241 bool nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(
 242     nsAtom* aTag) {
 243   // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
 244   // non-textual container element should be serialized as placeholder
 245   // character and its child nodes should be ignored. See bug 895239.
 246   if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
 247     return false;
 248   }
 249
 250   return (aTag == nsGkAtoms::audio) || (aTag == nsGkAtoms::canvas) ||
 251          (aTag == nsGkAtoms::iframe) || (aTag == nsGkAtoms::meter) ||
 252          (aTag == nsGkAtoms::progress) || (aTag == nsGkAtoms::object) ||
 253          (aTag == nsGkAtoms::svg) || (aTag == nsGkAtoms::video);
 254 }
 255
 256 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsAtom* aTag) {
 257   if (mWithRubyAnnotation) {
 258     return false;
 259   }
 260
 261   return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
 262          aTag == nsGkAtoms::rtc;
 263 }
 264
 265 // Return true if aElement has 'display:none' or if we just don't know.
 266 static bool IsDisplayNone(Element* aElement) {
 267   RefPtr<ComputedStyle> computedStyle =
 268       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
 269   return !computedStyle ||
 270          computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
 271 }
 272
 273 static bool IsIgnorableScriptOrStyle(Element* aElement) {
 274   return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
 275          IsDisplayNone(aElement);
 276 }
 277
 278 NS_IMETHODIMP
 279 nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
 280                                   int32_t aEndOffset, nsAString& aStr) {
 281   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
 282     return NS_OK;
 283   }
 284
 285   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
 286   if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
 287
 288   NS_ENSURE_ARG(aText);
 289
 290   nsresult rv = NS_OK;
 291
 292   nsIContent* content = aText;
 293   const nsTextFragment* frag;
 294   if (!content || !(frag = content->GetText())) {
 295     return NS_ERROR_FAILURE;
 296   }
 297
 298   int32_t fragLength = frag->GetLength();
 299   int32_t endoffset =
 300       (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
 301   NS_ASSERTION(aStartOffset <= endoffset,
 302                "A start offset is beyond the end of the text fragment!");
 303
 304   int32_t length = endoffset - aStartOffset;
 305   if (length <= 0) {
 306     return NS_OK;
 307   }
 308
 309   nsAutoString textstr;
 310   if (frag->Is2b()) {
 311     textstr.Assign(frag->Get2b() + aStartOffset, length);
 312   } else {
 313     // AssignASCII is for 7-bit character only, so don't use it
 314     const char* data = frag->Get1b();
 315     CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
 316   }
 317
 318   mOutputString = &aStr;
 319
 320   // We have to split the string across newlines
 321   // to match parser behavior
 322   int32_t start = 0;
 323   int32_t offset = textstr.FindCharInSet("\n\r");
 324   while (offset != kNotFound) {
 325     if (offset > start) {
 326       // Pass in the line
 327       DoAddText(false, Substring(textstr, start, offset - start));
 328     }
 329
 330     // Pass in a newline
 331     DoAddText(true, mLineBreak);
 332
 333     start = offset + 1;
 334     offset = textstr.FindCharInSet("\n\r", start);
 335   }
 336
 337   // Consume the last bit of the string if there's any left
 338   if (start < length) {
 339     if (start) {
 340       DoAddText(false, Substring(textstr, start, length - start));
 341     } else {
 342       DoAddText(false, textstr);
 343     }
 344   }
 345
 346   mOutputString = nullptr;
 347
 348   return rv;
 349 }
 350
 351 NS_IMETHODIMP
 352 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
 353                                           int32_t aStartOffset,
 354                                           int32_t aEndOffset, nsAString& aStr) {
 355   return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
 356 }
 357
 358 NS_IMETHODIMP
 359 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
 360   mPreformatStack.push(IsElementPreformatted(aElement));
 361   return NS_OK;
 362 }
 363
 364 NS_IMETHODIMP
 365 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
 366   MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
 367                      "Tried to pop without previous push.");
 368   mPreformatStack.pop();
 369   return NS_OK;
 370 }
 371
 372 NS_IMETHODIMP
 373 nsPlainTextSerializer::AppendElementStart(Element* aElement,
 374                                           Element* aOriginalElement,
 375                                           nsAString& aStr) {
 376   NS_ENSURE_ARG(aElement);
 377
 378   mElement = aElement;
 379
 380   nsresult rv;
 381   nsAtom* id = GetIdForContent(mElement);
 382
 383   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
 384
 385   mOutputString = &aStr;
 386
 387   if (isContainer) {
 388     rv = DoOpenContainer(id);
 389   } else {
 390     rv = DoAddLeaf(id);
 391   }
 392
 393   mElement = nullptr;
 394   mOutputString = nullptr;
 395
 396   if (id == nsGkAtoms::head) {
 397     ++mHeadLevel;
 398   }
 399
 400   return rv;
 401 }
 402
 403 NS_IMETHODIMP
 404 nsPlainTextSerializer::AppendElementEnd(Element* aElement, nsAString& aStr) {
 405   NS_ENSURE_ARG(aElement);
 406
 407   mElement = aElement;
 408
 409   nsresult rv;
 410   nsAtom* id = GetIdForContent(mElement);
 411
 412   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
 413
 414   mOutputString = &aStr;
 415
 416   rv = NS_OK;
 417   if (isContainer) {
 418     rv = DoCloseContainer(id);
 419   }
 420
 421   mElement = nullptr;
 422   mOutputString = nullptr;
 423
 424   if (id == nsGkAtoms::head) {
 425     NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
 426     --mHeadLevel;
 427   }
 428
 429   return rv;
 430 }
 431
 432 NS_IMETHODIMP
 433 nsPlainTextSerializer::Flush(nsAString& aStr) {
 434   mOutputString = &aStr;
 435   FlushLine();
 436   mOutputString = nullptr;
 437   return NS_OK;
 438 }
 439
 440 NS_IMETHODIMP
 441 nsPlainTextSerializer::AppendDocumentStart(Document* aDocument,
 442                                            nsAString& aStr) {
 443   return NS_OK;
 444 }
 445
 446 nsresult nsPlainTextSerializer::DoOpenContainer(nsAtom* aTag) {
 447   // Check if we need output current node as placeholder character and ignore
 448   // child nodes.
 449   if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
 450     if (mIgnoredChildNodeLevel == 0) {
 451       // Serialize current node as placeholder character
 452       Write(NS_LITERAL_STRING(u"\xFFFC"));
 453     }
 454     // Ignore child nodes.
 455     mIgnoredChildNodeLevel++;
 456     return NS_OK;
 457   }
 458   if (IsIgnorableRubyAnnotation(aTag)) {
 459     // Ignorable ruby annotation shouldn't be replaced by a placeholder
 460     // character, neither any of its descendants.
 461     mIgnoredChildNodeLevel++;
 462     return NS_OK;
 463   }
 464   if (IsIgnorableScriptOrStyle(mElement)) {
 465     mIgnoredChildNodeLevel++;
 466     return NS_OK;
 467   }
 468
 469   if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
 470     if (mPreformattedBlockBoundary && DoOutput()) {
 471       // Should always end a line, but get no more whitespace
 472       if (mFloatingLines < 0) mFloatingLines = 0;
 473       mLineBreakDue = true;
 474     }
 475     mPreformattedBlockBoundary = false;
 476   }
 477
 478   if (mFlags & nsIDocumentEncoder::OutputRaw) {
 479     // Raw means raw.  Don't even think about doing anything fancy
 480     // here like indenting, adding line breaks or any other
 481     // characters such as list item bullets, quote characters
 482     // around <q>, etc.  I mean it!  Don't make me smack you!
 483
 484     return NS_OK;
 485   }
 486
 487   if (mTagStackIndex < TagStackSize) {
 488     mTagStack[mTagStackIndex++] = aTag;
 489   }
 490
 491   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
 492     return NS_OK;
 493   }
 494
 495   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
 496   // above random <pre>s below it.
 497   mHasWrittenCiteBlockquote =
 498       mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
 499
 500   bool isInCiteBlockquote = false;
 501
 502   // XXX special-case <blockquote type=cite> so that we don't add additional
 503   // newlines before the text.
 504   if (aTag == nsGkAtoms::blockquote) {
 505     nsAutoString value;
 506     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
 507     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
 508   }
 509
 510   if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
 511
 512   // Check if this tag's content that should not be output
 513   if ((aTag == nsGkAtoms::noscript &&
 514        !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
 515       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
 516        !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
 517     // Ignore everything that follows the current tag in
 518     // question until a matching end tag is encountered.
 519     mIgnoreAboveIndex = mTagStackIndex - 1;
 520     return NS_OK;
 521   }
 522
 523   if (aTag == nsGkAtoms::body) {
 524     // Try to figure out here whether we have a
 525     // preformatted style attribute set by Thunderbird.
 526     //
 527     // Trigger on the presence of a "pre-wrap" in the
 528     // style attribute. That's a very simplistic way to do
 529     // it, but better than nothing.
 530     // Also set mWrapColumn to the value given there
 531     // (which arguably we should only do if told to do so).
 532     nsAutoString style;
 533     int32_t whitespace;
 534     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
 535         (kNotFound != (whitespace = style.Find("white-space:")))) {
 536       if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
 537 #ifdef DEBUG_preformatted
 538         printf("Set mPreFormattedMail based on style pre-wrap\n");
 539 #endif
 540         mPreFormattedMail = true;
 541         int32_t widthOffset = style.Find("width:");
 542         if (widthOffset >= 0) {
 543           // We have to search for the ch before the semicolon,
 544           // not for the semicolon itself, because nsString::ToInteger()
 545           // considers 'c' to be a valid numeric char (even if radix=10)
 546           // but then gets confused if it sees it next to the number
 547           // when the radix specified was 10, and returns an error code.
 548           int32_t semiOffset = style.Find("ch", false, widthOffset + 6);
 549           int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
 550                                            : style.Length() - widthOffset);
 551           nsAutoString widthstr;
 552           style.Mid(widthstr, widthOffset + 6, length);
 553           nsresult err;
 554           int32_t col = widthstr.ToInteger(&err);
 555
 556           if (NS_SUCCEEDED(err)) {
 557             mWrapColumn = (uint32_t)col;
 558 #ifdef DEBUG_preformatted
 559             printf("Set wrap column to %d based on style\n", mWrapColumn);
 560 #endif
 561           }
 562         }
 563       } else if (kNotFound != style.Find("pre", true, whitespace)) {
 564 #ifdef DEBUG_preformatted
 565         printf("Set mPreFormattedMail based on style pre\n");
 566 #endif
 567         mPreFormattedMail = true;
 568         mWrapColumn = 0;
 569       }
 570     } else {
 571       /* See comment at end of function. */
 572       mInWhitespace = true;
 573       mPreFormattedMail = false;
 574     }
 575
 576     return NS_OK;
 577   }
 578
 579   // Keep this in sync with DoCloseContainer!
 580   if (!DoOutput()) {
 581     return NS_OK;
 582   }
 583
 584   if (aTag == nsGkAtoms::p)
 585     EnsureVerticalSpace(1);
 586   else if (aTag == nsGkAtoms::pre) {
 587     if (GetLastBool(mIsInCiteBlockquote))
 588       EnsureVerticalSpace(0);
 589     else if (mHasWrittenCiteBlockquote) {
 590       EnsureVerticalSpace(0);
 591       mHasWrittenCiteBlockquote = false;
 592     } else
 593       EnsureVerticalSpace(1);
 594   } else if (aTag == nsGkAtoms::tr) {
 595     PushBool(mHasWrittenCellsForRow, false);
 596   } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
 597     // We must make sure that the content of two table cells get a
 598     // space between them.
 599
 600     // To make the separation between cells most obvious and
 601     // importable, we use a TAB.
 602     if (GetLastBool(mHasWrittenCellsForRow)) {
 603       // Bypass |Write| so that the TAB isn't compressed away.
 604       AddToLine(u"\t", 1);
 605       mInWhitespace = true;
 606     } else if (mHasWrittenCellsForRow.IsEmpty()) {
 607       // We don't always see a <tr> (nor a <table>) before the <td> if we're
 608       // copying part of a table
 609       PushBool(mHasWrittenCellsForRow, true);  // will never be popped
 610     } else {
 611       SetLastBool(mHasWrittenCellsForRow, true);
 612     }
 613   } else if (aTag == nsGkAtoms::ul) {
 614     // Indent here to support nested lists, which aren't included in li :-(
 615     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
 616     // Must end the current line before we change indention
 617     mIndent += kIndentSizeList;
 618     mULCount++;
 619   } else if (aTag == nsGkAtoms::ol) {
 620     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
 621     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
 622       // Must end the current line before we change indention
 623       if (mOLStackIndex < OLStackSize) {
 624         nsAutoString startAttr;
 625         int32_t startVal = 1;
 626         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
 627           nsresult rv = NS_OK;
 628           startVal = startAttr.ToInteger(&rv);
 629           if (NS_FAILED(rv)) startVal = 1;
 630         }
 631         mOLStack[mOLStackIndex++] = startVal;
 632       }
 633     } else {
 634       mOLStackIndex++;
 635     }
 636     mIndent += kIndentSizeList;  // see ul
 637   } else if (aTag == nsGkAtoms::li &&
 638              (mFlags & nsIDocumentEncoder::OutputFormatted)) {
 639     if (mTagStackIndex > 1 && IsInOL()) {
 640       if (mOLStackIndex > 0) {
 641         nsAutoString valueAttr;
 642         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
 643           nsresult rv = NS_OK;
 644           int32_t valueAttrVal = valueAttr.ToInteger(&rv);
 645           if (NS_SUCCEEDED(rv)) mOLStack[mOLStackIndex - 1] = valueAttrVal;
 646         }
 647         // This is what nsBulletFrame does for OLs:
 648         mInIndentString.AppendInt(mOLStack[mOLStackIndex - 1]++, 10);
 649       } else {
 650         mInIndentString.Append(char16_t('#'));
 651       }
 652
 653       mInIndentString.Append(char16_t('.'));
 654
 655     } else {
 656       static const char bulletCharArray[] = "*o+#";
 657       uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
 658       char bulletChar = bulletCharArray[index % 4];
 659       mInIndentString.Append(char16_t(bulletChar));
 660     }
 661
 662     mInIndentString.Append(char16_t(' '));
 663   } else if (aTag == nsGkAtoms::dl) {
 664     EnsureVerticalSpace(1);
 665   } else if (aTag == nsGkAtoms::dt) {
 666     EnsureVerticalSpace(0);
 667   } else if (aTag == nsGkAtoms::dd) {
 668     EnsureVerticalSpace(0);
 669     mIndent += kIndentSizeDD;
 670   } else if (aTag == nsGkAtoms::span) {
 671     ++mSpanLevel;
 672   } else if (aTag == nsGkAtoms::blockquote) {
 673     // Push
 674     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
 675     if (isInCiteBlockquote) {
 676       EnsureVerticalSpace(0);
 677       mCiteQuoteLevel++;
 678     } else {
 679       EnsureVerticalSpace(1);
 680       mIndent += kTabSize;  // Check for some maximum value?
 681     }
 682   } else if (aTag == nsGkAtoms::q) {
 683     Write(NS_LITERAL_STRING("\""));
 684   }
 685
 686   // Else make sure we'll separate block level tags,
 687   // even if we're about to leave, before doing any other formatting.
 688   else if (IsElementBlock(mElement)) {
 689     EnsureVerticalSpace(0);
 690   }
 691
 692   //////////////////////////////////////////////////////////////
 693   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
 694     return NS_OK;
 695   }
 696   //////////////////////////////////////////////////////////////
 697   // The rest of this routine is formatted output stuff,
 698   // which we should skip if we're not formatted:
 699   //////////////////////////////////////////////////////////////
 700
 701   // Push on stack
 702   bool currentNodeIsConverted = IsCurrentNodeConverted();
 703
 704   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
 705       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
 706     EnsureVerticalSpace(2);
 707     if (mHeaderStrategy == 2) {  // numbered
 708       mIndent += kIndentSizeHeaders;
 709       // Caching
 710       int32_t level = HeaderLevel(aTag);
 711       // Increase counter for current level
 712       mHeaderCounter[level]++;
 713       // Reset all lower levels
 714       int32_t i;
 715
 716       for (i = level + 1; i <= 6; i++) {
 717         mHeaderCounter[i] = 0;
 718       }
 719
 720       // Construct numbers
 721       nsAutoString leadup;
 722       for (i = 1; i <= level; i++) {
 723         leadup.AppendInt(mHeaderCounter[i]);
 724         leadup.Append(char16_t('.'));
 725       }
 726       leadup.Append(char16_t(' '));
 727       Write(leadup);
 728     } else if (mHeaderStrategy == 1) {  // indent increasingly
 729       mIndent += kIndentSizeHeaders;
 730       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
 731         // for h(x), run x-1 times
 732         mIndent += kIndentIncrementHeaders;
 733       }
 734     }
 735   } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
 736     nsAutoString url;
 737     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
 738         !url.IsEmpty()) {
 739       mURL = url;
 740     }
 741   } else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
 742     Write(NS_LITERAL_STRING("^"));
 743   } else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
 744     Write(NS_LITERAL_STRING("_"));
 745   } else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
 746     Write(NS_LITERAL_STRING("|"));
 747   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) && mStructs &&
 748              !currentNodeIsConverted) {
 749     Write(NS_LITERAL_STRING("*"));
 750   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) && mStructs &&
 751              !currentNodeIsConverted) {
 752     Write(NS_LITERAL_STRING("/"));
 753   } else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
 754     Write(NS_LITERAL_STRING("_"));
 755   }
 756
 757   /* Container elements are always block elements, so we shouldn't
 758      output any whitespace immediately after the container tag even if
 759      there's extra whitespace there because the HTML is pretty-printed
 760      or something. To ensure that happens, tell the serializer we're
 761      already in whitespace so it won't output more. */
 762   mInWhitespace = true;
 763
 764   return NS_OK;
 765 }
 766
 767 nsresult nsPlainTextSerializer::DoCloseContainer(nsAtom* aTag) {
 768   if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
 769     mIgnoredChildNodeLevel--;
 770     return NS_OK;
 771   }
 772   if (IsIgnorableRubyAnnotation(aTag)) {
 773     mIgnoredChildNodeLevel--;
 774     return NS_OK;
 775   }
 776   if (IsIgnorableScriptOrStyle(mElement)) {
 777     mIgnoredChildNodeLevel--;
 778     return NS_OK;
 779   }
 780
 781   if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
 782     if (DoOutput() && IsInPre() && IsElementBlock(mElement)) {
 783       // If we're closing a preformatted block element, output a line break
 784       // when we find a new container.
 785       mPreformattedBlockBoundary = true;
 786     }
 787   }
 788
 789   if (mFlags & nsIDocumentEncoder::OutputRaw) {
 790     // Raw means raw.  Don't even think about doing anything fancy
 791     // here like indenting, adding line breaks or any other
 792     // characters such as list item bullets, quote characters
 793     // around <q>, etc.  I mean it!  Don't make me smack you!
 794
 795     return NS_OK;
 796   }
 797
 798   if (mTagStackIndex > 0) {
 799     --mTagStackIndex;
 800   }
 801
 802   if (mTagStackIndex >= mIgnoreAboveIndex) {
 803     if (mTagStackIndex == mIgnoreAboveIndex) {
 804       // We're dealing with the close tag whose matching
 805       // open tag had set the mIgnoreAboveIndex value.
 806       // Reset mIgnoreAboveIndex before discarding this tag.
 807       mIgnoreAboveIndex = (uint32_t)kNotFound;
 808     }
 809     return NS_OK;
 810   }
 811
 812   // End current line if we're ending a block level tag
 813   if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
 814     // We want the output to end with a new line,
 815     // but in preformatted areas like text fields,
 816     // we can't emit newlines that weren't there.
 817     // So add the newline only in the case of formatted output.
 818     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
 819       EnsureVerticalSpace(0);
 820     } else {
 821       FlushLine();
 822     }
 823     // We won't want to do anything with these in formatted mode either,
 824     // so just return now:
 825     return NS_OK;
 826   }
 827
 828   // Keep this in sync with DoOpenContainer!
 829   if (!DoOutput()) {
 830     return NS_OK;
 831   }
 832
 833   if (aTag == nsGkAtoms::tr) {
 834     PopBool(mHasWrittenCellsForRow);
 835     // Should always end a line, but get no more whitespace
 836     if (mFloatingLines < 0) mFloatingLines = 0;
 837     mLineBreakDue = true;
 838   } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
 839              (mFlags & nsIDocumentEncoder::OutputFormatted)) {
 840     // Items that should always end a line, but get no more whitespace
 841     if (mFloatingLines < 0) mFloatingLines = 0;
 842     mLineBreakDue = true;
 843   } else if (aTag == nsGkAtoms::pre) {
 844     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
 845     mLineBreakDue = true;
 846   } else if (aTag == nsGkAtoms::ul) {
 847     FlushLine();
 848     mIndent -= kIndentSizeList;
 849     if (--mULCount + mOLStackIndex == 0) {
 850       mFloatingLines = 1;
 851       mLineBreakDue = true;
 852     }
 853   } else if (aTag == nsGkAtoms::ol) {
 854     FlushLine();  // Doing this after decreasing OLStackIndex would be wrong.
 855     mIndent -= kIndentSizeList;
 856     NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
 857     mOLStackIndex--;
 858     if (mULCount + mOLStackIndex == 0) {
 859       mFloatingLines = 1;
 860       mLineBreakDue = true;
 861     }
 862   } else if (aTag == nsGkAtoms::dl) {
 863     mFloatingLines = 1;
 864     mLineBreakDue = true;
 865   } else if (aTag == nsGkAtoms::dd) {
 866     FlushLine();
 867     mIndent -= kIndentSizeDD;
 868   } else if (aTag == nsGkAtoms::span) {
 869     NS_ASSERTION(mSpanLevel, "Span level will be negative!");
 870     --mSpanLevel;
 871   } else if (aTag == nsGkAtoms::div) {
 872     if (mFloatingLines < 0) mFloatingLines = 0;
 873     mLineBreakDue = true;
 874   } else if (aTag == nsGkAtoms::blockquote) {
 875     FlushLine();  // Is this needed?
 876
 877     // Pop
 878     bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
 879
 880     if (isInCiteBlockquote) {
 881       NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
 882       mCiteQuoteLevel--;
 883       mFloatingLines = 0;
 884       mHasWrittenCiteBlockquote = true;
 885     } else {
 886       mIndent -= kTabSize;
 887       mFloatingLines = 1;
 888     }
 889     mLineBreakDue = true;
 890   } else if (aTag == nsGkAtoms::q) {
 891     Write(NS_LITERAL_STRING("\""));
 892   } else if (IsElementBlock(mElement)) {
 893     // All other blocks get 1 vertical space after them
 894     // in formatted mode, otherwise 0.
 895     // This is hard. Sometimes 0 is a better number, but
 896     // how to know?
 897     if (mFlags & nsIDocumentEncoder::OutputFormatted)
 898       EnsureVerticalSpace(1);
 899     else {
 900       if (mFloatingLines < 0) mFloatingLines = 0;
 901       mLineBreakDue = true;
 902     }
 903   }
 904
 905   //////////////////////////////////////////////////////////////
 906   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
 907     return NS_OK;
 908   }
 909   //////////////////////////////////////////////////////////////
 910   // The rest of this routine is formatted output stuff,
 911   // which we should skip if we're not formatted:
 912   //////////////////////////////////////////////////////////////
 913
 914   // Pop the currentConverted stack
 915   bool currentNodeIsConverted = IsCurrentNodeConverted();
 916
 917   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
 918       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
 919     if (mHeaderStrategy) { /*numbered or indent increasingly*/
 920       mIndent -= kIndentSizeHeaders;
 921     }
 922     if (mHeaderStrategy == 1 /*indent increasingly*/) {
 923       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
 924         // for h(x), run x-1 times
 925         mIndent -= kIndentIncrementHeaders;
 926       }
 927     }
 928     EnsureVerticalSpace(1);
 929   } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted &&
 930              !mURL.IsEmpty()) {
 931     nsAutoString temp;
 932     temp.AssignLiteral(" <");
 933     temp += mURL;
 934     temp.Append(char16_t('>'));
 935     Write(temp);
 936     mURL.Truncate();
 937   } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) && mStructs &&
 938              !currentNodeIsConverted) {
 939     Write(kSpace);
 940   } else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
 941     Write(NS_LITERAL_STRING("|"));
 942   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) && mStructs &&
 943              !currentNodeIsConverted) {
 944     Write(NS_LITERAL_STRING("*"));
 945   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) && mStructs &&
 946              !currentNodeIsConverted) {
 947     Write(NS_LITERAL_STRING("/"));
 948   } else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
 949     Write(NS_LITERAL_STRING("_"));
 950   }
 951
 952   return NS_OK;
 953 }
 954
 955 bool nsPlainTextSerializer::MustSuppressLeaf() {
 956   if (mIgnoredChildNodeLevel > 0) {
 957     return true;
 958   }
 959
 960   if ((mTagStackIndex > 1 &&
 961        mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
 962       (mTagStackIndex > 0 &&
 963        mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
 964     // Don't output the contents of SELECT elements;
 965     // Might be nice, eventually, to output just the selected element.
 966     // Read more in bug 31994.
 967     return true;
 968   }
 969
 970   return false;
 971 }
 972
 973 void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
 974                                       const nsAString& aText) {
 975   // If we don't want any output, just return
 976   if (!DoOutput()) {
 977     return;
 978   }
 979
 980   if (!aIsLineBreak) {
 981     // Make sure to reset this, since it's no longer true.
 982     mHasWrittenCiteBlockquote = false;
 983   }
 984
 985   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
 986
 987   if (MustSuppressLeaf()) {
 988     return;
 989   }
 990
 991   if (aIsLineBreak) {
 992     // The only times we want to pass along whitespace from the original
 993     // html source are if we're forced into preformatted mode via flags,
 994     // or if we're prettyprinting and we're inside a <pre>.
 995     // Otherwise, either we're collapsing to minimal text, or we're
 996     // prettyprinting to mimic the html format, and in neither case
 997     // does the formatting of the html source help us.
 998     if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
 999         (mPreFormattedMail && !mWrapColumn) || IsInPre()) {
1000       EnsureVerticalSpace(mEmptyLines + 1);
1001     } else if (!mInWhitespace) {
1002       Write(kSpace);
1003       mInWhitespace = true;
1004     }
1005     return;
1006   }
1007
1008   /* Check, if we are in a link (symbolized with mURL containing the URL)
1009      and the text is equal to the URL. In that case we don't want to output
1010      the URL twice so we scrap the text in mURL. */
1011   if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1012     mURL.Truncate();
1013   }
1014   Write(aText);
1015 }
1016
1017 nsresult nsPlainTextSerializer::DoAddLeaf(nsAtom* aTag) {
1018   mPreformattedBlockBoundary = false;
1019
1020   // If we don't want any output, just return
1021   if (!DoOutput()) {
1022     return NS_OK;
1023   }
1024
1025   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1026
1027   if (MustSuppressLeaf()) {
1028     return NS_OK;
1029   }
1030
1031   if (aTag == nsGkAtoms::br) {
1032     // Another egregious editor workaround, see bug 38194:
1033     // ignore the bogus br tags that the editor sticks here and there.
1034     nsAutoString tagAttr;
1035     if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) ||
1036         !tagAttr.EqualsLiteral("_moz")) {
1037       EnsureVerticalSpace(mEmptyLines + 1);
1038     }
1039   } else if (aTag == nsGkAtoms::hr &&
1040              (mFlags & nsIDocumentEncoder::OutputFormatted)) {
1041     EnsureVerticalSpace(0);
1042
1043     // Make a line of dashes as wide as the wrap width
1044     // XXX honoring percentage would be nice
1045     nsAutoString line;
1046     uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
1047     while (line.Length() < width) {
1048       line.Append(char16_t('-'));
1049     }
1050     Write(line);
1051
1052     EnsureVerticalSpace(0);
1053   } else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
1054     Write(NS_LITERAL_STRING(u"\xFFFC"));
1055   } else if (aTag == nsGkAtoms::img) {
1056     /* Output (in decreasing order of preference)
1057        alt, title or nothing */
1058     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1059     nsAutoString imageDescription;
1060     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
1061       // If the alt attribute has an empty value (|alt=""|), output nothing
1062     } else if (NS_SUCCEEDED(
1063                    GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
1064                !imageDescription.IsEmpty()) {
1065       imageDescription =
1066           NS_LITERAL_STRING(" [") + imageDescription + NS_LITERAL_STRING("] ");
1067     }
1068
1069     Write(imageDescription);
1070   }
1071
1072   return NS_OK;
1073 }
1074
1075 /**
1076  * Adds as many newline as necessary to get |noOfRows| empty lines
1077  *
1078  * noOfRows = -1    :   Being in the middle of some line of text
1079  * noOfRows =  0    :   Being at the start of a line
1080  * noOfRows =  n>0  :   Having n empty lines before the current line.
1081  */
1082 void nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) {
1083   // If we have something in the indent we probably want to output
1084   // it and it's not included in the count for empty lines so we don't
1085   // realize that we should start a new line.
1086   if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1087     EndLine(false);
1088     mInWhitespace = true;
1089   }
1090
1091   while (mEmptyLines < noOfRows) {
1092     EndLine(false);
1093     mInWhitespace = true;
1094   }
1095   mLineBreakDue = false;
1096   mFloatingLines = -1;
1097 }
1098
1099 /**
1100  * This empties the current line cache without adding a NEWLINE.
1101  * Should not be used if line wrapping is of importance since
1102  * this function destroys the cache information.
1103  *
1104  * It will also write indentation and quotes if we believe us to be
1105  * at the start of the line.
1106  */
1107 void nsPlainTextSerializer::FlushLine() {
1108   if (!mCurrentLine.IsEmpty()) {
1109     if (mAtFirstColumn) {
1110       OutputQuotesAndIndent();  // XXX: Should we always do this? Bug?
1111     }
1112
1113     Output(mCurrentLine);
1114     mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1115     mCurrentLine.Truncate();
1116     mCurrentLineWidth = 0;
1117   }
1118 }
1119
1120 /**
1121  * Prints the text to output to our current output device (the string
1122  * mOutputString). The only logic here is to replace non breaking spaces with a
1123  * normal space since most (all?) receivers of the result won't understand the
1124  * nbsp and even be confused by it.
1125  */
1126 void nsPlainTextSerializer::Output(nsString& aString) {
1127   if (!aString.IsEmpty()) {
1128     mStartedOutput = true;
1129   }
1130
1131   if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1132     // First, replace all nbsp characters with spaces,
1133     // which the unicode encoder won't do for us.
1134     aString.ReplaceChar(kNBSP, kSPACE);
1135   }
1136   mOutputString->Append(aString);
1137 }
1138
1139 static bool IsSpaceStuffable(const char16_t* s) {
1140   if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1141       NS_strncmp(s, u"From ", 5) == 0)
1142     return true;
1143   else
1144     return false;
1145 }
1146
1147 /**
1148  * This function adds a piece of text to the current stored line. If we are
1149  * wrapping text and the stored line will become too long, a suitable
1150  * location to wrap will be found and the line that's complete will be
1151  * output.
1152  */
1153 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
1154                                       int32_t aLineFragmentLength) {
1155   uint32_t prefixwidth =
1156       (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1 : 0) + mIndent;
1157
1158   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1159
1160   int32_t linelength = mCurrentLine.Length();
1161   if (0 == linelength) {
1162     if (0 == aLineFragmentLength) {
1163       // Nothing at all. Are you kidding me?
1164       return;
1165     }
1166
1167     if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1168       if (IsSpaceStuffable(aLineFragment) &&
1169           mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
1170       ) {
1171         // Space stuffing a la RFC 2646 (format=flowed).
1172         mCurrentLine.Append(char16_t(' '));
1173
1174         if (MayWrap()) {
1175           mCurrentLineWidth += GetUnicharWidth(' ');
1176 #ifdef DEBUG_wrapping
1177           NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1178                                              mCurrentLine.Length()) ==
1179                            (int32_t)mCurrentLineWidth,
1180                        "mCurrentLineWidth and reality out of sync!");
1181 #endif
1182         }
1183       }
1184     }
1185     mEmptyLines = -1;
1186   }
1187
1188   mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1189   if (MayWrap()) {
1190     mCurrentLineWidth +=
1191         GetUnicharStringWidth(aLineFragment, aLineFragmentLength);
1192 #ifdef DEBUG_wrapping
1193     NS_ASSERTION(
1194         GetUnicharstringWidth(mCurrentLine.get(), mCurrentLine.Length()) ==
1195             (int32_t)mCurrentLineWidth,
1196         "mCurrentLineWidth and reality out of sync!");
1197 #endif
1198   }
1199
1200   linelength = mCurrentLine.Length();
1201
1202   //  Wrap?
1203   if (MayWrap()) {
1204 #ifdef DEBUG_wrapping
1205     NS_ASSERTION(
1206         GetUnicharstringWidth(mCurrentLine.get(), mCurrentLine.Length()) ==
1207             (int32_t)mCurrentLineWidth,
1208         "mCurrentLineWidth and reality out of sync!");
1209 #endif
1210     // Yes, wrap!
1211     // The "+4" is to avoid wrap lines that only would be a couple
1212     // of letters too long. We give this bonus only if the
1213     // wrapcolumn is more than 20.
1214     uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1215
1216     // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1217     while (mCurrentLineWidth + prefixwidth > mWrapColumn + bonuswidth) {
1218       // We go from the end removing one letter at a time until
1219       // we have a reasonable width
1220       int32_t goodSpace = mCurrentLine.Length();
1221       uint32_t width = mCurrentLineWidth;
1222       while (goodSpace > 0 && (width + prefixwidth > mWrapColumn)) {
1223         goodSpace--;
1224         width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1225       }
1226
1227       goodSpace++;
1228
1229       if (mLineBreaker) {
1230         goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1231                                        mCurrentLine.Length(), goodSpace);
1232         if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1233             nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace - 1))) {
1234           --goodSpace;  // adjust the position since line breaker returns a
1235                         // position next to space
1236         }
1237       }
1238       // fallback if the line breaker is unavailable or failed
1239       if (!mLineBreaker) {
1240         if (mCurrentLine.IsEmpty() || mWrapColumn < prefixwidth) {
1241           goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
1242         } else {
1243           goodSpace =
1244               std::min(mWrapColumn - prefixwidth, mCurrentLine.Length() - 1);
1245           while (goodSpace >= 0 &&
1246                  !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1247             goodSpace--;
1248           }
1249         }
1250       }
1251
1252       nsAutoString restOfLine;
1253       if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1254         // If we didn't find a good place to break, accept long line and
1255         // try to find another place to break
1256         goodSpace =
1257             (prefixwidth > mWrapColumn + 1) ? 1 : mWrapColumn - prefixwidth + 1;
1258         if (mLineBreaker) {
1259           if ((uint32_t)goodSpace < mCurrentLine.Length())
1260             goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1261                                            mCurrentLine.Length(), goodSpace);
1262           if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1263             goodSpace = mCurrentLine.Length();
1264         }
1265         // fallback if the line breaker is unavailable or failed
1266         if (!mLineBreaker) {
1267           goodSpace =
1268               (prefixwidth > mWrapColumn) ? 1 : mWrapColumn - prefixwidth;
1269           while (goodSpace < linelength &&
1270                  !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1271             goodSpace++;
1272           }
1273         }
1274       }
1275
1276       if ((goodSpace < linelength) && (goodSpace > 0)) {
1277         // Found a place to break
1278
1279         // -1 (trim a char at the break position)
1280         // only if the line break was a space.
1281         if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1282           mCurrentLine.Right(restOfLine, linelength - goodSpace - 1);
1283         } else {
1284           mCurrentLine.Right(restOfLine, linelength - goodSpace);
1285         }
1286         // if breaker was U+0020, it has to consider for delsp=yes support
1287         bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1288         mCurrentLine.Truncate(goodSpace);
1289         EndLine(true, breakBySpace);
1290         mCurrentLine.Truncate();
1291         // Space stuff new line?
1292         if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1293           if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) &&
1294               mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
1295           ) {
1296             // Space stuffing a la RFC 2646 (format=flowed).
1297             mCurrentLine.Append(char16_t(' '));
1298             // XXX doesn't seem to work correctly for ' '
1299           }
1300         }
1301         mCurrentLine.Append(restOfLine);
1302         mCurrentLineWidth =
1303             GetUnicharStringWidth(mCurrentLine.get(), mCurrentLine.Length());
1304         linelength = mCurrentLine.Length();
1305         mEmptyLines = -1;
1306       } else {
1307         // Nothing to do. Hopefully we get more data later
1308         // to use for a place to break line
1309         break;
1310       }
1311     }
1312   } else {
1313     // No wrapping.
1314   }
1315 }
1316
1317 /**
1318  * Outputs the contents of mCurrentLine, and resets line specific
1319  * variables. Also adds an indentation and prefix if there is
1320  * one specified. Strips ending spaces from the line if it isn't
1321  * preformatted.
1322  */
1323 void nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) {
1324   uint32_t currentlinelength = mCurrentLine.Length();
1325
1326   if (aSoftlinebreak && 0 == currentlinelength) {
1327     // No meaning
1328     return;
1329   }
1330
1331   /* In non-preformatted mode, remove spaces from the end of the line for
1332    * format=flowed compatibility. Don't do this for these special cases:
1333    * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1334    * "- -- ", the OpenPGP dash-escaped signature separator in inline
1335    * signed messages according to the OpenPGP standard (RFC 2440).
1336    */
1337   if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1338       !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
1339       (aSoftlinebreak || !(mCurrentLine.EqualsLiteral("-- ") ||
1340                            mCurrentLine.EqualsLiteral("- -- ")))) {
1341     // Remove spaces from the end of the line.
1342     while (currentlinelength > 0 &&
1343            mCurrentLine[currentlinelength - 1] == ' ') {
1344       --currentlinelength;
1345     }
1346     mCurrentLine.SetLength(currentlinelength);
1347   }
1348
1349   if (aSoftlinebreak && (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1350       (mIndent == 0)) {
1351     // Add the soft part of the soft linebreak (RFC 2646 4.1)
1352     // We only do this when there is no indentation since format=flowed
1353     // lines and indentation doesn't work well together.
1354
1355     // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1356     // add twice space.
1357     if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1358       mCurrentLine.AppendLiteral("  ");
1359     else
1360       mCurrentLine.Append(char16_t(' '));
1361   }
1362
1363   if (aSoftlinebreak) {
1364     mEmptyLines = 0;
1365   } else {
1366     // Hard break
1367     if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1368       mEmptyLines = -1;
1369     }
1370
1371     mEmptyLines++;
1372   }
1373
1374   if (mAtFirstColumn) {
1375     // If we don't have anything "real" to output we have to
1376     // make sure the indent doesn't end in a space since that
1377     // would trick a format=flowed-aware receiver.
1378     bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1379     OutputQuotesAndIndent(stripTrailingSpaces);
1380   }
1381
1382   mCurrentLine.Append(mLineBreak);
1383   Output(mCurrentLine);
1384   mCurrentLine.Truncate();
1385   mCurrentLineWidth = 0;
1386   mAtFirstColumn = true;
1387   mInWhitespace = true;
1388   mLineBreakDue = false;
1389   mFloatingLines = -1;
1390 }
1391
1392 /**
1393  * Outputs the calculated and stored indent and text in the indentation. That is
1394  * quote chars and numbers for numbered lists and such. It will also reset any
1395  * stored text to put in the indentation after using it.
1396  */
1397 void nsPlainTextSerializer::OutputQuotesAndIndent(
1398     bool stripTrailingSpaces /* = false */) {
1399   nsAutoString stringToOutput;
1400
1401   // Put the mail quote "> " chars in, if appropriate:
1402   if (mCiteQuoteLevel > 0) {
1403     nsAutoString quotes;
1404     for (int i = 0; i < mCiteQuoteLevel; i++) {
1405       quotes.Append(char16_t('>'));
1406     }
1407     if (!mCurrentLine.IsEmpty()) {
1408       /* Better don't output a space here, if the line is empty,
1409          in case a receiving f=f-aware UA thinks, this were a flowed line,
1410          which it isn't - it's just empty.
1411          (Flowed lines may be joined with the following one,
1412          so the empty line may be lost completely.) */
1413       quotes.Append(char16_t(' '));
1414     }
1415     stringToOutput = quotes;
1416     mAtFirstColumn = false;
1417   }
1418
1419   // Indent if necessary
1420   int32_t indentwidth = mIndent - mInIndentString.Length();
1421   if (indentwidth > 0 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1422       // Don't make empty lines look flowed
1423   ) {
1424     nsAutoString spaces;
1425     for (int i = 0; i < indentwidth; ++i) spaces.Append(char16_t(' '));
1426     stringToOutput += spaces;
1427     mAtFirstColumn = false;
1428   }
1429
1430   if (!mInIndentString.IsEmpty()) {
1431     stringToOutput += mInIndentString;
1432     mAtFirstColumn = false;
1433     mInIndentString.Truncate();
1434   }
1435
1436   if (stripTrailingSpaces) {
1437     int32_t lineLength = stringToOutput.Length();
1438     while (lineLength > 0 && ' ' == stringToOutput[lineLength - 1]) {
1439       --lineLength;
1440     }
1441     stringToOutput.SetLength(lineLength);
1442   }
1443
1444   if (!stringToOutput.IsEmpty()) {
1445     Output(stringToOutput);
1446   }
1447 }
1448
1449 /**
1450  * Write a string. This is the highlevel function to use to get text output.
1451  * By using AddToLine, Output, EndLine and other functions it handles quotation,
1452  * line wrapping, indentation, whitespace compression and other things.
1453  */
1454 void nsPlainTextSerializer::Write(const nsAString& aStr) {
1455   // XXX Copy necessary to use nsString methods and gain
1456   // access to underlying buffer
1457   nsAutoString str(aStr);
1458
1459 #ifdef DEBUG_wrapping
1460   printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
1461          mWrapColumn);
1462 #endif
1463
1464   int32_t bol = 0;
1465   int32_t newline;
1466
1467   int32_t totLen = str.Length();
1468
1469   // If the string is empty, do nothing:
1470   if (totLen <= 0) return;
1471
1472   // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1473   // to be cut off along with usual spaces if required. (bug #125928)
1474   if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1475     for (int32_t i = totLen - 1; i >= 0; i--) {
1476       char16_t c = str[i];
1477       if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) continue;
1478       if (kNBSP == c)
1479         str.Replace(i, 1, ' ');
1480       else
1481         break;
1482     }
1483   }
1484
1485   // We have two major codepaths here. One that does preformatted text and one
1486   // that does normal formatted text. The one for preformatted text calls
1487   // Output directly while the other code path goes through AddToLine.
1488   if ((mPreFormattedMail && !mWrapColumn) ||
1489       (IsInPre() && !mPreFormattedMail) ||
1490       (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1491     // No intelligent wrapping.
1492
1493     // This mustn't be mixed with intelligent wrapping without clearing
1494     // the mCurrentLine buffer before!!!
1495     NS_ASSERTION(mCurrentLine.IsEmpty() || (IsInPre() && !mPreFormattedMail),
1496                  "Mixed wrapping data and nonwrapping data on the same line");
1497     if (!mCurrentLine.IsEmpty()) {
1498       FlushLine();
1499     }
1500
1501     // Put the mail quote "> " chars in, if appropriate.
1502     // Have to put it in before every line.
1503     while (bol < totLen) {
1504       bool outputQuotes = mAtFirstColumn;
1505       bool atFirstColumn;
1506       bool outputLineBreak = false;
1507       bool spacesOnly = true;
1508
1509       // Find one of '\n' or '\r' using iterators since nsAString
1510       // doesn't have the old FindCharInSet function.
1511       nsAString::const_iterator iter;
1512       str.BeginReading(iter);
1513       nsAString::const_iterator done_searching;
1514       str.EndReading(done_searching);
1515       iter.advance(bol);
1516       int32_t new_newline = bol;
1517       newline = kNotFound;
1518       while (iter != done_searching) {
1519         if ('\n' == *iter || '\r' == *iter) {
1520           newline = new_newline;
1521           break;
1522         }
1523         if (' ' != *iter) spacesOnly = false;
1524         ++new_newline;
1525         ++iter;
1526       }
1527
1528       // Done searching
1529       nsAutoString stringpart;
1530       if (newline == kNotFound) {
1531         // No new lines.
1532         stringpart.Assign(Substring(str, bol, totLen - bol));
1533         if (!stringpart.IsEmpty()) {
1534           char16_t lastchar = stringpart[stringpart.Length() - 1];
1535           if ((lastchar == '\t') || (lastchar == ' ') || (lastchar == '\r') ||
1536               (lastchar == '\n')) {
1537             mInWhitespace = true;
1538           } else {
1539             mInWhitespace = false;
1540           }
1541         }
1542         mEmptyLines = -1;
1543         atFirstColumn = mAtFirstColumn && (totLen - bol) == 0;
1544         bol = totLen;
1545       } else {
1546         // There is a newline
1547         stringpart.Assign(Substring(str, bol, newline - bol));
1548         mInWhitespace = true;
1549         outputLineBreak = true;
1550         mEmptyLines = 0;
1551         atFirstColumn = true;
1552         bol = newline + 1;
1553         if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1554           // There was a CRLF in the input. This used to be illegal and
1555           // stripped by the parser. Apparently not anymore. Let's skip
1556           // over the LF.
1557           bol++;
1558         }
1559       }
1560
1561       mCurrentLine.Truncate();
1562       if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1563         if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
1564             !IsQuotedLine(stringpart) && !stringpart.EqualsLiteral("-- ") &&
1565             !stringpart.EqualsLiteral("- -- "))
1566           stringpart.Trim(" ", false, true, true);
1567         if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart))
1568           mCurrentLine.Append(char16_t(' '));
1569       }
1570       mCurrentLine.Append(stringpart);
1571
1572       if (outputQuotes) {
1573         // Note: this call messes with mAtFirstColumn
1574         OutputQuotesAndIndent();
1575       }
1576
1577       Output(mCurrentLine);
1578       if (outputLineBreak) {
1579         Output(mLineBreak);
1580       }
1581       mAtFirstColumn = atFirstColumn;
1582     }
1583
1584     // Reset mCurrentLine.
1585     mCurrentLine.Truncate();
1586
1587 #ifdef DEBUG_wrapping
1588     printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
1589 #endif
1590     return;
1591   }
1592
1593   // Intelligent handling of text
1594   // If needed, strip out all "end of lines"
1595   // and multiple whitespace between words
1596   int32_t nextpos;
1597   const char16_t* offsetIntoBuffer = nullptr;
1598
1599   while (bol < totLen) {  // Loop over lines
1600     // Find a place where we may have to do whitespace compression
1601     nextpos = str.FindCharInSet(" \t\n\r", bol);
1602 #ifdef DEBUG_wrapping
1603     nsAutoString remaining;
1604     str.Right(remaining, totLen - bol);
1605     foo = ToNewCString(remaining);
1606     // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
1607     //        "string = '%s'\n", bol, nextpos, totLen, foo);
1608     free(foo);
1609 #endif
1610
1611     if (nextpos == kNotFound) {
1612       // The rest of the string
1613       offsetIntoBuffer = str.get() + bol;
1614       AddToLine(offsetIntoBuffer, totLen - bol);
1615       bol = totLen;
1616       mInWhitespace = false;
1617     } else {
1618       // There's still whitespace left in the string
1619       if (nextpos != 0 && (nextpos + 1) < totLen) {
1620         offsetIntoBuffer = str.get() + nextpos;
1621         // skip '\n' if it is between CJ chars
1622         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
1623             IS_CJ_CHAR(offsetIntoBuffer[1])) {
1624           offsetIntoBuffer = str.get() + bol;
1625           AddToLine(offsetIntoBuffer, nextpos - bol);
1626           bol = nextpos + 1;
1627           continue;
1628         }
1629       }
1630       // If we're already in whitespace and not preformatted, just skip it:
1631       if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1632           !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1633         // Skip whitespace
1634         bol++;
1635         continue;
1636       }
1637
1638       if (nextpos == bol) {
1639         // Note that we are in whitespace.
1640         mInWhitespace = true;
1641         offsetIntoBuffer = str.get() + nextpos;
1642         AddToLine(offsetIntoBuffer, 1);
1643         bol++;
1644         continue;
1645       }
1646
1647       mInWhitespace = true;
1648
1649       offsetIntoBuffer = str.get() + bol;
1650       if (mPreFormattedMail ||
1651           (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1652         // Preserve the real whitespace character
1653         nextpos++;
1654         AddToLine(offsetIntoBuffer, nextpos - bol);
1655         bol = nextpos;
1656       } else {
1657         // Replace the whitespace with a space
1658         AddToLine(offsetIntoBuffer, nextpos - bol);
1659         AddToLine(kSpace.get(), 1);
1660         bol = nextpos + 1;  // Let's eat the whitespace
1661       }
1662     }
1663   }  // Continue looping over the string
1664 }
1665
1666 /**
1667  * Gets the value of an attribute in a string. If the function returns
1668  * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1669  */
1670 nsresult nsPlainTextSerializer::GetAttributeValue(nsAtom* aName,
1671                                                   nsString& aValueRet) {
1672   if (mElement) {
1673     if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1674       return NS_OK;
1675     }
1676   }
1677
1678   return NS_ERROR_NOT_AVAILABLE;
1679 }
1680
1681 /**
1682  * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1683  * In this case, we should ignore it.
1684  */
1685 bool nsPlainTextSerializer::IsCurrentNodeConverted() {
1686   nsAutoString value;
1687   nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1688   return (NS_SUCCEEDED(rv) && (value.EqualsIgnoreCase("moz-txt", 7) ||
1689                                value.EqualsIgnoreCase("\"moz-txt", 8)));
1690 }
1691
1692 // static
1693 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
1694   if (!aContent->IsHTMLElement()) {
1695     return nullptr;
1696   }
1697
1698   nsAtom* localName = aContent->NodeInfo()->NameAtom();
1699   return localName->IsStatic() ? localName : nullptr;
1700 }
1701
1702 bool nsPlainTextSerializer::IsInPre() {
1703   return !mPreformatStack.empty() && mPreformatStack.top();
1704 }
1705
1706 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
1707   RefPtr<ComputedStyle> computedStyle =
1708       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1709   if (computedStyle) {
1710     const nsStyleText* textStyle = computedStyle->StyleText();
1711     return textStyle->WhiteSpaceOrNewlineIsSignificant();
1712   }
1713   // Fall back to looking at the tag, in case there is no style information.
1714   return GetIdForContent(aElement) == nsGkAtoms::pre;
1715 }
1716
1717 bool nsPlainTextSerializer::IsElementBlock(Element* aElement) {
1718   RefPtr<ComputedStyle> computedStyle =
1719       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1720   if (computedStyle) {
1721     const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1722     return displayStyle->IsBlockOutsideStyle();
1723   }
1724   // Fall back to looking at the tag, in case there is no style information.
1725   return nsContentUtils::IsHTMLBlock(aElement);
1726 }
1727
1728 /**
1729  * This method is required only to identify LI's inside OL.
1730  * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1731  */
1732 bool nsPlainTextSerializer::IsInOL() {
1733   int32_t i = mTagStackIndex;
1734   while (--i >= 0) {
1735     if (mTagStack[i] == nsGkAtoms::ol) return true;
1736     if (mTagStack[i] == nsGkAtoms::ul) {
1737       // If a UL is reached first, LI belongs the UL nested in OL.
1738       return false;
1739     }
1740   }
1741   // We may reach here for orphan LI's.
1742   return false;
1743 }
1744
1745 /*
1746   @return 0 = no header, 1 = h1, ..., 6 = h6
1747 */
1748 int32_t HeaderLevel(nsAtom* aTag) {
1749   if (aTag == nsGkAtoms::h1) {
1750     return 1;
1751   }
1752   if (aTag == nsGkAtoms::h2) {
1753     return 2;
1754   }
1755   if (aTag == nsGkAtoms::h3) {
1756     return 3;
1757   }
1758   if (aTag == nsGkAtoms::h4) {
1759     return 4;
1760   }
1761   if (aTag == nsGkAtoms::h5) {
1762     return 5;
1763   }
1764   if (aTag == nsGkAtoms::h6) {
1765     return 6;
1766   }
1767   return 0;
1768 }
1769
1770 /*
1771  * This is an implementation of GetUnicharWidth() and
1772  * GetUnicharStringWidth() as defined in
1773  * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1774  * <http://www.UNIX-systems.org/online.html>
1775  *
1776  * Markus Kuhn -- 2000-02-08 -- public domain
1777  *
1778  * Minor alterations to fit Mozilla's data types by Daniel Bratell
1779  */
1780
1781 /* These functions define the column width of an ISO 10646 character
1782  * as follows:
1783  *
1784  *    - The null character (U+0000) has a column width of 0.
1785  *
1786  *    - Other C0/C1 control characters and DEL will lead to a return
1787  *      value of -1.
1788  *
1789  *    - Non-spacing and enclosing combining characters (general
1790  *      category code Mn or Me in the Unicode database) have a
1791  *      column width of 0.
1792  *
1793  *    - Spacing characters in the East Asian Wide (W) or East Asian
1794  *      FullWidth (F) category as defined in Unicode Technical
1795  *      Report #11 have a column width of 2.
1796  *
1797  *    - All remaining characters (including all printable
1798  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
1799  *      etc.) have a column width of 1.
1800  *
1801  * This implementation assumes that wchar_t characters are encoded
1802  * in ISO 10646.
1803  */
1804
1805 namespace {
1806
1807 struct interval {
1808   uint16_t first;
1809   uint16_t last;
1810 };
1811
1812 struct CombiningComparator {
1813   const char16_t mUcs;
1814   explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
1815   int operator()(const interval& combining) const {
1816     if (mUcs > combining.last) return 1;
1817     if (mUcs < combining.first) return -1;
1818
1819     MOZ_ASSERT(combining.first <= mUcs);
1820     MOZ_ASSERT(mUcs <= combining.last);
1821     return 0;
1822   }
1823 };
1824
1825 }  // namespace
1826
1827 int32_t GetUnicharWidth(char16_t ucs) {
1828   /* sorted list of non-overlapping intervals of non-spacing characters */
1829   static const interval combining[] = {
1830       {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486}, {0x0488, 0x0489},
1831       {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF},
1832       {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
1833       {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
1834       {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
1835       {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
1836       {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
1837       {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
1838       {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82},
1839       {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
1840       {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
1841       {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
1842       {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
1843       {0x0C55, 0x0C56}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
1844       {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4},
1845       {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
1846       {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
1847       {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
1848       {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
1849       {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
1850       {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, {0x17B7, 0x17BD},
1851       {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x18A9, 0x18A9}, {0x20D0, 0x20E3},
1852       {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}};
1853
1854   /* test for 8-bit control characters */
1855   if (ucs == 0) return 0;
1856   if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return -1;
1857
1858   /* first quick check for Latin-1 etc. characters */
1859   if (ucs < combining[0].first) return 1;
1860
1861   /* binary search in table of non-spacing characters */
1862   size_t idx;
1863   if (BinarySearchIf(combining, 0, ArrayLength(combining),
1864                      CombiningComparator(ucs), &idx)) {
1865     return 0;
1866   }
1867
1868   /* if we arrive here, ucs is not a combining or C0/C1 control character */
1869
1870   /* fast test for majority of non-wide scripts */
1871   if (ucs < 0x1100) return 1;
1872
1873   return 1 +
1874          ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1875           (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1876            ucs != 0x303f) ||                  /* CJK ... Yi */
1877           (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1878           (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1879           (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1880           (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1881           (ucs >= 0xffe0 && ucs <= 0xffe6));
1882 }
1883
1884 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n) {
1885   int32_t w, width = 0;
1886
1887   for (; *pwcs && n-- > 0; pwcs++)
1888     if ((w = GetUnicharWidth(*pwcs)) < 0)
1889       ++width;  // Taking 1 as the width of non-printable character, for bug#
1890                 // 94475.
1891     else
1892       width += w;
1893
1894   return width;
1895 }