Bug 1550519 - Show a translucent parent highlight when a subgrid is highlighted....
[gecko.git] / dom / base / nsPlainTextSerializer.cpp
blob6bb8dc27cdf3852521ac3864d288e3512786f55a
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /*
8 * nsIContentSerializer implementation that can be used with an
9 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10 * (eg for copy/paste as plaintext).
13 #include "nsPlainTextSerializer.h"
14 #include "nsIServiceManager.h"
15 #include "nsGkAtoms.h"
16 #include "nsNameSpaceManager.h"
17 #include "nsTextFragment.h"
18 #include "nsContentUtils.h"
19 #include "nsReadableUtils.h"
20 #include "nsUnicharUtils.h"
21 #include "nsCRT.h"
22 #include "mozilla/dom/Element.h"
23 #include "mozilla/Preferences.h"
24 #include "mozilla/BinarySearch.h"
25 #include "nsComputedDOMStyle.h"
27 namespace mozilla {
28 class Encoding;
31 using namespace mozilla;
32 using namespace mozilla::dom;
// Names of the preferences consulted by this serializer.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
#define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby"

// Width, in columns, of one indentation step.
static constexpr int32_t kTabSize = 4;

// Indentation of h1 when mHeaderStrategy is 1 or 2. The indentation of the
// other header levels is derived from this value.
// XXX center h1?
static constexpr int32_t kIndentSizeHeaders = 2;

// With mHeaderStrategy == 1, h(x+1) is indented this many columns more than
// h(x).
static constexpr int32_t kIndentIncrementHeaders = 2;

// Indentation of the non-first lines of ul and ol.
static constexpr int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static constexpr int32_t kIndentSizeDD = kTabSize;

// U+00A0 NO-BREAK SPACE and U+0020 SPACE.
static constexpr char16_t kNBSP = 160;
static constexpr char16_t kSPACE = ' ';
53 static int32_t HeaderLevel(nsAtom* aTag);
54 static int32_t GetUnicharWidth(char16_t ucs);
55 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
57 // Someday may want to make this non-const:
58 static const uint32_t TagStackSize = 500;
59 static const uint32_t OLStackSize = 100;
61 static bool gPreferenceInitialized = false;
62 static bool gAlwaysIncludeRuby = false;
// XPCOM boilerplate for nsPlainTextSerializer. The macros are defined
// elsewhere; the interface map below lists nsIContentSerializer and
// nsISupports, and mElement is the only member handed to
// NS_IMPL_CYCLE_COLLECTION.
// NOTE(review): by their names these macros presumably generate the
// cycle-collection-aware AddRef/Release/QueryInterface -- confirm against the
// macro definitions.
64 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
65 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
67 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
68 NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
69 NS_INTERFACE_MAP_ENTRY(nsISupports)
70 NS_INTERFACE_MAP_END
72 NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
74 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
75 RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
76 it.forget(aSerializer);
77 return NS_OK;
80 nsPlainTextSerializer::nsPlainTextSerializer()
81 : mFlags(0),
82 mFloatingLines(-1),
83 mLineBreakDue(false),
84 kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
86 mOutputString = nullptr;
87 mHeadLevel = 0;
88 mAtFirstColumn = true;
89 mIndent = 0;
90 mCiteQuoteLevel = 0;
91 mStructs = true; // will be read from prefs later
92 mHeaderStrategy = 1 /*indent increasingly*/; // ditto
93 mHasWrittenCiteBlockquote = false;
94 mSpanLevel = 0;
95 for (int32_t i = 0; i <= 6; i++) {
96 mHeaderCounter[i] = 0;
99 // Line breaker
100 mWrapColumn = 72; // XXX magic number, we expect someone to reset this
101 mCurrentLineWidth = 0;
103 // Flow
104 mEmptyLines = 1; // The start of the document is an "empty line" in itself,
105 mInWhitespace = false;
106 mPreFormattedMail = false;
107 mStartedOutput = false;
109 mPreformattedBlockBoundary = false;
110 mWithRubyAnnotation = false; // will be read from pref and flag later
112 // initialize the tag stack to zero:
113 // The stack only ever contains pointers to static atoms, so they don't
114 // need refcounting.
115 mTagStack = new nsAtom*[TagStackSize];
116 mTagStackIndex = 0;
117 mIgnoreAboveIndex = (uint32_t)kNotFound;
119 // initialize the OL stack, where numbers for ordered lists are kept
120 mOLStack = new int32_t[OLStackSize];
121 mOLStackIndex = 0;
123 mULCount = 0;
125 mIgnoredChildNodeLevel = 0;
127 if (!gPreferenceInitialized) {
128 Preferences::AddBoolVarCache(&gAlwaysIncludeRuby, PREF_ALWAYS_INCLUDE_RUBY,
129 true);
130 gPreferenceInitialized = true;
134 nsPlainTextSerializer::~nsPlainTextSerializer() {
135 delete[] mTagStack;
136 delete[] mOLStack;
137 NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
140 NS_IMETHODIMP
141 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
142 const Encoding* aEncoding, bool aIsCopying,
143 bool aIsWholeDocument,
144 bool* aNeedsPreformatScanning) {
145 #ifdef DEBUG
146 // Check if the major control flags are set correctly.
147 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
148 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
149 "If you want format=flowed, you must combine it with "
150 "nsIDocumentEncoder::OutputFormatted");
153 if (aFlags & nsIDocumentEncoder::OutputFormatted) {
154 NS_ASSERTION(
155 !(aFlags & nsIDocumentEncoder::OutputPreformatted),
156 "Can't do formatted and preformatted output at the same time!");
158 #endif
160 *aNeedsPreformatScanning = true;
161 mFlags = aFlags;
162 mWrapColumn = aWrapColumn;
164 // Only create a linebreaker if we will handle wrapping.
165 if (MayWrap() && MayBreakLines()) {
166 mLineBreaker = nsContentUtils::LineBreaker();
169 // Set the line break character:
170 if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
171 (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
172 // Windows
173 mLineBreak.AssignLiteral("\r\n");
174 } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
175 // Mac
176 mLineBreak.Assign(char16_t('\r'));
177 } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
178 // Unix/DOM
179 mLineBreak.Assign(char16_t('\n'));
180 } else {
181 // Platform/default
182 mLineBreak.AssignLiteral(NS_LINEBREAK);
185 mLineBreakDue = false;
186 mFloatingLines = -1;
188 mPreformattedBlockBoundary = false;
190 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
191 // Get some prefs that controls how we do formatted output
192 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
194 mHeaderStrategy =
195 Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
198 // The pref is default inited to false in libpref, but we use true
199 // as fallback value because we don't want to affect behavior in
200 // other places which use this serializer currently.
201 mWithRubyAnnotation =
202 gAlwaysIncludeRuby || (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
204 // XXX We should let the caller decide whether to do this or not
205 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
207 return NS_OK;
210 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
211 uint32_t size = aStack.Length();
212 if (size == 0) {
213 return false;
215 return aStack.ElementAt(size - 1);
218 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
219 uint32_t size = aStack.Length();
220 if (size > 0) {
221 aStack.ElementAt(size - 1) = aValue;
222 } else {
223 NS_ERROR("There is no \"Last\" value");
227 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
228 aStack.AppendElement(bool(aValue));
231 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
232 bool returnValue = false;
233 uint32_t size = aStack.Length();
234 if (size > 0) {
235 returnValue = aStack.ElementAt(size - 1);
236 aStack.RemoveElementAt(size - 1);
238 return returnValue;
241 bool nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(
242 nsAtom* aTag) {
243 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
244 // non-textual container element should be serialized as placeholder
245 // character and its child nodes should be ignored. See bug 895239.
246 if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
247 return false;
250 return (aTag == nsGkAtoms::audio) || (aTag == nsGkAtoms::canvas) ||
251 (aTag == nsGkAtoms::iframe) || (aTag == nsGkAtoms::meter) ||
252 (aTag == nsGkAtoms::progress) || (aTag == nsGkAtoms::object) ||
253 (aTag == nsGkAtoms::svg) || (aTag == nsGkAtoms::video);
256 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsAtom* aTag) {
257 if (mWithRubyAnnotation) {
258 return false;
261 return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
262 aTag == nsGkAtoms::rtc;
265 // Return true if aElement has 'display:none' or if we just don't know.
266 static bool IsDisplayNone(Element* aElement) {
267 RefPtr<ComputedStyle> computedStyle =
268 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
269 return !computedStyle ||
270 computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
273 static bool IsIgnorableScriptOrStyle(Element* aElement) {
274 return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
275 IsDisplayNone(aElement);
278 NS_IMETHODIMP
279 nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
280 int32_t aEndOffset, nsAString& aStr) {
281 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
282 return NS_OK;
285 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
286 if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
288 NS_ENSURE_ARG(aText);
290 nsresult rv = NS_OK;
292 nsIContent* content = aText;
293 const nsTextFragment* frag;
294 if (!content || !(frag = content->GetText())) {
295 return NS_ERROR_FAILURE;
298 int32_t fragLength = frag->GetLength();
299 int32_t endoffset =
300 (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
301 NS_ASSERTION(aStartOffset <= endoffset,
302 "A start offset is beyond the end of the text fragment!");
304 int32_t length = endoffset - aStartOffset;
305 if (length <= 0) {
306 return NS_OK;
309 nsAutoString textstr;
310 if (frag->Is2b()) {
311 textstr.Assign(frag->Get2b() + aStartOffset, length);
312 } else {
313 // AssignASCII is for 7-bit character only, so don't use it
314 const char* data = frag->Get1b();
315 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
318 mOutputString = &aStr;
320 // We have to split the string across newlines
321 // to match parser behavior
322 int32_t start = 0;
323 int32_t offset = textstr.FindCharInSet("\n\r");
324 while (offset != kNotFound) {
325 if (offset > start) {
326 // Pass in the line
327 DoAddText(false, Substring(textstr, start, offset - start));
330 // Pass in a newline
331 DoAddText(true, mLineBreak);
333 start = offset + 1;
334 offset = textstr.FindCharInSet("\n\r", start);
337 // Consume the last bit of the string if there's any left
338 if (start < length) {
339 if (start) {
340 DoAddText(false, Substring(textstr, start, length - start));
341 } else {
342 DoAddText(false, textstr);
346 mOutputString = nullptr;
348 return rv;
351 NS_IMETHODIMP
352 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
353 int32_t aStartOffset,
354 int32_t aEndOffset, nsAString& aStr) {
355 return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
358 NS_IMETHODIMP
359 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
360 mPreformatStack.push(IsElementPreformatted(aElement));
361 return NS_OK;
364 NS_IMETHODIMP
365 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
366 MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
367 "Tried to pop without previous push.");
368 mPreformatStack.pop();
369 return NS_OK;
372 NS_IMETHODIMP
373 nsPlainTextSerializer::AppendElementStart(Element* aElement,
374 Element* aOriginalElement,
375 nsAString& aStr) {
376 NS_ENSURE_ARG(aElement);
378 mElement = aElement;
380 nsresult rv;
381 nsAtom* id = GetIdForContent(mElement);
383 bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
385 mOutputString = &aStr;
387 if (isContainer) {
388 rv = DoOpenContainer(id);
389 } else {
390 rv = DoAddLeaf(id);
393 mElement = nullptr;
394 mOutputString = nullptr;
396 if (id == nsGkAtoms::head) {
397 ++mHeadLevel;
400 return rv;
403 NS_IMETHODIMP
404 nsPlainTextSerializer::AppendElementEnd(Element* aElement, nsAString& aStr) {
405 NS_ENSURE_ARG(aElement);
407 mElement = aElement;
409 nsresult rv;
410 nsAtom* id = GetIdForContent(mElement);
412 bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
414 mOutputString = &aStr;
416 rv = NS_OK;
417 if (isContainer) {
418 rv = DoCloseContainer(id);
421 mElement = nullptr;
422 mOutputString = nullptr;
424 if (id == nsGkAtoms::head) {
425 NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
426 --mHeadLevel;
429 return rv;
432 NS_IMETHODIMP
433 nsPlainTextSerializer::Flush(nsAString& aStr) {
434 mOutputString = &aStr;
435 FlushLine();
436 mOutputString = nullptr;
437 return NS_OK;
440 NS_IMETHODIMP
441 nsPlainTextSerializer::AppendDocumentStart(Document* aDocument,
442 nsAString& aStr) {
443 return NS_OK;
446 nsresult nsPlainTextSerializer::DoOpenContainer(nsAtom* aTag) {
447 // Check if we need output current node as placeholder character and ignore
448 // child nodes.
449 if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
450 if (mIgnoredChildNodeLevel == 0) {
451 // Serialize current node as placeholder character
452 Write(NS_LITERAL_STRING(u"\xFFFC"));
454 // Ignore child nodes.
455 mIgnoredChildNodeLevel++;
456 return NS_OK;
458 if (IsIgnorableRubyAnnotation(aTag)) {
459 // Ignorable ruby annotation shouldn't be replaced by a placeholder
460 // character, neither any of its descendants.
461 mIgnoredChildNodeLevel++;
462 return NS_OK;
464 if (IsIgnorableScriptOrStyle(mElement)) {
465 mIgnoredChildNodeLevel++;
466 return NS_OK;
469 if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
470 if (mPreformattedBlockBoundary && DoOutput()) {
471 // Should always end a line, but get no more whitespace
472 if (mFloatingLines < 0) mFloatingLines = 0;
473 mLineBreakDue = true;
475 mPreformattedBlockBoundary = false;
478 if (mFlags & nsIDocumentEncoder::OutputRaw) {
479 // Raw means raw. Don't even think about doing anything fancy
480 // here like indenting, adding line breaks or any other
481 // characters such as list item bullets, quote characters
482 // around <q>, etc. I mean it! Don't make me smack you!
484 return NS_OK;
487 if (mTagStackIndex < TagStackSize) {
488 mTagStack[mTagStackIndex++] = aTag;
491 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
492 return NS_OK;
495 // Reset this so that <blockquote type=cite> doesn't affect the whitespace
496 // above random <pre>s below it.
497 mHasWrittenCiteBlockquote =
498 mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
500 bool isInCiteBlockquote = false;
502 // XXX special-case <blockquote type=cite> so that we don't add additional
503 // newlines before the text.
504 if (aTag == nsGkAtoms::blockquote) {
505 nsAutoString value;
506 nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
507 isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
510 if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
512 // Check if this tag's content that should not be output
513 if ((aTag == nsGkAtoms::noscript &&
514 !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
515 ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
516 !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
517 // Ignore everything that follows the current tag in
518 // question until a matching end tag is encountered.
519 mIgnoreAboveIndex = mTagStackIndex - 1;
520 return NS_OK;
523 if (aTag == nsGkAtoms::body) {
524 // Try to figure out here whether we have a
525 // preformatted style attribute set by Thunderbird.
527 // Trigger on the presence of a "pre-wrap" in the
528 // style attribute. That's a very simplistic way to do
529 // it, but better than nothing.
530 // Also set mWrapColumn to the value given there
531 // (which arguably we should only do if told to do so).
532 nsAutoString style;
533 int32_t whitespace;
534 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
535 (kNotFound != (whitespace = style.Find("white-space:")))) {
536 if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
537 #ifdef DEBUG_preformatted
538 printf("Set mPreFormattedMail based on style pre-wrap\n");
539 #endif
540 mPreFormattedMail = true;
541 int32_t widthOffset = style.Find("width:");
542 if (widthOffset >= 0) {
543 // We have to search for the ch before the semicolon,
544 // not for the semicolon itself, because nsString::ToInteger()
545 // considers 'c' to be a valid numeric char (even if radix=10)
546 // but then gets confused if it sees it next to the number
547 // when the radix specified was 10, and returns an error code.
548 int32_t semiOffset = style.Find("ch", false, widthOffset + 6);
549 int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
550 : style.Length() - widthOffset);
551 nsAutoString widthstr;
552 style.Mid(widthstr, widthOffset + 6, length);
553 nsresult err;
554 int32_t col = widthstr.ToInteger(&err);
556 if (NS_SUCCEEDED(err)) {
557 mWrapColumn = (uint32_t)col;
558 #ifdef DEBUG_preformatted
559 printf("Set wrap column to %d based on style\n", mWrapColumn);
560 #endif
563 } else if (kNotFound != style.Find("pre", true, whitespace)) {
564 #ifdef DEBUG_preformatted
565 printf("Set mPreFormattedMail based on style pre\n");
566 #endif
567 mPreFormattedMail = true;
568 mWrapColumn = 0;
570 } else {
571 /* See comment at end of function. */
572 mInWhitespace = true;
573 mPreFormattedMail = false;
576 return NS_OK;
579 // Keep this in sync with DoCloseContainer!
580 if (!DoOutput()) {
581 return NS_OK;
584 if (aTag == nsGkAtoms::p)
585 EnsureVerticalSpace(1);
586 else if (aTag == nsGkAtoms::pre) {
587 if (GetLastBool(mIsInCiteBlockquote))
588 EnsureVerticalSpace(0);
589 else if (mHasWrittenCiteBlockquote) {
590 EnsureVerticalSpace(0);
591 mHasWrittenCiteBlockquote = false;
592 } else
593 EnsureVerticalSpace(1);
594 } else if (aTag == nsGkAtoms::tr) {
595 PushBool(mHasWrittenCellsForRow, false);
596 } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
597 // We must make sure that the content of two table cells get a
598 // space between them.
600 // To make the separation between cells most obvious and
601 // importable, we use a TAB.
602 if (GetLastBool(mHasWrittenCellsForRow)) {
603 // Bypass |Write| so that the TAB isn't compressed away.
604 AddToLine(u"\t", 1);
605 mInWhitespace = true;
606 } else if (mHasWrittenCellsForRow.IsEmpty()) {
607 // We don't always see a <tr> (nor a <table>) before the <td> if we're
608 // copying part of a table
609 PushBool(mHasWrittenCellsForRow, true); // will never be popped
610 } else {
611 SetLastBool(mHasWrittenCellsForRow, true);
613 } else if (aTag == nsGkAtoms::ul) {
614 // Indent here to support nested lists, which aren't included in li :-(
615 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
616 // Must end the current line before we change indention
617 mIndent += kIndentSizeList;
618 mULCount++;
619 } else if (aTag == nsGkAtoms::ol) {
620 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
621 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
622 // Must end the current line before we change indention
623 if (mOLStackIndex < OLStackSize) {
624 nsAutoString startAttr;
625 int32_t startVal = 1;
626 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
627 nsresult rv = NS_OK;
628 startVal = startAttr.ToInteger(&rv);
629 if (NS_FAILED(rv)) startVal = 1;
631 mOLStack[mOLStackIndex++] = startVal;
633 } else {
634 mOLStackIndex++;
636 mIndent += kIndentSizeList; // see ul
637 } else if (aTag == nsGkAtoms::li &&
638 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
639 if (mTagStackIndex > 1 && IsInOL()) {
640 if (mOLStackIndex > 0) {
641 nsAutoString valueAttr;
642 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
643 nsresult rv = NS_OK;
644 int32_t valueAttrVal = valueAttr.ToInteger(&rv);
645 if (NS_SUCCEEDED(rv)) mOLStack[mOLStackIndex - 1] = valueAttrVal;
647 // This is what nsBulletFrame does for OLs:
648 mInIndentString.AppendInt(mOLStack[mOLStackIndex - 1]++, 10);
649 } else {
650 mInIndentString.Append(char16_t('#'));
653 mInIndentString.Append(char16_t('.'));
655 } else {
656 static const char bulletCharArray[] = "*o+#";
657 uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
658 char bulletChar = bulletCharArray[index % 4];
659 mInIndentString.Append(char16_t(bulletChar));
662 mInIndentString.Append(char16_t(' '));
663 } else if (aTag == nsGkAtoms::dl) {
664 EnsureVerticalSpace(1);
665 } else if (aTag == nsGkAtoms::dt) {
666 EnsureVerticalSpace(0);
667 } else if (aTag == nsGkAtoms::dd) {
668 EnsureVerticalSpace(0);
669 mIndent += kIndentSizeDD;
670 } else if (aTag == nsGkAtoms::span) {
671 ++mSpanLevel;
672 } else if (aTag == nsGkAtoms::blockquote) {
673 // Push
674 PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
675 if (isInCiteBlockquote) {
676 EnsureVerticalSpace(0);
677 mCiteQuoteLevel++;
678 } else {
679 EnsureVerticalSpace(1);
680 mIndent += kTabSize; // Check for some maximum value?
682 } else if (aTag == nsGkAtoms::q) {
683 Write(NS_LITERAL_STRING("\""));
686 // Else make sure we'll separate block level tags,
687 // even if we're about to leave, before doing any other formatting.
688 else if (IsElementBlock(mElement)) {
689 EnsureVerticalSpace(0);
692 //////////////////////////////////////////////////////////////
693 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
694 return NS_OK;
696 //////////////////////////////////////////////////////////////
697 // The rest of this routine is formatted output stuff,
698 // which we should skip if we're not formatted:
699 //////////////////////////////////////////////////////////////
701 // Push on stack
702 bool currentNodeIsConverted = IsCurrentNodeConverted();
704 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
705 aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
706 EnsureVerticalSpace(2);
707 if (mHeaderStrategy == 2) { // numbered
708 mIndent += kIndentSizeHeaders;
709 // Caching
710 int32_t level = HeaderLevel(aTag);
711 // Increase counter for current level
712 mHeaderCounter[level]++;
713 // Reset all lower levels
714 int32_t i;
716 for (i = level + 1; i <= 6; i++) {
717 mHeaderCounter[i] = 0;
720 // Construct numbers
721 nsAutoString leadup;
722 for (i = 1; i <= level; i++) {
723 leadup.AppendInt(mHeaderCounter[i]);
724 leadup.Append(char16_t('.'));
726 leadup.Append(char16_t(' '));
727 Write(leadup);
728 } else if (mHeaderStrategy == 1) { // indent increasingly
729 mIndent += kIndentSizeHeaders;
730 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
731 // for h(x), run x-1 times
732 mIndent += kIndentIncrementHeaders;
735 } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
736 nsAutoString url;
737 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
738 !url.IsEmpty()) {
739 mURL = url;
741 } else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
742 Write(NS_LITERAL_STRING("^"));
743 } else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
744 Write(NS_LITERAL_STRING("_"));
745 } else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
746 Write(NS_LITERAL_STRING("|"));
747 } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) && mStructs &&
748 !currentNodeIsConverted) {
749 Write(NS_LITERAL_STRING("*"));
750 } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) && mStructs &&
751 !currentNodeIsConverted) {
752 Write(NS_LITERAL_STRING("/"));
753 } else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
754 Write(NS_LITERAL_STRING("_"));
757 /* Container elements are always block elements, so we shouldn't
758 output any whitespace immediately after the container tag even if
759 there's extra whitespace there because the HTML is pretty-printed
760 or something. To ensure that happens, tell the serializer we're
761 already in whitespace so it won't output more. */
762 mInWhitespace = true;
764 return NS_OK;
767 nsresult nsPlainTextSerializer::DoCloseContainer(nsAtom* aTag) {
768 if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
769 mIgnoredChildNodeLevel--;
770 return NS_OK;
772 if (IsIgnorableRubyAnnotation(aTag)) {
773 mIgnoredChildNodeLevel--;
774 return NS_OK;
776 if (IsIgnorableScriptOrStyle(mElement)) {
777 mIgnoredChildNodeLevel--;
778 return NS_OK;
781 if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
782 if (DoOutput() && IsInPre() && IsElementBlock(mElement)) {
783 // If we're closing a preformatted block element, output a line break
784 // when we find a new container.
785 mPreformattedBlockBoundary = true;
789 if (mFlags & nsIDocumentEncoder::OutputRaw) {
790 // Raw means raw. Don't even think about doing anything fancy
791 // here like indenting, adding line breaks or any other
792 // characters such as list item bullets, quote characters
793 // around <q>, etc. I mean it! Don't make me smack you!
795 return NS_OK;
798 if (mTagStackIndex > 0) {
799 --mTagStackIndex;
802 if (mTagStackIndex >= mIgnoreAboveIndex) {
803 if (mTagStackIndex == mIgnoreAboveIndex) {
804 // We're dealing with the close tag whose matching
805 // open tag had set the mIgnoreAboveIndex value.
806 // Reset mIgnoreAboveIndex before discarding this tag.
807 mIgnoreAboveIndex = (uint32_t)kNotFound;
809 return NS_OK;
812 // End current line if we're ending a block level tag
813 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
814 // We want the output to end with a new line,
815 // but in preformatted areas like text fields,
816 // we can't emit newlines that weren't there.
817 // So add the newline only in the case of formatted output.
818 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
819 EnsureVerticalSpace(0);
820 } else {
821 FlushLine();
823 // We won't want to do anything with these in formatted mode either,
824 // so just return now:
825 return NS_OK;
828 // Keep this in sync with DoOpenContainer!
829 if (!DoOutput()) {
830 return NS_OK;
833 if (aTag == nsGkAtoms::tr) {
834 PopBool(mHasWrittenCellsForRow);
835 // Should always end a line, but get no more whitespace
836 if (mFloatingLines < 0) mFloatingLines = 0;
837 mLineBreakDue = true;
838 } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
839 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
840 // Items that should always end a line, but get no more whitespace
841 if (mFloatingLines < 0) mFloatingLines = 0;
842 mLineBreakDue = true;
843 } else if (aTag == nsGkAtoms::pre) {
844 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
845 mLineBreakDue = true;
846 } else if (aTag == nsGkAtoms::ul) {
847 FlushLine();
848 mIndent -= kIndentSizeList;
849 if (--mULCount + mOLStackIndex == 0) {
850 mFloatingLines = 1;
851 mLineBreakDue = true;
853 } else if (aTag == nsGkAtoms::ol) {
854 FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
855 mIndent -= kIndentSizeList;
856 NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
857 mOLStackIndex--;
858 if (mULCount + mOLStackIndex == 0) {
859 mFloatingLines = 1;
860 mLineBreakDue = true;
862 } else if (aTag == nsGkAtoms::dl) {
863 mFloatingLines = 1;
864 mLineBreakDue = true;
865 } else if (aTag == nsGkAtoms::dd) {
866 FlushLine();
867 mIndent -= kIndentSizeDD;
868 } else if (aTag == nsGkAtoms::span) {
869 NS_ASSERTION(mSpanLevel, "Span level will be negative!");
870 --mSpanLevel;
871 } else if (aTag == nsGkAtoms::div) {
872 if (mFloatingLines < 0) mFloatingLines = 0;
873 mLineBreakDue = true;
874 } else if (aTag == nsGkAtoms::blockquote) {
875 FlushLine(); // Is this needed?
877 // Pop
878 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
880 if (isInCiteBlockquote) {
881 NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
882 mCiteQuoteLevel--;
883 mFloatingLines = 0;
884 mHasWrittenCiteBlockquote = true;
885 } else {
886 mIndent -= kTabSize;
887 mFloatingLines = 1;
889 mLineBreakDue = true;
890 } else if (aTag == nsGkAtoms::q) {
891 Write(NS_LITERAL_STRING("\""));
892 } else if (IsElementBlock(mElement)) {
893 // All other blocks get 1 vertical space after them
894 // in formatted mode, otherwise 0.
895 // This is hard. Sometimes 0 is a better number, but
896 // how to know?
897 if (mFlags & nsIDocumentEncoder::OutputFormatted)
898 EnsureVerticalSpace(1);
899 else {
900 if (mFloatingLines < 0) mFloatingLines = 0;
901 mLineBreakDue = true;
905 //////////////////////////////////////////////////////////////
906 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
907 return NS_OK;
909 //////////////////////////////////////////////////////////////
910 // The rest of this routine is formatted output stuff,
911 // which we should skip if we're not formatted:
912 //////////////////////////////////////////////////////////////
914 // Pop the currentConverted stack
915 bool currentNodeIsConverted = IsCurrentNodeConverted();
917 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
918 aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
919 if (mHeaderStrategy) { /*numbered or indent increasingly*/
920 mIndent -= kIndentSizeHeaders;
922 if (mHeaderStrategy == 1 /*indent increasingly*/) {
923 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
924 // for h(x), run x-1 times
925 mIndent -= kIndentIncrementHeaders;
928 EnsureVerticalSpace(1);
929 } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted &&
930 !mURL.IsEmpty()) {
931 nsAutoString temp;
932 temp.AssignLiteral(" <");
933 temp += mURL;
934 temp.Append(char16_t('>'));
935 Write(temp);
936 mURL.Truncate();
937 } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) && mStructs &&
938 !currentNodeIsConverted) {
939 Write(kSpace);
940 } else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
941 Write(NS_LITERAL_STRING("|"));
942 } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) && mStructs &&
943 !currentNodeIsConverted) {
944 Write(NS_LITERAL_STRING("*"));
945 } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) && mStructs &&
946 !currentNodeIsConverted) {
947 Write(NS_LITERAL_STRING("/"));
948 } else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
949 Write(NS_LITERAL_STRING("_"));
952 return NS_OK;
955 bool nsPlainTextSerializer::MustSuppressLeaf() {
956 if (mIgnoredChildNodeLevel > 0) {
957 return true;
960 if ((mTagStackIndex > 1 &&
961 mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
962 (mTagStackIndex > 0 &&
963 mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
964 // Don't output the contents of SELECT elements;
965 // Might be nice, eventually, to output just the selected element.
966 // Read more in bug 31994.
967 return true;
970 return false;
973 void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
974 const nsAString& aText) {
975 // If we don't want any output, just return
976 if (!DoOutput()) {
977 return;
980 if (!aIsLineBreak) {
981 // Make sure to reset this, since it's no longer true.
982 mHasWrittenCiteBlockquote = false;
985 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
987 if (MustSuppressLeaf()) {
988 return;
991 if (aIsLineBreak) {
992 // The only times we want to pass along whitespace from the original
993 // html source are if we're forced into preformatted mode via flags,
994 // or if we're prettyprinting and we're inside a <pre>.
995 // Otherwise, either we're collapsing to minimal text, or we're
996 // prettyprinting to mimic the html format, and in neither case
997 // does the formatting of the html source help us.
998 if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
999 (mPreFormattedMail && !mWrapColumn) || IsInPre()) {
1000 EnsureVerticalSpace(mEmptyLines + 1);
1001 } else if (!mInWhitespace) {
1002 Write(kSpace);
1003 mInWhitespace = true;
1005 return;
1008 /* Check, if we are in a link (symbolized with mURL containing the URL)
1009 and the text is equal to the URL. In that case we don't want to output
1010 the URL twice so we scrap the text in mURL. */
1011 if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1012 mURL.Truncate();
1014 Write(aText);
1017 nsresult nsPlainTextSerializer::DoAddLeaf(nsAtom* aTag) {
1018 mPreformattedBlockBoundary = false;
1020 // If we don't want any output, just return
1021 if (!DoOutput()) {
1022 return NS_OK;
1025 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1027 if (MustSuppressLeaf()) {
1028 return NS_OK;
1031 if (aTag == nsGkAtoms::br) {
1032 // Another egregious editor workaround, see bug 38194:
1033 // ignore the bogus br tags that the editor sticks here and there.
1034 nsAutoString tagAttr;
1035 if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) ||
1036 !tagAttr.EqualsLiteral("_moz")) {
1037 EnsureVerticalSpace(mEmptyLines + 1);
1039 } else if (aTag == nsGkAtoms::hr &&
1040 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
1041 EnsureVerticalSpace(0);
1043 // Make a line of dashes as wide as the wrap width
1044 // XXX honoring percentage would be nice
1045 nsAutoString line;
1046 uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
1047 while (line.Length() < width) {
1048 line.Append(char16_t('-'));
1050 Write(line);
1052 EnsureVerticalSpace(0);
1053 } else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
1054 Write(NS_LITERAL_STRING(u"\xFFFC"));
1055 } else if (aTag == nsGkAtoms::img) {
1056 /* Output (in decreasing order of preference)
1057 alt, title or nothing */
1058 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1059 nsAutoString imageDescription;
1060 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
1061 // If the alt attribute has an empty value (|alt=""|), output nothing
1062 } else if (NS_SUCCEEDED(
1063 GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
1064 !imageDescription.IsEmpty()) {
1065 imageDescription =
1066 NS_LITERAL_STRING(" [") + imageDescription + NS_LITERAL_STRING("] ");
1069 Write(imageDescription);
1072 return NS_OK;
1076 * Adds as many newline as necessary to get |noOfRows| empty lines
1078 * noOfRows = -1 : Being in the middle of some line of text
1079 * noOfRows = 0 : Being at the start of a line
1080 * noOfRows = n>0 : Having n empty lines before the current line.
1082 void nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) {
1083 // If we have something in the indent we probably want to output
1084 // it and it's not included in the count for empty lines so we don't
1085 // realize that we should start a new line.
1086 if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1087 EndLine(false);
1088 mInWhitespace = true;
1091 while (mEmptyLines < noOfRows) {
1092 EndLine(false);
1093 mInWhitespace = true;
1095 mLineBreakDue = false;
1096 mFloatingLines = -1;
1100 * This empties the current line cache without adding a NEWLINE.
1101 * Should not be used if line wrapping is of importance since
1102 * this function destroys the cache information.
1104 * It will also write indentation and quotes if we believe us to be
1105 * at the start of the line.
1107 void nsPlainTextSerializer::FlushLine() {
1108 if (!mCurrentLine.IsEmpty()) {
1109 if (mAtFirstColumn) {
1110 OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1113 Output(mCurrentLine);
1114 mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1115 mCurrentLine.Truncate();
1116 mCurrentLineWidth = 0;
1121 * Prints the text to output to our current output device (the string
1122 * mOutputString). The only logic here is to replace non breaking spaces with a
1123 * normal space since most (all?) receivers of the result won't understand the
1124 * nbsp and even be confused by it.
1126 void nsPlainTextSerializer::Output(nsString& aString) {
1127 if (!aString.IsEmpty()) {
1128 mStartedOutput = true;
1131 if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1132 // First, replace all nbsp characters with spaces,
1133 // which the unicode encoder won't do for us.
1134 aString.ReplaceChar(kNBSP, kSPACE);
1136 mOutputString->Append(aString);
1139 static bool IsSpaceStuffable(const char16_t* s) {
1140 if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1141 NS_strncmp(s, u"From ", 5) == 0)
1142 return true;
1143 else
1144 return false;
1148 * This function adds a piece of text to the current stored line. If we are
1149 * wrapping text and the stored line will become too long, a suitable
1150 * location to wrap will be found and the line that's complete will be
1151 * output.
1153 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
1154 int32_t aLineFragmentLength) {
1155 uint32_t prefixwidth =
1156 (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1 : 0) + mIndent;
1158 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1160 int32_t linelength = mCurrentLine.Length();
1161 if (0 == linelength) {
1162 if (0 == aLineFragmentLength) {
1163 // Nothing at all. Are you kidding me?
1164 return;
1167 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1168 if (IsSpaceStuffable(aLineFragment) &&
1169 mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1171 // Space stuffing a la RFC 2646 (format=flowed).
1172 mCurrentLine.Append(char16_t(' '));
1174 if (MayWrap()) {
1175 mCurrentLineWidth += GetUnicharWidth(' ');
1176 #ifdef DEBUG_wrapping
1177 NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1178 mCurrentLine.Length()) ==
1179 (int32_t)mCurrentLineWidth,
1180 "mCurrentLineWidth and reality out of sync!");
1181 #endif
1185 mEmptyLines = -1;
1188 mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1189 if (MayWrap()) {
1190 mCurrentLineWidth +=
1191 GetUnicharStringWidth(aLineFragment, aLineFragmentLength);
1192 #ifdef DEBUG_wrapping
1193 NS_ASSERTION(
1194 GetUnicharstringWidth(mCurrentLine.get(), mCurrentLine.Length()) ==
1195 (int32_t)mCurrentLineWidth,
1196 "mCurrentLineWidth and reality out of sync!");
1197 #endif
1200 linelength = mCurrentLine.Length();
1202 // Wrap?
1203 if (MayWrap()) {
1204 #ifdef DEBUG_wrapping
1205 NS_ASSERTION(
1206 GetUnicharstringWidth(mCurrentLine.get(), mCurrentLine.Length()) ==
1207 (int32_t)mCurrentLineWidth,
1208 "mCurrentLineWidth and reality out of sync!");
1209 #endif
1210 // Yes, wrap!
1211 // The "+4" is to avoid wrap lines that only would be a couple
1212 // of letters too long. We give this bonus only if the
1213 // wrapcolumn is more than 20.
1214 uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1216 // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1217 while (mCurrentLineWidth + prefixwidth > mWrapColumn + bonuswidth) {
1218 // We go from the end removing one letter at a time until
1219 // we have a reasonable width
1220 int32_t goodSpace = mCurrentLine.Length();
1221 uint32_t width = mCurrentLineWidth;
1222 while (goodSpace > 0 && (width + prefixwidth > mWrapColumn)) {
1223 goodSpace--;
1224 width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1227 goodSpace++;
1229 if (mLineBreaker) {
1230 goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1231 mCurrentLine.Length(), goodSpace);
1232 if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1233 nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace - 1))) {
1234 --goodSpace; // adjust the position since line breaker returns a
1235 // position next to space
1238 // fallback if the line breaker is unavailable or failed
1239 if (!mLineBreaker) {
1240 if (mCurrentLine.IsEmpty() || mWrapColumn < prefixwidth) {
1241 goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
1242 } else {
1243 goodSpace =
1244 std::min(mWrapColumn - prefixwidth, mCurrentLine.Length() - 1);
1245 while (goodSpace >= 0 &&
1246 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1247 goodSpace--;
1252 nsAutoString restOfLine;
1253 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1254 // If we didn't find a good place to break, accept long line and
1255 // try to find another place to break
1256 goodSpace =
1257 (prefixwidth > mWrapColumn + 1) ? 1 : mWrapColumn - prefixwidth + 1;
1258 if (mLineBreaker) {
1259 if ((uint32_t)goodSpace < mCurrentLine.Length())
1260 goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1261 mCurrentLine.Length(), goodSpace);
1262 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1263 goodSpace = mCurrentLine.Length();
1265 // fallback if the line breaker is unavailable or failed
1266 if (!mLineBreaker) {
1267 goodSpace =
1268 (prefixwidth > mWrapColumn) ? 1 : mWrapColumn - prefixwidth;
1269 while (goodSpace < linelength &&
1270 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1271 goodSpace++;
1276 if ((goodSpace < linelength) && (goodSpace > 0)) {
1277 // Found a place to break
1279 // -1 (trim a char at the break position)
1280 // only if the line break was a space.
1281 if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1282 mCurrentLine.Right(restOfLine, linelength - goodSpace - 1);
1283 } else {
1284 mCurrentLine.Right(restOfLine, linelength - goodSpace);
1286 // if breaker was U+0020, it has to consider for delsp=yes support
1287 bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1288 mCurrentLine.Truncate(goodSpace);
1289 EndLine(true, breakBySpace);
1290 mCurrentLine.Truncate();
1291 // Space stuff new line?
1292 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1293 if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) &&
1294 mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1296 // Space stuffing a la RFC 2646 (format=flowed).
1297 mCurrentLine.Append(char16_t(' '));
1298 // XXX doesn't seem to work correctly for ' '
1301 mCurrentLine.Append(restOfLine);
1302 mCurrentLineWidth =
1303 GetUnicharStringWidth(mCurrentLine.get(), mCurrentLine.Length());
1304 linelength = mCurrentLine.Length();
1305 mEmptyLines = -1;
1306 } else {
1307 // Nothing to do. Hopefully we get more data later
1308 // to use for a place to break line
1309 break;
1312 } else {
1313 // No wrapping.
1318 * Outputs the contents of mCurrentLine, and resets line specific
1319 * variables. Also adds an indentation and prefix if there is
1320 * one specified. Strips ending spaces from the line if it isn't
1321 * preformatted.
1323 void nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) {
1324 uint32_t currentlinelength = mCurrentLine.Length();
1326 if (aSoftlinebreak && 0 == currentlinelength) {
1327 // No meaning
1328 return;
1331 /* In non-preformatted mode, remove spaces from the end of the line for
1332 * format=flowed compatibility. Don't do this for these special cases:
1333 * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1334 * "- -- ", the OpenPGP dash-escaped signature separator in inline
1335 * signed messages according to the OpenPGP standard (RFC 2440).
1337 if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1338 !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
1339 (aSoftlinebreak || !(mCurrentLine.EqualsLiteral("-- ") ||
1340 mCurrentLine.EqualsLiteral("- -- ")))) {
1341 // Remove spaces from the end of the line.
1342 while (currentlinelength > 0 &&
1343 mCurrentLine[currentlinelength - 1] == ' ') {
1344 --currentlinelength;
1346 mCurrentLine.SetLength(currentlinelength);
1349 if (aSoftlinebreak && (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1350 (mIndent == 0)) {
1351 // Add the soft part of the soft linebreak (RFC 2646 4.1)
1352 // We only do this when there is no indentation since format=flowed
1353 // lines and indentation doesn't work well together.
1355 // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1356 // add twice space.
1357 if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1358 mCurrentLine.AppendLiteral(" ");
1359 else
1360 mCurrentLine.Append(char16_t(' '));
1363 if (aSoftlinebreak) {
1364 mEmptyLines = 0;
1365 } else {
1366 // Hard break
1367 if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1368 mEmptyLines = -1;
1371 mEmptyLines++;
1374 if (mAtFirstColumn) {
1375 // If we don't have anything "real" to output we have to
1376 // make sure the indent doesn't end in a space since that
1377 // would trick a format=flowed-aware receiver.
1378 bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1379 OutputQuotesAndIndent(stripTrailingSpaces);
1382 mCurrentLine.Append(mLineBreak);
1383 Output(mCurrentLine);
1384 mCurrentLine.Truncate();
1385 mCurrentLineWidth = 0;
1386 mAtFirstColumn = true;
1387 mInWhitespace = true;
1388 mLineBreakDue = false;
1389 mFloatingLines = -1;
1393 * Outputs the calculated and stored indent and text in the indentation. That is
1394 * quote chars and numbers for numbered lists and such. It will also reset any
1395 * stored text to put in the indentation after using it.
1397 void nsPlainTextSerializer::OutputQuotesAndIndent(
1398 bool stripTrailingSpaces /* = false */) {
1399 nsAutoString stringToOutput;
1401 // Put the mail quote "> " chars in, if appropriate:
1402 if (mCiteQuoteLevel > 0) {
1403 nsAutoString quotes;
1404 for (int i = 0; i < mCiteQuoteLevel; i++) {
1405 quotes.Append(char16_t('>'));
1407 if (!mCurrentLine.IsEmpty()) {
1408 /* Better don't output a space here, if the line is empty,
1409 in case a receiving f=f-aware UA thinks, this were a flowed line,
1410 which it isn't - it's just empty.
1411 (Flowed lines may be joined with the following one,
1412 so the empty line may be lost completely.) */
1413 quotes.Append(char16_t(' '));
1415 stringToOutput = quotes;
1416 mAtFirstColumn = false;
1419 // Indent if necessary
1420 int32_t indentwidth = mIndent - mInIndentString.Length();
1421 if (indentwidth > 0 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1422 // Don't make empty lines look flowed
1424 nsAutoString spaces;
1425 for (int i = 0; i < indentwidth; ++i) spaces.Append(char16_t(' '));
1426 stringToOutput += spaces;
1427 mAtFirstColumn = false;
1430 if (!mInIndentString.IsEmpty()) {
1431 stringToOutput += mInIndentString;
1432 mAtFirstColumn = false;
1433 mInIndentString.Truncate();
1436 if (stripTrailingSpaces) {
1437 int32_t lineLength = stringToOutput.Length();
1438 while (lineLength > 0 && ' ' == stringToOutput[lineLength - 1]) {
1439 --lineLength;
1441 stringToOutput.SetLength(lineLength);
1444 if (!stringToOutput.IsEmpty()) {
1445 Output(stringToOutput);
1450 * Write a string. This is the highlevel function to use to get text output.
1451 * By using AddToLine, Output, EndLine and other functions it handles quotation,
1452 * line wrapping, indentation, whitespace compression and other things.
1454 void nsPlainTextSerializer::Write(const nsAString& aStr) {
1455 // XXX Copy necessary to use nsString methods and gain
1456 // access to underlying buffer
1457 nsAutoString str(aStr);
1459 #ifdef DEBUG_wrapping
1460 printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
1461 mWrapColumn);
1462 #endif
1464 int32_t bol = 0;
1465 int32_t newline;
1467 int32_t totLen = str.Length();
1469 // If the string is empty, do nothing:
1470 if (totLen <= 0) return;
1472 // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1473 // to be cut off along with usual spaces if required. (bug #125928)
1474 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1475 for (int32_t i = totLen - 1; i >= 0; i--) {
1476 char16_t c = str[i];
1477 if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) continue;
1478 if (kNBSP == c)
1479 str.Replace(i, 1, ' ');
1480 else
1481 break;
1485 // We have two major codepaths here. One that does preformatted text and one
1486 // that does normal formatted text. The one for preformatted text calls
1487 // Output directly while the other code path goes through AddToLine.
1488 if ((mPreFormattedMail && !mWrapColumn) ||
1489 (IsInPre() && !mPreFormattedMail) ||
1490 (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1491 // No intelligent wrapping.
1493 // This mustn't be mixed with intelligent wrapping without clearing
1494 // the mCurrentLine buffer before!!!
1495 NS_ASSERTION(mCurrentLine.IsEmpty() || (IsInPre() && !mPreFormattedMail),
1496 "Mixed wrapping data and nonwrapping data on the same line");
1497 if (!mCurrentLine.IsEmpty()) {
1498 FlushLine();
1501 // Put the mail quote "> " chars in, if appropriate.
1502 // Have to put it in before every line.
1503 while (bol < totLen) {
1504 bool outputQuotes = mAtFirstColumn;
1505 bool atFirstColumn;
1506 bool outputLineBreak = false;
1507 bool spacesOnly = true;
1509 // Find one of '\n' or '\r' using iterators since nsAString
1510 // doesn't have the old FindCharInSet function.
1511 nsAString::const_iterator iter;
1512 str.BeginReading(iter);
1513 nsAString::const_iterator done_searching;
1514 str.EndReading(done_searching);
1515 iter.advance(bol);
1516 int32_t new_newline = bol;
1517 newline = kNotFound;
1518 while (iter != done_searching) {
1519 if ('\n' == *iter || '\r' == *iter) {
1520 newline = new_newline;
1521 break;
1523 if (' ' != *iter) spacesOnly = false;
1524 ++new_newline;
1525 ++iter;
1528 // Done searching
1529 nsAutoString stringpart;
1530 if (newline == kNotFound) {
1531 // No new lines.
1532 stringpart.Assign(Substring(str, bol, totLen - bol));
1533 if (!stringpart.IsEmpty()) {
1534 char16_t lastchar = stringpart[stringpart.Length() - 1];
1535 if ((lastchar == '\t') || (lastchar == ' ') || (lastchar == '\r') ||
1536 (lastchar == '\n')) {
1537 mInWhitespace = true;
1538 } else {
1539 mInWhitespace = false;
1542 mEmptyLines = -1;
1543 atFirstColumn = mAtFirstColumn && (totLen - bol) == 0;
1544 bol = totLen;
1545 } else {
1546 // There is a newline
1547 stringpart.Assign(Substring(str, bol, newline - bol));
1548 mInWhitespace = true;
1549 outputLineBreak = true;
1550 mEmptyLines = 0;
1551 atFirstColumn = true;
1552 bol = newline + 1;
1553 if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1554 // There was a CRLF in the input. This used to be illegal and
1555 // stripped by the parser. Apparently not anymore. Let's skip
1556 // over the LF.
1557 bol++;
1561 mCurrentLine.Truncate();
1562 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1563 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
1564 !IsQuotedLine(stringpart) && !stringpart.EqualsLiteral("-- ") &&
1565 !stringpart.EqualsLiteral("- -- "))
1566 stringpart.Trim(" ", false, true, true);
1567 if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart))
1568 mCurrentLine.Append(char16_t(' '));
1570 mCurrentLine.Append(stringpart);
1572 if (outputQuotes) {
1573 // Note: this call messes with mAtFirstColumn
1574 OutputQuotesAndIndent();
1577 Output(mCurrentLine);
1578 if (outputLineBreak) {
1579 Output(mLineBreak);
1581 mAtFirstColumn = atFirstColumn;
1584 // Reset mCurrentLine.
1585 mCurrentLine.Truncate();
1587 #ifdef DEBUG_wrapping
1588 printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
1589 #endif
1590 return;
1593 // Intelligent handling of text
1594 // If needed, strip out all "end of lines"
1595 // and multiple whitespace between words
1596 int32_t nextpos;
1597 const char16_t* offsetIntoBuffer = nullptr;
1599 while (bol < totLen) { // Loop over lines
1600 // Find a place where we may have to do whitespace compression
1601 nextpos = str.FindCharInSet(" \t\n\r", bol);
1602 #ifdef DEBUG_wrapping
1603 nsAutoString remaining;
1604 str.Right(remaining, totLen - bol);
1605 foo = ToNewCString(remaining);
1606 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
1607 // "string = '%s'\n", bol, nextpos, totLen, foo);
1608 free(foo);
1609 #endif
1611 if (nextpos == kNotFound) {
1612 // The rest of the string
1613 offsetIntoBuffer = str.get() + bol;
1614 AddToLine(offsetIntoBuffer, totLen - bol);
1615 bol = totLen;
1616 mInWhitespace = false;
1617 } else {
1618 // There's still whitespace left in the string
1619 if (nextpos != 0 && (nextpos + 1) < totLen) {
1620 offsetIntoBuffer = str.get() + nextpos;
1621 // skip '\n' if it is between CJ chars
1622 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
1623 IS_CJ_CHAR(offsetIntoBuffer[1])) {
1624 offsetIntoBuffer = str.get() + bol;
1625 AddToLine(offsetIntoBuffer, nextpos - bol);
1626 bol = nextpos + 1;
1627 continue;
1630 // If we're already in whitespace and not preformatted, just skip it:
1631 if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1632 !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1633 // Skip whitespace
1634 bol++;
1635 continue;
1638 if (nextpos == bol) {
1639 // Note that we are in whitespace.
1640 mInWhitespace = true;
1641 offsetIntoBuffer = str.get() + nextpos;
1642 AddToLine(offsetIntoBuffer, 1);
1643 bol++;
1644 continue;
1647 mInWhitespace = true;
1649 offsetIntoBuffer = str.get() + bol;
1650 if (mPreFormattedMail ||
1651 (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1652 // Preserve the real whitespace character
1653 nextpos++;
1654 AddToLine(offsetIntoBuffer, nextpos - bol);
1655 bol = nextpos;
1656 } else {
1657 // Replace the whitespace with a space
1658 AddToLine(offsetIntoBuffer, nextpos - bol);
1659 AddToLine(kSpace.get(), 1);
1660 bol = nextpos + 1; // Let's eat the whitespace
1663 } // Continue looping over the string
1667 * Gets the value of an attribute in a string. If the function returns
1668 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1670 nsresult nsPlainTextSerializer::GetAttributeValue(nsAtom* aName,
1671 nsString& aValueRet) {
1672 if (mElement) {
1673 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1674 return NS_OK;
1678 return NS_ERROR_NOT_AVAILABLE;
1682 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1683 * In this case, we should ignore it.
1685 bool nsPlainTextSerializer::IsCurrentNodeConverted() {
1686 nsAutoString value;
1687 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1688 return (NS_SUCCEEDED(rv) && (value.EqualsIgnoreCase("moz-txt", 7) ||
1689 value.EqualsIgnoreCase("\"moz-txt", 8)));
1692 // static
1693 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
1694 if (!aContent->IsHTMLElement()) {
1695 return nullptr;
1698 nsAtom* localName = aContent->NodeInfo()->NameAtom();
1699 return localName->IsStatic() ? localName : nullptr;
1702 bool nsPlainTextSerializer::IsInPre() {
1703 return !mPreformatStack.empty() && mPreformatStack.top();
1706 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
1707 RefPtr<ComputedStyle> computedStyle =
1708 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1709 if (computedStyle) {
1710 const nsStyleText* textStyle = computedStyle->StyleText();
1711 return textStyle->WhiteSpaceOrNewlineIsSignificant();
1713 // Fall back to looking at the tag, in case there is no style information.
1714 return GetIdForContent(aElement) == nsGkAtoms::pre;
1717 bool nsPlainTextSerializer::IsElementBlock(Element* aElement) {
1718 RefPtr<ComputedStyle> computedStyle =
1719 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1720 if (computedStyle) {
1721 const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1722 return displayStyle->IsBlockOutsideStyle();
1724 // Fall back to looking at the tag, in case there is no style information.
1725 return nsContentUtils::IsHTMLBlock(aElement);
1729 * This method is required only to identify LI's inside OL.
1730 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1732 bool nsPlainTextSerializer::IsInOL() {
1733 int32_t i = mTagStackIndex;
1734 while (--i >= 0) {
1735 if (mTagStack[i] == nsGkAtoms::ol) return true;
1736 if (mTagStack[i] == nsGkAtoms::ul) {
1737 // If a UL is reached first, LI belongs the UL nested in OL.
1738 return false;
1741 // We may reach here for orphan LI's.
1742 return false;
1746 @return 0 = no header, 1 = h1, ..., 6 = h6
1748 int32_t HeaderLevel(nsAtom* aTag) {
1749 if (aTag == nsGkAtoms::h1) {
1750 return 1;
1752 if (aTag == nsGkAtoms::h2) {
1753 return 2;
1755 if (aTag == nsGkAtoms::h3) {
1756 return 3;
1758 if (aTag == nsGkAtoms::h4) {
1759 return 4;
1761 if (aTag == nsGkAtoms::h5) {
1762 return 5;
1764 if (aTag == nsGkAtoms::h6) {
1765 return 6;
1767 return 0;
/*
 * This is an implementation of GetUnicharWidth() and
 * GetUnicharStringWidth() as defined in
 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
 * <http://www.UNIX-systems.org/online.html>
 *
 * Markus Kuhn -- 2000-02-08 -- public domain
 *
 * Minor alterations to fit Mozilla's data types by Daniel Bratell
 */
/* These functions define the column width of an ISO 10646 character
 * as follows:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL will lead to a return
 *      value of -1.
 *
 *    - Non-spacing and enclosing combining characters (general
 *      category code Mn or Me in the Unicode database) have a
 *      column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      FullWidth (F) category as defined in Unicode Technical
 *      Report #11 have a column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * This implementation assumes that wchar_t characters are encoded
 * in ISO 10646.
 */
1805 namespace {
1807 struct interval {
1808 uint16_t first;
1809 uint16_t last;
1812 struct CombiningComparator {
1813 const char16_t mUcs;
1814 explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
1815 int operator()(const interval& combining) const {
1816 if (mUcs > combining.last) return 1;
1817 if (mUcs < combining.first) return -1;
1819 MOZ_ASSERT(combining.first <= mUcs);
1820 MOZ_ASSERT(mUcs <= combining.last);
1821 return 0;
1825 } // namespace
1827 int32_t GetUnicharWidth(char16_t ucs) {
1828 /* sorted list of non-overlapping intervals of non-spacing characters */
1829 static const interval combining[] = {
1830 {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486}, {0x0488, 0x0489},
1831 {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x05BF},
1832 {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
1833 {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
1834 {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
1835 {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963},
1836 {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
1837 {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
1838 {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82},
1839 {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD},
1840 {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
1841 {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
1842 {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
1843 {0x0C55, 0x0C56}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
1844 {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4},
1845 {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
1846 {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
1847 {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
1848 {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97},
1849 {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
1850 {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, {0x17B7, 0x17BD},
1851 {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x18A9, 0x18A9}, {0x20D0, 0x20E3},
1852 {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}};
1854 /* test for 8-bit control characters */
1855 if (ucs == 0) return 0;
1856 if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return -1;
1858 /* first quick check for Latin-1 etc. characters */
1859 if (ucs < combining[0].first) return 1;
1861 /* binary search in table of non-spacing characters */
1862 size_t idx;
1863 if (BinarySearchIf(combining, 0, ArrayLength(combining),
1864 CombiningComparator(ucs), &idx)) {
1865 return 0;
1868 /* if we arrive here, ucs is not a combining or C0/C1 control character */
1870 /* fast test for majority of non-wide scripts */
1871 if (ucs < 0x1100) return 1;
1873 return 1 +
1874 ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1875 (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1876 ucs != 0x303f) || /* CJK ... Yi */
1877 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1878 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1879 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1880 (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1881 (ucs >= 0xffe0 && ucs <= 0xffe6));
1884 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n) {
1885 int32_t w, width = 0;
1887 for (; *pwcs && n-- > 0; pwcs++)
1888 if ((w = GetUnicharWidth(*pwcs)) < 0)
1889 ++width; // Taking 1 as the width of non-printable character, for bug#
1890 // 94475.
1891 else
1892 width += w;
1894 return width;