no bug - Import translations from android-l10n r=release a=l10n CLOSED TREE
[gecko.git] / dom / serializers / nsPlainTextSerializer.h
blob4afd83f1a0052094c5fddc6556046d6ec53bff99
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
3 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
4 /* This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 /*
9 * nsIContentSerializer implementation that can be used with an
10 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
11 * (eg for copy/paste as plaintext).
14 #ifndef nsPlainTextSerializer_h__
15 #define nsPlainTextSerializer_h__
17 #include "mozilla/Maybe.h"
18 #include "nsAtom.h"
19 #include "nsCycleCollectionParticipant.h"
20 #include "nsIContentSerializer.h"
21 #include "nsIDocumentEncoder.h"
22 #include "nsString.h"
23 #include "nsTArray.h"
25 #include <stack>
27 class nsIContent;
29 namespace mozilla::dom {
30 class DocumentType;
31 class Element;
32 } // namespace mozilla::dom
34 class nsPlainTextSerializer final : public nsIContentSerializer {
35 public:
36 nsPlainTextSerializer();
38 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
39 NS_DECL_CYCLE_COLLECTION_CLASS(nsPlainTextSerializer)
41 // nsIContentSerializer
42 NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
43 const mozilla::Encoding* aEncoding, bool aIsCopying,
44 bool aIsWholeDocument, bool* aNeedsPreformatScanning,
45 nsAString& aOutput) override;
47 NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
48 int32_t aEndOffset) override;
49 NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
50 int32_t aEndOffset) override;
51 NS_IMETHOD AppendProcessingInstruction(
52 mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
53 int32_t aEndOffset) override {
54 return NS_OK;
56 NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
57 int32_t aStartOffset, int32_t aEndOffset) override {
58 return NS_OK;
60 NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override {
61 return NS_OK;
63 NS_IMETHOD AppendElementStart(
64 mozilla::dom::Element* aElement,
65 mozilla::dom::Element* aOriginalElement) override;
66 NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
67 mozilla::dom::Element* aOriginalElement) override;
69 NS_IMETHOD FlushAndFinish() override;
71 NS_IMETHOD Finish() override;
73 NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
75 NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
77 NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override;
78 NS_IMETHOD ForgetElementForPreformat(
79 mozilla::dom::Element* aElement) override;
81 private:
82 ~nsPlainTextSerializer();
84 nsresult GetAttributeValue(const nsAtom* aName, nsString& aValueRet) const;
85 void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
87 void MaybeWrapAndOutputCompleteLines();
89 // @param aSoftLineBreak A soft line break is a space followed by a linebreak
90 // (cf. https://www.ietf.org/rfc/rfc3676.txt, section 4.2).
91 void EndLine(bool aSoftLineBreak, bool aBreakBySpace = false);
93 void EnsureVerticalSpace(int32_t noOfRows);
95 void ConvertToLinesAndOutput(const nsAString& aString);
97 void Write(const nsAString& aString);
99 // @return true, iff the element's whitespace and newline characters have to
100 // be preserved according to its style or because it's a `<pre>`
101 // element.
102 bool IsElementPreformatted() const;
103 bool IsInOL() const;
104 bool IsInOlOrUl() const;
105 bool IsCurrentNodeConverted() const;
106 bool MustSuppressLeaf() const;
109 * Returns the local name of the element as an atom if the element is an
110 * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
112 static nsAtom* GetIdForContent(nsIContent* aContent);
113 nsresult DoOpenContainer(const nsAtom* aTag);
114 void OpenContainerForOutputFormatted(const nsAtom* aTag);
115 nsresult DoCloseContainer(const nsAtom* aTag);
116 void CloseContainerForOutputFormatted(const nsAtom* aTag);
117 nsresult DoAddLeaf(const nsAtom* aTag);
119 void DoAddText();
120 // @param aText Ignored if aIsLineBreak is true.
121 void DoAddText(bool aIsLineBreak, const nsAString& aText);
// @return true iff mHeadLevel is zero. Judging by the name, output is only
//         produced in that state -- confirm at the call sites.
123 inline bool DoOutput() const { return mHeadLevel == 0; }
// @return true iff aLine is non-empty and its first character is '>'.
// The emptiness check is evaluated first, so First() is only reached for a
// non-empty string.
125 static inline bool IsQuotedLine(const nsAString& aLine) {
126 return !aLine.IsEmpty() && aLine.First() == char16_t('>');
129 // Stack handling functions
130 bool GetLastBool(const nsTArray<bool>& aStack);
131 void SetLastBool(nsTArray<bool>& aStack, bool aValue);
132 void PushBool(nsTArray<bool>& aStack, bool aValue);
133 bool PopBool(nsTArray<bool>& aStack);
135 bool IsIgnorableRubyAnnotation(const nsAtom* aTag) const;
137 // @return true, iff the element's whitespace and newline characters have to
138 // be preserved according to its style or because it's a `<pre>`
139 // element.
140 static bool IsElementPreformatted(mozilla::dom::Element* aElement);
142 // https://drafts.csswg.org/css-display/#block-level
143 static bool IsCssBlockLevelElement(mozilla::dom::Element* aElement);
145 private:
146 uint32_t mHeadLevel;
148 class Settings {
149 public:
150 enum class HeaderStrategy {
151 kNoIndentation,
152 kIndentIncreasedWithHeaderLevel,
153 kNumberHeadingsAndIndentSlightly
156 // May adapt the flags.
158 // @param aFlags As defined in nsIDocumentEncoder.idl.
159 void Init(int32_t aFlags, uint32_t aWrapColumn);
161 // Pref: converter.html2txt.structs.
162 bool GetStructs() const { return mStructs; }
164 // Pref: converter.html2txt.header_strategy.
165 HeaderStrategy GetHeaderStrategy() const { return mHeaderStrategy; }
167 // @return As defined in nsIDocumentEncoder.idl.
168 int32_t GetFlags() const { return mFlags; }
170 // @param aFlag As defined in nsIDocumentEncoder.idl. May consist of
171 // multiple bitwise or'd flags.
// @return true iff at least one bit of aFlag is set in the stored flags
//         (the result of the bitwise AND is converted to bool), i.e. when
//         several flags are passed this is an "any of", not an "all of".
172 bool HasFlag(int32_t aFlag) const { return mFlags & aFlag; }
174 // Whether the output should include ruby annotations.
175 bool GetWithRubyAnnotation() const { return mWithRubyAnnotation; }
177 uint32_t GetWrapColumn() const { return mWrapColumn; }
// @return true iff the wrap column is non-zero and HasFlag() reports a match
//         for the combined mask OutputFormatted | OutputWrap.
179 bool MayWrap() const {
180 return GetWrapColumn() && HasFlag(nsIDocumentEncoder::OutputFormatted |
181 nsIDocumentEncoder::OutputWrap);
// @return true iff nsIDocumentEncoder::OutputDisallowLineBreaking is not set.
184 bool MayBreakLines() const {
185 return !HasFlag(nsIDocumentEncoder::OutputDisallowLineBreaking);
188 private:
189 // @param aPrefHeaderStrategy Pref: converter.html2txt.header_strategy.
190 static HeaderStrategy Convert(int32_t aPrefHeaderStrategy);
192 // Pref: converter.html2txt.structs.
193 bool mStructs = true;
195 // Pref: converter.html2txt.header_strategy.
196 HeaderStrategy mHeaderStrategy =
197 HeaderStrategy::kIndentIncreasedWithHeaderLevel;
199 // Flags defined in nsIDocumentEncoder.idl.
200 int32_t mFlags = 0;
202 // Whether the output should include ruby annotations.
203 bool mWithRubyAnnotation = false;
205 // The wrap column is how many fixed-pitch narrow
206 // (https://unicode.org/reports/tr11/) (e.g. Latin) characters
207 // should be allowed on a line. There could be fewer chars if the chars
208 // are wider than Latin chars, or more if the chars are narrower.
209 uint32_t mWrapColumn = 0;
212 Settings mSettings;
214 struct Indentation {
215 // The number of space characters to be inserted including the length of
216 // mHeader.
217 int32_t mLength = 0;
219 // The header that has to be written in the indent.
220 // That could be, for instance, the bullet in a bulleted list.
221 nsString mHeader;
224 class CurrentLine {
225 public:
226 void ResetContentAndIndentationHeader();
228 // @param aFlags As defined in nsIDocumentEncoder.idl.
229 void MaybeReplaceNbspsInContent(int32_t aFlags);
231 void CreateQuotesAndIndent(nsAString& aResult) const;
// @return true iff mContent is non-empty or the indentation header
//         (mIndentation.mHeader) is non-empty.
233 bool HasContentOrIndentationHeader() const {
234 return !mContent.IsEmpty() || !mIndentation.mHeader.IsEmpty();
237 // @param aUseLineBreaker Presumably whether a line breaker is consulted to find the wrap index; confirm against the implementation.
238 int32_t FindWrapIndexForContent(uint32_t aWrapColumn,
239 bool aUseLineBreaker) const;
241 // @return Combined width of cite quote level and indentation.
// The cite part contributes mCiteQuoteLevel + 1 columns when the level is
// positive (presumably the '>' characters plus one separator -- confirm in
// CreateQuotesAndIndent) and nothing otherwise. mIndentation.mLength is
// added as is, and one further column is added when the line is
// space-stuffed.
242 uint32_t DeterminePrefixWidth() const {
243 // XXX: Should calculate prefixwidth with GetUnicharStringWidth
244 return (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1 : 0) +
245 mIndentation.mLength + uint32_t(mSpaceStuffed);
248 Indentation mIndentation;
250 // The number of '>' characters.
251 int32_t mCiteQuoteLevel = 0;
253 // Whether this line is getting space-stuffed, see
254 // https://datatracker.ietf.org/doc/html/rfc2646#section-4.4
255 bool mSpaceStuffed = false;
257 // Excludes indentation and quotes.
258 nsString mContent;
261 CurrentLine mCurrentLine;
263 class OutputManager {
264 public:
266 * @param aFlags As defined in nsIDocumentEncoder.idl.
267 * @param aOutput An empty string.
269 OutputManager(int32_t aFlags, nsAString& aOutput);
271 enum class StripTrailingWhitespaces { kMaybe, kNo };
273 void Append(const CurrentLine& aCurrentLine,
274 StripTrailingWhitespaces aStripTrailingWhitespaces);
276 void AppendLineBreak();
279 * This empties the current line cache without adding a NEWLINE.
280 * Should not be used if line wrapping is of importance since
281 * this function destroys the cache information.
283 * It will also write indentation and quotes if we believe we are
284 * at the start of the line.
286 void Flush(CurrentLine& aCurrentLine);
288 bool IsAtFirstColumn() const { return mAtFirstColumn; }
290 uint32_t GetOutputLength() const;
292 private:
294 * @param aString Last character is expected to not be a line break.
296 void Append(const nsAString& aString);
298 // As defined in nsIDocumentEncoder.idl.
299 const int32_t mFlags;
301 nsAString& mOutput;
303 bool mAtFirstColumn;
305 nsString mLineBreak;
308 mozilla::Maybe<OutputManager> mOutputManager;
310 // If we've just written out a cite blockquote, we need to remember it
311 // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
312 // old messages).
313 bool mHasWrittenCiteBlockquote;
315 int32_t mFloatingLines; // To store the number of lazy line breaks
317 // Treat quoted text as though it's preformatted -- don't wrap it.
318 // Having it on a pref is a temporary measure, See bug 69638.
319 int32_t mSpanLevel;
321 int32_t mEmptyLines; // Will be the number of empty lines before
322 // the current. 0 if we are starting a new
323 // line and -1 if we are in a line.
325 bool mInWhitespace;
326 bool mPreFormattedMail; // we're dealing with special DOM
327 // used by Thunderbird code.
329 // While handling a new tag, this variable records whether a line break
330 // is due because of a closing tag. It is set to true while closing
331 // tags, so that opening tags are guaranteed to start with the appropriate
332 // line breaks.
333 bool mLineBreakDue;
335 bool mPreformattedBlockBoundary;
337 int32_t mHeaderCounter[7]; /* For header-numbering:
338 Number of previous headers of
339 the same depth and in the same
340 section.
341 mHeaderCounter[1] for <h1> etc. */
343 RefPtr<mozilla::dom::Element> mElement;
345 // For handling table rows
346 AutoTArray<bool, 8> mHasWrittenCellsForRow;
348 // Values obtained in OpenContainer that are (also) needed in CloseContainer
349 AutoTArray<bool, 8> mIsInCiteBlockquote;
351 // The tag stack: the stack of tags we're operating on, so we can nest.
352 // The stack only ever points to static atoms, so they don't need to be
353 // refcounted.
354 const nsAtom** mTagStack;
355 uint32_t mTagStackIndex;
357 // The stack indicating whether the elements we've been operating on are
358 // CSS preformatted elements, so that we can tell if the text inside them
359 // should be formatted.
360 std::stack<bool> mPreformatStack;
362 // Content in the stack above this index should be ignored:
363 uint32_t mIgnoreAboveIndex;
365 // The stack for ordered lists
366 AutoTArray<int32_t, 100> mOLStack;
368 uint32_t mULCount;
370 bool mUseLineBreaker = false;
372 // Convenience constant. It would be nice to have it as a const static
373 // variable, but that causes issues with OpenBSD and module unloading.
374 const nsString kSpace;
376 // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
377 // child node. The initial value of mIgnoredChildNodeLevel is 0. When
378 // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
379 // and is greater than 0. Otherwise when serializer leaves those nodes,
380 // mIgnoredChildNodeLevel decreases.
381 uint32_t mIgnoredChildNodeLevel;
384 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
386 #endif