HTML: rework the HTMLification of the plaintext mails
[trojita.git] / src / Composer / PlainTextFormatter.cpp
blob733fe3c269aac4422ef2e63afeb10ee887db3f3c
1 /* Copyright (C) 2012 Thomas Lübking <thomas.luebking@gmail.com>
2 Copyright (C) 2006 - 2013 Jan Kundrát <jkt@flaska.net>
4 This file is part of the Trojita Qt IMAP e-mail client,
5 http://trojita.flaska.net/
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of
10 the License or (at your option) version 3 or any later version
11 accepted by the membership of KDE e.V. (or its successor approved
12 by the membership of KDE e.V.), which shall act as a proxy
13 defined in Section 14 of version 3 of the license.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include <QObject>
25 #if QT_VERSION < QT_VERSION_CHECK(5, 0, 0)
26 #include <QTextDocument>
27 #endif
28 #include "PlainTextFormatter.h"
30 #include <QDebug> // FIXME: remove me
32 namespace Composer {
33 namespace Util {
35 /** @short Helper for plainTextToHtml for applying the HTML formatting
37 This funciton recognizes http and https links, e-mail addresses, *bold*, /italic/ and _underline_ text.
39 QString helperHtmlifySingleLine(QString line)
41 // Static regexps for the engine construction
42 static const QRegExp linkRe("("
43 "https?://" // scheme prefix
44 "[;/?:@=&$\\-_.+!',0-9a-zA-Z%#~\\[\\]\\(\\)*]+" // allowed characters
45 "[/@=&$\\-_+'0-9a-zA-Z%#~]" // termination
46 ")");
47 static const QRegExp mailRe("([a-zA-Z0-9\\.\\-_\\+]+@[a-zA-Z0-9\\.\\-_]+)");
48 static QString intro("(^|[\\s\\(\\[\\{])");
49 static QString extro("($|[\\s\\),;.\\]\\}])");
50 #define TROJITA_RE_BOLD "\\*(\\S*)\\*"
51 #define TROJITA_RE_ITALIC "/(\\S*)/"
52 #define TROJITA_RE_UNDERLINE "_(\\S*)_"
53 static const QRegExp boldRe(intro + TROJITA_RE_BOLD + extro);
54 static const QRegExp italicRe(intro + TROJITA_RE_ITALIC + extro);
55 static const QRegExp underlineRe(intro + TROJITA_RE_UNDERLINE + extro);
56 static const QRegExp anyFormattingRe(intro + "(" TROJITA_RE_BOLD "|" TROJITA_RE_ITALIC "|" TROJITA_RE_UNDERLINE ")" + extro);
57 #undef TROJITA_RE_BOLD
58 #undef TROJITA_RE_ITALIC
59 #undef TROJITA_RE_UNDERLINE
61 // RE instances to work on
62 QRegExp link(linkRe), mail(mailRe), bold(boldRe), italic(italicRe), underline(underlineRe), anyFormatting(anyFormattingRe);
64 // Now prepare markup *bold*, /italic/ and _underline_ and also turn links into HTML.
65 // This is a bit more involved because we want to apply the regular expressions in a certain order and also at the same
66 // time prevent the lower-priority regexps from clobbering the output of the previous stages.
67 int start = 0;
68 while (start < line.size()) {
69 qDebug() << "Main loop:" << start << line.size() << line;
70 // Find the position of the first thing which matches
71 int posLink = link.indexIn(line, start, QRegExp::CaretAtOffset);
72 if (posLink == -1)
73 posLink = line.size();
75 int posMail = mail.indexIn(line, start, QRegExp::CaretAtOffset);
76 if (posMail == -1)
77 posMail = line.size();
79 int posFormatting = anyFormatting.indexIn(line, start, QRegExp::CaretAtOffset);
80 if (posFormatting == -1)
81 posFormatting = line.size();
83 const int firstSpecial = qMin(qMin(posLink, posMail), posFormatting);
84 if (firstSpecial == line.size()) {
85 qDebug() << "nothing else";
86 // No further matches for this line -> we're done
87 break;
89 qDebug() << "some RE has matched";
91 if (firstSpecial == posLink) {
92 QString replacement = QString::fromUtf8("<a href=\"%1\">%1</a>").arg(link.cap(1));
93 line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + link.matchedLength());
94 start = firstSpecial + replacement.size();
95 } else if (firstSpecial == posMail) {
96 QString replacement = QString::fromUtf8("<a href=\"mailto:%1\">%1</a>").arg(mail.cap(1));
97 line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + mail.matchedLength());
98 start = firstSpecial + replacement.size();
99 } else if (firstSpecial == posFormatting) {
100 // Careful here; the inner contents of the current match shall be formatted as well which is why we need recursion
101 QChar elementName;
102 QChar markupChar;
103 const QRegExp *re = 0;
105 if (posFormatting == bold.indexIn(line, start, QRegExp::CaretAtOffset)) {
106 elementName = QLatin1Char('b');
107 markupChar = QLatin1Char('*');
108 re = &bold;
109 } else if (posFormatting == italic.indexIn(line, start, QRegExp::CaretAtOffset)) {
110 elementName = QLatin1Char('i');
111 markupChar = QLatin1Char('/');
112 re = &italic;
113 } else if (posFormatting == underline.indexIn(line, start, QRegExp::CaretAtOffset)) {
114 elementName = QLatin1Char('u');
115 markupChar = QLatin1Char('_');
116 re = &underline;
118 Q_ASSERT(re);
119 qDebug() << "Got formatting";
120 qDebug() << " old line:" << line;
121 qDebug() << " at:" << line.mid(start);
122 qDebug() << " prefix:" << line.left(firstSpecial);
123 qDebug() << " suffix:" << line.mid(firstSpecial + re->matchedLength());
124 QString replacement = QString::fromUtf8("%1<%2><span class=\"markup\">%3</span>%4<span class=\"markup\">%3</span></%2>%5")
125 .arg(re->cap(1), elementName, markupChar, helperHtmlifySingleLine(re->cap(2)), re->cap(3));
127 qDebug() << " replacement:" << replacement;
128 line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + re->matchedLength());
129 start = firstSpecial + replacement.size();
130 qDebug() << " chunk to be still processed:" << line.mid(start);
131 } else {
132 Q_ASSERT(false);
136 return line;
139 QStringList plainTextToHtml(const QString &plaintext, const FlowedFormat flowed)
143 // Processing:
144 // the plain text is split into lines
145 // leading quotemarks are counted and stripped
146 // next, the line is marked up (*bold*, /italic/, _underline_ and active link support)
147 // if the last line ended with a space, the result is appended, otherwise canonical quotemarkes are
148 // prepended and the line appended to the markup list (see http://tools.ietf.org/html/rfc3676)
149 // whenever the quote level grows, a <blockquote> is opened and closed when it shrinks
151 int quoteLevel = 0;
152 QStringList plain(plaintext.split('\n'));
153 QStringList markup;
154 // have we seen the signature separator and should we therefore explicitly close that block later?
155 bool shallCloseSignature = false;
156 for (int i = 0; i < plain.count(); ++i) {
157 QString &line = plain[i];
159 // ignore empty lines
160 if (line.isEmpty()) {
161 markup << line;
162 continue;
164 // determine quotelevel
165 int cQuoteLevel = 0;
166 if (line.at(0) == '>') {
167 int j = 1;
168 cQuoteLevel = 1;
169 while (j < line.length() && (line.at(j) == '>' || line.at(j) == ' '))
170 cQuoteLevel += line.at(j++) == '>';
172 // strip quotemarks
173 if (cQuoteLevel) {
174 static QRegExp quotemarks("^[>\\s]*");
175 line.remove(quotemarks);
177 // Escape the HTML entities
178 #if QT_VERSION < QT_VERSION_CHECK(5, 0, 0)
179 line = Qt::escape(line);
180 #else
181 line = line.toHtmlEscaped();
182 #endif
184 line = helperHtmlifySingleLine(line);
186 // if this is a non floating new line, prepend canonical quotemarks
187 if (cQuoteLevel && !(cQuoteLevel == quoteLevel && markup.last().endsWith(' '))) {
188 QString quotemarks("<span class=\"quotemarks\">");
189 for (int i = 0; i < cQuoteLevel; ++i)
190 quotemarks += "&gt;";
191 quotemarks += " </span>";
192 line.prepend(quotemarks);
195 if (cQuoteLevel < quoteLevel) {
196 // this line is ascending in the quoted depth
197 Q_ASSERT(!markup.isEmpty());
198 for (int i = 0; i < quoteLevel - cQuoteLevel; ++i) {
199 markup.last().append("</blockquote>");
201 } else if (cQuoteLevel > quoteLevel) {
202 // even more nested quotations
203 for (int i = 0; i < cQuoteLevel - quoteLevel; ++i) {
204 line.prepend("<blockquote>");
208 if (!shallCloseSignature && line == QLatin1String("-- ")) {
209 // Only recognize the first signature separator
210 shallCloseSignature = true;
211 line.prepend(QLatin1String("<span class=\"signature\">"));
214 // appaned or join the line
215 if (markup.isEmpty()) {
216 markup << line;
217 } else if (flowed == FORMAT_FLOWED) {
218 if ((quoteLevel == cQuoteLevel) && markup.last().endsWith(QLatin1Char(' ')) &&
219 markup.last() != QLatin1String("<span class=\"signature\">-- "))
220 markup.last().append(line);
221 else
222 markup << line;
223 } else {
224 markup << line;
227 quoteLevel = cQuoteLevel;
230 // close any open elements
231 QString closer;
232 if (shallCloseSignature)
233 closer = QLatin1String("</span>");
234 // close open blockquotes
235 // (bottom quoters, we're unfortunately -yet- not permittet to shoot them, so we need to deal with them ;-)
236 while (quoteLevel > 0) {
237 closer.append("</blockquote>");
238 --quoteLevel;
240 if (!closer.isEmpty()) {
241 Q_ASSERT(!markup.isEmpty());
242 markup.last().append(closer);
245 return markup;