From 0099cf69960c0f23c51c5d5235b58462c383a9f8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20Kundr=C3=A1t?= Date: Tue, 8 Jan 2013 20:43:59 +0100 Subject: [PATCH] HTML: rework the HTMLification of the plaintext mails - Support a few more characters in the URLs (like parentheses, square brackets and stars) - Prevent recursive matches where the href attribute would get clobbered by *bold* recognition - Support HTTP URLs which embed e-mail addresses There's always an upper bound on what we can do, though -- this code will break on URLs like *http://example/*checkout*/foo*, of course. Sorry, there isn't much to be done here. --- src/Composer/PlainTextFormatter.cpp | 121 ++++++++++++++++++--- .../test_Composer_responses.cpp | 28 ++++- 2 files changed, 132 insertions(+), 17 deletions(-) diff --git a/src/Composer/PlainTextFormatter.cpp b/src/Composer/PlainTextFormatter.cpp index 6d31f5c7..733fe3c2 100644 --- a/src/Composer/PlainTextFormatter.cpp +++ b/src/Composer/PlainTextFormatter.cpp @@ -25,25 +25,120 @@ #if QT_VERSION < QT_VERSION_CHECK(5, 0, 0) #include #endif - #include "PlainTextFormatter.h" +#include // FIXME: remove me + namespace Composer { namespace Util { -QStringList plainTextToHtml(const QString &plaintext, const FlowedFormat flowed) +/** @short Helper for plainTextToHtml for applying the HTML formatting + +This function recognizes http and https links, e-mail addresses, *bold*, /italic/ and _underline_ text. 
+*/ +QString helperHtmlifySingleLine(QString line) { - static const QRegExp link("(" + // Static regexps for the engine construction + static const QRegExp linkRe("(" "https?://" // scheme prefix - "[;/?:@=&$\\-_.+!',0-9a-zA-Z%#~]+" // allowed characters + "[;/?:@=&$\\-_.+!',0-9a-zA-Z%#~\\[\\]\\(\\)*]+" // allowed characters "[/@=&$\\-_+'0-9a-zA-Z%#~]" // termination ")"); - static const QRegExp mail("([a-zA-Z0-9\\.\\-_\\+]+@[a-zA-Z0-9\\.\\-_]+)"); + static const QRegExp mailRe("([a-zA-Z0-9\\.\\-_\\+]+@[a-zA-Z0-9\\.\\-_]+)"); static QString intro("(^|[\\s\\(\\[\\{])"); static QString extro("($|[\\s\\),;.\\]\\}])"); - static const QRegExp bold(intro + "\\*(\\S*)\\*" + extro); - static const QRegExp italic(intro + "/(\\S*)/" + extro); - static const QRegExp underline(intro + "_(\\S*)_" + extro); +#define TROJITA_RE_BOLD "\\*(\\S*)\\*" +#define TROJITA_RE_ITALIC "/(\\S*)/" +#define TROJITA_RE_UNDERLINE "_(\\S*)_" + static const QRegExp boldRe(intro + TROJITA_RE_BOLD + extro); + static const QRegExp italicRe(intro + TROJITA_RE_ITALIC + extro); + static const QRegExp underlineRe(intro + TROJITA_RE_UNDERLINE + extro); + static const QRegExp anyFormattingRe(intro + "(" TROJITA_RE_BOLD "|" TROJITA_RE_ITALIC "|" TROJITA_RE_UNDERLINE ")" + extro); +#undef TROJITA_RE_BOLD +#undef TROJITA_RE_ITALIC +#undef TROJITA_RE_UNDERLINE + + // RE instances to work on + QRegExp link(linkRe), mail(mailRe), bold(boldRe), italic(italicRe), underline(underlineRe), anyFormatting(anyFormattingRe); + + // Now prepare markup *bold*, /italic/ and _underline_ and also turn links into HTML. + // This is a bit more involved because we want to apply the regular expressions in a certain order and also at the same + // time prevent the lower-priority regexps from clobbering the output of the previous stages. 
+ int start = 0; + while (start < line.size()) { + qDebug() << "Main loop:" << start << line.size() << line; + // Find the position of the first thing which matches + int posLink = link.indexIn(line, start, QRegExp::CaretAtOffset); + if (posLink == -1) + posLink = line.size(); + + int posMail = mail.indexIn(line, start, QRegExp::CaretAtOffset); + if (posMail == -1) + posMail = line.size(); + + int posFormatting = anyFormatting.indexIn(line, start, QRegExp::CaretAtOffset); + if (posFormatting == -1) + posFormatting = line.size(); + + const int firstSpecial = qMin(qMin(posLink, posMail), posFormatting); + if (firstSpecial == line.size()) { + qDebug() << "nothing else"; + // No further matches for this line -> we're done + break; + } + qDebug() << "some RE has matched"; + + if (firstSpecial == posLink) { + QString replacement = QString::fromUtf8("%1").arg(link.cap(1)); + line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + link.matchedLength()); + start = firstSpecial + replacement.size(); + } else if (firstSpecial == posMail) { + QString replacement = QString::fromUtf8("%1").arg(mail.cap(1)); + line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + mail.matchedLength()); + start = firstSpecial + replacement.size(); + } else if (firstSpecial == posFormatting) { + // Careful here; the inner contents of the current match shall be formatted as well which is why we need recursion + QChar elementName; + QChar markupChar; + const QRegExp *re = 0; + + if (posFormatting == bold.indexIn(line, start, QRegExp::CaretAtOffset)) { + elementName = QLatin1Char('b'); + markupChar = QLatin1Char('*'); + re = &bold; + } else if (posFormatting == italic.indexIn(line, start, QRegExp::CaretAtOffset)) { + elementName = QLatin1Char('i'); + markupChar = QLatin1Char('/'); + re = &italic; + } else if (posFormatting == underline.indexIn(line, start, QRegExp::CaretAtOffset)) { + elementName = QLatin1Char('u'); + markupChar = QLatin1Char('_'); + re = &underline; + 
} + Q_ASSERT(re); + qDebug() << "Got formatting"; + qDebug() << " old line:" << line; + qDebug() << " at:" << line.mid(start); + qDebug() << " prefix:" << line.left(firstSpecial); + qDebug() << " suffix:" << line.mid(firstSpecial + re->matchedLength()); + QString replacement = QString::fromUtf8("%1<%2>%3%4%3%5") + .arg(re->cap(1), elementName, markupChar, helperHtmlifySingleLine(re->cap(2)), re->cap(3)); + + qDebug() << " replacement:" << replacement; + line = line.left(firstSpecial) + replacement + line.mid(firstSpecial + re->matchedLength()); + start = firstSpecial + replacement.size(); + qDebug() << " chunk to be still processed:" << line.mid(start); + } else { + Q_ASSERT(false); + } + } + + return line; +} + +QStringList plainTextToHtml(const QString &plaintext, const FlowedFormat flowed) +{ + // Processing: // the plain text is split into lines @@ -85,14 +180,8 @@ QStringList plainTextToHtml(const QString &plaintext, const FlowedFormat flowed) #else line = line.toHtmlEscaped(); #endif - // markup *bold*, /italic/, _underline_ and active links - line.replace(link, "\\1"); - line.replace(mail, "\\1"); -#define MARKUP(_item_) ""#_item_"" - line.replace(bold, "\\1" MARKUP(*) "\\2" MARKUP(*) "\\3"); - line.replace(italic, "\\1" MARKUP(/) "\\2" MARKUP(/) "\\3"); - line.replace(underline, "\\1" MARKUP(_) "\\2" MARKUP(_) "\\3"); -#undef MARKUP + + line = helperHtmlifySingleLine(line); // if this is a non floating new line, prepend canonical quotemarks if (cQuoteLevel && !(cQuoteLevel == quoteLevel && markup.last().endsWith(' '))) { diff --git a/tests/tests/test_Composer_responses/test_Composer_responses.cpp b/tests/tests/test_Composer_responses/test_Composer_responses.cpp index 12d5c4a3..ace7e4c8 100644 --- a/tests/tests/test_Composer_responses/test_Composer_responses.cpp +++ b/tests/tests/test_Composer_responses/test_Composer_responses.cpp @@ -151,7 +151,22 @@ void ComposerResponsesTest::testPlainTextFormatting_data() << QString("

ahoj & blesmrt

") << QString("<p>ahoj &amp; blesmrt</p>") << QString("<p>ahoj &amp; blesmrt</p>"); - QTest::newRow("basic-formatting") + QTest::newRow("basic-formatting-1") << QString("foo bar") << QString("foo bar") << QString("foo bar"); + QTest::newRow("basic-formatting-2") + << QString("ahoj *cau* nazdar") + << QString("ahoj *cau* nazdar") + << QString("ahoj *cau* nazdar"); + QTest::newRow("basic-formatting-3") + << QString("/ahoj/ *cau*") + << QString("/ahoj/ *cau*") + << QString("/ahoj/ *cau*"); + QTest::newRow("basic-formatting-4") + << QString("ahoj *_cau_* nazdar") + << QString("ahoj *_cau" + "_* nazdar") + << QString("ahoj *_cau" + "_* nazdar"); + QTest::newRow("basic-formatting-666") << QString("foo *bar* _baz_ /pwn/ yay foo@ @bar @ blesmrt") << QString("foo *bar* " "_baz_ " @@ -289,6 +304,17 @@ void ComposerResponsesTest::testPlainTextFormatting_data() "foo@example.org else\n" "test@domain"); + QTest::newRow("http-link-with-nested-mail-and-formatting-chars") + << QString::fromUtf8("http://example.org/meh/yay/?foo=test@example.org\n" + "http://example.org/(*checkout*)/pwn\n" + "*https://domain.org/yay*") + << QString::fromUtf8("http://example.org/meh/yay/?foo=test@example.org\n" + "http://example.org/(*checkout*)/pwn\n" + "*https://domain.org/yay*") + << QString::fromUtf8("http://example.org/meh/yay/?foo=test@example.org\n" + "http://example.org/(*checkout*)/pwn\n" + "*https://domain.org/yay*"); + } /** @short Test that the link recognition in plaintext -> HTML formatting recognizes the interesting links */ -- 2.11.4.GIT