mirror of
https://git.jami.net/savoirfairelinux/jami-client-qt.git
synced 2025-03-28 14:56:19 +01:00
messageparser: fix link recognition
The attempted fix in commit65d3bef
was based on the incorrect assumption that md4c systematically fails to detect links that contain at least one +, - or _. This commit removes the postprocessing step added in65d3bef
and instead patches md4c to directly fix its link recognition algorithm. GitLab: #1903 GitLab: #1964 Change-Id: Ic1d6302e3250390af66611a16715a6397578abb5
This commit is contained in:
parent
04a1544d56
commit
6fac40340b
5 changed files with 40 additions and 58 deletions
2
.gitmodules
vendored
2
.gitmodules
vendored
|
@ -21,7 +21,7 @@
|
|||
ignore = dirty
|
||||
[submodule "3rdparty/md4c"]
|
||||
path = 3rdparty/md4c
|
||||
url = https://github.com/mity/md4c.git
|
||||
url = https://github.com/fsimonfc/md4c.git
|
||||
ignore = dirty
|
||||
[submodule "3rdparty/tidy-html5"]
|
||||
path = 3rdparty/tidy-html5
|
||||
|
|
2
3rdparty/md4c
vendored
2
3rdparty/md4c
vendored
|
@ -1 +1 @@
|
|||
Subproject commit ad8d41127b94e2f0633ad14b3787f0bc4613a689
|
||||
Subproject commit 635f29673582c55fb0b897b2b03236dd9f86c1ed
|
|
@ -167,38 +167,6 @@ MessageParser::preprocessMarkdown(QString& markdown)
|
|||
markdown += block.second;
|
||||
}
|
||||
}
|
||||
const QRegularExpression linkRegex(R"((http[s]?://[^\s]*[\+\-_][^\s]*|www\.[^\s]*[\+\-_][^\s]*))");
|
||||
const QString anchorTagTemplate("<a href=\"%1\">%1</a>");
|
||||
|
||||
QByteArray
|
||||
MessageParser::postprocessHTML(const QByteArray& html)
|
||||
{
|
||||
// HACK : https://github.com/mity/md4c/issues/251
|
||||
// Check for any links that haven't been properly detected by md4c.
|
||||
// We can do this by checking for any text that starts with "http" or "www" and contains a + or
|
||||
// a -. If we find any, we insert a <a> tag around the link.
|
||||
// This function could become obsolete if the md4c library fixes the issue it's addressing.
|
||||
|
||||
QByteArray processedHtml = html;
|
||||
QRegularExpressionMatchIterator it = linkRegex.globalMatch(html);
|
||||
|
||||
int offset = 0;
|
||||
while (it.hasNext()) {
|
||||
QRegularExpressionMatch match = it.next();
|
||||
QString link = match.captured(0);
|
||||
int pTagIndex = link.indexOf("</p>");
|
||||
QString anchorTag = anchorTagTemplate.arg(pTagIndex != -1 ? link.left(pTagIndex) : link);
|
||||
if (pTagIndex != -1) {
|
||||
anchorTag += "</p>";
|
||||
}
|
||||
processedHtml.replace(match.capturedStart(0) + offset,
|
||||
match.capturedLength(0),
|
||||
anchorTag.toUtf8());
|
||||
offset += anchorTag.toUtf8().length() - match.capturedLength(0);
|
||||
}
|
||||
|
||||
return processedHtml;
|
||||
}
|
||||
|
||||
QString
|
||||
MessageParser::markdownToHtml(const char* markdown)
|
||||
|
@ -211,6 +179,5 @@ MessageParser::markdownToHtml(const char* markdown)
|
|||
}
|
||||
QByteArray array;
|
||||
const int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0);
|
||||
array = postprocessHTML(array);
|
||||
return result == 0 ? QString::fromUtf8(array) : QString();
|
||||
}
|
||||
|
|
|
@ -60,9 +60,6 @@ private:
|
|||
// Preprocess the markdown message (e.g. handle line breaks).
|
||||
void preprocessMarkdown(QString& markdown);
|
||||
|
||||
// Postprocess HTML to account for issues in md4c
|
||||
QByteArray postprocessHTML(const QByteArray& html);
|
||||
|
||||
// Transform markdown syntax into HTML.
|
||||
QString markdownToHtml(const char* markdown);
|
||||
|
||||
|
|
|
@ -108,8 +108,8 @@ TEST_F(MessageParserFixture, ALinkIsParsedCorrectly)
|
|||
}
|
||||
|
||||
/*!
|
||||
* WHEN We parse a text body with a link and a + or a -.
|
||||
* THEN The HTML body should be returned correctly including the link even if MD4C doesn't detect it.
|
||||
* WHEN We parse a text body with a link containing special characters ( +, -, _, etc.)
|
||||
* THEN The HTML body should be returned correctly including the link.
|
||||
*/
|
||||
TEST_F(MessageParserFixture, AComplexLinkIsParsedCorrectly)
|
||||
{
|
||||
|
@ -119,27 +119,45 @@ TEST_F(MessageParserFixture, AComplexLinkIsParsedCorrectly)
|
|||
QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed);
|
||||
QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady);
|
||||
|
||||
// Parse a message with a link containing a + or a -.
|
||||
globalEnv.messageParser->parseMessage("msgId_03",
|
||||
"https://review.jami.net/q/status:open+-is:wip",
|
||||
false,
|
||||
linkColor,
|
||||
backgroundColor);
|
||||
struct TestCase {
|
||||
QString link;
|
||||
QString expectedMessage;
|
||||
};
|
||||
static const std::vector<TestCase> testCases = {
|
||||
{"https://review.jami.net/q/status:open+-is:wip",
|
||||
"<style>a{color:#0000ff;}</style><p><a href=\"https://review.jami.net/q/status:open+-is:wip\">https://review.jami.net/q/status:open+-is:wip</a></p>\n"},
|
||||
{"https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907",
|
||||
"<style>a{color:#0000ff;}</style><p><a href=\"https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907\">https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907</a></p>\n"},
|
||||
{"https://www.youtube.com/watch?v=_tEL8bJ7yqU",
|
||||
"<style>a{color:#0000ff;}</style><p><a href=\"https://www.youtube.com/watch?v=_tEL8bJ7yqU\">https://www.youtube.com/watch?v=_tEL8bJ7yqU</a></p>\n"},
|
||||
{"https://www.youtube.com/watch?v=d_z77s7nifU",
|
||||
"<style>a{color:#0000ff;}</style><p><a href=\"https://www.youtube.com/watch?v=d_z77s7nifU\">https://www.youtube.com/watch?v=d_z77s7nifU</a></p>\n"},
|
||||
{"http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3",
|
||||
"<style>a{color:#0000ff;}</style><p><a href=\"http://www.example.com/foo.php?bar%5B%5D=1&bar%5B%5D=2&bar%5B%5D=3\">http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3</a></p>\n"},
|
||||
};
|
||||
|
||||
// Wait for the messageParsed signal which should be emitted once.
|
||||
messageParsedSpy.wait();
|
||||
EXPECT_EQ(messageParsedSpy.count(), 1);
|
||||
static const QString expectedMessageTemplate = " <style>a{color:#0000ff;}</style><p><a href=\"%1\">%1</a></p>\n";
|
||||
|
||||
QList<QVariant> messageParserArguments = messageParsedSpy.takeFirst();
|
||||
EXPECT_TRUE(messageParserArguments.at(0).typeId() == qMetaTypeId<QString>());
|
||||
EXPECT_EQ(messageParserArguments.at(0).toString(), "msgId_03");
|
||||
EXPECT_TRUE(messageParserArguments.at(1).typeId() == qMetaTypeId<QString>());
|
||||
EXPECT_EQ(messageParserArguments.at(1).toString(),
|
||||
"<style>a{color:#0000ff;}</style><p><a "
|
||||
"href=\"https://review.jami.net/q/status:open+-is:wip\">https://review.jami.net/q/"
|
||||
"status:open+-is:wip</a></p>\n");
|
||||
for (const auto& testCase : testCases) {
|
||||
globalEnv.messageParser->parseMessage("msgId",
|
||||
testCase.link,
|
||||
false,
|
||||
linkColor,
|
||||
backgroundColor);
|
||||
|
||||
// The rest of the link info is not tested here.
|
||||
// Wait for the messageParsed signal which should be emitted once.
|
||||
messageParsedSpy.wait();
|
||||
EXPECT_EQ(messageParsedSpy.count(), 1);
|
||||
|
||||
QList<QVariant> messageParserArguments = messageParsedSpy.takeFirst();
|
||||
EXPECT_TRUE(messageParserArguments.at(1).typeId() == qMetaTypeId<QString>());
|
||||
|
||||
QString result = messageParserArguments.at(1).toString();
|
||||
QString expected = testCase.expectedMessage;
|
||||
// Convert to std::string to get better error messages in case of test
|
||||
// failure (GoogleTest can't print QStrings).
|
||||
EXPECT_EQ(result.toStdString(), expected.toStdString());
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
|
|
Loading…
Add table
Reference in a new issue