1
0
Fork 0
mirror of https://git.jami.net/savoirfairelinux/jami-client-qt.git synced 2025-03-28 14:56:19 +01:00

messageparser: fix link recognition

The attempted fix in commit 65d3bef was based on the incorrect
assumption that md4c systematically fails to detect links that contain
at least one +, - or _. This commit removes the postprocessing step
added in 65d3bef and instead patches md4c to directly fix its link
recognition algorithm.

GitLab: #1903
GitLab: #1964
Change-Id: Ic1d6302e3250390af66611a16715a6397578abb5
This commit is contained in:
François-Simon Fauteux-Chapleau 2025-03-18 13:35:55 -04:00
parent 04a1544d56
commit 6fac40340b
5 changed files with 40 additions and 58 deletions

2
.gitmodules vendored
View file

@ -21,7 +21,7 @@
ignore = dirty
[submodule "3rdparty/md4c"]
path = 3rdparty/md4c
url = https://github.com/mity/md4c.git
url = https://github.com/fsimonfc/md4c.git
ignore = dirty
[submodule "3rdparty/tidy-html5"]
path = 3rdparty/tidy-html5

2
3rdparty/md4c vendored

@ -1 +1 @@
Subproject commit ad8d41127b94e2f0633ad14b3787f0bc4613a689
Subproject commit 635f29673582c55fb0b897b2b03236dd9f86c1ed

View file

@ -167,38 +167,6 @@ MessageParser::preprocessMarkdown(QString& markdown)
markdown += block.second;
}
}
const QRegularExpression linkRegex(R"((http[s]?://[^\s]*[\+\-_][^\s]*|www\.[^\s]*[\+\-_][^\s]*))");
const QString anchorTagTemplate("<a href=\"%1\">%1</a>");
QByteArray
MessageParser::postprocessHTML(const QByteArray& html)
{
// HACK : https://github.com/mity/md4c/issues/251
// Check for any links that haven't been properly detected by md4c.
// We can do this by checking for any text that starts with "http" or "www" and contains a + or
// a -. If we find any, we insert a <a> tag around the link.
// This function could become obsolete if the md4c library fixes the issue it's addressing.
QByteArray processedHtml = html;
QRegularExpressionMatchIterator it = linkRegex.globalMatch(html);
int offset = 0;
while (it.hasNext()) {
QRegularExpressionMatch match = it.next();
QString link = match.captured(0);
int pTagIndex = link.indexOf("</p>");
QString anchorTag = anchorTagTemplate.arg(pTagIndex != -1 ? link.left(pTagIndex) : link);
if (pTagIndex != -1) {
anchorTag += "</p>";
}
processedHtml.replace(match.capturedStart(0) + offset,
match.capturedLength(0),
anchorTag.toUtf8());
offset += anchorTag.toUtf8().length() - match.capturedLength(0);
}
return processedHtml;
}
QString
MessageParser::markdownToHtml(const char* markdown)
@ -211,6 +179,5 @@ MessageParser::markdownToHtml(const char* markdown)
}
QByteArray array;
const int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0);
array = postprocessHTML(array);
return result == 0 ? QString::fromUtf8(array) : QString();
}

View file

@ -60,9 +60,6 @@ private:
// Preprocess the markdown message (e.g. handle line breaks).
void preprocessMarkdown(QString& markdown);
// Postprocess HTML to account for issues in md4c
QByteArray postprocessHTML(const QByteArray& html);
// Transform markdown syntax into HTML.
QString markdownToHtml(const char* markdown);

View file

@ -108,8 +108,8 @@ TEST_F(MessageParserFixture, ALinkIsParsedCorrectly)
}
/*!
* WHEN We parse a text body with a link and a + or a -.
* THEN The HTML body should be returned correctly including the link even if MD4C doesn't detect it.
* WHEN We parse a text body with a link containing special characters ( +, -, _, etc.)
* THEN The HTML body should be returned correctly including the link.
*/
TEST_F(MessageParserFixture, AComplexLinkIsParsedCorrectly)
{
@ -119,27 +119,45 @@ TEST_F(MessageParserFixture, AComplexLinkIsParsedCorrectly)
QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed);
QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady);
// Parse a message with a link containing a + or a -.
globalEnv.messageParser->parseMessage("msgId_03",
"https://review.jami.net/q/status:open+-is:wip",
false,
linkColor,
backgroundColor);
struct TestCase {
QString link;
QString expectedMessage;
};
static const std::vector<TestCase> testCases = {
{"https://review.jami.net/q/status:open+-is:wip",
"<style>a{color:#0000ff;}</style><p><a href=\"https://review.jami.net/q/status:open+-is:wip\">https://review.jami.net/q/status:open+-is:wip</a></p>\n"},
{"https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907",
"<style>a{color:#0000ff;}</style><p><a href=\"https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907\">https://git.jami.net/savoirfairelinux/jami-client-qt/-/issues/1885#note_53907</a></p>\n"},
{"https://www.youtube.com/watch?v=_tEL8bJ7yqU",
"<style>a{color:#0000ff;}</style><p><a href=\"https://www.youtube.com/watch?v=_tEL8bJ7yqU\">https://www.youtube.com/watch?v=_tEL8bJ7yqU</a></p>\n"},
{"https://www.youtube.com/watch?v=d_z77s7nifU",
"<style>a{color:#0000ff;}</style><p><a href=\"https://www.youtube.com/watch?v=d_z77s7nifU\">https://www.youtube.com/watch?v=d_z77s7nifU</a></p>\n"},
{"http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3",
"<style>a{color:#0000ff;}</style><p><a href=\"http://www.example.com/foo.php?bar%5B%5D=1&amp;bar%5B%5D=2&amp;bar%5B%5D=3\">http://www.example.com/foo.php?bar[]=1&amp;bar[]=2&amp;bar[]=3</a></p>\n"},
};
// Wait for the messageParsed signal which should be emitted once.
messageParsedSpy.wait();
EXPECT_EQ(messageParsedSpy.count(), 1);
static const QString expectedMessageTemplate = " <style>a{color:#0000ff;}</style><p><a href=\"%1\">%1</a></p>\n";
QList<QVariant> messageParserArguments = messageParsedSpy.takeFirst();
EXPECT_TRUE(messageParserArguments.at(0).typeId() == qMetaTypeId<QString>());
EXPECT_EQ(messageParserArguments.at(0).toString(), "msgId_03");
EXPECT_TRUE(messageParserArguments.at(1).typeId() == qMetaTypeId<QString>());
EXPECT_EQ(messageParserArguments.at(1).toString(),
"<style>a{color:#0000ff;}</style><p><a "
"href=\"https://review.jami.net/q/status:open+-is:wip\">https://review.jami.net/q/"
"status:open+-is:wip</a></p>\n");
for (const auto& testCase : testCases) {
globalEnv.messageParser->parseMessage("msgId",
testCase.link,
false,
linkColor,
backgroundColor);
// The rest of the link info is not tested here.
// Wait for the messageParsed signal which should be emitted once.
messageParsedSpy.wait();
EXPECT_EQ(messageParsedSpy.count(), 1);
QList<QVariant> messageParserArguments = messageParsedSpy.takeFirst();
EXPECT_TRUE(messageParserArguments.at(1).typeId() == qMetaTypeId<QString>());
QString result = messageParserArguments.at(1).toString();
QString expected = testCase.expectedMessage;
// Convert to std::string to get better error messages in case of test
// failure (GoogleTest can't print QStrings).
EXPECT_EQ(result.toStdString(), expected.toStdString());
}
}
/*!