tags. htmlParser_->parseHtmlString(html); - auto tagsMap = htmlParser_->getTags({TidyTag_A, TidyTag_DEL, TidyTag_PRE}); + auto tagsMap = htmlParser_->getTagsNodes({TidyTag_A, TidyTag_DEL, TidyTag_PRE}); - static QString styleTag(""); + static const QString styleTag(""); QString style; // Check for anytags. If there are any, we need to: @@ -89,11 +101,9 @@ MessageParser::parseMessage(const QString& messageId, // If the user has enabled link previews, then we need to generate the link preview. if (previewLinks) { // Get the first link in the message. - auto anchorTag = tagsMap[TidyTag_A].first(); - static QRegularExpression hrefRegex("href=\"(.*?)\""); - auto match = hrefRegex.match(anchorTag); - if (match.hasMatch()) { - Q_EMIT previewEngine_->parseLink(messageId, match.captured(1)); + auto href = htmlParser_->getNodeAttr(tagsMap[TidyTag_A].first(), TidyAttr_HREF); + if (!href.isEmpty()) { + Q_EMIT previewEngine_->parseLink(messageId, href); } } @@ -110,13 +120,13 @@ void MessageParser::preprocessMarkdown(QString& markdown) { // Match all instances of the linefeed character. - static QRegularExpression newlineRegex("\n"); + static const QRegularExpression newlineRegex("\\r?\\n"); static const QString newline = " \n"; // Replace all instances of the linefeed character with 2 spaces + a linefeed character // in order to force a line break in the HTML. // Note: we should only do this for non-code fenced blocks. - static QRegularExpression codeFenceRe("`{1,3}([\\s\\S]*?)`{1,3}"); + static const QRegularExpression codeFenceRe("`{1,3}([\\s\\S]*?)`{1,3}"); auto match = codeFenceRe.globalMatch(markdown); // If there are no code blocks, then we can just replace all linefeeds with 2 spaces @@ -132,7 +142,7 @@ MessageParser::preprocessMarkdown(QString& markdown) enum BlockType { Text, Code }; QVector> codeBlocks; - int start = 0; + qsizetype start = 0; while (match.hasNext()) { auto m = match.next(); auto nonCodelength = m.capturedStart() - start; @@ -158,27 +168,16 @@ MessageParser::preprocessMarkdown(QString& markdown) } } -// A callback function that will be called by the md4c library (`md_html`) to output the HTML. -static void -htmlChunkCb(const MD_CHAR* data, MD_SIZE data_size, void* userData) -{ - QByteArray* array = static_cast (userData); - if (data_size > 0) { - array->append(data, int(data_size)); - } -}; - QString MessageParser::markdownToHtml(const char* markdown) { static auto md_flags = MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOINDENTEDCODEBLOCKS | MD_FLAG_TASKLISTS | MD_FLAG_STRIKETHROUGH | MD_FLAG_UNDERLINE; - size_t data_len = strlen(markdown); + const size_t data_len = strlen(markdown); if (data_len <= 0) { return QString(); - } else { - QByteArray array; - int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0); - return result == 0 ? QString::fromUtf8(array) : QString(); } + QByteArray array; + const int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0); + return result == 0 ? QString::fromUtf8(array) : QString(); } diff --git a/src/app/previewengine.cpp b/src/app/previewengine.cpp index 5f4490f1..5e56fbb8 100644 --- a/src/app/previewengine.cpp +++ b/src/app/previewengine.cpp @@ -19,15 +19,6 @@ #include -static QString -getInnerHtml(const QString& tag) -{ - static const QRegularExpression re(">([^<]+)<"); - const auto match = re.match(tag); - return match.hasMatch() ? match.captured(1) : QString {}; -}; - -// Portable newline regex. const QRegularExpression PreviewEngine::newlineRe("\\r?\\n"); PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent) @@ -39,12 +30,11 @@ PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent) } QString -PreviewEngine::getTagContent(QList & tags, const QString& value) +PreviewEngine::getTagContent(const QList & tags, const QString& value) { Q_FOREACH (auto tag, tags) { const QRegularExpression re("(property|name)=\"(og:|twitter:|)" + value + "\".*?content=\"([^\"]+)\""); - const auto match = re.match(tag.remove(newlineRe)); if (match.hasMatch()) { return match.captured(3); @@ -54,45 +44,44 @@ PreviewEngine::getTagContent(QList & tags, const QString& value) } QString -PreviewEngine::getTitle(HtmlParser::TagInfoList& metaTags) +PreviewEngine::getTitle(const QList & metaTags) { // Try with opengraph/twitter props - QString title = getTagContent(metaTags[TidyTag_META], "title"); + QString title = getTagContent(metaTags, "title"); if (title.isEmpty()) { // Try with title tag - title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_TITLE)); + title = htmlParser_->getTagInnerHtml(TidyTag_TITLE); } if (title.isEmpty()) { // Try with h1 tag - title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_H1)); + title = htmlParser_->getTagInnerHtml(TidyTag_H1); } if (title.isEmpty()) { // Try with h2 tag - title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_H2)); + title = htmlParser_->getTagInnerHtml(TidyTag_H2); } return title; } QString -PreviewEngine::getDescription(HtmlParser::TagInfoList& metaTags) +PreviewEngine::getDescription(const QList & metaTags) { // Try with og/twitter props - QString d = getTagContent(metaTags[TidyTag_META], "description"); - if (d.isEmpty()) { // Try with first paragraph - d = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_P)); + QString desc = getTagContent(metaTags, "description"); + if (desc.isEmpty()) { // Try with first paragraph + desc = htmlParser_->getTagInnerHtml(TidyTag_P); } - return d; + return desc; } QString -PreviewEngine::getImage(HtmlParser::TagInfoList& metaTags) +PreviewEngine::getImage(const QList & metaTags) { // Try with og/twitter props - QString image = getTagContent(metaTags[TidyTag_META], "image"); + QString image = getTagContent(metaTags, "image"); if (image.isEmpty()) { // Try with href of link tag (rel="image_src") - auto tags = htmlParser_->getTags({TidyTag_LINK}); - Q_FOREACH (auto tag, tags[TidyTag_LINK]) { - static const QRegularExpression re("rel=\"image_src\".*?href=\"([^\"]+)\""); - const auto match = re.match(tag.remove(newlineRe)); - if (match.hasMatch()) { - return match.captured(1); + auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_LINK}); + Q_FOREACH (auto tag, tagsNodes[TidyTag_LINK]) { + QString href = htmlParser_->getNodeAttr(tag, TidyAttr_HREF); + if (!href.isEmpty()) { + return href; } } } @@ -104,7 +93,12 @@ PreviewEngine::onParseLink(const QString& messageId, const QString& link) { sendGetRequest(QUrl(link), [this, messageId, link](const QByteArray& html) { htmlParser_->parseHtmlString(html); - auto metaTags = htmlParser_->getTags({TidyTag_META}); + auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_META}); + auto metaTagNodes = tagsNodes[TidyTag_META]; + QList metaTags; + Q_FOREACH (auto tag, metaTagNodes) { + metaTags.append(htmlParser_->getNodeText(tag)); + } QString domain = QUrl(link).host(); if (domain.isEmpty()) { domain = link; diff --git a/src/app/previewengine.h b/src/app/previewengine.h index db14a968..2f0144ad 100644 --- a/src/app/previewengine.h +++ b/src/app/previewengine.h @@ -39,10 +39,10 @@ private: // An instance of HtmlParser used to parse HTML. HtmlParser* htmlParser_; - QString getTagContent(QList & tags, const QString& value); - QString getTitle(HtmlParser::TagInfoList& metaTags); - QString getDescription(HtmlParser::TagInfoList& metaTags); - QString getImage(HtmlParser::TagInfoList& metaTags); + QString getTagContent(const QList & tags, const QString& value); + QString getTitle(const QList & metaTags); + QString getDescription(const QList & metaTags); + QString getImage(const QList & metaTags); static const QRegularExpression newlineRe; }; diff --git a/tests/unittests/messageparser_unittest.cpp b/tests/unittests/messageparser_unittest.cpp index 4cbe16d9..8f91834b 100644 --- a/tests/unittests/messageparser_unittest.cpp +++ b/tests/unittests/messageparser_unittest.cpp @@ -117,7 +117,6 @@ TEST_F(MessageParserFixture, EndOfLineCharactersAreParsedCorrectly) auto backgroundColor = QColor::fromRgb(0, 0, 255); QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed); - QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady); // Parse a message with a link. globalEnv.messageParser->parseMessage("msgId_03", @@ -148,7 +147,6 @@ TEST_F(MessageParserFixture, FencedCodeIsParsedCorrectly) auto backgroundColor = QColor::fromRgb(0, 0, 255); QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed); - QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady); // Parse a message with a link. globalEnv.messageParser->parseMessage("msgId_04", @@ -169,3 +167,41 @@ TEST_F(MessageParserFixture, FencedCodeIsParsedCorrectly) " Text with
\n\n"); } + +/*! + * WHEN We parse a text body with a youtube link. + * THEN PreviewEngine::parseLink should be called with the correct arguments. + */ +TEST_F(MessageParserFixture, YoutubeLinkIsParsedCorrectly) +{ + auto url = "https://www.youtube.com/watch?v=1234567890"; + auto msg = "blah blah " + QString(url) + " blah blah"; + + QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed); + QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady); + + // Parse a message with a link. + globalEnv.messageParser->parseMessage("msgId_05", + msg, + true, + QColor::fromRgb(0, 0, 255), + QColor::fromRgb(0, 0, 255)); + + // Wait for the messageParsed signal which should be emitted once. + messageParsedSpy.wait(); + EXPECT_EQ(messageParsedSpy.count(), 1); + + QListcode\n
messageParserArguments = messageParsedSpy.takeFirst(); + EXPECT_TRUE(messageParserArguments.at(0).typeId() == qMetaTypeId ()); + + // Wait for the linkInfoReady signal which should be emitted once. + linkInfoReadySpy.wait(); + EXPECT_EQ(linkInfoReadySpy.count(), 1); + + QList linkInfoReadyArguments = linkInfoReadySpy.takeFirst(); + EXPECT_TRUE(linkInfoReadyArguments.at(0).typeId() == qMetaTypeId ()); + EXPECT_EQ(linkInfoReadyArguments.at(0).toString(), "msgId_05"); + EXPECT_TRUE(linkInfoReadyArguments.at(1).typeId() == qMetaTypeId ()); + QVariantMap linkInfo = linkInfoReadyArguments.at(1).toMap(); + EXPECT_EQ(linkInfo["url"].toString(), url); +}