/*
* Copyright (C) 2021-2024 Savoir-faire Linux Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "previewengine.h"

#include <QRegularExpression>
#include <QThread>
// Matches one line break, with or without a preceding carriage return.
// getTagContent() uses it to strip line breaks from a tag before matching.
const QRegularExpression PreviewEngine::newlineRe {QStringLiteral("\\r?\\n")};
// Builds the engine, creates its HTML parser as a child object, and starts a
// dedicated thread that this object is moved to.
//
// cm:     forwarded to the NetworkManager base class.
// parent: forwarded to the NetworkManager base class.
//
// NOTE(review): QObject::moveToThread() does not move an object that has a
// parent. Confirm that callers pass a null parent, or that NetworkManager does
// not hand `parent` to QObject.
PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent)
    : NetworkManager(cm, parent)
    , htmlParser_(new HtmlParser(this))
{
    // Give this object its own thread; the thread is kept in thread_ so the
    // destructor can stop it.
    auto* const worker = new QThread;
    thread_ = worker;
    this->moveToThread(worker);
    worker->start();

    // parseLink is wired to onParseLink through a queued connection so that
    // emitting the signal never blocks the emitting thread.
    QObject::connect(this,
                     &PreviewEngine::parseLink,
                     this,
                     &PreviewEngine::onParseLink,
                     Qt::QueuedConnection);
}
// Stops the worker thread started by the constructor and releases it.
PreviewEngine::~PreviewEngine()
{
    // Ask the thread's event loop to exit, then block until it has finished.
    thread_->quit();
    thread_->wait();
    // The QThread was allocated without a parent in the constructor, so nothing
    // else frees it. Deletion is deferred with deleteLater() so the thread
    // object outlives the remainder of this object's teardown.
    thread_->deleteLater();
}
// Returns the `content` attribute of the first tag whose `property` or `name`
// attribute equals `value`, optionally prefixed with "og:" or "twitter:".
//
// tags:  serialized tags to scan.
// value: bare property name to look for (e.g. "title"); matched literally.
//
// Returns an empty string when no tag matches. Only tags in which the
// property/name attribute appears before `content` are recognised.
QString
PreviewEngine::getTagContent(const QList<QString>& tags, const QString& value)
{
    // The pattern depends only on `value`, so it is compiled once rather than
    // once per tag. Escaping keeps `value` literal even if it ever contains
    // regular-expression metacharacters.
    const QRegularExpression re("(property|name)=\"(og:|twitter:|)"
                                + QRegularExpression::escape(value)
                                + "\".*?content=\"([^\"]+)\"");
    for (const QString& tag : tags) {
        // Work on a copy with line breaks removed so that `.` can span a tag
        // that was serialized over several lines.
        QString flattened = tag;
        flattened.remove(newlineRe);
        const auto match = re.match(flattened);
        if (match.hasMatch()) {
            return match.captured(3);
        }
    }
    return QString {};
}
// Picks a title for the parsed page.
//
// The Open Graph / Twitter "title" property is preferred. When it is absent,
// the inner HTML of the <title>, <h1> and <h2> tags is tried in that order.
// The result is empty when none of these yields any text.
QString
PreviewEngine::getTitle(const QList<QString>& metaTags)
{
    QString title = getTagContent(metaTags, "title");
    // Walk the fallbacks in priority order, stopping at the first hit.
    for (const auto fallbackTag : {TidyTag_TITLE, TidyTag_H1, TidyTag_H2}) {
        if (!title.isEmpty()) {
            break;
        }
        title = htmlParser_->getTagInnerHtml(fallbackTag);
    }
    return title;
}
// Picks a description for the parsed page: the Open Graph / Twitter
// "description" property when present, otherwise the inner HTML that the
// parser returns for the <p> tag.
QString
PreviewEngine::getDescription(const QList<QString>& metaTags)
{
    const QString fromMeta = getTagContent(metaTags, "description");
    return fromMeta.isEmpty() ? htmlParser_->getTagInnerHtml(TidyTag_P) : fromMeta;
}
// Picks an image URL for the parsed page.
//
// The Open Graph / Twitter "image" property is preferred. When it is absent,
// the href of the first <link rel="image_src"> tag is used. Returns an empty
// string when neither source provides a URL.
QString
PreviewEngine::getImage(const QList<QString>& metaTags)
{
    QString image = getTagContent(metaTags, "image");
    if (!image.isEmpty()) {
        return image;
    }

    auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_LINK});
    const auto linkNodes = tagsNodes[TidyTag_LINK];
    for (const auto& node : linkNodes) {
        // Only <link rel="image_src"> designates a preview image. Other link
        // tags (stylesheets, canonical URLs, ...) also carry an href, and
        // returning those would hand the caller a URL that is not an image.
        const QString rel = htmlParser_->getNodeAttr(node, TidyAttr_REL);
        if (rel.trimmed().compare(QStringLiteral("image_src"), Qt::CaseInsensitive) != 0) {
            continue;
        }
        const QString href = htmlParser_->getNodeAttr(node, TidyAttr_HREF);
        if (!href.isEmpty()) {
            return href;
        }
    }
    return image; // Still empty: nothing suitable was found.
}
// Slot connected to parseLink: requests `link` and, when the callback passed
// to sendGetRequest() is invoked with the response body, parses it and emits
// infoReady with the extracted preview fields.
//
// messageId: passed through unchanged as the first argument of infoReady.
// link:      URL to fetch; also reported back under the "url" key.
void
PreviewEngine::onParseLink(const QString& messageId, const QString& link)
{
    sendGetRequest(QUrl(link), [this, messageId, link](const QByteArray& html) {
        htmlParser_->parseHtmlString(html);

        // Serialize every <meta> node to text for getTagContent().
        auto nodesByTag = htmlParser_->getTagsNodes({TidyTag_META});
        const auto metaNodes = nodesByTag[TidyTag_META];
        QList<QString> metaTags;
        for (const auto& node : metaNodes) {
            metaTags.append(htmlParser_->getNodeText(node));
        }

        // Use the raw link as the domain when the URL has no host part.
        const QString host = QUrl(link).host();
        const QString domain = host.isEmpty() ? link : host;

        const QString title = getTitle(metaTags);
        const QString description = getDescription(metaTags);
        const QString image = getImage(metaTags);

        Q_EMIT infoReady(messageId,
                         {{"title", title},
                          {"description", description},
                          {"image", image},
                          {"url", link},
                          {"domain", domain}});
    });
}