From 154ce53e2def2f340ebe05cbefc855fd81f0f9a3 Mon Sep 17 00:00:00 2001 From: Andreas Traczyk Date: Mon, 29 Jan 2024 12:17:55 -0500 Subject: [PATCH] link-preview: fix UTF-8 decoding The HTML string was being converted to a local 8-bit string prior to parsing with tidy. According to Qt documentation, this works on Unix systems as toLocal8Bit() is equivalent to toUtf8(), however, it may not on Windows, and may result in character replacement. Gitlab: #1536 Change-Id: I7aec98cffbd9d72771d1c489f52b38dbb3a1ed9f --- src/app/htmlparser.h | 9 ++- tests/CMakeLists.txt | 2 + tests/unittests/previewengine_unittest.cpp | 93 ++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 tests/unittests/previewengine_unittest.cpp diff --git a/src/app/htmlparser.h b/src/app/htmlparser.h index 0b35385e..76f636e0 100644 --- a/src/app/htmlparser.h +++ b/src/app/htmlparser.h @@ -49,7 +49,7 @@ public: bool parseHtmlString(const QString& html) { - return tidyParseString(doc_, html.toLocal8Bit().data()) >= 0; + return tidyParseString(doc_, html.toUtf8().data()) >= 0; } using TagNodeList = QMap>; @@ -86,11 +86,14 @@ public: // Extract the text value from a node. QString getNodeText(TidyNode node) { - TidyBuffer nodeValue = {}; + TidyBuffer nodeValue = {0}; if (!node || tidyNodeGetText(doc_, node, &nodeValue) != yes) { return QString(); } - QString result = QString::fromUtf8((char*) nodeValue.bp, nodeValue.size); + QString result; + if (nodeValue.bp && nodeValue.size > 0) { + result = QString::fromUtf8(reinterpret_cast(nodeValue.bp), nodeValue.size); + } tidyBufFree(&nodeValue); return result; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d50908cf..bc68fcf0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,7 @@ set(QT_TESTING_MODULES QuickTest Test Widgets + HttpServer ) find_package(Qt${QT_VERSION_MAJOR} CONFIG REQUIRED ${QT_TESTING_MODULES}) foreach(MODULE ${QT_TESTING_MODULES}) @@ -84,6 +85,7 @@ set(UNIT_TESTS_SOURCE_FILES ${CMAKE_SOURCE_DIR}/tests/unittests/account_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/contact_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/messageparser_unittest.cpp + ${CMAKE_SOURCE_DIR}/tests/unittests/previewengine_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/globaltestenvironment.h ${COMMON_TESTS_SOURCES}) diff --git a/tests/unittests/previewengine_unittest.cpp b/tests/unittests/previewengine_unittest.cpp new file mode 100644 index 00000000..70808dfd --- /dev/null +++ b/tests/unittests/previewengine_unittest.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2024 Savoir-faire Linux Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "globaltestenvironment.h" + +#include + +class PreviewEngineFixture : public ::testing::Test +{ +public: + // Prepare unit test context. Called at + // prior each unit test execution + void SetUp() override { + server = new QHttpServer(); + // Setup a server that can return an HTML body. + server->listen(QHostAddress::LocalHost, 8000); + } + + // Close unit test context. Called + // after each unit test ending + void TearDown() override { + delete server; + } + + // An instance of QHttpServer used to create a server. + QHttpServer* server; +}; + +/*! + * WHEN We parse a link + * THEN The infoReady signal should be emitted once with the correct info + */ +TEST_F(PreviewEngineFixture, ParsingALinkEmitsInfoReadySignal) +{ + auto link = QString("http://localhost:8000/test"); + server->route("/test", [] () { + return QString(""); + }); + + QSignalSpy infoReadySpy(globalEnv.previewEngine.data(), &PreviewEngine::infoReady); + + Q_EMIT globalEnv.previewEngine->parseLink("msgId_01", link); + + // Wait for the infoReady signal which should be emitted once with the correct ID. + infoReadySpy.wait(); + EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once"; + + QList infoReadyArguments = infoReadySpy.takeFirst(); + EXPECT_TRUE(infoReadyArguments.at(0).typeId() == qMetaTypeId()); + EXPECT_EQ(infoReadyArguments.at(0).toString(), "msgId_01"); +} + +/*! + * WHEN We parse a link that has a description containing characters encoded using UTF-8 + * THEN The description should be parsed and match the original string + */ +TEST_F(PreviewEngineFixture, UTF8CharactersAreParsedCorrectly) +{ + auto link = QString("http://localhost:8000/test"); + server->route("/test", [] () { + return QString("parseLink("msgId_01", link); + + // Wait for the infoReady signal which should be emitted once. + infoReadySpy.wait(); + EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once"; + + QList infoReadyArguments = infoReadySpy.takeFirst(); + EXPECT_TRUE(infoReadyArguments.at(1).typeId() == qMetaTypeId()); + + // Check that the description is parsed correctly. + QVariantMap info = infoReadyArguments.at(1).toMap(); + EXPECT_TRUE(info.contains("description")); + EXPECT_EQ(info["description"].toString(), "Test de caractères Utf-8"); +}