From 162f5a20095c6937030d23ee03fb8f72c51eefa1 Mon Sep 17 00:00:00 2001 From: tobias Date: Sun, 6 Jun 2021 15:47:06 +0200 Subject: tdf#142669 Consider BOM on text encoding detection Return a flag if the auto detected text has a BOM. Save the flag in SwAsciiOptions so that BOM gets set correctly when file is written. Change-Id: I358c3ba243bc326a552c2dc24773c94f8319c700 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/116759 Tested-by: Jenkins Reviewed-by: Noel Grandin --- sw/qa/extras/txtexport/data/UTF16LEBOMCRLF.txt | Bin 0 -> 18 bytes sw/qa/extras/txtexport/data/UTF16LECRLF.txt | Bin 18 -> 16 bytes sw/qa/extras/txtexport/data/UTF8CRLF.txt | 2 + sw/qa/extras/txtexport/txtexport.cxx | 49 ++++++++++++++++--------- 4 files changed, 34 insertions(+), 17 deletions(-) create mode 100644 sw/qa/extras/txtexport/data/UTF16LEBOMCRLF.txt create mode 100644 sw/qa/extras/txtexport/data/UTF8CRLF.txt (limited to 'sw/qa/extras') diff --git a/sw/qa/extras/txtexport/data/UTF16LEBOMCRLF.txt b/sw/qa/extras/txtexport/data/UTF16LEBOMCRLF.txt new file mode 100644 index 000000000000..be232521eafc Binary files /dev/null and b/sw/qa/extras/txtexport/data/UTF16LEBOMCRLF.txt differ diff --git a/sw/qa/extras/txtexport/data/UTF16LECRLF.txt b/sw/qa/extras/txtexport/data/UTF16LECRLF.txt index be232521eafc..b74e964113de 100644 Binary files a/sw/qa/extras/txtexport/data/UTF16LECRLF.txt and b/sw/qa/extras/txtexport/data/UTF16LECRLF.txt differ diff --git a/sw/qa/extras/txtexport/data/UTF8CRLF.txt b/sw/qa/extras/txtexport/data/UTF8CRLF.txt new file mode 100644 index 000000000000..62d4d44677b6 --- /dev/null +++ b/sw/qa/extras/txtexport/data/UTF8CRLF.txt @@ -0,0 +1,2 @@ +フー +バー diff --git a/sw/qa/extras/txtexport/txtexport.cxx b/sw/qa/extras/txtexport/txtexport.cxx index a5f989cb6689..0e52f51a4e34 100644 --- a/sw/qa/extras/txtexport/txtexport.cxx +++ b/sw/qa/extras/txtexport/txtexport.cxx @@ -20,19 +20,25 @@ public: } protected: - OString readExportedFile() + template <class T> std::vector<T> readMemoryStream() 
{ SvMemoryStream aMemoryStream; SvFileStream aStream(maTempFile.GetURL(), StreamMode::READ); aStream.ReadStream(aMemoryStream); - const char* pData = static_cast<const char*>(aMemoryStream.GetData()); + const T* pData = static_cast<const T*>(aMemoryStream.GetData()); + return std::vector<T>(pData, pData + aMemoryStream.GetSize()); + } + + OString readExportedFile() + { + std::vector<char> aMemStream = readMemoryStream<char>(); int offset = 0; - if (aMemoryStream.GetSize() > 2 && pData[0] == '\xEF' && pData[1] == '\xBB' - && pData[2] == '\xBF') + if (aMemStream.size() > 2 && aMemStream[0] == '\xEF' && aMemStream[1] == '\xBB' + && aMemStream[2] == '\xBF') offset = 3; - return OString(pData + offset, aMemoryStream.GetSize() - offset); + return OString(aMemStream.data() + offset, aMemStream.size() - offset); } }; @@ -64,25 +70,34 @@ DECLARE_TXTEXPORT_TEST(testBullets, "bullets.odt") CPPUNIT_ASSERT_EQUAL(aExpected, aData); } -DECLARE_TXTEXPORT_TEST(testTdf120574_utf8, "UTF8BOMCRLF.txt") +DECLARE_TXTEXPORT_TEST(testTdf120574_utf8bom, "UTF8BOMCRLF.txt") { - SvMemoryStream aMemoryStream; - SvFileStream aStream(maTempFile.GetURL(), StreamMode::READ); - aStream.ReadStream(aMemoryStream); - const char* pData = static_cast<const char*>(aMemoryStream.GetData()); - OString aData(std::string_view(pData, aMemoryStream.GetSize())); + std::vector<char> aMemStream = readMemoryStream<char>(); + OString aData(std::string_view(aMemStream.data(), aMemStream.size())); CPPUNIT_ASSERT_EQUAL(OString(u8"\uFEFFフー\r\nバー\r\n"), aData); } -DECLARE_TXTEXPORT_TEST(testTdf120574_utf16le, "UTF16LECRLF.txt") +DECLARE_TXTEXPORT_TEST(testTdf120574_utf16lebom, "UTF16LEBOMCRLF.txt") { - SvMemoryStream aMemoryStream; - SvFileStream aStream(maTempFile.GetURL(), StreamMode::READ); - aStream.ReadStream(aMemoryStream); - const sal_Unicode* pData = static_cast<const sal_Unicode*>(aMemoryStream.GetData()); - OUString aData(pData, aMemoryStream.GetSize() / sizeof(sal_Unicode)); + std::vector<sal_Unicode> aMemStream = readMemoryStream<sal_Unicode>(); + OUString aData(aMemStream.data(), aMemStream.size() / 
sizeof(sal_Unicode)); CPPUNIT_ASSERT_EQUAL(OUString(u"\uFEFFフー\r\nバー\r\n"), aData); } + +DECLARE_TXTEXPORT_TEST(testTdf142669_utf8, "UTF8CRLF.txt") +{ + std::vector<char> aMemStream = readMemoryStream<char>(); + OString aData(std::string_view(aMemStream.data(), aMemStream.size())); + CPPUNIT_ASSERT_EQUAL(OString(u8"フー\r\nバー\r\n"), aData); +} + +DECLARE_TXTEXPORT_TEST(testTdf142669_utf16le, "UTF16LECRLF.txt") +{ + std::vector<sal_Unicode> aMemStream = readMemoryStream<sal_Unicode>(); + OUString aData(aMemStream.data(), aMemStream.size() / sizeof(sal_Unicode)); + CPPUNIT_ASSERT_EQUAL(OUString(u"フー\r\nバー\r\n"), aData); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3