diff options
author | Sune Vuorela <sune@vuorela.dk> | 2025-01-15 16:57:52 +0100 |
---|---|---|
committer | Albert Astals Cid <aacid@kde.org> | 2025-01-15 22:28:36 +0000 |
commit | 2ed82df7fbd398410a5a42005958aa70699d6b4c (patch) | |
tree | f761a46cd1c09e0d3a0e7bf2f911ad7837d36786 | |
parent | 5a3689a023718647257b4c32542b06bea4ea6872 (diff) |
Simplify utf8toutf16 functions
-rw-r--r-- | poppler/UTF.cc | 54 | ||||
-rw-r--r-- | poppler/UTF.h | 10 | ||||
-rw-r--r-- | qt5/tests/check_utf_conversion.cpp | 15 | ||||
-rw-r--r-- | qt6/tests/check_utf_conversion.cpp | 15 | ||||
-rw-r--r-- | utils/Win32Console.cc | 5 |
5 files changed, 30 insertions, 69 deletions
diff --git a/poppler/UTF.cc b/poppler/UTF.cc index 6f67dccf..38fb0a28 100644 --- a/poppler/UTF.cc +++ b/poppler/UTF.cc @@ -430,14 +430,14 @@ int utf16CountUtf8Bytes(const uint16_t *utf16) return count; } -int utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8) +std::string utf16ToUtf8(const uint16_t *utf16, int maxUtf16) { uint32_t codepoint = 0; uint32_t state = 0; int nIn = 0; - int nOut = 0; - char *p = utf8; - while (*utf16 && nIn < maxUtf16 && nOut < maxUtf8 - 1) { + char p[4]; + std::string utf8; + while (*utf16 && nIn < maxUtf16) { decodeUtf16(&state, &codepoint, *utf16); if (state == UTF16_ACCEPT || state == UTF16_REJECT) { if (state == UTF16_REJECT || codepoint > UCS4_MAX) { @@ -445,38 +445,17 @@ int utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8) state = 0; } - int bufSize = maxUtf8 - nOut; - int count = mapUTF8(codepoint, p, bufSize); - p += count; - nOut += count; + int count = mapUTF8(codepoint, p, 4); + utf8.append(std::string_view(p, count)); } utf16++; nIn++; } // replace any trailing bytes too short for a valid UTF-8 with a replacement char - if (state != UTF16_ACCEPT && state != UTF16_REJECT && nOut < maxUtf8 - 1) { - int bufSize = maxUtf8 - nOut; - int count = mapUTF8(REPLACEMENT_CHAR, p, bufSize); - p += count; - nOut += count; - nOut++; - } - if (nOut > maxUtf8 - 1) { - nOut = maxUtf8 - 1; + if (state != UTF16_ACCEPT && state != UTF16_REJECT) { + int count = mapUTF8(REPLACEMENT_CHAR, p, 4); + utf8.append(std::string_view(p, count)); } - utf8[nOut] = 0; - return nOut; -} - -// Allocate utf8 string and convert utf16 into it. -char *utf16ToUtf8(const uint16_t *utf16, int *len) -{ - const int n = utf16CountUtf8Bytes(utf16); - if (len) { - *len = n; - } - char *utf8 = (char *)gmalloc(n + 1); - utf16ToUtf8(utf16, INT_MAX, utf8, n + 1); return utf8; } @@ -539,28 +518,25 @@ std::string TextStringToUtf8(const std::string &textStr) { int i, len; const char *s; - char *utf8; + std::string utf8; len = textStr.size(); s = textStr.c_str(); if (hasUnicodeByteOrderMark(textStr)) { - uint16_t *utf16; + std::vector<uint16_t> utf16; len = len / 2 - 1; - utf16 = new uint16_t[len + 1]; + utf16.resize(len + 1); for (i = 0; i < len; i++) { utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff); } utf16[i] = 0; - utf8 = utf16ToUtf8(utf16); - delete[] utf16; + utf8 = utf16ToUtf8(utf16.data(), utf16.size()); } else { - utf8 = (char *)gmalloc(len + 1); + utf8.resize(len + 1); for (i = 0; i < len; i++) { utf8[i] = pdfDocEncoding[s[i] & 0xff]; } utf8[i] = 0; } - std::string utf8_string(utf8); - gfree(utf8); - return utf8_string; + return utf8; } diff --git a/poppler/UTF.h b/poppler/UTF.h index f09ce1cf..4486560f 100644 --- a/poppler/UTF.h +++ b/poppler/UTF.h @@ -134,14 +134,8 @@ int POPPLER_PRIVATE_EXPORT utf16CountUtf8Bytes(const uint16_t *utf16); // maxUtf16 is set the the exact number of code units to convert. // maxUtf16 - Maximum number of UTF-16 code units to convert. Conversion stops // when either this count is reached or a null is encountered. -// utf8 - Output buffer to write the UTF-8 string to. Output will always be -// null terminated. -// maxUtf8 - Maximum size of the output buffer including space for null. -// Returns number of UTF-8 bytes written (excluding NULL). -int POPPLER_PRIVATE_EXPORT utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8); - -// Allocate utf8 string and convert utf16 into it. -char POPPLER_PRIVATE_EXPORT *utf16ToUtf8(const uint16_t *utf16, int *len = nullptr); +// Returns utf8 string. +std::string POPPLER_PRIVATE_EXPORT utf16ToUtf8(const uint16_t *utf16, int maxUtf16 = INT_MAX); // Convert a UCS-4 string to pure ASCII (7bit) // in - UCS-4 string bytes diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp index 88a32359..05ef519e 100644 --- a/qt5/tests/check_utf_conversion.cpp +++ b/qt5/tests/check_utf_conversion.cpp @@ -79,14 +79,13 @@ void TestUTFConversion::testUTF_data() void TestUTFConversion::testUTF() { - char utf8Buf[1000]; - char *utf8String; + std::string utf8String; uint16_t utf16Buf[1000]; uint16_t *utf16String; int len; QFETCH(QString, s); - char *str = strdup(s.toUtf8().constData()); + QByteArray str = s.toUtf8(); // UTF-8 to UTF-16 @@ -111,17 +110,13 @@ void TestUTFConversion::testUTF() len = utf16CountUtf8Bytes(s.utf16()); QCOMPARE(len, (int)strlen(str)); - Q_ASSERT(len < (int)sizeof(utf8Buf)); // if this fails, make utf8Buf larger - len = utf16ToUtf8(s.utf16(), INT_MAX, utf8Buf, sizeof(utf8Buf)); - QVERIFY(compare(utf8Buf, str)); + utf8String = utf16ToUtf8(s.utf16(), INT_MAX); + QVERIFY(compare(utf8String.c_str(), str)); QCOMPARE(len, (int)strlen(str)); utf8String = utf16ToUtf8(s.utf16()); - QVERIFY(compare(utf8String, str)); - free(utf8String); - - free(str); + QVERIFY(compare(utf8String.c_str(), str)); } void TestUTFConversion::testUnicodeToAscii7() diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp index 62ce65eb..53fe4bec 100644 --- a/qt6/tests/check_utf_conversion.cpp +++ b/qt6/tests/check_utf_conversion.cpp @@ -77,14 +77,13 @@ void TestUTFConversion::testUTF_data() void TestUTFConversion::testUTF() { - char utf8Buf[1000]; - char *utf8String; + std::string utf8String; uint16_t utf16Buf[1000]; uint16_t *utf16String; int len; QFETCH(QString, s); - char *str = strdup(s.toUtf8().constData()); + QByteArray str = s.toUtf8().constData(); // UTF-8 to UTF-16 @@ -109,17 +108,13 @@ void TestUTFConversion::testUTF() len = utf16CountUtf8Bytes(s.utf16()); QCOMPARE(len, (int)strlen(str)); - Q_ASSERT(len < (int)sizeof(utf8Buf)); // if this fails, make utf8Buf larger - len = utf16ToUtf8(s.utf16(), INT_MAX, utf8Buf, sizeof(utf8Buf)); - QVERIFY(compare(utf8Buf, str)); + utf8String = utf16ToUtf8(s.utf16(), INT_MAX); + QVERIFY(compare(utf8String.c_str(), str)); QCOMPARE(len, (int)strlen(str)); utf8String = utf16ToUtf8(s.utf16()); - QVERIFY(compare(utf8String, str)); - free(utf8String); - - free(str); + QVERIFY(compare(utf8String.c_str(), str)); } void TestUTFConversion::testUnicodeToAscii7() diff --git a/utils/Win32Console.cc b/utils/Win32Console.cc index deac2458..181d3bb7 100644 --- a/utils/Win32Console.cc +++ b/utils/Win32Console.cc @@ -120,7 +120,8 @@ Win32Console::Win32Console(int *argc, char **argv[]) argList = new char *[numArgs]; privateArgList = new char *[numArgs]; for (int i = 0; i < numArgs; i++) { - argList[i] = utf16ToUtf8((uint16_t *)(wargv[i])); + std::string arg = utf16ToUtf8((uint16_t *)(wargv[i])); + argList[i] = strdup(arg.c_str()); // parseArgs will rearrange the argv list so we keep our own copy // to use for freeing all the strings privateArgList[i] = argList[i]; @@ -155,7 +156,7 @@ Win32Console::~Win32Console() flush(true); if (argList) { for (int i = 0; i < numArgs; i++) - gfree(privateArgList[i]); + free(privateArgList[i]); delete[] argList; delete[] privateArgList; } |