summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSune Vuorela <sune@vuorela.dk>2025-01-15 16:57:52 +0100
committerAlbert Astals Cid <aacid@kde.org>2025-01-15 22:28:36 +0000
commit2ed82df7fbd398410a5a42005958aa70699d6b4c (patch)
treef761a46cd1c09e0d3a0e7bf2f911ad7837d36786
parent5a3689a023718647257b4c32542b06bea4ea6872 (diff)
Simplify utf8toutf16 functions
-rw-r--r--poppler/UTF.cc54
-rw-r--r--poppler/UTF.h10
-rw-r--r--qt5/tests/check_utf_conversion.cpp15
-rw-r--r--qt6/tests/check_utf_conversion.cpp15
-rw-r--r--utils/Win32Console.cc5
5 files changed, 30 insertions, 69 deletions
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 6f67dccf..38fb0a28 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -430,14 +430,14 @@ int utf16CountUtf8Bytes(const uint16_t *utf16)
return count;
}
-int utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8)
+std::string utf16ToUtf8(const uint16_t *utf16, int maxUtf16)
{
uint32_t codepoint = 0;
uint32_t state = 0;
int nIn = 0;
- int nOut = 0;
- char *p = utf8;
- while (*utf16 && nIn < maxUtf16 && nOut < maxUtf8 - 1) {
+ char p[4];
+ std::string utf8;
+ while (*utf16 && nIn < maxUtf16) {
decodeUtf16(&state, &codepoint, *utf16);
if (state == UTF16_ACCEPT || state == UTF16_REJECT) {
if (state == UTF16_REJECT || codepoint > UCS4_MAX) {
@@ -445,38 +445,17 @@ int utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8)
state = 0;
}
- int bufSize = maxUtf8 - nOut;
- int count = mapUTF8(codepoint, p, bufSize);
- p += count;
- nOut += count;
+ int count = mapUTF8(codepoint, p, 4);
+ utf8.append(std::string_view(p, count));
}
utf16++;
nIn++;
}
// replace a trailing incomplete UTF-16 sequence (decoder left mid-sequence) with a replacement char
- if (state != UTF16_ACCEPT && state != UTF16_REJECT && nOut < maxUtf8 - 1) {
- int bufSize = maxUtf8 - nOut;
- int count = mapUTF8(REPLACEMENT_CHAR, p, bufSize);
- p += count;
- nOut += count;
- nOut++;
- }
- if (nOut > maxUtf8 - 1) {
- nOut = maxUtf8 - 1;
+ if (state != UTF16_ACCEPT && state != UTF16_REJECT) {
+ int count = mapUTF8(REPLACEMENT_CHAR, p, 4);
+ utf8.append(std::string_view(p, count));
}
- utf8[nOut] = 0;
- return nOut;
-}
-
-// Allocate utf8 string and convert utf16 into it.
-char *utf16ToUtf8(const uint16_t *utf16, int *len)
-{
- const int n = utf16CountUtf8Bytes(utf16);
- if (len) {
- *len = n;
- }
- char *utf8 = (char *)gmalloc(n + 1);
- utf16ToUtf8(utf16, INT_MAX, utf8, n + 1);
return utf8;
}
@@ -539,28 +518,25 @@ std::string TextStringToUtf8(const std::string &textStr)
{
int i, len;
const char *s;
- char *utf8;
+ std::string utf8;
len = textStr.size();
s = textStr.c_str();
if (hasUnicodeByteOrderMark(textStr)) {
- uint16_t *utf16;
+ std::vector<uint16_t> utf16;
len = len / 2 - 1;
- utf16 = new uint16_t[len + 1];
+ utf16.resize(len + 1);
for (i = 0; i < len; i++) {
utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
}
utf16[i] = 0;
- utf8 = utf16ToUtf8(utf16);
- delete[] utf16;
+ utf8 = utf16ToUtf8(utf16.data(), utf16.size());
} else {
- utf8 = (char *)gmalloc(len + 1);
+ utf8.resize(len + 1);
for (i = 0; i < len; i++) {
utf8[i] = pdfDocEncoding[s[i] & 0xff];
}
utf8[i] = 0;
}
- std::string utf8_string(utf8);
- gfree(utf8);
- return utf8_string;
+ return utf8;
}
diff --git a/poppler/UTF.h b/poppler/UTF.h
index f09ce1cf..4486560f 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -134,14 +134,8 @@ int POPPLER_PRIVATE_EXPORT utf16CountUtf8Bytes(const uint16_t *utf16);
// maxUtf16 is set to the exact number of code units to convert.
// maxUtf16 - Maximum number of UTF-16 code units to convert. Conversion stops
// when either this count is reached or a null is encountered.
-// utf8 - Output buffer to write the UTF-8 string to. Output will always be
-// null terminated.
-// maxUtf8 - Maximum size of the output buffer including space for null.
-// Returns number of UTF-8 bytes written (excluding NULL).
-int POPPLER_PRIVATE_EXPORT utf16ToUtf8(const uint16_t *utf16, int maxUtf16, char *utf8, int maxUtf8);
-
-// Allocate utf8 string and convert utf16 into it.
-char POPPLER_PRIVATE_EXPORT *utf16ToUtf8(const uint16_t *utf16, int *len = nullptr);
+// Returns utf8 string.
+std::string POPPLER_PRIVATE_EXPORT utf16ToUtf8(const uint16_t *utf16, int maxUtf16 = INT_MAX);
// Convert a UCS-4 string to pure ASCII (7bit)
// in - UCS-4 string bytes
diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp
index 88a32359..05ef519e 100644
--- a/qt5/tests/check_utf_conversion.cpp
+++ b/qt5/tests/check_utf_conversion.cpp
@@ -79,14 +79,13 @@ void TestUTFConversion::testUTF_data()
void TestUTFConversion::testUTF()
{
- char utf8Buf[1000];
- char *utf8String;
+ std::string utf8String;
uint16_t utf16Buf[1000];
uint16_t *utf16String;
int len;
QFETCH(QString, s);
- char *str = strdup(s.toUtf8().constData());
+ QByteArray str = s.toUtf8();
// UTF-8 to UTF-16
@@ -111,17 +110,13 @@ void TestUTFConversion::testUTF()
len = utf16CountUtf8Bytes(s.utf16());
QCOMPARE(len, (int)strlen(str));
- Q_ASSERT(len < (int)sizeof(utf8Buf)); // if this fails, make utf8Buf larger
- len = utf16ToUtf8(s.utf16(), INT_MAX, utf8Buf, sizeof(utf8Buf));
- QVERIFY(compare(utf8Buf, str));
+ utf8String = utf16ToUtf8(s.utf16(), INT_MAX);
+ QVERIFY(compare(utf8String.c_str(), str));
QCOMPARE(len, (int)strlen(str));
utf8String = utf16ToUtf8(s.utf16());
- QVERIFY(compare(utf8String, str));
- free(utf8String);
-
- free(str);
+ QVERIFY(compare(utf8String.c_str(), str));
}
void TestUTFConversion::testUnicodeToAscii7()
diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp
index 62ce65eb..53fe4bec 100644
--- a/qt6/tests/check_utf_conversion.cpp
+++ b/qt6/tests/check_utf_conversion.cpp
@@ -77,14 +77,13 @@ void TestUTFConversion::testUTF_data()
void TestUTFConversion::testUTF()
{
- char utf8Buf[1000];
- char *utf8String;
+ std::string utf8String;
uint16_t utf16Buf[1000];
uint16_t *utf16String;
int len;
QFETCH(QString, s);
- char *str = strdup(s.toUtf8().constData());
+ QByteArray str = s.toUtf8().constData();
// UTF-8 to UTF-16
@@ -109,17 +108,13 @@ void TestUTFConversion::testUTF()
len = utf16CountUtf8Bytes(s.utf16());
QCOMPARE(len, (int)strlen(str));
- Q_ASSERT(len < (int)sizeof(utf8Buf)); // if this fails, make utf8Buf larger
- len = utf16ToUtf8(s.utf16(), INT_MAX, utf8Buf, sizeof(utf8Buf));
- QVERIFY(compare(utf8Buf, str));
+ utf8String = utf16ToUtf8(s.utf16(), INT_MAX);
+ QVERIFY(compare(utf8String.c_str(), str));
QCOMPARE(len, (int)strlen(str));
utf8String = utf16ToUtf8(s.utf16());
- QVERIFY(compare(utf8String, str));
- free(utf8String);
-
- free(str);
+ QVERIFY(compare(utf8String.c_str(), str));
}
void TestUTFConversion::testUnicodeToAscii7()
diff --git a/utils/Win32Console.cc b/utils/Win32Console.cc
index deac2458..181d3bb7 100644
--- a/utils/Win32Console.cc
+++ b/utils/Win32Console.cc
@@ -120,7 +120,8 @@ Win32Console::Win32Console(int *argc, char **argv[])
argList = new char *[numArgs];
privateArgList = new char *[numArgs];
for (int i = 0; i < numArgs; i++) {
- argList[i] = utf16ToUtf8((uint16_t *)(wargv[i]));
+ std::string arg = utf16ToUtf8((uint16_t *)(wargv[i]));
+ argList[i] = strdup(arg.c_str());
// parseArgs will rearrange the argv list so we keep our own copy
// to use for freeing all the strings
privateArgList[i] = argList[i];
@@ -155,7 +156,7 @@ Win32Console::~Win32Console()
flush(true);
if (argList) {
for (int i = 0; i < numArgs; i++)
- gfree(privateArgList[i]);
+ free(privateArgList[i]);
delete[] argList;
delete[] privateArgList;
}