summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Oliver Sander <oliver.sander@tu-dresden.de> 2024-04-17 09:23:46 +0200
committer Albert Astals Cid <aacid@kde.org> 2024-04-20 10:30:56 +0000
commit 1f06dca08c32ed18c3030530d98a0e30d41dd7a2 (patch)
tree e7eae08c5f3fe7029559c983c39c58a9d5e15afe
parent 98fabb298b0e8eaef9193bbce68c99c85473a314 (diff)
Move method GooString::hasUnicodeMarkerLE to UTF.h
... and rename it to hasUnicodeByteOrderMarkLE. This allows GooString to be replaced by std::string in a few places (in a future commit).
-rw-r--r-- cpp/poppler-private.cpp 2
-rw-r--r-- glib/poppler-document.cc 2
-rw-r--r-- goo/GooString.h 3
-rw-r--r-- poppler/UTF.cc 2
-rw-r--r-- poppler/UTF.h 11
-rw-r--r-- qt5/src/poppler-private.cc 2
-rw-r--r-- qt6/src/poppler-private.cc 2
7 files changed, 15 insertions, 9 deletions
diff --git a/cpp/poppler-private.cpp b/cpp/poppler-private.cpp
index 6953724f..9e89c7b8 100644
--- a/cpp/poppler-private.cpp
+++ b/cpp/poppler-private.cpp
@@ -65,7 +65,7 @@ ustring detail::unicode_GooString_to_ustring(const GooString *str)
const char *data = str->c_str();
const int len = str->getLength();
- const bool is_unicodeLE = str->hasUnicodeMarkerLE();
+ const bool is_unicodeLE = hasUnicodeByteOrderMarkLE(str->toStr());
const bool is_unicode = hasUnicodeByteOrderMark(str->toStr()) || is_unicodeLE;
int i = is_unicode ? 2 : 0;
ustring::size_type ret_len = len - i;
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 9c516146..5dc034c5 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -1101,7 +1101,7 @@ char *_poppler_goo_string_to_utf8(const GooString *s)
if (hasUnicodeByteOrderMark(s->toStr())) {
result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
- } else if (s->hasUnicodeMarkerLE()) {
+ } else if (hasUnicodeByteOrderMarkLE(s->toStr())) {
result = g_convert(s->c_str() + 2, s->getLength() - 2, "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr);
} else {
int len;
diff --git a/goo/GooString.h b/goo/GooString.h
index 33f037e0..05aa630f 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -243,9 +243,6 @@ public:
// Return true if string ends with suffix
using std::string::ends_with;
- bool hasUnicodeMarkerLE() const { return hasUnicodeMarkerLE(*this); }
- static bool hasUnicodeMarkerLE(const std::string &s) { return s.size() >= 2 && s[0] == '\xff' && s[1] == '\xfe'; }
-
POPPLER_PRIVATE_EXPORT void prependUnicodeMarker();
};
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 48645780..74c5a113 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -91,7 +91,7 @@ std::vector<Unicode> TextStringToUCS4(const std::string &textStr)
if (hasUnicodeByteOrderMark(textStr)) {
isUnicode = true;
isUnicodeLE = false;
- } else if (GooString::hasUnicodeMarkerLE(textStr)) {
+ } else if (hasUnicodeByteOrderMarkLE(textStr)) {
isUnicode = false;
isUnicodeLE = true;
} else {
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 9f0290db..812506eb 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -27,9 +27,12 @@
#include "CharTypes.h"
#include "poppler_private_export.h"
-// Magic bytes that mark the byte order in a UTF-16 unicode string
+// Magic bytes that mark the byte order in a UTF-16 unicode string (big-endian case)
constexpr std::string_view unicodeByteOrderMark = "\xFE\xFF";
+// Magic bytes that mark the byte order in a UTF-16 unicode string (little-endian case)
+constexpr std::string_view unicodeByteOrderMarkLE = "\xFF\xFE";
+
// Convert a UTF-16 string to a UCS-4
// utf16 - utf16 bytes
// utf16_len - number of UTF-16 characters
@@ -54,6 +57,12 @@ inline bool hasUnicodeByteOrderMark(const std::string &s)
return s.starts_with(unicodeByteOrderMark);
}
+// check whether string starts with Little-Endian byte order mark
+inline bool hasUnicodeByteOrderMarkLE(const std::string &s)
+{
+ return s.starts_with(unicodeByteOrderMarkLE);
+}
+
// is a unicode whitespace character
bool UnicodeIsWhitespace(Unicode ucs4);
diff --git a/qt5/src/poppler-private.cc b/qt5/src/poppler-private.cc
index 3f4d286f..3e0f0f57 100644
--- a/qt5/src/poppler-private.cc
+++ b/qt5/src/poppler-private.cc
@@ -122,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1)
return QString();
}
- if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) {
+ if (hasUnicodeByteOrderMark(s1) || hasUnicodeByteOrderMarkLE(s1)) {
return QString::fromUtf16(reinterpret_cast<const ushort *>(s1.c_str()), s1.size() / 2);
} else {
int stringLength;
diff --git a/qt6/src/poppler-private.cc b/qt6/src/poppler-private.cc
index 052360bd..480bb5ec 100644
--- a/qt6/src/poppler-private.cc
+++ b/qt6/src/poppler-private.cc
@@ -122,7 +122,7 @@ QString UnicodeParsedString(const std::string &s1)
return QString();
}
- if (hasUnicodeByteOrderMark(s1) || GooString::hasUnicodeMarkerLE(s1)) {
+ if (hasUnicodeByteOrderMark(s1) || hasUnicodeByteOrderMarkLE(s1)) {
return QString::fromUtf16(reinterpret_cast<const char16_t *>(s1.c_str()), s1.size() / 2);
} else {
int stringLength;