diff options
author | Christophe Fergeau <cfergeau@redhat.com> | 2017-09-26 11:02:58 +0200 |
---|---|---|
committer | Albert Astals Cid <tsdgeos@yahoo.es> | 2019-03-28 11:41:28 +0000 |
commit | 0bb9dbc608a73df0a5579c0db3347f2d147266c4 (patch) | |
tree | 14dbf91164408c032845a8016161401fbb407dc9 | |
parent | 926ea4645fa36d29d4bf89009719716668103366 (diff) |
document: Handle UTF16-LE annotations
I can produce such annotations when adding annotations to a PDF
attachment from the standard mail app on my iPhone (iOS 12.1).
They currently all show as "ÿþÚ" rather than the actual string content.
UTF16-BE vs UTF16-LE is detected by inferring the endianness from the
first two bytes of the string, the Byte Order Mark (0xFE 0xFF for
big-endian, 0xFF 0xFE for little-endian).
-rw-r--r-- | glib/poppler-document.cc | 4 | ||||
-rw-r--r-- | goo/GooString.h | 1 |
2 files changed, 5 insertions, 0 deletions
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index 9772c16a..78a57b10 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -844,6 +844,10 @@ char *_poppler_goo_string_to_utf8(const GooString *s) result = g_convert (s->c_str () + 2, s->getLength () - 2, "UTF-8", "UTF-16BE", nullptr, nullptr, nullptr); + } else if (s->hasUnicodeMarkerLE()) { + result = g_convert (s->c_str () + 2, + s->getLength () - 2, + "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr); } else { int len; gunichar *ucs4_temp; diff --git a/goo/GooString.h b/goo/GooString.h index bae3a180..5b403e7d 100644 --- a/goo/GooString.h +++ b/goo/GooString.h @@ -172,6 +172,7 @@ public: bool endsWith(const char *suffix) const; bool hasUnicodeMarker() const { return size() >= 2 && (*this)[0] == char(0xfe) && (*this)[1] == char(0xff); } + bool hasUnicodeMarkerLE() const { return size() >= 2 && (*this)[0] == char(0xff) && (*this)[1] == char(0xfe); } bool hasJustUnicodeMarker() const { return size() == 2 && hasUnicodeMarker(); } void prependUnicodeMarker(); |