summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristophe Fergeau <cfergeau@redhat.com>2017-09-26 11:02:58 +0200
committerAlbert Astals Cid <tsdgeos@yahoo.es>2019-03-28 11:41:28 +0000
commit0bb9dbc608a73df0a5579c0db3347f2d147266c4 (patch)
tree14dbf91164408c032845a8016161401fbb407dc9
parent926ea4645fa36d29d4bf89009719716668103366 (diff)
document: Handle UTF16-LE annotations
I can produce such annotations when adding annotations to a PDF attachment from the standard mail app on my iPhone (iOS 12.1). They currently all show as "ÿþÚ" rather than the actual string content. UTF16-BE vs UTF16-LE is detected by inferring the endianness from the first two bytes of the string, i.e. the byte order mark (BOM): 0xFE 0xFF for UTF16-BE and 0xFF 0xFE for UTF16-LE.
-rw-r--r--glib/poppler-document.cc4
-rw-r--r--goo/GooString.h1
2 files changed, 5 insertions, 0 deletions
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 9772c16a..78a57b10 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -844,6 +844,10 @@ char *_poppler_goo_string_to_utf8(const GooString *s)
result = g_convert (s->c_str () + 2,
s->getLength () - 2,
"UTF-8", "UTF-16BE", nullptr, nullptr, nullptr);
+ } else if (s->hasUnicodeMarkerLE()) {
+ result = g_convert (s->c_str () + 2,
+ s->getLength () - 2,
+ "UTF-8", "UTF-16LE", nullptr, nullptr, nullptr);
} else {
int len;
gunichar *ucs4_temp;
diff --git a/goo/GooString.h b/goo/GooString.h
index bae3a180..5b403e7d 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -172,6 +172,7 @@ public:
bool endsWith(const char *suffix) const;
bool hasUnicodeMarker() const { return size() >= 2 && (*this)[0] == char(0xfe) && (*this)[1] == char(0xff); }
+ bool hasUnicodeMarkerLE() const { return size() >= 2 && (*this)[0] == char(0xff) && (*this)[1] == char(0xfe); }
bool hasJustUnicodeMarker() const { return size() == 2 && hasUnicodeMarker(); }
void prependUnicodeMarker();