summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNelson Benítez León <nbenitezl@gmail.com>2020-07-09 01:37:20 -0400
committerAlbert Astals Cid <tsdgeos@yahoo.es>2020-07-10 20:54:23 +0000
commit232cba307e8be35022426ba85f34198af7406899 (patch)
tree79de8273a02f4ebec61853dae3dffd947b2cd9ec
parentd3af7282507be3846c680a4f66b84b6b3e54853a (diff)
Make TextStringToUCS4() support UTF16-LE too
UTF16-LE strings can 'de facto' appear in PDFs (e.g. the title of Outline items) and Acrobat displays them fine, so let's support them so we don't show an ugly 'ÿþ' at the start of the text (Okular) or even no text at all (Evince). Issue #941. Evince issue: https://gitlab.gnome.org/GNOME/evince/-/issues/1444
-rw-r--r--poppler/UTF.cc17
1 files changed, 16 insertions, 1 deletions
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 112986af..d231bde1 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -90,6 +90,7 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4)
int i, len;
const char *s;
Unicode *u;
+ bool isUnicode, isUnicodeLE;
len = textStr->getLength();
s = textStr->c_str();
@@ -99,12 +100,26 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4)
}
if (textStr->hasUnicodeMarker()) {
+ isUnicode = true;
+ isUnicodeLE = false;
+ } else if (textStr->hasUnicodeMarkerLE()) {
+ isUnicode = false;
+ isUnicodeLE = true;
+ } else {
+ isUnicode = false;
+ isUnicodeLE = false;
+ }
+
+ if (isUnicode || isUnicodeLE) {
Unicode *utf16;
len = len / 2 - 1;
if (len > 0) {
utf16 = new Unicode[len];
for (i = 0; i < len; i++) {
- utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
+ if (isUnicode)
+ utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff);
+ else // UnicodeLE
+ utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8;
}
len = UTF16toUCS4(utf16, len, &u);
delete[] utf16;