diff options
author | Nelson Benítez León <nbenitezl@gmail.com> | 2020-07-09 01:37:20 -0400 |
---|---|---|
committer | Albert Astals Cid <tsdgeos@yahoo.es> | 2020-07-10 20:54:23 +0000 |
commit | 232cba307e8be35022426ba85f34198af7406899 (patch) | |
tree | 79de8273a02f4ebec61853dae3dffd947b2cd9ec | |
parent | d3af7282507be3846c680a4f66b84b6b3e54853a (diff) |
Make TextStringToUCS4() support UTF16-LE too
UTF16-LE strings can 'de facto' appear in PDFs
(e.g. the title of Outline items) and Acrobat displays
them fine, so let's support them so we don't
show an ugly 'ÿþ' at the start of the text (Okular)
or even no text at all (Evince).
Issue #941
Evince issue:
https://gitlab.gnome.org/GNOME/evince/-/issues/1444
-rw-r--r-- | poppler/UTF.cc | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/poppler/UTF.cc b/poppler/UTF.cc index 112986af..d231bde1 100644 --- a/poppler/UTF.cc +++ b/poppler/UTF.cc @@ -90,6 +90,7 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4) int i, len; const char *s; Unicode *u; + bool isUnicode, isUnicodeLE; len = textStr->getLength(); s = textStr->c_str(); @@ -99,12 +100,26 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4) } if (textStr->hasUnicodeMarker()) { + isUnicode = true; + isUnicodeLE = false; + } else if (textStr->hasUnicodeMarkerLE()) { + isUnicode = false; + isUnicodeLE = true; + } else { + isUnicode = false; + isUnicodeLE = false; + } + + if (isUnicode || isUnicodeLE) { Unicode *utf16; len = len / 2 - 1; if (len > 0) { utf16 = new Unicode[len]; for (i = 0; i < len; i++) { - utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff); + if (isUnicode) + utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff); + else // UnicodeLE + utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8; } len = UTF16toUCS4(utf16, len, &u); delete[] utf16; |