summary | refs | log | tree | commit | diff
path: root/poppler/UTF.h
diff options
context:
space:
mode:
author: Jason Crain <jason@inspiresomeone.us> 2016-09-28 14:56:02 +0000
committer: Carlos Garcia Campos <carlosgc@gnome.org> 2016-10-03 17:58:45 +0200
commit: 3cfbc4efde1df6dcb9ef18a0fb26c7e199e6e8f5 (patch)
tree: ad06cc977648a7385dff87d02d340d6097327c64 /poppler/UTF.h
parent: 27cf7fabad27648019b36b2d6352e6767bfc8689 (diff)
TextOutputDev: Break words on all whitespace characters
Some PDF creators like Chrome use no-break spaces or other whitespace characters between words, causing pdftotext -bbox to not break words as expected. Fix this by breaking words on any character with the Unicode whitespace property. Bug #97399
Diffstat (limited to 'poppler/UTF.h')
-rw-r--r-- poppler/UTF.h 2
1 files changed, 2 insertions, 0 deletions
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 248c168c..5a479020 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -35,5 +35,7 @@ int TextStringToUCS4(GooString *textStr, Unicode **ucs4);
// check if UCS-4 character is valid
bool UnicodeIsValid(Unicode ucs4);
+// is a unicode whitespace character
+bool UnicodeIsWhitespace(Unicode ucs4);
#endif