diff options
author | Adrian Johnson <ajohnson@redneon.com> | 2021-07-02 22:09:07 +0930 |
---|---|---|
committer | Albert Astals Cid <tsdgeos@yahoo.es> | 2021-07-29 21:52:32 +0000 |
commit | 2636e51212b99359cc940b806d645a9e43c33d74 (patch) | |
tree | 7c0478ff1a363ef4690ce5b40180c9e6579e2191 | |
parent | fc3afe21523b3fdc3a27254a3ef8139a82d35385 (diff) |
Date string may be in unicode
-rw-r--r-- | glib/poppler-document.cc | 2 | ||||
-rw-r--r-- | poppler/DateInfo.cc | 19 | ||||
-rw-r--r-- | poppler/DateInfo.h | 2 | ||||
-rw-r--r-- | qt5/src/poppler-document.cc | 3 | ||||
-rw-r--r-- | qt6/src/poppler-document.cc | 3 | ||||
-rw-r--r-- | utils/pdfinfo.cc | 55 | ||||
-rw-r--r-- | utils/pdftohtml.cc | 3 |
7 files changed, 47 insertions, 40 deletions
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index eafe9b63..25cc39b4 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -3459,7 +3459,7 @@ GDateTime *_poppler_convert_pdf_date_to_date_time(const GooString *date) int year, mon, day, hour, min, sec, tzHours, tzMins; char tz; - if (parseDateString(date->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { + if (parseDateString(date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { if (tz == '+' || tz == '-') { gchar *identifier; diff --git a/poppler/DateInfo.cc b/poppler/DateInfo.cc index 010a0dfb..fef3c05d 100644 --- a/poppler/DateInfo.cc +++ b/poppler/DateInfo.cc @@ -26,16 +26,27 @@ #include <config.h> #include "glibc.h" +#include "gmem.h" #include "DateInfo.h" +#include "UTF.h" #include <cstdio> #include <cstring> /* See PDF Reference 1.3, Section 3.8.2 for PDF Date representation */ -bool parseDateString(const char *dateString, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute) +bool parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute) { - if (dateString == nullptr) - return false; + Unicode *u; + int len = TextStringToUCS4(date, &u); + GooString s; + for (int i = 0; i < len; i++) { + // Ignore any non ASCII characters + if (u[i] < 128) + s.append(u[i]); + } + gfree(u); + const char *dateString = s.c_str(); + if (strlen(dateString) < 2) return false; @@ -107,7 +118,7 @@ time_t dateStringToTime(const GooString *dateString) struct tm tm; time_t time; - if (!parseDateString(dateString->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) + if (!parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) return -1; tm.tm_year = year - 1900; diff --git a/poppler/DateInfo.h b/poppler/DateInfo.h index 640f60d5..2b3f8a69 100644 --- a/poppler/DateInfo.h +++ b/poppler/DateInfo.h @@ -28,7 +28,7 @@ #include "poppler_private_export.h" #include <ctime> -bool POPPLER_PRIVATE_EXPORT parseDateString(const char *string, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute); +bool POPPLER_PRIVATE_EXPORT parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute); /* Converts the time_t into a PDF Date format string. * If timeA is NULL, current time is used. diff --git a/qt5/src/poppler-document.cc b/qt5/src/poppler-document.cc index 3460cf53..05fed3d8 100644 --- a/qt5/src/poppler-document.cc +++ b/qt5/src/poppler-document.cc @@ -853,7 +853,8 @@ QDateTime convertDate(const char *dateString) int year, mon, day, hour, min, sec, tzHours, tzMins; char tz; - if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { + GooString date(dateString); + if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { QDate d(year, mon, day); QTime t(hour, min, sec); if (d.isValid() && t.isValid()) { diff --git a/qt6/src/poppler-document.cc b/qt6/src/poppler-document.cc index 4725c1b7..46cd7aff 100644 --- a/qt6/src/poppler-document.cc +++ b/qt6/src/poppler-document.cc @@ -829,7 +829,8 @@ QDateTime convertDate(const char *dateString) int year, mon, day, hour, min, sec, tzHours, tzMins; char tz; - if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { + GooString date(dateString); + if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) { QDate d(year, mon, day); QTime t(hour, min, sec); if (d.isValid() && t.isValid()) { diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc index cdc88d32..9726c1a9 100644 --- a/utils/pdfinfo.cc +++ b/utils/pdfinfo.cc @@ -107,30 +107,33 @@ static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to { "-?", argFlag, &printHelp, 0, "print usage information" }, {} }; -static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) +static void printTextString(const GooString *s, const UnicodeMap *uMap) { - const GooString *s1; Unicode *u; char buf[8]; - int i, n, len; + int len = TextStringToUCS4(s, &u); + for (int i = 0; i < len; i++) { + int n = uMap->mapUnicode(u[i], buf, sizeof(buf)); + fwrite(buf, 1, n, stdout); + } + gfree(u); +} + +static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) +{ + const GooString *s1; Object obj = infoDict->lookup(key); if (obj.isString()) { fputs(text, stdout); s1 = obj.getString(); - len = TextStringToUCS4(s1, &u); - for (i = 0; i < len; i++) { - n = uMap->mapUnicode(u[i], buf, sizeof(buf)); - fwrite(buf, 1, n, stdout); - } - gfree(u); + printTextString(s1, uMap); fputc('\n', stdout); } } -static void printInfoDate(Dict *infoDict, const char *key, const char *text) +static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) { - const char *s; int year, mon, day, hour, min, sec, tz_hour, tz_minute; char tz; struct tm tmStruct; @@ -140,7 +143,7 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text) Object obj = infoDict->lookup(key); if (obj.isString()) { fputs(text, stdout); - s = obj.getString()->c_str(); + const GooString *s = obj.getString(); // TODO do something with the timezone info if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) { tmStruct.tm_year = year - 1900; @@ -163,25 +166,24 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text) strftime(buf, sizeof(buf), "%c %Z", &tmStruct); fputs(buf, stdout); } else { - fputs(s, stdout); + printTextString(s, uMap); } } else { - fputs(s, stdout); + printTextString(s, uMap); } fputc('\n', stdout); } } -static void printISODate(Dict *infoDict, const char *key, const char *text) +static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap) { - const char *s; int year, mon, day, hour, min, sec, tz_hour, tz_minute; char tz; Object obj = infoDict->lookup(key); if (obj.isString()) { fputs(text, stdout); - s = obj.getString()->c_str(); + const GooString *s = obj.getString(); if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) { fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec); if (tz_hour == 0 && tz_minute == 0) { @@ -192,7 +194,7 @@ static void printISODate(Dict *infoDict, const char *key, const char *text) fprintf(stdout, ":%02d", tz_minute); } } else { - fputs(s, stdout); + printTextString(obj.getString(), uMap); } fputc('\n', stdout); } @@ -389,14 +391,7 @@ static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap) printf("%4d ", i); printLinkDest(it.second); printf(" \""); - Unicode *u; - char buf[8]; - const int len = TextStringToUCS4(it.first, &u); - for (int j = 0; j < len; j++) { - const int n = uMap->mapUnicode(u[j], buf, sizeof(buf)); - fwrite(buf, 1, n, stdout); - } - gfree(u); + printTextString(it.first, uMap); printf("\"\n"); delete it.first; } @@ -657,14 +652,14 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b printInfoString(info.getDict(), "Creator", "Creator: ", uMap); printInfoString(info.getDict(), "Producer", "Producer: ", uMap); if (isoDates) { - printISODate(info.getDict(), "CreationDate", "CreationDate: "); - printISODate(info.getDict(), "ModDate", "ModDate: "); + printISODate(info.getDict(), "CreationDate", "CreationDate: ", uMap); + printISODate(info.getDict(), "ModDate", "ModDate: ", uMap); } else if (rawDates) { printInfoString(info.getDict(), "CreationDate", "CreationDate: ", uMap); printInfoString(info.getDict(), "ModDate", "ModDate: ", uMap); } else { - printInfoDate(info.getDict(), "CreationDate", "CreationDate: "); - printInfoDate(info.getDict(), "ModDate", "ModDate: "); + printInfoDate(info.getDict(), "CreationDate", "CreationDate: ", uMap); + printInfoDate(info.getDict(), "ModDate", "ModDate: ", uMap); } } diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index 9f3fa17a..ebd72c12 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -447,7 +447,6 @@ static GooString *getInfoString(Dict *infoDict, const char *key) static GooString *getInfoDate(Dict *infoDict, const char *key) { Object obj; - const char *s; int year, mon, day, hour, min, sec, tz_hour, tz_minute; char tz; struct tm tmStruct; @@ -456,7 +455,7 @@ static GooString *getInfoDate(Dict *infoDict, const char *key) obj = infoDict->lookup(key); if (obj.isString()) { - s = obj.getString()->c_str(); + const GooString *s = obj.getString(); // TODO do something with the timezone info if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) { tmStruct.tm_year = year - 1900; |