summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Adrian Johnson <ajohnson@redneon.com> 2021-07-02 22:09:07 +0930
committer Albert Astals Cid <tsdgeos@yahoo.es> 2021-07-29 21:52:32 +0000
commit 2636e51212b99359cc940b806d645a9e43c33d74 (patch)
tree 7c0478ff1a363ef4690ce5b40180c9e6579e2191
parent fc3afe21523b3fdc3a27254a3ef8139a82d35385 (diff)
Date string may be in unicode
-rw-r--r-- glib/poppler-document.cc 2
-rw-r--r-- poppler/DateInfo.cc 19
-rw-r--r-- poppler/DateInfo.h 2
-rw-r--r-- qt5/src/poppler-document.cc 3
-rw-r--r-- qt6/src/poppler-document.cc 3
-rw-r--r-- utils/pdfinfo.cc 55
-rw-r--r-- utils/pdftohtml.cc 3
7 files changed, 47 insertions, 40 deletions
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index eafe9b63..25cc39b4 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -3459,7 +3459,7 @@ GDateTime *_poppler_convert_pdf_date_to_date_time(const GooString *date)
int year, mon, day, hour, min, sec, tzHours, tzMins;
char tz;
- if (parseDateString(date->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+ if (parseDateString(date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
if (tz == '+' || tz == '-') {
gchar *identifier;
diff --git a/poppler/DateInfo.cc b/poppler/DateInfo.cc
index 010a0dfb..fef3c05d 100644
--- a/poppler/DateInfo.cc
+++ b/poppler/DateInfo.cc
@@ -26,16 +26,27 @@
#include <config.h>
#include "glibc.h"
+#include "gmem.h"
#include "DateInfo.h"
+#include "UTF.h"
#include <cstdio>
#include <cstring>
/* See PDF Reference 1.3, Section 3.8.2 for PDF Date representation */
-bool parseDateString(const char *dateString, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute)
+bool parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute)
{
- if (dateString == nullptr)
- return false;
+ Unicode *u;
+ int len = TextStringToUCS4(date, &u);
+ GooString s;
+ for (int i = 0; i < len; i++) {
+ // Ignore any non ASCII characters
+ if (u[i] < 128)
+ s.append(u[i]);
+ }
+ gfree(u);
+ const char *dateString = s.c_str();
+
if (strlen(dateString) < 2)
return false;
@@ -107,7 +118,7 @@ time_t dateStringToTime(const GooString *dateString)
struct tm tm;
time_t time;
- if (!parseDateString(dateString->c_str(), &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute))
+ if (!parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute))
return -1;
tm.tm_year = year - 1900;
diff --git a/poppler/DateInfo.h b/poppler/DateInfo.h
index 640f60d5..2b3f8a69 100644
--- a/poppler/DateInfo.h
+++ b/poppler/DateInfo.h
@@ -28,7 +28,7 @@
#include "poppler_private_export.h"
#include <ctime>
-bool POPPLER_PRIVATE_EXPORT parseDateString(const char *string, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute);
+bool POPPLER_PRIVATE_EXPORT parseDateString(const GooString *date, int *year, int *month, int *day, int *hour, int *minute, int *second, char *tz, int *tzHour, int *tzMinute);
/* Converts the time_t into a PDF Date format string.
* If timeA is NULL, current time is used.
diff --git a/qt5/src/poppler-document.cc b/qt5/src/poppler-document.cc
index 3460cf53..05fed3d8 100644
--- a/qt5/src/poppler-document.cc
+++ b/qt5/src/poppler-document.cc
@@ -853,7 +853,8 @@ QDateTime convertDate(const char *dateString)
int year, mon, day, hour, min, sec, tzHours, tzMins;
char tz;
- if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+ GooString date(dateString);
+ if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
QDate d(year, mon, day);
QTime t(hour, min, sec);
if (d.isValid() && t.isValid()) {
diff --git a/qt6/src/poppler-document.cc b/qt6/src/poppler-document.cc
index 4725c1b7..46cd7aff 100644
--- a/qt6/src/poppler-document.cc
+++ b/qt6/src/poppler-document.cc
@@ -829,7 +829,8 @@ QDateTime convertDate(const char *dateString)
int year, mon, day, hour, min, sec, tzHours, tzMins;
char tz;
- if (parseDateString(dateString, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
+ GooString date(dateString);
+ if (parseDateString(&date, &year, &mon, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins)) {
QDate d(year, mon, day);
QTime t(hour, min, sec);
if (d.isValid() && t.isValid()) {
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index cdc88d32..9726c1a9 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -107,30 +107,33 @@ static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to
{ "-?", argFlag, &printHelp, 0, "print usage information" },
{} };
-static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
+static void printTextString(const GooString *s, const UnicodeMap *uMap)
{
- const GooString *s1;
Unicode *u;
char buf[8];
- int i, n, len;
+ int len = TextStringToUCS4(s, &u);
+ for (int i = 0; i < len; i++) {
+ int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
+ fwrite(buf, 1, n, stdout);
+ }
+ gfree(u);
+}
+
+static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
+{
+ const GooString *s1;
Object obj = infoDict->lookup(key);
if (obj.isString()) {
fputs(text, stdout);
s1 = obj.getString();
- len = TextStringToUCS4(s1, &u);
- for (i = 0; i < len; i++) {
- n = uMap->mapUnicode(u[i], buf, sizeof(buf));
- fwrite(buf, 1, n, stdout);
- }
- gfree(u);
+ printTextString(s1, uMap);
fputc('\n', stdout);
}
}
-static void printInfoDate(Dict *infoDict, const char *key, const char *text)
+static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
{
- const char *s;
int year, mon, day, hour, min, sec, tz_hour, tz_minute;
char tz;
struct tm tmStruct;
@@ -140,7 +143,7 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text)
Object obj = infoDict->lookup(key);
if (obj.isString()) {
fputs(text, stdout);
- s = obj.getString()->c_str();
+ const GooString *s = obj.getString();
// TODO do something with the timezone info
if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
tmStruct.tm_year = year - 1900;
@@ -163,25 +166,24 @@ static void printInfoDate(Dict *infoDict, const char *key, const char *text)
strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
fputs(buf, stdout);
} else {
- fputs(s, stdout);
+ printTextString(s, uMap);
}
} else {
- fputs(s, stdout);
+ printTextString(s, uMap);
}
fputc('\n', stdout);
}
}
-static void printISODate(Dict *infoDict, const char *key, const char *text)
+static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
{
- const char *s;
int year, mon, day, hour, min, sec, tz_hour, tz_minute;
char tz;
Object obj = infoDict->lookup(key);
if (obj.isString()) {
fputs(text, stdout);
- s = obj.getString()->c_str();
+ const GooString *s = obj.getString();
if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
if (tz_hour == 0 && tz_minute == 0) {
@@ -192,7 +194,7 @@ static void printISODate(Dict *infoDict, const char *key, const char *text)
fprintf(stdout, ":%02d", tz_minute);
}
} else {
- fputs(s, stdout);
+ printTextString(obj.getString(), uMap);
}
fputc('\n', stdout);
}
@@ -389,14 +391,7 @@ static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
printf("%4d ", i);
printLinkDest(it.second);
printf(" \"");
- Unicode *u;
- char buf[8];
- const int len = TextStringToUCS4(it.first, &u);
- for (int j = 0; j < len; j++) {
- const int n = uMap->mapUnicode(u[j], buf, sizeof(buf));
- fwrite(buf, 1, n, stdout);
- }
- gfree(u);
+ printTextString(it.first, uMap);
printf("\"\n");
delete it.first;
}
@@ -657,14 +652,14 @@ static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, b
printInfoString(info.getDict(), "Creator", "Creator: ", uMap);
printInfoString(info.getDict(), "Producer", "Producer: ", uMap);
if (isoDates) {
- printISODate(info.getDict(), "CreationDate", "CreationDate: ");
- printISODate(info.getDict(), "ModDate", "ModDate: ");
+ printISODate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
+ printISODate(info.getDict(), "ModDate", "ModDate: ", uMap);
} else if (rawDates) {
printInfoString(info.getDict(), "CreationDate", "CreationDate: ", uMap);
printInfoString(info.getDict(), "ModDate", "ModDate: ", uMap);
} else {
- printInfoDate(info.getDict(), "CreationDate", "CreationDate: ");
- printInfoDate(info.getDict(), "ModDate", "ModDate: ");
+ printInfoDate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
+ printInfoDate(info.getDict(), "ModDate", "ModDate: ", uMap);
}
}
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 9f3fa17a..ebd72c12 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -447,7 +447,6 @@ static GooString *getInfoString(Dict *infoDict, const char *key)
static GooString *getInfoDate(Dict *infoDict, const char *key)
{
Object obj;
- const char *s;
int year, mon, day, hour, min, sec, tz_hour, tz_minute;
char tz;
struct tm tmStruct;
@@ -456,7 +455,7 @@ static GooString *getInfoDate(Dict *infoDict, const char *key)
obj = infoDict->lookup(key);
if (obj.isString()) {
- s = obj.getString()->c_str();
+ const GooString *s = obj.getString();
// TODO do something with the timezone info
if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
tmStruct.tm_year = year - 1900;