summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- poppler/GlobalParams.cc 35
-rw-r--r-- poppler/GlobalParams.h 13
-rw-r--r-- poppler/TextOutputDev.cc 18
-rw-r--r-- poppler/TextOutputDev.h 22
-rw-r--r-- utils/pdftotext.cc 26
5 files changed, 49 insertions, 65 deletions
diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc
index 58364cb5..ea5d8125 100644
--- a/poppler/GlobalParams.cc
+++ b/poppler/GlobalParams.cc
@@ -403,12 +403,6 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
psShrinkLarger = true;
psLevel = psLevel2;
textEncoding = new GooString("UTF-8");
-#if defined(_WIN32)
- textEOL = eolDOS;
-#else
- textEOL = eolUnix;
-#endif
- textPageBreaks = true;
overprintPreview = false;
printCommands = false;
profileCommands = false;
@@ -1138,16 +1132,6 @@ std::string GlobalParams::getTextEncodingName() const {
return textEncoding->toStr();
}
-EndOfLineKind GlobalParams::getTextEOL() {
- globalParamsLocker();
- return textEOL;
-}
-
-bool GlobalParams::getTextPageBreaks() {
- globalParamsLocker();
- return textPageBreaks;
-}
-
bool GlobalParams::getPrintCommands() {
globalParamsLocker();
return printCommands;
@@ -1246,25 +1230,6 @@ void GlobalParams::setTextEncoding(const char *encodingName) {
textEncoding = new GooString(encodingName);
}
-bool GlobalParams::setTextEOL(const char *s) {
- globalParamsLocker();
- if (!strcmp(s, "unix")) {
- textEOL = eolUnix;
- } else if (!strcmp(s, "dos")) {
- textEOL = eolDOS;
- } else if (!strcmp(s, "mac")) {
- textEOL = eolMac;
- } else {
- return false;
- }
- return true;
-}
-
-void GlobalParams::setTextPageBreaks(bool pageBreaks) {
- globalParamsLocker();
- textPageBreaks = pageBreaks;
-}
-
void GlobalParams::setOverprintPreview(bool overprintPreviewA) {
globalParamsLocker();
overprintPreview = overprintPreviewA;
diff --git a/poppler/GlobalParams.h b/poppler/GlobalParams.h
index 9363be34..755cdc1a 100644
--- a/poppler/GlobalParams.h
+++ b/poppler/GlobalParams.h
@@ -88,12 +88,6 @@ enum PSLevel {
//------------------------------------------------------------------------
-enum EndOfLineKind {
- eolUnix, // LF
- eolDOS, // CR+LF
- eolMac // CR
-};
-
//------------------------------------------------------------------------
class GlobalParams {
@@ -134,8 +128,6 @@ public:
bool getPSShrinkLarger();
PSLevel getPSLevel();
std::string getTextEncodingName() const;
- EndOfLineKind getTextEOL();
- bool getTextPageBreaks();
bool getOverprintPreview() { return overprintPreview; }
bool getPrintCommands();
bool getProfileCommands();
@@ -154,8 +146,6 @@ public:
void setPSShrinkLarger(bool shrink);
void setPSLevel(PSLevel level);
void setTextEncoding(const char *encodingName);
- bool setTextEOL(const char *s);
- void setTextPageBreaks(bool pageBreaks);
void setOverprintPreview(bool overprintPreviewA);
void setPrintCommands(bool printCommandsA);
void setProfileCommands(bool profileCommandsA);
@@ -209,9 +199,6 @@ private:
PSLevel psLevel; // PostScript level to generate
GooString *textEncoding; // encoding (unicodeMap) to use for text
// output
- EndOfLineKind textEOL; // type of EOL marker to use for text
- // output
- bool textPageBreaks; // insert end-of-page markers?
bool overprintPreview; // enable overprint preview
bool printCommands; // print the drawing commands
bool profileCommands; // profile the drawing commands
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 6447eedd..2c630403 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4166,7 +4166,7 @@ bool TextPage::findText(const Unicode *s, int len,
}
GooString *TextPage::getText(double xMin, double yMin,
- double xMax, double yMax) const {
+ double xMax, double yMax, EndOfLineKind textEOL) const {
GooString *s;
UnicodeMap *uMap;
TextBlock *blk;
@@ -4209,7 +4209,7 @@ GooString *TextPage::getText(double xMin, double yMin,
spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
eolLen = 0; // make gcc happy
- switch (globalParams->getTextEOL()) {
+ switch (textEOL) {
case eolUnix:
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
break;
@@ -5289,7 +5289,7 @@ bool TextPage::findCharRange(int pos, int length,
}
void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
- bool physLayout) {
+ bool physLayout, EndOfLineKind textEOL, bool pageBreaks) {
UnicodeMap *uMap;
TextFlow *flow;
TextBlock *blk;
@@ -5300,7 +5300,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
TextLineFrag *frag;
char space[8], eol[16], eop[8];
int spaceLen, eolLen, eopLen;
- bool pageBreaks;
GooString *s;
double delta;
int col, i, j, d, n;
@@ -5311,7 +5310,7 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
}
spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
eolLen = 0; // make gcc happy
- switch (globalParams->getTextEOL()) {
+ switch (textEOL) {
case eolUnix:
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
break;
@@ -5324,7 +5323,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
break;
}
eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
- pageBreaks = globalParams->getTextPageBreaks();
//~ writing mode (horiz/vert)
@@ -5674,6 +5672,8 @@ TextOutputDev::TextOutputDev(const char *fileName, bool physLayoutA,
rawOrder = rawOrderA;
discardDiag = discardDiagA;
doHTML = false;
+ textEOL = defaultEndOfLine();
+ textPageBreaks = true;
ok = true;
// open file
@@ -5716,6 +5716,8 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
doHTML = false;
text = new TextPage(rawOrderA, discardDiagA);
actualText = new ActualText(text);
+ textEOL = defaultEndOfLine();
+ textPageBreaks = true;
ok = true;
}
@@ -5737,7 +5739,7 @@ void TextOutputDev::endPage() {
text->endPage();
text->coalesce(physLayout, fixedPitch, doHTML);
if (outputStream) {
- text->dump(outputStream, outputFunc, physLayout);
+ text->dump(outputStream, outputFunc, physLayout, textEOL, textPageBreaks);
}
}
@@ -5934,7 +5936,7 @@ bool TextOutputDev::findText(const Unicode *s, int len,
GooString *TextOutputDev::getText(double xMin, double yMin,
double xMax, double yMax) const {
- return text->getText(xMin, yMin, xMax, yMax);
+ return text->getText(xMin, yMin, xMax, yMax, textEOL);
}
void TextOutputDev::drawSelection(OutputDev *out,
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 51e397f5..0d008b3d 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -70,6 +70,12 @@ enum SelectionStyle {
selectionStyleLine
};
+enum EndOfLineKind { // end-of-line marker convention for text output
+ eolUnix, // LF (U+000A)
+ eolDOS, // CR+LF
+ eolMac // CR
+};
+
//------------------------------------------------------------------------
// TextFontInfo
//------------------------------------------------------------------------
@@ -626,7 +632,7 @@ public:
// Get the text which is inside the specified rectangle.
GooString *getText(double xMin, double yMin,
- double xMax, double yMax) const;
+ double xMax, double yMax, EndOfLineKind textEOL) const;
void visitSelection(TextSelectionVisitor *visitor,
const PDFRectangle *selection,
@@ -659,7 +665,7 @@ public:
// Dump contents of page to a file.
void dump(void *outputStream, TextOutputFunc outputFunc,
- bool physLayout);
+ bool physLayout, EndOfLineKind textEOL, bool pageBreaks);
// Get the head of the linked list of TextFlows.
const TextFlow *getFlows() const { return flows; }
@@ -912,6 +918,16 @@ public:
// last rasterized page.
const TextFlow *getFlows() const;
+ static constexpr EndOfLineKind defaultEndOfLine() { // platform default EOL kind: eolDOS when _WIN32 is defined, eolUnix otherwise
+#if defined(_WIN32)
+ return eolDOS;
+#else
+ return eolUnix;
+#endif
+ }
+ void setTextEOL(EndOfLineKind textEOLA) { textEOL = textEOLA; } // EOL kind that endPage() and getText() forward to the TextPage
+ void setTextPageBreaks(bool textPageBreaksA) { textPageBreaks = textPageBreaksA; } // page-break flag that endPage() forwards to TextPage::dump()
+
private:
TextOutputFunc outputFunc; // output function
@@ -930,6 +946,8 @@ private:
// to skip watermarks drawn on top of body text, etc.
bool doHTML; // extra processing for HTML conversion
bool ok; // set up ok?
+ bool textPageBreaks; // insert end-of-page markers?
+ EndOfLineKind textEOL; // type of EOL marker to use
ActualText *actualText;
};
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 1d366234..53f2e131 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -88,7 +88,7 @@ static bool rawOrder = false;
static bool discardDiag = false;
static bool htmlMeta = false;
static char textEncName[128] = "";
-static char textEOL[16] = "";
+static char textEOLStr[16] = "";
static bool noPageBreaks = false;
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
@@ -126,7 +126,7 @@ static const ArgDesc argDesc[] = {
"output text encoding name"},
{"-listenc",argFlag, &printEnc, 0,
"list available encodings"},
- {"-eol", argString, textEOL, sizeof(textEOL),
+ {"-eol", argString, textEOLStr, sizeof(textEOLStr),
"output end-of-line convention (unix, dos, or mac)"},
{"-nopgbrk", argFlag, &noPageBreaks, 0,
"don't insert page breaks between pages"},
@@ -188,6 +188,7 @@ int main(int argc, char *argv[]) {
Object info;
bool ok;
int exitCode;
+ EndOfLineKind textEOL = TextOutputDev::defaultEndOfLine();
Win32Console win32Console(&argc, &argv);
exitCode = 99;
@@ -229,14 +230,17 @@ int main(int argc, char *argv[]) {
if (textEncName[0]) {
globalParams->setTextEncoding(textEncName);
}
- if (textEOL[0]) {
- if (!globalParams->setTextEOL(textEOL)) {
+ if (textEOLStr[0]) {
+ if (!strcmp(textEOLStr, "unix")) {
+ textEOL = eolUnix;
+ } else if (!strcmp(textEOLStr, "dos")) {
+ textEOL = eolDOS;
+ } else if (!strcmp(textEOLStr, "mac")) {
+ textEOL = eolMac;
+ } else {
fprintf(stderr, "Bad '-eol' value on command line\n");
}
}
- if (noPageBreaks) {
- globalParams->setTextPageBreaks(false);
- }
if (quiet) {
globalParams->setErrQuiet(quiet);
}
@@ -370,6 +374,10 @@ int main(int argc, char *argv[]) {
textOut = new TextOutputDev(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
if (textOut->isOk()) {
+ textOut->setTextEOL(textEOL);
+ if (noPageBreaks) {
+ textOut->setTextPageBreaks(false);
+ }
if (bboxLayout) {
printDocBBox(f, doc, textOut, firstPage, lastPage);
}
@@ -384,6 +392,10 @@ int main(int argc, char *argv[]) {
textOut = new TextOutputDev(textFileName->c_str(),
physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag);
if (textOut->isOk()) {
+ textOut->setTextEOL(textEOL);
+ if (noPageBreaks) {
+ textOut->setTextPageBreaks(false);
+ }
if ((w==0) && (h==0) && (x==0) && (y==0)) {
doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0,
true, false, false);