From ac485b0309f5e51d74fdb7484b9f6a7f79448f52 Mon Sep 17 00:00:00 2001 From: Albert Astals Cid Date: Thu, 28 Nov 2019 16:31:32 +0100 Subject: Move textEOL and textPageBreaks out of GlobalParams to TextOutputDev --- poppler/GlobalParams.cc | 35 ----------------------------------- poppler/GlobalParams.h | 13 ------------- poppler/TextOutputDev.cc | 18 ++++++++++-------- poppler/TextOutputDev.h | 22 ++++++++++++++++++++-- utils/pdftotext.cc | 26 +++++++++++++++++++------- 5 files changed, 49 insertions(+), 65 deletions(-) diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc index 58364cb5..ea5d8125 100644 --- a/poppler/GlobalParams.cc +++ b/poppler/GlobalParams.cc @@ -403,12 +403,6 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir) psShrinkLarger = true; psLevel = psLevel2; textEncoding = new GooString("UTF-8"); -#if defined(_WIN32) - textEOL = eolDOS; -#else - textEOL = eolUnix; -#endif - textPageBreaks = true; overprintPreview = false; printCommands = false; profileCommands = false; @@ -1138,16 +1132,6 @@ std::string GlobalParams::getTextEncodingName() const { return textEncoding->toStr(); } -EndOfLineKind GlobalParams::getTextEOL() { - globalParamsLocker(); - return textEOL; -} - -bool GlobalParams::getTextPageBreaks() { - globalParamsLocker(); - return textPageBreaks; -} - bool GlobalParams::getPrintCommands() { globalParamsLocker(); return printCommands; @@ -1246,25 +1230,6 @@ void GlobalParams::setTextEncoding(const char *encodingName) { textEncoding = new GooString(encodingName); } -bool GlobalParams::setTextEOL(const char *s) { - globalParamsLocker(); - if (!strcmp(s, "unix")) { - textEOL = eolUnix; - } else if (!strcmp(s, "dos")) { - textEOL = eolDOS; - } else if (!strcmp(s, "mac")) { - textEOL = eolMac; - } else { - return false; - } - return true; -} - -void GlobalParams::setTextPageBreaks(bool pageBreaks) { - globalParamsLocker(); - textPageBreaks = pageBreaks; -} - void GlobalParams::setOverprintPreview(bool overprintPreviewA) { globalParamsLocker(); overprintPreview = overprintPreviewA; diff --git a/poppler/GlobalParams.h b/poppler/GlobalParams.h index 9363be34..755cdc1a 100644 --- a/poppler/GlobalParams.h +++ b/poppler/GlobalParams.h @@ -88,12 +88,6 @@ enum PSLevel { //------------------------------------------------------------------------ -enum EndOfLineKind { - eolUnix, // LF - eolDOS, // CR+LF - eolMac // CR -}; - //------------------------------------------------------------------------ class GlobalParams { @@ -134,8 +128,6 @@ public: bool getPSShrinkLarger(); PSLevel getPSLevel(); std::string getTextEncodingName() const; - EndOfLineKind getTextEOL(); - bool getTextPageBreaks(); bool getOverprintPreview() { return overprintPreview; } bool getPrintCommands(); bool getProfileCommands(); @@ -154,8 +146,6 @@ public: void setPSShrinkLarger(bool shrink); void setPSLevel(PSLevel level); void setTextEncoding(const char *encodingName); - bool setTextEOL(const char *s); - void setTextPageBreaks(bool pageBreaks); void setOverprintPreview(bool overprintPreviewA); void setPrintCommands(bool printCommandsA); void setProfileCommands(bool profileCommandsA); @@ -209,9 +199,6 @@ private: PSLevel psLevel; // PostScript level to generate GooString *textEncoding; // encoding (unicodeMap) to use for text // output - EndOfLineKind textEOL; // type of EOL marker to use for text - // output - bool textPageBreaks; // insert end-of-page markers? bool overprintPreview; // enable overprint preview bool printCommands; // print the drawing commands bool profileCommands; // profile the drawing commands diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 6447eedd..2c630403 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4166,7 +4166,7 @@ bool TextPage::findText(const Unicode *s, int len, } GooString *TextPage::getText(double xMin, double yMin, - double xMax, double yMax) const { + double xMax, double yMax, EndOfLineKind textEOL) const { GooString *s; UnicodeMap *uMap; TextBlock *blk; @@ -4209,7 +4209,7 @@ GooString *TextPage::getText(double xMin, double yMin, spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { + switch (textEOL) { case eolUnix: eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); break; @@ -5289,7 +5289,7 @@ bool TextPage::findCharRange(int pos, int length, } void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, - bool physLayout) { + bool physLayout, EndOfLineKind textEOL, bool pageBreaks) { UnicodeMap *uMap; TextFlow *flow; TextBlock *blk; @@ -5300,7 +5300,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, TextLineFrag *frag; char space[8], eol[16], eop[8]; int spaceLen, eolLen, eopLen; - bool pageBreaks; GooString *s; double delta; int col, i, j, d, n; @@ -5311,7 +5310,7 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, } spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { + switch (textEOL) { case eolUnix: eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); break; @@ -5324,7 +5323,6 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, break; } eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop)); - pageBreaks = globalParams->getTextPageBreaks(); //~ writing mode (horiz/vert) @@ -5674,6 +5672,8 @@ TextOutputDev::TextOutputDev(const char *fileName, bool physLayoutA, rawOrder = rawOrderA; discardDiag = discardDiagA; doHTML = false; + textEOL = defaultEndOfLine(); + textPageBreaks = true; ok = true; // open file @@ -5716,6 +5716,8 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, doHTML = false; text = new TextPage(rawOrderA, discardDiagA); actualText = new ActualText(text); + textEOL = defaultEndOfLine(); + textPageBreaks = true; ok = true; } @@ -5737,7 +5739,7 @@ void TextOutputDev::endPage() { text->endPage(); text->coalesce(physLayout, fixedPitch, doHTML); if (outputStream) { - text->dump(outputStream, outputFunc, physLayout); + text->dump(outputStream, outputFunc, physLayout, textEOL, textPageBreaks); } } @@ -5934,7 +5936,7 @@ bool TextOutputDev::findText(const Unicode *s, int len, GooString *TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const { - return text->getText(xMin, yMin, xMax, yMax); + return text->getText(xMin, yMin, xMax, yMax, textEOL); } void TextOutputDev::drawSelection(OutputDev *out, diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 51e397f5..0d008b3d 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -70,6 +70,12 @@ enum SelectionStyle { selectionStyleLine }; +enum EndOfLineKind { + eolUnix, // LF + eolDOS, // CR+LF + eolMac // CR +}; + //------------------------------------------------------------------------ // TextFontInfo //------------------------------------------------------------------------ @@ -626,7 +632,7 @@ public: // Get the text which is inside the specified rectangle. GooString *getText(double xMin, double yMin, - double xMax, double yMax) const; + double xMax, double yMax, EndOfLineKind textEOL) const; void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, @@ -659,7 +665,7 @@ public: // Dump contents of page to a file. void dump(void *outputStream, TextOutputFunc outputFunc, - bool physLayout); + bool physLayout, EndOfLineKind textEOL, bool pageBreaks); // Get the head of the linked list of TextFlows. const TextFlow *getFlows() const { return flows; } @@ -912,6 +918,16 @@ public: // last rasterized page. const TextFlow *getFlows() const; + static constexpr EndOfLineKind defaultEndOfLine() { +#if defined(_WIN32) + return eolDOS; +#else + return eolUnix; +#endif + } + void setTextEOL(EndOfLineKind textEOLA) { textEOL = textEOLA; } + void setTextPageBreaks(bool textPageBreaksA) { textPageBreaks = textPageBreaksA; } + private: TextOutputFunc outputFunc; // output function @@ -930,6 +946,8 @@ private: // to skip watermarks drawn on top of body text, etc. bool doHTML; // extra processing for HTML conversion bool ok; // set up ok? + bool textPageBreaks; // insert end-of-page markers? + EndOfLineKind textEOL; // type of EOL marker to use ActualText *actualText; }; diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc index 1d366234..53f2e131 100644 --- a/utils/pdftotext.cc +++ b/utils/pdftotext.cc @@ -88,7 +88,7 @@ static bool rawOrder = false; static bool discardDiag = false; static bool htmlMeta = false; static char textEncName[128] = ""; -static char textEOL[16] = ""; +static char textEOLStr[16] = ""; static bool noPageBreaks = false; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -126,7 +126,7 @@ static const ArgDesc argDesc[] = { "output text encoding name"}, {"-listenc",argFlag, &printEnc, 0, "list available encodings"}, - {"-eol", argString, textEOL, sizeof(textEOL), + {"-eol", argString, textEOLStr, sizeof(textEOLStr), "output end-of-line convention (unix, dos, or mac)"}, {"-nopgbrk", argFlag, &noPageBreaks, 0, "don't insert page breaks between pages"}, @@ -188,6 +188,7 @@ int main(int argc, char *argv[]) { Object info; bool ok; int exitCode; + EndOfLineKind textEOL = TextOutputDev::defaultEndOfLine(); Win32Console win32Console(&argc, &argv); exitCode = 99; @@ -229,14 +230,17 @@ int main(int argc, char *argv[]) { if (textEncName[0]) { globalParams->setTextEncoding(textEncName); } - if (textEOL[0]) { - if (!globalParams->setTextEOL(textEOL)) { + if (textEOLStr[0]) { + if (!strcmp(textEOLStr, "unix")) { + textEOL = eolUnix; + } else if (!strcmp(textEOLStr, "dos")) { + textEOL = eolDOS; + } else if (!strcmp(textEOLStr, "mac")) { + textEOL = eolMac; + } else { fprintf(stderr, "Bad '-eol' value on command line\n"); } } - if (noPageBreaks) { - globalParams->setTextPageBreaks(false); - } if (quiet) { globalParams->setErrQuiet(quiet); } @@ -370,6 +374,10 @@ int main(int argc, char *argv[]) { textOut = new TextOutputDev(nullptr, physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag); if (textOut->isOk()) { + textOut->setTextEOL(textEOL); + if (noPageBreaks) { + textOut->setTextPageBreaks(false); + } if (bboxLayout) { printDocBBox(f, doc, textOut, firstPage, lastPage); } @@ -384,6 +392,10 @@ int main(int argc, char *argv[]) { textOut = new TextOutputDev(textFileName->c_str(), physLayout, fixedPitch, rawOrder, htmlMeta, discardDiag); if (textOut->isOk()) { + textOut->setTextEOL(textEOL); + if (noPageBreaks) { + textOut->setTextPageBreaks(false); + } if ((w==0) && (h==0) && (x==0) && (y==0)) { doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0, true, false, false); -- cgit v1.2.3