From 4e9b03d04f740a0cbafa22a4f3cedfae7f37a994 Mon Sep 17 00:00:00 2001 From: Tomaž Vajngerl Date: Sun, 28 Jun 2020 13:46:41 +0200 Subject: pdf: add text page object attribs, refactor ImpSdrPdfImport, tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This refactors ImpSdrPdfImport to push more functions into the PDFium wrapper. The focus is on text page object attributes. Change-Id: Ie1faf5e3743eec7c77050835651533f9e227c2a7 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97366 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl --- include/vcl/filter/PDFiumLibrary.hxx | 7 ++++ sd/qa/unit/SdrPdfImportTest.cxx | 11 +++++- svx/source/inc/svdpdf.hxx | 4 ++- svx/source/svdraw/svdpdf.cxx | 69 ++++++++++-------------------------- vcl/qa/cppunit/PDFiumLibraryTest.cxx | 16 ++++++++- vcl/source/pdf/PDFiumLibrary.cxx | 62 ++++++++++++++++++++++++++++++-- 6 files changed, 113 insertions(+), 56 deletions(-) diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx index 9f34bdb92ad6..f7dcc4b2c99e 100644 --- a/include/vcl/filter/PDFiumLibrary.hxx +++ b/include/vcl/filter/PDFiumLibrary.hxx @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -93,6 +94,12 @@ public: std::unique_ptr getFormObject(int nIndex); basegfx::B2DHomMatrix getMatrix(); + basegfx::B2DRectangle getBounds(); + double getFontSize(); + OUString getFontName(); + int getTextRenderMode(); + Color getFillColor(); + Color getStrokeColor(); }; class VCL_DLLPUBLIC PDFiumTextPage final diff --git a/sd/qa/unit/SdrPdfImportTest.cxx b/sd/qa/unit/SdrPdfImportTest.cxx index e8dd60b0a806..07f90ba26e24 100644 --- a/sd/qa/unit/SdrPdfImportTest.cxx +++ b/sd/qa/unit/SdrPdfImportTest.cxx @@ -88,6 +88,9 @@ CPPUNIT_TEST_FIXTURE(SdrPdfImportTest, testImportSimpleText) SdPage* pPage = pViewShell->GetActualPage(); CPPUNIT_ASSERT(pPage); + // Check there is one object on the page only + CPPUNIT_ASSERT_EQUAL(size_t(1), pPage->GetObjCount()); + // Get the first object - there should be only one. SdrObject* pObject = pPage->GetObj(0); CPPUNIT_ASSERT(pObject); @@ -109,11 +112,17 @@ CPPUNIT_TEST_FIXTURE(SdrPdfImportTest, testImportSimpleText) // Execute the break operation - to turn the PDF into shapes/objects pViewShell->GetDrawView()->DoImportMarkedMtf(); - // Check Objects after import + // Check there is one object on the page only + CPPUNIT_ASSERT_EQUAL(size_t(1), pPage->GetObjCount()); + // Get the object SdrObject* pImportedObject = pPage->GetObj(0); CPPUNIT_ASSERT(pImportedObject); + // Check the object position + CPPUNIT_ASSERT_EQUAL(tools::Rectangle(Point(2011, 2098), Size(2106 + 1, 302 + 1)), + pImportedObject->GetLogicRect()); + // Object should be a text object containing one paragraph with // content "This is PDF!" diff --git a/svx/source/inc/svdpdf.hxx b/svx/source/inc/svdpdf.hxx index 0bfdb2b3e6ef..6482b60ef867 100644 --- a/svx/source/inc/svdpdf.hxx +++ b/svx/source/inc/svdpdf.hxx @@ -115,7 +115,9 @@ class SVXCORE_DLLPUBLIC ImpSdrPdfImport final int nPageObjectIndex); void ImportImage(FPDF_PAGEOBJECT pPageObject, int nPageObjectIndex); void ImportPath(FPDF_PAGEOBJECT pPageObject, int nPageObjectIndex); - void ImportText(FPDF_PAGEOBJECT pPageObject, FPDF_TEXTPAGE pTextPage, int nPageObjectIndex); + void ImportText(std::unique_ptr const& pPageObject, + std::unique_ptr const& pTextPage, + int nPageObjectIndex); void InsertTextObject(const Point& rPos, const Size& rSize, const OUString& rStr); void SetupPageScale(const double dPageWidth, const double dPageHeight); diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx index 79693be693e1..30413fa1a30f 100644 --- a/svx/source/svdraw/svdpdf.cxx +++ b/svx/source/svdraw/svdpdf.cxx @@ -677,7 +677,7 @@ void ImpSdrPdfImport::ImportPdfObject( switch (nPageObjectType) { case FPDF_PAGEOBJ_TEXT: - ImportText(pPageObject->getPointer(), pTextPage->getPointer(), nPageObjectIndex); + ImportText(pPageObject, pTextPage, nPageObjectIndex); break; case FPDF_PAGEOBJ_PATH: ImportPath(pPageObject->getPointer(), nPageObjectIndex); @@ -719,46 +719,24 @@ void ImpSdrPdfImport::ImportForm(std::unique_ptr con maCurrentMatrix = aOldMatrix; } -void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, FPDF_TEXTPAGE pTextPage, +void ImpSdrPdfImport::ImportText(std::unique_ptr const& pPageObject, + std::unique_ptr const& pTextPage, int /*nPageObjectIndex*/) { - float left; - float bottom; - float right; - float top; - if (!FPDFPageObj_GetBounds(pPageObject, &left, &bottom, &right, &top)) - { - SAL_WARN("sd.filter", "FAILED to get TEXT bounds"); - } + basegfx::B2DRectangle aTextRect = pPageObject->getBounds(); + basegfx::B2DHomMatrix aMatrix = pPageObject->getMatrix(); - if (left == right || top == bottom) - return; - - FS_MATRIX matrix; - FPDFTextObj_GetMatrix(pPageObject, &matrix); basegfx::B2DHomMatrix aTextMatrix(maCurrentMatrix); - basegfx::B2DRange aTextRect(left, top, right, bottom); + aTextRect *= aTextMatrix; const tools::Rectangle aRect = PointsToLogic(aTextRect.getMinX(), aTextRect.getMaxX(), aTextRect.getMinY(), aTextRect.getMaxY()); - const int nBytes = FPDFTextObj_GetText(pPageObject, pTextPage, nullptr, 0); - std::unique_ptr pText(new sal_Unicode[nBytes]); - - const int nActualBytes = FPDFTextObj_GetText(pPageObject, pTextPage, pText.get(), nBytes); - if (nActualBytes <= 0) - { - return; - } - - // Let's rely on null-termination for the length of the string. We - // just know the number of bytes the string takes, but in OUString - // needs the number of characters. - OUString sText(pText.get()); + OUString sText = pPageObject->getText(pTextPage); - const double dFontSize = FPDFTextObj_GetFontSize(pPageObject); - double dFontSizeH = fabs(sqrt2(matrix.a, matrix.c) * dFontSize); - double dFontSizeV = fabs(sqrt2(matrix.b, matrix.d) * dFontSize); + const double dFontSize = pPageObject->getFontSize(); + double dFontSizeH = fabs(sqrt2(aMatrix.a(), aMatrix.c()) * dFontSize); + double dFontSizeV = fabs(sqrt2(aMatrix.b(), aMatrix.d()) * dFontSize); dFontSizeH = convertPointToMm100(dFontSizeH); dFontSizeV = convertPointToMm100(dFontSizeV); @@ -772,25 +750,18 @@ void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, FPDF_TEXTPAGE pTex mbFntDirty = true; } - const int nFontName = 80 + 1; - std::unique_ptr pFontName(new char[nFontName]); // + terminating null - char* pCharFontName = reinterpret_cast(pFontName.get()); - int nFontNameChars = FPDFTextObj_GetFontName(pPageObject, pCharFontName, nFontName); - if (nFontName >= nFontNameChars) + OUString sFontName = pPageObject->getFontName(); + if (!sFontName.isEmpty() && sFontName != aFnt.GetFamilyName()) { - OUString sFontName = OUString::createFromAscii(pFontName.get()); - if (sFontName != aFnt.GetFamilyName()) - { - aFnt.SetFamilyName(sFontName); - mpVD->SetFont(aFnt); - mbFntDirty = true; - } + aFnt.SetFamilyName(sFontName); + mpVD->SetFont(aFnt); + mbFntDirty = true; } Color aTextColor(COL_TRANSPARENT); bool bFill = false; bool bUse = true; - switch (FPDFTextObj_GetTextRenderMode(pPageObject)) + switch (pPageObject->getTextRenderMode()) { case FPDF_TEXTRENDERMODE_FILL: case FPDF_TEXTRENDERMODE_FILL_CLIP: @@ -809,11 +780,9 @@ void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, FPDF_TEXTPAGE pTex } if (bUse) { - unsigned int nR, nG, nB, nA; - bool bRet = bFill ? FPDFPageObj_GetFillColor(pPageObject, &nR, &nG, &nB, &nA) - : FPDFPageObj_GetStrokeColor(pPageObject, &nR, &nG, &nB, &nA); - if (bRet) - aTextColor = Color(nR, nG, nB); + Color aColor = bFill ? pPageObject->getFillColor() : pPageObject->getStrokeColor(); + if (aColor != COL_TRANSPARENT) + aTextColor = aColor.GetRGBColor(); } if (aTextColor != mpVD->GetTextColor()) diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx index 9c0c92607b14..43d6b92bae27 100644 --- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx +++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx @@ -134,10 +134,24 @@ void PDFiumLibraryTest::testPageObjects() auto pPageObject = pPage->getObject(0); auto pTextPage = pPage->getTextPage(); - CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType()); + CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType()); // FPDF_PAGEOBJ_TEXT + CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when " "MTV ax quiz prog. Junk MTV quiz "), pPageObject->getText(pTextPage)); + + CPPUNIT_ASSERT_EQUAL(12.0, pPageObject->getFontSize()); + CPPUNIT_ASSERT_EQUAL(OUString("Liberation Serif"), pPageObject->getFontName()); + CPPUNIT_ASSERT_EQUAL(0, pPageObject->getTextRenderMode()); // FPDF_TEXTRENDERMODE_FILL + CPPUNIT_ASSERT_EQUAL(COL_BLACK, pPageObject->getFillColor()); + CPPUNIT_ASSERT_EQUAL(COL_BLACK, pPageObject->getStrokeColor()); + + CPPUNIT_ASSERT_EQUAL(true, pPageObject->getMatrix().isIdentity()); + + CPPUNIT_ASSERT_DOUBLES_EQUAL(057.01, pPageObject->getBounds().getMinX(), 1E-2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(721.51, pPageObject->getBounds().getMinY(), 1E-2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(539.48, pPageObject->getBounds().getMaxX(), 1E-2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(732.54, pPageObject->getBounds().getMaxY(), 1E-2); } void PDFiumLibraryTest::testAnnotationsMadeInEvince() diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx index 0dc36d94b7d0..b58878e7881e 100644 --- a/vcl/source/pdf/PDFiumLibrary.cxx +++ b/vcl/source/pdf/PDFiumLibrary.cxx @@ -253,10 +253,66 @@ std::unique_ptr PDFiumPageObject::getFormObject(int nIndex) basegfx::B2DHomMatrix PDFiumPageObject::getMatrix() { + basegfx::B2DHomMatrix aB2DMatrix; FS_MATRIX matrix; - FPDFFormObj_GetMatrix(mpPageObject, &matrix); - return basegfx::B2DHomMatrix::abcdef(matrix.a, matrix.b, matrix.c, matrix.d, matrix.e, - matrix.f); + if (FPDFFormObj_GetMatrix(mpPageObject, &matrix)) + aB2DMatrix = basegfx::B2DHomMatrix::abcdef(matrix.a, matrix.b, matrix.c, matrix.d, matrix.e, + matrix.f); + return aB2DMatrix; +} + +basegfx::B2DRectangle PDFiumPageObject::getBounds() +{ + basegfx::B2DRectangle aB2DRectangle; + + float left = 0; + float bottom = 0; + float right = 0; + float top = 0; + if (FPDFPageObj_GetBounds(mpPageObject, &left, &bottom, &right, &top)) + { + aB2DRectangle = basegfx::B2DRectangle(left, top, right, bottom); + } + return aB2DRectangle; +} + +double PDFiumPageObject::getFontSize() { return FPDFTextObj_GetFontSize(mpPageObject); } + +OUString PDFiumPageObject::getFontName() +{ + OUString sFontName; + const int nFontName = 80 + 1; + std::unique_ptr pFontName(new char[nFontName]); // + terminating null + int nFontNameChars = FPDFTextObj_GetFontName(mpPageObject, pFontName.get(), nFontName); + if (nFontName >= nFontNameChars) + { + sFontName = OUString::createFromAscii(pFontName.get()); + } + return sFontName; +} + +int PDFiumPageObject::getTextRenderMode() { return FPDFTextObj_GetTextRenderMode(mpPageObject); } + +Color PDFiumPageObject::getFillColor() +{ + Color aColor = COL_TRANSPARENT; + unsigned int nR, nG, nB, nA; + if (FPDFPageObj_GetFillColor(mpPageObject, &nR, &nG, &nB, &nA)) + { + aColor = Color(0xFF - nA, nR, nG, nB); + } + return aColor; +} + +Color PDFiumPageObject::getStrokeColor() +{ + Color aColor = COL_TRANSPARENT; + unsigned int nR, nG, nB, nA; + if (FPDFPageObj_GetStrokeColor(mpPageObject, &nR, &nG, &nB, &nA)) + { + aColor = Color(0xFF - nA, nR, nG, nB); + } + return aColor; } PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation) -- cgit v1.2.3