summary refs log tree commit diff
diff options
context:
space:
mode:
author Ashod Nakashian <ashod.nakashian@collabora.co.uk> 2018-04-13 20:26:11 -0400
committer Jan Holesovsky <kendy@collabora.com> 2018-06-06 12:48:31 +0200
commit 85a98016087571154d1fed684b197c3bec74db13 (patch)
tree 1099d2fb8dbccb5ce15b1abe2e351fc821be7c14
parent cee3f93a4660e1d02daaa589100f5aa2608fbb6e (diff)
svx: improved text importing from PDF
Change-Id: I9a2fc2c8511655c1aa362c1a03a5e82ae3ba697e (cherry picked from commit d057cf3d9184cc5d96af9c957411911f5e788f4d)
-rw-r--r-- external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 62
-rw-r--r-- external/pdfium/UnpackedTarball_pdfium.mk 1
-rw-r--r-- svx/source/svdraw/svdpdf.cxx 84
3 files changed, 108 insertions, 39 deletions
diff --git a/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2 b/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2
new file mode 100644
index 000000000000..e7afda576d15
--- /dev/null
+++ b/external/pdfium/0007-svx-improved-text-importing-from-PDF.patch.2
@@ -0,0 +1,62 @@
+From 87f3da183a87f3ff5df854971a0c3bc2134ecd61 Mon Sep 17 00:00:00 2001
+From: Ashod Nakashian <ashod.nakashian@collabora.co.uk>
+Date: Tue, 5 Jun 2018 11:31:35 +0200
+Subject: [PATCH 07/14] svx: improved text importing from PDF
+
+---
+ pdfium/fpdfsdk/fpdf_editpage.cpp | 9 +++++++--
+ pdfium/public/fpdf_edit.h | 6 +++++-
+ 2 files changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp
+index f8e2418..2249e8e 100644
+--- a/pdfium/fpdfsdk/fpdf_editpage.cpp
++++ b/pdfium/fpdfsdk/fpdf_editpage.cpp
+@@ -652,8 +652,11 @@ FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object,
+ double* a,
+ double* b,
+ double* c,
+- double* d) {
+- if (!text_object)
++ double* d,
++ double* e,
++ double* f)
++{
++ if (!text_object || !a || !b || !c || !d || !e || !f)
+ return;
+
+ CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object);
+@@ -662,6 +665,8 @@ FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object,
+ *b = matrix.b;
+ *c = matrix.c;
+ *d = matrix.d;
++ *e = matrix.e;
++ *f = matrix.f;
+ }
+
+ FPDF_EXPORT int FPDF_CALLCONV
+diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h
+index 89ec8cf..fc906f4 100644
+--- a/pdfium/public/fpdf_edit.h
++++ b/pdfium/public/fpdf_edit.h
+@@ -1038,12 +1038,16 @@ FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text_object);
+ // b - Pointer to a double value receiving coefficient "b" of the matrix.
+ // c - Pointer to a double value receiving coefficient "c" of the matrix.
+ // d - Pointer to a double value receiving coefficient "d" of the matrix.
++// e - Pointer to a double value receiving coefficient "e" of the matrix.
++// f - Pointer to a double value receiving coefficient "f" of the matrix.
+ FPDF_EXPORT void FPDF_CALLCONV
+ FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object,
+ double* a,
+ double* b,
+ double* c,
+- double* d);
++ double* d,
++ double* e,
++ double* f);
+
+ // Get the unicode of a special character in a text object.
+ //
+--
+2.16.3
+
diff --git a/external/pdfium/UnpackedTarball_pdfium.mk b/external/pdfium/UnpackedTarball_pdfium.mk
index af7841dcd120..f0ee740ddf94 100644
--- a/external/pdfium/UnpackedTarball_pdfium.mk
+++ b/external/pdfium/UnpackedTarball_pdfium.mk
@@ -20,6 +20,7 @@ pdfium_patches += 0003-svx-import-PDF-images-as-BGRA.patch.2
pdfium_patches += 0004-svx-support-PDF-text-color.patch.2
pdfium_patches += 0005-svx-support-Paths-in-PDFs-while-importing.patch.2
pdfium_patches += 0006-svx-improve-path-importing-from-PDF.patch.2
+pdfium_patches += 0007-svx-improved-text-importing-from-PDF.patch.2
$(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium))
diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx
index 04e5f3f7f48e..7ef3d8614e7a 100644
--- a/svx/source/svdraw/svdpdf.cxx
+++ b/svx/source/svdraw/svdpdf.cxx
@@ -103,6 +103,8 @@ static inline long lcl_ToLogic(double value)
const long out = OutputDevice::LogicToLogic(in, MapUnit::MapPixel, MapUnit::Map100thMM);
return out / 100;
}
+
+static inline double sqrt2(double a, double b) { return sqrt(a * a + b * b); } // Euclidean length of the vector (a, b), not a square root of 2.
}
struct FPDFBitmapDeleter
@@ -1024,53 +1026,56 @@ void ImpSdrPdfImport::ImportText(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd
SAL_WARN("sd.filter", "FAILED to get TEXT bounds");
}
- SAL_WARN("sd.filter", "Got TEXT bounds left: " << left << ", right: " << right
- << ", top: " << top << ", bottom: " << bottom);
- tools::Rectangle aRect = PointsToLogic(left, right, top, bottom);
+ if (left == right || top == bottom)
+ {
+ SAL_WARN("sd.filter", "Skipping empty TEXT #" << nPageObjectIndex << " left: " << left
+ << ", right: " << right << ", top: " << top
+ << ", bottom: " << bottom);
+ return;
+ }
- double dFontScale = 1.0;
- geometry::Matrix2D aMatrix;
- FPDFTextObj_GetMatrix(pPageObject, &aMatrix.m00, &aMatrix.m01, &aMatrix.m10, &aMatrix.m11);
- if (aMatrix.m00 != aMatrix.m11 || aMatrix.m00 <= 0)
+ const int nChars = FPDFTextObj_CountChars(pPageObject);
+ std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars + 1]); // + terminating null
+
+ unsigned short* pShortText = reinterpret_cast<unsigned short*>(pText.get());
+ const int nActualChars = FPDFTextObj_GetText(pPageObject, 0, nChars, pShortText);
+ if (nActualChars <= 0)
{
- SAL_WARN("sd.filter", "Bogus font scale matrix (" << aMatrix.m00 << ',' << aMatrix.m11
- << "), will use heuristic height of "
- << aRect.GetHeight() << ".");
- dFontScale = aRect.GetHeight();
+ SAL_WARN("sd.filter", "Got not TEXT");
+ return;
}
- else
- dFontScale = aMatrix.m00;
- double dFontSize = FPDFTextObj_GetFontSize(pPageObject);
- SAL_WARN("sd.filter", "Got Font Size: " << dFontSize);
- dFontSize *= dFontScale;
- SAL_WARN("sd.filter", "Got Font Size Scaled: " << dFontSize);
- dFontSize = lcl_PointToPixel(dFontSize);
- SAL_WARN("sd.filter", "Got Font Pixel Size: " << dFontSize);
- dFontSize = lcl_ToLogic(dFontSize);
- SAL_WARN("sd.filter", "Got Font Logic Size: " << dFontSize);
+ OUString sText(pText.get(), nActualChars);
+ SAL_WARN("sd.filter", "Got Text (" << nChars << "): [" << sText << "].");
+
+ double a, b, c, d, e, f;
+ FPDFTextObj_GetMatrix(pPageObject, &a, &b, &c, &d, &e, &f);
+ SAL_WARN("sd.filter", "Got font scale matrix (" << a << ", " << b << ", " << c << ", " << d
+ << ", " << e << ", " << f << ')');
+ Point aPos = PointsToLogic(e, f);
+ SAL_WARN("sd.filter", "Got TEXT origin: " << aPos);
+
+ const double dFontSize = FPDFTextObj_GetFontSize(pPageObject);
+ double dFontSizeH = fabs(sqrt2(a, c) * dFontSize);
+ double dFontSizeV = fabs(sqrt2(b, d) * dFontSize);
+ SAL_WARN("sd.filter", "Got Font Size: " << dFontSize << ", Scaled Font Size H: " << dFontSizeH
+ << ", V: " << dFontSizeV);
+ dFontSizeH = lcl_PointToPixel(dFontSizeH);
+ dFontSizeV = lcl_PointToPixel(dFontSizeV);
+ SAL_WARN("sd.filter", "Got Pixel Font Size H: " << dFontSizeH << ", V: " << dFontSizeV);
+ dFontSizeH = lcl_ToLogic(dFontSizeH);
+ dFontSizeV = lcl_ToLogic(dFontSizeV);
+ SAL_WARN("sd.filter", "Got Logic Font Size H: " << dFontSizeH << ", V: " << dFontSizeV);
unsigned int nR, nG, nB, nA;
if (FPDFTextObj_GetStrokeColor(pPageObject, &nR, &nG, &nB, &nA))
mpVD->SetTextColor(Color(nR, nG, nB));
vcl::Font aFnt = mpVD->GetFont();
- aFnt.SetFontSize(Size(dFontSize, dFontSize));
+ aFnt.SetFontSize(Size(dFontSizeH, dFontSizeV));
mpVD->SetFont(aFnt);
- const int nChars = FPDFTextObj_CountChars(pPageObject);
- std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nChars + 1]); // + terminating null
-
- unsigned short* pShortText = reinterpret_cast<unsigned short*>(pText.get());
- const int nActualChars = FPDFTextObj_GetText(pPageObject, 0, nChars, pShortText);
- OUString sText(pText.get(), nActualChars);
-
- // for (int nChar = 0; nChar < nChars; ++nChar)
- // pText[nChar] = static_cast<sal_Unicode>(FPDFTextObj_GetUnicode(pPageObject, nChar));
- // OUString sText(pText.get(), nChars);
- SAL_WARN("sd.filter", "Got Text (" << nChars << "): [" << sText << "].");
-
- ImportText(aRect.TopLeft(), sText);
+ ImportText(aPos, sText);
}
void ImpSdrPdfImport::ImportText(const Point& rPos, const OUString& rStr)
@@ -1083,11 +1088,12 @@ void ImpSdrPdfImport::ImportText(const Point& rPos, const OUString& rStr)
sal_Int32 nTextWidth = static_cast<sal_Int32>(mpVD->GetTextWidth(rStr) * mfScaleX);
sal_Int32 nTextHeight = static_cast<sal_Int32>(mpVD->GetTextHeight() * mfScaleY);
- SAL_WARN("sd.filter", "TextWidth: " << nTextWidth << ", TextHeight: " << nTextHeight);
+ SAL_WARN("sd.filter",
+ "Unscaled text size: " << mpVD->GetTextWidth(rStr) << 'x' << mpVD->GetTextHeight()
+ << ", Scaled: " << nTextWidth << 'x' << nTextHeight);
Point aPos(FRound(rPos.X() * mfScaleX + maOfs.X()), FRound(rPos.Y() * mfScaleY + maOfs.Y()));
Size aSize(nTextWidth, nTextHeight);
- SAL_WARN("sd.filter", "Text Pos: " << aPos << ", Size: " << aSize);
if (eAlg == ALIGN_BASELINE)
aPos.AdjustY(-(FRound(aFontMetric.GetAscent() * mfScaleY)));
@@ -1325,7 +1331,7 @@ void ImpSdrPdfImport::ImportPath(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd
}
const basegfx::B2DHomMatrix aTransform(
- basegfx::tools::createScaleTranslateB2DHomMatrix(mfScaleX, mfScaleY, maOfs.X(), maOfs.Y()));
+ basegfx::utils::createScaleTranslateB2DHomMatrix(mfScaleX, mfScaleY, maOfs.X(), maOfs.Y()));
aPoly.transform(aTransform);
float fWidth = 1;
@@ -1353,7 +1359,7 @@ void ImpSdrPdfImport::ImportPath(FPDF_PAGEOBJECT pPageObject, int nPageObjectInd
// if(!mbLastObjWasPolyWithoutLine || !CheckLastPolyLineAndFillMerge(basegfx::B2DPolyPolygon(aSource)))
aPoly.setClosed(true); // TODO: Review
- SdrPathObj* pPath = new SdrPathObj(OBJ_POLY, basegfx::B2DPolyPolygon(aPoly));
+ SdrPathObj* pPath = new SdrPathObj(*mpModel, OBJ_POLY, basegfx::B2DPolyPolygon(aPoly));
SetAttributes(pPath);
InsertObj(pPath, false);
}