summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2020-10-28 13:55:23 +0100
committerTomaž Vajngerl <quikee@gmail.com>2020-11-02 20:11:03 +0100
commitc724c1bec549f224656e7ca3290494159dda7e26 (patch)
tree0447dfeba6192a3e021d3f04355e5060e7d13c12
parentbe1eeabaae38f4890a55cd206f22b03818253fb6 (diff)
pdf: deduplicate resources when copying from external PDF stream
When using external PDF stream/data (from PDF graphic objects), make sure to copy the content of external PDF resources (fonts, bitmaps, forms) only one time (by sharing the map between calls) and every other use, just use the reference to the objects. Change-Id: Ibaa632c8f74806eb195e69404551db6fd077a986 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104935 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Miklos Vajna <vmiklos@collabora.com> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105163 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r--vcl/Library_vcl.mk1
-rw-r--r--vcl/inc/pdf/ExternalPDFStreams.hxx70
-rw-r--r--vcl/inc/pdf/objectcopier.hxx3
-rw-r--r--vcl/qa/cppunit/pdfexport/pdfexport.cxx2
-rw-r--r--vcl/source/gdi/pdfobjectcopier.cxx7
-rw-r--r--vcl/source/gdi/pdfwriter_impl.cxx33
-rw-r--r--vcl/source/gdi/pdfwriter_impl.hxx21
-rw-r--r--vcl/source/pdf/ExternalPDFStreams.cxx43
8 files changed, 155 insertions, 25 deletions
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index ee68260fbd50..bc6b8f8f9af7 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -322,6 +322,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/pdf/Matrix3 \
vcl/source/pdf/XmpMetadata \
vcl/source/pdf/PDFiumLibrary \
+ vcl/source/pdf/ExternalPDFStreams \
vcl/source/graphic/GraphicID \
vcl/source/graphic/GraphicLoader \
vcl/source/graphic/GraphicObject \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
new file mode 100644
index 000000000000..3a9ea38bc9db
--- /dev/null
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <sal/types.h>
+#include <sal/log.hxx>
+#include <vcl/dllapi.h>
+
+#include <map>
+#include <vector>
+#include <memory>
+
+#include <vcl/filter/pdfdocument.hxx>
+
+namespace vcl
+{
+// An external PDF stream, which stores the PDF stream data as a byte array.
+// This struct is also responsible for parsing the stream as a PDFDocument,
+// and stores its instance for the life-cycle of the struct, so that it can be
+// reused to avoid unnecessary parsing.
+struct VCL_DLLPUBLIC ExternalPDFStream
+{
+ std::vector<sal_uInt8> maData;
+ std::shared_ptr<filter::PDFDocument> mpPDFDocument;
+ std::map<sal_Int32, sal_Int32> maCopiedResources;
+
+ std::map<sal_Int32, sal_Int32>& getCopiedResources() { return maCopiedResources; }
+
+ filter::PDFDocument& getPDFDocument()
+ {
+ if (!mpPDFDocument)
+ {
+ SvMemoryStream aPDFStream;
+ aPDFStream.WriteBytes(maData.data(), maData.size());
+ aPDFStream.Seek(0);
+ mpPDFDocument = std::make_shared<filter::PDFDocument>();
+ if (!mpPDFDocument->Read(aPDFStream))
+ {
+ SAL_WARN("vcl.pdfwriter",
+ "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
+ }
+ }
+ return *mpPDFDocument;
+ }
+};
+
+// Class to manage external PDF streams, for de-duplication purposes.
+class VCL_DLLPUBLIC ExternalPDFStreams
+{
+private:
+ std::map<std::vector<sal_uInt8>, sal_Int32> maStreamIndexMap;
+ std::vector<ExternalPDFStream> maStreamList;
+
+public:
+ ExternalPDFStreams() {}
+
+ sal_Int32 store(const sal_uInt8* pData, sal_uInt32 nLength);
+
+ ExternalPDFStream& get(sal_uInt32 nIndex);
+};
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx
index 6e98ed0834f9..65dbbb49aef4 100644
--- a/vcl/inc/pdf/objectcopier.hxx
+++ b/vcl/inc/pdf/objectcopier.hxx
@@ -48,6 +48,9 @@ public:
/// Copies resources of pPage into rLine.
void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine);
+ void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources);
+
/// Copies page one or more page streams from rContentStreams into rStream.
static sal_Int32 copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
SvMemoryStream& rStream, bool& rCompressed);
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index e9027e7e0aad..762bdf0dbce1 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -2114,7 +2114,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testMultiPagePDF)
{ // embedded PDF page 2
vcl::filter::PDFObjectElement* pXObject2 = pXObjects->LookupObject(rIDs[1]);
CPPUNIT_ASSERT(pXObject2);
- CPPUNIT_ASSERT_EQUAL(OString("Im34"), rIDs[1]);
+ CPPUNIT_ASSERT_EQUAL(OString("Im24"), rIDs[1]);
auto pSubtype2 = dynamic_cast<vcl::filter::PDFNameElement*>(pXObject2->Lookup("Subtype"));
CPPUNIT_ASSERT(pSubtype2);
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx b/vcl/source/gdi/pdfobjectcopier.cxx
index 5e54ee68c289..a953c864c122 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -275,13 +275,18 @@ void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OString
{
// Maps from source object id (PDF image) to target object id (export result).
std::map<sal_Int32, sal_Int32> aCopiedResources;
+ copyPageResources(pPage, rLine, aCopiedResources);
+}
+void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources)
+{
rLine.append(" /Resources <<");
static const std::initializer_list<OString> aKeys
= { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
for (const auto& rKey : aKeys)
{
- rLine.append(copyExternalResources(*pPage, rKey, aCopiedResources));
+ rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
}
rLine.append(">>");
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 1e7d8db238e1..11de3436e531 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8358,7 +8358,7 @@ bool PDFWriterImpl::writeGradientFunction( GradientEmit const & rObject )
void PDFWriterImpl::writeJPG( JPGEmit& rObject )
{
- if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && !m_aContext.UseReferenceXObject)
+ if (rObject.m_aReferenceXObject.hasExternalPDFData() && !m_aContext.UseReferenceXObject)
{
writeReferenceXObject(rObject.m_aReferenceXObject);
return;
@@ -8457,23 +8457,19 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
{
// Parse the PDF data, we need that to write the PDF dictionary of our
// object.
- SvMemoryStream aPDFStream;
- aPDFStream.WriteBytes(rEmit.m_aPDFData.data(), rEmit.m_aPDFData.size());
- aPDFStream.Seek(0);
- filter::PDFDocument aPDFDocument;
- if (!aPDFDocument.Read(aPDFStream))
- {
- SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
+ if (rEmit.m_nExternalPDFDataIndex < 0)
return;
- }
- std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
+ auto & rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
+ auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
+
+ std::vector<filter::PDFObjectElement*> aPages = rPDFDocument.GetPages();
if (aPages.empty())
{
SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: no pages");
return;
}
- size_t nPageIndex = rEmit.m_nPDFPageIndex >= 0 ? rEmit.m_nPDFPageIndex : 0;
+ size_t nPageIndex = rEmit.m_nExternalPDFPageIndex >= 0 ? rEmit.m_nExternalPDFPageIndex : 0;
filter::PDFObjectElement* pPage = aPages[nPageIndex];
if (!pPage)
@@ -8545,7 +8541,9 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
}
PDFObjectCopier aCopier(*this);
- aCopier.copyPageResources(pPage, aLine);
+ auto & rResources = rExternalPDFStream.getCopiedResources();
+ aCopier.copyPageResources(pPage, aLine, rResources);
+
aLine.append(" /BBox [ 0 0 ");
aLine.append(nWidth);
aLine.append(" ");
@@ -8687,7 +8685,7 @@ namespace
bool PDFWriterImpl::writeBitmapObject( BitmapEmit& rObject, bool bMask )
{
- if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && !m_aContext.UseReferenceXObject)
+ if (rObject.m_aReferenceXObject.hasExternalPDFData() && !m_aContext.UseReferenceXObject)
{
writeReferenceXObject(rObject.m_aReferenceXObject);
return true;
@@ -9009,10 +9007,10 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
sal_uInt32 nLength = rGraphic.getVectorGraphicData()->getVectorGraphicDataArrayLength();
auto const & rArray = rGraphic.getVectorGraphicData()->getVectorGraphicDataArray();
- auto pPDFData = std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), rArray.getConstArray() + nLength);
-
if (m_aContext.UseReferenceXObject)
{
+ auto pPDFData = std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), rArray.getConstArray() + nLength);
+
// Store the original PDF data as an embedded file.
m_aEmbeddedFiles.emplace_back();
m_aEmbeddedFiles.back().m_nObject = createObject();
@@ -9021,8 +9019,9 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
}
else
{
- rEmit.m_nPDFPageIndex = rGraphic.getVectorGraphicData()->getPageIndex();
- rEmit.m_aPDFData = *pPDFData;
+ sal_Int32 aIndex = m_aExternalPDFStreams.store(reinterpret_cast<const sal_uInt8*>(rArray.getConstArray()), nLength);
+ rEmit.m_nExternalPDFPageIndex = rGraphic.getVectorGraphicData()->getPageIndex();
+ rEmit.m_nExternalPDFDataIndex = aIndex;
}
rEmit.m_nFormObject = createObject();
diff --git a/vcl/source/gdi/pdfwriter_impl.hxx b/vcl/source/gdi/pdfwriter_impl.hxx
index f89cfd2e94f6..dc96454609f9 100644
--- a/vcl/source/gdi/pdfwriter_impl.hxx
+++ b/vcl/source/gdi/pdfwriter_impl.hxx
@@ -53,6 +53,7 @@
#include <outdata.hxx>
#include <vcl/filter/pdfobjectcontainer.hxx>
+#include <pdf/ExternalPDFStreams.hxx>
#include "pdffontcache.hxx"
#include "pdfbuildin_fonts.hxx"
@@ -190,19 +191,25 @@ struct ReferenceXObjectEmit
/// Size of the bitmap replacement, in pixels.
Size m_aPixelSize;
/// PDF data from the graphic object, if not writing a reference XObject.
- std::vector<sal_Int8> m_aPDFData;
- sal_Int32 m_nPDFPageIndex;
+ sal_Int32 m_nExternalPDFDataIndex;
+ sal_Int32 m_nExternalPDFPageIndex;
ReferenceXObjectEmit()
- : m_nFormObject(0),
- m_nEmbeddedObject(0),
- m_nBitmapObject(0),
- m_nPDFPageIndex(-1)
+ : m_nFormObject(0)
+ , m_nEmbeddedObject(0)
+ , m_nBitmapObject(0)
+ , m_nExternalPDFDataIndex(-1)
+ , m_nExternalPDFPageIndex(-1)
{
}
/// Returns the ID one should use when referring to this bitmap.
sal_Int32 getObject() const;
+
+ bool hasExternalPDFData() const
+ {
+ return m_nExternalPDFDataIndex >= 0;
+ }
};
struct BitmapEmit
@@ -740,6 +747,8 @@ private:
osl::File m_aFile;
bool m_bOpen;
+ ExternalPDFStreams m_aExternalPDFStreams;
+
/* output redirection; e.g. to accumulate content streams for
XObjects
*/
diff --git a/vcl/source/pdf/ExternalPDFStreams.cxx b/vcl/source/pdf/ExternalPDFStreams.cxx
new file mode 100644
index 000000000000..08f31ed22829
--- /dev/null
+++ b/vcl/source/pdf/ExternalPDFStreams.cxx
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <pdf/ExternalPDFStreams.hxx>
+#include <comphelper/hash.hxx>
+
+namespace vcl
+{
+sal_Int32 ExternalPDFStreams::store(const sal_uInt8* pData, sal_uInt32 nLength)
+{
+ sal_Int32 nIndex = -1;
+
+ std::vector<sal_uInt8> aHash
+ = comphelper::Hash::calculateHash(pData, nLength, comphelper::HashType::SHA1);
+
+ auto it = maStreamIndexMap.find(aHash);
+ if (it == maStreamIndexMap.end())
+ {
+ auto& rExternalStream = maStreamList.emplace_back();
+ rExternalStream.maData.resize(nLength);
+ std::copy(pData, pData + nLength, rExternalStream.maData.begin());
+ nIndex = maStreamList.size() - 1;
+ maStreamIndexMap.emplace(aHash, nIndex);
+ }
+ else
+ {
+ nIndex = it->second;
+ }
+
+ return nIndex;
+}
+
+ExternalPDFStream& ExternalPDFStreams::get(sal_uInt32 nIndex) { return maStreamList.at(nIndex); }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */