summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2020-10-28 13:55:23 +0100
committerMiklos Vajna <vmiklos@collabora.com>2020-10-28 18:24:24 +0100
commite528293bc17ecce92124e8dd8841bcea2bda562e (patch)
tree7bebe3d37dd0348c5acf3be615ecf312cf789525
parent5245723d92e92773d4679b3cd60a70706e3b4782 (diff)
pdf: deduplicate resources when copying from external PDF stream
When using external PDF stream/data (from PDF graphic objects), make sure to copy the content of external PDF resources (fonts, bitmaps, forms) only once, by sharing the map of already-copied resources between calls; every subsequent use just references the previously copied objects.

Change-Id: Ibaa632c8f74806eb195e69404551db6fd077a986
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104935
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
-rw-r--r--vcl/Library_vcl.mk1
-rw-r--r--vcl/inc/pdf/ExternalPDFStreams.hxx65
-rw-r--r--vcl/inc/pdf/objectcopier.hxx3
-rw-r--r--vcl/qa/cppunit/pdfexport/pdfexport.cxx2
-rw-r--r--vcl/source/gdi/pdfobjectcopier.cxx7
-rw-r--r--vcl/source/gdi/pdfwriter_impl.cxx33
-rw-r--r--vcl/source/gdi/pdfwriter_impl.hxx21
-rw-r--r--vcl/source/pdf/ExternalPDFStreams.cxx43
8 files changed, 151 insertions, 24 deletions
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index e9d820806870..9afdac80ef86 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -319,6 +319,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/gdi/CommonSalLayout \
vcl/source/gdi/TypeSerializer \
vcl/source/pdf/PDFiumLibrary \
+ vcl/source/pdf/ExternalPDFStreams \
vcl/source/graphic/GraphicID \
vcl/source/graphic/GraphicLoader \
vcl/source/graphic/GraphicObject \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
new file mode 100644
index 000000000000..3bd59478c212
--- /dev/null
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <sal/types.h>
+#include <sal/log.hxx>
+#include <vcl/dllapi.h>
+
+#include <map>
+#include <vector>
+#include <memory>
+
+#include <vcl/filter/pdfdocument.hxx>
+
+namespace vcl
+{
+struct VCL_DLLPUBLIC ExternalPDFStream
+{
+ std::vector<sal_uInt8> maData;
+ std::shared_ptr<filter::PDFDocument> mpPDFDocument;
+ std::map<sal_Int32, sal_Int32> maCopiedResources;
+
+ std::map<sal_Int32, sal_Int32>& getCopiedResources() { return maCopiedResources; }
+
+ filter::PDFDocument& getPDFDocument()
+ {
+ if (!mpPDFDocument)
+ {
+ SvMemoryStream aPDFStream;
+ aPDFStream.WriteBytes(maData.data(), maData.size());
+ aPDFStream.Seek(0);
+ mpPDFDocument = std::make_unique<filter::PDFDocument>();
+ if (!mpPDFDocument->Read(aPDFStream))
+ {
+ SAL_WARN("vcl.pdfwriter",
+ "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
+ }
+ }
+ return *mpPDFDocument;
+ }
+};
+
+class VCL_DLLPUBLIC ExternalPDFStreams
+{
+private:
+ std::map<std::vector<sal_uInt8>, sal_Int32> maStreamIndexMap;
+ std::vector<ExternalPDFStream> maStreamList;
+
+public:
+ ExternalPDFStreams() {}
+
+ sal_Int32 store(const sal_uInt8* pData, sal_uInt32 nLength);
+
+ ExternalPDFStream& get(sal_uInt32 nIndex);
+};
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx
index a6ff12d116af..487d03186682 100644
--- a/vcl/inc/pdf/objectcopier.hxx
+++ b/vcl/inc/pdf/objectcopier.hxx
@@ -48,6 +48,9 @@ public:
/// Copies resources of pPage into rLine.
void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine);
+ void copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources);
+
/// Copies page one or more page streams from rContentStreams into rStream.
static sal_Int32 copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
SvMemoryStream& rStream, bool& rCompressed);
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 190e26cce19b..5f50661b013e 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -2131,7 +2131,7 @@ void PdfExportTest::testMultiPagePDF()
{ // embedded PDF page 2
vcl::filter::PDFObjectElement* pXObject2 = pXObjects->LookupObject(rIDs[1]);
CPPUNIT_ASSERT(pXObject2);
- CPPUNIT_ASSERT_EQUAL(OString("Im34"), rIDs[1]);
+ CPPUNIT_ASSERT_EQUAL(OString("Im24"), rIDs[1]);
auto pSubtype2 = dynamic_cast<vcl::filter::PDFNameElement*>(pXObject2->Lookup("Subtype"));
CPPUNIT_ASSERT(pSubtype2);
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx b/vcl/source/gdi/pdfobjectcopier.cxx
index 5e54ee68c289..a953c864c122 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -275,13 +275,18 @@ void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OString
{
// Maps from source object id (PDF image) to target object id (export result).
std::map<sal_Int32, sal_Int32> aCopiedResources;
+ copyPageResources(pPage, rLine, aCopiedResources);
+}
+void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources)
+{
rLine.append(" /Resources <<");
static const std::initializer_list<OString> aKeys
= { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
for (const auto& rKey : aKeys)
{
- rLine.append(copyExternalResources(*pPage, rKey, aCopiedResources));
+ rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
}
rLine.append(">>");
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 6f18a2882645..2aa973e567c5 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8585,7 +8585,7 @@ bool PDFWriterImpl::writeGradientFunction( GradientEmit const & rObject )
void PDFWriterImpl::writeJPG( JPGEmit& rObject )
{
- if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && !m_aContext.UseReferenceXObject)
+ if (rObject.m_aReferenceXObject.hasExternalPDFData() && !m_aContext.UseReferenceXObject)
{
writeReferenceXObject(rObject.m_aReferenceXObject);
return;
@@ -8684,23 +8684,19 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
{
// Parse the PDF data, we need that to write the PDF dictionary of our
// object.
- SvMemoryStream aPDFStream;
- aPDFStream.WriteBytes(rEmit.m_aPDFData.data(), rEmit.m_aPDFData.size());
- aPDFStream.Seek(0);
- filter::PDFDocument aPDFDocument;
- if (!aPDFDocument.Read(aPDFStream))
- {
- SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
+ if (rEmit.m_nExternalPDFDataIndex < 0)
return;
- }
- std::vector<filter::PDFObjectElement*> aPages = aPDFDocument.GetPages();
+ auto & rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
+ auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
+
+ std::vector<filter::PDFObjectElement*> aPages = rPDFDocument.GetPages();
if (aPages.empty())
{
SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: no pages");
return;
}
- size_t nPageIndex = rEmit.m_nPDFPageIndex >= 0 ? rEmit.m_nPDFPageIndex : 0;
+ size_t nPageIndex = rEmit.m_nExternalPDFPageIndex >= 0 ? rEmit.m_nExternalPDFPageIndex : 0;
filter::PDFObjectElement* pPage = aPages[nPageIndex];
if (!pPage)
@@ -8772,7 +8768,9 @@ void PDFWriterImpl::writeReferenceXObject(ReferenceXObjectEmit& rEmit)
}
PDFObjectCopier aCopier(*this);
- aCopier.copyPageResources(pPage, aLine);
+ auto & rResources = rExternalPDFStream.getCopiedResources();
+ aCopier.copyPageResources(pPage, aLine, rResources);
+
aLine.append(" /BBox [ 0 0 ");
aLine.append(nWidth);
aLine.append(" ");
@@ -8914,7 +8912,7 @@ namespace
bool PDFWriterImpl::writeBitmapObject( BitmapEmit& rObject, bool bMask )
{
- if (!rObject.m_aReferenceXObject.m_aPDFData.empty() && !m_aContext.UseReferenceXObject)
+ if (rObject.m_aReferenceXObject.hasExternalPDFData() && !m_aContext.UseReferenceXObject)
{
writeReferenceXObject(rObject.m_aReferenceXObject);
return true;
@@ -9236,10 +9234,10 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
sal_uInt32 nLength = rGraphic.getVectorGraphicData()->getVectorGraphicDataArrayLength();
auto const & rArray = rGraphic.getVectorGraphicData()->getVectorGraphicDataArray();
- auto pPDFData = std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), rArray.getConstArray() + nLength);
-
if (m_aContext.UseReferenceXObject)
{
+ auto pPDFData = std::make_shared<std::vector<sal_Int8>>(rArray.getConstArray(), rArray.getConstArray() + nLength);
+
// Store the original PDF data as an embedded file.
m_aEmbeddedFiles.emplace_back();
m_aEmbeddedFiles.back().m_nObject = createObject();
@@ -9248,8 +9246,9 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
}
else
{
- rEmit.m_nPDFPageIndex = rGraphic.getVectorGraphicData()->getPageIndex();
- rEmit.m_aPDFData = *pPDFData;
+ sal_Int32 aIndex = m_aExternalPDFStreams.store(reinterpret_cast<const sal_uInt8*>(rArray.getConstArray()), nLength);
+ rEmit.m_nExternalPDFPageIndex = rGraphic.getVectorGraphicData()->getPageIndex();
+ rEmit.m_nExternalPDFDataIndex = aIndex;
}
rEmit.m_nFormObject = createObject();
diff --git a/vcl/source/gdi/pdfwriter_impl.hxx b/vcl/source/gdi/pdfwriter_impl.hxx
index 79df86f9b679..17e5f6d8c3ea 100644
--- a/vcl/source/gdi/pdfwriter_impl.hxx
+++ b/vcl/source/gdi/pdfwriter_impl.hxx
@@ -46,6 +46,7 @@
#include <outdata.hxx>
#include <vcl/filter/pdfobjectcontainer.hxx>
+#include <pdf/ExternalPDFStreams.hxx>
#include "pdffontcache.hxx"
#include "pdfbuildin_fonts.hxx"
@@ -208,19 +209,27 @@ public:
sal_Int32 m_nBitmapObject;
/// Size of the bitmap replacement, in pixels.
Size m_aPixelSize;
+
/// PDF data from the graphic object, if not writing a reference XObject.
- std::vector<sal_Int8> m_aPDFData;
- sal_Int32 m_nPDFPageIndex;
+ sal_Int32 m_nExternalPDFDataIndex;
+ sal_Int32 m_nExternalPDFPageIndex;
ReferenceXObjectEmit()
- : m_nFormObject(0),
- m_nEmbeddedObject(0),
- m_nBitmapObject(0)
+ : m_nFormObject(0)
+ , m_nEmbeddedObject(0)
+ , m_nBitmapObject(0)
+ , m_nExternalPDFDataIndex(-1)
+ , m_nExternalPDFPageIndex(-1)
{
}
/// Returns the ID one should use when referring to this bitmap.
sal_Int32 getObject() const;
+
+ bool hasExternalPDFData() const
+ {
+ return m_nExternalPDFDataIndex >= 0;
+ }
};
struct BitmapEmit
@@ -709,6 +718,8 @@ private:
osl::File m_aFile;
bool m_bOpen;
+ ExternalPDFStreams m_aExternalPDFStreams;
+
/* output redirection; e.g. to accumulate content streams for
XObjects
*/
diff --git a/vcl/source/pdf/ExternalPDFStreams.cxx b/vcl/source/pdf/ExternalPDFStreams.cxx
new file mode 100644
index 000000000000..08f31ed22829
--- /dev/null
+++ b/vcl/source/pdf/ExternalPDFStreams.cxx
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <pdf/ExternalPDFStreams.hxx>
+#include <comphelper/hash.hxx>
+
+namespace vcl
+{
+sal_Int32 ExternalPDFStreams::store(const sal_uInt8* pData, sal_uInt32 nLength)
+{
+ sal_Int32 nIndex = -1;
+
+ std::vector<sal_uInt8> aHash
+ = comphelper::Hash::calculateHash(pData, nLength, comphelper::HashType::SHA1);
+
+ auto it = maStreamIndexMap.find(aHash);
+ if (it == maStreamIndexMap.end())
+ {
+ auto& rExternalStream = maStreamList.emplace_back();
+ rExternalStream.maData.resize(nLength);
+ std::copy(pData, pData + nLength, rExternalStream.maData.begin());
+ nIndex = maStreamList.size() - 1;
+ maStreamIndexMap.emplace(aHash, nIndex);
+ }
+ else
+ {
+ nIndex = it->second;
+ }
+
+ return nIndex;
+}
+
+ExternalPDFStream& ExternalPDFStreams::get(sal_uInt32 nIndex) { return maStreamList.at(nIndex); }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */