summaryrefslogtreecommitdiff
path: root/include/xmlsecurity
diff options
context:
space:
mode:
authorMiklos Vajna <vmiklos@collabora.co.uk>2017-02-24 13:46:52 +0100
committerMiklos Vajna <vmiklos@collabora.co.uk>2017-02-24 15:49:15 +0000
commit58eac1105f8504bd5320911fc6fe967ccaa3d469 (patch)
treea71f1ba78135b13bedf7a20924db1bd2df18a94e /include/xmlsecurity
parent3cc3dc176e00062d8395d9f3d83b49ab24542a61 (diff)
vcl: add initial CppunitTest_vcl_pdfexport
Invoke the PDF export filter and then use the PDF tokenizer from xmlsecurity to assert the contents of created PDF file. The testcase fails with commit 6db0f1feb1d9931d2726dd11a889c58815710ce0 (tdf#106059 PDF export: create a reference XObject for PDF images, 2017-02-22) reverted. Change-Id: I90526fef41d9560ae447f586df766bc50a491c43 Reviewed-on: https://gerrit.libreoffice.org/34609 Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk> Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'include/xmlsecurity')
-rw-r--r--include/xmlsecurity/pdfio/pdfdocument.hxx278
-rw-r--r--include/xmlsecurity/xmlsecuritydllapi.h23
2 files changed, 301 insertions, 0 deletions
diff --git a/include/xmlsecurity/pdfio/pdfdocument.hxx b/include/xmlsecurity/pdfio/pdfdocument.hxx
new file mode 100644
index 000000000000..0b27014c7418
--- /dev/null
+++ b/include/xmlsecurity/pdfio/pdfdocument.hxx
@@ -0,0 +1,278 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_XMLSECURITY_PDFIO_PDFDOCUMENT_HXX
+#define INCLUDED_XMLSECURITY_PDFIO_PDFDOCUMENT_HXX
+
+#include <map>
+#include <vector>
+
+#include <com/sun/star/security/XCertificate.hpp>
+
+#include <tools/stream.hxx>
+
+#include <xmlsecurity/xmlsecuritydllapi.h>
+
+struct SignatureInformation;
+
+namespace xmlsecurity
+{
+namespace pdfio
+{
+
+class PDFTrailerElement;
+class PDFHexStringElement;
+class PDFReferenceElement;
+class PDFDocument;
+class PDFDictionaryElement;
+class PDFArrayElement;
+class PDFStreamElement;
+
+/// A byte range in a PDF file.
+class XMLSECURITY_DLLPUBLIC PDFElement
+{
+public:
+ virtual bool Read(SvStream& rStream) = 0;
+ virtual ~PDFElement() { }
+};
+
+/// Indirect object: something with a unique ID.
+class XMLSECURITY_DLLPUBLIC PDFObjectElement : public PDFElement
+{
+ PDFDocument& m_rDoc;
+ double m_fObjectValue;
+ double m_fGenerationValue;
+ std::map<OString, PDFElement*> m_aDictionary;
+ /// Position after the '<<' token.
+ sal_uInt64 m_nDictionaryOffset;
+ /// Length of the dictionary buffer till (before) the '<<' token.
+ sal_uInt64 m_nDictionaryLength;
+ PDFDictionaryElement* m_pDictionaryElement;
+ /// The contained direct array, if any.
+ PDFArrayElement* m_pArrayElement;
+ /// The stream of this object, used when this is an object stream.
+ PDFStreamElement* m_pStreamElement;
+ /// Objects of an object stream.
+ std::vector< std::unique_ptr<PDFObjectElement> > m_aStoredElements;
+ /// Elements of an object in an object stream.
+ std::vector< std::unique_ptr<PDFElement> > m_aElements;
+ /// Uncompressed buffer of an object in an object stream.
+ std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
+
+public:
+ PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
+ bool Read(SvStream& rStream) override;
+ PDFElement* Lookup(const OString& rDictionaryKey);
+ PDFObjectElement* LookupObject(const OString& rDictionaryKey);
+ double GetObjectValue() const;
+ void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
+ sal_uInt64 GetDictionaryOffset();
+ void SetDictionaryLength(sal_uInt64 nDictionaryLength);
+ sal_uInt64 GetDictionaryLength();
+ PDFDictionaryElement* GetDictionary() const;
+ void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+ void SetArray(PDFArrayElement* pArrayElement);
+ void SetStream(PDFStreamElement* pStreamElement);
+ PDFArrayElement* GetArray() const;
+ /// Parse objects stored in this object stream.
+ void ParseStoredObjects();
+ std::vector< std::unique_ptr<PDFElement> >& GetStoredElements();
+ SvMemoryStream* GetStreamBuffer() const;
+ void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
+};
+
+/// Name object: a key string.
+class XMLSECURITY_DLLPUBLIC PDFNameElement : public PDFElement
+{
+ OString m_aValue;
+ /// Offset after the '/' token.
+ sal_uInt64 m_nLocation;
+ /// Length till the next token start.
+ sal_uInt64 m_nLength;
+public:
+ PDFNameElement();
+ bool Read(SvStream& rStream) override;
+ const OString& GetValue() const;
+ sal_uInt64 GetLocation() const;
+ sal_uInt64 GetLength() const;
+};
+
+/// Dictionary object: a set key-value pairs.
+class XMLSECURITY_DLLPUBLIC PDFDictionaryElement : public PDFElement
+{
+ /// Key-value pairs when the dictionary is a nested value.
+ std::map<OString, PDFElement*> m_aItems;
+ /// Offset after the '<<' token.
+ sal_uInt64 m_nLocation = 0;
+ /// Position after the '/' token.
+ std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
+ /// Length of the dictionary key and value, till (before) the next token.
+ std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
+
+public:
+ PDFDictionaryElement();
+ bool Read(SvStream& rStream) override;
+
+ static size_t Parse(const std::vector< std::unique_ptr<PDFElement> >& rElements, PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary);
+ static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary, const OString& rKey);
+ void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
+ sal_uInt64 GetKeyOffset(const OString& rKey) const;
+ void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
+ sal_uInt64 GetKeyValueLength(const OString& rKey) const;
+ const std::map<OString, PDFElement*>& GetItems() const;
+ /// Looks up an object which is only referenced in this dictionary.
+ PDFObjectElement* LookupObject(const OString& rDictionaryKey);
+};
+
+enum class TokenizeMode
+{
+ /// Full file.
+ END_OF_STREAM,
+ /// Till the first %%EOF token.
+ EOF_TOKEN,
+ /// Till the end of the current object.
+ END_OF_OBJECT,
+ /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
+ STORED_OBJECT
+};
+
+/// The type column of an entry in a cross-reference stream.
+enum class XRefEntryType
+{
+ /// xref "f" or xref stream "0".
+ FREE,
+ /// xref "n" or xref stream "1".
+ NOT_COMPRESSED,
+ /// xref stream "2.
+ COMPRESSED
+};
+
+/// An entry in a cross-reference stream.
+struct XRefEntry
+{
+ XRefEntryType m_eType;
+ /**
+ * Non-compressed: The byte offset of the object, starting from the
+ * beginning of the file.
+ * Compressed: The object number of the object stream in which this object is
+ * stored.
+ */
+ sal_uInt64 m_nOffset;
+ /**
+ * Non-compressed: The generation number of the object.
+ * Compressed: The index of this object within the object stream.
+ */
+ sal_uInt64 m_nGenerationNumber;
+ /// Are changed as part of an incremental update?.
+ bool m_bDirty;
+
+ XRefEntry();
+};
+
+/**
+ * In-memory representation of an on-disk PDF document.
+ *
+ * The PDF element list is not meant to be saved back to disk, but some
+ * elements remember their source offset / length, and based on that it's
+ * possible to modify the input file.
+ */
+class XMLSECURITY_DLLPUBLIC PDFDocument
+{
+ /// This vector owns all elements.
+ std::vector< std::unique_ptr<PDFElement> > m_aElements;
+ /// Object ID <-> object offset map.
+ std::map<size_t, XRefEntry> m_aXRef;
+ /// Object offset <-> Object pointer map.
+ std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
+ /// Object ID <-> Object pointer map.
+ std::map<size_t, PDFObjectElement*> m_aIDObjects;
+ /// List of xref offsets we know.
+ std::vector<size_t> m_aStartXRefs;
+ /// List of EOF offsets we know.
+ std::vector<size_t> m_aEOFs;
+ PDFTrailerElement* m_pTrailer;
+ /// When m_pTrailer is nullptr, this can still have a dictionary.
+ PDFObjectElement* m_pXRefStream;
+ /// All editing takes place in this buffer, if it happens.
+ SvMemoryStream m_aEditBuffer;
+
+ static int AsHex(char ch);
+ /// Decode a hex dump.
+ static std::vector<unsigned char> DecodeHexString(PDFHexStringElement* pElement);
+ /// Suggest a minimal, yet free signature ID to use for the next signature.
+ sal_uInt32 GetNextSignature();
+ /// Write the signature object as part of signing.
+ sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES, sal_uInt64& rLastByteRangeOffset, sal_Int64& rSignatureContentOffset);
+ /// Write the appearance object as part of signing.
+ sal_Int32 WriteAppearanceObject();
+ /// Write the annot object as part of signing.
+ sal_Int32 WriteAnnotObject(PDFObjectElement& rFirstPage, sal_Int32 nSignatureId, sal_Int32 nAppearanceId);
+ /// Write the updated Page object as part of signing.
+ bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
+ /// Write the updated Catalog object as part of signing.
+ bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
+ /// Write the updated cross-references as part of signing.
+ void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement* pRoot);
+
+public:
+ PDFDocument();
+ PDFDocument& operator=(const PDFDocument&) = delete;
+ PDFDocument(const PDFDocument&) = delete;
+ /// @name Low-level functions, to be used by PDFElement subclasses.
+ //@{
+ static OString ReadKeyword(SvStream& rStream);
+ static size_t FindStartXRef(SvStream& rStream);
+ void ReadXRef(SvStream& rStream);
+ void ReadXRefStream(SvStream& rStream);
+ static void SkipWhitespace(SvStream& rStream);
+ /// Instead of all whitespace, just skip CR and NL characters.
+ static void SkipLineBreaks(SvStream& rStream);
+ size_t GetObjectOffset(size_t nIndex) const;
+ const std::vector< std::unique_ptr<PDFElement> >& GetElements();
+ std::vector<PDFObjectElement*> GetPages();
+ /// Remember the end location of an EOF token.
+ void PushBackEOF(size_t nOffset);
+ /// Look up object based on object number, possibly by parsing object streams.
+ PDFObjectElement* LookupObject(size_t nObjectNumber);
+ /// Access to the input document, even after the input stream is gone.
+ SvMemoryStream& GetEditBuffer();
+ /// Tokenize elements from current offset.
+ bool Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< std::unique_ptr<PDFElement> >& rElements, PDFObjectElement* pObject);
+ /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
+ void SetIDObject(size_t nID, PDFObjectElement* pObject);
+ //@}
+
+ /// @name High-level functions, to be used by others.
+ //@{
+ /// Read elements from the start of the stream till its end.
+ bool Read(SvStream& rStream);
+ /// Sign the read document with xCertificate in the edit buffer.
+ bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate, const OUString& rDescription, bool bAdES);
+ /// Serializes the contents of the edit buffer.
+ bool Write(SvStream& rStream);
+ /// Get a list of signatures embedded into this document.
+ std::vector<PDFObjectElement*> GetSignatureWidgets();
+ /**
+ * @param rInformation The actual result.
+ * @param bLast If this is the last signature in the file, so it covers the whole file physically.
+ * @return If we can determinate a result.
+ */
+ static bool ValidateSignature(SvStream& rStream, PDFObjectElement* pSignature, SignatureInformation& rInformation, bool bLast);
+ /// Remove the nth signature from read document in the edit buffer.
+ bool RemoveSignature(size_t nPosition);
+ //@}
+};
+
+} // namespace pdfio
+} // namespace xmlsecurity
+
+#endif // INCLUDED_XMLSECURITY_PDFIO_PDFDOCUMENT_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/xmlsecurity/xmlsecuritydllapi.h b/include/xmlsecurity/xmlsecuritydllapi.h
new file mode 100644
index 000000000000..48da5464b727
--- /dev/null
+++ b/include/xmlsecurity/xmlsecuritydllapi.h
@@ -0,0 +1,23 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_XMLSECURITY_XMLSECURITYDLLAPI_H
+#define INCLUDED_XMLSECURITY_XMLSECURITYDLLAPI_H
+
+#include <sal/types.h>
+
+#if defined(XMLSECURITY_DLLIMPLEMENTATION)
+#define XMLSECURITY_DLLPUBLIC SAL_DLLPUBLIC_EXPORT
+#else
+#define XMLSECURITY_DLLPUBLIC SAL_DLLPUBLIC_IMPORT
+#endif
+
+#endif // INCLUDED_XMLSECURITY_XMLSECURITYDLLAPI_H
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */