summaryrefslogtreecommitdiff
path: root/sdext/source/pdfimport/inc/pdfparse.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'sdext/source/pdfimport/inc/pdfparse.hxx')
-rw-r--r--sdext/source/pdfimport/inc/pdfparse.hxx314
1 files changed, 314 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx b/sdext/source/pdfimport/inc/pdfparse.hxx
new file mode 100644
index 000000000000..1cfd82e9c4cd
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -0,0 +1,314 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+#ifndef INCLUDED_PDFI_PDFPARSE_HXX
+#define INCLUDED_PDFI_PDFPARSE_HXX
+
+#include <sal/types.h>
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+
+#include <vector>
+#include <boost/unordered_map.hpp>
+
+namespace pdfparse
+{
+
+struct EmitImplData;
+struct PDFContainer;
+class EmitContext
+{
+ public:
+ virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
+ virtual unsigned int getCurPos() = 0;
+ virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
+ virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;
+
+ EmitContext( const PDFContainer* pTop = NULL );
+ virtual ~EmitContext();
+
+ // set this to deflate contained streams
+ bool m_bDeflate;
+ // set this to decrypt the PDF file
+ bool m_bDecrypt;
+
+ private:
+ friend struct PDFEntry;
+ EmitImplData* m_pImplData;
+};
+
+struct PDFEntry
+{
+ PDFEntry() {}
+ virtual ~PDFEntry();
+
+ virtual bool emit( EmitContext& rWriteContext ) const = 0;
+ virtual PDFEntry* clone() const = 0;
+
+ protected:
+ EmitImplData* getEmitData( EmitContext& rContext ) const;
+ void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
+};
+
+struct PDFComment : public PDFEntry
+{
+ rtl::OString m_aComment;
+
+ PDFComment( const rtl::OString& rComment )
+ : PDFEntry(), m_aComment( rComment ) {}
+ virtual ~PDFComment();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFValue : public PDFEntry
+{
+ // abstract base class for simple values
+ PDFValue() : PDFEntry() {}
+ virtual ~PDFValue();
+};
+
+struct PDFName : public PDFValue
+{
+ rtl::OString m_aName;
+
+ PDFName( const rtl::OString& rName )
+ : PDFValue(), m_aName( rName ) {}
+ virtual ~PDFName();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ rtl::OUString getFilteredName() const;
+};
+
+struct PDFString : public PDFValue
+{
+ rtl::OString m_aString;
+
+ PDFString( const rtl::OString& rString )
+ : PDFValue(), m_aString( rString ) {}
+ virtual ~PDFString();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ rtl::OString getFilteredString() const;
+};
+
+struct PDFNumber : public PDFValue
+{
+ double m_fValue;
+
+ PDFNumber( double fVal )
+ : PDFValue(), m_fValue( fVal ) {}
+ virtual ~PDFNumber();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFBool : public PDFValue
+{
+ bool m_bValue;
+
+ PDFBool( bool bVal )
+ : PDFValue(), m_bValue( bVal ) {}
+ virtual ~PDFBool();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFObjectRef : public PDFValue
+{
+ unsigned int m_nNumber;
+ unsigned int m_nGeneration;
+
+ PDFObjectRef( unsigned int nNr, unsigned int nGen )
+ : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
+ virtual ~PDFObjectRef();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFNull : public PDFValue
+{
+ PDFNull() {}
+ virtual ~PDFNull();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFObject;
+struct PDFContainer : public PDFEntry
+{
+ sal_Int32 m_nOffset;
+ std::vector<PDFEntry*> m_aSubElements;
+
+ // this is an abstract base class for identifying
+ // entries that can contain sub elements besides comments
+ PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
+ virtual ~PDFContainer();
+ virtual bool emitSubElements( EmitContext& rWriteContext ) const;
+ virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
+
+ PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
+ PDFObject* findObject( PDFObjectRef* pRef ) const
+ { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
+};
+
+struct PDFArray : public PDFContainer
+{
+ PDFArray() {}
+ virtual ~PDFArray();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFDict : public PDFContainer
+{
+ typedef boost::unordered_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
+ Map m_aMap;
+
+ PDFDict() {}
+ virtual ~PDFDict();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ // inserting a value of NULL will remove rName and the previous value
+ // from the dictionary
+ void insertValue( const rtl::OString& rName, PDFEntry* pValue );
+ // removes a name/value pair from the dict
+ void eraseValue( const rtl::OString& rName );
+ // builds new map as of sub elements
+ // returns NULL if successfull, else the first offending element
+ PDFEntry* buildMap();
+};
+
+struct PDFStream : public PDFEntry
+{
+ unsigned int m_nBeginOffset;
+ unsigned int m_nEndOffset; // offset of the byte after the stream
+ PDFDict* m_pDict;
+
+ PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
+ : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
+ virtual ~PDFStream();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
+};
+
+struct PDFTrailer : public PDFContainer
+{
+ PDFDict* m_pDict;
+
+ PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
+ virtual ~PDFTrailer();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+struct PDFFileImplData;
+struct PDFFile : public PDFContainer
+{
+ private:
+ mutable PDFFileImplData* m_pData;
+ PDFFileImplData* impl_getData() const;
+ public:
+ unsigned int m_nMajor; // PDF major
+ unsigned int m_nMinor; // PDF minor
+
+ PDFFile()
+ : PDFContainer(),
+ m_pData( NULL ),
+ m_nMajor( 0 ), m_nMinor( 0 )
+ {}
+ virtual ~PDFFile();
+
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ bool isEncrypted() const;
+ // this method checks whether rPwd is compatible with
+ // either user or owner password and sets up decrypt data in that case
+ // returns true if decryption can be done
+ bool setupDecryptionData( const rtl::OString& rPwd ) const;
+
+ bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
+ sal_uInt8* pOutBuffer,
+ unsigned int nObject, unsigned int nGeneration ) const;
+
+ rtl::OUString getDecryptionKey() const;
+};
+
+struct PDFObject : public PDFContainer
+{
+ PDFEntry* m_pObject;
+ PDFStream* m_pStream;
+ unsigned int m_nNumber;
+ unsigned int m_nGeneration;
+
+ PDFObject( unsigned int nNr, unsigned int nGen )
+ : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
+ virtual ~PDFObject();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+
+ // writes only the contained stream, deflated if necessary
+ bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
+
+ private:
+ // returns true if stream is deflated
+ // fills *ppStream and *pBytes with start of stream and count of bytes
+ // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
+ // fills in NULL and 0 in case of error
+ bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
+};
+
+struct PDFPart : public PDFContainer
+{
+ PDFPart() : PDFContainer() {}
+ virtual ~PDFPart();
+ virtual bool emit( EmitContext& rWriteContext ) const;
+ virtual PDFEntry* clone() const;
+};
+
+class PDFReader
+{
+ public:
+ PDFReader() {}
+ ~PDFReader() {}
+
+ PDFEntry* read( const char* pFileName );
+ PDFEntry* read( const char* pBuffer, unsigned int nLen );
+};
+
+} // namespace
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */