summaryrefslogtreecommitdiff
path: root/sdext/source/pdfimport/pdfparse
diff options
context:
space:
mode:
Diffstat (limited to 'sdext/source/pdfimport/pdfparse')
-rw-r--r--sdext/source/pdfimport/pdfparse/makefile.mk58
-rw-r--r--sdext/source/pdfimport/pdfparse/pdfentries.cxx1364
-rw-r--r--sdext/source/pdfimport/pdfparse/pdfparse.cxx701
3 files changed, 2123 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/pdfparse/makefile.mk b/sdext/source/pdfimport/pdfparse/makefile.mk
new file mode 100644
index 000000000000..db549581e6f5
--- /dev/null
+++ b/sdext/source/pdfimport/pdfparse/makefile.mk
@@ -0,0 +1,58 @@
+#*************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2000, 2010 Oracle and/or its affiliates.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#*************************************************************************
+
+PRJ=..$/..$/..
+
+PRJNAME=sdext
+TARGET=pdfparse
+ENABLE_EXCEPTIONS=TRUE
+EXTERNAL_WARNINGS_NOT_ERRORS=TRUE
+
+# --- Settings -----------------------------------------------------
+
+.INCLUDE : settings.mk
+
+.IF "$(SYSTEM_ZLIB)" == "YES"
+CFLAGS+=-DSYSTEM_ZLIB
+.ENDIF
+
+ENVCFLAGS += -DBOOST_SPIRIT_USE_OLD_NAMESPACE
+
+# --- Files --------------------------------------------------------
+
+SLOFILES=\
+ $(SLO)$/pdfparse.obj \
+ $(SLO)$/pdfentries.obj
+
+# --- Targets ------------------------------------------------------
+
+.IF "$(ENABLE_PDFIMPORT)" == "NO"
+@all:
+ @echo "PDF Import extension disabled."
+.ENDIF
+
+.INCLUDE : target.mk
diff --git a/sdext/source/pdfimport/pdfparse/pdfentries.cxx b/sdext/source/pdfimport/pdfparse/pdfentries.cxx
new file mode 100644
index 000000000000..a2ff6b996ff2
--- /dev/null
+++ b/sdext/source/pdfimport/pdfparse/pdfentries.cxx
@@ -0,0 +1,1364 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_sdext.hxx"
+
+#include <pdfparse.hxx>
+
+#include <rtl/strbuf.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/alloc.h>
+#include <rtl/digest.h>
+#include <rtl/cipher.h>
+#include <rtl/memory.h>
+#ifdef SYSTEM_ZLIB
+#include "zlib.h"
+#else
+#include <zlib/zlib.h>
+#endif
+
+#include <math.h>
+#include <map>
+
+#include <stdio.h>
+
+using namespace rtl;
+
+namespace pdfparse
+{
+
+// Bookkeeping that is shared by all entries while a document is emitted.
+struct EmitImplData
+{
+    // value stored per object: (generation, buffer offset)
+    typedef std::pair< unsigned int, unsigned int > XRefEntry;
+    // xref table: maps object number to a pair of (generation, buffer offset)
+    typedef std::map< unsigned int, XRefEntry > XRefTable;
+    XRefTable m_aXRefTable;
+    // container of all indirect objects (usually a PDFFile*)
+    const PDFContainer* m_pObjectContainer;
+
+    // Records the offset of an emitted object.
+    // An unknown object number is always stored; a known one is only
+    // overwritten when nGeneration is higher than the stored generation.
+    // Returns true if the xref table was updated.
+    bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
+    {
+        const XRefEntry aEntry( nGeneration, nOffset );
+        const std::pair< XRefTable::iterator, bool > aResult =
+            m_aXRefTable.insert( XRefTable::value_type( nObject, aEntry ) );
+        if( aResult.second )
+            return true;    // new entry
+        if( nGeneration > aResult.first->second.first )
+        {
+            // newer generation replaces the old entry
+            aResult.first->second = aEntry;
+            return true;
+        }
+        return false;
+    }
+
+    EmitImplData( const PDFContainer* pTopContainer )
+        : m_pObjectContainer( pTopContainer )
+    {
+    }
+    ~EmitImplData() {}
+
+    // Forwards to PDFFile::decrypt if the object container is a PDFFile;
+    // returns false otherwise.
+    bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
+                  unsigned int nObject, unsigned int nGeneration ) const
+    {
+        const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
+        if( pFile == NULL )
+            return false;
+        return pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
+    }
+};
+
+}
+
+using namespace pdfparse;
+
+// Starts with deflating and decrypting switched off. The implementation
+// data is only allocated when a top level container is passed in.
+EmitContext::EmitContext( const PDFContainer* pTop ) :
+    m_bDeflate( false ),
+    m_bDecrypt( false ),
+    m_pImplData( pTop ? new EmitImplData( pTop ) : static_cast<EmitImplData*>( NULL ) )
+{
+}
+
+EmitContext::~EmitContext() // frees the EmitImplData held by this context
+{
+ delete m_pImplData; // may be NULL when no top container was given
+}
+
+PDFEntry::~PDFEntry() // defined out of line; the body is empty
+{
+}
+
+// Hands out the implementation data stored in the emit context
+// (NULL if the context has none).
+EmitImplData* PDFEntry::getEmitData( EmitContext& rContext ) const
+{
+    EmitImplData* const pData = rContext.m_pImplData;
+    return pData;
+}
+
+// Stores new implementation data in the emit context. Data that was
+// stored before is deleted unless it is the very same object.
+void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const
+{
+    EmitImplData* const pOld = rContext.m_pImplData;
+    rContext.m_pImplData = pNewEmitData;
+    if( pOld != pNewEmitData )
+        delete pOld;    // no-op when there was no old data
+}
+
+PDFValue::~PDFValue() // defined out of line; the body is empty
+{
+}
+
+PDFComment::~PDFComment() // defined out of line; the body is empty
+{
+}
+
+// Writes the stored comment text unchanged; returns the result of the write.
+bool PDFComment::emit( EmitContext& rWriteContext ) const
+{
+    const sal_Int32 nCommentLen = m_aComment.getLength();
+    return rWriteContext.write( m_aComment.getStr(), nCommentLen );
+}
+
+// Returns a newly allocated comment with the same text.
+PDFEntry* PDFComment::clone() const
+{
+    PDFComment* pCopy = new PDFComment( m_aComment );
+    return pCopy;
+}
+
+PDFName::~PDFName() // defined out of line; the body is empty
+{
+}
+
+// Writes " /" followed by the (unfiltered) name; stops at the first
+// failing write and returns false in that case.
+bool PDFName::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( " /", 2 )
+        && rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
+}
+
+// Returns a newly allocated name entry with the same name.
+PDFEntry* PDFName::clone() const
+{
+    PDFName* pCopy = new PDFName( m_aName );
+    return pCopy;
+}
+
+// Value (0..15) of one hexadecimal digit of a "#xx" name escape.
+// Characters that are no hex digit count as 0.
+static sal_Char lcl_nameHexDigit( sal_Char c )
+{
+    if( c >= '0' && c <= '9' )
+        return sal_Char( c - '0' );
+    if( c >= 'a' && c <= 'f' )
+        return sal_Char( c - 'a' + 10 );
+    if( c >= 'A' && c <= 'F' )
+        return sal_Char( c - 'A' + 10 );
+    return 0;
+}
+
+// Returns the name with every "#xx" escape replaced by the byte it
+// encodes; the resulting bytes are converted from UTF-8 to an OUString.
+// A '#' that is not followed by two more characters is copied verbatim.
+OUString PDFName::getFilteredName() const
+{
+    const sal_Char* pStr = m_aName.getStr();
+    const unsigned int nLen = m_aName.getLength();
+    OStringBuffer aFilter( m_aName.getLength() );
+    for( unsigned int i = 0; i < nLen; i++ )
+    {
+        // "i + 2 < nLen" instead of "i < nLen - 3": the subtraction wraps
+        // around for names shorter than three characters (reading past
+        // the buffer) and it skipped an escape at the very end of a name
+        if( pStr[i] == '#' && i + 2 < nLen )
+        {
+            const sal_Char cHigh = lcl_nameHexDigit( pStr[i+1] );
+            const sal_Char cLow = lcl_nameHexDigit( pStr[i+2] );
+            aFilter.append( sal_Char( (cHigh << 4) | cLow ) );
+            i += 2; // the loop increment steps over the second digit
+        }
+        else
+            aFilter.append( pStr[i] );
+    }
+    return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
+}
+
+PDFString::~PDFString() // defined out of line; the body is empty
+{
+}
+
+// Writes a blank followed by the raw string token (delimiters included);
+// stops at the first failing write and returns false in that case.
+bool PDFString::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( " ", 1 )
+        && rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
+}
+
+// Returns a newly allocated string entry with the same raw token.
+PDFEntry* PDFString::clone() const
+{
+    PDFString* pCopy = new PDFString( m_aString );
+    return pCopy;
+}
+
+// Returns the bytes denoted by the raw string token.
+// A token starting with '(' is treated as a literal string: backslash
+// escapes (\n \r \t \b \f \( \) \\, up to three octal digits, line
+// continuation) are resolved. A token starting with '<' is treated as a
+// hex string: pairs of hex digits are turned into bytes, a missing last
+// digit counts as 0. Any other token yields an empty string.
+OString PDFString::getFilteredString() const
+{
+    int nLen = m_aString.getLength();
+    OStringBuffer aBuf( nLen );
+
+    const sal_Char* pStr = m_aString.getStr();
+    if( *pStr == '(' )
+    {
+        const sal_Char* pRun = pStr+1;
+        // the last character is the closing parenthesis and is not copied
+        while( pRun - pStr < nLen-1 )
+        {
+            if( *pRun == '\\' )
+            {
+                pRun++;
+                if( pRun - pStr < nLen )
+                {
+                    sal_Char aEsc = 0;
+                    if( *pRun == 'n' )
+                        aEsc = '\n';
+                    else if( *pRun == 'r' )
+                        aEsc = '\r';
+                    else if( *pRun == 't' )
+                        aEsc = '\t';
+                    else if( *pRun == 'b' )
+                        aEsc = '\b';
+                    else if( *pRun == 'f' )
+                        aEsc = '\f';
+                    else if( *pRun == '(' )
+                        aEsc = '(';
+                    else if( *pRun == ')' )
+                        aEsc = ')';
+                    else if( *pRun == '\\' )
+                        aEsc = '\\';
+                    else if( *pRun == '\n' )
+                    {
+                        // backslash + line end: line continuation, no output
+                        pRun++;
+                        continue;
+                    }
+                    else if( *pRun == '\r' )
+                    {
+                        // same for CR and CR LF line ends
+                        pRun++;
+                        if( *pRun == '\n' )
+                            pRun++;
+                        continue;
+                    }
+                    else
+                    {
+                        // up to three octal digits
+                        int nDigits = 0;
+                        while( nDigits < 3 && *pRun >= '0' && *pRun <= '7' )
+                        {
+                            aEsc = 8*aEsc + (*pRun++ - '0');
+                            nDigits++;
+                        }
+                        if( nDigits == 0 )
+                        {
+                            // not an escape sequence at all: the backslash is
+                            // dropped and the character itself is kept
+                            // (previously a NUL byte was emitted here)
+                            aEsc = *pRun;
+                        }
+                        else
+                        {
+                            // move pointer back to last character of octal sequence
+                            pRun--;
+                        }
+                    }
+                    aBuf.append( aEsc );
+                }
+            }
+            else
+                aBuf.append( *pRun );
+            // move pointer to next character
+            pRun++;
+        }
+    }
+    else if( *pStr == '<' )
+    {
+        const sal_Char* pRun = pStr+1;
+        while( *pRun != '>' && pRun - pStr < nLen )
+        {
+            // high nibble; characters that are no hex digit count as 0
+            sal_Char rResult = 0;
+            if( *pRun >= '0' && *pRun <= '9' )
+                rResult = sal_Char( *pRun-'0' ) << 4;
+            else if( *pRun >= 'a' && *pRun <= 'f' )
+                rResult = sal_Char( *pRun-'a' + 10 ) << 4;
+            else if( *pRun >= 'A' && *pRun <= 'F' )
+                rResult = sal_Char( *pRun-'A' + 10 ) << 4;
+            pRun++;
+            // low nibble, left at 0 if the string ends after an odd digit
+            if( *pRun != '>' && pRun - pStr < nLen )
+            {
+                if( *pRun >= '0' && *pRun <= '9' )
+                    rResult |= sal_Char( *pRun-'0' );
+                else if( *pRun >= 'a' && *pRun <= 'f' )
+                    rResult |= sal_Char( *pRun-'a' + 10 );
+                else if( *pRun >= 'A' && *pRun <= 'F' )
+                    rResult |= sal_Char( *pRun-'A' + 10 );
+            }
+            pRun++;
+            aBuf.append( rResult );
+        }
+    }
+
+    return aBuf.makeStringAndClear();
+}
+
+PDFNumber::~PDFNumber() // defined out of line; the body is empty
+{
+}
+
+// Writes a blank followed by the number in plain decimal notation with
+// at most five fractional digits (further digits are cut off, trailing
+// zeros are not written). A fraction that is within 1e-5 of the next
+// integer is rounded up to that integer. No sign is written when both
+// the integer and the fractional part come out as zero.
+bool PDFNumber::emit( EmitContext& rWriteContext ) const
+{
+    const int nDigits = 5;
+    const bool bNegative = m_fValue < 0.0;
+    double fRest = bNegative ? -m_fValue : m_fValue;
+
+    // split into integer part and fraction
+    sal_Int64 nWhole = (sal_Int64)fRest;
+    fRest -= (double)nWhole;
+    // optimizing hardware may lead to a value of 1.0 after the subtraction
+    if( fRest == 1.0 || log10( 1.0-fRest ) <= -nDigits )
+    {
+        nWhole++;
+        fRest = 0.0;
+    }
+
+    // fraction scaled to an integer of nDigits digits
+    sal_Int64 nFraction = 0;
+    if( fRest )
+    {
+        fRest *= pow( 10.0, (double)nDigits );
+        nFraction = (sal_Int64)fRest;
+    }
+
+    rtl::OStringBuffer aOut( 32 );
+    aOut.append( ' ' );
+    if( bNegative && ( nWhole || nFraction ) )
+        aOut.append( '-' );
+    aOut.append( nWhole );
+    if( nFraction )
+    {
+        aOut.append( '.' );
+        // emit digit by digit, most significant first, until nothing is left
+        sal_Int64 nDivisor = (sal_Int64)(pow( 10.0, nDigits - 1.0 )+0.5);
+        int nWritten = 0;
+        while( nWritten < nDigits && nFraction )
+        {
+            const sal_Int64 nDigit = nFraction / nDivisor;
+            nFraction -= nDigit * nDivisor;
+            aOut.append( nDigit );
+            nDivisor /= 10;
+            nWritten++;
+        }
+    }
+
+    return rWriteContext.write( aOut.getStr(), aOut.getLength() );
+}
+
+// Returns a newly allocated number entry with the same value.
+PDFEntry* PDFNumber::clone() const
+{
+    PDFNumber* pCopy = new PDFNumber( m_fValue );
+    return pCopy;
+}
+
+
+PDFBool::~PDFBool() // defined out of line; the body is empty
+{
+}
+
+// Writes " true" or " false" depending on the stored value.
+bool PDFBool::emit( EmitContext& rWriteContext ) const
+{
+    if( m_bValue )
+        return rWriteContext.write( " true", 5 );
+    return rWriteContext.write( " false", 6 );
+}
+
+// Returns a newly allocated boolean entry with the same value.
+PDFEntry* PDFBool::clone() const
+{
+    PDFBool* pCopy = new PDFBool( m_bValue );
+    return pCopy;
+}
+
+PDFNull::~PDFNull() // defined out of line; the body is empty
+{
+}
+
+// Writes the keyword " null" (with a leading blank).
+bool PDFNull::emit( EmitContext& rWriteContext ) const
+{
+    const char* const pKeyword = " null";
+    return rWriteContext.write( pKeyword, 5 );
+}
+
+// Returns a newly allocated null entry.
+PDFEntry* PDFNull::clone() const
+{
+    PDFNull* pCopy = new PDFNull();
+    return pCopy;
+}
+
+
+PDFObjectRef::~PDFObjectRef() // defined out of line; the body is empty
+{
+}
+
+// Writes the reference in the form " <number> <generation> R".
+bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
+{
+    OStringBuffer aRef( 16 );
+    aRef.append( ' ' );
+    aRef.append( sal_Int32( m_nNumber ) );
+    aRef.append( ' ' );
+    aRef.append( sal_Int32( m_nGeneration ) );
+    aRef.append( " R", 2 );
+
+    const sal_Int32 nRefLen = aRef.getLength();
+    return rWriteContext.write( aRef.getStr(), nRefLen );
+}
+
+// Returns a newly allocated reference to the same object and generation.
+PDFEntry* PDFObjectRef::clone() const
+{
+    PDFObjectRef* pCopy = new PDFObjectRef( m_nNumber, m_nGeneration );
+    return pCopy;
+}
+
+// The container deletes all of its sub elements.
+PDFContainer::~PDFContainer()
+{
+    for( std::vector<PDFEntry*>::iterator it = m_aSubElements.begin();
+         it != m_aSubElements.end(); ++it )
+    {
+        delete *it;
+    }
+}
+
+// Emits the sub elements in order. Stops and returns false as soon as
+// one of them fails; returns true otherwise (also for no elements).
+bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
+{
+    bool bOk = true;
+    for( std::vector<PDFEntry*>::const_iterator it = m_aSubElements.begin();
+         bOk && it != m_aSubElements.end(); ++it )
+    {
+        bOk = (*it)->emit( rWriteContext );
+    }
+    return bOk;
+}
+
+// Appends a clone of every sub element, in order, to rNewSubElements.
+void PDFContainer::cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const
+{
+    // element count is taken once up front, as before
+    const int nCount = m_aSubElements.size();
+    int nIndex = 0;
+    while( nIndex < nCount )
+    {
+        PDFEntry* pCopy = m_aSubElements[nIndex]->clone();
+        rNewSubElements.push_back( pCopy );
+        nIndex++;
+    }
+}
+
+// Returns the first direct sub element that is a PDFObject with the
+// given object number and generation, or NULL if there is none.
+PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
+{
+    for( std::vector<PDFEntry*>::const_iterator it = m_aSubElements.begin();
+         it != m_aSubElements.end(); ++it )
+    {
+        PDFObject* pCandidate = dynamic_cast<PDFObject*>(*it);
+        if( pCandidate == NULL )
+            continue;   // not an object at all
+        if( pCandidate->m_nNumber == nNumber &&
+            pCandidate->m_nGeneration == nGeneration )
+            return pCandidate;
+    }
+    return NULL;
+}
+
+PDFArray::~PDFArray() // defined out of line; the body is empty
+{
+}
+
+// Writes "[", the sub elements and "]"; returns false as soon as one
+// of these steps fails.
+bool PDFArray::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( "[", 1 )
+        && emitSubElements( rWriteContext )
+        && rWriteContext.write( "]", 1 );
+}
+
+// Returns a newly allocated array holding clones of all sub elements.
+PDFEntry* PDFArray::clone() const
+{
+    PDFArray* pCopy = new PDFArray();
+    cloneSubElements( pCopy->m_aSubElements );
+    return pCopy;
+}
+
+PDFDict::~PDFDict() // defined out of line; the body is empty
+{
+}
+
+// Writes the dictionary opener (with line end), the sub elements and
+// the closer (surrounded by line ends); returns false as soon as one of
+// these steps fails.
+bool PDFDict::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( "<<\n", 3 )
+        && emitSubElements( rWriteContext )
+        && rWriteContext.write( "\n>>\n", 4 );
+}
+
+// Sets the value stored under rName.
+//  - pValue == NULL removes the entry (see eraseValue) and nothing else.
+//  - An unknown name is appended as a new name/value pair.
+//  - For a known name the old value is replaced in the sub elements and
+//    deleted.
+// The dictionary takes over pValue; it ends up in m_aSubElements.
+void PDFDict::insertValue( const OString& rName, PDFEntry* pValue )
+{
+    if( ! pValue )
+    {
+        eraseValue( rName );
+        // without this return a NULL value was appended to the sub
+        // elements and to the map, which emit/clone would dereference
+        return;
+    }
+
+    std::hash_map<OString,PDFEntry*,OStringHash>::iterator it = m_aMap.find( rName );
+    if( it == m_aMap.end() )
+    {
+        // new name/value, pair, append it
+        m_aSubElements.push_back( new PDFName( rName ) );
+        m_aSubElements.push_back( pValue );
+    }
+    else if( it->second == pValue )
+    {
+        // the very same object is already stored; deleting the "old"
+        // value would leave dangling pointers behind
+        return;
+    }
+    else
+    {
+        unsigned int nSub = m_aSubElements.size();
+        for( unsigned int i = 0; i < nSub; i++ )
+            if( m_aSubElements[i] == it->second )
+                m_aSubElements[i] = pValue;
+        delete it->second;
+    }
+    m_aMap[ rName ] = pValue;
+}
+
+// Removes the first name/value pair whose name equals rName. The value
+// is the first element after the name that is not a comment. Name and
+// value are deleted, removed from the sub elements and the map is
+// rebuilt. Nothing happens if no such pair exists.
+void PDFDict::eraseValue( const OString& rName )
+{
+    const unsigned int nCount = m_aSubElements.size();
+    for( unsigned int nNamePos = 0; nNamePos < nCount; nNamePos++ )
+    {
+        PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[nNamePos]);
+        if( pName == NULL || ! pName->m_aName.equals( rName ) )
+            continue;
+
+        // skip comments between name and value
+        unsigned int nValuePos = nNamePos+1;
+        while( nValuePos < nCount &&
+               dynamic_cast<PDFComment*>(m_aSubElements[nValuePos]) != NULL )
+            nValuePos++;
+        if( nValuePos >= nCount )
+            continue;   // name without value, keep looking
+
+        // free name and value
+        delete m_aSubElements[nValuePos];
+        delete m_aSubElements[nNamePos];
+        // remove subelements from vector, higher index first
+        m_aSubElements.erase( m_aSubElements.begin()+nValuePos );
+        m_aSubElements.erase( m_aSubElements.begin()+nNamePos );
+        buildMap();
+        return;
+    }
+}
+
+// Rebuilds m_aMap from the sub elements, which are expected to be
+// alternating names and values (comments are skipped).
+// Returns NULL on success. Otherwise returns the offending element: the
+// first element found in name position that is not a PDFName, or the
+// last name if it has no value.
+PDFEntry* PDFDict::buildMap()
+{
+    m_aMap.clear();
+
+    PDFName* pPendingName = NULL;
+    const unsigned int nCount = m_aSubElements.size();
+    for( unsigned int n = 0; n < nCount; n++ )
+    {
+        PDFEntry* pEntry = m_aSubElements[n];
+        if( dynamic_cast<PDFComment*>(pEntry) != NULL )
+            continue;
+
+        if( pPendingName != NULL )
+        {
+            // value for the name seen before
+            m_aMap[ pPendingName->m_aName ] = pEntry;
+            pPendingName = NULL;
+            continue;
+        }
+
+        // a name is expected here
+        pPendingName = dynamic_cast<PDFName*>(pEntry);
+        if( pPendingName == NULL )
+            return pEntry;
+    }
+    return pPendingName;
+}
+
+// Returns a newly allocated dictionary holding clones of all sub
+// elements; the name map of the copy is built from those clones.
+PDFEntry* PDFDict::clone() const
+{
+    PDFDict* pCopy = new PDFDict();
+    cloneSubElements( pCopy->m_aSubElements );
+    pCopy->buildMap();
+    return pCopy;
+}
+
+PDFStream::~PDFStream() // empty body; m_pDict is not deleted here
+{
+}
+
+// Copies the bytes between begin and end offset from the original data.
+bool PDFStream::emit( EmitContext& rWriteContext ) const
+{
+    const unsigned int nStreamLen = m_nEndOffset - m_nBeginOffset;
+    return rWriteContext.copyOrigBytes( m_nBeginOffset, nStreamLen );
+}
+
+// Returns a newly allocated stream entry covering the same offsets.
+// The dictionary pointer of the copy is passed as NULL.
+PDFEntry* PDFStream::clone() const
+{
+    PDFStream* pCopy = new PDFStream( m_nBeginOffset, m_nEndOffset, NULL );
+    return pCopy;
+}
+
+// Returns the value of the Length entry of the stream dictionary.
+// The entry may be a number or, if pContainer is given, a reference to
+// an object in pContainer whose content is a number. Returns 0 if there
+// is no dictionary, no Length entry or no number can be found.
+unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
+{
+    if( ! m_pDict )
+        return 0;
+
+    std::hash_map<OString,PDFEntry*,OStringHash>::const_iterator it =
+        m_pDict->m_aMap.find( "Length" );
+    if( it == m_pDict->m_aMap.end() )
+        return 0;
+
+    // direct number ?
+    PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
+    if( pNum == NULL && pContainer != NULL )
+    {
+        // indirect: look up the referenced object in the container
+        PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
+        PDFObject* pTarget = NULL;
+        if( pRef != NULL )
+            pTarget = pContainer->findObject( pRef->m_nNumber, pRef->m_nGeneration );
+        if( pTarget != NULL && pTarget->m_pObject )
+            pNum = dynamic_cast<PDFNumber*>(pTarget->m_pObject);
+    }
+
+    if( pNum == NULL )
+        return 0;
+    return static_cast<unsigned int>(pNum->m_fValue);
+}
+
+PDFObject::~PDFObject() // defined out of line; the body is empty
+{
+}
+
+// Reads the raw stream data of this object from the original file.
+//
+// ppStream          receives a buffer allocated with rtl_allocateMemory
+//                   that the caller has to free with rtl_freeMemory, or
+//                   NULL if there is no usable stream or reading failed
+// pBytes            receives the number of payload bytes in *ppStream
+//                   (the stream keyword and the following line end are
+//                   removed; the count is the dictionary's Length,
+//                   limited to what was actually read)
+// pObjectContainer  used to resolve an indirect Length entry
+// rContext          source of the original bytes; if m_bDecrypt is set
+//                   the payload is decrypted in place
+//
+// Returns true if the (first) filter of the stream is FlateDecode.
+bool PDFObject::getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
+{
+    *ppStream = NULL;
+    *pBytes = 0;
+
+    if( ! m_pStream || ! m_pStream->m_pDict ||
+        m_pStream->m_nEndOffset <= m_pStream->m_nBeginOffset+15 )
+        return false;
+
+    const unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
+    char* pBuffer = static_cast<char*>(rtl_allocateMemory( nOuterStreamLen ));
+    // check the allocation itself (the old code tested the out parameter)
+    if( ! pBuffer )
+        return false;
+
+    unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, pBuffer );
+    if( nRead != nOuterStreamLen )
+    {
+        rtl_freeMemory( pBuffer );
+        return false;
+    }
+
+    // is there a filter entry ?
+    bool bIsDeflated = false;
+    std::hash_map<OString,PDFEntry*,OStringHash>::const_iterator it =
+        m_pStream->m_pDict->m_aMap.find( "Filter" );
+    if( it != m_pStream->m_pDict->m_aMap.end() )
+    {
+        PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
+        if( ! pFilter )
+        {
+            // an array of filters: only the first one is looked at
+            PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
+            if( pArray && ! pArray->m_aSubElements.empty() )
+                pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front());
+        }
+
+        // is the (first) filter FlateDecode ?
+        if( pFilter && pFilter->m_aName.equals( "FlateDecode" ) )
+            bIsDeflated = true;
+    }
+
+    // prepare compressed data section
+    unsigned int nSkip = 0;
+    if( pBuffer[0] == 's' )
+        nSkip = 6; // skip "stream" (the buffer holds more than 15 bytes)
+    // skip line end after "stream", but never leave the buffer
+    while( nSkip < nOuterStreamLen &&
+           ( pBuffer[nSkip] == '\r' || pBuffer[nSkip] == '\n' ) )
+        nSkip++;
+
+    // get the compressed length; a Length entry that claims more than
+    // was read must not make us touch memory behind the buffer
+    unsigned int nBytes = m_pStream->getDictLength( pObjectContainer );
+    if( nBytes > nOuterStreamLen - nSkip )
+        nBytes = nOuterStreamLen - nSkip;
+    if( nSkip != 0 )
+        rtl_moveMemory( pBuffer, pBuffer + nSkip, nBytes );
+
+    if( rContext.m_bDecrypt )
+    {
+        EmitImplData* pEData = getEmitData( rContext );
+        if( pEData )
+            pEData->decrypt( reinterpret_cast<const sal_uInt8*>(pBuffer),
+                             nBytes,
+                             reinterpret_cast<sal_uInt8*>(pBuffer),
+                             m_nNumber,
+                             m_nGeneration
+                             ); // decrypt inplace
+    }
+
+    *ppStream = pBuffer;
+    *pBytes = nBytes;
+    return bIsDeflated;
+}
+
+// Inflates nLen bytes starting at pBegin.
+//
+// *pOutBuf is (re)allocated with rtl_reallocateMemory and has to be
+// freed by the caller with rtl_freeMemory; *pOutLen receives the number
+// of inflated bytes. If the input ends before the end of the deflate
+// stream, the bytes inflated so far are returned. On a zlib error or if
+// memory runs out, *pOutBuf is freed and set to NULL and *pOutLen is 0.
+static void unzipToBuffer( const char* pBegin, unsigned int nLen,
+                           sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
+{
+    const unsigned int buf_increment_size = 16384;
+
+    z_stream aZStr;
+    aZStr.next_in = (Bytef*)pBegin;
+    aZStr.avail_in = nLen;
+    aZStr.zalloc = ( alloc_func )0;
+    aZStr.zfree = ( free_func )0;
+    aZStr.opaque = ( voidpf )0;
+    aZStr.next_out = (Bytef*)0;
+    aZStr.avail_out = 0;
+
+    int err = inflateInit(&aZStr);
+    const bool bInitialized = ( err == Z_OK );
+
+    sal_uInt32 nAllocated = 0;
+    if( bInitialized )
+    {
+        sal_uInt8* pNew = (sal_uInt8*)rtl_reallocateMemory( *pOutBuf, buf_increment_size );
+        if( pNew )
+        {
+            *pOutBuf = pNew;
+            nAllocated = buf_increment_size;
+            aZStr.next_out = (Bytef*)*pOutBuf;
+            aZStr.avail_out = buf_increment_size;
+        }
+        else
+            err = Z_MEM_ERROR; // old block (if any) is freed below
+    }
+
+    while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
+    {
+        err = inflate( &aZStr, Z_NO_FLUSH );
+        if( aZStr.avail_out == 0 && err != Z_STREAM_END && err >= Z_OK )
+        {
+            // output buffer is full, make room for another chunk
+            const sal_uInt32 nNewAlloc = nAllocated + buf_increment_size;
+            sal_uInt8* pNew = (sal_uInt8*)rtl_reallocateMemory( *pOutBuf, nNewAlloc );
+            if( ! pNew )
+            {
+                err = Z_MEM_ERROR;
+                break;
+            }
+            *pOutBuf = pNew;
+            aZStr.next_out = (Bytef*)(*pOutBuf + nAllocated);
+            aZStr.avail_out = buf_increment_size;
+            nAllocated = nNewAlloc;
+        }
+    }
+
+    if( bInitialized )
+        inflateEnd(&aZStr);
+
+    if( err < Z_OK )
+    {
+        rtl_freeMemory( *pOutBuf );
+        *pOutBuf = NULL;
+        *pOutLen = 0;
+    }
+    else
+    {
+        // report only what was really written, also for truncated input
+        *pOutLen = nAllocated - aZStr.avail_out;
+    }
+}
+
+// Writes the payload of this object's stream to rWriteContext; a stream
+// whose (first) filter is FlateDecode is inflated first.
+// pParsedFile is used to resolve an indirect Length entry.
+// Returns true if stream data was written successfully, false if there
+// is no stream, no data, inflating failed or the write failed.
+// (Before, the result was never set and the function always returned
+// false.)
+bool PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
+{
+    if( ! m_pStream )
+        return false;
+
+    bool bSuccess = false;
+    char* pStream = NULL;
+    unsigned int nBytes = 0;
+    const bool bDeflated = getDeflatedStream( &pStream, &nBytes, pParsedFile, rWriteContext );
+    if( bDeflated && nBytes )
+    {
+        sal_uInt8* pOutBytes = NULL;
+        sal_uInt32 nOutBytes = 0;
+        unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
+        // unzipToBuffer leaves a NULL buffer behind on failure
+        if( pOutBytes && nOutBytes )
+            bSuccess = rWriteContext.write( pOutBytes, nOutBytes );
+        rtl_freeMemory( pOutBytes );
+    }
+    else if( pStream && nBytes )
+        bSuccess = rWriteContext.write( pStream, nBytes );
+    rtl_freeMemory( pStream );
+    return bSuccess;
+}
+
+// Emits the object: a line end, the "<number> <generation> obj" header,
+// the content and "endobj". The position of the header is recorded in
+// the xref table of the emit data, if there is emit data.
+//
+// If rWriteContext.m_bDeflate is set and the object has a FlateDecode
+// stream that can be inflated, a clone of the object is emitted whose
+// dictionary has Length set to the inflated size and the Filter entry
+// removed, followed by the inflated stream. In all other cases the sub
+// elements are emitted unchanged.
+//
+// Returns false as soon as a write fails.
+bool PDFObject::emit( EmitContext& rWriteContext ) const
+{
+    if( ! rWriteContext.write( "\n", 1 ) )
+        return false;
+
+    EmitImplData* pEData = getEmitData( rWriteContext );
+    if( pEData )
+        pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
+
+    OStringBuffer aBuf( 32 );
+    aBuf.append( sal_Int32( m_nNumber ) );
+    aBuf.append( ' ' );
+    aBuf.append( sal_Int32( m_nGeneration ) );
+    aBuf.append( " obj\n" );
+    if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
+        return false;
+
+    if( rWriteContext.m_bDeflate && pEData )
+    {
+        char* pStream = NULL;
+        unsigned int nBytes = 0;
+        if( getDeflatedStream( &pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext )
+            && pStream && nBytes )
+        {
+            // unzip the stream
+            sal_uInt8* pOutBytes = NULL;
+            sal_uInt32 nOutBytes = 0;
+            unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
+            if( nOutBytes )
+            {
+                // clone this object
+                PDFObject* pClone = static_cast<PDFObject*>(clone());
+                // clone() only links stream and dictionary if it finds
+                // both; fall back to the unmodified object otherwise
+                if( pClone->m_pStream && pClone->m_pStream->m_pDict )
+                {
+                    // set length in the dictionary to new stream length
+                    PDFNumber* pNewLen = new PDFNumber( double(nOutBytes) );
+                    pClone->m_pStream->m_pDict->insertValue( "Length", pNewLen );
+                    // delete flatedecode filter
+                    pClone->m_pStream->m_pDict->eraseValue( "Filter" );
+
+                    // write sub elements except stream
+                    bool bRet = true;
+                    unsigned int nEle = pClone->m_aSubElements.size();
+                    for( unsigned int i = 0; i < nEle && bRet; i++ )
+                    {
+                        if( pClone->m_aSubElements[i] != pClone->m_pStream )
+                            bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
+                    }
+                    delete pClone;
+                    // write stream; every write is checked now, the
+                    // result of the first one used to be dropped
+                    if( bRet )
+                        bRet = rWriteContext.write( "stream\n", 7 );
+                    if( bRet )
+                        bRet = rWriteContext.write( pOutBytes, nOutBytes );
+                    if( bRet )
+                        bRet = rWriteContext.write( "\nendstream\nendobj\n", 18 );
+                    rtl_freeMemory( pStream );
+                    rtl_freeMemory( pOutBytes );
+                    return bRet;
+                }
+                delete pClone;
+            }
+            rtl_freeMemory( pOutBytes );
+        }
+        rtl_freeMemory( pStream );
+    }
+
+    if( ! emitSubElements( rWriteContext ) )
+        return false;
+    return rWriteContext.write( "\nendobj\n", 8 );
+}
+
+// Returns a newly allocated object with the same number and generation
+// and clones of all sub elements. m_pObject and m_pStream of the copy
+// point to the clones at the positions the originals have in this
+// object. The stream is only linked if the object content was found
+// before it; its dictionary is set if that content is a dictionary.
+PDFEntry* PDFObject::clone() const
+{
+    PDFObject* pCopy = new PDFObject( m_nNumber, m_nGeneration );
+    cloneSubElements( pCopy->m_aSubElements );
+
+    const unsigned int nCount = m_aSubElements.size();
+    for( unsigned int n = 0; n < nCount; n++ )
+    {
+        if( m_aSubElements[n] == m_pObject )
+        {
+            pCopy->m_pObject = pCopy->m_aSubElements[n];
+            continue;
+        }
+        if( m_aSubElements[n] != m_pStream || ! pCopy->m_pObject )
+            continue;
+
+        // the stream, after the object content has been seen
+        pCopy->m_pStream = dynamic_cast<PDFStream*>(pCopy->m_aSubElements[n]);
+        PDFDict* pCopiedDict = dynamic_cast<PDFDict*>(pCopy->m_pObject);
+        if( pCopiedDict )
+            pCopy->m_pStream->m_pDict = pCopiedDict;
+    }
+    return pCopy;
+}
+
+PDFTrailer::~PDFTrailer() // defined out of line; the body is empty
+{
+}
+
+// Emits the cross reference section and the trailer:
+//  - "xref" and the entry for the free object 0,
+//  - if the context has emit data, one subsection per run of consecutive
+//    object numbers from the xref table, each entry being 20 bytes
+//    ("<10 digit offset> <5 digit generation> n\r\n"),
+//  - "trailer", the sub elements, "startxref", the offset of the xref
+//    section and the end of file marker.
+// Returns false as soon as a write fails.
+bool PDFTrailer::emit( EmitContext& rWriteContext ) const
+{
+    // the xref section starts at the current position
+    const unsigned int nXRefPos = rWriteContext.getCurPos();
+    // begin xref section, object 0 is always free
+    if( ! rWriteContext.write( "xref\r\n"
+                               "0 1\r\n"
+                               "0000000000 65535 f\r\n", 31 ) )
+        return false;
+
+    // object xrefs are only available when a complete PDF file is emitted
+    EmitImplData* pEData = getEmitData( rWriteContext );
+    if( pEData != NULL )
+    {
+        const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
+        EmitImplData::XRefTable::const_iterator aRun = rXRefs.begin();
+        while( aRun != rXRefs.end() )
+        {
+            // find end of continuous object numbers
+            EmitImplData::XRefTable::const_iterator aRunEnd = aRun;
+            unsigned int nLast = aRun->first;
+            for( ++aRunEnd;
+                 aRunEnd != rXRefs.end() && aRunEnd->first == nLast+1;
+                 ++aRunEnd )
+            {
+                nLast = aRunEnd->first;
+            }
+
+            // subsection header: first object number and number of entries
+            OStringBuffer aHeader( 21 );
+            aHeader.append( sal_Int32( aRun->first ) );
+            aHeader.append( ' ' );
+            aHeader.append( sal_Int32(nLast - aRun->first + 1) );
+            aHeader.append( "\r\n" );
+            if( ! rWriteContext.write( aHeader.getStr(), aHeader.getLength() ) )
+                return false;
+
+            for( ; aRun != aRunEnd; ++aRun )
+            {
+                // write 20 char entry of form
+                // 0000offset 00gen n\r\n
+                OStringBuffer aLine( 21 );
+                const OString aOffset( OString::valueOf( sal_Int64( aRun->second.second ) ) );
+                for( int nPad = 10 - aOffset.getLength(); nPad > 0; nPad-- )
+                    aLine.append( '0' );
+                aLine.append( aOffset );
+                aLine.append( ' ' );
+                const OString aGeneration( OString::valueOf( sal_Int32( aRun->second.first ) ) );
+                for( int nPad = 5 - aGeneration.getLength(); nPad > 0; nPad-- )
+                    aLine.append( '0' );
+                aLine.append( aGeneration );
+                aLine.append( " n\r\n" );
+                if( ! rWriteContext.write( aLine.getStr(), 20 ) )
+                    return false;
+            }
+        }
+    }
+
+    if( ! rWriteContext.write( "trailer\n", 8 ) )
+        return false;
+    if( ! emitSubElements( rWriteContext ) )
+        return false;
+    if( ! rWriteContext.write( "startxref\n", 10 ) )
+        return false;
+    const rtl::OString aXRefPos( rtl::OString::valueOf( sal_Int32(nXRefPos) ) );
+    if( ! rWriteContext.write( aXRefPos.getStr(), aXRefPos.getLength() ) )
+        return false;
+    return rWriteContext.write( "\n%%EOF\n", 7 );
+}
+
+// Returns a newly allocated trailer holding clones of all sub elements.
+// m_pDict of the copy points to the clone at the position the original
+// dictionary has in this trailer (if it is among the sub elements).
+PDFEntry* PDFTrailer::clone() const
+{
+    PDFTrailer* pCopy = new PDFTrailer();
+    cloneSubElements( pCopy->m_aSubElements );
+
+    const unsigned int nCount = m_aSubElements.size();
+    unsigned int nDictPos = 0;
+    while( nDictPos < nCount && m_aSubElements[nDictPos] != m_pDict )
+        nDictPos++;
+    if( nDictPos < nCount )
+        pCopy->m_pDict = dynamic_cast<PDFDict*>(pCopy->m_aSubElements[nDictPos]);
+    return pCopy;
+}
+
+#define ENCRYPTION_KEY_LEN 16
+#define ENCRYPTION_BUF_LEN 32
+
+namespace pdfparse {
+// Encryption related state of a PDFFile.
+struct PDFFileImplData
+{
+    bool        m_bIsEncrypted;
+    bool        m_bStandardHandler;
+    sal_uInt32  m_nAlgoVersion;
+    sal_uInt32  m_nStandardRevision;
+    // number of key bytes taken from the password hash
+    sal_uInt32  m_nKeyLength;
+    sal_uInt8   m_aOEntry[32];
+    sal_uInt8   m_aUEntry[32];
+    sal_uInt32  m_nPEntry;
+    OString     m_aDocID;
+    // ARCFOUR cipher and MD5 digest handles, created on demand
+    rtlCipher   m_aCipher;
+    rtlDigest   m_aDigest;
+
+    // maximum handled key length; the 5 extra bytes take the object
+    // number and generation that PDFFile::decrypt appends
+    sal_uInt8   m_aDecryptionKey[ENCRYPTION_KEY_LEN+5];
+
+    // everything starts out cleared, no handles are allocated
+    PDFFileImplData() :
+        m_bIsEncrypted( false ),
+        m_bStandardHandler( false ),
+        m_nAlgoVersion( 0 ),
+        m_nStandardRevision( 0 ),
+        m_nKeyLength( 0 ),
+        m_nPEntry( 0 ),
+        m_aCipher( NULL ),
+        m_aDigest( NULL )
+    {
+        rtl_zeroMemory( m_aOEntry, sizeof( m_aOEntry ) );
+        rtl_zeroMemory( m_aUEntry, sizeof( m_aUEntry ) );
+        rtl_zeroMemory( m_aDecryptionKey, sizeof( m_aDecryptionKey ) );
+    }
+
+    // destroys the handles that have been created
+    ~PDFFileImplData()
+    {
+        if( m_aCipher != NULL )
+            rtl_cipher_destroyARCFOUR( m_aCipher );
+        if( m_aDigest != NULL )
+            rtl_digest_destroyMD5( m_aDigest );
+    }
+};
+}
+
+// Frees the implementation data, if any was created.
+PDFFile::~PDFFile()
+{
+    // delete is a no-op for a NULL pointer
+    delete m_pData;
+}
+
+// Returns the encryption flag kept in the implementation data.
+bool PDFFile::isEncrypted() const
+{
+    const PDFFileImplData* pData = impl_getData();
+    return pData->m_bIsEncrypted;
+}
+
+// Decrypts nLen bytes from pInBuffer to pOutBuffer (the buffers may be
+// identical) with ARCFOUR. The key is the MD5 sum of the stored
+// decryption key extended by the low three bytes of nObject and the low
+// two bytes of nGeneration; at most 16 bytes of the sum are used.
+// Returns false if the file is not encrypted, if the cipher or digest
+// cannot be created or if the cipher reports an error.
+bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
+                       unsigned int nObject, unsigned int nGeneration ) const
+{
+    if( ! isEncrypted() )
+        return false;
+
+    if( ! m_pData->m_aCipher )
+        m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
+    // the digest is needed as well, create it the same way if missing
+    if( ! m_pData->m_aDigest )
+        m_pData->m_aDigest = rtl_digest_createMD5();
+    if( ! m_pData->m_aCipher || ! m_pData->m_aDigest )
+        return false;
+
+    // modify encryption key
+    sal_uInt32 i = m_pData->m_nKeyLength;
+    // m_aDecryptionKey has room for ENCRYPTION_KEY_LEN key bytes plus the
+    // five bytes appended below; the key length comes from the file, so
+    // it must not be trusted as an index
+    if( i > ENCRYPTION_KEY_LEN )
+        i = ENCRYPTION_KEY_LEN;
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
+
+    sal_uInt8 aSum[ENCRYPTION_KEY_LEN];
+    rtl_digest_updateMD5( m_pData->m_aDigest, m_pData->m_aDecryptionKey, i );
+    rtl_digest_getMD5( m_pData->m_aDigest, aSum, sizeof( aSum ) );
+
+    // the sum only has 16 bytes
+    if( i > 16 )
+        i = 16;
+
+    rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
+                                                  rtl_Cipher_DirectionDecode,
+                                                  aSum, i,
+                                                  NULL, 0 );
+    if( aErr == rtl_Cipher_E_None )
+        aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                         pInBuffer, nLen,
+                                         pOutBuffer, nLen );
+    return aErr == rtl_Cipher_E_None;
+}
+
+// 32 byte padding string: used to fill up passwords to 32 bytes and as
+// plain text when the user password is checked
+static const sal_uInt8 nPadString[32] =
+{
+    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
+    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
+    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
+    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
+};
+
+// Fills the 32 bytes at pBuffer with the first (at most 32) bytes of
+// rStr, followed by as many bytes from the start of nPadString as are
+// needed to reach 32 bytes.
+static void pad_or_truncate_to_32( const OString& rStr, sal_Char* pBuffer )
+{
+    const int nStrLen = rStr.getLength();
+    const int nCopy = nStrLen > 32 ? 32 : nStrLen;
+    rtl_copyMemory( pBuffer, rStr.getStr(), nCopy );
+    for( int nPos = nCopy; nPos < 32; nPos++ )
+        pBuffer[nPos] = nPadString[nPos - nCopy];
+}
+
+// Derives a key from a password (see PDF reference 1.4 Algorithm 3.2).
+// The padded password is hashed with MD5; unless bComputeO is set the O
+// entry, the P entry (least significant byte first) and the document id
+// go into the hash as well. For standard revision 3 the hash is hashed
+// again 50 times.
+// pOutKey must have room for at least pData->m_nKeyLength bytes.
+// Returns the number of bytes written to pOutKey, which is the key
+// length limited to the length of an MD5 sum.
+static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData* pData, bool bComputeO )
+{
+    // hash the padded password
+    sal_Char aPaddedPwd[ENCRYPTION_BUF_LEN];
+    pad_or_truncate_to_32( rPwd, aPaddedPwd );
+    rtl_digest_updateMD5( pData->m_aDigest, aPaddedPwd, sizeof( aPaddedPwd ) );
+
+    if( ! bComputeO )
+    {
+        rtl_digest_updateMD5( pData->m_aDigest, pData->m_aOEntry, 32 );
+
+        const sal_uInt32 nP = pData->m_nPEntry;
+        sal_uInt8 aPBytes[4];
+        for( int nByte = 0; nByte < 4; nByte++ )
+            aPBytes[nByte] = static_cast<sal_uInt8>((nP >> (8*nByte)) & 0xff);
+        rtl_digest_updateMD5( pData->m_aDigest, aPBytes, sizeof(aPBytes) );
+
+        rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
+    }
+
+    sal_uInt8 aHash[RTL_DIGEST_LENGTH_MD5];
+    rtl_digest_getMD5( pData->m_aDigest, aHash, sizeof(aHash) );
+    if( pData->m_nStandardRevision == 3 )
+    {
+        int nRound = 0;
+        while( nRound < 50 )
+        {
+            rtl_digest_updateMD5( pData->m_aDigest, aHash, sizeof(aHash) );
+            rtl_digest_getMD5( pData->m_aDigest, aHash, sizeof(aHash) );
+            nRound++;
+        }
+    }
+
+    // never hand out more bytes than the hash has
+    sal_uInt32 nKeyBytes = pData->m_nKeyLength;
+    if( nKeyBytes > RTL_DIGEST_LENGTH_MD5 )
+        nKeyBytes = RTL_DIGEST_LENGTH_MD5;
+    rtl_copyMemory( pOutKey, aHash, nKeyBytes );
+    return nKeyBytes;
+}
+
+// Checks rPwd against the U entry (see PDF reference 1.4 Algorithm 3.6).
+// The key derived from the password is stored in
+// pData->m_aDecryptionKey in any case, i.e. also if the check fails.
+// Returns true if the password matches; always false for standard
+// revisions other than 2 and 3.
+static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
+{
+    sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
+    sal_uInt8 aCheck[ENCRYPTION_BUF_LEN];
+    rtl_zeroMemory( aCheck, sizeof(aCheck) );
+
+    const sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
+    // save (at this time potential) decryption key for later use
+    rtl_copyMemory( pData->m_aDecryptionKey, aKey, nKeyLen );
+
+    if( pData->m_nStandardRevision == 2 )
+    {
+        // see PDF reference 1.4 Algorithm 3.4
+        // the encrypted pad string has to equal all 32 bytes of U
+        rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                aKey, nKeyLen,
+                                NULL, 0 );
+        rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
+                                  aCheck, sizeof( aCheck ) );
+        return rtl_compareMemory( aCheck, pData->m_aUEntry, 32 ) == 0;
+    }
+
+    if( pData->m_nStandardRevision == 3 )
+    {
+        // see PDF reference 1.4 Algorithm 3.5
+        // MD5 of pad string and document id ...
+        rtl_digest_updateMD5( pData->m_aDigest, nPadString, sizeof( nPadString ) );
+        rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
+        rtl_digest_getMD5( pData->m_aDigest, aCheck, sizeof(aCheck) );
+        // ... encrypted with the key ...
+        rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                aKey, sizeof(aKey), NULL, 0 );
+        rtl_cipher_encodeARCFOUR( pData->m_aCipher,
+                                  aCheck, 16,
+                                  aCheck, 16 ); // encrypt in place
+        // ... and then 19 more times with the key xor'ed with 1 to 19
+        for( int nRound = 1; nRound <= 19; nRound++ )
+        {
+            sal_uInt8 aRoundKey[ENCRYPTION_KEY_LEN];
+            for( sal_uInt32 nByte = 0; nByte < sizeof(aRoundKey); nByte++ )
+                aRoundKey[nByte] = static_cast<sal_uInt8>( aKey[nByte] ^ nRound );
+
+            rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                    aRoundKey, sizeof(aRoundKey), NULL, 0 );
+            rtl_cipher_encodeARCFOUR( pData->m_aCipher,
+                                      aCheck, 16,
+                                      aCheck, 16 ); // encrypt in place
+        }
+        // only the first 16 bytes of U are compared
+        return rtl_compareMemory( aCheck, pData->m_aUEntry, 16 ) == 0;
+    }
+
+    return false;
+}
+
+/** Validate a password and prepare the data needed for decryption.
+ *
+ *  For an unencrypted file only the empty password is accepted. Otherwise
+ *  the password is first tried as user password; if that fails it is treated
+ *  as owner password: the O entry is decrypted with a key derived from it and
+ *  the result is tried as user password (PDF reference 1.4 Algorithm 3.7).
+ *
+ *  @param rPwd  password candidate
+ *  @return true if the password was accepted; false if it was rejected, if
+ *          the encryption scheme is not one handled here (standard handler,
+ *          algorithm version 1-2, revision 2-3) or if the cipher or digest
+ *          could not be created
+ */
+bool PDFFile::setupDecryptionData( const OString& rPwd ) const
+{
+    if( !impl_getData()->m_bIsEncrypted )
+        return rPwd.getLength() == 0;
+
+    // check if we can handle this encryption at all
+    if( ! m_pData->m_bStandardHandler ||
+        m_pData->m_nAlgoVersion < 1 ||
+        m_pData->m_nAlgoVersion > 2 ||
+        m_pData->m_nStandardRevision < 2 ||
+        m_pData->m_nStandardRevision > 3 )
+        return false;
+
+    if( ! m_pData->m_aCipher )
+        m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
+    if( ! m_pData->m_aDigest )
+        m_pData->m_aDigest = rtl_digest_createMD5();
+    // without both handles no password can be verified
+    if( ! m_pData->m_aCipher || ! m_pData->m_aDigest )
+        return false;
+
+    // first try user password
+    bool bValid = check_user_password( rPwd, m_pData );
+
+    if( ! bValid )
+    {
+        // try owner password
+        // see PDF reference 1.4 Algorithm 3.7
+        sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
+        sal_uInt8 nPwd[ENCRYPTION_BUF_LEN];
+        rtl_zeroMemory( aKey, sizeof(aKey) );
+        rtl_zeroMemory( nPwd, sizeof(nPwd) );
+        const sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData, true );
+        if( m_pData->m_nStandardRevision == 2 )
+        {
+            // a single RC4 pass over the O entry
+            rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
+                                    aKey, nKeyLen, NULL, 0 );
+            rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                      m_pData->m_aOEntry, 32,
+                                      nPwd, 32 );
+        }
+        else if( m_pData->m_nStandardRevision == 3 )
+        {
+            // 20 RC4 passes, rounds 19 down to 0, each with the key XORed
+            // by the round number
+            rtl_copyMemory( nPwd, m_pData->m_aOEntry, 32 );
+            for( int i = 19; i >= 0; i-- )
+            {
+                sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
+                // only nKeyLen bytes are passed to the cipher, so only
+                // those need to be derived
+                for( sal_uInt32 j = 0; j < nKeyLen; j++ )
+                    nTempKey[j] = sal_uInt8(aKey[j] ^ i);
+                rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
+                                        nTempKey, nKeyLen, NULL, 0 );
+                rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                          nPwd, 32,
+                                          nPwd, 32 ); // decrypt inplace
+            }
+        }
+        // the decrypted O entry is the (padded) user password candidate
+        bValid = check_user_password( OString( (sal_Char*)nPwd, 32 ), m_pData );
+    }
+
+    return bValid;
+}
+
+/** Return the implementation data of this file, creating it on first use.
+ *
+ *  On the first call a PDFFileImplData is allocated and filled from the
+ *  trailers among the sub elements, visited from last to first: the first
+ *  string of the "ID" array becomes the document id, and the "Encrypt"
+ *  dictionary (given directly or through an object reference) supplies
+ *  algorithm version, key length, security handler, O/U/P entries and
+ *  revision. The search stops at the first trailer whose encryption
+ *  dictionary has a "Filter" entry.
+ *
+ *  @return the cached data object; never NULL
+ */
+PDFFileImplData* PDFFile::impl_getData() const
+{
+    if( m_pData )
+        return m_pData;
+    m_pData = new PDFFileImplData();
+    // check for encryption dict in a trailer
+    unsigned int nElements = m_aSubElements.size();
+    while( nElements-- > 0 )
+    {
+        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements]);
+        if( ! pTrailer || ! pTrailer->m_pDict )
+            continue;
+
+        // search doc id
+        PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
+        if( doc_id != pTrailer->m_pDict->m_aMap.end() )
+        {
+            PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
+            if( pArr && pArr->m_aSubElements.size() > 0 )
+            {
+                PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0]);
+                if( pStr )
+                    m_pData->m_aDocID = pStr->getFilteredString();
+#if OSL_DEBUG_LEVEL > 1
+                fprintf( stderr, "DocId is <" );
+                for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
+                    fprintf( stderr, "%.2x", (unsigned int)sal_uInt8(m_pData->m_aDocID.getStr()[i]) );
+                fprintf( stderr, ">\n" );
+#endif
+            }
+        }
+
+        // search Encrypt entry
+        PDFDict::Map::iterator enc =
+            pTrailer->m_pDict->m_aMap.find( "Encrypt" );
+        if( enc == pTrailer->m_pDict->m_aMap.end() )
+            continue;
+
+        // the entry is either the dictionary itself or a reference to an
+        // object holding it
+        PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
+        if( ! pDict )
+        {
+            PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
+            if( pRef )
+            {
+                PDFObject* pObj = findObject( pRef );
+                if( pObj && pObj->m_pObject )
+                    pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
+            }
+        }
+        if( ! pDict )
+            continue;
+
+        PDFDict::Map::iterator filter  = pDict->m_aMap.find( "Filter" );
+        PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
+        PDFDict::Map::iterator len     = pDict->m_aMap.find( "Length" );
+        PDFDict::Map::iterator o_ent   = pDict->m_aMap.find( "O" );
+        PDFDict::Map::iterator u_ent   = pDict->m_aMap.find( "U" );
+        PDFDict::Map::iterator r_ent   = pDict->m_aMap.find( "R" );
+        PDFDict::Map::iterator p_ent   = pDict->m_aMap.find( "P" );
+        // a dictionary without Filter does not mark the file as encrypted
+        if( filter == pDict->m_aMap.end() )
+            continue;
+
+        m_pData->m_bIsEncrypted = true;
+        // default key length is 5 bytes, 16 bytes from algorithm version 3
+        // on; an explicit Length entry (given in bits) overrides both
+        m_pData->m_nKeyLength = 5;
+        if( version != pDict->m_aMap.end() )
+        {
+            PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
+            if( pNum )
+                m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
+        }
+        if( m_pData->m_nAlgoVersion >= 3 )
+            m_pData->m_nKeyLength = 16;
+        if( len != pDict->m_aMap.end() )
+        {
+            PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
+            if( pNum )
+                m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
+        }
+        PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
+        if( pFilter && pFilter->getFilteredName().equalsAscii( "Standard" ) )
+            m_pData->m_bStandardHandler = true;
+        // O and U entries are only taken over if they are exactly 32 bytes
+        if( o_ent != pDict->m_aMap.end() )
+        {
+            PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
+            if( pString )
+            {
+                OString aEnt = pString->getFilteredString();
+                if( aEnt.getLength() == 32 )
+                    rtl_copyMemory( m_pData->m_aOEntry, aEnt.getStr(), 32 );
+#if OSL_DEBUG_LEVEL > 1
+                else
+                {
+                    fprintf( stderr, "O entry has length %d, should be 32 <", (int)aEnt.getLength() );
+                    for( int i = 0; i < aEnt.getLength(); i++ )
+                        fprintf( stderr, " %.2X", (unsigned int)sal_uInt8(aEnt.getStr()[i]) );
+                    fprintf( stderr, ">\n" );
+                }
+#endif
+            }
+        }
+        if( u_ent != pDict->m_aMap.end() )
+        {
+            PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
+            if( pString )
+            {
+                OString aEnt = pString->getFilteredString();
+                if( aEnt.getLength() == 32 )
+                    rtl_copyMemory( m_pData->m_aUEntry, aEnt.getStr(), 32 );
+#if OSL_DEBUG_LEVEL > 1
+                else
+                {
+                    fprintf( stderr, "U entry has length %d, should be 32 <", (int)aEnt.getLength() );
+                    for( int i = 0; i < aEnt.getLength(); i++ )
+                        fprintf( stderr, " %.2X", (unsigned int)sal_uInt8(aEnt.getStr()[i]) );
+                    fprintf( stderr, ">\n" );
+                }
+#endif
+            }
+        }
+        if( r_ent != pDict->m_aMap.end() )
+        {
+            PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
+            if( pNum )
+                m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
+        }
+        if( p_ent != pDict->m_aMap.end() )
+        {
+            PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
+            // convert via the signed type first, then reinterpret as unsigned
+            if( pNum )
+                m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
+#if OSL_DEBUG_LEVEL > 1
+            // m_nPEntry is an integer, not a pointer: print it as hex number
+            fprintf( stderr, "p entry is 0x%x\n", (unsigned int)m_pData->m_nPEntry );
+#endif
+        }
+#if OSL_DEBUG_LEVEL > 1
+        fprintf( stderr, "Encryption dict: sec handler: %s, version = %d, revision = %d, key length = %d\n",
+                 pFilter ? OUStringToOString( pFilter->getFilteredName(), RTL_TEXTENCODING_UTF8 ).getStr() : "<unknown>",
+                 (int)m_pData->m_nAlgoVersion, (int)m_pData->m_nStandardRevision, (int)m_pData->m_nKeyLength );
+#endif
+        break;
+    }
+
+    return m_pData;
+}
+
+/** Write the file: attach fresh emit data to the context, output the
+ *  "%PDF-<major>.<minor>" header line and then all sub elements.
+ *
+ *  @param rWriteContext  context receiving the output
+ *  @return false if writing the header or emitting a sub element failed
+ */
+bool PDFFile::emit( EmitContext& rWriteContext ) const
+{
+    setEmitData( rWriteContext, new EmitImplData( this ) );
+
+    // assemble the header line before handing it to the context in one go
+    OStringBuffer aHeader( 32 );
+    aHeader.append( "%PDF-" );
+    aHeader.append( sal_Int32( m_nMajor ) );
+    aHeader.append( '.' );
+    aHeader.append( sal_Int32( m_nMinor ) );
+    aHeader.append( "\n" );
+
+    const bool bHeaderWritten =
+        rWriteContext.write( aHeader.getStr(), aHeader.getLength() );
+    // sub elements are only emitted once the header is out
+    return bHeaderWritten && emitSubElements( rWriteContext );
+}
+
+/** Create a copy of this file entry carrying the same version numbers and
+ *  clones of all sub elements.
+ *
+ *  @return the newly allocated copy
+ */
+PDFEntry* PDFFile::clone() const
+{
+    PDFFile* pCopy = new PDFFile();
+    // version numbers first, then the element tree
+    pCopy->m_nMajor = m_nMajor;
+    pCopy->m_nMinor = m_nMinor;
+    cloneSubElements( pCopy->m_aSubElements );
+    return pCopy;
+}
+
+// nothing to release in PDFPart itself
+PDFPart::~PDFPart() {}
+
+/** Write this part, which consists of nothing but its sub elements.
+ *
+ *  @param rWriteContext  context receiving the output
+ *  @return result of emitting the sub elements
+ */
+bool PDFPart::emit( EmitContext& rWriteContext ) const
+{
+    const bool bEmitted = emitSubElements( rWriteContext );
+    return bEmitted;
+}
+
+/** Create a copy of this part containing clones of all sub elements.
+ *
+ *  @return the newly allocated copy
+ */
+PDFEntry* PDFPart::clone() const
+{
+    PDFPart* pCopy = new PDFPart();
+    cloneSubElements( pCopy->m_aSubElements );
+    return pCopy;
+}
+
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
new file mode 100644
index 000000000000..b3ffa64427dd
--- /dev/null
+++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
@@ -0,0 +1,701 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_sdext.hxx"
+
+#if defined __SUNPRO_CC
+#pragma disable_warn
+#elif defined _MSC_VER
+#pragma warning(push, 1)
+#endif
+
+#include "pdfparse.hxx"
+
+// workaround windows compiler: do not include multi_pass.hpp
+//#include <boost/spirit.hpp>
+#include <boost/spirit/include/classic_core.hpp>
+#include <boost/spirit/include/classic_utility.hpp>
+#include <boost/spirit/include/classic_error_handling.hpp>
+#include <boost/spirit/include/classic_file_iterator.hpp>
+#include <boost/bind.hpp>
+#include <string>
+
+#include <rtl/strbuf.hxx>
+#include <rtl/memory.h>
+#include <rtl/alloc.h>
+
+// disable warnings again because someone along the line has enabled them
+#if defined __SUNPRO_CC
+#pragma disable_warn
+#elif defined _MSC_VER
+#pragma warning(push, 1)
+#endif
+
+using namespace boost::spirit;
+using namespace rtl;
+using namespace pdfparse;
+
+/** EmitContext that collects everything written to it in a string buffer.
+ *
+ *  The "original bytes" operations refer to the data written so far, i.e.
+ *  offsets are offsets into the internal buffer.
+ */
+class StringEmitContext : public EmitContext
+{
+    OStringBuffer m_aBuf;
+
+    /** Check that nLen bytes starting at nOrigOffset lie inside the buffer.
+     *
+     *  Same condition as nOrigOffset+nLen < buffer length, but written so
+     *  that the sum cannot wrap around for large arguments.
+     */
+    bool isInsideBuffer( unsigned int nOrigOffset, unsigned int nLen ) throw()
+    {
+        const unsigned int nSize = static_cast<unsigned int>(m_aBuf.getLength());
+        return nOrigOffset < nSize && nLen < nSize - nOrigOffset;
+    }
+    public:
+    StringEmitContext() : EmitContext(), m_aBuf(256) {}
+    virtual ~StringEmitContext() {}
+
+    /// Append nLen bytes from pBuf to the buffer; always reports success.
+    virtual bool write( const void* pBuf, unsigned int nLen ) throw()
+    {
+        m_aBuf.append( (const sal_Char*)pBuf, nLen );
+        return true;
+    }
+
+    /// Current write position, i.e. the number of bytes collected so far.
+    virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
+
+    /** Append a copy of nLen already written bytes starting at nOrigOffset.
+     *
+     *  @return false if the requested range is not inside the buffer
+     */
+    virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
+    {
+        if( ! isInsideBuffer( nOrigOffset, nLen ) )
+            return false;
+        // take a private copy first: appending may reallocate the buffer
+        // and thereby invalidate a pointer into its current contents
+        const OString aCopy( m_aBuf.getStr() + nOrigOffset, static_cast<sal_Int32>(nLen) );
+        return write( aCopy.getStr(), nLen );
+    }
+
+    /** Copy nLen already written bytes starting at nOrigOffset to pBuf.
+     *
+     *  @return nLen on success, 0 if the requested range is not inside the
+     *          buffer
+     */
+    virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
+    {
+        if( isInsideBuffer( nOrigOffset, nLen ) )
+        {
+            rtl_copyMemory( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
+            return nLen;
+        }
+        return 0;
+    }
+
+    /// Hand out the collected data and reset the buffer.
+    OString getString() { return m_aBuf.makeStringAndClear(); }
+};
+
+/** Boost.Spirit (classic) grammar that builds a tree of PDFEntry objects.
+ *
+ *  The semantic actions attached to the rules maintain m_aObjectStack: the
+ *  bottom element is the root (a PDFFile, or a PDFPart if no file header was
+ *  seen), the top element is the container currently being filled. Errors
+ *  detected by the actions are reported through parseError(), which throws
+ *  a Spirit parser error.
+ *
+ *  @tparam iteratorT  iterator type of the parsed input
+ */
+template< class iteratorT >
+class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
+{
+public:
+
+    /// @param first  start of the input; offsets of entries are relative to it
+    PDFGrammar( const iteratorT& first )
+    : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
+    ~PDFGrammar()
+    {
+        // only the bottom element is deleted here; the elements above it are
+        // inserted into their container when pushed (see the begin* actions)
+        if( !m_aObjectStack.empty() )
+            delete m_aObjectStack.front();
+    }
+
+    double m_fDouble;                           // last parsed real number
+    std::vector< unsigned int > m_aUIntStack;   // parsed unsigned ints awaiting their action
+    std::vector< PDFEntry* > m_aObjectStack;    // root at the bottom, current container on top
+    rtl::OString m_aErrorString;                // keeps the text of a composed error message alive
+    iteratorT m_aGlobalBegin;                   // start of the input
+
+public:
+    /** Functor parser for the content of a literal string "( ... )".
+     *
+     *  Consumes input up to (not including) the closing parenthesis that
+     *  matches the already consumed opening one. Nested parentheses are
+     *  counted, a backslash makes the following character be skipped
+     *  unexamined. Returns the consumed length, or -1 (no match) if the
+     *  input ended first.
+     */
+    struct pdf_string_parser
+    {
+        typedef nil_t result_t;
+        template <typename ScannerT>
+        std::ptrdiff_t
+        operator()(ScannerT const& scan, result_t& result) const
+        {
+            std::ptrdiff_t len = 0;
+
+            int nBraceLevel = 0;
+            while( ! scan.at_end() )
+            {
+                char c = *scan;
+                if( c == ')' )
+                {
+                    nBraceLevel--;
+                    if( nBraceLevel < 0 )
+                        break;
+                }
+                else if( c == '(' )
+                    nBraceLevel++;
+                else if( c == '\\' ) // ignore escaped braces
+                {
+                    ++len;
+                    ++scan;
+                    if( scan.at_end() )
+                        break;
+                }
+                ++len;
+                ++scan;
+            }
+            return scan.at_end() ? -1 : len;
+        }
+    };
+
+    /// The rule set; instantiated by Spirit for the scanner type in use.
+    template< typename ScannerT >
+    struct definition
+    {
+        definition( const PDFGrammar<iteratorT>& rSelf )
+        {
+            // the actions modify the grammar object, so constness is cast away
+            PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
+
+            // workaround workshop compiler: comment_p doesn't work
+            // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
+            comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
+
+            boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
+
+            // workaround workshop compiler: confix_p doesn't work
+            //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
+            stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
+
+            name        = lexeme_d[
+                            ch_p('/')
+                            >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
+                               [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
+
+            // workaround workshop compiler: confix_p doesn't work
+            //stringtype  = ( confix_p("(",*anychar_p, ")") |
+            //                confix_p("<",*xdigit_p,  ">") )
+            //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
+
+            stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
+                            ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
+                          [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
+
+            null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
+
+#ifdef USE_ASSIGN_ACTOR
+            objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
+                            >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                            >> ch_p('R')
+                            >> eps_p
+                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
+#else
+            objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                            >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                            >> ch_p('R')
+                            >> eps_p
+                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
+#endif
+
+#ifdef USE_ASSIGN_ACTOR
+            simple_type = objectref | name |
+                          ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
+                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
+                          | stringtype | boolean | null_object;
+#else
+            simple_type = objectref | name |
+                          ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
+                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
+                          | stringtype | boolean | null_object;
+#endif
+
+            dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
+            dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
+
+            array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
+            array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
+
+#ifdef USE_ASSIGN_ACTOR
+            object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
+                          >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
+#else
+            object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                          >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
+#endif
+            object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
+
+            // the xref table is recognized but not evaluated: no actions attached
+            xref        = str_p( "xref" ) >> uint_p >> uint_p
+                          >> lexeme_d[
+                                +( repeat_p(10)[digit_p]
+                                   >> blank_p
+                                   >> repeat_p(5)[digit_p]
+                                   >> blank_p
+                                   >> ( ch_p('n') | ch_p('f') )
+                                   >> repeat_p(2)[space_p]
+                                 ) ];
+
+            // begin and end tokens are separate alternatives; proper nesting
+            // is checked by the actions through the object stack
+            dict_element= dict_begin | comment | simple_type
+                          | array_begin | array_end | dict_end;
+
+            object      = object_begin
+                          >> *dict_element
+                          >> !stream
+                          >> object_end;
+
+            trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
+                          >> *dict_element
+                          >> str_p("startxref")
+                          >> uint_p
+                          >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
+
+#ifdef USE_ASSIGN_ACTOR
+            pdfrule     = ! (lexeme_d[
+                                str_p( "%PDF-" )
+                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                                >> ch_p('.')
+                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                                >> *((~ch_p('\r') & ~ch_p('\n')))
+                                >> eol_p
+                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
+                          >> *( comment | object | ( xref >> trailer ) );
+#else
+            pdfrule     = ! (lexeme_d[
+                                str_p( "%PDF-" )
+                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                                >> ch_p('.')
+                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                                >> *((~ch_p('\r') & ~ch_p('\n')))
+                                >> eol_p
+                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
+                          >> *( comment | object | ( xref >> trailer ) );
+#endif
+        }
+        rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
+                         objectref, array, value, dict_element, dict_begin, dict_end,
+                         array_begin, array_end, object, object_begin, object_end,
+                         xref, trailer, pdfrule;
+
+        const rule< ScannerT >& start() const { return pdfrule; }
+    };
+
+#ifndef USE_ASSIGN_ACTOR
+    /// Action replacing push_back_a: remember a parsed unsigned integer.
+    void push_back_action_uint( unsigned int i )
+    {
+        m_aUIntStack.push_back( i );
+    }
+    /// Action replacing assign_a: remember a parsed real number.
+    void assign_action_double( double d )
+    {
+        m_fDouble = d;
+    }
+#endif
+
+    /// Abort parsing by throwing a Spirit parser error for pLocation.
+    void parseError( const char* pMessage, iteratorT pLocation )
+    {
+        throw_( pLocation, pMessage );
+    }
+
+    /// Collect the characters in [first,last) into a string.
+    rtl::OString iteratorToString( iteratorT first, iteratorT last ) const
+    {
+        rtl::OStringBuffer aStr( 32 );
+        while( first != last )
+        {
+            aStr.append( *first );
+            ++first;
+        }
+        return aStr.makeStringAndClear();
+    }
+
+    /// File header seen: create the PDFFile root from the two version numbers.
+    void haveFile( iteratorT pBegin, iteratorT /*pEnd*/ )
+    {
+        if( m_aObjectStack.empty() )
+        {
+            PDFFile* pFile = new PDFFile();
+            // numbers were pushed major first, so minor is on top
+            pFile->m_nMinor = m_aUIntStack.back();
+            m_aUIntStack.pop_back();
+            pFile->m_nMajor = m_aUIntStack.back();
+            m_aUIntStack.pop_back();
+            m_aObjectStack.push_back( pFile );
+        }
+        else
+            parseError( "found file header in unusual place", pBegin );
+    }
+
+    /// Add a comment to the current container (a PDFPart root is created if none exists).
+    void pushComment( iteratorT first, iteratorT last )
+    {
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer == NULL )
+            parseError( "comment without container", first );
+        else
+        {
+            // allocate only after the check so nothing leaks on error
+            pContainer->m_aSubElements.push_back(
+                new PDFComment(iteratorToString(first,last)) );
+        }
+    }
+
+    /** Insert a freshly parsed value into the current container.
+     *
+     *  Dictionaries and arrays take any value. An object takes exactly one
+     *  value, a trailer exactly one dictionary; both additionally list it
+     *  among their sub elements. Everything else is an error: the value is
+     *  deleted and a parser error is thrown.
+     */
+    void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
+    {
+        PDFContainer* pContainer = NULL;
+        const char* pMsg = NULL;
+        if( ! m_aObjectStack.empty() &&
+            (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
+        {
+            if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
+                dynamic_cast<PDFArray*>(pContainer) == NULL )
+            {
+                PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
+                if( pObj )
+                {
+                    if( pObj->m_pObject == NULL )
+                        pObj->m_pObject = pNewValue;
+                    else
+                    {
+                        pMsg = "second value for object";
+                        pContainer = NULL;
+                    }
+                }
+                else if( dynamic_cast<PDFDict*>(pNewValue) )
+                {
+                    PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
+                    if( pTrailer )
+                    {
+                        if( pTrailer->m_pDict == NULL )
+                            pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
+                        else
+                            pContainer = NULL;
+                    }
+                    else
+                        pContainer = NULL;
+                }
+                else
+                    pContainer = NULL;
+            }
+        }
+        if( pContainer )
+            pContainer->m_aSubElements.push_back( pNewValue );
+        else
+        {
+            if( ! pMsg )
+            {
+                if( dynamic_cast<PDFContainer*>(pNewValue) )
+                    pMsg = "array without container";
+                else
+                    pMsg = "value without container";
+            }
+            delete pNewValue;
+            parseError( pMsg, pPos );
+        }
+    }
+
+    void pushName( iteratorT first, iteratorT last )
+    {
+        insertNewValue( new PDFName(iteratorToString(first,last)), first );
+    }
+
+    void pushDouble( iteratorT first, iteratorT /*last*/ )
+    {
+        // the value itself was stored in m_fDouble by the real_p action
+        insertNewValue( new PDFNumber(m_fDouble), first );
+    }
+
+    void pushString( iteratorT first, iteratorT last )
+    {
+        insertNewValue( new PDFString(iteratorToString(first,last)), first );
+    }
+
+    void pushBool( iteratorT first, iteratorT last )
+    {
+        // only "true" and "false" can match: a length of 4 means "true"
+        insertNewValue( new PDFBool( (last-first == 4) ), first );
+    }
+
+    void pushNull( iteratorT first, iteratorT )
+    {
+        insertNewValue( new PDFNull(), first );
+    }
+
+
+    /// "n g obj" seen: open a new object inside the file or part on top of the stack.
+    void beginObject( iteratorT first, iteratorT /*last*/ )
+    {
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+
+        // numbers were pushed object number first, so generation is on top
+        unsigned int nGeneration = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+        unsigned int nObject = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer &&
+            ( dynamic_cast<PDFFile*>(pContainer) ||
+              dynamic_cast<PDFPart*>(pContainer) ) )
+        {
+            // allocate only after the check so nothing leaks on error
+            PDFObject* pObj = new PDFObject( nObject, nGeneration );
+            pObj->m_nOffset = first - m_aGlobalBegin;
+            pContainer->m_aSubElements.push_back( pObj );
+            m_aObjectStack.push_back( pObj );
+        }
+        else
+            parseError( "object in wrong place", first );
+    }
+
+    void endObject( iteratorT first, iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+            parseError( "endobj without obj", first );
+        else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
+            parseError( "spurious endobj", first );
+        else
+            m_aObjectStack.pop_back();
+    }
+
+    void pushObjectRef( iteratorT first, iteratorT )
+    {
+        unsigned int nGeneration = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+        unsigned int nObject = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+        insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
+    }
+
+    void beginDict( iteratorT first, iteratorT )
+    {
+        PDFDict* pDict = new PDFDict();
+        pDict->m_nOffset = first - m_aGlobalBegin;
+
+        insertNewValue( pDict, first );
+        // will not come here if insertion fails (exception)
+        m_aObjectStack.push_back( pDict );
+    }
+
+    /// ">>" seen: close the dictionary on top of the stack and build its map.
+    void endDict( iteratorT first, iteratorT )
+    {
+        PDFDict* pDict = NULL;
+        if( m_aObjectStack.empty() )
+            parseError( "dictionary end without begin", first );
+        else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
+            parseError( "spurious dictionary end", first );
+        else
+            m_aObjectStack.pop_back();
+
+        // parseError throws, so this is not expected to be reached without
+        // a dictionary; the check just makes that explicit
+        if( pDict == NULL )
+            return;
+
+        PDFEntry* pOffender = pDict->buildMap();
+        if( pOffender )
+        {
+            StringEmitContext aCtx;
+            aCtx.write( "offending dictionary element: ", 30 );
+            pOffender->emit( aCtx );
+            // the message has to outlive this call, hence the member
+            m_aErrorString = aCtx.getString();
+            parseError( m_aErrorString.getStr(), first );
+        }
+    }
+
+    void beginArray( iteratorT first, iteratorT )
+    {
+        PDFArray* pArray = new PDFArray();
+        pArray->m_nOffset = first - m_aGlobalBegin;
+
+        insertNewValue( pArray, first );
+        // will not come here if insertion fails (exception)
+        m_aObjectStack.push_back( pArray );
+    }
+
+    void endArray( iteratorT first, iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+            parseError( "array end without begin", first );
+        else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
+            parseError( "spurious array end", first );
+        else
+            m_aObjectStack.pop_back();
+    }
+
+    /** "stream ... endstream" seen: attach a PDFStream to the current object.
+     *
+     *  The object must already have a value; a stream is only created if
+     *  that value is a dictionary.
+     */
+    void emitStream( iteratorT first, iteratorT last )
+    {
+        PDFObject* pObj = m_aObjectStack.empty()
+            ? NULL : dynamic_cast<PDFObject*>(m_aObjectStack.back());
+        if( pObj && pObj->m_pObject )
+        {
+            if( pObj->m_pStream )
+                parseError( "multiple streams in object", first );
+
+            PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
+            if( pDict )
+            {
+                PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
+
+                pObj->m_pStream = pStream;
+                pObj->m_aSubElements.push_back( pStream );
+            }
+        }
+        else
+            parseError( "stream without object", first );
+    }
+
+    /// "trailer" seen: open a new trailer inside the file or part on top of the stack.
+    void beginTrailer( iteratorT first, iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer &&
+            ( dynamic_cast<PDFFile*>(pContainer) ||
+              dynamic_cast<PDFPart*>(pContainer) ) )
+        {
+            // allocate only after the check so nothing leaks on error
+            PDFTrailer* pTrailer = new PDFTrailer();
+            pTrailer->m_nOffset = first - m_aGlobalBegin;
+            pContainer->m_aSubElements.push_back( pTrailer );
+            m_aObjectStack.push_back( pTrailer );
+        }
+        else
+            parseError( "trailer in wrong place", first );
+    }
+
+    void endTrailer( iteratorT first, iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+            parseError( "%%EOF without trailer", first );
+        else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
+            parseError( "spurious %%EOF", first );
+        else
+            m_aObjectStack.pop_back();
+    }
+};
+
+/** Parse PDF data held in memory.
+ *
+ *  A parser error raised by the grammar is caught here (and only reported
+ *  in debug builds); whatever was built up to that point is still subject
+ *  to the check below.
+ *
+ *  @param pBuffer  start of the data
+ *  @param nLen     number of bytes at pBuffer
+ *  @return the root entry if exactly one element is left on the grammar's
+ *          object stack (the caller becomes its owner), NULL otherwise
+ */
+PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
+{
+    PDFGrammar<const char*> aGrammar( pBuffer );
+
+    try
+    {
+        boost::spirit::parse_info<const char*> aInfo =
+            boost::spirit::parse( pBuffer,
+                                  pBuffer+nLen,
+                                  aGrammar,
+                                  boost::spirit::space_p );
+#if OSL_DEBUG_LEVEL > 1
+        // pointer differences and sizes are cast to match the int conversions
+        fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n",
+                 aInfo.stop, pBuffer, (int)(aInfo.stop - pBuffer),
+                 aInfo.hit ? "true" : "false",
+                 aInfo.full ? "true" : "false",
+                 (int)aInfo.length );
+#endif
+    }
+    catch( parser_error<const char*, const char*>& rError )
+    {
+#if OSL_DEBUG_LEVEL > 1
+        fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
+                 rError.descriptor, (unsigned int)(rError.where - pBuffer) );
+        unsigned int nElem = aGrammar.m_aObjectStack.size();
+        for( unsigned int i = 0; i < nElem; i++ )
+        {
+            fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
+        }
+#endif
+    }
+
+    PDFEntry* pRet = NULL;
+    unsigned int nEntries = aGrammar.m_aObjectStack.size();
+    if( nEntries == 1 )
+    {
+        // take the root off the stack so the grammar's destructor does not
+        // delete it
+        pRet = aGrammar.m_aObjectStack.back();
+        aGrammar.m_aObjectStack.pop_back();
+    }
+#if OSL_DEBUG_LEVEL > 1
+    else if( nEntries > 1 )
+        fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
+#endif
+
+    return pRet;
+}
+
+/** Parse a PDF file.
+ *
+ *  On WIN32 the whole file is read into memory and handed to the buffer
+ *  based read(); elsewhere the file is parsed through a Spirit
+ *  file_iterator. A parser error raised by the grammar is caught here (and
+ *  only reported in debug builds).
+ *
+ *  @param pFileName  name of the file to parse
+ *  @return the root entry (owned by the caller), or NULL if the file could
+ *          not be opened or read or if parsing did not leave exactly one
+ *          element on the grammar's object stack
+ */
+PDFEntry* PDFReader::read( const char* pFileName )
+{
+#ifdef WIN32
+    /* #i106583#
+       since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
+       C++ stdlib istream_iterator does not allow "-" apparently
+       using spirit 2.0 doesn't work in our environment with the MSC
+
+       So for the time being bite the bullet and read the whole file.
+       FIXME: give Spirit 2.x another try when we upgrade boost again.
+    */
+    PDFEntry* pRet = NULL;
+    FILE* fp = fopen( pFileName, "rb" );
+    if( fp )
+    {
+        // determine the file size; any failure leaves nSize negative
+        long nSize = -1;
+        if( fseek( fp, 0, SEEK_END ) == 0 )
+            nSize = ftell( fp );
+        if( nSize > 0 && fseek( fp, 0, SEEK_SET ) == 0 )
+        {
+            const unsigned int nLen = (unsigned int)nSize;
+            char* pBuf = (char*)rtl_allocateMemory( nLen );
+            if( pBuf )
+            {
+                // parse only if the complete file arrived; a short read
+                // would leave part of the buffer uninitialized
+                if( fread( pBuf, 1, nLen, fp ) == nLen )
+                    pRet = read( pBuf, nLen );
+                rtl_freeMemory( pBuf );
+            }
+        }
+        fclose( fp );
+    }
+    return pRet;
+#else
+    file_iterator<> file_start( pFileName );
+    if( ! file_start )
+        return NULL;
+    file_iterator<> file_end = file_start.make_end();
+    PDFGrammar< file_iterator<> > aGrammar( file_start );
+
+    try
+    {
+        boost::spirit::parse_info< file_iterator<> > aInfo =
+            boost::spirit::parse( file_start,
+                                  file_end,
+                                  aGrammar,
+                                  boost::spirit::space_p );
+#if OSL_DEBUG_LEVEL > 1
+        // iterator differences and sizes are cast to match the int conversions
+        fprintf( stderr, "parseinfo: stop at offset = %d, hit = %s, full = %s, length = %d\n",
+                 (int)(aInfo.stop - file_start),
+                 aInfo.hit ? "true" : "false",
+                 aInfo.full ? "true" : "false",
+                 (int)aInfo.length );
+#endif
+    }
+    catch( parser_error< const char*, file_iterator<> >& rError )
+    {
+#if OSL_DEBUG_LEVEL > 1
+        fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
+                 rError.descriptor, (unsigned int)(rError.where - file_start) );
+        unsigned int nElem = aGrammar.m_aObjectStack.size();
+        for( unsigned int i = 0; i < nElem; i++ )
+        {
+            fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
+        }
+#endif
+    }
+
+    PDFEntry* pRet = NULL;
+    unsigned int nEntries = aGrammar.m_aObjectStack.size();
+    if( nEntries == 1 )
+    {
+        // take the root off the stack so the grammar's destructor does not
+        // delete it
+        pRet = aGrammar.m_aObjectStack.back();
+        aGrammar.m_aObjectStack.pop_back();
+    }
+#if OSL_DEBUG_LEVEL > 1
+    else if( nEntries > 1 )
+    {
+        fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
+        for( unsigned int i = 0; i < nEntries; i++ )
+        {
+            fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
+            PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
+            if( pObj )
+                fprintf( stderr, "   -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration );
+            else
+                fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
+        }
+    }
+#endif
+    return pRet;
+#endif // WIN32
+}
+
+#if defined __SUNPRO_CC
+#pragma enable_warn
+#elif defined _MSC_VER
+#pragma warning(pop)
+#endif
+
+