summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2023-01-23 12:32:25 +0900
committerTomaž Vajngerl <quikee@gmail.com>2023-01-24 10:50:34 +0000
commit217ef2ed9b8a757b7b02feac799621d20d0f312e (patch)
tree852a38dd06566ac94fb79899b7089ec2c120e3dc
parent5d135edd8843471f582a88f4e1a9a9b9fc0fd89b (diff)
pdfimport: refactor pdf and hybrid format detection code
Change-Id: I3421fbcc717a75377db887f567ce3bb9631a4f28 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/146052 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r--sdext/source/pdfimport/filterdet.cxx333
1 files changed, 190 insertions, 143 deletions
diff --git a/sdext/source/pdfimport/filterdet.cxx b/sdext/source/pdfimport/filterdet.cxx
index 5c2abd6be90a..24bff08ccb6f 100644
--- a/sdext/source/pdfimport/filterdet.cxx
+++ b/sdext/source/pdfimport/filterdet.cxx
@@ -188,183 +188,230 @@ PDFDetector::PDFDetector( uno::Reference< uno::XComponentContext > xContext) :
m_xContext(std::move( xContext ))
{}
-// XExtendedFilterDetection
-OUString SAL_CALL PDFDetector::detect( uno::Sequence< beans::PropertyValue >& rFilterData )
+namespace
{
- osl::MutexGuard const guard( m_aMutex );
- bool bSuccess = false;
- // get the InputStream carrying the PDF content
- uno::Reference< io::XInputStream > xInput;
- uno::Reference< io::XStream > xEmbedStream;
- OUString aOutFilterName, aOutTypeName;
- OUString aURL;
- OUString aPwd;
+sal_Int32 fillAttributes(uno::Sequence<beans::PropertyValue> const& rFilterData, uno::Reference<io::XInputStream>& xInput, OUString& aURL, sal_Int32& nFilterNamePos, sal_Int32& nPasswordPos, OUString& aPassword)
+{
const beans::PropertyValue* pAttribs = rFilterData.getConstArray();
sal_Int32 nAttribs = rFilterData.getLength();
- sal_Int32 nFilterNamePos = -1;
- sal_Int32 nPwdPos = -1;
- for( sal_Int32 i = 0; i < nAttribs; i++ )
+ for (sal_Int32 i = 0; i < nAttribs; i++)
{
OUString aVal( "<no string>" );
pAttribs[i].Value >>= aVal;
- SAL_INFO( "sdext.pdfimport", "doDetection: Attrib: " + pAttribs[i].Name + " = " + aVal);
+ SAL_INFO("sdext.pdfimport", "doDetection: Attrib: " + pAttribs[i].Name + " = " + aVal);
- if ( pAttribs[i].Name == "InputStream" )
+ if (pAttribs[i].Name == "InputStream")
pAttribs[i].Value >>= xInput;
- else if ( pAttribs[i].Name == "URL" )
+ else if (pAttribs[i].Name == "URL")
pAttribs[i].Value >>= aURL;
- else if ( pAttribs[i].Name == "FilterName" )
+ else if (pAttribs[i].Name == "FilterName")
nFilterNamePos = i;
- else if ( pAttribs[i].Name == "Password" )
+ else if (pAttribs[i].Name == "Password")
{
- nPwdPos = i;
- pAttribs[i].Value >>= aPwd;
+ nPasswordPos = i;
+ pAttribs[i].Value >>= aPassword;
}
}
- if( xInput.is() )
+ return nAttribs;
+}
+
+// read the first 1024 byte (see PDF reference implementation note 12)
+constexpr const sal_Int32 constHeaderSize = 1024;
+
+bool detectPDF(uno::Reference<io::XInputStream> const& xInput, uno::Sequence<sal_Int8>& aHeader, sal_uInt64& nHeaderReadSize)
+{
+ try
{
- oslFileHandle aFile = nullptr;
- try {
- uno::Reference< io::XSeekable > xSeek( xInput, uno::UNO_QUERY );
- if( xSeek.is() )
- xSeek->seek( 0 );
- // read the first 1024 byte (see PDF reference implementation note 12)
- const sal_Int32 nHeaderSize = 1024;
- uno::Sequence< sal_Int8 > aBuf( nHeaderSize );
- sal_uInt64 nBytes = xInput->readBytes( aBuf, nHeaderSize );
- if( nBytes > 5 )
+ uno::Reference<io::XSeekable> xSeek(xInput, uno::UNO_QUERY);
+ if (xSeek.is())
+ xSeek->seek(0);
+
+ nHeaderReadSize = xInput->readBytes(aHeader, constHeaderSize);
+ if (nHeaderReadSize <= 5)
+ return false;
+
+ const sal_Int8* pBytes = aHeader.getConstArray();
+ for (sal_uInt64 i = 0; i < nHeaderReadSize - 5; i++)
+ {
+ if (pBytes[i+0] == '%' &&
+ pBytes[i+1] == 'P' &&
+ pBytes[i+2] == 'D' &&
+ pBytes[i+3] == 'F' &&
+ pBytes[i+4] == '-')
{
- const sal_Int8* pBytes = aBuf.getConstArray();
- for( sal_uInt64 i = 0; i < nBytes-5; i++ )
- {
- if( pBytes[i] == '%' &&
- pBytes[i+1] == 'P' &&
- pBytes[i+2] == 'D' &&
- pBytes[i+3] == 'F' &&
- pBytes[i+4] == '-' )
- {
- bSuccess = true;
- break;
- }
- }
+ return true;
}
+ }
+ }
+ catch (const css::io::IOException &)
+ {
+ TOOLS_WARN_EXCEPTION("sdext.pdfimport", "caught");
+ }
+ return false;
+}
+
+bool copyToTemp(uno::Reference<io::XInputStream> const& xInput, oslFileHandle& rFileHandle, uno::Sequence<sal_Int8> const& aHeader, sal_uInt64 nHeaderReadSize)
+{
+ try
+ {
+ sal_uInt64 nWritten = 0;
+ osl_writeFile(rFileHandle, aHeader.getConstArray(), nHeaderReadSize, &nWritten);
+
+ const sal_uInt64 nBufferSize = 4096;
+ uno::Sequence<sal_Int8> aBuffer(nBufferSize);
- // check for hybrid PDF
- if( bSuccess &&
- ( aURL.isEmpty() || !comphelper::isFileUrl(aURL) )
- )
+ // copy the bytes
+ sal_uInt64 nRead = 0;
+ do
+ {
+ nRead = xInput->readBytes(aBuffer, nBufferSize);
+ if (nRead > 0)
{
- sal_uInt64 nWritten = 0;
- if( osl_createTempFile( nullptr, &aFile, &aURL.pData ) != osl_File_E_None )
- {
- bSuccess = false;
- }
- else
- {
- SAL_INFO( "sdext.pdfimport", "created temp file " + aURL );
+ osl_writeFile(rFileHandle, aBuffer.getConstArray(), nRead, &nWritten);
+ if (nWritten != nRead)
+ return false;
+ }
+ }
+ while (nRead == nBufferSize);
+ }
+ catch (const css::io::IOException &)
+ {
+ TOOLS_WARN_EXCEPTION("sdext.pdfimport", "caught");
+ }
+ return false;
+}
- osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
+} // end anonymous namespace
- SAL_WARN_IF( nWritten != nBytes, "sdext.pdfimport", "writing of header bytes failed" );
+// XExtendedFilterDetection
+OUString SAL_CALL PDFDetector::detect( uno::Sequence< beans::PropertyValue >& rFilterData )
+{
+ osl::MutexGuard const guard( m_aMutex );
+ bool bSuccess = false;
- if( nWritten == nBytes )
- {
- const sal_uInt32 nBufSize = 4096;
- aBuf = uno::Sequence<sal_Int8>(nBufSize);
- // copy the bytes
- do
- {
- nBytes = xInput->readBytes( aBuf, nBufSize );
- if( nBytes > 0 )
- {
- osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
- if( nWritten != nBytes )
- {
- bSuccess = false;
- break;
- }
- }
- } while( nBytes == nBufSize );
- }
- }
- osl_closeFile( aFile );
- }
- } catch (const css::io::IOException &) {
- TOOLS_WARN_EXCEPTION("sdext.pdfimport", "caught");
- return OUString();
+ // get the InputStream carrying the PDF content
+ uno::Reference<io::XInputStream> xInput;
+ uno::Reference<io::XStream> xEmbedStream;
+ OUString aOutFilterName;
+ OUString aOutTypeName;
+ OUString aURL;
+ OUString aPassword;
+
+ sal_Int32 nFilterNamePos = -1;
+ sal_Int32 nPasswordPos = -1;
+ sal_Int32 nAttribs = fillAttributes(rFilterData, xInput, aURL, nFilterNamePos, nPasswordPos, aPassword);
+
+ if (!xInput.is())
+ return OUString();
+
+
+ uno::Sequence<sal_Int8> aHeader(constHeaderSize);
+ sal_uInt64 nHeaderReadSize = 0;
+ bSuccess = detectPDF(xInput, aHeader, nHeaderReadSize);
+
+ if (!bSuccess)
+ return OUString();
+
+ oslFileHandle aFileHandle = nullptr;
+
+ // check for hybrid PDF
+ if (bSuccess && (aURL.isEmpty() || !comphelper::isFileUrl(aURL)))
+ {
+ if (osl_createTempFile(nullptr, &aFileHandle, &aURL.pData) != osl_File_E_None)
+ {
+ bSuccess = false;
}
- OUString aEmbedMimetype;
- xEmbedStream = getAdditionalStream( aURL, aEmbedMimetype, aPwd, m_xContext, rFilterData, false );
- if( aFile )
- osl_removeFile( aURL.pData );
- if( !aEmbedMimetype.isEmpty() )
+ else
{
- if( aEmbedMimetype == "application/vnd.oasis.opendocument.text"
- || aEmbedMimetype == "application/vnd.oasis.opendocument.text-master" )
- aOutFilterName = "writer_pdf_addstream_import";
- else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.presentation" )
- aOutFilterName = "impress_pdf_addstream_import";
- else if( aEmbedMimetype == "application/vnd.oasis.opendocument.graphics"
- || aEmbedMimetype == "application/vnd.oasis.opendocument.drawing" )
- aOutFilterName = "draw_pdf_addstream_import";
- else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.spreadsheet" )
- aOutFilterName = "calc_pdf_addstream_import";
+ SAL_INFO( "sdext.pdfimport", "created temp file " + aURL);
+ bSuccess = copyToTemp(xInput, aFileHandle, aHeader, nHeaderReadSize);
}
+ osl_closeFile(aFileHandle);
}
- if( bSuccess )
+ if (!bSuccess)
{
- if( !aOutFilterName.isEmpty() )
+ if (aFileHandle)
+ osl_removeFile(aURL.pData);
+ return OUString();
+ }
+
+ OUString aEmbedMimetype;
+ xEmbedStream = getAdditionalStream(aURL, aEmbedMimetype, aPassword, m_xContext, rFilterData, false);
+
+ if (aFileHandle)
+ osl_removeFile(aURL.pData);
+
+ if (!aEmbedMimetype.isEmpty())
+ {
+ if( aEmbedMimetype == "application/vnd.oasis.opendocument.text"
+ || aEmbedMimetype == "application/vnd.oasis.opendocument.text-master" )
+ aOutFilterName = "writer_pdf_addstream_import";
+ else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.presentation" )
+ aOutFilterName = "impress_pdf_addstream_import";
+ else if( aEmbedMimetype == "application/vnd.oasis.opendocument.graphics"
+ || aEmbedMimetype == "application/vnd.oasis.opendocument.drawing" )
+ aOutFilterName = "draw_pdf_addstream_import";
+ else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.spreadsheet" )
+ aOutFilterName = "calc_pdf_addstream_import";
+ }
+
+ if (!bSuccess)
+ return OUString();
+
+ if (!aOutFilterName.isEmpty())
+ {
+ if( nFilterNamePos == -1 )
{
- if( nFilterNamePos == -1 )
- {
- nFilterNamePos = nAttribs;
- rFilterData.realloc( ++nAttribs );
- rFilterData.getArray()[ nFilterNamePos ].Name = "FilterName";
- }
- auto pFilterData = rFilterData.getArray();
- aOutTypeName = "pdf_Portable_Document_Format";
+ nFilterNamePos = nAttribs;
+ rFilterData.realloc( ++nAttribs );
+ rFilterData.getArray()[ nFilterNamePos ].Name = "FilterName";
+ }
+ auto pFilterData = rFilterData.getArray();
+ aOutTypeName = "pdf_Portable_Document_Format";
- pFilterData[nFilterNamePos].Value <<= aOutFilterName;
- if( xEmbedStream.is() )
- {
- rFilterData.realloc( ++nAttribs );
- pFilterData = rFilterData.getArray();
- pFilterData[nAttribs-1].Name = "EmbeddedSubstream";
- pFilterData[nAttribs-1].Value <<= xEmbedStream;
- }
- if( !aPwd.isEmpty() )
- {
- if( nPwdPos == -1 )
- {
- nPwdPos = nAttribs;
- rFilterData.realloc( ++nAttribs );
- pFilterData = rFilterData.getArray();
- pFilterData[ nPwdPos ].Name = "Password";
- }
- pFilterData[ nPwdPos ].Value <<= aPwd;
- }
+ pFilterData[nFilterNamePos].Value <<= aOutFilterName;
+ if( xEmbedStream.is() )
+ {
+ rFilterData.realloc( ++nAttribs );
+ pFilterData = rFilterData.getArray();
+ pFilterData[nAttribs-1].Name = "EmbeddedSubstream";
+ pFilterData[nAttribs-1].Value <<= xEmbedStream;
}
- else
+ if (!aPassword.isEmpty())
{
- css::beans::PropertyValue* pFilterData;
- if( nFilterNamePos == -1 )
+ if (nPasswordPos == -1)
{
- nFilterNamePos = nAttribs;
- rFilterData.realloc( ++nAttribs );
+ nPasswordPos = nAttribs;
+ rFilterData.realloc(++nAttribs);
pFilterData = rFilterData.getArray();
- pFilterData[ nFilterNamePos ].Name = "FilterName";
+ pFilterData[nPasswordPos].Name = "Password";
}
- else
- pFilterData = rFilterData.getArray();
+ pFilterData[nPasswordPos].Value <<= aPassword;
+ }
+ }
+ else
+ {
+ css::beans::PropertyValue* pFilterData;
+ if( nFilterNamePos == -1 )
+ {
+ nFilterNamePos = nAttribs;
+ rFilterData.realloc( ++nAttribs );
+ pFilterData = rFilterData.getArray();
+ pFilterData[ nFilterNamePos ].Name = "FilterName";
+ }
+ else
+ pFilterData = rFilterData.getArray();
- const sal_Int32 nDocumentType = 0; //const sal_Int32 nDocumentType = queryDocumentTypeDialog(m_xContext,aURL);
- if( nDocumentType < 0 )
- {
- return OUString();
- }
- else switch( nDocumentType )
+ const sal_Int32 nDocumentType = 0; //const sal_Int32 nDocumentType = queryDocumentTypeDialog(m_xContext,aURL);
+ if( nDocumentType < 0 )
+ {
+ return OUString();
+ }
+ else
+ {
+ switch (nDocumentType)
{
case 0:
pFilterData[nFilterNamePos].Value <<= OUString( "draw_pdf_import" );
@@ -381,9 +428,9 @@ OUString SAL_CALL PDFDetector::detect( uno::Sequence< beans::PropertyValue >& rF
default:
assert(!"Unexpected case");
}
-
- aOutTypeName = "pdf_Portable_Document_Format";
}
+
+ aOutTypeName = "pdf_Portable_Document_Format";
}
return aOutTypeName;