summary | refs | log | tree | commit | diff
path: root/sdext/source
diff options
context:
space:
mode:
Diffstat (limited to 'sdext/source')
-rw-r--r-- sdext/source/pdfimport/test/pdfunzip.cxx 154
1 files changed, 152 insertions, 2 deletions
diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx b/sdext/source/pdfimport/test/pdfunzip.cxx
index 0feaa268b760..c9e5ece0b3ac 100644
--- a/sdext/source/pdfimport/test/pdfunzip.cxx
+++ b/sdext/source/pdfimport/test/pdfunzip.cxx
@@ -48,12 +48,22 @@ using namespace pdfparse;
void printHelp( const char* pExe ) // prints the usage text to stdout; pExe is substituted for every %s of the synopsis
{
fprintf( stdout,
- "USAGE: %s [-h,--help] [-a, --extract-add-streams] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
+ "USAGE: %s [-h,--help]\n"
+ " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
+ " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
+ " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
+ " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
" -h, --help: show help\n"
" -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
" and prints the mimetype found to stdout\n"
+ " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported\n" // NOTE(review): the opening parenthesis in this help line is never closed
+ " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
+ " object numbers, where object number and generation number are separated by \':\'\n"
+ " an omitted generation number defaults to 0\n"
" -pw, --password: use password for decryption\n"
- , pExe );
+ "\n"
+ "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
+ , pExe, pExe, pExe, pExe, pExe ); // five arguments: one per %s in the five synopsis lines above
}
class FileEmitContext : public EmitContext
@@ -313,6 +323,119 @@ int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFi
return nRet;
}
+int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile ) // writes the referenced font stream of each FontDescriptor in i_pPDFFile to its own file; returns nRet, which stays 0
+{
+ int nRet = 0; // never modified below, so the "nRet == 0" loop condition always holds
+ unsigned int nElements = i_pPDFFile->m_aSubElements.size();
+ for( unsigned i = 0; i < nElements && nRet == 0; i++ )
+ {
+ // search FontDescriptors: only sub elements that are PDFObjects holding a dictionary are considered
+ PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
+ if( ! pObj )
+ continue;
+ PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
+ if( ! pDict )
+ continue;
+ // the dictionary needs a "Type" entry ...
+ std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it =
+ pDict->m_aMap.find( "Type" );
+ if( map_it == pDict->m_aMap.end() )
+ continue;
+ // ... and that entry has to be the name "FontDescriptor"
+ PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
+ if( ! pName )
+ continue;
+ if( ! pName->m_aName.equals( "FontDescriptor" ) )
+ continue;
+
+ // the font name will be helpful, also there must be one in
+ // a font descriptor; descriptors without a "FontName" name entry are skipped
+ map_it = pDict->m_aMap.find( "FontName" );
+ if( map_it == pDict->m_aMap.end() )
+ continue;
+ pName = dynamic_cast<PDFName*>(map_it->second);
+ if( ! pName )
+ continue;
+ rtl::OString aFontName( pName->m_aName ); // becomes part of the output file name
+
+ PDFObjectRef* pStreamRef = 0;
+ const char* pFileType = NULL; // file name extension, set together with pStreamRef
+ // we have a font descriptor, try for a type 1 font ("FontFile" entry, extension "pfa")
+ map_it = pDict->m_aMap.find( "FontFile" );
+ if( map_it != pDict->m_aMap.end() )
+ {
+ pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
+ if( pStreamRef )
+ pFileType = "pfa";
+ }
+
+ // perhaps it's a truetype file ? ("FontFile2" entry, extension "ttf")
+ if( ! pStreamRef )
+ {
+ map_it = pDict->m_aMap.find( "FontFile2" );
+ if( map_it != pDict->m_aMap.end() )
+ {
+ pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
+ if( pStreamRef )
+ pFileType = "ttf";
+ }
+ }
+
+ if( ! pStreamRef ) // neither entry held an object reference: nothing to extract
+ continue;
+ // look up the referenced object; descriptors whose reference cannot be resolved are skipped silently
+ PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
+ if( ! pStream )
+ continue;
+ // output name: <i_pOutFile>_font_<number>_<generation>_<fontname>.<pfa|ttf>
+ rtl::OStringBuffer aOutStream( i_pOutFile );
+ aOutStream.append( "_font_" );
+ aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
+ aOutStream.append( "_" );
+ aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
+ aOutStream.append( "_" );
+ aOutStream.append( aFontName );
+ if( pFileType ) // always true here: pFileType is assigned whenever pStreamRef is
+ {
+ aOutStream.append( "." );
+ aOutStream.append( pFileType );
+ }
+ FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
+ aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); // presumably enables decryption while emitting; confirm in FileEmitContext
+ pStream->writeStream( aContext, i_pPDFFile ); // nRet is not updated from this call
+ }
+ return nRet;
+}
+
+std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects; // (object number, generation number) pairs; filled while parsing the -o / --extract-objects argument
+// write_objects: writes the stream of every object listed in s_aEmitObjects to <i_pOutFile>_stream_<number>_<generation>; returns nRet, which stays 0
+int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
+{
+ int nRet = 0; // never modified below, so the "nRet == 0" loop condition always holds
+ unsigned int nElements = s_aEmitObjects.size();
+ for( unsigned i = 0; i < nElements && nRet == 0; i++ )
+ {
+ sal_Int32 nObject = s_aEmitObjects[i].first;
+ sal_Int32 nGeneration = s_aEmitObjects[i].second;
+ PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
+ if( ! pStream )
+ {
+ fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
+ continue; // a missing object is only reported on stderr; the remaining objects are still processed
+ }
+ // output name: <i_pOutFile>_stream_<number>_<generation>
+ rtl::OStringBuffer aOutStream( i_pOutFile );
+ aOutStream.append( "_stream_" );
+ aOutStream.append( nObject );
+ aOutStream.append( "_" );
+ aOutStream.append( nGeneration );
+ FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
+ aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); // presumably enables decryption while emitting; confirm in FileEmitContext
+ pStream->writeStream( aContext, i_pPDFFile ); // nRet is not updated from this call
+ }
+ return nRet;
+}
+
SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
{
const char* pInFile = NULL;
@@ -347,6 +470,33 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
{
aHdl = write_addStreams;
}
+ else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
+ ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
+ {
+ aHdl = write_fonts;
+ }
+ else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
+ ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
+ {
+ aHdl = write_objects;
+ nArg++;
+ if( nArg < argc )
+ {
+ rtl::OString aObjs( argv[nArg] );
+ sal_Int32 nIndex = 0;
+ while( nIndex != -1 )
+ {
+ rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) );
+ sal_Int32 nObject = 0;
+ sal_Int32 nGeneration = 0;
+ sal_Int32 nGenIndex = 0;
+ nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
+ if( nGenIndex != -1 )
+ nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
+ s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
+ }
+ }
+ }
else
{
fprintf( stderr, "unrecognized option \"%s\"\n",