diff options
Diffstat (limited to 'sdext/source')
-rw-r--r-- | sdext/source/pdfimport/test/pdfunzip.cxx | 154 |
1 files changed, 152 insertions, 2 deletions
diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx b/sdext/source/pdfimport/test/pdfunzip.cxx index 0feaa268b760..c9e5ece0b3ac 100644 --- a/sdext/source/pdfimport/test/pdfunzip.cxx +++ b/sdext/source/pdfimport/test/pdfunzip.cxx @@ -48,12 +48,22 @@ using namespace pdfparse; void printHelp( const char* pExe ) { fprintf( stdout, - "USAGE: %s [-h,--help] [-a, --extract-add-streams] [-pw, --password <password>] <inputfile> [<outputfile>]\n" + "USAGE: %s [-h,--help]\n" + " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n" + " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n" + " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n" + " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n" " -h, --help: show help\n" " -a, --extract-add-streams: extracts additional streams to outputfile_object\n" " and prints the mimetype found to stdout\n" + " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported\n" + " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n" + " object numbers, where object number and generation number are separated by \':\'\n" + " an omitted generation number defaults to 0\n" " -pw, --password: use password for decryption\n" - , pExe ); + "\n" + "note: -f, -a, -o and normal unzip operation are mutually exclusive\n" + , pExe, pExe, pExe, pExe, pExe ); } class FileEmitContext : public EmitContext @@ -313,6 +323,119 @@ int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFi return nRet; } +int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile ) +{ + int nRet = 0; + unsigned int nElements = i_pPDFFile->m_aSubElements.size(); + for( unsigned i = 0; i < nElements && nRet == 0; i++ ) + { + // search FontDescriptors + PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]); + if( ! pObj ) + continue; + PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject); + if( ! pDict ) + continue; + + std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it = + pDict->m_aMap.find( "Type" ); + if( map_it == pDict->m_aMap.end() ) + continue; + + PDFName* pName = dynamic_cast<PDFName*>(map_it->second); + if( ! pName ) + continue; + if( ! pName->m_aName.equals( "FontDescriptor" ) ) + continue; + + // the font name will be helpful, also there must be one in + // a font descriptor + map_it = pDict->m_aMap.find( "FontName" ); + if( map_it == pDict->m_aMap.end() ) + continue; + pName = dynamic_cast<PDFName*>(map_it->second); + if( ! pName ) + continue; + rtl::OString aFontName( pName->m_aName ); + + PDFObjectRef* pStreamRef = 0; + const char* pFileType = NULL; + // we have a font descriptor, try for a type 1 font + map_it = pDict->m_aMap.find( "FontFile" ); + if( map_it != pDict->m_aMap.end() ) + { + pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second); + if( pStreamRef ) + pFileType = "pfa"; + } + + // perhaps it's a truetype file ? + if( ! pStreamRef ) + { + map_it = pDict->m_aMap.find( "FontFile2" ); + if( map_it != pDict->m_aMap.end() ) + { + pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second); + if( pStreamRef ) + pFileType = "ttf"; + } + } + + if( ! pStreamRef ) + continue; + + PDFObject* pStream = i_pPDFFile->findObject( pStreamRef ); + if( ! pStream ) + continue; + + rtl::OStringBuffer aOutStream( i_pOutFile ); + aOutStream.append( "_font_" ); + aOutStream.append( sal_Int32(pStreamRef->m_nNumber) ); + aOutStream.append( "_" ); + aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) ); + aOutStream.append( "_" ); + aOutStream.append( aFontName ); + if( pFileType ) + { + aOutStream.append( "." ); + aOutStream.append( pFileType ); + } + FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile ); + aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); + pStream->writeStream( aContext, i_pPDFFile ); + } + return nRet; +} + +std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects; + +int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile ) +{ + int nRet = 0; + unsigned int nElements = s_aEmitObjects.size(); + for( unsigned i = 0; i < nElements && nRet == 0; i++ ) + { + sal_Int32 nObject = s_aEmitObjects[i].first; + sal_Int32 nGeneration = s_aEmitObjects[i].second; + PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration ); + if( ! pStream ) + { + fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration ); + continue; + } + + rtl::OStringBuffer aOutStream( i_pOutFile ); + aOutStream.append( "_stream_" ); + aOutStream.append( nObject ); + aOutStream.append( "_" ); + aOutStream.append( nGeneration ); + FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile ); + aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); + pStream->writeStream( aContext, i_pPDFFile ); + } + return nRet; +} + SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv ) { const char* pInFile = NULL; @@ -347,6 +470,33 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv ) { aHdl = write_addStreams; } + else if( ! rtl_str_compare( "-f", argv[nArg] ) || + ! rtl_str_compare( "--extract-fonts", argv[nArg] ) ) + { + aHdl = write_fonts; + } + else if( ! rtl_str_compare( "-o", argv[nArg] ) || + ! rtl_str_compare( "--extract-objects", argv[nArg] ) ) + { + aHdl = write_objects; + nArg++; + if( nArg < argc ) + { + rtl::OString aObjs( argv[nArg] ); + sal_Int32 nIndex = 0; + while( nIndex != -1 ) + { + rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) ); + sal_Int32 nObject = 0; + sal_Int32 nGeneration = 0; + sal_Int32 nGenIndex = 0; + nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32(); + if( nGenIndex != -1 ) + nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32(); + s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) ); + } + } + } else { fprintf( stderr, "unrecognized option \"%s\"\n", |