From d401b4a40d093210d5a3a7f1f3b0379f8cafc173 Mon Sep 17 00:00:00 2001 From: Lionel Elie Mamane Date: Sun, 2 Jul 2017 11:28:09 +0200 Subject: factorisation du décodage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I0130945b5c616beaa3eaedb34e5ca77b1bd70547 --- connectivity/source/commontools/dbtools.cxx | 83 +++++++++++++++++++++ connectivity/source/drivers/dbase/DTable.cxx | 10 ++- include/connectivity/dbtools.hxx | 45 ++++++++++- sc/CppunitTest_sc_ucalc.mk | 1 + sc/Library_sc.mk | 1 + sc/source/ui/unoobj/filtuno.cxx | 107 ++++++--------------------- 6 files changed, 159 insertions(+), 88 deletions(-) diff --git a/connectivity/source/commontools/dbtools.cxx b/connectivity/source/commontools/dbtools.cxx index 97316e5e2536..fa34331d85f9 100644 --- a/connectivity/source/commontools/dbtools.cxx +++ b/connectivity/source/commontools/dbtools.cxx @@ -2026,6 +2026,89 @@ OSQLColumns::Vector::const_iterator find(OSQLColumns::Vector::const_iterator fir ++first; return first; } + +namespace dbase +{ + bool dbfDecodeCharset(rtl_TextEncoding &_out_encoding, sal_uInt8 nType, sal_uInt8 nCodepage) + { + switch (nType) + { + case dBaseIII: + case dBaseIV: + case dBaseV: + case VisualFoxPro: + case VisualFoxProAuto: + case dBaseFS: + case dBaseFSMemo: + case dBaseIVMemoSQL: + case dBaseIIIMemo: + case FoxProMemo: + { + if (nCodepage != 0x00) + { + auto eEncoding(RTL_TEXTENCODING_DONTKNOW); + switch(nCodepage) + { + case 0x01: eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437 + case 0x02: eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850 + case 0x03: eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252 + case 0x04: eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh + case 0x64: eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852 + case 0x65: eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866 + 
case 0x66: eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865 + case 0x67: eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS + //case 0x68: eEncoding = ; break; // Kamenicky (Czech) MS-DOS + //case 0x69: eEncoding = ; break; // Mazovia (Polish) MS-DOS + case 0x6A: eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G) + case 0x6B: eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS + case 0x6C: eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada + case 0x78: eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese + case 0x79: eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul) + case 0x7A: eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese + case 0x7B: eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis) + case 0x7C: eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai + case 0x7D: eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew + case 0x7E: eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic + case 0x96: eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh + case 0x97: eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh + case 0x98: eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh + case 0xC8: eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250 + case 0xC9: eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows + case 0xCA: eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows + case 0xCB: eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows + case 0xCC: eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic + } + if(eEncoding != RTL_TEXTENCODING_DONTKNOW) + { + _out_encoding = eEncoding; + return true; + } + } + } + } + return false; + } + + bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream) + { + sal_uInt8 nType=0; + 
dbf_Stream->ReadUChar( nType ); + + dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29); + if (dbf_Stream->IsEof()) + { + return false; + } + else + { + sal_uInt8 nEncoding=0; + dbf_Stream->ReadUChar( nEncoding ); + return dbfDecodeCharset(nCharSet, nType, nEncoding); + } + } + +} + } //namespace connectivity /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/connectivity/source/drivers/dbase/DTable.cxx b/connectivity/source/drivers/dbase/DTable.cxx index 51b400300566..3e14a594bbb7 100644 --- a/connectivity/source/drivers/dbase/DTable.cxx +++ b/connectivity/source/drivers/dbase/DTable.cxx @@ -249,9 +249,13 @@ void ODbaseTable::readHeader() case dBaseIVMemoSQL: case dBaseIIIMemo: case FoxProMemo: - // TODO: check that the code removed here is not needed when opening a DBF file - // from something else than Calc. E.g. Base. If it is, then factorise it into - // a function accessible from connectivity and from sc + m_pFileStream->SetEndian(SvStreamEndian::LITTLE); + if( getConnection()->isTextEncodingDefaulted() && + !dbfDecodeCharset(m_eEncoding, nType, m_aHeader.trailer[17])) + { + m_eEncoding = RTL_TEXTENCODING_IBM_850; + } + break; case dBaseIVMemo: m_pFileStream->SetEndian(SvStreamEndian::LITTLE); break; diff --git a/include/connectivity/dbtools.hxx b/include/connectivity/dbtools.hxx index 7f4719bd41c8..4b493ee98267 100644 --- a/include/connectivity/dbtools.hxx +++ b/include/connectivity/dbtools.hxx @@ -27,6 +27,7 @@ #include #include #include +#include namespace com { namespace sun { namespace star { @@ -786,9 +787,51 @@ namespace dbtools OUStringBuffer& _out_rSQLPredicate ); - } // namespace dbtools +namespace connectivity::dbase +{ + enum DBFType { dBaseIII = 0x03, + dBaseIV = 0x04, + dBaseV = 0x05, + VisualFoxPro = 0x30, + VisualFoxProAuto = 0x31, // Visual FoxPro with AutoIncrement field + dBaseFS = 0x43, + dBaseFSMemo = 0xB3, + dBaseIIIMemo = 0x83, + dBaseIVMemo = 0x8B, + dBaseIVMemoSQL = 0x8E, + FoxProMemo = 0xF5 + }; + + /** decode a DBase file's 
codepage byte to an RTL charset + @param _out_nCharset + in case of success, the decoded RTL charset is written there. + else, this is not written to. + @param nType + the file's type byte + @param nCodepage + the file's codepage byte + @return + true if an RTL charset was successfully decoded and written to _out_nCharset + false if nothing was written to _out_nCharset + */ + OOO_DLLPUBLIC_DBTOOLS bool dbfDecodeCharset(rtl_TextEncoding &_out_nCharset, sal_uInt8 nType, sal_uInt8 nCodepage); + + /** read a DBase file's type and codepage bytes from a stream and decode them to an RTL charset + @param nCharSet + in case of success, the decoded RTL charset is written there. + else, this is not written to. + @param dbf_Stream + pointer to a SvStream encapsulating the DBase file. + The type byte is read at the stream's current position (no rewind is done first, so the caller is expected to pass the stream positioned at the start of the file); the stream is then moved to offset 29 to read the codepage byte. + No guarantee is made on its position afterwards. Caller must reposition it itself. + @return + true if an RTL charset was successfully decoded and written to nCharSet + false if nothing was written to nCharSet + */ + OOO_DLLPUBLIC_DBTOOLS bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream); +} // namespace connectivity::dbase #endif // INCLUDED_CONNECTIVITY_DBTOOLS_HXX diff --git a/sc/CppunitTest_sc_ucalc.mk b/sc/CppunitTest_sc_ucalc.mk index d918182a0c4f..d423f6c73a7b 100644 --- a/sc/CppunitTest_sc_ucalc.mk +++ b/sc/CppunitTest_sc_ucalc.mk @@ -45,6 +45,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_ucalc, \ comphelper \ cppu \ cppuhelper \ + dbtools \ drawinglayer \ editeng \ for \ diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk index 5b7d2fcf9275..c9f54638e62e 100644 --- a/sc/Library_sc.mk +++ b/sc/Library_sc.mk @@ -58,6 +58,7 @@ $(eval $(call gb_Library_use_libraries,sc,\ comphelper \ cppu \ cppuhelper \ + dbtools \ drawinglayer \ editeng \ for \ diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx index de16f6b28f55..b2da4408e874 100644 --- a/sc/source/ui/unoobj/filtuno.cxx +++ b/sc/source/ui/unoobj/filtuno.cxx @@ -22,6 +22,7 
@@ #include #include #include +#include #include "editutil.hxx" #include "filtuno.hxx" @@ -44,6 +45,7 @@ using namespace com::sun::star; using namespace com::sun::star::uno; +using namespace connectivity::dbase; #define SCFILTEROPTIONSOBJ_SERVICE "com.sun.star.ui.dialogs.FilterOptionsDialog" #define SCFILTEROPTIONSOBJ_IMPLNAME "com.sun.star.comp.Calc.FilterOptionsDialog" @@ -84,97 +86,34 @@ namespace charsetSource load_CharSet(rtl_TextEncoding &nCharSet, bool bExport, SvStream* dbf_Stream) { + if (dbfReadCharset(nCharSet, dbf_Stream)) { - sal_uInt8 nType=0; - dbf_Stream->ReadUChar( nType ); - - switch (nType) - { - case dBaseIII: - case dBaseIV: - case dBaseV: - case VisualFoxPro: - case VisualFoxProAuto: - case dBaseFS: - case dBaseFSMemo: - case dBaseIVMemoSQL: - case dBaseIIIMemo: - case FoxProMemo: - dbf_Stream->SetEndian(SvStreamEndian::LITTLE); - - dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29); - if (! dbf_Stream->IsEof()) - { - sal_uInt8 nEncoding=0; - dbf_Stream->ReadUChar( nEncoding ); - if (nEncoding != 0x00) - { - auto eEncoding(RTL_TEXTENCODING_DONTKNOW); - switch(nEncoding) - { - case 0x01: eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437 - case 0x02: eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850 - case 0x03: eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252 - case 0x04: eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh - case 0x64: eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852 - case 0x65: eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866 - case 0x66: eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865 - case 0x67: eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS - //case 0x68: eEncoding = ; break; // Kamenicky (Czech) MS-DOS - //case 0x69: eEncoding = ; break; // Mazovia (Polish) MS-DOS - case 0x6A: eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS 
(437G) - case 0x6B: eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS - case 0x6C: eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada - case 0x78: eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese - case 0x79: eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul) - case 0x7A: eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese - case 0x7B: eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis) - case 0x7C: eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai - case 0x7D: eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew - case 0x7E: eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic - case 0x96: eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh - case 0x97: eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh - case 0x98: eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh - case 0xC8: eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250 - case 0xC9: eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows - case 0xCA: eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows - case 0xCB: eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows - case 0xCC: eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic - } - if(eEncoding != RTL_TEXTENCODING_DONTKNOW) - { - nCharSet = eEncoding; - return charsetSource::charset_from_file; - } - } - } - } + return charsetSource::charset_from_file; } - { - Sequence aValues; - const Any *pProperties; - Sequence aNames { DBF_CHAR_SET }; - ScLinkConfigItem aItem( OUString::createFromAscii( - bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) ); - aValues = aItem.GetProperties( aNames ); - pProperties = aValues.getConstArray(); + Sequence aValues; + const Any *pProperties; + Sequence aNames { DBF_CHAR_SET }; + ScLinkConfigItem aItem( OUString::createFromAscii( + 
bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) ); + + aValues = aItem.GetProperties( aNames ); + pProperties = aValues.getConstArray(); - if( pProperties[0].hasValue() ) + if( pProperties[0].hasValue() ) + { + sal_Int32 nChar = 0; + pProperties[0] >>= nChar; + if( nChar >= 0) { - sal_Int32 nChar = 0; - pProperties[0] >>= nChar; - if( nChar >= 0) - { - nCharSet = (rtl_TextEncoding) nChar; - return charsetSource::charset_from_user_setting; - } + nCharSet = (rtl_TextEncoding) nChar; + return charsetSource::charset_from_user_setting; } - - // Default choice - nCharSet = RTL_TEXTENCODING_IBM_850; - return charsetSource::charset_default; } + + // Default choice + nCharSet = RTL_TEXTENCODING_IBM_850; + return charsetSource::charset_default; } void save_CharSet( rtl_TextEncoding nCharSet, bool bExport ) -- cgit v1.2.3