diff options
author | Lionel Elie Mamane <lionel@mamane.lu> | 2017-06-25 17:21:45 +0200 |
---|---|---|
committer | Julien Nabet <serval2412@yahoo.fr> | 2017-07-04 09:32:16 +0200 |
commit | 6e0eafe576436ec229c6d90f654ff1b11ff9bdfd (patch) | |
tree | 274bae3e4959fc461eb719369573fe88d965a41a /connectivity | |
parent | 788a87d46a64dc9e50eb371680c883a1d274b3e1 (diff) |
tdf#108789: branch 5.4 only
- Calc: make the complete "what encoding to use" decision before
calling the connectivity driver, so that the driver has
no ambiguity about whether it should override our setting
or not.
To this end, factorise the part of the driver that reads
the encoding from the file header into dbtools.
- Calc: don't ask for encoding when the file's header give the encoding.
- don't confuse CP850 (the default) and "don't know", including:
* don't ignore CP850 user setting
* don't overwrite user setting with CP850
Cherry-pick:
- https://cgit.freedesktop.org/libreoffice/core/commit/?id=7f1465a9599e9665159dd2d823a6e9064cca5703
- https://cgit.freedesktop.org/libreoffice/core/commit/?id=857d64ed3ebbeb0ee4e8a75bfeaa4eb406944571
- https://cgit.freedesktop.org/libreoffice/core/commit/?id=9170d10cc57c3f0f3e82b27ce4b2cd9c897e669d
Change-Id: Id80b7c505858b88f717b0ce6bd890527909e5fd1
Reviewed-on: https://gerrit.libreoffice.org/39451
Reviewed-by: Lionel Elie Mamane <lionel@mamane.lu>
Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'connectivity')
-rw-r--r-- | connectivity/source/commontools/dbtools.cxx | 83 | ||||
-rw-r--r-- | connectivity/source/drivers/dbase/DTable.cxx | 43 |
2 files changed, 85 insertions, 41 deletions
diff --git a/connectivity/source/commontools/dbtools.cxx b/connectivity/source/commontools/dbtools.cxx index 1b17eb112d49..d458c1509722 100644 --- a/connectivity/source/commontools/dbtools.cxx +++ b/connectivity/source/commontools/dbtools.cxx @@ -2035,6 +2035,89 @@ OSQLColumns::Vector::const_iterator find(OSQLColumns::Vector::const_iterator fir ++first; return first; } + +namespace dbase +{ + bool dbfDecodeCharset(rtl_TextEncoding &_out_encoding, sal_uInt8 nType, sal_uInt8 nCodepage) + { + switch (nType) + { + case dBaseIII: + case dBaseIV: + case dBaseV: + case VisualFoxPro: + case VisualFoxProAuto: + case dBaseFS: + case dBaseFSMemo: + case dBaseIVMemoSQL: + case dBaseIIIMemo: + case FoxProMemo: + { + if (nCodepage != 0x00) + { + auto eEncoding(RTL_TEXTENCODING_DONTKNOW); + switch(nCodepage) + { + case 0x01: eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437 + case 0x02: eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850 + case 0x03: eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252 + case 0x04: eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh + case 0x64: eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852 + case 0x65: eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866 + case 0x66: eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865 + case 0x67: eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS + //case 0x68: eEncoding = ; break; // Kamenicky (Czech) MS-DOS + //case 0x69: eEncoding = ; break; // Mazovia (Polish) MS-DOS + case 0x6A: eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G) + case 0x6B: eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS + case 0x6C: eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada + case 0x78: eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese + case 0x79: eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul) + case 0x7A: eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese + case 0x7B: eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis) + case 0x7C: eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai + case 0x7D: eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew + case 0x7E: eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic + case 0x96: eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh + case 0x97: eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh + case 0x98: eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh + case 0xC8: eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250 + case 0xC9: eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows + case 0xCA: eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows + case 0xCB: eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows + case 0xCC: eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic + } + if(eEncoding != RTL_TEXTENCODING_DONTKNOW) + { + _out_encoding = eEncoding; + return true; + } + } + } + } + return false; + } + + bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream) + { + sal_uInt8 nType=0; + dbf_Stream->ReadUChar( nType ); + + dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29); + if (dbf_Stream->IsEof()) + { + return false; + } + else + { + sal_uInt8 nEncoding=0; + dbf_Stream->ReadUChar( nEncoding ); + return dbfDecodeCharset(nCharSet, nType, nEncoding); + } + } + +} + } //namespace connectivity /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/connectivity/source/drivers/dbase/DTable.cxx b/connectivity/source/drivers/dbase/DTable.cxx index 12e7b1aa7647..b89e1b55a42a 100644 --- a/connectivity/source/drivers/dbase/DTable.cxx +++ b/connectivity/source/drivers/dbase/DTable.cxx @@ -244,48 +244,9 @@ void ODbaseTable::readHeader() case dBaseIIIMemo: case FoxProMemo: m_pFileStream->SetEndian(SvStreamEndian::LITTLE); - if ( m_aHeader.db_frei[17] != 0x00 - && !m_aHeader.db_frei[18] && !m_aHeader.db_frei[19] && getConnection()->isTextEncodingDefaulted() ) + if( getConnection()->isTextEncodingDefaulted() && + !dbfDecodeCharset(m_eEncoding, nType, m_aHeader.db_frei[17])) { - switch(m_aHeader.db_frei[17]) - { - case 0x01: m_eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437 - case 0x02: m_eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850 - case 0x03: m_eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252 - case 0x04: m_eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh - case 0x64: m_eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852 - case 0x65: m_eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866 - case 0x66: m_eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865 - case 0x67: m_eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS - //case 0x68: m_eEncoding = ; break; // Kamenicky (Czech) MS-DOS - //case 0x69: m_eEncoding = ; break; // Mazovia (Polish) MS-DOS - case 0x6A: m_eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G) - case 0x6B: m_eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS - case 0x6C: m_eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada - case 0x78: m_eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese - case 0x79: m_eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul) - case 0x7A: m_eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese - case 0x7B: m_eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis) - case 0x7C: m_eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai - case 0x7D: m_eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew - case 0x7E: m_eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic - case 0x96: m_eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh - case 0x97: m_eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh - case 0x98: m_eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh - case 0xC8: m_eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250 - case 0xC9: m_eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows - case 0xCA: m_eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows - case 0xCB: m_eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows - case 0xCC: m_eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic - default: - // Default Encoding - m_eEncoding = RTL_TEXTENCODING_IBM_850; - break; - } - } - else - { - // Default Encoding m_eEncoding = RTL_TEXTENCODING_IBM_850; } break; |