summaryrefslogtreecommitdiff
path: root/connectivity
diff options
context:
space:
mode:
authorLionel Elie Mamane <lionel@mamane.lu>2017-06-25 17:21:45 +0200
committerJulien Nabet <serval2412@yahoo.fr>2017-07-04 09:32:16 +0200
commit6e0eafe576436ec229c6d90f654ff1b11ff9bdfd (patch)
tree274bae3e4959fc461eb719369573fe88d965a41a /connectivity
parent788a87d46a64dc9e50eb371680c883a1d274b3e1 (diff)
tdf#108789: branch 5.4 only
- Calc: make the complete "what encoding to use" decision before calling the connectivity driver, so that the driver has no ambiguity about whether it should override our setting or not. To this end, factorise the part of the driver that reads the encoding from the file header into dbtools. - Calc: don't ask for encoding when the file's header gives the encoding. - don't confuse CP850 (the default) and "don't know", including: * don't ignore CP850 user setting * don't overwrite user setting with CP850 Cherry-pick: - https://cgit.freedesktop.org/libreoffice/core/commit/?id=7f1465a9599e9665159dd2d823a6e9064cca5703 - https://cgit.freedesktop.org/libreoffice/core/commit/?id=857d64ed3ebbeb0ee4e8a75bfeaa4eb406944571 - https://cgit.freedesktop.org/libreoffice/core/commit/?id=9170d10cc57c3f0f3e82b27ce4b2cd9c897e669d Change-Id: Id80b7c505858b88f717b0ce6bd890527909e5fd1 Reviewed-on: https://gerrit.libreoffice.org/39451 Reviewed-by: Lionel Elie Mamane <lionel@mamane.lu> Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'connectivity')
-rw-r--r--connectivity/source/commontools/dbtools.cxx83
-rw-r--r--connectivity/source/drivers/dbase/DTable.cxx43
2 files changed, 85 insertions, 41 deletions
diff --git a/connectivity/source/commontools/dbtools.cxx b/connectivity/source/commontools/dbtools.cxx
index 1b17eb112d49..d458c1509722 100644
--- a/connectivity/source/commontools/dbtools.cxx
+++ b/connectivity/source/commontools/dbtools.cxx
@@ -2035,6 +2035,89 @@ OSQLColumns::Vector::const_iterator find(OSQLColumns::Vector::const_iterator fir
++first;
return first;
}
+
+namespace dbase
+{
+ /** Translate the code page byte of a dBASE header into a text encoding.
+  *
+  * @param _out_encoding receives the encoding; written only when true is returned
+  * @param nType         file type byte of the header
+  * @param nCodepage     code page byte of the header; 0x00 is treated as "not set"
+  * @return true if nType is one of the handled file types and nCodepage maps
+  *         to a known encoding, false otherwise
+  */
+ bool dbfDecodeCharset(rtl_TextEncoding &_out_encoding, sal_uInt8 nType, sal_uInt8 nCodepage)
+ {
+     // Only the file types listed here are decoded; any other type is rejected.
+     switch (nType)
+     {
+         case dBaseIII:
+         case dBaseIV:
+         case dBaseV:
+         case VisualFoxPro:
+         case VisualFoxProAuto:
+         case dBaseFS:
+         case dBaseFSMemo:
+         case dBaseIVMemoSQL:
+         case dBaseIIIMemo:
+         case FoxProMemo:
+             break;
+         default:
+             return false;
+     }
+
+     if (nCodepage == 0x00)
+         return false;
+
+     // Stays at "don't know" for every code page value without a mapping below.
+     rtl_TextEncoding eDecoded = RTL_TEXTENCODING_DONTKNOW;
+     switch (nCodepage)
+     {
+         // DOS USA code page 437
+         case 0x01: eDecoded = RTL_TEXTENCODING_IBM_437; break;
+         // DOS Multilingual code page 850
+         case 0x02: eDecoded = RTL_TEXTENCODING_IBM_850; break;
+         // Windows ANSI code page 1252
+         case 0x03: eDecoded = RTL_TEXTENCODING_MS_1252; break;
+         // Standard Macintosh
+         case 0x04: eDecoded = RTL_TEXTENCODING_APPLE_ROMAN; break;
+         // EE MS-DOS code page 852
+         case 0x64: eDecoded = RTL_TEXTENCODING_IBM_852; break;
+         // Russian MS-DOS code page 866
+         case 0x65: eDecoded = RTL_TEXTENCODING_IBM_866; break;
+         // Nordic MS-DOS code page 865
+         case 0x66: eDecoded = RTL_TEXTENCODING_IBM_865; break;
+         // Icelandic MS-DOS
+         case 0x67: eDecoded = RTL_TEXTENCODING_IBM_861; break;
+         // 0x68 Kamenicky (Czech) MS-DOS: no mapping
+         // 0x69 Mazovia (Polish) MS-DOS: no mapping
+         // Greek MS-DOS (437G)
+         case 0x6A: eDecoded = RTL_TEXTENCODING_IBM_737; break;
+         // Turkish MS-DOS
+         case 0x6B: eDecoded = RTL_TEXTENCODING_IBM_857; break;
+         // MS-DOS, Canada
+         case 0x6C: eDecoded = RTL_TEXTENCODING_IBM_863; break;
+         // Windows, Traditional Chinese
+         case 0x78: eDecoded = RTL_TEXTENCODING_MS_950; break;
+         // Windows, Korean (Hangul)
+         case 0x79: eDecoded = RTL_TEXTENCODING_MS_949; break;
+         // Windows, Simplified Chinese
+         case 0x7A: eDecoded = RTL_TEXTENCODING_MS_936; break;
+         // Windows, Japanese (Shift-jis)
+         case 0x7B: eDecoded = RTL_TEXTENCODING_MS_932; break;
+         // Windows, Thai
+         case 0x7C: eDecoded = RTL_TEXTENCODING_MS_874; break;
+         // Windows, Hebrew
+         case 0x7D: eDecoded = RTL_TEXTENCODING_MS_1255; break;
+         // Windows, Arabic
+         case 0x7E: eDecoded = RTL_TEXTENCODING_MS_1256; break;
+         // Russian Macintosh
+         case 0x96: eDecoded = RTL_TEXTENCODING_APPLE_CYRILLIC; break;
+         // Eastern European Macintosh
+         case 0x97: eDecoded = RTL_TEXTENCODING_APPLE_CENTEURO; break;
+         // Greek Macintosh
+         case 0x98: eDecoded = RTL_TEXTENCODING_APPLE_GREEK; break;
+         // Windows EE code page 1250
+         case 0xC8: eDecoded = RTL_TEXTENCODING_MS_1250; break;
+         // Russian Windows
+         case 0xC9: eDecoded = RTL_TEXTENCODING_MS_1251; break;
+         // Turkish Windows
+         case 0xCA: eDecoded = RTL_TEXTENCODING_MS_1254; break;
+         // Greek Windows
+         case 0xCB: eDecoded = RTL_TEXTENCODING_MS_1253; break;
+         // Windows, Baltic
+         case 0xCC: eDecoded = RTL_TEXTENCODING_MS_1257; break;
+     }
+
+     if (eDecoded == RTL_TEXTENCODING_DONTKNOW)
+         return false;
+
+     _out_encoding = eDecoded;
+     return true;
+ }
+
+ /** Read the type and code page bytes of a dBASE header and decode the encoding.
+  *
+  * The file type is taken from absolute offset 0 and the code page byte from
+  * absolute offset 29; both are passed on to dbfDecodeCharset().
+  *
+  * @param nCharSet   receives the encoding; only written by dbfDecodeCharset() on success
+  * @param dbf_Stream stream holding the file; its position is changed by this call
+  * @return true if both bytes could be read and name a known encoding,
+  *         false for a null stream, a too-short stream or an unknown code page
+  */
+ bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream)
+ {
+     if (!dbf_Stream)
+         return false;
+
+     // The code page byte is addressed absolutely, so address the type byte
+     // the same way instead of depending on where the caller left the stream.
+     dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN);
+     sal_uInt8 nType=0;
+     dbf_Stream->ReadUChar( nType );
+     if (dbf_Stream->IsEof())
+         return false;
+
+     dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29);
+     sal_uInt8 nEncoding=0;
+     dbf_Stream->ReadUChar( nEncoding );
+     // NOTE(review): end-of-file is expected to be flagged by a short read, not
+     // by the seek itself, hence the test after reading - confirm against SvStream.
+     if (dbf_Stream->IsEof())
+         return false;
+
+     return dbfDecodeCharset(nCharSet, nType, nEncoding);
+ }
+
+}
+
} //namespace connectivity
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/connectivity/source/drivers/dbase/DTable.cxx b/connectivity/source/drivers/dbase/DTable.cxx
index 12e7b1aa7647..b89e1b55a42a 100644
--- a/connectivity/source/drivers/dbase/DTable.cxx
+++ b/connectivity/source/drivers/dbase/DTable.cxx
@@ -244,48 +244,9 @@ void ODbaseTable::readHeader()
case dBaseIIIMemo:
case FoxProMemo:
m_pFileStream->SetEndian(SvStreamEndian::LITTLE);
- if ( m_aHeader.db_frei[17] != 0x00
- && !m_aHeader.db_frei[18] && !m_aHeader.db_frei[19] && getConnection()->isTextEncodingDefaulted() )
+ if( getConnection()->isTextEncodingDefaulted() &&
+ !dbfDecodeCharset(m_eEncoding, nType, m_aHeader.db_frei[17]))
{
- switch(m_aHeader.db_frei[17])
- {
- case 0x01: m_eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437
- case 0x02: m_eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850
- case 0x03: m_eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252
- case 0x04: m_eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh
- case 0x64: m_eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852
- case 0x65: m_eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866
- case 0x66: m_eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865
- case 0x67: m_eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS
- //case 0x68: m_eEncoding = ; break; // Kamenicky (Czech) MS-DOS
- //case 0x69: m_eEncoding = ; break; // Mazovia (Polish) MS-DOS
- case 0x6A: m_eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G)
- case 0x6B: m_eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS
- case 0x6C: m_eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada
- case 0x78: m_eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese
- case 0x79: m_eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul)
- case 0x7A: m_eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese
- case 0x7B: m_eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis)
- case 0x7C: m_eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai
- case 0x7D: m_eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew
- case 0x7E: m_eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic
- case 0x96: m_eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh
- case 0x97: m_eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh
- case 0x98: m_eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh
- case 0xC8: m_eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250
- case 0xC9: m_eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows
- case 0xCA: m_eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows
- case 0xCB: m_eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows
- case 0xCC: m_eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic
- default:
- // Default Encoding
- m_eEncoding = RTL_TEXTENCODING_IBM_850;
- break;
- }
- }
- else
- {
- // Default Encoding
m_eEncoding = RTL_TEXTENCODING_IBM_850;
}
break;