summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Elie Mamane <lionel@mamane.lu>2017-06-25 17:21:45 +0200
committerLionel Elie Mamane <lionel@mamane.lu>2017-07-02 13:28:20 +0200
commit7f1465a9599e9665159dd2d823a6e9064cca5703 (patch)
tree4d0db6763f43e76819bdd877141bef20cf3c260e
parent3f4a92cbfda902ea462aec7f6b06062a20b3042c (diff)
tdf#108789 and others: overhaul DBase files encoding handling
- Calc: make the complete "what encoding to use" decision before calling the connectivity driver, so that the driver has no ambiguity about whether it should override our setting or not. To this end, factorise the part of the driver that reads the encoding from the file header into dbtools. - Calc: don't ask for encoding when the file's header give the encoding. - don't confuse CP850 (the default) and "don't know", including: * don't ignore CP850 user setting * don't overwrite user setting with CP850 Thanks to Julien Nabet for the extensive collaboration on this. Change-Id: Id80b7c505858b88f717b0ce6bd890527909e5fd1
-rw-r--r--connectivity/source/commontools/dbtools.cxx83
-rw-r--r--connectivity/source/drivers/dbase/DTable.cxx43
-rw-r--r--include/connectivity/dbtools.hxx45
-rw-r--r--sc/CppunitTest_sc_ucalc.mk1
-rw-r--r--sc/Library_sc.mk1
-rw-r--r--sc/source/ui/docshell/docsh8.cxx4
-rw-r--r--sc/source/ui/unoobj/filtuno.cxx140
7 files changed, 232 insertions, 85 deletions
diff --git a/connectivity/source/commontools/dbtools.cxx b/connectivity/source/commontools/dbtools.cxx
index 97316e5e2536..fa34331d85f9 100644
--- a/connectivity/source/commontools/dbtools.cxx
+++ b/connectivity/source/commontools/dbtools.cxx
@@ -2026,6 +2026,89 @@ OSQLColumns::Vector::const_iterator find(OSQLColumns::Vector::const_iterator fir
++first;
return first;
}
+
+namespace dbase
+{
+ bool dbfDecodeCharset(rtl_TextEncoding &_out_encoding, sal_uInt8 nType, sal_uInt8 nCodepage)
+ {
+ switch (nType)
+ {
+ case dBaseIII:
+ case dBaseIV:
+ case dBaseV:
+ case VisualFoxPro:
+ case VisualFoxProAuto:
+ case dBaseFS:
+ case dBaseFSMemo:
+ case dBaseIVMemoSQL:
+ case dBaseIIIMemo:
+ case FoxProMemo:
+ {
+ if (nCodepage != 0x00)
+ {
+ auto eEncoding(RTL_TEXTENCODING_DONTKNOW);
+ switch(nCodepage)
+ {
+ case 0x01: eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437
+ case 0x02: eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850
+ case 0x03: eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252
+ case 0x04: eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh
+ case 0x64: eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852
+ case 0x65: eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866
+ case 0x66: eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865
+ case 0x67: eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS
+ //case 0x68: eEncoding = ; break; // Kamenicky (Czech) MS-DOS
+ //case 0x69: eEncoding = ; break; // Mazovia (Polish) MS-DOS
+ case 0x6A: eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G)
+ case 0x6B: eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS
+ case 0x6C: eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada
+ case 0x78: eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese
+ case 0x79: eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul)
+ case 0x7A: eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese
+ case 0x7B: eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis)
+ case 0x7C: eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai
+ case 0x7D: eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew
+ case 0x7E: eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic
+ case 0x96: eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh
+ case 0x97: eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh
+ case 0x98: eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh
+ case 0xC8: eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250
+ case 0xC9: eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows
+ case 0xCA: eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows
+ case 0xCB: eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows
+ case 0xCC: eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic
+ }
+ if(eEncoding != RTL_TEXTENCODING_DONTKNOW)
+ {
+ _out_encoding = eEncoding;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream)
+ {
+ sal_uInt8 nType=0;
+ dbf_Stream->ReadUChar( nType );
+
+ dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29);
+ if (dbf_Stream->IsEof())
+ {
+ return false;
+ }
+ else
+ {
+ sal_uInt8 nEncoding=0;
+ dbf_Stream->ReadUChar( nEncoding );
+ return dbfDecodeCharset(nCharSet, nType, nEncoding);
+ }
+ }
+
+}
+
} //namespace connectivity
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/connectivity/source/drivers/dbase/DTable.cxx b/connectivity/source/drivers/dbase/DTable.cxx
index 6da1f893e6fc..3e14a594bbb7 100644
--- a/connectivity/source/drivers/dbase/DTable.cxx
+++ b/connectivity/source/drivers/dbase/DTable.cxx
@@ -250,48 +250,9 @@ void ODbaseTable::readHeader()
case dBaseIIIMemo:
case FoxProMemo:
m_pFileStream->SetEndian(SvStreamEndian::LITTLE);
- // trailer[17] corresponds to language code, see DBFHeader def in connectivity/source/inc/dbase/DTable.hxx
- if ( m_aHeader.trailer[17] != 0x00 && getConnection()->isTextEncodingDefaulted() )
+ if( getConnection()->isTextEncodingDefaulted() &&
+ !dbfDecodeCharset(m_eEncoding, nType, m_aHeader.trailer[17]))
{
- switch(m_aHeader.trailer[17])
- {
- case 0x01: m_eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437
- case 0x02: m_eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850
- case 0x03: m_eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252
- case 0x04: m_eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh
- case 0x64: m_eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852
- case 0x65: m_eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866
- case 0x66: m_eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865
- case 0x67: m_eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS
- //case 0x68: m_eEncoding = ; break; // Kamenicky (Czech) MS-DOS
- //case 0x69: m_eEncoding = ; break; // Mazovia (Polish) MS-DOS
- case 0x6A: m_eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G)
- case 0x6B: m_eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS
- case 0x6C: m_eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada
- case 0x78: m_eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese
- case 0x79: m_eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul)
- case 0x7A: m_eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese
- case 0x7B: m_eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis)
- case 0x7C: m_eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai
- case 0x7D: m_eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew
- case 0x7E: m_eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic
- case 0x96: m_eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh
- case 0x97: m_eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh
- case 0x98: m_eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh
- case 0xC8: m_eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250
- case 0xC9: m_eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows
- case 0xCA: m_eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows
- case 0xCB: m_eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows
- case 0xCC: m_eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic
- default:
- // Default Encoding
- m_eEncoding = RTL_TEXTENCODING_IBM_850;
- break;
- }
- }
- else
- {
- // Default Encoding
m_eEncoding = RTL_TEXTENCODING_IBM_850;
}
break;
diff --git a/include/connectivity/dbtools.hxx b/include/connectivity/dbtools.hxx
index 7f4719bd41c8..4b493ee98267 100644
--- a/include/connectivity/dbtools.hxx
+++ b/include/connectivity/dbtools.hxx
@@ -27,6 +27,7 @@
#include <unotools/sharedunocomponent.hxx>
#include <connectivity/dbtoolsdllapi.hxx>
#include <connectivity/FValue.hxx>
+#include <tools/stream.hxx>
namespace com { namespace sun { namespace star {
@@ -786,9 +787,51 @@ namespace dbtools
OUStringBuffer& _out_rSQLPredicate
);
-
} // namespace dbtools
+namespace connectivity::dbase
+{
+ enum DBFType { dBaseIII = 0x03,
+ dBaseIV = 0x04,
+ dBaseV = 0x05,
+ VisualFoxPro = 0x30,
+ VisualFoxProAuto = 0x31, // Visual FoxPro with AutoIncrement field
+ dBaseFS = 0x43,
+ dBaseFSMemo = 0xB3,
+ dBaseIIIMemo = 0x83,
+ dBaseIVMemo = 0x8B,
+ dBaseIVMemoSQL = 0x8E,
+ FoxProMemo = 0xF5
+ };
+
+ /** decode a DBase file's codepage byte to a RTL charset
+ @param _out_nCharset
+ in case of success, the decoded RTL charset is written there.
+ else, this is not written to.
+ @param nType
+ the file's type byte
+ @param nCodepage
+ the file's codepage byte
+ @return
+ true if a RTL charset was successfully decoded and written to _out_nCharset
+ false if nothing was written to _out_nCharset
+ */
+ OOO_DLLPUBLIC_DBTOOLS bool dbfDecodeCharset(rtl_TextEncoding &_out_nCharset, sal_uInt8 nType, sal_uInt8 nCodepage);
+
+ /** decode a DBase file's codepage byte to a RTL charset
+ @param _out_nCharset
+ in case of success, the decoded RTL charset is written there.
+ else, this is not written to.
+ @param dbf_Stream
+ pointer to a SvStream encapsulating the DBase file.
+ The stream will be rewinded and read from.
+ No guarantee is made on its position afterwards. Caller must reposition it itself.
+ @return
+ true if a RTL charset was successfully decoded and written to _out_nCharset
+ false if nothing was written to _out_nCharset
+ */
+ OOO_DLLPUBLIC_DBTOOLS bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream);
+} // namespace connectivity::dbase
#endif // INCLUDED_CONNECTIVITY_DBTOOLS_HXX
diff --git a/sc/CppunitTest_sc_ucalc.mk b/sc/CppunitTest_sc_ucalc.mk
index d918182a0c4f..d423f6c73a7b 100644
--- a/sc/CppunitTest_sc_ucalc.mk
+++ b/sc/CppunitTest_sc_ucalc.mk
@@ -45,6 +45,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_ucalc, \
comphelper \
cppu \
cppuhelper \
+ dbtools \
drawinglayer \
editeng \
for \
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index 5b7d2fcf9275..c9f54638e62e 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -58,6 +58,7 @@ $(eval $(call gb_Library_use_libraries,sc,\
comphelper \
cppu \
cppuhelper \
+ dbtools \
drawinglayer \
editeng \
for \
diff --git a/sc/source/ui/docshell/docsh8.cxx b/sc/source/ui/docshell/docsh8.cxx
index b21e6426e99b..5a676d24d029 100644
--- a/sc/source/ui/docshell/docsh8.cxx
+++ b/sc/source/ui/docshell/docsh8.cxx
@@ -301,10 +301,6 @@ ErrCode ScDocShell::DBaseImport( const OUString& rFullFileName, rtl_TextEncoding
ErrCode nErr = ERRCODE_NONE;
- // Try to get the Text Encoding from the driver
- if( eCharSet == RTL_TEXTENCODING_IBM_850 )
- eCharSet = RTL_TEXTENCODING_DONTKNOW;
-
try
{
long i;
diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx
index c0cc98782879..fd98a2cb590c 100644
--- a/sc/source/ui/unoobj/filtuno.cxx
+++ b/sc/source/ui/unoobj/filtuno.cxx
@@ -22,6 +22,7 @@
#include <vcl/msgbox.hxx>
#include <vcl/svapp.hxx>
#include <unotools/ucbstreamhelper.hxx>
+#include <connectivity/dbtools.hxx>
#include "editutil.hxx"
#include "filtuno.hxx"
@@ -44,6 +45,7 @@
using namespace com::sun::star;
using namespace com::sun::star::uno;
+using namespace connectivity::dbase;
#define SCFILTEROPTIONSOBJ_SERVICE "com.sun.star.ui.dialogs.FilterOptionsDialog"
#define SCFILTEROPTIONSOBJ_IMPLNAME "com.sun.star.comp.Calc.FilterOptionsDialog"
@@ -59,44 +61,75 @@ SC_SIMPLE_SERVICE_INFO( ScFilterOptionsObj, SCFILTEROPTIONSOBJ_IMPLNAME, SCFILTE
#define DBF_SEP_PATH_IMPORT "Office.Calc/Dialogs/DBFImport"
#define DBF_SEP_PATH_EXPORT "Office.Calc/Dialogs/DBFExport"
-static void load_CharSet( rtl_TextEncoding &nCharSet, bool bExport )
+namespace
{
- Sequence<Any> aValues;
- const Any *pProperties;
- Sequence<OUString> aNames { DBF_CHAR_SET };
- ScLinkConfigItem aItem( OUString::createFromAscii(
- bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
- aValues = aItem.GetProperties( aNames );
- pProperties = aValues.getConstArray();
+ enum class charsetSource
+ {
+ charset_from_file,
+ charset_from_user_setting,
+ charset_default
+ };
+ // TODO: use DTable.hxx instead
+ enum DBFType { dBaseIII = 0x03,
+ dBaseIV = 0x04,
+ dBaseV = 0x05,
+ VisualFoxPro = 0x30,
+ VisualFoxProAuto = 0x31, // Visual FoxPro with AutoIncrement field
+ dBaseFS = 0x43,
+ dBaseFSMemo = 0xB3,
+ dBaseIIIMemo = 0x83,
+ dBaseIVMemo = 0x8B,
+ dBaseIVMemoSQL = 0x8E,
+ FoxProMemo = 0xF5
+ };
+
+ charsetSource load_CharSet(rtl_TextEncoding &nCharSet, bool bExport, SvStream* dbf_Stream)
+ {
+ if (dbfReadCharset(nCharSet, dbf_Stream))
+ {
+ return charsetSource::charset_from_file;
+ }
- // Default choice
- nCharSet = RTL_TEXTENCODING_IBM_850;
+ Sequence<Any> aValues;
+ const Any *pProperties;
+ Sequence<OUString> aNames { DBF_CHAR_SET };
+ ScLinkConfigItem aItem( OUString::createFromAscii(
+ bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
- if( pProperties[0].hasValue() )
- {
- sal_Int32 nChar = 0;
- pProperties[0] >>= nChar;
- if( nChar >= 0)
+ aValues = aItem.GetProperties( aNames );
+ pProperties = aValues.getConstArray();
+
+ if( pProperties[0].hasValue() )
{
- nCharSet = (rtl_TextEncoding) nChar;
+ sal_Int32 nChar = 0;
+ pProperties[0] >>= nChar;
+ if( nChar >= 0)
+ {
+ nCharSet = (rtl_TextEncoding) nChar;
+ return charsetSource::charset_from_user_setting;
+ }
}
+
+ // Default choice
+ nCharSet = RTL_TEXTENCODING_IBM_850;
+ return charsetSource::charset_default;
}
-}
-static void save_CharSet( rtl_TextEncoding nCharSet, bool bExport )
-{
- Sequence<Any> aValues;
- Any *pProperties;
- Sequence<OUString> aNames { DBF_CHAR_SET };
- ScLinkConfigItem aItem( OUString::createFromAscii(
- bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
+ void save_CharSet( rtl_TextEncoding nCharSet, bool bExport )
+ {
+ Sequence<Any> aValues;
+ Any *pProperties;
+ Sequence<OUString> aNames { DBF_CHAR_SET };
+ ScLinkConfigItem aItem( OUString::createFromAscii(
+ bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
- aValues = aItem.GetProperties( aNames );
- pProperties = aValues.getArray();
- pProperties[0] <<= (sal_Int32) nCharSet;
+ aValues = aItem.GetProperties( aNames );
+ pProperties = aValues.getArray();
+ pProperties[0] <<= (sal_Int32) nCharSet;
- aItem.PutProperties(aNames, aValues);
+ aItem.PutProperties(aNames, aValues);
+ }
}
ScFilterOptionsObj::ScFilterOptionsObj() :
@@ -209,6 +242,7 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
{
bool bDBEnc = false;
bool bAscii = false;
+ bool skipDialog = false;
sal_Unicode const cStrDel = '"';
sal_Unicode cAsciiDel = ';';
@@ -250,8 +284,20 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
// dBase import
aTitle = ScGlobal::GetRscString( STR_IMPORT_DBF );
}
- load_CharSet( eEncoding, bExport );
+
+ std::unique_ptr<SvStream> pInStream;
+ if ( xInputStream.is() )
+ pInStream.reset(utl::UcbStreamHelper::CreateStream( xInputStream ));
+ switch(load_CharSet( eEncoding, bExport, pInStream.get()))
+ {
+ case charsetSource::charset_from_file:
+ skipDialog = true;break;
+ case charsetSource::charset_from_user_setting:
+ case charsetSource::charset_default:
+ break;
+ }
bDBEnc = true;
+ // pInStream goes out of scope, the stream is automatically closed
}
else if ( aFilterString == ScDocShell::GetDifFilterName() )
{
@@ -270,21 +316,37 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
}
ScImportOptions aOptions( cAsciiDel, cStrDel, eEncoding);
-
- ScopedVclPtr<AbstractScImportOptionsDlg> pDlg(pFact->CreateScImportOptionsDlg(
- bAscii, &aOptions, &aTitle, true/*bMultiByte*/, bDBEnc,
- !bExport));
- OSL_ENSURE(pDlg, "Dialog create fail!");
- if ( pDlg->Execute() == RET_OK )
+ if(skipDialog)
+ {
+ // TODO: check we are not missing some of the stuff that ScImportOptionsDlg::GetImportOptions
+ // (file sc/source/ui/dbgui/scuiimoptdlg.cxx) does
+ // that is, if the dialog sets options that are not selected by the user (!)
+ // then we are missing them here.
+ // Then we may need to rip them out of the dialog.
+ // Or we actually change the dialog to not display if skipDialog==true
+ // in that case, add an argument skipDialog to CreateScImportOptionsDlg
+ nRet = ui::dialogs::ExecutableDialogResults::OK;
+ }
+ else
+ {
+ ScopedVclPtr<AbstractScImportOptionsDlg> pDlg(pFact->CreateScImportOptionsDlg(
+ bAscii, &aOptions, &aTitle, true/*bMultiByte*/,
+ bDBEnc, !bExport));
+ OSL_ENSURE(pDlg, "Dialog create fail!");
+ if ( pDlg->Execute() == RET_OK )
+ {
+ pDlg->SaveImportOptions();
+ pDlg->GetImportOptions( aOptions );
+ save_CharSet( aOptions.eCharSet, bExport );
+ nRet = ui::dialogs::ExecutableDialogResults::OK;
+ }
+ }
+ if (nRet == ui::dialogs::ExecutableDialogResults::OK)
{
- pDlg->SaveImportOptions();
- pDlg->GetImportOptions( aOptions );
- save_CharSet( aOptions.eCharSet, bExport );
if ( bAscii )
aFilterOptions = aOptions.BuildString();
else
aFilterOptions = aOptions.aStrFont;
- nRet = ui::dialogs::ExecutableDialogResults::OK;
}
}