summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Elie Mamane <lionel@mamane.lu>2017-06-25 17:21:45 +0200
committerJulien Nabet <serval2412@yahoo.fr>2017-07-04 09:32:16 +0200
commit6e0eafe576436ec229c6d90f654ff1b11ff9bdfd (patch)
tree274bae3e4959fc461eb719369573fe88d965a41a
parent788a87d46a64dc9e50eb371680c883a1d274b3e1 (diff)
tdf#108789: branch 5.4 only
- Calc: make the complete "what encoding to use" decision before calling the connectivity driver, so that the driver has no ambiguity about whether it should override our setting or not. To this end, factorise the part of the driver that reads the encoding from the file header into dbtools. - Calc: don't ask for encoding when the file's header give the encoding. - don't confuse CP850 (the default) and "don't know", including: * don't ignore CP850 user setting * don't overwrite user setting with CP850 Cherry-pick: - https://cgit.freedesktop.org/libreoffice/core/commit/?id=7f1465a9599e9665159dd2d823a6e9064cca5703 - https://cgit.freedesktop.org/libreoffice/core/commit/?id=857d64ed3ebbeb0ee4e8a75bfeaa4eb406944571 - https://cgit.freedesktop.org/libreoffice/core/commit/?id=9170d10cc57c3f0f3e82b27ce4b2cd9c897e669d Change-Id: Id80b7c505858b88f717b0ce6bd890527909e5fd1 Reviewed-on: https://gerrit.libreoffice.org/39451 Reviewed-by: Lionel Elie Mamane <lionel@mamane.lu> Tested-by: Jenkins <ci@libreoffice.org>
-rw-r--r--connectivity/source/commontools/dbtools.cxx83
-rw-r--r--connectivity/source/drivers/dbase/DTable.cxx43
-rw-r--r--include/connectivity/dbtools.hxx47
-rw-r--r--sc/CppunitTest_sc_ucalc.mk1
-rw-r--r--sc/Library_sc.mk1
-rw-r--r--sc/source/ui/docshell/docsh8.cxx4
-rw-r--r--sc/source/ui/unoobj/filtuno.cxx128
7 files changed, 221 insertions, 86 deletions
diff --git a/connectivity/source/commontools/dbtools.cxx b/connectivity/source/commontools/dbtools.cxx
index 1b17eb112d49..d458c1509722 100644
--- a/connectivity/source/commontools/dbtools.cxx
+++ b/connectivity/source/commontools/dbtools.cxx
@@ -2035,6 +2035,89 @@ OSQLColumns::Vector::const_iterator find(OSQLColumns::Vector::const_iterator fir
++first;
return first;
}
+
+namespace dbase
+{
+ bool dbfDecodeCharset(rtl_TextEncoding &_out_encoding, sal_uInt8 nType, sal_uInt8 nCodepage)
+ {
+ switch (nType)
+ {
+ case dBaseIII:
+ case dBaseIV:
+ case dBaseV:
+ case VisualFoxPro:
+ case VisualFoxProAuto:
+ case dBaseFS:
+ case dBaseFSMemo:
+ case dBaseIVMemoSQL:
+ case dBaseIIIMemo:
+ case FoxProMemo:
+ {
+ if (nCodepage != 0x00)
+ {
+ auto eEncoding(RTL_TEXTENCODING_DONTKNOW);
+ switch(nCodepage)
+ {
+ case 0x01: eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437
+ case 0x02: eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850
+ case 0x03: eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252
+ case 0x04: eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh
+ case 0x64: eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852
+ case 0x65: eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866
+ case 0x66: eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865
+ case 0x67: eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS
+ //case 0x68: eEncoding = ; break; // Kamenicky (Czech) MS-DOS
+ //case 0x69: eEncoding = ; break; // Mazovia (Polish) MS-DOS
+ case 0x6A: eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G)
+ case 0x6B: eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS
+ case 0x6C: eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada
+ case 0x78: eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese
+ case 0x79: eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul)
+ case 0x7A: eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese
+ case 0x7B: eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis)
+ case 0x7C: eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai
+ case 0x7D: eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew
+ case 0x7E: eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic
+ case 0x96: eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh
+ case 0x97: eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh
+ case 0x98: eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh
+ case 0xC8: eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250
+ case 0xC9: eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows
+ case 0xCA: eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows
+ case 0xCB: eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows
+ case 0xCC: eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic
+ }
+ if(eEncoding != RTL_TEXTENCODING_DONTKNOW)
+ {
+ _out_encoding = eEncoding;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream)
+ {
+ sal_uInt8 nType=0;
+ dbf_Stream->ReadUChar( nType );
+
+ dbf_Stream->Seek(STREAM_SEEK_TO_BEGIN + 29);
+ if (dbf_Stream->IsEof())
+ {
+ return false;
+ }
+ else
+ {
+ sal_uInt8 nEncoding=0;
+ dbf_Stream->ReadUChar( nEncoding );
+ return dbfDecodeCharset(nCharSet, nType, nEncoding);
+ }
+ }
+
+}
+
} //namespace connectivity
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/connectivity/source/drivers/dbase/DTable.cxx b/connectivity/source/drivers/dbase/DTable.cxx
index 12e7b1aa7647..b89e1b55a42a 100644
--- a/connectivity/source/drivers/dbase/DTable.cxx
+++ b/connectivity/source/drivers/dbase/DTable.cxx
@@ -244,48 +244,9 @@ void ODbaseTable::readHeader()
case dBaseIIIMemo:
case FoxProMemo:
m_pFileStream->SetEndian(SvStreamEndian::LITTLE);
- if ( m_aHeader.db_frei[17] != 0x00
- && !m_aHeader.db_frei[18] && !m_aHeader.db_frei[19] && getConnection()->isTextEncodingDefaulted() )
+ if( getConnection()->isTextEncodingDefaulted() &&
+ !dbfDecodeCharset(m_eEncoding, nType, m_aHeader.db_frei[17]))
{
- switch(m_aHeader.db_frei[17])
- {
- case 0x01: m_eEncoding = RTL_TEXTENCODING_IBM_437; break; // DOS USA code page 437
- case 0x02: m_eEncoding = RTL_TEXTENCODING_IBM_850; break; // DOS Multilingual code page 850
- case 0x03: m_eEncoding = RTL_TEXTENCODING_MS_1252; break; // Windows ANSI code page 1252
- case 0x04: m_eEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break; // Standard Macintosh
- case 0x64: m_eEncoding = RTL_TEXTENCODING_IBM_852; break; // EE MS-DOS code page 852
- case 0x65: m_eEncoding = RTL_TEXTENCODING_IBM_866; break; // Russian MS-DOS code page 866
- case 0x66: m_eEncoding = RTL_TEXTENCODING_IBM_865; break; // Nordic MS-DOS code page 865
- case 0x67: m_eEncoding = RTL_TEXTENCODING_IBM_861; break; // Icelandic MS-DOS
- //case 0x68: m_eEncoding = ; break; // Kamenicky (Czech) MS-DOS
- //case 0x69: m_eEncoding = ; break; // Mazovia (Polish) MS-DOS
- case 0x6A: m_eEncoding = RTL_TEXTENCODING_IBM_737; break; // Greek MS-DOS (437G)
- case 0x6B: m_eEncoding = RTL_TEXTENCODING_IBM_857; break; // Turkish MS-DOS
- case 0x6C: m_eEncoding = RTL_TEXTENCODING_IBM_863; break; // MS-DOS, Canada
- case 0x78: m_eEncoding = RTL_TEXTENCODING_MS_950; break; // Windows, Traditional Chinese
- case 0x79: m_eEncoding = RTL_TEXTENCODING_MS_949; break; // Windows, Korean (Hangul)
- case 0x7A: m_eEncoding = RTL_TEXTENCODING_MS_936; break; // Windows, Simplified Chinese
- case 0x7B: m_eEncoding = RTL_TEXTENCODING_MS_932; break; // Windows, Japanese (Shift-jis)
- case 0x7C: m_eEncoding = RTL_TEXTENCODING_MS_874; break; // Windows, Thai
- case 0x7D: m_eEncoding = RTL_TEXTENCODING_MS_1255; break; // Windows, Hebrew
- case 0x7E: m_eEncoding = RTL_TEXTENCODING_MS_1256; break; // Windows, Arabic
- case 0x96: m_eEncoding = RTL_TEXTENCODING_APPLE_CYRILLIC; break; // Russian Macintosh
- case 0x97: m_eEncoding = RTL_TEXTENCODING_APPLE_CENTEURO; break; // Eastern European Macintosh
- case 0x98: m_eEncoding = RTL_TEXTENCODING_APPLE_GREEK; break; // Greek Macintosh
- case 0xC8: m_eEncoding = RTL_TEXTENCODING_MS_1250; break; // Windows EE code page 1250
- case 0xC9: m_eEncoding = RTL_TEXTENCODING_MS_1251; break; // Russian Windows
- case 0xCA: m_eEncoding = RTL_TEXTENCODING_MS_1254; break; // Turkish Windows
- case 0xCB: m_eEncoding = RTL_TEXTENCODING_MS_1253; break; // Greek Windows
- case 0xCC: m_eEncoding = RTL_TEXTENCODING_MS_1257; break; // Windows, Baltic
- default:
- // Default Encoding
- m_eEncoding = RTL_TEXTENCODING_IBM_850;
- break;
- }
- }
- else
- {
- // Default Encoding
m_eEncoding = RTL_TEXTENCODING_IBM_850;
}
break;
diff --git a/include/connectivity/dbtools.hxx b/include/connectivity/dbtools.hxx
index 93fad78229a6..1899ce9761ed 100644
--- a/include/connectivity/dbtools.hxx
+++ b/include/connectivity/dbtools.hxx
@@ -27,6 +27,7 @@
#include <unotools/sharedunocomponent.hxx>
#include <connectivity/dbtoolsdllapi.hxx>
#include <connectivity/FValue.hxx>
+#include <tools/stream.hxx>
namespace com { namespace sun { namespace star {
@@ -786,10 +787,54 @@ namespace dbtools
OUStringBuffer& _out_rSQLPredicate
);
-
} // namespace dbtools
+namespace connectivity
+{
+namespace dbase
+{
+ enum DBFType { dBaseIII = 0x03,
+ dBaseIV = 0x04,
+ dBaseV = 0x05,
+ VisualFoxPro = 0x30,
+ VisualFoxProAuto = 0x31, // Visual FoxPro with AutoIncrement field
+ dBaseFS = 0x43,
+ dBaseFSMemo = 0xB3,
+ dBaseIIIMemo = 0x83,
+ dBaseIVMemo = 0x8B,
+ dBaseIVMemoSQL = 0x8E,
+ FoxProMemo = 0xF5
+ };
+ /** decode a DBase file's codepage byte to a RTL charset
+ @param _out_nCharset
+ in case of success, the decoded RTL charset is written there.
+ else, this is not written to.
+ @param nType
+ the file's type byte
+ @param nCodepage
+ the file's codepage byte
+ @return
+ true if a RTL charset was successfully decoded and written to _out_nCharset
+ false if nothing was written to _out_nCharset
+ */
+ OOO_DLLPUBLIC_DBTOOLS bool dbfDecodeCharset(rtl_TextEncoding &_out_nCharset, sal_uInt8 nType, sal_uInt8 nCodepage);
+
+ /** decode a DBase file's codepage byte to a RTL charset
+ @param _out_nCharset
+ in case of success, the decoded RTL charset is written there.
+ else, this is not written to.
+ @param dbf_Stream
+ pointer to a SvStream encapsulating the DBase file.
+ The stream will be rewinded and read from.
+ No guarantee is made on its position afterwards. Caller must reposition it itself.
+ @return
+ true if a RTL charset was successfully decoded and written to _out_nCharset
+ false if nothing was written to _out_nCharset
+ */
+ OOO_DLLPUBLIC_DBTOOLS bool dbfReadCharset(rtl_TextEncoding &nCharSet, SvStream* dbf_Stream);
+} // namespace connectivity::dbase
+} // namespace connectivity
#endif // INCLUDED_CONNECTIVITY_DBTOOLS_HXX
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/CppunitTest_sc_ucalc.mk b/sc/CppunitTest_sc_ucalc.mk
index 5e91862f416c..10449cd76f52 100644
--- a/sc/CppunitTest_sc_ucalc.mk
+++ b/sc/CppunitTest_sc_ucalc.mk
@@ -45,6 +45,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_ucalc, \
comphelper \
cppu \
cppuhelper \
+ dbtools \
drawinglayer \
editeng \
for \
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index 3b2e88529977..8a07d1628be9 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -58,6 +58,7 @@ $(eval $(call gb_Library_use_libraries,sc,\
comphelper \
cppu \
cppuhelper \
+ dbtools \
drawinglayer \
editeng \
for \
diff --git a/sc/source/ui/docshell/docsh8.cxx b/sc/source/ui/docshell/docsh8.cxx
index 725a8b906649..a021745855d9 100644
--- a/sc/source/ui/docshell/docsh8.cxx
+++ b/sc/source/ui/docshell/docsh8.cxx
@@ -301,10 +301,6 @@ sal_uLong ScDocShell::DBaseImport( const OUString& rFullFileName, rtl_TextEncodi
sal_uLong nErr = eERR_OK;
- // Try to get the Text Encoding from the driver
- if( eCharSet == RTL_TEXTENCODING_IBM_850 )
- eCharSet = RTL_TEXTENCODING_DONTKNOW;
-
try
{
long i;
diff --git a/sc/source/ui/unoobj/filtuno.cxx b/sc/source/ui/unoobj/filtuno.cxx
index 7b805717aae6..65de5d10b4e9 100644
--- a/sc/source/ui/unoobj/filtuno.cxx
+++ b/sc/source/ui/unoobj/filtuno.cxx
@@ -22,6 +22,7 @@
#include <vcl/msgbox.hxx>
#include <vcl/svapp.hxx>
#include <unotools/ucbstreamhelper.hxx>
+#include <connectivity/dbtools.hxx>
#include "editutil.hxx"
#include "filtuno.hxx"
@@ -43,6 +44,7 @@
using namespace com::sun::star;
using namespace com::sun::star::uno;
+using namespace connectivity::dbase;
#define SCFILTEROPTIONSOBJ_SERVICE "com.sun.star.ui.dialogs.FilterOptionsDialog"
#define SCFILTEROPTIONSOBJ_IMPLNAME "com.sun.star.comp.Calc.FilterOptionsDialog"
@@ -58,44 +60,62 @@ SC_SIMPLE_SERVICE_INFO( ScFilterOptionsObj, SCFILTEROPTIONSOBJ_IMPLNAME, SCFILTE
#define DBF_SEP_PATH_IMPORT "Office.Calc/Dialogs/DBFImport"
#define DBF_SEP_PATH_EXPORT "Office.Calc/Dialogs/DBFExport"
-static void load_CharSet( rtl_TextEncoding &nCharSet, bool bExport )
+namespace
{
- Sequence<Any> aValues;
- const Any *pProperties;
- Sequence<OUString> aNames { DBF_CHAR_SET };
- ScLinkConfigItem aItem( OUString::createFromAscii(
- bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
- aValues = aItem.GetProperties( aNames );
- pProperties = aValues.getConstArray();
-
- // Default choice
- nCharSet = RTL_TEXTENCODING_IBM_850;
+ enum class charsetSource
+ {
+ charset_from_file,
+ charset_from_user_setting,
+ charset_default
+ };
- if( pProperties[0].hasValue() )
+ charsetSource load_CharSet(rtl_TextEncoding &nCharSet, bool bExport, SvStream* dbf_Stream)
{
- sal_Int32 nChar = 0;
- pProperties[0] >>= nChar;
- if( nChar >= 0)
+ if (dbfReadCharset(nCharSet, dbf_Stream))
{
- nCharSet = (rtl_TextEncoding) nChar;
+ return charsetSource::charset_from_file;
}
+
+ Sequence<Any> aValues;
+ const Any *pProperties;
+ Sequence<OUString> aNames { DBF_CHAR_SET };
+ ScLinkConfigItem aItem( OUString::createFromAscii(
+ bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
+
+ aValues = aItem.GetProperties( aNames );
+ pProperties = aValues.getConstArray();
+
+ if( pProperties[0].hasValue() )
+ {
+ sal_Int32 nChar = 0;
+ pProperties[0] >>= nChar;
+ if( nChar >= 0)
+ {
+ nCharSet = (rtl_TextEncoding) nChar;
+ return charsetSource::charset_from_user_setting;
+ }
+ }
+
+ // Default choice
+ nCharSet = RTL_TEXTENCODING_IBM_850;
+ return charsetSource::charset_default;
}
-}
-static void save_CharSet( rtl_TextEncoding nCharSet, bool bExport )
-{
- Sequence<Any> aValues;
- Any *pProperties;
- Sequence<OUString> aNames { DBF_CHAR_SET };
- ScLinkConfigItem aItem( OUString::createFromAscii(
- bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
+ void save_CharSet( rtl_TextEncoding nCharSet, bool bExport )
+ {
+ Sequence<Any> aValues;
+ Any *pProperties;
+ Sequence<OUString> aNames { DBF_CHAR_SET };
+ ScLinkConfigItem aItem( OUString::createFromAscii(
+ bExport?DBF_SEP_PATH_EXPORT:DBF_SEP_PATH_IMPORT ) );
- aValues = aItem.GetProperties( aNames );
- pProperties = aValues.getArray();
- pProperties[0] <<= (sal_Int32) nCharSet;
+ aValues = aItem.GetProperties( aNames );
+ pProperties = aValues.getArray();
+ pProperties[0] <<= (sal_Int32) nCharSet;
- aItem.PutProperties(aNames, aValues);
+ aItem.PutProperties(aNames, aValues);
+ }
}
ScFilterOptionsObj::ScFilterOptionsObj() :
@@ -210,9 +230,9 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
}
else
{
- bool bMultiByte = true;
bool bDBEnc = false;
bool bAscii = false;
+ bool skipDialog = false;
sal_Unicode cStrDel = '"';
sal_Unicode cAsciiDel = ';';
@@ -254,8 +274,20 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
// dBase import
aTitle = ScGlobal::GetRscString( STR_IMPORT_DBF );
}
- load_CharSet( eEncoding, bExport );
+
+ std::unique_ptr<SvStream> pInStream;
+ if ( xInputStream.is() )
+ pInStream.reset(utl::UcbStreamHelper::CreateStream( xInputStream ));
+ switch(load_CharSet( eEncoding, bExport, pInStream.get()))
+ {
+ case charsetSource::charset_from_file:
+ skipDialog = true;break;
+ case charsetSource::charset_from_user_setting:
+ case charsetSource::charset_default:
+ break;
+ }
bDBEnc = true;
+ // pInStream goes out of scope, the stream is automatically closed
}
else if ( aFilterString == ScDocShell::GetDifFilterName() )
{
@@ -274,21 +306,37 @@ sal_Int16 SAL_CALL ScFilterOptionsObj::execute()
}
ScImportOptions aOptions( cAsciiDel, cStrDel, eEncoding);
-
- ScopedVclPtr<AbstractScImportOptionsDlg> pDlg(pFact->CreateScImportOptionsDlg(
- bAscii, &aOptions, &aTitle, bMultiByte, bDBEnc,
- !bExport));
- OSL_ENSURE(pDlg, "Dialog create fail!");
- if ( pDlg->Execute() == RET_OK )
+ if(skipDialog)
+ {
+ // TODO: check we are not missing some of the stuff that ScImportOptionsDlg::GetImportOptions
+ // (file sc/source/ui/dbgui/scuiimoptdlg.cxx) does
+ // that is, if the dialog sets options that are not selected by the user (!)
+ // then we are missing them here.
+ // Then we may need to rip them out of the dialog.
+ // Or we actually change the dialog to not display if skipDialog==true
+ // in that case, add an argument skipDialog to CreateScImportOptionsDlg
+ nRet = ui::dialogs::ExecutableDialogResults::OK;
+ }
+ else
+ {
+ ScopedVclPtr<AbstractScImportOptionsDlg> pDlg(pFact->CreateScImportOptionsDlg(
+ bAscii, &aOptions, &aTitle, true/*bMultiByte*/,
+ bDBEnc, !bExport));
+ OSL_ENSURE(pDlg, "Dialog create fail!");
+ if ( pDlg->Execute() == RET_OK )
+ {
+ pDlg->SaveImportOptions();
+ pDlg->GetImportOptions( aOptions );
+ save_CharSet( aOptions.eCharSet, bExport );
+ nRet = ui::dialogs::ExecutableDialogResults::OK;
+ }
+ }
+ if (nRet == ui::dialogs::ExecutableDialogResults::OK)
{
- pDlg->SaveImportOptions();
- pDlg->GetImportOptions( aOptions );
- save_CharSet( aOptions.eCharSet, bExport );
if ( bAscii )
aFilterOptions = aOptions.BuildString();
else
aFilterOptions = aOptions.aStrFont;
- nRet = ui::dialogs::ExecutableDialogResults::OK;
}
}