summaryrefslogtreecommitdiff
path: root/lingucomponent/source/languageguessing/guesslang.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'lingucomponent/source/languageguessing/guesslang.cxx')
-rw-r--r--lingucomponent/source/languageguessing/guesslang.cxx484
1 files changed, 484 insertions, 0 deletions
diff --git a/lingucomponent/source/languageguessing/guesslang.cxx b/lingucomponent/source/languageguessing/guesslang.cxx
new file mode 100644
index 000000000000..c8486c1bc471
--- /dev/null
+++ b/lingucomponent/source/languageguessing/guesslang.cxx
@@ -0,0 +1,484 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_lingucomponent.hxx"
+
+#include <iostream>
+
+#include <tools/debug.hxx>
+
+#include <sal/config.h>
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/implementationentry.hxx>
+#include <cppuhelper/implbase2.hxx>
+#include <tools/string.hxx>
+
+#include <simpleguesser.hxx>
+#include <guess.hxx>
+
+//#include <cppuhelper/queryinterface.hxx> // helper for queryInterface() impl
+
+//#include <com/sun/star/lang/XMultiServiceFactory.hpp>
+#include <com/sun/star/registry/XRegistryKey.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/linguistic2/XLanguageGuessing.hpp>
+#include <unotools/pathoptions.hxx>
+#include <unotools/localfilehelper.hxx>
+#include <osl/thread.h>
+
+using namespace ::rtl;
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::linguistic2;
+
+namespace css = ::com::sun::star;
+
+//==================================================================================================
+
+#define A2OU(x) ::rtl::OUString::createFromAscii( x )
+
+#define SERVICENAME "com.sun.star.linguistic2.LanguageGuessing"
+
+#define IMPLNAME "com.sun.star.lingu2.LanguageGuessing"
+
+static Sequence< OUString > getSupportedServiceNames_LangGuess_Impl()
+{
+ Sequence<OUString> names(1);
+ names[0] = A2OU( SERVICENAME );
+ return names;
+}
+
+static OUString getImplementationName_LangGuess_Impl()
+{
+ return A2OU( IMPLNAME );
+}
+
+static osl::Mutex & GetLangGuessMutex()
+{
+ static osl::Mutex aMutex;
+ return aMutex;
+}
+
+
+class LangGuess_Impl :
+ public ::cppu::WeakImplHelper2<
+ XLanguageGuessing,
+ XServiceInfo >
+{
+ SimpleGuesser m_aGuesser;
+ bool m_bInitialized;
+ css::uno::Reference< css::uno::XComponentContext > m_xContext;
+
+ LangGuess_Impl( const LangGuess_Impl & ); // not defined
+ LangGuess_Impl & operator =( const LangGuess_Impl & ); // not defined
+
+ virtual ~LangGuess_Impl() {}
+ void EnsureInitialized();
+
+public:
+ explicit LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext);
+
+ // XServiceInfo implementation
+ virtual OUString SAL_CALL getImplementationName( ) throw(RuntimeException);
+ virtual sal_Bool SAL_CALL supportsService( const OUString& ServiceName ) throw(RuntimeException);
+ virtual Sequence< OUString > SAL_CALL getSupportedServiceNames( ) throw(RuntimeException);
+ static Sequence< OUString > SAL_CALL getSupportedServiceNames_Static( );
+
+ // XLanguageGuessing implementation
+ virtual ::com::sun::star::lang::Locale SAL_CALL guessPrimaryLanguage( const ::rtl::OUString& aText, ::sal_Int32 nStartPos, ::sal_Int32 nLen ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
+ virtual void SAL_CALL disableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
+ virtual void SAL_CALL enableLanguages( const ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale >& aLanguages ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
+ virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getAvailableLanguages( ) throw (::com::sun::star::uno::RuntimeException);
+ virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getEnabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
+ virtual ::com::sun::star::uno::Sequence< ::com::sun::star::lang::Locale > SAL_CALL getDisabledLanguages( ) throw (::com::sun::star::uno::RuntimeException);
+
+ // implementation specific
+ void SetFingerPrintsDB( const rtl::OUString &fileName ) throw (RuntimeException);
+
+ static const OUString & SAL_CALL getImplementationName_Static() throw();
+
+};
+
+//*************************************************************************
+
+LangGuess_Impl::LangGuess_Impl(css::uno::Reference< css::uno::XComponentContext > const & rxContext) :
+ m_bInitialized( false ),
+ m_xContext( rxContext )
+{
+}
+
+//*************************************************************************
+
+void LangGuess_Impl::EnsureInitialized()
+{
+ if (!m_bInitialized)
+ {
+ // set this to true at the very start to prevent loops because of
+ // implicitly called functions below
+ m_bInitialized = true;
+
+ // set default fingerprint path to where those get installed
+ String aPhysPath;
+ String aURL( SvtPathOptions().GetFingerprintPath() );
+ utl::LocalFileHelper::ConvertURLToPhysicalName( aURL, aPhysPath );
+#ifdef WNT
+ aPhysPath += '\\';
+#else
+ aPhysPath += '/';
+#endif
+
+ SetFingerPrintsDB( aPhysPath );
+
+ //
+ // disable currently not functional languages...
+ //
+ struct LangCountry
+ {
+ const char *pLang;
+ const char *pCountry;
+ };
+ LangCountry aDisable[] =
+ {
+ {"gv", ""}, {"sco", ""}, // no lang-id available yet...
+// {"hy", ""}, {"drt", ""}, // 0 bytes fingerprints...
+ {"zh", "CN"}, {"zh", "TW"}, {"ja", ""}, {"ko", ""}, // not yet correct functional...
+ {"ka", ""}, {"hi", ""}, {"mr", ""}, {"ne", ""},
+ {"sa", ""}, {"ta", ""}, {"th", ""},
+ {"qu", ""}, {"yi", ""}
+ };
+ sal_Int32 nNum = sizeof(aDisable) / sizeof(aDisable[0]);
+ Sequence< Locale > aDisableSeq( nNum );
+ Locale *pDisableSeq = aDisableSeq.getArray();
+ for (sal_Int32 i = 0; i < nNum; ++i)
+ {
+ Locale aLocale;
+ aLocale.Language = OUString::createFromAscii( aDisable[i].pLang );
+ aLocale.Country = OUString::createFromAscii( aDisable[i].pCountry );
+ pDisableSeq[i] = aLocale;
+ }
+ disableLanguages( aDisableSeq );
+ DBG_ASSERT( nNum == getDisabledLanguages().getLength(), "size mismatch" );
+ }
+}
+
+//*************************************************************************
+
+/* TL: currently not part of the API
+Sequence< com::sun::star::lang::Locale > SAL_CALL LangGuess_Impl::guessLanguages(
+ const rtl::OUString &rText,
+ sal_Int32 nStartPos,
+ sal_Int32 nLen )
+ throw (RuntimeException)
+{
+ Sequence< com::sun::star::lang::Locale > aRes;
+
+ OString o = OUStringToOString( rText, RTL_TEXTENCODING_UTF8 );
+ vector<Guess> gs = m_aGuesser.GuessLanguage(o.pData->buffer);
+
+ aRes.realloc(gs.size());
+
+ com::sun::star::lang::Locale *pRes = aRes.getArray();
+
+#ifdef DEBUG
+ std::cout << " We have " << gs.size() << " candidates" << std::endl;
+#endif
+
+ for(int i = 0; i < gs.size() ; i++ ){
+ com::sun::star::lang::Locale current_aRes;
+
+ current_aRes.Language = A2OU( gs[i].getLanguage().c_str() );
+ current_aRes.Country = A2OU( gs[i].getCountry().c_str() );
+
+ pRes[i] = current_aRes;
+ }
+
+ return aRes;
+}
+*/
+//*************************************************************************
+
+Locale SAL_CALL LangGuess_Impl::guessPrimaryLanguage(
+ const ::rtl::OUString& rText,
+ ::sal_Int32 nStartPos,
+ ::sal_Int32 nLen )
+ throw (lang::IllegalArgumentException, uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ lang::Locale aRes;
+ if (nStartPos >=0 && nLen >= 0 && nStartPos + nLen <= rText.getLength())
+ {
+ OString o( OUStringToOString( rText.copy(nStartPos, nLen), RTL_TEXTENCODING_UTF8 ) );
+ Guess g = m_aGuesser.GuessPrimaryLanguage((char*)o.getStr());
+ aRes.Language = OUString::createFromAscii(g.GetLanguage().c_str());
+ aRes.Country = OUString::createFromAscii(g.GetCountry().c_str());
+ }
+ else
+ throw lang::IllegalArgumentException();
+
+ return aRes;
+}
+
+//*************************************************************************
+#define DEFAULT_CONF_FILE_NAME "fpdb.conf"
+
+void LangGuess_Impl::SetFingerPrintsDB(
+ const rtl::OUString &filePath )
+ throw (RuntimeException)
+{
+ //! text encoding for file name / path needs to be in the same encoding the OS uses
+ OString path = OUStringToOString( filePath, osl_getThreadTextEncoding() );
+ OString conf_file_name( DEFAULT_CONF_FILE_NAME );
+ OString conf_file_path(path);
+ conf_file_path += conf_file_name;
+
+ //cout << "Conf file : " << conf_file_path.getStr() << " directory : " << path.getStr() << endl;
+
+ m_aGuesser.SetDBPath((const char*)conf_file_path.getStr(), (const char*)path.getStr());
+}
+
+//*************************************************************************
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getAvailableLanguages( )
+ throw (uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ Sequence< com::sun::star::lang::Locale > aRes;
+ vector<Guess> gs = m_aGuesser.GetAllManagedLanguages();
+ aRes.realloc(gs.size());
+
+ com::sun::star::lang::Locale *pRes = aRes.getArray();
+
+ for(size_t i = 0; i < gs.size() ; i++ ){
+ com::sun::star::lang::Locale current_aRes;
+ current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
+ current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
+ pRes[i] = current_aRes;
+ }
+
+ return aRes;
+}
+
+//*************************************************************************
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getEnabledLanguages( )
+ throw (uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ Sequence< com::sun::star::lang::Locale > aRes;
+ vector<Guess> gs = m_aGuesser.GetAvailableLanguages();
+ aRes.realloc(gs.size());
+
+ com::sun::star::lang::Locale *pRes = aRes.getArray();
+
+ for(size_t i = 0; i < gs.size() ; i++ ){
+ com::sun::star::lang::Locale current_aRes;
+ current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
+ current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
+ pRes[i] = current_aRes;
+ }
+
+ return aRes;
+}
+
+//*************************************************************************
+uno::Sequence< Locale > SAL_CALL LangGuess_Impl::getDisabledLanguages( )
+ throw (uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ Sequence< com::sun::star::lang::Locale > aRes;
+ vector<Guess> gs = m_aGuesser.GetUnavailableLanguages();
+ aRes.realloc(gs.size());
+
+ com::sun::star::lang::Locale *pRes = aRes.getArray();
+
+ for(size_t i = 0; i < gs.size() ; i++ ){
+ com::sun::star::lang::Locale current_aRes;
+ current_aRes.Language = A2OU( gs[i].GetLanguage().c_str() );
+ current_aRes.Country = A2OU( gs[i].GetCountry().c_str() );
+ pRes[i] = current_aRes;
+ }
+
+ return aRes;
+}
+
+//*************************************************************************
+void SAL_CALL LangGuess_Impl::disableLanguages(
+ const uno::Sequence< Locale >& rLanguages )
+ throw (lang::IllegalArgumentException, uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ sal_Int32 nLanguages = rLanguages.getLength();
+ const Locale *pLanguages = rLanguages.getConstArray();
+
+ for (sal_Int32 i = 0; i < nLanguages; ++i)
+ {
+ string language;
+
+ OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
+ OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
+
+ language += l.getStr();
+ language += "-";
+ language += c.getStr();
+ m_aGuesser.DisableLanguage(language);
+ }
+}
+
+//*************************************************************************
+void SAL_CALL LangGuess_Impl::enableLanguages(
+ const uno::Sequence< Locale >& rLanguages )
+ throw (lang::IllegalArgumentException, uno::RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+
+ EnsureInitialized();
+
+ sal_Int32 nLanguages = rLanguages.getLength();
+ const Locale *pLanguages = rLanguages.getConstArray();
+
+ for (sal_Int32 i = 0; i < nLanguages; ++i)
+ {
+ string language;
+
+ OString l = OUStringToOString( pLanguages[i].Language, RTL_TEXTENCODING_ASCII_US );
+ OString c = OUStringToOString( pLanguages[i].Country, RTL_TEXTENCODING_ASCII_US );
+
+ language += l.getStr();
+ language += "-";
+ language += c.getStr();
+ m_aGuesser.EnableLanguage(language);
+ }
+}
+
+//*************************************************************************
+OUString SAL_CALL LangGuess_Impl::getImplementationName( )
+ throw(RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+ return A2OU( IMPLNAME );
+}
+
+//*************************************************************************
+sal_Bool SAL_CALL LangGuess_Impl::supportsService( const OUString& ServiceName )
+ throw(RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+ Sequence< OUString > aSNL = getSupportedServiceNames();
+ const OUString * pArray = aSNL.getArray();
+ for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
+ if( pArray[i] == ServiceName )
+ return sal_True;
+ return sal_False;
+}
+
+//*************************************************************************
+Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames( )
+ throw(RuntimeException)
+{
+ osl::MutexGuard aGuard( GetLangGuessMutex() );
+ return getSupportedServiceNames_Static();
+}
+
+//*************************************************************************
+Sequence<OUString> SAL_CALL LangGuess_Impl::getSupportedServiceNames_Static( )
+{
+ OUString aName( A2OU( SERVICENAME ) );
+ return Sequence< OUString >( &aName, 1 );
+}
+
+//*************************************************************************
+
+
+/**
+ * Function to create a new component instance; is needed by factory helper implementation.
+ * @param xMgr service manager to if the components needs other component instances
+ */
+Reference< XInterface > SAL_CALL LangGuess_Impl_create(
+ Reference< XComponentContext > const & xContext )
+ SAL_THROW( () )
+{
+ return static_cast< ::cppu::OWeakObject * >( new LangGuess_Impl(xContext) );
+}
+
+//##################################################################################################
+//#### EXPORTED ### functions to allow for registration and creation of the UNO component
+//##################################################################################################
+
+static struct ::cppu::ImplementationEntry s_component_entries [] =
+{
+ {
+ LangGuess_Impl_create, getImplementationName_LangGuess_Impl,
+ getSupportedServiceNames_LangGuess_Impl,
+ ::cppu::createSingleComponentFactory,
+ 0, 0
+ },
+ { 0, 0, 0, 0, 0, 0 }
+};
+
+extern "C"
+{
+
+void SAL_CALL component_getImplementationEnvironment(
+ sal_Char const ** ppEnvTypeName, uno_Environment ** /*ppEnv*/ )
+{
+ *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
+}
+
+sal_Bool SAL_CALL component_writeInfo(
+ XMultiServiceFactory * xMgr, registry::XRegistryKey * xRegistry )
+{
+ return ::cppu::component_writeInfoHelper(
+ xMgr, xRegistry, s_component_entries );
+}
+
+void * SAL_CALL component_getFactory(
+ sal_Char const * implName, lang::XMultiServiceFactory * xMgr,
+ registry::XRegistryKey * xRegistry )
+{
+ return ::cppu::component_getFactoryHelper(
+ implName, xMgr, xRegistry, s_component_entries );
+}
+
+}
+