summaryrefslogtreecommitdiff
path: root/i18nlangtag/source/languagetag/languagetag.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18nlangtag/source/languagetag/languagetag.cxx')
-rw-r--r--i18nlangtag/source/languagetag/languagetag.cxx1254
1 files changed, 1254 insertions, 0 deletions
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
new file mode 100644
index 000000000000..e1eea3b75f04
--- /dev/null
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -0,0 +1,1254 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "i18nlangtag/languagetag.hxx"
+#include "i18nlangtag/mslangid.hxx"
+#include <rtl/ustrbuf.hxx>
+#include <rtl/bootstrap.hxx>
+#include <osl/file.hxx>
+#include <rtl/instance.hxx>
+#include <rtl/locale.h>
+
+//#define erDEBUG
+
+#if defined(ENABLE_LIBLANGTAG)
+#include <liblangtag/langtag.h>
+#else
+/* Replacement code for LGPL phobic and Android systems.
+ * For iOS we could probably use NSLocale instead, that should have more or
+ * less required functionality. If it is good enough, it could be used for Mac
+ * OS X, too.
+ */
+#include "simple-langtag.cxx"
+#endif
+
+using rtl::OUString;
+using rtl::OString;
+using rtl::OUStringBuffer;
+using namespace com::sun::star;
+
+// The actual pointer type of mpImplLangtag that is declared void* to not
+// pollute the entire code base with liblangtag.
+#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
+#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
+
+/** Convention to signal presence of BCP 47 language tag in a Locale's Variant
+ field. The Locale's Language field then will contain this ISO 639-2
+ reserved for local use code. */
+#define ISO639_LANGUAGE_TAG "qlt"
+
+
+// Helper to ensure lt_error_t is free'd
+struct myLtError
+{
+ lt_error_t* p;
+ myLtError() : p(NULL) {}
+ ~myLtError() { if (p) lt_error_unref( p); }
+};
+
+
+// "statics" to be returned as const reference to an empty locale and string.
+namespace {
+struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
+struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
+}
+
+
+/** A reference holder for liblangtag data de/initialization, one static
+ instance. Currently implemented such that the first "ref" inits and dtor
+ (our library deinitialized) tears down.
+*/
+class LiblantagDataRef
+{
+public:
+ LiblantagDataRef();
+ ~LiblantagDataRef();
+ inline void incRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && !mnRef++)
+ setup();
+ }
+ inline void decRef()
+ {
+ if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
+ teardown();
+ }
+private:
+ rtl::OString maDataPath; // path to liblangtag data, "|" if system
+ sal_uInt32 mnRef;
+
+ void setupDataPath();
+ void setup();
+ void teardown();
+};
+
+namespace {
+struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
+}
+
+LiblantagDataRef::LiblantagDataRef()
+ :
+ mnRef(0)
+{
+}
+
+LiblantagDataRef::~LiblantagDataRef()
+{
+ // When destructed we're tearing down unconditionally.
+ if (mnRef)
+ mnRef = 1;
+ decRef();
+}
+
+void LiblantagDataRef::setup()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
+ if (maDataPath.isEmpty())
+ setupDataPath();
+ lt_db_initialize();
+ // Hold ref eternally.
+ mnRef = SAL_MAX_UINT32;
+}
+
+void LiblantagDataRef::teardown()
+{
+ SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
+ lt_db_finalize();
+}
+
+void LiblantagDataRef::setupDataPath()
+{
+ // maDataPath is assumed to be empty here.
+ OUString aURL("$BRAND_BASE_DIR/share/liblangtag");
+ rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
+
+ // Check if data is in our own installation, else assume system
+ // installation.
+ OUString aData( aURL);
+ aData += "/language-subtag-registry.xml";
+ osl::DirectoryItem aDirItem;
+ if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
+ {
+ OUString aPath;
+ if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
+ maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
+ }
+ if (maDataPath.isEmpty())
+ maDataPath = "|"; // assume system
+ else
+ lt_db_set_datadir( maDataPath.getStr());
+}
+
+LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+ :
+ maBcp47( rBcp47LanguageTag),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rBcp47LanguageTag.isEmpty()),
+ mbInitializedBcp47( !mbSystemLocale),
+ mbInitializedLocale( false),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
+ :
+ maLocale( rLocale),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( LanguageType nLanguage )
+ :
+ mpImplLangtag( NULL),
+ mnLangID( nLanguage),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( false),
+ mbInitializedLangID( !mbSystemLocale),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( const OUString& rLanguage, const OUString& rCountry )
+ :
+ maLocale( rLanguage, rCountry, ""),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( rLanguage.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+}
+
+
+LanguageTag::LanguageTag( const rtl_Locale & rLocale )
+ :
+ maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
+ mpImplLangtag( NULL),
+ mnLangID( LANGUAGE_DONTKNOW),
+ meIsValid( DECISION_DONTKNOW),
+ meIsIsoLocale( DECISION_DONTKNOW),
+ meIsIsoODF( DECISION_DONTKNOW),
+ meIsLiblangtagNeeded( DECISION_DONTKNOW),
+ mbSystemLocale( maLocale.Language.isEmpty()),
+ mbInitializedBcp47( false),
+ mbInitializedLocale( !mbSystemLocale),
+ mbInitializedLangID( false),
+ mbCachedLanguage( false),
+ mbCachedScript( false),
+ mbCachedCountry( false),
+ mbIsFallback( false)
+{
+ convertFromRtlLocale();
+}
+
+
+LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
+ :
+ maLocale( rLanguageTag.maLocale),
+ maBcp47( rLanguageTag.maBcp47),
+ maCachedLanguage( rLanguageTag.maCachedLanguage),
+ maCachedScript( rLanguageTag.maCachedScript),
+ maCachedCountry( rLanguageTag.maCachedCountry),
+ mpImplLangtag( rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
+ mnLangID( rLanguageTag.mnLangID),
+ meIsValid( rLanguageTag.meIsValid),
+ meIsIsoLocale( rLanguageTag.meIsIsoLocale),
+ meIsIsoODF( rLanguageTag.meIsIsoODF),
+ meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
+ mbSystemLocale( rLanguageTag.mbSystemLocale),
+ mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
+ mbInitializedLocale( rLanguageTag.mbInitializedLocale),
+ mbInitializedLangID( rLanguageTag.mbInitializedLangID),
+ mbCachedLanguage( rLanguageTag.mbCachedLanguage),
+ mbCachedScript( rLanguageTag.mbCachedScript),
+ mbCachedCountry( rLanguageTag.mbCachedCountry),
+ mbIsFallback( rLanguageTag.mbIsFallback)
+{
+ if (mpImplLangtag)
+ theDataRef::get().incRef();
+}
+
+
+LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
+{
+ maLocale = rLanguageTag.maLocale;
+ maBcp47 = rLanguageTag.maBcp47;
+ maCachedLanguage = rLanguageTag.maCachedLanguage;
+ maCachedScript = rLanguageTag.maCachedScript;
+ maCachedCountry = rLanguageTag.maCachedCountry;
+ mpImplLangtag = rLanguageTag.mpImplLangtag;
+ mpImplLangtag = rLanguageTag.mpImplLangtag ?
+ lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
+ mnLangID = rLanguageTag.mnLangID;
+ meIsValid = rLanguageTag.meIsValid;
+ meIsIsoLocale = rLanguageTag.meIsIsoLocale;
+ meIsIsoODF = rLanguageTag.meIsIsoODF;
+ meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
+ mbSystemLocale = rLanguageTag.mbSystemLocale;
+ mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
+ mbInitializedLocale = rLanguageTag.mbInitializedLocale;
+ mbInitializedLangID = rLanguageTag.mbInitializedLangID;
+ mbCachedLanguage = rLanguageTag.mbCachedLanguage;
+ mbCachedScript = rLanguageTag.mbCachedScript;
+ mbCachedCountry = rLanguageTag.mbCachedCountry;
+ mbIsFallback = rLanguageTag.mbIsFallback;
+ if (mpImplLangtag)
+ theDataRef::get().incRef();
+ return *this;
+}
+
+
+LanguageTag::~LanguageTag()
+{
+ if (mpImplLangtag)
+ {
+ lt_tag_unref( MPLANGTAG);
+ theDataRef::get().decRef();
+ }
+}
+
+
+void LanguageTag::resetVars()
+{
+ if (mpImplLangtag)
+ {
+ lt_tag_unref( MPLANGTAG);
+ mpImplLangtag = NULL;
+ theDataRef::get().decRef();
+ }
+
+ maLocale = lang::Locale();
+ if (!maBcp47.isEmpty())
+ maBcp47 = OUString();
+ if (!maCachedLanguage.isEmpty())
+ maCachedLanguage= OUString();
+ if (!maCachedScript.isEmpty())
+ maCachedScript = OUString();
+ if (!maCachedCountry.isEmpty())
+ maCachedCountry = OUString();
+ mnLangID = LANGUAGE_DONTKNOW;
+ meIsValid = DECISION_DONTKNOW;
+ meIsIsoLocale = DECISION_DONTKNOW;
+ meIsIsoODF = DECISION_DONTKNOW;
+ meIsLiblangtagNeeded= DECISION_DONTKNOW;
+ mbSystemLocale = true;
+ mbInitializedBcp47 = false;
+ mbInitializedLocale = false;
+ mbInitializedLangID = false;
+ mbCachedLanguage = false;
+ mbCachedScript = false;
+ mbCachedCountry = false;
+ mbIsFallback = false;
+}
+
+
+void LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
+{
+ resetVars();
+ maBcp47 = rBcp47LanguageTag;
+ mbSystemLocale = rBcp47LanguageTag.isEmpty();
+ mbInitializedBcp47 = !mbSystemLocale;
+
+ if (bCanonicalize)
+ canonicalize();
+}
+
+
+void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
+{
+ resetVars();
+ maLocale = rLocale;
+ mbSystemLocale = rLocale.Language.isEmpty();
+ mbInitializedLocale = !mbSystemLocale;
+}
+
+
+void LanguageTag::reset( LanguageType nLanguage )
+{
+ resetVars();
+ mnLangID = nLanguage;
+ mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
+ mbInitializedLangID = !mbSystemLocale;
+}
+
+
+void LanguageTag::reset( const rtl_Locale & rLocale )
+{
+ reset( lang::Locale( rLocale.Language, rLocale.Country, rLocale.Variant));
+ convertFromRtlLocale();
+}
+
+
+bool LanguageTag::canonicalize()
+{
+#ifdef erDEBUG
+ // dump once
+ struct dumper
+ {
+ void** mpp;
+ dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
+ ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
+ };
+ dumper aDumper( &mpImplLangtag);
+#endif
+
+ // Side effect: have maBcp47 in any case, resolved system.
+ // Some methods calling canonicalize() (or not calling it due to
+ // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
+ // meIsLiblangtagNeeded anywhere else than hereafter.
+ getBcp47( true );
+
+ // The simple cases and known locales don't need liblangtag processing,
+ // which also avoids loading liblangtag data on startup.
+ if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
+ {
+ bool bTemporaryLocale = false;
+ bool bTemporaryLangID = false;
+ if (!mbInitializedLocale && !mbInitializedLangID)
+ {
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ mbInitializedLangID = true;
+ }
+ else
+ {
+ // Now this is getting funny.. we only have some BCP47 string
+ // and want to determine if parsing it would be possible
+ // without using liblangtag just to see if it is a simple known
+ // locale.
+ OUString aLanguage, aScript, aCountry;
+ Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
+ if (eExt != EXTRACTED_NONE)
+ {
+ if (eExt == EXTRACTED_LSC && aScript.isEmpty())
+ {
+ maLocale.Language = aLanguage;
+ maLocale.Country = aCountry;
+ }
+ else
+ {
+ maLocale.Language = ISO639_LANGUAGE_TAG;
+ maLocale.Country = aCountry;
+ maLocale.Variant = maBcp47;
+ }
+ bTemporaryLocale = mbInitializedLocale = true;
+ }
+ }
+ }
+ if (mbInitializedLangID && !mbInitializedLocale)
+ {
+ // Do not call getLocale() here because that prefers
+ // convertBcp47ToLocale() which would end up in recursion via
+ // isIsoLocale()!
+
+ // Prepare to verify that we have a known locale, not just an
+ // arbitrary MS-LangID.
+ convertLangToLocale();
+ }
+ if (mbInitializedLocale)
+ {
+ if (maLocale.Variant.isEmpty())
+ meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
+ else
+ {
+ if (!mbInitializedLangID)
+ {
+ convertLocaleToLang();
+ if (bTemporaryLocale)
+ bTemporaryLangID = true;
+ }
+ if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
+ meIsLiblangtagNeeded = DECISION_NO; // known locale
+ }
+ }
+ if (bTemporaryLocale)
+ {
+ mbInitializedLocale = false;
+ maLocale = lang::Locale();
+ }
+ if (bTemporaryLangID)
+ {
+ mbInitializedLangID = false;
+ mnLangID = LANGUAGE_DONTKNOW;
+ }
+ }
+ if (meIsLiblangtagNeeded == DECISION_NO)
+ {
+ meIsValid = DECISION_YES; // really, known must be valid ...
+ return true; // that's it
+ }
+ meIsLiblangtagNeeded = DECISION_YES;
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);
+
+ if (!mpImplLangtag)
+ {
+ theDataRef::get().incRef();
+ mpImplLangtag = lt_tag_new();
+ }
+
+ myLtError aError;
+
+ if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
+ {
+ char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
+ SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47);
+ if (pTag)
+ {
+ OUString aOld( maBcp47);
+ maBcp47 = OUString::createFromAscii( pTag);
+ // Make the lt_tag_t follow the new string if different, which
+ // removes default script and such.
+ if (maBcp47 != aOld)
+ {
+ if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
+ {
+ SAL_WARN( "i18nlangtag", "LanguageTag::canonicalize: could not reparse " << maBcp47);
+ free( pTag);
+ meIsValid = DECISION_NO;
+ return false;
+ }
+ }
+ free( pTag);
+ meIsValid = DECISION_YES;
+ return true;
+ }
+ }
+ else
+ {
+ SAL_INFO( "i18nlangtag", "LanguageTag::canonicalize: could not parse " << maBcp47);
+ }
+ meIsValid = DECISION_NO;
+ return false;
+}
+
+
+void LanguageTag::convertLocaleToBcp47()
+{
+ if (mbSystemLocale && !mbInitializedLocale)
+ convertLangToLocale();
+
+ if (maLocale.Language == ISO639_LANGUAGE_TAG)
+ {
+ maBcp47 = maLocale.Variant;
+ meIsIsoLocale = DECISION_NO;
+ }
+ else
+ {
+ /* XXX NOTE: most legacy code never evaluated the Variant field, so for
+ * now just concatenate language and country. In case we stumbled over
+ * variant aware code we'd have to take care of that. */
+ if (maLocale.Country.isEmpty())
+ maBcp47 = maLocale.Language;
+ else
+ {
+ OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength());
+ aBuf.append( maLocale.Language).append( '-').append( maLocale.Country);
+ maBcp47 = aBuf.makeStringAndClear();
+ }
+ }
+ mbInitializedBcp47 = true;
+}
+
+
+void LanguageTag::convertLocaleToLang()
+{
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ }
+ else
+ {
+ /* FIXME: this is temporary until code base is converted to not use
+ * MsLangId::convert...() anymore. After that, proper new method has to
+ * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */
+ mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
+ }
+ mbInitializedLangID = true;
+}
+
+
+void LanguageTag::convertBcp47ToLocale()
+{
+ bool bIso = isIsoLocale();
+ if (bIso)
+ {
+ maLocale.Language = getLanguageFromLangtag();
+ maLocale.Country = getRegionFromLangtag();
+ maLocale.Variant = OUString();
+ }
+ else
+ {
+ maLocale.Language = ISO639_LANGUAGE_TAG;
+ maLocale.Country = getCountry();
+ maLocale.Variant = maBcp47;
+ }
+ mbInitializedLocale = true;
+}
+
+
+void LanguageTag::convertBcp47ToLang()
+{
+ if (mbSystemLocale)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ }
+ else
+ {
+ /* FIXME: this is temporary. If we support locales that consist not
+ * only of language and country, e.g. added script, this probably needs
+ * to be adapted. */
+ if (!mbInitializedLocale)
+ convertBcp47ToLocale();
+ convertLocaleToLang();
+ }
+ mbInitializedLangID = true;
+}
+
+
+void LanguageTag::convertLangToLocale()
+{
+ if (mbSystemLocale && !mbInitializedLangID)
+ {
+ mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
+ mbInitializedLangID = true;
+ }
+ /* FIXME: this is temporary until code base is converted to not use
+ * MsLangId::convert...() anymore. After that, proper new method has to be
+ * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
+ // Resolve system here!
+ maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
+ mbInitializedLocale = true;
+}
+
+
+void LanguageTag::convertLangToBcp47()
+{
+ /* FIXME: this is temporary. If we support locales that consist not only of
+ * language and country, e.g. added script, this probably needs to be
+ * adapted. */
+ if (!mbInitializedLocale)
+ convertLangToLocale();
+ convertLocaleToBcp47();
+ mbInitializedBcp47 = true;
+}
+
+
+void LanguageTag::convertFromRtlLocale()
+{
+ // The rtl_Locale follows the Open Group Base Specification,
+ // 8.2 Internationalization Variables
+ // language[_territory][.codeset][@modifier]
+ // On GNU/Linux systems usually being glibc locales.
+ // sal/osl/unx/nlsupport.c _parse_locale() parses them into
+ // Language: language 2 or 3 alpha code
+ // Country: [territory] 2 alpha code
+ // Variant: [.codeset][@modifier]
+ // Variant effectively contains anything that follows the territory, not
+ // looking for '.' dot delimiter or '@' modifier content.
+ if (!maLocale.Variant.isEmpty())
+ {
+ OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
+ RTL_TEXTENCODING_UTF8);
+ /* FIXME: let liblangtag parse this entirely with
+ * lt_tag_convert_from_locale() but that needs a patch to pass the
+ * string. */
+#if 0
+ myLtError aError;
+ theDataRef::get().incRef();
+ mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
+ maBcp47 = OStringToOUString( lt_tag_get_string( MPLANGTAG), RTL_TEXTENCODING_UTF8);
+ mbInitializedBcp47 = true;
+#else
+ mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
+ if (mnLangID == LANGUAGE_DONTKNOW)
+ {
+ SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
+ mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
+ }
+ mbInitializedLangID = true;
+#endif
+ maLocale = lang::Locale();
+ mbInitializedLocale = false;
+ }
+}
+
+
+const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return theEmptyBcp47::get();
+ if (!mbInitializedBcp47)
+ {
+ if (mbInitializedLocale)
+ const_cast<LanguageTag*>(this)->convertLocaleToBcp47();
+ else
+ const_cast<LanguageTag*>(this)->convertLangToBcp47();
+ }
+ return maBcp47;
+}
+
+
+OUString LanguageTag::getLanguageFromLangtag()
+{
+ OUString aLanguage;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aLanguage;
+ if (mpImplLangtag)
+ {
+ const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
+ SAL_WARN_IF( !pLangT, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
+ if (!pLangT)
+ return aLanguage;
+ const char* pLang = lt_lang_get_tag( pLangT);
+ SAL_WARN_IF( !pLang, "i18nlangtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
+ if (pLang)
+ aLanguage = OUString::createFromAscii( pLang);
+ }
+ else
+ {
+ if (mbCachedLanguage || cacheSimpleLSC())
+ aLanguage = maCachedLanguage;
+ }
+ return aLanguage;
+}
+
+
+OUString LanguageTag::getScriptFromLangtag()
+{
+ OUString aScript;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aScript;
+ if (mpImplLangtag)
+ {
+ const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
+ // pScriptT==NULL is valid for default scripts
+ if (!pScriptT)
+ return aScript;
+ const char* pScript = lt_script_get_tag( pScriptT);
+ SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
+ if (pScript)
+ aScript = OUString::createFromAscii( pScript);
+ }
+ else
+ {
+ if (mbCachedScript || cacheSimpleLSC())
+ aScript = maCachedScript;
+ }
+ return aScript;
+}
+
+
+OUString LanguageTag::getRegionFromLangtag()
+{
+ OUString aRegion;
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ canonicalize();
+ if (maBcp47.isEmpty())
+ return aRegion;
+ if (mpImplLangtag)
+ {
+ const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
+ // pRegionT==NULL is valid for language only tags, rough check here
+ // that does not take sophisticated tags into account that actually
+ // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
+ // that ll-CC and lll-CC actually fail.
+ SAL_WARN_IF( !pRegionT &&
+ maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
+ maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
+ "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
+ if (!pRegionT)
+ return aRegion;
+ const char* pRegion = lt_region_get_tag( pRegionT);
+ SAL_WARN_IF( !pRegion, "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
+ if (pRegion)
+ aRegion = OUString::createFromAscii( pRegion);
+ }
+ else
+ {
+ if (mbCachedCountry || cacheSimpleLSC())
+ aRegion = maCachedCountry;
+ }
+ return aRegion;
+}
+
+
+const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return theEmptyLocale::get();
+ if (!mbInitializedLocale)
+ {
+ if (mbInitializedBcp47)
+ const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
+ else
+ const_cast<LanguageTag*>(this)->convertLangToLocale();
+ }
+ return maLocale;
+}
+
+
+LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
+{
+ if (!bResolveSystem && mbSystemLocale)
+ return LANGUAGE_SYSTEM;
+ if (!mbInitializedLangID)
+ {
+ if (mbInitializedBcp47)
+ const_cast<LanguageTag*>(this)->convertBcp47ToLang();
+ else
+ const_cast<LanguageTag*>(this)->convertLocaleToLang();
+ }
+ return mnLangID;
+}
+
+
+void LanguageTag::getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const
+{
+ if (!isIsoLocale())
+ {
+ rLanguage = OUString();
+ rCountry = OUString();
+ return;
+ }
+ // After isIsoLocale() it's safe to call getLanguage() for ISO code.
+ rLanguage = getLanguage();
+ rCountry = getCountry();
+}
+
+
+namespace
+{
+
+bool isLowerAscii( sal_Unicode c )
+{
+ return 'a' <= c && c <= 'z';
+}
+
+bool isUpperAscii( sal_Unicode c )
+{
+ return 'A' <= c && c <= 'Z';
+}
+
+}
+
+
+// static
+bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ bool b2chars;
+ if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
+ isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
+ (b2chars || isLowerAscii( rLanguage[2])))
+ return true;
+ SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
+ (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
+ (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
+ "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
+ return false;
+}
+
+
+// static
+bool LanguageTag::isIsoCountry( const OUString& rRegion )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ if (rRegion.isEmpty() ||
+ (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
+ return true;
+ SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
+ "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
+ return false;
+}
+
+
+// static
+bool LanguageTag::isIsoScript( const OUString& rScript )
+{
+ /* TODO: ignore case? For now let's see where rubbish is used. */
+ if (rScript.isEmpty() ||
+ (rScript.getLength() == 4 &&
+ isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
+ isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
+ return true;
+ SAL_WARN_IF( rScript.getLength() == 4 &&
+ (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
+ isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
+ "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
+ return false;
+}
+
+
+OUString LanguageTag::getLanguage() const
+{
+ if (!mbCachedLanguage)
+ {
+ maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag();
+ mbCachedLanguage = true;
+ }
+ return maCachedLanguage;
+}
+
+
+OUString LanguageTag::getScript() const
+{
+ if (!mbCachedScript)
+ {
+ maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag();
+ mbCachedScript = true;
+ }
+ return maCachedScript;
+}
+
+
+OUString LanguageTag::getLanguageAndScript() const
+{
+ OUString aLanguageScript( getLanguage());
+ OUString aScript( getScript());
+ if (!aScript.isEmpty())
+ {
+ OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength());
+ aBuf.append( aLanguageScript).append( '-').append( aScript);
+ aLanguageScript = aBuf.makeStringAndClear();
+ }
+ return aLanguageScript;
+}
+
+
+OUString LanguageTag::getCountry() const
+{
+ if (!mbCachedCountry)
+ {
+ maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag();
+ if (!isIsoCountry( maCachedCountry))
+ maCachedCountry = OUString();
+ mbCachedCountry = true;
+ }
+ return maCachedCountry;
+}
+
+
+OUString LanguageTag::getRegion() const
+{
+ return const_cast<LanguageTag*>(this)->getRegionFromLangtag();
+}
+
+
+OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
+{
+ OUString aRet;
+ if (isIsoLocale())
+ {
+ OUString aCountry( getCountry());
+ if (aCountry.isEmpty())
+ aRet = getLanguage() + rEncoding;
+ else
+ aRet = getLanguage() + "_" + aCountry + rEncoding;
+ }
+ else
+ {
+ /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
+ * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
+ * So far no code was prepared for anything else than a simple
+ * language_country locale so we don't loose anything here right now.
+ * */
+ }
+ return aRet;
+}
+
+
+bool LanguageTag::hasScript() const
+{
+ if (!mbCachedScript)
+ getScript();
+ return !maCachedScript.isEmpty();
+}
+
+
+bool LanguageTag::cacheSimpleLSC()
+{
+ OUString aLanguage, aScript, aCountry;
+ bool bRet = (simpleExtract( maBcp47, aLanguage, aScript, aCountry) == EXTRACTED_LSC);
+ if (bRet)
+ {
+ maCachedLanguage = aLanguage;
+ maCachedScript = aScript;
+ maCachedCountry = aCountry;
+ mbCachedLanguage = mbCachedScript = mbCachedCountry = true;
+ }
+ return bRet;
+}
+
+
+bool LanguageTag::isIsoLocale() const
+{
+ if (meIsIsoLocale == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ // It must be at most ll-CC or lll-CC
+ // Do not use getCountry() here, use getRegion() instead.
+ meIsIsoLocale = ((maBcp47.isEmpty() ||
+ (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ?
+ DECISION_YES : DECISION_NO);
+ }
+ return meIsIsoLocale == DECISION_YES;
+}
+
+
+bool LanguageTag::isIsoODF() const
+{
+ if (meIsIsoODF == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ if (!isIsoScript( getScript()))
+ return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
+ // The usual case is lll-CC so simply check that first.
+ if (isIsoLocale())
+ return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
+ // If this is not ISO locale for which script must not exist it can
+ // still be ISO locale plus ISO script lll-Ssss-CC
+ meIsIsoODF = ((maBcp47.getLength() <= 11 &&
+ isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ?
+ DECISION_YES : DECISION_NO);
+ }
+ return meIsIsoODF == DECISION_YES;
+}
+
+
+bool LanguageTag::isValidBcp47() const
+{
+ if (meIsValid == DECISION_DONTKNOW)
+ {
+ if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
+ const_cast<LanguageTag*>(this)->canonicalize();
+ SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
+ "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
+ }
+ return meIsValid == DECISION_YES;
+}
+
+
+bool LanguageTag::isSystemLocale() const
+{
+ return mbSystemLocale;
+}
+
+
+LanguageTag & LanguageTag::makeFallback()
+{
+ if (!mbIsFallback)
+ {
+ if (mbInitializedLangID)
+ {
+ LanguageType nLang1 = getLanguageType();
+ LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1);
+ if (nLang1 != nLang2)
+ reset( nLang2);
+ }
+ else
+ {
+ const lang::Locale& rLocale1 = getLocale();
+ lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
+ if ( rLocale1.Language != aLocale2.Language ||
+ rLocale1.Country != aLocale2.Country ||
+ rLocale1.Variant != aLocale2.Variant)
+ reset( aLocale2);
+ }
+ mbIsFallback = true;
+ }
+ return *this;
+}
+
+
+::std::vector< OUString > LanguageTag::getFallbackStrings() const
+{
+ ::std::vector< OUString > aVec;
+ OUString aLanguage( getLanguage());
+ OUString aCountry( getCountry());
+ if (isIsoLocale())
+ {
+ if (!aCountry.isEmpty())
+ aVec.push_back( aLanguage + "-" + aCountry);
+ aVec.push_back( aLanguage);
+ return aVec;
+ }
+ aVec.push_back( getBcp47());
+ OUString aTmp;
+ if (hasScript())
+ {
+ OUString aScript( getScript());
+ if (!aCountry.isEmpty())
+ {
+ aTmp = aLanguage + "-" + aScript + "-" + aCountry;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ aTmp = aLanguage + "-" + aScript;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ if (!aCountry.isEmpty())
+ {
+ aTmp = aLanguage + "-" + aCountry;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ }
+ aTmp = aLanguage;
+ if (aTmp != aVec[0])
+ aVec.push_back( aTmp);
+ return aVec;
+}
+
+
+bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
+{
+ // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
+ // can use the operator==() optimization.
+ if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
+ return operator==( rLanguageTag);
+
+ // Compare full language tag strings.
+ return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
+}
+
+
+bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
+{
+ if (isSystemLocale() && rLanguageTag.isSystemLocale())
+ return true; // both SYSTEM
+
+ // No need to convert to BCP47 if both Lang-IDs are available.
+ if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
+ {
+ // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
+ return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
+ }
+
+ // Compare full language tag strings but SYSTEM unresolved.
+ return getBcp47( false) == rLanguageTag.getBcp47( false);
+}
+
+
+bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
+{
+ return !operator==( rLanguageTag);
+}
+
+
+// static
+LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47,
+ OUString& rLanguage, OUString& rScript, OUString& rCountry )
+{
+ Extraction eRet = EXTRACTED_NONE;
+ const sal_Int32 nLen = rBcp47.getLength();
+ const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
+ if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
+ {
+ // It's f*d up but we need to recognize this.
+ eRet = EXTRACTED_X_JOKER;
+ }
+ else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
+ {
+ // x-... privateuse tags MUST be known to us by definition.
+ eRet = EXTRACTED_X;
+ }
+ else if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll
+ {
+ rLanguage = rBcp47;
+ rScript = rCountry = OUString();
+ eRet = EXTRACTED_LSC;
+ }
+ else if ( (nLen == 5 && nHyph1 == 2) // ll-CC
+ || (nLen == 6 && nHyph1 == 3)) // lll-CC
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1);
+ rCountry = rBcp47.copy( nHyph1 + 1, 2);
+ rScript = OUString();
+ eRet = EXTRACTED_LSC;
+ }
+ else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check
+ || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check
+ {
+ const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
+ if (nHyph2 == nHyph1 + 5)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1);
+ rScript = rBcp47.copy( nHyph1 + 1, 4);
+ rCountry = rBcp47.copy( nHyph2 + 1, 2);
+ eRet = EXTRACTED_LSC;
+ }
+ }
+ if (eRet == EXTRACTED_NONE)
+ rLanguage = rScript = rCountry = OUString();
+ return eRet;
+}
+
+
+// static
+::std::vector< OUString >::const_iterator LanguageTag::getFallback(
+ const ::std::vector< OUString > & rList, const OUString & rReference )
+{
+ if (rList.empty())
+ return rList.end();
+
+ ::std::vector< OUString >::const_iterator it;
+
+ // Try the simple case first without constructing fallbacks.
+ for (it = rList.begin(); it != rList.end(); ++it)
+ {
+ if (*it == rReference)
+ return it; // exact match
+ }
+
+ ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings());
+ aFallbacks.erase( aFallbacks.begin()); // first is full BCP47, we already checked that
+ if (rReference != "en-US")
+ {
+ aFallbacks.push_back( "en-US");
+ if (rReference != "en")
+ aFallbacks.push_back( "en");
+ }
+ if (rReference != "x-default")
+ aFallbacks.push_back( "x-default");
+ if (rReference != "x-no-translate")
+ aFallbacks.push_back( "x-no-translate");
+ /* TODO: the original comphelper::Locale::getFallback() code had
+ * "x-notranslate" instead of "x-no-translate", but all .xcu files use
+ * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
+ * Did that ever work? Was it supposed to work at all like this? */
+
+ for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
+ {
+ for (it = rList.begin(); it != rList.end(); ++it)
+ {
+ if (*it == *fb)
+ return it; // fallback found
+ }
+ }
+
+ // Did not find anything so return something of the list, the first value
+ // will do as well as any other as none did match any of the possible
+ // fallbacks.
+ return rList.begin();
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */