diff options
Diffstat (limited to 'i18nlangtag/source/languagetag/languagetag.cxx')
-rw-r--r-- | i18nlangtag/source/languagetag/languagetag.cxx | 283 |
1 files changed, 197 insertions, 86 deletions
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 6f6a766e861f..26a3d5db2c79 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -17,12 +17,12 @@ #include <rtl/bootstrap.hxx> #include <sal/log.hxx> #include <osl/file.hxx> -#include <osl/mutex.hxx> -#include <rtl/instance.hxx> #include <rtl/locale.h> -#include <tools/long.hxx> +#include <o3tl/string_view.hxx> #include <algorithm> +#include <atomic> #include <map> +#include <mutex> #include <string_view> #include <unordered_set> @@ -37,6 +37,10 @@ #include <osl/detail/android-bootstrap.h> #endif +#ifdef EMSCRIPTEN +#include <osl/detail/emscripten-bootstrap.h> +#endif + using namespace com::sun::star; namespace { @@ -49,24 +53,22 @@ struct myLtError ~myLtError() { if (p) lt_error_unref( p); } }; -// "static" to be returned as const reference to an empty locale. -struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {}; } -typedef std::unordered_set< OUString > KnownTagSet; namespace { -struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {}; -struct theMutex : public rtl::Static< osl::Mutex, theMutex > {}; +std::recursive_mutex& theMutex() +{ + static std::recursive_mutex SINGLETON; + return SINGLETON; +} } +typedef std::unordered_set< OUString > KnownTagSet; static const KnownTagSet & getKnowns() { - KnownTagSet & rKnowns = theKnowns::get(); - if (rKnowns.empty()) - { - osl::MutexGuard aGuard( theMutex::get()); - if (rKnowns.empty()) + static KnownTagSet theKnowns = []() { + KnownTagSet tmpSet; ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags()); for (auto const& elemDefined : aDefined) { @@ -76,36 +78,47 @@ static const KnownTagSet & getKnowns() ::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true)); for (auto const& fallback : aFallbacks) { - rKnowns.insert(fallback); + tmpSet.insert(fallback); } } - } - } - return rKnowns; + return tmpSet; + }(); + return theKnowns; } namespace { struct compareIgnoreAsciiCaseLess { - bool operator()( const OUString& r1, std::u16string_view r2 ) const + bool operator()( std::u16string_view r1, std::u16string_view r2 ) const { - return r1.compareToIgnoreAsciiCase( r2) < 0; + return o3tl::compareToIgnoreAsciiCase(r1, r2) < 0; } }; typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47; typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID; -struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {}; -struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {}; -struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {}; -struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {}; +MapBcp47& theMapBcp47() +{ + static MapBcp47 SINGLETON; + return SINGLETON; +} +MapLangID& theMapLangID() +{ + static MapLangID SINGLETON; + return SINGLETON; +} +LanguageTag::ImplPtr& theSystemLocale() +{ + static LanguageTag::ImplPtr SINGLETON; + return SINGLETON; +} } static LanguageType getNextOnTheFlyLanguage() { static LanguageType nOnTheFlyLanguage(0); - osl::MutexGuard aGuard( theMutex::get()); + std::unique_lock aGuard( theMutex()); if (!nOnTheFlyLanguage) nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START); else @@ -171,7 +184,11 @@ private: static void teardown(); }; -struct theDataRef : public rtl::Static< LiblangtagDataRef, theDataRef > {}; +LiblangtagDataRef& theDataRef() +{ + static LiblangtagDataRef SINGLETON; + return SINGLETON; +} } LiblangtagDataRef::LiblangtagDataRef() @@ -203,7 +220,7 @@ void LiblangtagDataRef::teardown() void LiblangtagDataRef::setupDataPath() { -#if defined(ANDROID) +#if defined(ANDROID) || defined(EMSCRIPTEN) maDataPath = OString(lo_get_app_data_dir()) + "/share/liblangtag"; #else // maDataPath is assumed to be empty here. @@ -222,7 +239,7 @@ void LiblangtagDataRef::setupDataPath() } #endif if (maDataPath.isEmpty()) - maDataPath = "|"; // assume system + maDataPath = "|"_ostr; // assume system else lt_db_set_datadir( maDataPath.getStr()); } @@ -344,6 +361,7 @@ private: EXTRACTED_NONE, EXTRACTED_LSC, EXTRACTED_LV, + EXTRACTED_LR, EXTRACTED_C_LOCALE, EXTRACTED_X, EXTRACTED_X_JOKER, @@ -358,6 +376,7 @@ private: @return EXTRACTED_LSC if simple tag was detected (i.e. one that would fulfill the isIsoODF() condition), EXTRACTED_LV if a tag with variant was detected, + EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected EXTRACTED_C_LOCALE if a 'C' locale was detected, EXTRACTED_X if x-... privateuse tag was detected, EXTRACTED_X_JOKER if "*" joker was detected, @@ -368,6 +387,7 @@ private: OUString& rLanguage, OUString& rScript, OUString& rCountry, + OUString& rRegion, OUString& rVariants ); /** Convert Locale to BCP 47 string without resolving system and creating @@ -429,7 +449,7 @@ LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl ) mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString) { if (mpImplLangtag) - theDataRef::get().init(); + theDataRef().init(); } @@ -465,7 +485,7 @@ LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTa mbCachedVariants = rLanguageTagImpl.mbCachedVariants; mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString; if (mpImplLangtag && !oldTag) - theDataRef::get().init(); + theDataRef().init(); return *this; } @@ -596,9 +616,9 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID return pImpl; } - osl::MutexGuard aGuard( theMutex::get()); + std::unique_lock aGuard( theMutex()); - MapBcp47& rMapBcp47 = theMapBcp47::get(); + MapBcp47& rMapBcp47 = theMapBcp47(); MapBcp47::const_iterator it( rMapBcp47.find( maBcp47)); bool bOtherImpl = false; if (it != rMapBcp47.end()) @@ -634,7 +654,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID // different, otherwise we would end up with ambiguous assignments // of different language tags, for example for the same primary // LangID with "no", "nb" and "nn". - const MapLangID& rMapLangID = theMapLangID::get(); + const MapLangID& rMapLangID = theMapLangID(); MapLangID::const_iterator itID( rMapLangID.find( nRegisterID)); if (itID != rMapLangID.end()) { @@ -667,7 +687,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID } ::std::pair< MapLangID::const_iterator, bool > res( - theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl))); + theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl))); if (res.second) { SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x" @@ -686,7 +706,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID LanguageTag::ScriptType LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID ) { - const MapLangID& rMapLangID = theMapLangID::get(); + const MapLangID& rMapLangID = theMapLangID(); MapLangID::const_iterator itID( rMapLangID.find( nRegisterID)); if (itID != rMapLangID.end()) return (*itID).second->getScriptType(); @@ -709,7 +729,7 @@ void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang ) MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang); // Reset system locale to none and let registerImpl() do the rest to // initialize a new one. - theSystemLocale::get().reset(); + theSystemLocale().reset(); LanguageTag aLanguageTag( LANGUAGE_SYSTEM); aLanguageTag.registerImpl(); } @@ -740,7 +760,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const ImplPtr pImpl; #if OSL_DEBUG_LEVEL > 0 - static size_t nCalls = 0; + static std::atomic_int nCalls = 0; ++nCalls; SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls"); #endif @@ -749,7 +769,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // and take the system locale shortcut if possible. if (mbSystemLocale) { - pImpl = theSystemLocale::get(); + pImpl = theSystemLocale(); if (pImpl) { #if OSL_DEBUG_LEVEL > 0 @@ -771,13 +791,13 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const { if (mnLangID == LANGUAGE_DONTKNOW) { + static LanguageTag::ImplPtr theDontKnow; // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the // conversion attempts. At the same time provide a central breakpoint // to inspect such places. - LanguageTag::ImplPtr& rDontKnow = theDontKnow::get(); - if (!rDontKnow) - rDontKnow = std::make_shared<LanguageTagImpl>( *this); - pImpl = rDontKnow; + if (!theDontKnow) + theDontKnow = std::make_shared<LanguageTagImpl>( *this); + pImpl = theDontKnow; #if OSL_DEBUG_LEVEL > 0 static size_t nCallsDontKnow = 0; ++nCallsDontKnow; @@ -788,7 +808,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const else { // A great share are calls for a system equal locale. - pImpl = theSystemLocale::get(); + pImpl = theSystemLocale(); if (pImpl && pImpl->mnLangID == mnLangID) { #if OSL_DEBUG_LEVEL > 0 @@ -819,7 +839,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const if (mbInitializedBcp47) { // A great share are calls for a system equal locale. - pImpl = theSystemLocale::get(); + pImpl = theSystemLocale(); if (pImpl && pImpl->maBcp47 == maBcp47) { #if OSL_DEBUG_LEVEL > 0 @@ -837,10 +857,10 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls"); #endif - osl::MutexGuard aGuard( theMutex::get()); + std::unique_lock aGuard( theMutex()); #if OSL_DEBUG_LEVEL > 0 - static tools::Long nRunning = 0; + static long nRunning = 0; // Entering twice here is ok, which is needed for fallback init in // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below, // everything else is suspicious. @@ -852,7 +872,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // Prefer LangID map as find+insert needs less comparison work. if (mbInitializedLangID) { - MapLangID& rMap = theMapLangID::get(); + MapLangID& rMap = theMapLangID(); MapLangID::const_iterator it( rMap.find( mnLangID)); if (it != rMap.end()) { @@ -874,7 +894,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const if (!pImpl->mbInitializedBcp47) pImpl->convertLocaleToBcp47(); ::std::pair< MapBcp47::const_iterator, bool > res( - theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl))); + theMapBcp47().insert( ::std::make_pair( pImpl->maBcp47, pImpl))); if (res.second) { SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID); @@ -895,7 +915,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const } else if (!maBcp47.isEmpty()) { - MapBcp47& rMap = theMapBcp47::get(); + MapBcp47& rMap = theMapBcp47(); MapBcp47::const_iterator it( rMap.find( maBcp47)); if (it != rMap.end()) { @@ -949,7 +969,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // May have involved canonicalize(), so compare with // pImpl->maBcp47 instead of maBcp47! aBcp47 = LanguageTagImpl::convertToBcp47( - MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID )); + MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true)); bInsert = (aBcp47 == pImpl->maBcp47); } } @@ -957,7 +977,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const if (bInsert) { ::std::pair< MapLangID::const_iterator, bool > res( - theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl))); + theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl))); if (res.second) { SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x" @@ -989,7 +1009,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // above, so add it. if (mbSystemLocale && mbInitializedLangID) { - theSystemLocale::get() = pImpl; + theSystemLocale() = pImpl; SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x" << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'"); } @@ -1104,20 +1124,22 @@ bool LanguageTagImpl::canonicalize() // and want to determine if parsing it would be possible // without using liblangtag just to see if it is a simple known // locale or could fall back to one. - OUString aLanguage, aScript, aCountry, aVariants; - Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants); + OUString aLanguage, aScript, aCountry, aRegion, aVariants; + Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants); if (eExt != EXTRACTED_NONE) { - if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV) + if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR) { // Rebuild bcp47 with proper casing of tags. OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() + - 1 + aCountry.getLength() + 1 + aVariants.getLength()); + 1 + aCountry.getLength() + 1 + aRegion.getLength() + 1 + aVariants.getLength()); aBuf.append( aLanguage); if (!aScript.isEmpty()) aBuf.append("-" + aScript); if (!aCountry.isEmpty()) aBuf.append("-" + aCountry); + if (!aRegion.isEmpty()) + aBuf.append("-" + aRegion); if (!aVariants.isEmpty()) aBuf.append("-" + aVariants); OUString aStr( aBuf.makeStringAndClear()); @@ -1211,7 +1233,7 @@ bool LanguageTagImpl::canonicalize() if (!mpImplLangtag) { - theDataRef::get().init(); + theDataRef().init(); mpImplLangtag = lt_tag_new(); } @@ -1340,7 +1362,7 @@ void LanguageTagImpl::convertLocaleToBcp47() // locale via LanguageTag::convertToBcp47(LanguageType) and // LanguageTag::convertToLocale(LanguageType) would instantiate another // LanguageTag. - maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false); } if (maLocale.Language.isEmpty()) { @@ -1484,7 +1506,7 @@ void LanguageTagImpl::convertLangToLocale() mbInitializedLangID = true; } // Resolve system here! The original is remembered as mbSystemLocale. - maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false); mbInitializedLocale = true; } @@ -1520,7 +1542,7 @@ void LanguageTag::convertFromRtlLocale() if (maLocale.Variant.isEmpty()) return; - OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(OUStringConcatenation(maLocale.Country + maLocale.Variant), + OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(Concat2View(maLocale.Country + maLocale.Variant), RTL_TEXTENCODING_UTF8); /* FIXME: let liblangtag parse this entirely with * lt_tag_convert_from_locale() but that needs a patch to pass the @@ -1560,7 +1582,7 @@ const OUString & LanguageTagImpl::getBcp47() const const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const { - static const OUString theEmptyBcp47 = u""; + static constexpr OUString theEmptyBcp47 = u""_ustr; if (!bResolveSystem && mbSystemLocale) return theEmptyBcp47; @@ -1698,8 +1720,11 @@ OUString LanguageTagImpl::getVariantsFromLangtag() const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const { + // "static" to be returned as const reference to an empty locale. + static lang::Locale theEmptyLocale; + if (!bResolveSystem && mbSystemLocale) - return theEmptyLocale::get(); + return theEmptyLocale; if (!mbInitializedLocale) syncVarsFromImpl(); if (!mbInitializedLocale) @@ -2017,9 +2042,9 @@ void LanguageTag::setScriptType(LanguageTag::ScriptType st) bool LanguageTagImpl::cacheSimpleLSCV() { - OUString aLanguage, aScript, aCountry, aVariants; - Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants); - bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV); + OUString aLanguage, aScript, aCountry, aRegion, aVariants; + Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants); + bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR); if (bRet) { maCachedLanguage = aLanguage; @@ -2162,8 +2187,10 @@ LanguageTag & LanguageTag::makeFallback() aVec.emplace_back(aLanguage + "-" + aCountry); if (aLanguage == "zh") { - // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also - // list zh-CN. + // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all + // other zh-XX also list zh-CN to get zh-Hans; both of which we + // use the legacy forms instead of the more correct script + // tags that unfortunately most pieces don't understand. if (aCountry == "HK" || aCountry == "MO") aVec.emplace_back(aLanguage + "-TW"); else if (aCountry != "CN") @@ -2361,7 +2388,7 @@ LanguageTag & LanguageTag::makeFallback() } // Original language-only. - if (aLanguage != maBcp47) + if (!aLanguage.isEmpty() && aLanguage != maBcp47) aVec.push_back( aLanguage); return aVec; @@ -2419,7 +2446,7 @@ bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const // static LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47, - OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants ) + OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rRegion, OUString& rVariants ) { Extraction eRet = EXTRACTED_NONE; const sal_Int32 nLen = rBcp47.getLength(); @@ -2443,6 +2470,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = "C"; rScript.clear(); rCountry.clear(); + rRegion.clear(); rVariants.clear(); } else if (nLen == 2 || nLen == 3) // ll or lll @@ -2452,6 +2480,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.toAsciiLowerCase(); rScript.clear(); rCountry.clear(); + rRegion.clear(); rVariants.clear(); eRet = EXTRACTED_LSC; } @@ -2463,11 +2492,25 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp { rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase(); + rRegion.clear(); rScript.clear(); rVariants.clear(); eRet = EXTRACTED_LSC; } } + else if ( (nHyph1 == 2 && nLen == 6) // ll-rrr + || (nHyph1 == 3 && nLen == 7)) // lll-rrr + { + if (nHyph2 < 0) + { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rCountry.clear(); + rRegion = rBcp47.copy( nHyph1 + 1, 3); + rScript.clear(); + rVariants.clear(); + eRet = EXTRACTED_LR; + } + } else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv { @@ -2480,6 +2523,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rScript.clear(); rCountry.clear(); + rRegion.clear(); rVariants = rBcp47.copy( nHyph1 + 1); eRet = EXTRACTED_LV; } @@ -2489,6 +2533,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); rCountry.clear(); + rRegion.clear(); rVariants.clear(); eRet = EXTRACTED_LSC; } @@ -2502,10 +2547,24 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase(); + rRegion.clear(); rVariants.clear(); eRet = EXTRACTED_LSC; } } + else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 11) // ll-Ssss-rrr + || (nHyph1 == 3 && nHyph2 == 8 && nLen == 12)) // lll-Ssss-rrr + { + if (nHyph3 < 0) + { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); + rCountry.clear(); + rRegion = rBcp47.copy( nHyph2 + 1, 3); + rVariants.clear(); + eRet = EXTRACTED_LR; + } + } else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...] || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...] { @@ -2516,10 +2575,26 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase(); + rRegion.clear(); rVariants = rBcp47.copy( nHyph3 + 1); eRet = EXTRACTED_LV; } } + else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 11 && nLen >= 16) // ll-Ssss-rrr-vvvv[vvvv][-...] + || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 12 && nLen >= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...] + { + if (nHyph4 < 0) + nHyph4 = rBcp47.getLength(); + if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9) + { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); + rCountry.clear(); + rRegion = rBcp47.copy( nHyph2 + 1, 3); + rVariants = rBcp47.copy( nHyph3 + 1); + eRet = EXTRACTED_LR; + } + } else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-... || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-... { @@ -2535,6 +2610,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = "es"; rScript.clear(); rCountry = "ES"; + rRegion.clear(); rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag. eRet = EXTRACTED_LV; } @@ -2550,10 +2626,26 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rScript.clear(); rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase(); + rRegion.clear(); rVariants = rBcp47.copy( nHyph2 + 1); eRet = EXTRACTED_LV; } } + else if ( (nHyph1 == 2 && nHyph2 == 6 && nLen >= 11) // ll-rrr-vvvv[vvvv][-...] + || (nHyph1 == 3 && nHyph2 == 7 && nLen >= 12)) // lll-rrr-vvvv[vvvv][-...] + { + if (nHyph3 < 0) + nHyph3 = rBcp47.getLength(); + if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9) + { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript.clear(); + rCountry.clear(); + rRegion = rBcp47.copy( nHyph1 + 1, 3); + rVariants = rBcp47.copy( nHyph2 + 1); + eRet = EXTRACTED_LR; + } + } else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...] || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...] { @@ -2564,6 +2656,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); rScript.clear(); rCountry.clear(); + rRegion.clear(); rVariants = rBcp47.copy( nHyph1 + 1); eRet = EXTRACTED_LV; } @@ -2577,6 +2670,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = "en"; rScript.clear(); rCountry = "GB"; + rRegion.clear(); rVariants = "oed"; eRet = EXTRACTED_LV; } @@ -2587,6 +2681,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage = "es"; rScript.clear(); rCountry = "ES"; + rRegion.clear(); rVariants = "tradnl"; // this is nonsense, but... ignored. eRet = EXTRACTED_KNOWN_BAD; } @@ -2598,8 +2693,19 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp rLanguage.clear(); rScript.clear(); rCountry.clear(); + rRegion.clear(); rVariants.clear(); } + else + { + assert(rLanguage.getLength() == 2 || rLanguage.getLength() == 3 + || eRet == EXTRACTED_X_JOKER || eRet == EXTRACTED_X || eRet == EXTRACTED_C_LOCALE); + assert(rScript.isEmpty() || rScript.getLength() == 4); + assert(rCountry.isEmpty() || rRegion.isEmpty()); // [2ALPHA / 3DIGIT] + assert(rCountry.isEmpty() || rCountry.getLength() == 2); + assert(rRegion.isEmpty() || rRegion.getLength() == 3); + assert(rVariants.isEmpty() || rVariants.getLength() >= 4 || rVariants == "oed"); + } return eRet; } @@ -2668,10 +2774,8 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size()); size_t i = 0; for (auto const& elem : rList) - { - ::std::vector< OUString > aTmp( LanguageTag(elem).getFallbackStrings( true)); - aListFallbacks[i++] = aTmp; - } + aListFallbacks[i++] = LanguageTag(elem).getFallbackStrings(true); + for (auto const& rfb : aFallbacks) { size_t nPosFb = 0; @@ -2813,9 +2917,9 @@ css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp // static -LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale, bool bResolveSystem ) +LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale ) { - if (rLocale.Language.isEmpty() && !bResolveSystem) + if (rLocale.Language.isEmpty()) return LANGUAGE_SYSTEM; return LanguageTag( rLocale).makeFallback().getLanguageType(); @@ -2823,7 +2927,8 @@ LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Lo // static -bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate ) +bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, + LanguageTag::PrivateUse ePrivateUse ) { bool bValid = false; @@ -2832,7 +2937,7 @@ bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicali lt_tag_t* mpLangtag; guard() { - theDataRef::get().init(); + theDataRef().init(); mpLangtag = lt_tag_new(); } ~guard() @@ -2850,30 +2955,37 @@ bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicali if (pTag) { bValid = true; - if (bDisallowPrivate) + if (ePrivateUse != PrivateUse::ALLOW) { - const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag); - if (pPrivate && lt_string_length( pPrivate) > 0) - bValid = false; - else + do { + const char* pLang = nullptr; const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag); if (pLangT) { - const char* pLang = lt_lang_get_tag( pLangT); - if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0) + pLang = lt_lang_get_tag( pLangT); + if (pLang && strcmp( pLang, I18NLANGTAG_QLT_ASCII) == 0) { - // Disallow 'qlt' privateuse code to prevent + // Disallow 'qlt' localuse code to prevent // confusion with our internal usage. bValid = false; + break; } } + if (ePrivateUse == PrivateUse::ALLOW_ART_X && pLang && strcmp( pLang, "art") == 0) + { + // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'. + break; + } + const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag); + if (pPrivate && lt_string_length( pPrivate) > 0) + bValid = false; } + while (false); } if (o_pCanonicalized) *o_pCanonicalized = OUString::createFromAscii( pTag); free( pTag); - return bValid; } } else @@ -3228,7 +3340,6 @@ LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage) return LanguageTag("ga-Latg"); case AppleLanguageId::TONGAN: return LanguageTag("to"); - break; case AppleLanguageId::GREEK_POLYTONIC: nLang = LANGUAGE_USER_ANCIENT_GREEK; break; |