summary refs log tree commit diff
path: root/i18nlangtag/source/languagetag/languagetag.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18nlangtag/source/languagetag/languagetag.cxx')
-rw-r--r-- i18nlangtag/source/languagetag/languagetag.cxx 283
1 files changed, 197 insertions, 86 deletions
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx
index 6f6a766e861f..26a3d5db2c79 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -17,12 +17,12 @@
#include <rtl/bootstrap.hxx>
#include <sal/log.hxx>
#include <osl/file.hxx>
-#include <osl/mutex.hxx>
-#include <rtl/instance.hxx>
#include <rtl/locale.h>
-#include <tools/long.hxx>
+#include <o3tl/string_view.hxx>
#include <algorithm>
+#include <atomic>
#include <map>
+#include <mutex>
#include <string_view>
#include <unordered_set>
@@ -37,6 +37,10 @@
#include <osl/detail/android-bootstrap.h>
#endif
+#ifdef EMSCRIPTEN
+#include <osl/detail/emscripten-bootstrap.h>
+#endif
+
using namespace com::sun::star;
namespace {
@@ -49,24 +53,22 @@ struct myLtError
~myLtError() { if (p) lt_error_unref( p); }
};
-// "static" to be returned as const reference to an empty locale.
-struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
}
-typedef std::unordered_set< OUString > KnownTagSet;
namespace {
-struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
-struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
+std::recursive_mutex& theMutex()
+{
+ static std::recursive_mutex SINGLETON;
+ return SINGLETON;
+}
}
+typedef std::unordered_set< OUString > KnownTagSet;
static const KnownTagSet & getKnowns()
{
- KnownTagSet & rKnowns = theKnowns::get();
- if (rKnowns.empty())
- {
- osl::MutexGuard aGuard( theMutex::get());
- if (rKnowns.empty())
+ static KnownTagSet theKnowns = []()
{
+ KnownTagSet tmpSet;
::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
for (auto const& elemDefined : aDefined)
{
@@ -76,36 +78,47 @@ static const KnownTagSet & getKnowns()
::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true));
for (auto const& fallback : aFallbacks)
{
- rKnowns.insert(fallback);
+ tmpSet.insert(fallback);
}
}
- }
- }
- return rKnowns;
+ return tmpSet;
+ }();
+ return theKnowns;
}
namespace {
struct compareIgnoreAsciiCaseLess
{
- bool operator()( const OUString& r1, std::u16string_view r2 ) const
+ bool operator()( std::u16string_view r1, std::u16string_view r2 ) const
{
- return r1.compareToIgnoreAsciiCase( r2) < 0;
+ return o3tl::compareToIgnoreAsciiCase(r1, r2) < 0;
}
};
typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
-struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
-struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
-struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
-struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
+MapBcp47& theMapBcp47()
+{
+ static MapBcp47 SINGLETON;
+ return SINGLETON;
+}
+MapLangID& theMapLangID()
+{
+ static MapLangID SINGLETON;
+ return SINGLETON;
+}
+LanguageTag::ImplPtr& theSystemLocale()
+{
+ static LanguageTag::ImplPtr SINGLETON;
+ return SINGLETON;
+}
}
static LanguageType getNextOnTheFlyLanguage()
{
static LanguageType nOnTheFlyLanguage(0);
- osl::MutexGuard aGuard( theMutex::get());
+ std::unique_lock aGuard( theMutex());
if (!nOnTheFlyLanguage)
nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
else
@@ -171,7 +184,11 @@ private:
static void teardown();
};
-struct theDataRef : public rtl::Static< LiblangtagDataRef, theDataRef > {};
+LiblangtagDataRef& theDataRef()
+{
+ static LiblangtagDataRef SINGLETON;
+ return SINGLETON;
+}
}
LiblangtagDataRef::LiblangtagDataRef()
@@ -203,7 +220,7 @@ void LiblangtagDataRef::teardown()
void LiblangtagDataRef::setupDataPath()
{
-#if defined(ANDROID)
+#if defined(ANDROID) || defined(EMSCRIPTEN)
maDataPath = OString(lo_get_app_data_dir()) + "/share/liblangtag";
#else
// maDataPath is assumed to be empty here.
@@ -222,7 +239,7 @@ void LiblangtagDataRef::setupDataPath()
}
#endif
if (maDataPath.isEmpty())
- maDataPath = "|"; // assume system
+ maDataPath = "|"_ostr; // assume system
else
lt_db_set_datadir( maDataPath.getStr());
}
@@ -344,6 +361,7 @@ private:
EXTRACTED_NONE,
EXTRACTED_LSC,
EXTRACTED_LV,
+ EXTRACTED_LR,
EXTRACTED_C_LOCALE,
EXTRACTED_X,
EXTRACTED_X_JOKER,
@@ -358,6 +376,7 @@ private:
@return EXTRACTED_LSC if simple tag was detected (i.e. one that
would fulfill the isIsoODF() condition),
EXTRACTED_LV if a tag with variant was detected,
+ EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected,
EXTRACTED_C_LOCALE if a 'C' locale was detected,
EXTRACTED_X if x-... privateuse tag was detected,
EXTRACTED_X_JOKER if "*" joker was detected,
@@ -368,6 +387,7 @@ private:
OUString& rLanguage,
OUString& rScript,
OUString& rCountry,
+ OUString& rRegion,
OUString& rVariants );
/** Convert Locale to BCP 47 string without resolving system and creating
@@ -429,7 +449,7 @@ LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString)
{
if (mpImplLangtag)
- theDataRef::get().init();
+ theDataRef().init();
}
@@ -465,7 +485,7 @@ LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTa
mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString;
if (mpImplLangtag && !oldTag)
- theDataRef::get().init();
+ theDataRef().init();
return *this;
}
@@ -596,9 +616,9 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
return pImpl;
}
- osl::MutexGuard aGuard( theMutex::get());
+ std::unique_lock aGuard( theMutex());
- MapBcp47& rMapBcp47 = theMapBcp47::get();
+ MapBcp47& rMapBcp47 = theMapBcp47();
MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
bool bOtherImpl = false;
if (it != rMapBcp47.end())
@@ -634,7 +654,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
// different, otherwise we would end up with ambiguous assignments
// of different language tags, for example for the same primary
// LangID with "no", "nb" and "nn".
- const MapLangID& rMapLangID = theMapLangID::get();
+ const MapLangID& rMapLangID = theMapLangID();
MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
if (itID != rMapLangID.end())
{
@@ -667,7 +687,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
}
::std::pair< MapLangID::const_iterator, bool > res(
- theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
+ theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
if (res.second)
{
SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
@@ -686,7 +706,7 @@ LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
LanguageTag::ScriptType LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID )
{
- const MapLangID& rMapLangID = theMapLangID::get();
+ const MapLangID& rMapLangID = theMapLangID();
MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
if (itID != rMapLangID.end())
return (*itID).second->getScriptType();
@@ -709,7 +729,7 @@ void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
// Reset system locale to none and let registerImpl() do the rest to
// initialize a new one.
- theSystemLocale::get().reset();
+ theSystemLocale().reset();
LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
aLanguageTag.registerImpl();
}
@@ -740,7 +760,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
ImplPtr pImpl;
#if OSL_DEBUG_LEVEL > 0
- static size_t nCalls = 0;
+ static std::atomic_int nCalls = 0;
++nCalls;
SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
#endif
@@ -749,7 +769,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
// and take the system locale shortcut if possible.
if (mbSystemLocale)
{
- pImpl = theSystemLocale::get();
+ pImpl = theSystemLocale();
if (pImpl)
{
#if OSL_DEBUG_LEVEL > 0
@@ -771,13 +791,13 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
{
if (mnLangID == LANGUAGE_DONTKNOW)
{
+ static LanguageTag::ImplPtr theDontKnow;
// Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
// conversion attempts. At the same time provide a central breakpoint
// to inspect such places.
- LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
- if (!rDontKnow)
- rDontKnow = std::make_shared<LanguageTagImpl>( *this);
- pImpl = rDontKnow;
+ if (!theDontKnow)
+ theDontKnow = std::make_shared<LanguageTagImpl>( *this);
+ pImpl = theDontKnow;
#if OSL_DEBUG_LEVEL > 0
static size_t nCallsDontKnow = 0;
++nCallsDontKnow;
@@ -788,7 +808,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
else
{
// A great share are calls for a system equal locale.
- pImpl = theSystemLocale::get();
+ pImpl = theSystemLocale();
if (pImpl && pImpl->mnLangID == mnLangID)
{
#if OSL_DEBUG_LEVEL > 0
@@ -819,7 +839,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
if (mbInitializedBcp47)
{
// A great share are calls for a system equal locale.
- pImpl = theSystemLocale::get();
+ pImpl = theSystemLocale();
if (pImpl && pImpl->maBcp47 == maBcp47)
{
#if OSL_DEBUG_LEVEL > 0
@@ -837,10 +857,10 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
#endif
- osl::MutexGuard aGuard( theMutex::get());
+ std::unique_lock aGuard( theMutex());
#if OSL_DEBUG_LEVEL > 0
- static tools::Long nRunning = 0;
+ static long nRunning = 0;
// Entering twice here is ok, which is needed for fallback init in
// getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
// everything else is suspicious.
@@ -852,7 +872,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
// Prefer LangID map as find+insert needs less comparison work.
if (mbInitializedLangID)
{
- MapLangID& rMap = theMapLangID::get();
+ MapLangID& rMap = theMapLangID();
MapLangID::const_iterator it( rMap.find( mnLangID));
if (it != rMap.end())
{
@@ -874,7 +894,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
if (!pImpl->mbInitializedBcp47)
pImpl->convertLocaleToBcp47();
::std::pair< MapBcp47::const_iterator, bool > res(
- theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
+ theMapBcp47().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
if (res.second)
{
SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
@@ -895,7 +915,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
}
else if (!maBcp47.isEmpty())
{
- MapBcp47& rMap = theMapBcp47::get();
+ MapBcp47& rMap = theMapBcp47();
MapBcp47::const_iterator it( rMap.find( maBcp47));
if (it != rMap.end())
{
@@ -949,7 +969,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
// May have involved canonicalize(), so compare with
// pImpl->maBcp47 instead of maBcp47!
aBcp47 = LanguageTagImpl::convertToBcp47(
- MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID ));
+ MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
bInsert = (aBcp47 == pImpl->maBcp47);
}
}
@@ -957,7 +977,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
if (bInsert)
{
::std::pair< MapLangID::const_iterator, bool > res(
- theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
+ theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
if (res.second)
{
SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
@@ -989,7 +1009,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
// above, so add it.
if (mbSystemLocale && mbInitializedLangID)
{
- theSystemLocale::get() = pImpl;
+ theSystemLocale() = pImpl;
SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
<< ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
}
@@ -1104,20 +1124,22 @@ bool LanguageTagImpl::canonicalize()
// and want to determine if parsing it would be possible
// without using liblangtag just to see if it is a simple known
// locale or could fall back to one.
- OUString aLanguage, aScript, aCountry, aVariants;
- Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
+ OUString aLanguage, aScript, aCountry, aRegion, aVariants;
+ Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
if (eExt != EXTRACTED_NONE)
{
- if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
+ if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR)
{
// Rebuild bcp47 with proper casing of tags.
OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
- 1 + aCountry.getLength() + 1 + aVariants.getLength());
+ 1 + aCountry.getLength() + 1 + aRegion.getLength() + 1 + aVariants.getLength());
aBuf.append( aLanguage);
if (!aScript.isEmpty())
aBuf.append("-" + aScript);
if (!aCountry.isEmpty())
aBuf.append("-" + aCountry);
+ if (!aRegion.isEmpty())
+ aBuf.append("-" + aRegion);
if (!aVariants.isEmpty())
aBuf.append("-" + aVariants);
OUString aStr( aBuf.makeStringAndClear());
@@ -1211,7 +1233,7 @@ bool LanguageTagImpl::canonicalize()
if (!mpImplLangtag)
{
- theDataRef::get().init();
+ theDataRef().init();
mpImplLangtag = lt_tag_new();
}
@@ -1340,7 +1362,7 @@ void LanguageTagImpl::convertLocaleToBcp47()
// locale via LanguageTag::convertToBcp47(LanguageType) and
// LanguageTag::convertToLocale(LanguageType) would instantiate another
// LanguageTag.
- maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM );
+ maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false);
}
if (maLocale.Language.isEmpty())
{
@@ -1484,7 +1506,7 @@ void LanguageTagImpl::convertLangToLocale()
mbInitializedLangID = true;
}
// Resolve system here! The original is remembered as mbSystemLocale.
- maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID );
+ maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false);
mbInitializedLocale = true;
}
@@ -1520,7 +1542,7 @@ void LanguageTag::convertFromRtlLocale()
if (maLocale.Variant.isEmpty())
return;
- OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(OUStringConcatenation(maLocale.Country + maLocale.Variant),
+ OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(Concat2View(maLocale.Country + maLocale.Variant),
RTL_TEXTENCODING_UTF8);
/* FIXME: let liblangtag parse this entirely with
* lt_tag_convert_from_locale() but that needs a patch to pass the
@@ -1560,7 +1582,7 @@ const OUString & LanguageTagImpl::getBcp47() const
const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
{
- static const OUString theEmptyBcp47 = u"";
+ static constexpr OUString theEmptyBcp47 = u""_ustr;
if (!bResolveSystem && mbSystemLocale)
return theEmptyBcp47;
@@ -1698,8 +1720,11 @@ OUString LanguageTagImpl::getVariantsFromLangtag()
const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
{
+ // "static" to be returned as const reference to an empty locale.
+ static lang::Locale theEmptyLocale;
+
if (!bResolveSystem && mbSystemLocale)
- return theEmptyLocale::get();
+ return theEmptyLocale;
if (!mbInitializedLocale)
syncVarsFromImpl();
if (!mbInitializedLocale)
@@ -2017,9 +2042,9 @@ void LanguageTag::setScriptType(LanguageTag::ScriptType st)
bool LanguageTagImpl::cacheSimpleLSCV()
{
- OUString aLanguage, aScript, aCountry, aVariants;
- Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
- bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
+ OUString aLanguage, aScript, aCountry, aRegion, aVariants;
+ Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
+ bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR);
if (bRet)
{
maCachedLanguage = aLanguage;
@@ -2162,8 +2187,10 @@ LanguageTag & LanguageTag::makeFallback()
aVec.emplace_back(aLanguage + "-" + aCountry);
if (aLanguage == "zh")
{
- // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
- // list zh-CN.
+ // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
+ // other zh-XX also list zh-CN to get zh-Hans; for both we use
+ // the legacy forms instead of the more correct script tags,
+ // which unfortunately most pieces don't understand.
if (aCountry == "HK" || aCountry == "MO")
aVec.emplace_back(aLanguage + "-TW");
else if (aCountry != "CN")
@@ -2361,7 +2388,7 @@ LanguageTag & LanguageTag::makeFallback()
}
// Original language-only.
- if (aLanguage != maBcp47)
+ if (!aLanguage.isEmpty() && aLanguage != maBcp47)
aVec.push_back( aLanguage);
return aVec;
@@ -2419,7 +2446,7 @@ bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
// static
LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
- OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
+ OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rRegion, OUString& rVariants )
{
Extraction eRet = EXTRACTED_NONE;
const sal_Int32 nLen = rBcp47.getLength();
@@ -2443,6 +2470,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = "C";
rScript.clear();
rCountry.clear();
+ rRegion.clear();
rVariants.clear();
}
else if (nLen == 2 || nLen == 3) // ll or lll
@@ -2452,6 +2480,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.toAsciiLowerCase();
rScript.clear();
rCountry.clear();
+ rRegion.clear();
rVariants.clear();
eRet = EXTRACTED_LSC;
}
@@ -2463,11 +2492,25 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
{
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
+ rRegion.clear();
rScript.clear();
rVariants.clear();
eRet = EXTRACTED_LSC;
}
}
+ else if ( (nHyph1 == 2 && nLen == 6) // ll-rrr
+ || (nHyph1 == 3 && nLen == 7)) // lll-rrr
+ {
+ if (nHyph2 < 0)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rCountry.clear();
+ rRegion = rBcp47.copy( nHyph1 + 1, 3);
+ rScript.clear();
+ rVariants.clear();
+ eRet = EXTRACTED_LR;
+ }
+ }
else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
|| (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
{
@@ -2480,6 +2523,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rScript.clear();
rCountry.clear();
+ rRegion.clear();
rVariants = rBcp47.copy( nHyph1 + 1);
eRet = EXTRACTED_LV;
}
@@ -2489,6 +2533,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
rCountry.clear();
+ rRegion.clear();
rVariants.clear();
eRet = EXTRACTED_LSC;
}
@@ -2502,10 +2547,24 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
+ rRegion.clear();
rVariants.clear();
eRet = EXTRACTED_LSC;
}
}
+ else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 11) // ll-Ssss-rrr
+ || (nHyph1 == 3 && nHyph2 == 8 && nLen == 12)) // lll-Ssss-rrr
+ {
+ if (nHyph3 < 0)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
+ rCountry.clear();
+ rRegion = rBcp47.copy( nHyph2 + 1, 3);
+ rVariants.clear();
+ eRet = EXTRACTED_LR;
+ }
+ }
else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
|| (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
{
@@ -2516,10 +2575,26 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
+ rRegion.clear();
rVariants = rBcp47.copy( nHyph3 + 1);
eRet = EXTRACTED_LV;
}
}
+ else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 11 && nLen >= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
+ || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 12 && nLen >= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
+ {
+ if (nHyph4 < 0)
+ nHyph4 = rBcp47.getLength();
+ if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
+ rCountry.clear();
+ rRegion = rBcp47.copy( nHyph2 + 1, 3);
+ rVariants = rBcp47.copy( nHyph3 + 1);
+ eRet = EXTRACTED_LR;
+ }
+ }
else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-...
|| (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-...
{
@@ -2535,6 +2610,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = "es";
rScript.clear();
rCountry = "ES";
+ rRegion.clear();
rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
eRet = EXTRACTED_LV;
}
@@ -2550,10 +2626,26 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rScript.clear();
rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
+ rRegion.clear();
rVariants = rBcp47.copy( nHyph2 + 1);
eRet = EXTRACTED_LV;
}
}
+ else if ( (nHyph1 == 2 && nHyph2 == 6 && nLen >= 11) // ll-rrr-vvvv[vvvv][-...]
+ || (nHyph1 == 3 && nHyph2 == 7 && nLen >= 12)) // lll-rrr-vvvv[vvvv][-...]
+ {
+ if (nHyph3 < 0)
+ nHyph3 = rBcp47.getLength();
+ if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
+ {
+ rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
+ rScript.clear();
+ rCountry.clear();
+ rRegion = rBcp47.copy( nHyph1 + 1, 3);
+ rVariants = rBcp47.copy( nHyph2 + 1);
+ eRet = EXTRACTED_LR;
+ }
+ }
else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
|| (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
{
@@ -2564,6 +2656,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
rScript.clear();
rCountry.clear();
+ rRegion.clear();
rVariants = rBcp47.copy( nHyph1 + 1);
eRet = EXTRACTED_LV;
}
@@ -2577,6 +2670,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = "en";
rScript.clear();
rCountry = "GB";
+ rRegion.clear();
rVariants = "oed";
eRet = EXTRACTED_LV;
}
@@ -2587,6 +2681,7 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage = "es";
rScript.clear();
rCountry = "ES";
+ rRegion.clear();
rVariants = "tradnl"; // this is nonsense, but... ignored.
eRet = EXTRACTED_KNOWN_BAD;
}
@@ -2598,8 +2693,19 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
rLanguage.clear();
rScript.clear();
rCountry.clear();
+ rRegion.clear();
rVariants.clear();
}
+ else
+ {
+ assert(rLanguage.getLength() == 2 || rLanguage.getLength() == 3
+ || eRet == EXTRACTED_X_JOKER || eRet == EXTRACTED_X || eRet == EXTRACTED_C_LOCALE);
+ assert(rScript.isEmpty() || rScript.getLength() == 4);
+ assert(rCountry.isEmpty() || rRegion.isEmpty()); // [2ALPHA / 3DIGIT]
+ assert(rCountry.isEmpty() || rCountry.getLength() == 2);
+ assert(rRegion.isEmpty() || rRegion.getLength() == 3);
+ assert(rVariants.isEmpty() || rVariants.getLength() >= 4 || rVariants == "oed");
+ }
return eRet;
}
@@ -2668,10 +2774,8 @@ LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp
::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
size_t i = 0;
for (auto const& elem : rList)
- {
- ::std::vector< OUString > aTmp( LanguageTag(elem).getFallbackStrings( true));
- aListFallbacks[i++] = aTmp;
- }
+ aListFallbacks[i++] = LanguageTag(elem).getFallbackStrings(true);
+
for (auto const& rfb : aFallbacks)
{
size_t nPosFb = 0;
@@ -2813,9 +2917,9 @@ css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp
// static
-LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale, bool bResolveSystem )
+LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale )
{
- if (rLocale.Language.isEmpty() && !bResolveSystem)
+ if (rLocale.Language.isEmpty())
return LANGUAGE_SYSTEM;
return LanguageTag( rLocale).makeFallback().getLanguageType();
@@ -2823,7 +2927,8 @@ LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Lo
// static
-bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
+bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized,
+ LanguageTag::PrivateUse ePrivateUse )
{
bool bValid = false;
@@ -2832,7 +2937,7 @@ bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicali
lt_tag_t* mpLangtag;
guard()
{
- theDataRef::get().init();
+ theDataRef().init();
mpLangtag = lt_tag_new();
}
~guard()
@@ -2850,30 +2955,37 @@ bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicali
if (pTag)
{
bValid = true;
- if (bDisallowPrivate)
+ if (ePrivateUse != PrivateUse::ALLOW)
{
- const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
- if (pPrivate && lt_string_length( pPrivate) > 0)
- bValid = false;
- else
+ do
{
+ const char* pLang = nullptr;
const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
if (pLangT)
{
- const char* pLang = lt_lang_get_tag( pLangT);
- if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
+ pLang = lt_lang_get_tag( pLangT);
+ if (pLang && strcmp( pLang, I18NLANGTAG_QLT_ASCII) == 0)
{
- // Disallow 'qlt' privateuse code to prevent
+ // Disallow 'qlt' localuse code to prevent
// confusion with our internal usage.
bValid = false;
+ break;
}
}
+ if (ePrivateUse == PrivateUse::ALLOW_ART_X && pLang && strcmp( pLang, "art") == 0)
+ {
+ // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
+ break;
+ }
+ const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
+ if (pPrivate && lt_string_length( pPrivate) > 0)
+ bValid = false;
}
+ while (false);
}
if (o_pCanonicalized)
*o_pCanonicalized = OUString::createFromAscii( pTag);
free( pTag);
- return bValid;
}
}
else
@@ -3228,7 +3340,6 @@ LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
return LanguageTag("ga-Latg");
case AppleLanguageId::TONGAN:
return LanguageTag("to");
- break;
case AppleLanguageId::GREEK_POLYTONIC:
nLang = LANGUAGE_USER_ANCIENT_GREEK;
break;