diff options
Diffstat (limited to 'i18npool/source')
-rw-r--r-- | i18npool/source/isolang/insys.cxx | 35 | ||||
-rw-r--r-- | i18npool/source/isolang/inunx.cxx | 139 | ||||
-rw-r--r-- | i18npool/source/isolang/inwnt.cxx | 104 | ||||
-rw-r--r-- | i18npool/source/isolang/isolang.cxx | 1085 | ||||
-rwxr-xr-x | i18npool/source/isolang/langid.pl | 409 | ||||
-rw-r--r-- | i18npool/source/isolang/lcid.awk | 187 | ||||
-rw-r--r-- | i18npool/source/isolang/mslangid.cxx | 492 | ||||
-rw-r--r-- | i18npool/source/languagetag/languagetag.cxx | 1254 | ||||
-rw-r--r-- | i18npool/source/languagetag/simple-langtag.cxx | 400 | ||||
-rw-r--r-- | i18npool/source/localedata/localedata.cxx | 2 | ||||
-rw-r--r-- | i18npool/source/nativenumber/nativenumbersupplier.cxx | 2 |
11 files changed, 2 insertions, 4107 deletions
diff --git a/i18npool/source/isolang/insys.cxx b/i18npool/source/isolang/insys.cxx deleted file mode 100644 index a0f1ab496c4d..000000000000 --- a/i18npool/source/isolang/insys.cxx +++ /dev/null @@ -1,35 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - - -#if defined( WNT ) - -#include "inwnt.cxx" - -#elif defined( UNX ) - -#include "inunx.cxx" - -#else - -#error unknown platform - -#endif - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/isolang/inunx.cxx b/i18npool/source/isolang/inunx.cxx deleted file mode 100644 index 005421c76c18..000000000000 --- a/i18npool/source/isolang/inunx.cxx +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - -#include <stdlib.h> // for getenv() -#include <stdio.h> - -#ifdef MACOSX -#include <osl/process.h> -#include <rtl/locale.h> -#include <rtl/ustring.hxx> - -#else // MACOSX -#include <rtl/string.hxx> - -#endif // MACOSX -#include <rtl/instance.hxx> -#include "i18npool/languagetag.hxx" -#include "i18npool/mslangid.hxx" - -// ======================================================================= - -static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; -static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; - -// ----------------------------------------------------------------------- - -// Get locale of category LC_CTYPE of environment variables -static const sal_Char* getLangFromEnvironment() -{ - static const sal_Char* pFallback = "C"; - const sal_Char *pLang = NULL; - - pLang = getenv ( "LC_ALL" ); - if (! pLang || pLang[0] == 0) - pLang = getenv ( "LC_CTYPE" ); - if (! pLang || pLang[0] == 0) - pLang = getenv( "LANG" ); - if (! pLang || pLang[0] == 0) - pLang = pFallback; - - return pLang; -} - -// ----------------------------------------------------------------------- - -// Get locale of category LC_MESSAGES of environment variables -static const sal_Char* getUILangFromEnvironment() -{ - static const sal_Char* pFallback = "C"; - const sal_Char *pLang = NULL; - - pLang = getenv ( "LANGUAGE" ); // respect the GNU extension - if (! pLang || pLang[0] == 0) - pLang = getenv ( "LC_ALL" ); - if (! pLang || pLang[0] == 0) - pLang = getenv ( "LC_MESSAGES" ); - if (! pLang || pLang[0] == 0) - pLang = getenv( "LANG" ); - if (! pLang || pLang[0] == 0) - pLang = pFallback; - - return pLang; -} - -// ----------------------------------------------------------------------- - -typedef const sal_Char * (*getLangFromEnv)(); - -static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage, - getLangFromEnv pGetLangFromEnv ) -{ - /* get the language from the user environment */ - LanguageType nLang = rSystemLanguage; - if ( nLang == LANGUAGE_DONTKNOW ) - { - ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex()); - nLang = rSystemLanguage; - if ( nLang == LANGUAGE_DONTKNOW ) - { -#ifdef MACOSX - rtl_Locale *procLocale; - (void) pGetLangFromEnv; /* unused */ - - if ( osl_getProcessLocale(&procLocale) == osl_Process_E_None ) - { - nLang = LanguageTag( *procLocale ).getLanguageType(); - OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); - rSystemLanguage = nLang; -#ifdef DEBUG - if ( rSystemLanguage == LANGUAGE_DONTKNOW ) - fprintf( stderr, "intnunx.cxx: failed to convert osl_getProcessLocale() language to system language.\n" ); -#endif - } -#else /* MACOSX */ - rtl::OString aUnxLang( (pGetLangFromEnv)() ); - nLang = MsLangId::convertUnxByteStringToLanguage( aUnxLang ); - OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); - rSystemLanguage = nLang; -#endif /* MACOSX */ - } - else { - OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); - } - } -} - -// ----------------------------------------------------------------------- - -LanguageType MsLangId::getPlatformSystemLanguage() -{ - getPlatformSystemLanguageImpl( nImplSystemLanguage, &getLangFromEnvironment); - return nImplSystemLanguage; -} - -// ----------------------------------------------------------------------- - -LanguageType MsLangId::getPlatformSystemUILanguage() -{ - getPlatformSystemLanguageImpl( nImplSystemUILanguage, &getUILangFromEnvironment); - return nImplSystemUILanguage; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/isolang/inwnt.cxx b/i18npool/source/isolang/inwnt.cxx deleted file mode 100644 index 07661bdc1ce0..000000000000 --- a/i18npool/source/isolang/inwnt.cxx +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - - -#include <sal/config.h> - -#define WINVER 0x0500 - -#ifdef _MSC_VER -#pragma warning(push,1) // disable warnings within system headers -#endif -#include <windef.h> // needed by winnls.h -#include <winbase.h> // needed by winnls.h -#include <winnls.h> -#ifdef _MSC_VER -#pragma warning(pop) -#endif -#include <rtl/instance.hxx> -#include "i18npool/mslangid.hxx" - -static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; -static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; - -// ======================================================================= - -static LanguageType GetSVLang( LANGID nWinLangId ) -{ - // No Translation, we work with the original MS code without the SORT_ID. - // So we can get never LANG-ID's from MS, which are currently not defined - // by us. - return LanguageType( static_cast<sal_uInt16>(nWinLangId & 0xffff)); -} - -// ----------------------------------------------------------------------- - -typedef LANGID (WINAPI *getLangFromEnv)(); - -static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage, - getLangFromEnv pGetUserDefault, getLangFromEnv pGetSystemDefault ) -{ - LanguageType nLang = rSystemLanguage; - if ( nLang == LANGUAGE_DONTKNOW ) - { - ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex()); - nLang = rSystemLanguage; - if ( nLang == LANGUAGE_DONTKNOW ) - { - LANGID nLangId; - - nLangId = (pGetUserDefault)(); - nLang = GetSVLang( nLangId ); - - if ( nLang == LANGUAGE_DONTKNOW ) - { - nLangId = (pGetSystemDefault)(); - nLang = GetSVLang( nLangId ); - } - OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); - rSystemLanguage = nLang; - } - else - { - OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); - } - } -} - -// ----------------------------------------------------------------------- - -LanguageType MsLangId::getPlatformSystemLanguage() -{ - getPlatformSystemLanguageImpl( nImplSystemLanguage, - &GetUserDefaultLangID, &GetSystemDefaultLangID); - return nImplSystemLanguage; -} - -// ----------------------------------------------------------------------- - -LanguageType MsLangId::getPlatformSystemUILanguage() -{ - // TODO: this could be distinguished, #if(WINVER >= 0x0500) - // needs _run_ time differentiation though, not at compile time. - getPlatformSystemLanguageImpl( nImplSystemUILanguage, - &GetUserDefaultUILanguage, &GetSystemDefaultUILanguage); - return nImplSystemUILanguage; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/isolang/isolang.cxx b/i18npool/source/isolang/isolang.cxx deleted file mode 100644 index 17a803343877..000000000000 --- a/i18npool/source/isolang/isolang.cxx +++ /dev/null @@ -1,1085 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - -#include <rtl/ustring.hxx> -#include <rtl/string.hxx> -#include <rtl/ustrbuf.hxx> -#include <rtl/strbuf.hxx> - -#include "i18npool/mslangid.hxx" - -// ======================================================================= - -struct IsoLangEngEntry -{ - LanguageType mnLang; - sal_Char maCountry[3]; -}; - -struct IsoLangNoneStdEntry -{ - LanguageType mnLang; - sal_Char maLangStr[4]; - sal_Char maCountry[9]; -}; - -struct IsoLangOtherEntry -{ - LanguageType mnLang; - const sal_Char* mpLangStr; -}; - -// ----------------------------------------------------------------------- - -// Entries for languages are lower case, for countries upper case, as -// recommended by rfc4646 (obsoletes rfc3066 (obsoletes rfc1766)). -// convertIsoNamesToLanguage() is case insensitive -// -// Sort order: Most used first. -// -// The default entry for a LangID <-> ISO mapping has to be first. For -// conversion of legacy mappings one LangID can map to multiple ISO codes, and -// one ISO code combination can map to multiple LangIDs. For compatibility with -// already existing calls it can also be a sequence as follows: - -// LANGUAGE_ENGLISH, "en", "" -// LANGUAGE_ENGLISH_US, "en", "US" - -// Here, in a convertIsoNamesToLanguage() call "en-US" is converted to -// LANGUAGE_ENGLISH_US and "en" is converted to LANGUAGE_ENGLISH. A call with -// "en-ZZ" (not in table) would result in LANGUAGE_ENGLISH because the first -// entry matching the language and not having a country is returned, regardless -// of whether being sorted before or after other entries of the same language -// with some country. To obtain a _locale_ (not language only) in the order -// given, convertLocaleToLanguageWithFallback() must be called. - -// If the sequence instead was - -// LANGUAGE_ENGLISH_US, "en", "US" -// LANGUAGE_ENGLISH, "en", "" - -// in a convertIsoNamesToLanguage() call "en-US" is still converted to -// LANGUAGE_ENGLISH_US, but "en" is _also_ converted to LANGUAGE_ENGLISH_US -// because no country was passed and it is the first entry to match the -// language, see code. A call with "en-ZZ" (not in table) would still result in -// LANGUAGE_ENGLISH. - -/* erAck: 2007-07-05T20:01+0200 TODO: The entire suite's "primary language - * only" usage and locale fall back should be cleaned up and made consistent. I - * strongly doubt that most callers exactly expect the behavior described. - * Currently these primary LangIDs are used literally in OOo code: - * LANGUAGE_ENGLISH LANGUAGE_CHINESE LANGUAGE_MALAY - * LANGUAGE_AZERI LANGUAGE_URDU LANGUAGE_KASHMIRI - */ - -static MsLangId::IsoLangEntry const aImplIsoLangEntries[] = -{ - // MS-LANGID codes ISO639-1/2/3 ISO3166 - { LANGUAGE_ENGLISH, "en", "" }, - { LANGUAGE_ENGLISH_US, "en", "US" }, - { LANGUAGE_ENGLISH_UK, "en", "GB" }, - { LANGUAGE_ENGLISH_AUS, "en", "AU" }, - { LANGUAGE_ENGLISH_CAN, "en", "CA" }, - { LANGUAGE_FRENCH, "fr", "FR" }, - { LANGUAGE_FRENCH, "fr", "" }, - { LANGUAGE_GERMAN, "de", "DE" }, - { LANGUAGE_ITALIAN, "it", "IT" }, - { LANGUAGE_DUTCH, "nl", "NL" }, - { LANGUAGE_SPANISH_MODERN, "es", "ES" }, - { LANGUAGE_SPANISH_DATED, "es", "ES" }, - { LANGUAGE_PORTUGUESE, "pt", "PT" }, - { LANGUAGE_PORTUGUESE_BRAZILIAN, "pt", "BR" }, - { LANGUAGE_DANISH, "da", "DK" }, - { LANGUAGE_GREEK, "el", "GR" }, - { LANGUAGE_CHINESE, "zh", "" }, - { LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN" }, - { LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW" }, - { LANGUAGE_CHINESE_HONGKONG, "zh", "HK" }, - { LANGUAGE_CHINESE_SINGAPORE, "zh", "SG" }, - { LANGUAGE_CHINESE_MACAU, "zh", "MO" }, - { LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK" }, - { LANGUAGE_JAPANESE, "ja", "JP" }, - { LANGUAGE_KOREAN, "ko", "KR" }, - { LANGUAGE_KOREAN_JOHAB, "ko", "KR" }, - { LANGUAGE_USER_KOREAN_NORTH, "ko", "KP" }, - { LANGUAGE_SWEDISH, "sv", "SE" }, - { LANGUAGE_SWEDISH_FINLAND, "sv", "FI" }, - { LANGUAGE_FINNISH, "fi", "FI" }, - { LANGUAGE_RUSSIAN, "ru", "RU" }, - { LANGUAGE_TATAR, "tt", "RU" }, - { LANGUAGE_ENGLISH_NZ, "en", "NZ" }, - { LANGUAGE_ENGLISH_EIRE, "en", "IE" }, - { LANGUAGE_DUTCH_BELGIAN, "nl", "BE" }, - { LANGUAGE_FRENCH_BELGIAN, "fr", "BE" }, - { LANGUAGE_FRENCH_CANADIAN, "fr", "CA" }, - { LANGUAGE_FRENCH_SWISS, "fr", "CH" }, - { LANGUAGE_GERMAN_SWISS, "de", "CH" }, - { LANGUAGE_GERMAN_AUSTRIAN, "de", "AT" }, - { LANGUAGE_ITALIAN_SWISS, "it", "CH" }, - { LANGUAGE_ALBANIAN, "sq", "AL" }, - { LANGUAGE_ARABIC_SAUDI_ARABIA, "ar", "SA" }, - { LANGUAGE_ARABIC_EGYPT, "ar", "EG" }, - { LANGUAGE_ARABIC_UAE, "ar", "AE" }, - { LANGUAGE_ARABIC_IRAQ, "ar", "IQ" }, - { LANGUAGE_ARABIC_LIBYA, "ar", "LY" }, - { LANGUAGE_ARABIC_ALGERIA, "ar", "DZ" }, - { LANGUAGE_ARABIC_MOROCCO, "ar", "MA" }, - { LANGUAGE_ARABIC_TUNISIA, "ar", "TN" }, - { LANGUAGE_ARABIC_OMAN, "ar", "OM" }, - { LANGUAGE_ARABIC_YEMEN, "ar", "YE" }, - { LANGUAGE_ARABIC_SYRIA, "ar", "SY" }, - { LANGUAGE_ARABIC_JORDAN, "ar", "JO" }, - { LANGUAGE_ARABIC_LEBANON, "ar", "LB" }, - { LANGUAGE_ARABIC_KUWAIT, "ar", "KW" }, - { LANGUAGE_ARABIC_BAHRAIN, "ar", "BH" }, - { LANGUAGE_ARABIC_QATAR, "ar", "QA" }, - { LANGUAGE_USER_ARABIC_CHAD, "ar", "TD" }, - { LANGUAGE_USER_ARABIC_COMOROS, "ar", "KM" }, - { LANGUAGE_USER_ARABIC_DJIBOUTI, "ar", "DJ" }, - { LANGUAGE_USER_ARABIC_ERITREA, "ar", "ER" }, - { LANGUAGE_USER_ARABIC_ISRAEL, "ar", "IL" }, - { LANGUAGE_USER_ARABIC_MAURITANIA, "ar", "MR" }, - { LANGUAGE_USER_ARABIC_PALESTINE, "ar", "PS" }, - { LANGUAGE_USER_ARABIC_SOMALIA, "ar", "SO" }, - { LANGUAGE_USER_ARABIC_SUDAN, "ar", "SD" }, - { LANGUAGE_ARABIC_PRIMARY_ONLY, "ar", "" }, - { LANGUAGE_BASQUE, "eu", "" }, - { LANGUAGE_BULGARIAN, "bg", "BG" }, - { LANGUAGE_CZECH, "cs", "CZ" }, - { LANGUAGE_CZECH, "cz", "" }, - { LANGUAGE_ENGLISH_JAMAICA, "en", "JM" }, - { LANGUAGE_ENGLISH_CARRIBEAN, "en", "BS" }, // not 100%, because AG is Bahamas - { LANGUAGE_ENGLISH_BELIZE, "en", "BZ" }, - { LANGUAGE_ENGLISH_TRINIDAD, "en", "TT" }, - { LANGUAGE_ENGLISH_ZIMBABWE, "en", "ZW" }, - { LANGUAGE_ENGLISH_INDONESIA, "en", "ID" }, - { LANGUAGE_ESTONIAN, "et", "EE" }, - { LANGUAGE_FAEROESE, "fo", "FO" }, - { LANGUAGE_FARSI, "fa", "IR" }, - { LANGUAGE_FRENCH_LUXEMBOURG, "fr", "LU" }, - { LANGUAGE_FRENCH_MONACO, "fr", "MC" }, - { LANGUAGE_GERMAN_LUXEMBOURG, "de", "LU" }, - { LANGUAGE_GERMAN_LIECHTENSTEIN, "de", "LI" }, - { LANGUAGE_HEBREW, "he", "IL" }, // new: old was "iw" - { LANGUAGE_HEBREW, "iw", "IL" }, // old: new is "he" - { LANGUAGE_HUNGARIAN, "hu", "HU" }, - { LANGUAGE_ICELANDIC, "is", "IS" }, - { LANGUAGE_INDONESIAN, "id", "ID" }, // new: old was "in" - { LANGUAGE_INDONESIAN, "in", "ID" }, // old: new is "id" - { LANGUAGE_NORWEGIAN, "no", "NO" }, - { LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO" }, - { LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO" }, - { LANGUAGE_POLISH, "pl", "PL" }, - { LANGUAGE_RHAETO_ROMAN, "rm", "CH" }, - { LANGUAGE_ROMANIAN, "ro", "RO" }, - { LANGUAGE_ROMANIAN_MOLDOVA, "ro", "MD" }, - { LANGUAGE_SLOVAK, "sk", "SK" }, - { LANGUAGE_SLOVENIAN, "sl", "SI" }, - { LANGUAGE_SPANISH_MEXICAN, "es", "MX" }, - { LANGUAGE_SPANISH_GUATEMALA, "es", "GT" }, - { LANGUAGE_SPANISH_COSTARICA, "es", "CR" }, - { LANGUAGE_SPANISH_PANAMA, "es", "PA" }, - { LANGUAGE_SPANISH_DOMINICAN_REPUBLIC, "es", "DO" }, - { LANGUAGE_SPANISH_VENEZUELA, "es", "VE" }, - { LANGUAGE_SPANISH_COLOMBIA, "es", "CO" }, - { LANGUAGE_SPANISH_PERU, "es", "PE" }, - { LANGUAGE_SPANISH_ARGENTINA, "es", "AR" }, - { LANGUAGE_SPANISH_ECUADOR, "es", "EC" }, - { LANGUAGE_SPANISH_CHILE, "es", "CL" }, - { LANGUAGE_SPANISH_URUGUAY, "es", "UY" }, - { LANGUAGE_SPANISH_PARAGUAY, "es", "PY" }, - { LANGUAGE_SPANISH_BOLIVIA, "es", "BO" }, - { LANGUAGE_SPANISH_EL_SALVADOR, "es", "SV" }, - { LANGUAGE_SPANISH_HONDURAS, "es", "HN" }, - { LANGUAGE_SPANISH_NICARAGUA, "es", "NI" }, - { LANGUAGE_SPANISH_PUERTO_RICO, "es", "PR" }, - { LANGUAGE_SPANISH_UNITED_STATES, "es", "US" }, - { LANGUAGE_SPANISH_LATIN_AMERICA, "es", "" }, - { LANGUAGE_TURKISH, "tr", "TR" }, - { LANGUAGE_UKRAINIAN, "uk", "UA" }, - { LANGUAGE_VIETNAMESE, "vi", "VN" }, - { LANGUAGE_LATVIAN, "lv", "LV" }, - { LANGUAGE_MACEDONIAN, "mk", "MK" }, - { LANGUAGE_MALAY, "ms", "" }, - { LANGUAGE_MALAY_MALAYSIA, "ms", "MY" }, - { LANGUAGE_MALAY_BRUNEI_DARUSSALAM, "ms", "BN" }, - { LANGUAGE_ENGLISH_MALAYSIA, "en", "MY" }, - { LANGUAGE_THAI, "th", "TH" }, - { LANGUAGE_LITHUANIAN, "lt", "LT" }, - { LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT" }, - { LANGUAGE_CROATIAN, "hr", "HR" }, // Croatian in Croatia - { LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA" }, - { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA" }, -// { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_AND_HERZEGOVINA, "bs", "BA" }, // script codes not supported yet - { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS" }, // Serbian Cyrillic in Serbia - { LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU" }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_CS instead, sr_CS not supported by ICU 2.6 (3.4 does) - { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS" }, // alias to be able to integrate localizations, rsc needs it - { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME" }, - { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA" }, - { LANGUAGE_SERBIAN, "sr", "" }, // SERBIAN is only LID, MS-LCID not defined (was dupe of CROATIAN) - { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS" }, // Serbian Latin in Serbia; kludge, needed to be sr_Latn_RS instead, script codes not supported yet - { LANGUAGE_SERBIAN_LATIN, "sh", "YU" }, // legacy Serbian Latin in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_Latn_CS instead, script codes not supported yet - { LANGUAGE_SERBIAN_LATIN, "sh", "CS" }, // Serbian Latin in Serbia and Montenegro; kludge, needed to be sr_Latn_CS instead, script codes not supported yet - { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME" }, // Serbian Latin in Montenegro; kludge, needed to be sr_Latn_ME instead, script codes not supported yet - { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA" }, - { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" }, // kludge, needed to be sr_Latn instead, script codes not supported yet - { LANGUAGE_ARMENIAN, "hy", "AM" }, - { LANGUAGE_AZERI, "az", "" }, - { LANGUAGE_AZERI_LATIN, "az", "AZ" }, -// { LANGUAGE_AZERI_CYRILLIC, "az", "AZ" }, // script codes not supported yet - { LANGUAGE_UZBEK_LATIN, "uz", "UZ" }, -// { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ" }, // script codes not supported yet - { LANGUAGE_BENGALI_BANGLADESH, "bn", "BD" }, - { LANGUAGE_BENGALI, "bn", "IN" }, - { LANGUAGE_BURMESE, "my", "MM" }, - { LANGUAGE_KAZAKH, "kk", "KZ" }, - { LANGUAGE_ENGLISH_INDIA, "en", "IN" }, - { LANGUAGE_URDU, "ur", "" }, - { LANGUAGE_URDU_INDIA, "ur", "IN" }, - { LANGUAGE_URDU_PAKISTAN, "ur", "PK" }, - { LANGUAGE_HINDI, "hi", "IN" }, - { LANGUAGE_GUJARATI, "gu", "IN" }, - { LANGUAGE_KANNADA, "kn", "IN" }, - { LANGUAGE_ASSAMESE, "as", "IN" }, - { LANGUAGE_KASHMIRI, "ks", "" }, - { LANGUAGE_KASHMIRI_INDIA, "ks", "IN" }, - { LANGUAGE_MALAYALAM, "ml", "IN" }, - { LANGUAGE_MANIPURI, "mni", "IN" }, - { LANGUAGE_MARATHI, "mr", "IN" }, - { LANGUAGE_KONKANI, "kok", "IN" }, - { LANGUAGE_NEPALI, "ne", "NP" }, - { LANGUAGE_NEPALI_INDIA, "ne", "IN" }, - { LANGUAGE_ORIYA, "or", "IN" }, - { LANGUAGE_PUNJABI, "pa", "IN" }, - { LANGUAGE_SANSKRIT, "sa", "IN" }, - { LANGUAGE_SINDHI, "sd", "IN" }, - { LANGUAGE_TAMIL, "ta", "IN" }, - { LANGUAGE_TELUGU, "te", "IN" }, - { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK" }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB - { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK" }, - { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK" }, - { LANGUAGE_BELARUSIAN, "be", "BY" }, - { LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default) - { LANGUAGE_CATALAN, "ca", "AD" }, // Andorra - { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content! - { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content! -// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back. - { LANGUAGE_FRENCH_CAMEROON, "fr", "CM" }, - { LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" }, - { LANGUAGE_FRENCH_MALI, "fr", "ML" }, - { LANGUAGE_FRENCH_SENEGAL, "fr", "SN" }, - { LANGUAGE_FRENCH_ZAIRE, "fr", "CD" }, // Democratic Republic Of Congo - { LANGUAGE_FRENCH_MOROCCO, "fr", "MA" }, - { LANGUAGE_FRENCH_REUNION, "fr", "RE" }, - { LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" }, - { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" }, // unknown ISO country code - { LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL" }, - { LANGUAGE_GAELIC_IRELAND, "ga", "IE" }, - { LANGUAGE_GAELIC_SCOTLAND, "gd", "GB" }, - { LANGUAGE_GALICIAN, "gl", "ES" }, - { LANGUAGE_GEORGIAN, "ka", "GE" }, - { LANGUAGE_KHMER, "km", "KH" }, - { LANGUAGE_KIRGHIZ, "ky", "KG" }, - { LANGUAGE_LAO, "lo", "LA" }, - { LANGUAGE_MALTESE, "mt", "MT" }, - { LANGUAGE_MONGOLIAN, "mn", "MN" }, // Cyrillic script - { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn", "MN" }, - { LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD" }, - { LANGUAGE_SWAHILI, "sw", "KE" }, - { LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ" }, - { LANGUAGE_TAJIK, "tg", "TJ" }, - { LANGUAGE_TIBETAN, "bo", "CN" }, // CN politically correct? - { LANGUAGE_DZONGKHA, "dz", "BT" }, - { LANGUAGE_TURKMEN, "tk", "TM" }, - { LANGUAGE_WELSH, "cy", "GB" }, - { LANGUAGE_SESOTHO, "st", "ZA" }, - { LANGUAGE_SEPEDI, "nso", "ZA" }, - { LANGUAGE_SEPEDI, "ns", "ZA" }, // fake "ns" for compatibility with existing OOo1.1.x localization to be able to read those documents - { LANGUAGE_TSONGA, "ts", "ZA" }, - { LANGUAGE_TSWANA, "tn", "ZA" }, - { LANGUAGE_ENGLISH_SAFRICA, "en", "ZA" }, - { LANGUAGE_AFRIKAANS, "af", "ZA" }, - { LANGUAGE_VENDA, "ve", "ZA" }, // default 639-1 - { LANGUAGE_VENDA, "ven", "ZA" }, // 639-2 may have been used temporarily since 2004-07-23 - { LANGUAGE_XHOSA, "xh", "ZA" }, - { LANGUAGE_ZULU, "zu", "ZA" }, - { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC" }, - { LANGUAGE_QUECHUA_PERU, "qu", "PE" }, - { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO" }, // macro: quh-BO, qul-BO - { LANGUAGE_PASHTO, "ps", "AF" }, - { LANGUAGE_OROMO, "om", "ET" }, - { LANGUAGE_DHIVEHI, "dv", "MV" }, - { LANGUAGE_UIGHUR_CHINA, "ug", "CN" }, - { LANGUAGE_TIGRIGNA_ETHIOPIA, "ti", "ET" }, - { LANGUAGE_TIGRIGNA_ERITREA, "ti", "ER" }, - { LANGUAGE_AMHARIC_ETHIOPIA, "am", "ET" }, - { LANGUAGE_GUARANI_PARAGUAY, "gug", "PY" }, - { LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US" }, - { LANGUAGE_EDO, "bin", "NG" }, - { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG" }, - { LANGUAGE_HAUSA_NIGERIA, "ha", "NG" }, - { LANGUAGE_USER_HAUSA_GHANA, "ha", "GH" }, - { LANGUAGE_IGBO_NIGERIA, "ig", "NG" }, - { LANGUAGE_KANURI_NIGERIA, "kr", "NG" }, - { LANGUAGE_YORUBA, "yo", "NG" }, - { LANGUAGE_SOMALI, "so", "SO" }, - { LANGUAGE_PAPIAMENTU, "pap", "AN" }, - { LANGUAGE_USER_PAPIAMENTU_ARUBA, "pap", "AW" }, - { LANGUAGE_USER_PAPIAMENTU_CURACAO, "pap", "CW" }, - { LANGUAGE_USER_PAPIAMENTU_BONAIRE, "pap", "BQ" }, - { LANGUAGE_ENGLISH_SINGAPORE, "en", "SG" }, - { LANGUAGE_USER_YIDDISH_US, "yi", "US" }, - { LANGUAGE_YIDDISH, "yi", "IL" }, // new: old was "ji" - { LANGUAGE_YIDDISH, "ji", "IL" }, // old: new is "yi" - { LANGUAGE_SYRIAC, "syr", "TR" }, // "TR" according to http://www.ethnologue.com/show_language.asp?code=SYC - { LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK" }, - { LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US" }, - { LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA" }, -// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA" }, // script codes not supported yet - { LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO" }, - { LANGUAGE_SAMI_INARI, "smn", "FI" }, - { LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO" }, - { LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE" }, - { LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI" }, - { LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE" }, - { LANGUAGE_SAMI_SKOLT, "sms", "FI" }, - { LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO" }, - { LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE" }, - { LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU" }, - { LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL" }, - { LANGUAGE_CORSICAN_FRANCE, "co", "FR" }, - { LANGUAGE_ALSATIAN_FRANCE, "gsw", "FR" }, // in fact 'gsw' is Schwyzerduetsch (Swiss German), which is a dialect of Alemannic German, as is Alsatian. They aren't distinct languages and share this code. - { LANGUAGE_YAKUT_RUSSIA, "sah", "RU" }, - { LANGUAGE_MOHAWK_CANADA, "moh", "CA" }, - { LANGUAGE_BASHKIR_RUSSIA, "ba", "RU" }, - { LANGUAGE_KICHE_GUATEMALA, "qut", "GT" }, - { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF" }, - { LANGUAGE_WOLOF_SENEGAL, "wo", "SN" }, - { LANGUAGE_FILIPINO, "fil", "PH" }, - { LANGUAGE_USER_TAGALOG, "tl", "PH" }, - { LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH" }, -// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG" }, // ISO "nic" is only a collective language code - { LANGUAGE_YI, "ii", "CN" }, - { LANGUAGE_TAMAZIGHT_LATIN, "kab", "DZ" }, // In practice Kabyle is the language used for this - { LANGUAGE_OBSOLETE_USER_KABYLE, "kab", "DZ" }, - { LANGUAGE_TAMAZIGHT_LATIN, "ber", "DZ" }, // In practice Algeria has standardized on Kabyle as the member of the "ber" collective which gets used there. - { LANGUAGE_TAMAZIGHT_TIFINAGH, "ber", "MA" }, // Morocco is officially using Tifinagh for its Berber languages so store it to distinguish explicitly from LANGUAGE_TAMAZIGHT_LATIN, even though as a collective language its not of much use -// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" }, // ISO "ber" only collective! - { LANGUAGE_LATIN, "la", "VA" }, - { LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA" }, - { LANGUAGE_USER_ESPERANTO, "eo", "" }, - { LANGUAGE_USER_INTERLINGUA, "ia", "" }, - { LANGUAGE_MAORI_NEW_ZEALAND, "mi", "NZ" }, - { LANGUAGE_OBSOLETE_USER_MAORI, "mi", "NZ" }, - { LANGUAGE_KINYARWANDA_RWANDA, "rw", "RW" }, - { LANGUAGE_OBSOLETE_USER_KINYARWANDA, "rw", "RW" }, - { LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE" }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN - { LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE" }, - { LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE" }, // MS maps this to 'wee-DE', which is nonsense. 'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE - { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE" }, - { LANGUAGE_OCCITAN_FRANCE, "oc", "FR" }, - { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR" }, - { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR" }, - { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY" }, - { LANGUAGE_USER_KURDISH_IRAQ, "ku", "IQ" }, - { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR" }, - { LANGUAGE_USER_SARDINIAN, "sc", "IT" }, // macrolanguage code - { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT" }, - { LANGUAGE_USER_SARDINIAN_GALLURESE, "sdn", "IT" }, - { LANGUAGE_USER_SARDINIAN_LOGUDORESE, "src", "IT" }, - { LANGUAGE_USER_SARDINIAN_SASSARESE, "sdc", "IT" }, - { LANGUAGE_BRETON_FRANCE, "br", "FR" }, - { LANGUAGE_OBSOLETE_USER_BRETON, "br", "FR" }, - { LANGUAGE_KALAALLISUT_GREENLAND, "kl", "GL" }, - { LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL" }, - { LANGUAGE_USER_SWAZI, "ss", "ZA" }, - { LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA" }, - { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW" }, - { LANGUAGE_USER_MOORE, "mos", "BF" }, - { LANGUAGE_USER_BAMBARA, "bm", "ML" }, - { LANGUAGE_USER_AKAN, "ak", "GH" }, - { LANGUAGE_LUXEMBOURGISH_LUXEMBOURG, "lb", "LU" }, - { LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH, "lb", "LU" }, - { LANGUAGE_USER_FRIULIAN, "fur", "IT" }, - { LANGUAGE_USER_FIJIAN, "fj", "FJ" }, - { LANGUAGE_USER_AFRIKAANS_NAMIBIA, "af", "NA" }, - { LANGUAGE_USER_ENGLISH_NAMIBIA, "en", "NA" }, - { LANGUAGE_USER_WALLOON, "wa", "BE" }, - { LANGUAGE_USER_COPTIC, "cop", "EG" }, - { LANGUAGE_USER_GASCON, "gsc", "FR" }, - { LANGUAGE_USER_GERMAN_BELGIUM, "de", "BE" }, - { LANGUAGE_USER_CHUVASH, "cv", "RU" }, - { LANGUAGE_USER_EWE_GHANA, "ee", "GH" }, - { LANGUAGE_USER_ENGLISH_GHANA, "en", "GH" }, - { LANGUAGE_USER_SANGO, "sg", "CF" }, - { LANGUAGE_USER_GANDA, "lg", "UG" }, - { LANGUAGE_USER_LINGALA_DRCONGO, "ln", "CD" }, - { LANGUAGE_USER_LOW_GERMAN, "nds", "DE" }, - { LANGUAGE_USER_HILIGAYNON, "hil", "PH" }, - { LANGUAGE_USER_ENGLISH_MALAWI, "en", "MW" }, /* en default for MW */ - { LANGUAGE_USER_NYANJA, "ny", "MW" }, - { LANGUAGE_USER_KASHUBIAN, "csb", "PL" }, - { LANGUAGE_USER_SPANISH_CUBA, "es", "CU" }, - { LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA, "qul", "BO" }, - { LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA, "quh", "BO" }, - { LANGUAGE_USER_BODO_INDIA, "brx", "IN" }, - { LANGUAGE_USER_DOGRI_INDIA, "dgo", "IN" }, - { LANGUAGE_USER_MAITHILI_INDIA, "mai", "IN" }, - { LANGUAGE_USER_SANTALI_INDIA, "sat", "IN" }, - { LANGUAGE_USER_TETUN, "tet", "ID" }, - { LANGUAGE_USER_TETUN_TIMOR_LESTE, "tet", "TL" }, - { LANGUAGE_USER_TOK_PISIN, "tpi", "PG" }, - { LANGUAGE_USER_SHUSWAP, "shs", "CA" }, - { LANGUAGE_USER_ANCIENT_GREEK, "grc", "GR" }, - { LANGUAGE_USER_ASTURIAN, "ast", "ES" }, - { LANGUAGE_USER_LATGALIAN, "ltg", "LV" }, - { LANGUAGE_USER_MAORE, "swb", "YT" }, - { LANGUAGE_USER_BUSHI, "buc", "YT" }, - { LANGUAGE_USER_TAHITIAN, "ty", "PF" }, - { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" }, - { LANGUAGE_USER_MALAGASY_PLATEAU, "mg", "MG" }, - { LANGUAGE_USER_BAFIA, "ksf", "CM" }, - { LANGUAGE_USER_GIKUYU, "ki", "KE" }, - { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" }, - { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" }, - { LANGUAGE_USER_LIMBU, "lif", "NP" }, - { LANGUAGE_USER_LOJBAN, "jbo", "" }, - { LANGUAGE_USER_HAITIAN, "ht", "HT" }, - { LANGUAGE_FRENCH_HAITI, "fr", "HT" }, - { LANGUAGE_USER_BEEMBE, "beq", "CG" }, - { LANGUAGE_USER_BEKWEL, "bkw", "CG" }, - { LANGUAGE_USER_KITUBA, "mkw", "CG" }, - { LANGUAGE_USER_LARI, "ldi", "CG" }, - { LANGUAGE_USER_MBOCHI, "mdw", "CG" }, - { LANGUAGE_USER_TEKE_EBOO, "ebo", "CG" }, - { LANGUAGE_USER_TEKE_IBALI, "tek", "CG" }, - { LANGUAGE_USER_TEKE_TYEE, "tyx", "CG" }, - { LANGUAGE_USER_VILI, "vif", "CG" }, - { LANGUAGE_USER_PORTUGUESE_ANGOLA, "pt", "AO" }, - { LANGUAGE_USER_MANX, "gv", "GB" }, - { LANGUAGE_USER_ARAGONESE, "an", "ES" }, - { LANGUAGE_USER_KEYID, "qtz", "" }, // key id pseudolanguage used for UI testing - { LANGUAGE_USER_PALI_LATIN, "pli", "" }, // Pali with Latin script - { LANGUAGE_USER_KYRGYZ_CHINA, "ky", "CN" }, - { LANGUAGE_USER_KOMI_ZYRIAN, "kpv", "RU" }, - { LANGUAGE_USER_KOMI_PERMYAK, "koi", "RU" }, - { LANGUAGE_USER_PITJANTJATJARA, "pjt", "AU" }, - { LANGUAGE_USER_ERZYA, "myv", "RU" }, - { LANGUAGE_USER_MARI_MEADOW, "mhr", "RU" }, - { LANGUAGE_USER_KHANTY, "kca", "RU" }, - { LANGUAGE_USER_LIVONIAN, "liv", "RU" }, - { LANGUAGE_USER_MOKSHA, "mdf", "RU" }, - { LANGUAGE_USER_MARI_HILL, "mrj", "RU" }, - { LANGUAGE_USER_NGANASAN, "nio", "RU" }, - { LANGUAGE_USER_OLONETS, "olo", "RU" }, - { LANGUAGE_USER_VEPS, "vep", "RU" }, - { LANGUAGE_USER_VORO, "vro", "EE" }, - { LANGUAGE_USER_NENETS, "yrk", "RU" }, - { LANGUAGE_USER_AKA, "axk", "CF" }, - { LANGUAGE_USER_AKA_CONGO, "axk", "CG" }, - { LANGUAGE_USER_DIBOLE, "bvx", "CG" }, - { LANGUAGE_USER_DOONDO, "dde", "CG" }, - { LANGUAGE_USER_KAAMBA, "xku", "CG" }, - { LANGUAGE_USER_KOONGO, "kng", "CD" }, - { LANGUAGE_USER_KOONGO_CONGO, "kng", "CG" }, - { LANGUAGE_USER_KUNYI, "njx", "CG" }, - { LANGUAGE_USER_NGUNGWEL, "ngz", "CG" }, - { LANGUAGE_USER_NJYEM, "njy", "CM" }, - { LANGUAGE_USER_NJYEM_CONGO, "njy", "CG" }, - { LANGUAGE_USER_PUNU, "puu", "GA" }, - { LANGUAGE_USER_PUNU_CONGO, "puu", "CG" }, - { LANGUAGE_USER_SUUNDI, "sdj", "CG" }, - { LANGUAGE_USER_TEKE_KUKUYA, "kkw", "CG" }, - { LANGUAGE_USER_TSAANGI, "tsa", "CG" }, - { LANGUAGE_USER_YAKA, "iyx", "CG" }, - { LANGUAGE_USER_YOMBE, "yom", "CD" }, - { LANGUAGE_USER_YOMBE_CONGO, "yom", "CG" }, - { LANGUAGE_USER_SIDAMA, "sid", "ET" }, - { LANGUAGE_MULTIPLE, "mul", "" }, // multiple languages, many languages are used - { LANGUAGE_UNDETERMINED, "und", "" }, // undetermined language, language cannot be identified - { LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information - { LANGUAGE_DONTKNOW, "", "" } // marks end of table -}; - -static MsLangId::IsoLangEntry aLastResortFallbackEntry = -{ LANGUAGE_ENGLISH_US, "en", "US" }; - -OUString MsLangId::IsoLangEntry::getTagString() const -{ - if (maCountry[0]) - return OUString( OUString::createFromAscii( maLangStr) + "-" + OUString::createFromAscii( maCountry)); - else - return OUString::createFromAscii( maLangStr); -} - -// ----------------------------------------------------------------------- - -// In this table are the countries which should mapped to a specific -// english language -static IsoLangEngEntry const aImplIsoLangEngEntries[] = -{ - { LANGUAGE_ENGLISH_UK, "AO" }, // Angola - { LANGUAGE_ENGLISH_UK, "BJ" }, // Benin - { LANGUAGE_ENGLISH_UK, "BW" }, // Botswana - { LANGUAGE_ENGLISH_UK, "BI" }, // Burundi - { LANGUAGE_ENGLISH_UK, "CM" }, // Cameroon - { LANGUAGE_ENGLISH_UK, "GA" }, // Gabon - { LANGUAGE_ENGLISH_UK, "GM" }, // Gambia - { LANGUAGE_ENGLISH_UK, "GH" }, // Ghana - { LANGUAGE_ENGLISH_UK, "GN" }, // Guinea - { LANGUAGE_ENGLISH_UK, "LS" }, // Lesotho - { LANGUAGE_ENGLISH_UK, "MW" }, // Malawi - { LANGUAGE_ENGLISH_UK, "MT" }, // Malta - { LANGUAGE_ENGLISH_UK, "NA" }, // Namibia - { LANGUAGE_ENGLISH_UK, "NG" }, // Nigeria - { LANGUAGE_ENGLISH_UK, "UG" }, // Uganda - { LANGUAGE_ENGLISH_UK, "ZM" }, // Zambia - { LANGUAGE_ENGLISH_UK, "ZW" }, // Zimbabwe - { LANGUAGE_ENGLISH_UK, "SZ" }, // Swaziland - { LANGUAGE_ENGLISH_UK, "NG" }, // Sierra Leone - { LANGUAGE_ENGLISH_UK, "KN" }, // Saint Kitts and Nevis - { LANGUAGE_ENGLISH_UK, "SH" }, // St. Helena - { LANGUAGE_ENGLISH_UK, "IO" }, // British Indian Oceanic Territory - { LANGUAGE_ENGLISH_UK, "FK" }, // Falkland Islands - { LANGUAGE_ENGLISH_UK, "GI" }, // Gibraltar - { LANGUAGE_ENGLISH_UK, "KI" }, // Kiribati - { LANGUAGE_ENGLISH_UK, "VG" }, // Virgin Islands - { LANGUAGE_ENGLISH_UK, "MU" }, // Mauritius - { LANGUAGE_ENGLISH_UK, "FJ" }, // Fiji - { LANGUAGE_ENGLISH_US, "KI" }, // Kiribati - { LANGUAGE_ENGLISH_US, "LR" }, // Liberia - { LANGUAGE_ENGLISH_US, "GU" }, // Guam - { LANGUAGE_ENGLISH_US, "MH" }, // Marshall Islands - { LANGUAGE_ENGLISH_US, "PW" }, // Palau - { LANGUAGE_ENGLISH_CARRIBEAN, "AI" }, // Anguilla - { LANGUAGE_ENGLISH_CARRIBEAN, "AG" }, // Antigua and Barbuda - { LANGUAGE_ENGLISH_CARRIBEAN, "BS" }, // Bahamas - { LANGUAGE_ENGLISH_CARRIBEAN, "BB" }, // Barbedos - { LANGUAGE_ENGLISH_CARRIBEAN, "BM" }, // Bermuda - { LANGUAGE_ENGLISH_CARRIBEAN, "KY" }, // Cayman Islands - { LANGUAGE_ENGLISH_CARRIBEAN, "GD" }, // Grenada - { LANGUAGE_ENGLISH_CARRIBEAN, "DM" }, // Dominica - { LANGUAGE_ENGLISH_CARRIBEAN, "HT" }, // Haiti - { LANGUAGE_ENGLISH_CARRIBEAN, "MS" }, // Montserrat - { LANGUAGE_ENGLISH_CARRIBEAN, "FM" }, // Micronesia - { LANGUAGE_ENGLISH_CARRIBEAN, "VC" }, // St. Vincent / Grenadines - { LANGUAGE_ENGLISH_CARRIBEAN, "LC" }, // Saint Lucia - { LANGUAGE_ENGLISH_CARRIBEAN, "TC" }, // Turks & Caicos Islands - { LANGUAGE_ENGLISH_CARRIBEAN, "GY" }, // Guyana - { LANGUAGE_ENGLISH_CARRIBEAN, "TT" }, // Trinidad and Tobago - { LANGUAGE_ENGLISH_AUS, "CX" }, // Christmas Islands - { LANGUAGE_ENGLISH_AUS, "CC" }, // Cocos (Keeling) Islands - { LANGUAGE_ENGLISH_AUS, "NF" }, // Norfolk Island - { LANGUAGE_ENGLISH_AUS, "PG" }, // Papua New Guinea - { LANGUAGE_ENGLISH_AUS, "SB" }, // Solomon Islands - { LANGUAGE_ENGLISH_AUS, "TV" }, // Tuvalu - { LANGUAGE_ENGLISH_AUS, "NR" }, // Nauru - { LANGUAGE_ENGLISH_NZ, "CK" }, // Cook Islands - { LANGUAGE_ENGLISH_NZ, "NU" }, // Niue - { LANGUAGE_ENGLISH_NZ, "TK" }, // Tokelau - { LANGUAGE_ENGLISH_NZ, "TO" }, // Tonga - { LANGUAGE_DONTKNOW, "" } // marks end of table -}; - -// ----------------------------------------------------------------------- - -static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] = -{ - { LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766 - { LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766 - { LANGUAGE_SERBIAN_LATIN, "sr", "latin" }, - { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" }, - { LANGUAGE_AZERI_LATIN, "az", "latin" }, - { LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" }, - { LANGUAGE_DONTKNOW, "", "" } // marks end of table -}; - -// ----------------------------------------------------------------------- - -// in this table are only names to find the best language -static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries2[] = -{ - { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmaal" }, - { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmal" }, - { LANGUAGE_NORWEGIAN_NYNORSK, "no", "nynorsk" }, - { LANGUAGE_DONTKNOW, "", "" } // marks end of table -}; - -// ----------------------------------------------------------------------- - -// in this table are only names to find the best language -static IsoLangOtherEntry const aImplOtherEntries[] = -{ - { LANGUAGE_ENGLISH_US, "c" }, - { LANGUAGE_CHINESE, "chinese" }, - { LANGUAGE_GERMAN, "german" }, - { LANGUAGE_JAPANESE, "japanese" }, - { LANGUAGE_KOREAN, "korean" }, - { LANGUAGE_ENGLISH_US, "posix" }, - { LANGUAGE_CHINESE_TRADITIONAL, "tchinese" }, - { LANGUAGE_DONTKNOW, NULL } // marks end of table -}; - - -// in this table are only privateuse names -static IsoLangOtherEntry const aImplPrivateUseEntries[] = -{ - { LANGUAGE_USER_PRIV_NOTRANSLATE, "x-no-translate" }, //! not BCP47 but legacy in .xcu configmgr - { LANGUAGE_USER_PRIV_DEFAULT, "x-default" }, - { LANGUAGE_USER_PRIV_COMMENT, "x-comment" }, - { LANGUAGE_USER_PRIV_JOKER, "*" }, //! not BCP47 but transferable in configmgr - { LANGUAGE_DONTKNOW, NULL } // marks end of table -}; - -// ======================================================================= - -// static -void MsLangId::Conversion::convertLanguageToIsoNames( LanguageType nLang, - OUString& rLangStr, OUString& rCountry ) -{ - if ( nLang == LANGUAGE_SYSTEM ) - nLang = MsLangId::getSystemLanguage(); - - // Search for LangID (in this table we find only defined ISO combinations) - const IsoLangEntry* pEntry = aImplIsoLangEntries; - do - { - if ( pEntry->mnLang == nLang ) - { - rLangStr = OUString::createFromAscii( pEntry->maLangStr ); - rCountry = OUString::createFromAscii( pEntry->maCountry ); - return; - } - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - // Search for LangID if we didn't find a specific ISO combination. - // All entries in this table are allowed for mime specifications, - // but not defined ISO combinations. - const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; - do - { - if ( pNoneStdEntry->mnLang == nLang ) - { - rLangStr = OUString::createFromAscii( pNoneStdEntry->maLangStr ); - rCountry = OUString::createFromAscii( pNoneStdEntry->maCountry ); - return; - } - ++pNoneStdEntry; - } - while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); - - // Look for privateuse definitions. - const IsoLangOtherEntry* pPrivateEntry = aImplPrivateUseEntries; - do - { - if ( pPrivateEntry->mnLang == nLang ) - { - rLangStr = OUString::createFromAscii( pPrivateEntry->mpLangStr ); - rCountry = OUString(); - return; - } - ++pPrivateEntry; - } - while ( pPrivateEntry->mnLang != LANGUAGE_DONTKNOW ); - - // not found - rLangStr = OUString(); - rCountry = OUString(); -} - -// ----------------------------------------------------------------------- - -// ----------------------------------------------------------------------- - -static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( LanguageType nLang ) -{ - LanguageType nPrimary = MsLangId::getPrimaryLanguage( nLang); - - // Search for LangID and remember first lang-only. - const MsLangId::IsoLangEntry* pFirstPrimary = NULL; - const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; - do - { - if (pEntry->mnLang == nLang) - { - if (*pEntry->maCountry) - return *pEntry; - switch (nLang) - { - // These are known to have no country assigned. - case LANGUAGE_BASQUE: - case LANGUAGE_USER_ESPERANTO: - case LANGUAGE_USER_INTERLINGUA: - case LANGUAGE_USER_LOJBAN: - return *pEntry; - default: - ; // nothing - } - } - if (!pFirstPrimary && - MsLangId::getPrimaryLanguage( pEntry->mnLang) == nPrimary) - pFirstPrimary = pEntry; - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - // Language not found at all => use default. - if (!pFirstPrimary) - return aLastResortFallbackEntry; - - // Search for first entry of primary language with any country. - pEntry = pFirstPrimary; - do - { - if (MsLangId::getPrimaryLanguage( pEntry->mnLang) == nLang) - { - if (*pEntry->maCountry) - return *pEntry; - } - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - return aLastResortFallbackEntry; -} - -// static -LanguageType MsLangId::Conversion::lookupFallbackLanguage( LanguageType nLang ) -{ - return lcl_lookupFallbackEntry( nLang).mnLang; -} - - -// static -::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale( LanguageType nLang ) -{ - const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( nLang); - return ::com::sun::star::lang::Locale( - OUString::createFromAscii( rEntry.maLangStr), - OUString::createFromAscii( rEntry.maCountry), - OUString()); -} - -// ----------------------------------------------------------------------- - -static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( - const ::com::sun::star::lang::Locale & rLocale ) -{ - // language is lower case in table - OUString aLowerLang = rLocale.Language.toAsciiLowerCase(); - // country is upper case in table - OUString aUpperCountry = rLocale.Country.toAsciiUpperCase(); - sal_Int32 nCountryLen = aUpperCountry.getLength(); - - // Search for locale and remember first lang-only. - const MsLangId::IsoLangEntry* pFirstLang = NULL; - const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; - do - { - if (aLowerLang.equalsAscii( pEntry->maLangStr)) - { - if (*pEntry->maCountry) - { - if (nCountryLen && aUpperCountry.equalsAscii( pEntry->maCountry)) - return *pEntry; - } - else - { - switch (pEntry->mnLang) - { - // These are known to have no country assigned. - case LANGUAGE_BASQUE: - case LANGUAGE_USER_ESPERANTO: - case LANGUAGE_USER_INTERLINGUA: - case LANGUAGE_USER_LOJBAN: - return *pEntry; - default: - ; // nothing - } - } - if (!pFirstLang) - pFirstLang = pEntry; - } - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - // Language not found at all => use default. - if (!pFirstLang) - return aLastResortFallbackEntry; - - // Search for first entry of language with any country. - pEntry = pFirstLang; - do - { - if (aLowerLang.equalsAscii( pEntry->maLangStr)) - { - if (*pEntry->maCountry) - return *pEntry; - } - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - return aLastResortFallbackEntry; -} - - -// static -::com::sun::star::lang::Locale MsLangId::Conversion::lookupFallbackLocale( - const ::com::sun::star::lang::Locale & rLocale ) -{ - const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( rLocale); - return ::com::sun::star::lang::Locale( - OUString::createFromAscii( rEntry.maLangStr), - OUString::createFromAscii( rEntry.maCountry), - OUString()); -} - -// ======================================================================= - -// static -LanguageType MsLangId::Conversion::convertPrivateUseToLanguage( const OUString& rPriv ) -{ - const IsoLangOtherEntry* pPrivateEntry = aImplPrivateUseEntries; - do - { - if ( rPriv.equalsIgnoreAsciiCaseAscii( pPrivateEntry->mpLangStr ) ) - return pPrivateEntry->mnLang; - ++pPrivateEntry; - } while ( pPrivateEntry->mnLang != LANGUAGE_DONTKNOW ); - return LANGUAGE_DONTKNOW; -} - - -// static -LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OUString& rLang, - const OUString& rCountry ) -{ - // language is lower case in table - OUString aLowerLang = rLang.toAsciiLowerCase(); - // country is upper case in table - OUString aUpperCountry = rCountry.toAsciiUpperCase(); - - // first look for exact match - const IsoLangEntry* pFirstLang = NULL; - const IsoLangEntry* pEntry = aImplIsoLangEntries; - do - { - if ( aLowerLang.equalsAscii( pEntry->maLangStr ) ) - { - if ( aUpperCountry.isEmpty() || - aUpperCountry.equalsAscii( pEntry->maCountry ) ) - return pEntry->mnLang; - if ( !pFirstLang ) - pFirstLang = pEntry; - else if ( !*pEntry->maCountry ) - pFirstLang = pEntry; - } - ++pEntry; - } - while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); - - // some eng countries should be mapped to a specific english language - if ( aLowerLang == "en" ) - { - const IsoLangEngEntry* pEngEntry = aImplIsoLangEngEntries; - do - { - if ( aUpperCountry.equalsAscii( pEngEntry->maCountry ) ) - return pEngEntry->mnLang; - ++pEngEntry; - } - while ( pEngEntry->mnLang != LANGUAGE_DONTKNOW ); - } - - // test for specific languages which are not used standard ISO 3166 codes - const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; - do - { - if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) - { - // The countries in this table are not all in upper case - if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) - return pNoneStdEntry->mnLang; - } - ++pNoneStdEntry; - } - while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); - pNoneStdEntry = aImplIsoNoneStdLangEntries2; - do - { - if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) - { - // The countries in this table are not all in upper case - if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) - return pNoneStdEntry->mnLang; - } - ++pNoneStdEntry; - } - while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); - - // If the language is correct, than we return the default language - if ( pFirstLang ) - return pFirstLang->mnLang; - - // if only the country is set, look for any entry matching the country - // (to allow reading country and language in separate steps, in any order) - if ( !rCountry.isEmpty() && rLang.isEmpty() ) - { - const IsoLangEntry* pEntry2 = aImplIsoLangEntries; - do - { - if ( aUpperCountry.equalsAscii( pEntry2->maCountry ) ) - return pEntry2->mnLang; - ++pEntry2; - } - while ( pEntry2->mnLang != LANGUAGE_DONTKNOW ); - - aLowerLang = aUpperCountry.toAsciiLowerCase(); - } - - // Look for privateuse definitions. - LanguageType nLang = convertPrivateUseToLanguage( aLowerLang); - if (nLang != LANGUAGE_DONTKNOW) - return nLang; - - // Now look for all other definitions, which are not standard - const IsoLangOtherEntry* pOtherEntry = aImplOtherEntries; - do - { - if ( aLowerLang.equalsAscii( pOtherEntry->mpLangStr ) ) - return pOtherEntry->mnLang; - ++pOtherEntry; - } - while ( pOtherEntry->mnLang != LANGUAGE_DONTKNOW ); - - return LANGUAGE_DONTKNOW; -} - -// ----------------------------------------------------------------------- - -// static -LanguageType MsLangId::Conversion::convertIsoNamesToLanguage( const OString& rLang, - const OString& rCountry ) -{ - OUString aLang = OStringToOUString( rLang, RTL_TEXTENCODING_ASCII_US); - OUString aCountry = OStringToOUString( rCountry, RTL_TEXTENCODING_ASCII_US); - return convertIsoNamesToLanguage( aLang, aCountry); -} - -// ----------------------------------------------------------------------- - -struct IsoLangGLIBCModifiersEntry -{ - LanguageType mnLang; - sal_Char maLangStr[4]; - sal_Char maCountry[3]; - sal_Char maAtString[9]; -}; - -static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = -{ - // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier - { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, - { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia - { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro - { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro - { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, - { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, - { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, - { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table -}; - -// convert a unix locale string into LanguageType - -// static -LanguageType MsLangId::convertUnxByteStringToLanguage( - const OString& rString ) -{ - OString aLang; - OString aCountry; - OString aAtString; - - sal_Int32 nLangSepPos = rString.indexOf( (sal_Char)'_' ); - sal_Int32 nCountrySepPos = rString.indexOf( (sal_Char)'.' ); - sal_Int32 nAtPos = rString.indexOf( (sal_Char)'@' ); - - if (nCountrySepPos < 0) - nCountrySepPos = nAtPos; - if (nCountrySepPos < 0) - nCountrySepPos = rString.getLength(); - - if (nAtPos >= 0) - aAtString = rString.copy( nAtPos+1 ); - - if ( ((nLangSepPos >= 0) && (nLangSepPos > nCountrySepPos)) - || ((nLangSepPos < 0)) ) - { - // eg. "el.sun_eu_greek", "tchinese", "es.ISO8859-15" - aLang = rString.copy( 0, nCountrySepPos ); - } - else if ( nLangSepPos >= 0 ) - { - // well formed iso names like "en_US.UTF-8", "sh_BA.ISO8859-2@bosnia" - aLang = rString.copy( 0, nLangSepPos ); - aCountry = rString.copy( nLangSepPos+1, nCountrySepPos - nLangSepPos - 1); - } - - // if there is a glibc modifier, first look for exact match in modifier table - if (!aAtString.isEmpty()) - { - // language is lower case in table - OString aLowerLang = aLang.toAsciiLowerCase(); - // country is upper case in table - OString aUpperCountry = aCountry.toAsciiUpperCase(); - const IsoLangGLIBCModifiersEntry* pGLIBCModifiersEntry = aImplIsoLangGLIBCModifiersEntries; - do - { // avoid embedded \0 warning - if (( aLowerLang.equals( static_cast< const char* >( pGLIBCModifiersEntry->maLangStr ))) && - ( aAtString.equals( static_cast< const char* >( pGLIBCModifiersEntry->maAtString )))) - { - if ( aUpperCountry.isEmpty() || - aUpperCountry.equals( static_cast< const char* >( pGLIBCModifiersEntry->maCountry ))) - { - return pGLIBCModifiersEntry->mnLang; - } - } - ++pGLIBCModifiersEntry; - } - while ( pGLIBCModifiersEntry->mnLang != LANGUAGE_DONTKNOW ); - } - - return Conversion::convertIsoNamesToLanguage( aLang, aCountry ); -} - -// ----------------------------------------------------------------------- -// pass one IsoLangEntry to the outer world of the resource compiler - -// static -const MsLangId::IsoLangEntry* MsLangId::getIsoLangEntry( size_t nIndex ) -{ - if (nIndex < SAL_N_ELEMENTS(aImplIsoLangEntries)) - return &aImplIsoLangEntries[ nIndex]; - return 0; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/isolang/langid.pl b/i18npool/source/isolang/langid.pl deleted file mode 100755 index b57c93629aa8..000000000000 --- a/i18npool/source/isolang/langid.pl +++ /dev/null @@ -1,409 +0,0 @@ -: # -*- perl -*- vim: ft=perl -eval 'exec perl -w -S $0 ${1+"$@"}' -if 0; -# -# This file is part of the LibreOffice project. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This file incorporates work covered by the following license notice: -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed -# with this work for additional information regarding copyright -# ownership. The ASF licenses this file to you under the Apache -# License, Version 2.0 (the "License"); you may not use this file -# except in compliance with the License. You may obtain a copy of -# the License at http://www.apache.org/licenses/LICENSE-2.0 . -# - -# See Usage() below or invoke without arguments for short instructions. -# For long instructions use the source, Luke ;-) - -use strict; - -sub Usage() -{ - print STDERR - "\n", - "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n", - "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n", - "listbox entries, langlist.mk, file_ooo.scp registry name, languages.pm and\n", - "msi-encodinglist.txt\n\n", - - "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n", - - "A language string will be used as a generic string match in all searched files.\n", - "You may enclose the language string in word delimiters,\n", - "e.g. \\blanguage_german\\b for a specific match.\n", - "If the language string expression matches more than one define,\n", - "e.g. as in 'german', all matching defines will be processed.\n", - "If the language string does not match a define or an identifier in\n", - "langtab.src, a generic string match of the listbox entries will be tried.\n\n", - - "Numeric values of LangID,primarylanguage,sublanguage can be given\n", - "decimal, hexadecimal (leading 0x), octal (leading 0) or binary (leading 0b).\n", - "The exact language_define of an exact match will be used in remaining lookups.\n\n", - - "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n", - "for example: 'en-US' or 'de-' or '-CH',\n", - "xx and YY can be given case insensitive, will be lowered-uppered internally,\n", - "and xx and YY themselves may be regular expressions.\n", - "Also here a list of matches will be processed.\n\n", - - "If option --single is given, only the first match will be processed.\n\n"; -} - -my $SOLARVERSION = $ENV{"SOLARVERSION"}; -my $INPATH = $ENV{"INPATH"}; -my $SRC_ROOT = $ENV{"SRC_ROOT"}; -my $UPDMINOREXT = $ENV{"UPDMINOREXT"}; -if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT)) -{ - print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n"; - Usage(); - exit 1; -} -if (!defined($UPDMINOREXT)) { - $UPDMINOREXT = ''; -} -my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT"; - -my $LANGUAGE_MASK_PRIMARY = 0x03ff; - -sub getPrimaryLanguage($) -{ - my($lcid) = @_; - return $lcid & $LANGUAGE_MASK_PRIMARY; -} - -sub getSubLanguage($) -{ - my($lcid) = @_; - return $lcid >> 10; -} - -sub makeLangID($$) -{ - my( $sub, $pri) = @_; - return ($sub << 10) | $pri; -} - - -sub grepFile($$$$@) -{ - my( $regex, $path, $module, $name, @addregex) = @_; - my @result; - my $found = 0; - my $areopen = 0; - my $arecloser = ''; - my $file; - # Try module under current working directory first to catch local - # modifications. A Not yet delivered lang.h is a special case. - if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") { - $file = "./$module/inc/i18npool/lang.h"; } - else { - $file = "./$module/$name"; } - if (!($found = open( IN, $file))) - { - # Then with the given path. - $file = "$path/$module/$name"; - if (!($found = open( IN, $file))) - { - print "No $file\n"; - $file = "$path/$module.lnk/$name"; - if (!($found = open( IN, $file))) { - print "No $file.\n"; - $file = "$path/$module.link/$name"; - if (!($found = open( IN, $file))) { - print "No $file either.\n"; } - } - } - } - if ($found) - { - $found = 0; - while (my $line = <IN>) - { - if ($line =~ /$regex/) - { - if (!$found) - { - $found = 1; - print "$file:\n"; - } - chomp( $line); - print "$line\n"; - push( @result, $line); - } - elsif (@addregex) - { - # By convention first element is opener, second element is closer. - if (!$areopen) - { - if ($line =~ /$addregex[0]/) - { - $areopen = 1; - $arecloser = $addregex[1]; - } - } - if ($areopen) - { - for (my $i = 2; $i < @addregex; ++$i) - { - if ($line =~ /$addregex[$i]/) - { - if (!$found) - { - $found = 1; - print "$file:\n"; - } - chomp( $line); - print "$line\n"; - push( @result, $line); - } - } - if ($line =~ /$arecloser/) - { - $areopen = 0; - } - } - } - } - close( IN); - } - if (!$found) { - print "Not found in $file\n"; - #print "Not found in $file for $regex @addregex\n"; - } - return @result; -} - - -sub main() -{ - my( $lcid, @parts, $grepdef, $options, $single); - $grepdef = 0; - $single = 0; - for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options) - { - if ($ARGV[$options] eq '--single') { $single = 1; } - else { print "Unknown option: $ARGV[$options]\n"; } - } - if (@ARGV == 1 + $options) - { - # 0x hex, 0b bin, 0 oct - if ($ARGV[$options] =~ /^0/) { - $lcid = oct( $ARGV[0]); } - elsif ($ARGV[$options] =~ /^[0-9]/) { - $lcid = $ARGV[$options]; } - else - { - $grepdef = $ARGV[$options]; - $lcid = 0; - } - $parts[0] = getPrimaryLanguage( $lcid); - $parts[1] = getSubLanguage( $lcid); - } - elsif (@ARGV == 2 + $options) - { - for (my $i = $options; $i < 2 + $options; ++$i) - { - if ($ARGV[$i] =~ /^0/) { - $parts[$i] = oct( $ARGV[$i]); } - else { - $parts[$i] = $ARGV[$i]; } - } - $lcid = makeLangID( $parts[1], $parts[0]); - } - else - { - Usage(); - return 1; - } - my $modifier = "(?i)"; - my (@resultlist, @greplist, $result); - # If no string was given on the command line, but value(s) were, lookup the - # LangID value to obtain the define identifier. - if ($grepdef) - { - # #define LANGUAGE_AFRIKAANS 0x0436 - @resultlist = grepFile( - $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, - $SOLENVINC, "i18npool", "lang.h", ()); - } - else - { - printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid, - $lcid, $parts[0], $parts[1]); - my $buf = sprintf( "0x%04X", $lcid); - @resultlist = grepFile( - '^\s*#\s*define\s+\w+\s+' . $buf, - $SOLENVINC, "i18npool", "lang.h", ()); - } - for $result (@resultlist) - { - # #define LANGUAGE_AFRIKAANS 0x0436 - if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) - { - push( @greplist, '\b' . $1 . '\b'); - $modifier = ""; # complete identifier now case sensitive - if ($single) { - last; } - } - } - # If the string given is of the form xx-yy lookup a language,country pair - # to obtain the define identifier. xx and yy themselfs may be regexps. - # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy' - if ($grepdef =~ /^(.*)-$/) { - $grepdef = $1 . "-.*"; } - if ($grepdef =~ /^-(.*)$/) { - $grepdef = ".*-" . $1; } - if ($grepdef =~ /^(.*)-(.*)$/) - { - my $lang = $1; - my $coun = $2; - $lang = lc($lang); - $coun = uc($coun); - # { LANGUAGE_AFRIKAANS, "af", "ZA" }, - @resultlist = grepFile( - '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,', - "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); - for $result (@resultlist) - { - if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) - { - push( @greplist, '\b' . $1 . '\b'); - $modifier = ""; # complete identifier now case sensitive - if ($single) { - last; } - } - } - $grepdef = 0; - } - if (!@greplist && $grepdef) { - push( @greplist, $grepdef); } - for $grepdef (@greplist) - { - print "\nUsing: " . $grepdef . "\n"; - - # Decimal LCID, was needed for Langpack.ulf but isn't used anymore, - # keep just in case we'd need it again. - # #define LANGUAGE_AFRIKAANS 0x0436 - @resultlist = grepFile( - $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, - $SOLENVINC, "i18npool", "lang.h", ()); - my @lcidlist; - for $result (@resultlist) - { - # #define LANGUAGE_AFRIKAANS 0x0436 - if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) - { - push( @lcidlist, oct( $2)); - } - } - - # { LANGUAGE_AFRIKAANS, "af", "ZA" }, - @resultlist = grepFile( - $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,', - "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); - - my @langcoungreplist; - for $result (@resultlist) - { - if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) - { - my $lang = $1; - my $coun = $2; - my $loca; - if ($coun) - { - $loca = $lang . "_" . $coun; - push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); - } - else - { - $loca = $lang; - $coun = ""; - push( @langcoungreplist, '\b' . $lang . '\b'); - } - my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; - my $found; - if (!($found = open( LD, $file))) - { - $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml"; - if (!($found = open( LD, $file))) - { - $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml"; - $found = open( LD, $file); - } - } - if ($found) - { - print "Found $file:\n"; - my $on = 0; - while (my $line = <LD>) - { - if ($line =~ /<(Language|Country)>/) { - $on = 1; } - if ($on) { - print $line; } - if ($line =~ /<\/(Language|Country)>/) { - $on = 0; } - } - close( LD); - } - else { - print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; } - } - } - - # case LANGUAGE_ARABIC: - grepFile( - $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:', - "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ()); - - # With CWS 'langstatusbar' the language listbox resource file gets a new location. - my $module = "svx"; - my $name = "source/dialog/langtab.src"; - if (!(-e "$SRC_ROOT/$module/$name")) { - $module = "svtools"; - $name = "source/misc/langtab.src"; - } - # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ; - # lookup define - @resultlist = grepFile( - $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;', - "$SRC_ROOT", $module, $name, ()); - # lookup string - if (!@resultlist) { - grepFile( - $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;', - "$SRC_ROOT", $module, $name, ()); } - - for my $langcoun (@langcoungreplist) - { - # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu"; - grepFile( - '^\s*Name\s*\(' . $langcoun . '\)\s*=', - "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); - - # completelangiso=af ar as-IN ... zu - grepFile( - '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', - "$SRC_ROOT", "solenv", "inc/langlist.mk", - # needs a duplicated pair of backslashes to produce a literal \\ - ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$')); - - # af 1252 1078 # Afrikaans - grepFile( - '^\s*' . $langcoun . '', - "$SRC_ROOT", "l10ntools", "source/ulfconv/msi-encodinglist.txt", ()); - } - } - return 0; -} - -main(); diff --git a/i18npool/source/isolang/lcid.awk b/i18npool/source/isolang/lcid.awk deleted file mode 100644 index 371cd64106d6..000000000000 --- a/i18npool/source/isolang/lcid.awk +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/awk -f -# -# This file is part of the LibreOffice project. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This file incorporates work covered by the following license notice: -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed -# with this work for additional information regarding copyright -# ownership. The ASF licenses this file to you under the Apache -# License, Version 2.0 (the "License"); you may not use this file -# except in compliance with the License. You may obtain a copy of -# the License at http://www.apache.org/licenses/LICENSE-2.0 . -# -# Utility to compare MS-LANGID definitions with those defined in ../../inc/i18npool/lang.h -# Run in i18npool/source/isolang -# -# outputs new #define LANGUAGE_... 0x... and also some commented out substrings -# that were matched in already existing defines. -# -# ATTENTION! The sed filter in the command line examples below assures that a -# '|' border is drawn by html2text in data tables, and nowhere else, on which -# this awk script relies. This script also heavily relies on the column layout -# encountered. Should MS decide to change their layout or their CSS names -# ("data..."), this would probably break. Should html2text decide that the last -# border="..." attribute encountered wins instead of the first, this may break -# also. -# -# sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' -# -# After html2text best if file cleaned up to _only_ contain the table entries, -# but not necessary, entries are filtered. Check output. -# -# Expects input from the saved page of one of -# -# (1) -# http://www.microsoft.com/globaldev/reference/lcid-all.mspx -# filtered through ``html2text -nobs ...'', generated table: -# blank,name,hex,dec,blank fields: -# |Afrikaans_-_South_Africa___|0436___|1078___| -# -# complete command line: -# lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile -# -# -# (2) -# http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx -# filtered through ``html2text -nobs ...'', generated table: -# blank,name,hex,dec,inputlocales,collection,blank fields: -# |Afrikaans |0436 |1078 |0436:00000409, |Basic | -# -# complete command line: -# lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile -# -# -# (3) -# http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp -# filtered through ``html2text -nobs ...'', generated table: -# blank,hex,locale,name,blank fields: -# |0x0436___|af-ZA___|Afrikaans_(South_Africa)___| -# -# complete command line: -# lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile -# -# Author: Eike Rathke <erack@sun.com>, <er@openoffice.org> -# - -BEGIN { - while ((getline < "../../inc/i18npool/lang.h") > 0) - { - if ($0 ~ /^#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/) - { - # lang[HEX]=NAME - lang[toupper(substr($3,3))] = toupper($2) - #print substr($3,3) "=" $2 - } - } - # html2text table follows - FS = "\|" - filetype = 0 - lcid_all = 1 - xp_lcid = 2 - nls_238z = 3 - filetypename[filetype] = "unknown" - filetypename[lcid_all] = "lcid_all" - filetypename[xp_lcid] = "xp_lcid" - filetypename[nls_238z] = "nls_238z" - namefield[lcid_all] = 2 - namefield[xp_lcid] = 2 - namefield[nls_238z] = 4 - hexfield[lcid_all] = 3 - hexfield[xp_lcid] = 3 - hexfield[nls_238z] = 2 - locfield[lcid_all] = 0 - locfield[xp_lcid] = 0 - locfield[nls_238z] = 3 -} - -(NF < 5) { next } - -!filetype { - if (NF == 5) - { - if ($2 ~ /^0x/) - filetype = nls_238z - else if ($2 ~ /^Afrikaans/) - filetype = lcid_all - } - else if (NF == 7) - filetype = xp_lcid - if (!filetype) - next - name = namefield[filetype] - hex = hexfield[filetype] - loc = locfield[filetype] -} - -{ - gsub( /^[^:]*:/, "", $name) - gsub( /\..*/, "", $name) - gsub( /(^[ _]+)|([ _]+$)/, "", $hex) - gsub( /(^[ _]+)|([ _]+$)/, "", $name) - if (loc) - gsub( /(^[ _]+)|([ _]+$)/, "", $loc) -} - -($hex ~ /^0x/) { $hex = substr( $hex, 3) } - -# if only 464 instead of 0464, make it match lang.h -(length($hex) < 4) { $hex = "0" $hex } - -($hex !~ /^[0-9a-fA-F][0-9a-fA-F]*$/) { filtered[$hex] = $0; next } - -# all[HEX]=string -{ all[toupper($hex)] = $name } - -(loc) { comment[toupper($hex)] = " /* " $loc " */" } - -# new hex: newlang[HEX]=string -!(toupper($hex) in lang) { newlang[toupper($hex)] = $name } - -END { - if (!filetype) - { - print "No file type recognized." >>"/dev/stderr" - exit(1) - } - print "// assuming " filetypename[filetype] " file" - # every new language - for (x in newlang) - { - printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", newlang[x], x, comment[x]) - n = split(newlang[x],arr,/[^A-Za-z0-9]/) - def = "" - for (i=1; i<=n; ++i) - { - if (length(arr[i])) - { - # each identifier word of the language name - if (def) - def = def "_" - aup = toupper(arr[i]) - def = def aup - for (l in lang) - { - # contained in already existing definitions? - if (lang[l] ~ aup) - printf( "// %-50s %s\n", arr[i] ": " lang[l], l) - } - } - } - printf( "#define LANGUAGE_%-26s 0x%s\n", def, x) - } - print "\n// --- reverse check follows ----------------------------------\n" - for (x in lang) - { - if (!(x in all)) - print "// not in input file: " x " " lang[x] - } - print "\n// --- filtered table entries follow (if any) -----------------\n" - for (x in filtered) - print "// filtered: " x " " filtered[x] -} diff --git a/i18npool/source/isolang/mslangid.cxx b/i18npool/source/isolang/mslangid.cxx deleted file mode 100644 index 12cf5a3ad1da..000000000000 --- a/i18npool/source/isolang/mslangid.cxx +++ /dev/null @@ -1,492 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - -#include <sal/config.h> -#include <rtl/ustring.hxx> -#include <rtl/string.hxx> -#include <com/sun/star/i18n/ScriptType.hpp> - -#include "i18npool/mslangid.hxx" - - -LanguageType MsLangId::nConfiguredSystemLanguage = LANGUAGE_SYSTEM; -LanguageType MsLangId::nConfiguredSystemUILanguage = LANGUAGE_SYSTEM; - -LanguageType MsLangId::nConfiguredWesternFallback = LANGUAGE_SYSTEM; -LanguageType MsLangId::nConfiguredAsianFallback = LANGUAGE_SYSTEM; -LanguageType MsLangId::nConfiguredComplexFallback = LANGUAGE_SYSTEM; - -// static -void MsLangId::setConfiguredSystemLanguage( LanguageType nLang ) -{ - nConfiguredSystemLanguage = nLang; -} - - -// static -void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang ) -{ - nConfiguredSystemUILanguage = nLang; -} - -// static -void MsLangId::setConfiguredWesternFallback( LanguageType nLang ) -{ - nConfiguredWesternFallback = nLang; -} - -// static -void MsLangId::setConfiguredAsianFallback( LanguageType nLang ) -{ - nConfiguredAsianFallback = nLang; -} - -// static -void MsLangId::setConfiguredComplexFallback( LanguageType nLang ) -{ - nConfiguredComplexFallback = nLang; -} - -// static -inline LanguageType MsLangId::simplifySystemLanguages( LanguageType nLang ) -{ - switch (nLang) - { - case LANGUAGE_PROCESS_OR_USER_DEFAULT : - case LANGUAGE_SYSTEM_DEFAULT : - case LANGUAGE_SYSTEM : - nLang = LANGUAGE_SYSTEM; - break; - default: - ; // nothing - } - return nLang; -} - -// static -LanguageType MsLangId::getRealLanguage( LanguageType nLang ) -{ - switch (simplifySystemLanguages( nLang)) - { - case LANGUAGE_SYSTEM : - if (nConfiguredSystemLanguage == LANGUAGE_SYSTEM) - nLang = getSystemLanguage(); - else - nLang = nConfiguredSystemLanguage; - break; - case LANGUAGE_HID_HUMAN_INTERFACE_DEVICE : - if (nConfiguredSystemUILanguage == LANGUAGE_SYSTEM) - nLang = getSystemUILanguage(); - else - nLang = nConfiguredSystemUILanguage; - break; - default: - /* TODO: would this be useful here? */ - //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); - ; // nothing - } - if (nLang == LANGUAGE_DONTKNOW) - nLang = LANGUAGE_ENGLISH_US; - return nLang; -} - - -// static -LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType ) -{ - if (nLang == LANGUAGE_NONE) - return nLang; - - nLang = getRealLanguage(nLang); - if (nType != ::com::sun::star::i18n::ScriptType::WEAK && getScriptType(nLang) != nType) - { - switch(nType) - { - case ::com::sun::star::i18n::ScriptType::ASIAN: - if (nConfiguredAsianFallback == LANGUAGE_SYSTEM) - nLang = LANGUAGE_CHINESE_SIMPLIFIED; - else - nLang = nConfiguredAsianFallback; - break; - case ::com::sun::star::i18n::ScriptType::COMPLEX: - if (nConfiguredComplexFallback == LANGUAGE_SYSTEM) - nLang = LANGUAGE_HINDI; - else - nLang = nConfiguredComplexFallback; - break; - default: - if (nConfiguredWesternFallback == LANGUAGE_SYSTEM) - nLang = LANGUAGE_ENGLISH_US; - else - nLang = nConfiguredWesternFallback; - break; - } - } - return nLang; -} - -// static -void MsLangId::Conversion::convertLanguageToLocale( LanguageType nLang, - ::com::sun::star::lang::Locale & rLocale ) -{ - if (!rLocale.Variant.isEmpty()) - rLocale.Variant = OUString(); - convertLanguageToIsoNames( nLang, rLocale.Language, rLocale.Country); - /* FIXME: this x-... is temporary until conversion will be moved up to - * LanguageTag. Also handle the nasty "*" joker as privateuse. */ - if (rLocale.Language.startsWith( "x-") || (rLocale.Language == "*")) - { - rLocale.Variant = rLocale.Language; - rLocale.Language = "qlt"; - } -} - - -// static -::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocale( - LanguageType nLang, bool bResolveSystem ) -{ - ::com::sun::star::lang::Locale aLocale; - if (!bResolveSystem && simplifySystemLanguages( nLang) == LANGUAGE_SYSTEM) - ; // nothing => empty locale - else - { - // Still resolve LANGUAGE_DONTKNOW if resolving is not requested, - // but not LANGUAGE_SYSTEM or others. - if (bResolveSystem || nLang == LANGUAGE_DONTKNOW) - nLang = MsLangId::getRealLanguage( nLang); - convertLanguageToLocale( nLang, aLocale); - } - return aLocale; -} - - -// static -LanguageType MsLangId::Conversion::convertLocaleToLanguage( - const ::com::sun::star::lang::Locale& rLocale ) -{ - // empty language => LANGUAGE_SYSTEM - if (rLocale.Language.isEmpty()) - return LANGUAGE_SYSTEM; - - /* FIXME: this x-... is temporary until conversion will be moved up to - * LanguageTag. Also handle the nasty "*" joker as privateuse. */ - LanguageType nRet = ((!rLocale.Variant.isEmpty() && - (rLocale.Variant.startsWithIgnoreAsciiCase( "x-") || (rLocale.Variant == "*"))) ? - convertPrivateUseToLanguage( rLocale.Variant) : - convertIsoNamesToLanguage( rLocale.Language, rLocale.Country)); - if (nRet == LANGUAGE_DONTKNOW) - nRet = LANGUAGE_SYSTEM; - - return nRet; -} - - -// static -::com::sun::star::lang::Locale MsLangId::Conversion::convertLanguageToLocaleWithFallback( - LanguageType nLang ) -{ - return lookupFallbackLocale( MsLangId::getRealLanguage( nLang)); -} - - -// static -::com::sun::star::lang::Locale MsLangId::getFallbackLocale( - const ::com::sun::star::lang::Locale & rLocale ) -{ - // empty language => LANGUAGE_SYSTEM - if (rLocale.Language.isEmpty()) - return Conversion::convertLanguageToLocaleWithFallback( LANGUAGE_SYSTEM); - - return Conversion::lookupFallbackLocale( rLocale); -} - -// static -bool MsLangId::isRightToLeft( LanguageType nLang ) -{ - switch( nLang & LANGUAGE_MASK_PRIMARY ) - { - case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_HEBREW & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_YIDDISH & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_URDU & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_FARSI & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_KASHMIRI & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_SINDHI & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_UIGHUR_CHINA & LANGUAGE_MASK_PRIMARY : - case LANGUAGE_USER_KYRGYZ_CHINA & LANGUAGE_MASK_PRIMARY : - return true; - - default: - break; - } - return false; -} - -// static -bool MsLangId::isSimplifiedChinese( LanguageType nLang ) -{ - return isChinese(nLang) && !isTraditionalChinese(nLang); -} - -// static -bool MsLangId::isSimplifiedChinese( const ::com::sun::star::lang::Locale & rLocale ) -{ - return rLocale.Language == "zh" && !isTraditionalChinese(rLocale); -} - -// static -bool MsLangId::isTraditionalChinese( LanguageType nLang ) -{ - bool bRet = false; - switch (nLang) - { - case LANGUAGE_CHINESE_TRADITIONAL: - case LANGUAGE_CHINESE_HONGKONG: - case LANGUAGE_CHINESE_MACAU: - bRet = true; - default: - break; - } - return bRet; -} - -// static -bool MsLangId::isTraditionalChinese( const ::com::sun::star::lang::Locale & rLocale ) -{ - return rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO"); -} - -//static -bool MsLangId::isChinese( LanguageType nLang ) -{ - return MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_CHINESE; -} - -//static -bool MsLangId::isKorean( LanguageType nLang ) -{ - return MsLangId::getPrimaryLanguage(nLang) == LANGUAGE_KOREAN; -} - -// static -bool MsLangId::isCJK( LanguageType nLang ) -{ - switch (nLang & LANGUAGE_MASK_PRIMARY) - { - case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: - return true; - default: - break; - } - return false; -} - -// static -bool MsLangId::isFamilyNameFirst( LanguageType nLang ) -{ - return isCJK(nLang) || nLang == LANGUAGE_HUNGARIAN; -} - -// static -bool MsLangId::hasForbiddenCharacters( LanguageType nLang ) -{ - return isCJK(nLang); -} - - -// static -bool MsLangId::needsSequenceChecking( LanguageType nLang ) -{ - switch (nLang & LANGUAGE_MASK_PRIMARY) - { - case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_LAO & LANGUAGE_MASK_PRIMARY: - case LANGUAGE_THAI & LANGUAGE_MASK_PRIMARY: - return true; - default: - break; - } - return false; -} - - -// static -sal_Int16 MsLangId::getScriptType( LanguageType nLang ) -{ - sal_Int16 nScript; - switch( nLang ) - { - // CJK - // all LANGUAGE_CHINESE_... are caught below - case LANGUAGE_JAPANESE: - case LANGUAGE_KOREAN: - case LANGUAGE_KOREAN_JOHAB: - case LANGUAGE_USER_KOREAN_NORTH: - nScript = ::com::sun::star::i18n::ScriptType::ASIAN; - break; - - // CTL - // all LANGUAGE_ARABIC_... are caught below - case LANGUAGE_AMHARIC_ETHIOPIA: - case LANGUAGE_ASSAMESE: - case LANGUAGE_BENGALI: - case LANGUAGE_BENGALI_BANGLADESH: - case LANGUAGE_BURMESE: - case LANGUAGE_FARSI: - case LANGUAGE_HEBREW: - case LANGUAGE_YIDDISH: - case LANGUAGE_USER_YIDDISH_US: - case LANGUAGE_MARATHI: - case LANGUAGE_PUNJABI: - case LANGUAGE_GUJARATI: - case LANGUAGE_HINDI: - case LANGUAGE_KANNADA: - case LANGUAGE_KASHMIRI: - case LANGUAGE_KASHMIRI_INDIA: - case LANGUAGE_KHMER: - case LANGUAGE_LAO: - case LANGUAGE_MALAYALAM: - case LANGUAGE_MANIPURI: - case LANGUAGE_MONGOLIAN_MONGOLIAN: - case LANGUAGE_NEPALI: - case LANGUAGE_NEPALI_INDIA: - case LANGUAGE_ORIYA: - case LANGUAGE_SANSKRIT: - case LANGUAGE_SINDHI: - case LANGUAGE_SINDHI_PAKISTAN: - case LANGUAGE_SINHALESE_SRI_LANKA: - case LANGUAGE_SYRIAC: - case LANGUAGE_TAMIL: - case LANGUAGE_TELUGU: - case LANGUAGE_THAI: - case LANGUAGE_TIBETAN: - case LANGUAGE_DZONGKHA: - case LANGUAGE_URDU: - case LANGUAGE_URDU_PAKISTAN: - case LANGUAGE_URDU_INDIA: - case LANGUAGE_USER_KURDISH_IRAQ: - case LANGUAGE_USER_KURDISH_IRAN: - case LANGUAGE_DHIVEHI: - case LANGUAGE_USER_BODO_INDIA: - case LANGUAGE_USER_DOGRI_INDIA: - case LANGUAGE_USER_MAITHILI_INDIA: - case LANGUAGE_UIGHUR_CHINA: - case LANGUAGE_USER_LIMBU: - case LANGUAGE_USER_KYRGYZ_CHINA: - nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; - break; - -// currently not knowing scripttype - defaulted to LATIN: -/* -#define LANGUAGE_ARMENIAN 0x042B -#define LANGUAGE_INDONESIAN 0x0421 -#define LANGUAGE_KAZAKH 0x043F -#define LANGUAGE_KONKANI 0x0457 -#define LANGUAGE_MACEDONIAN 0x042F -#define LANGUAGE_TATAR 0x0444 -*/ - - default: - switch ( nLang & LANGUAGE_MASK_PRIMARY ) - { - // CJK catcher - case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: - nScript = ::com::sun::star::i18n::ScriptType::ASIAN; - break; - // CTL catcher - case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY: - nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; - break; - // Western (actually not necessarily Latin but also Cyrillic, for example) - default: - nScript = ::com::sun::star::i18n::ScriptType::LATIN; - } - break; - } - return nScript; -} - - -// static -LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bool bUserInterfaceSelection ) -{ - switch (nLang) - { - default: - break; // nothing - case LANGUAGE_OBSOLETE_USER_LATIN: - nLang = LANGUAGE_LATIN; - break; - case LANGUAGE_OBSOLETE_USER_MAORI: - nLang = LANGUAGE_MAORI_NEW_ZEALAND; - break; - case LANGUAGE_OBSOLETE_USER_KINYARWANDA: - nLang = LANGUAGE_KINYARWANDA_RWANDA; - break; - case LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN: - nLang = LANGUAGE_UPPER_SORBIAN_GERMANY; - break; - case LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN: - nLang = LANGUAGE_LOWER_SORBIAN_GERMANY; - break; - case LANGUAGE_OBSOLETE_USER_OCCITAN: - nLang = LANGUAGE_OCCITAN_FRANCE; - break; - case LANGUAGE_OBSOLETE_USER_BRETON: - nLang = LANGUAGE_BRETON_FRANCE; - break; - case LANGUAGE_OBSOLETE_USER_KALAALLISUT: - nLang = LANGUAGE_KALAALLISUT_GREENLAND; - break; - case LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH: - nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG; - break; - case LANGUAGE_OBSOLETE_USER_KABYLE: - nLang = LANGUAGE_TAMAZIGHT_LATIN; - break; - - // The following are not strictly obsolete but should be mapped to a - // replacement locale when encountered. - - // no_NO is an alias for nb_NO - case LANGUAGE_NORWEGIAN: - nLang = LANGUAGE_NORWEGIAN_BOKMAL; - break; - - // #i94435# A Spanish variant that differs only in collation details we - // do not support. - case LANGUAGE_SPANISH_DATED: - nLang = LANGUAGE_SPANISH_MODERN; - break; - - // Do not use ca-XV for document content. - /* TODO: remove in case we implement BCP47 language tags. */ - case LANGUAGE_USER_CATALAN_VALENCIAN: - if (!bUserInterfaceSelection) - nLang = LANGUAGE_CATALAN; - break; - } - return nLang; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/languagetag/languagetag.cxx b/i18npool/source/languagetag/languagetag.cxx deleted file mode 100644 index c93943d52362..000000000000 --- a/i18npool/source/languagetag/languagetag.cxx +++ /dev/null @@ -1,1254 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -#include "i18npool/languagetag.hxx" -#include "i18npool/mslangid.hxx" -#include <rtl/ustrbuf.hxx> -#include <rtl/bootstrap.hxx> -#include <osl/file.hxx> -#include <rtl/instance.hxx> -#include <rtl/locale.h> - -//#define erDEBUG - -#if defined(ENABLE_LIBLANGTAG) -#include <liblangtag/langtag.h> -#else -/* Replacement code for LGPL phobic and Android systems. - * For iOS we could probably use NSLocale instead, that should have more or - * less required functionality. If it is good enough, it could be used for Mac - * OS X, too. - */ -#include "simple-langtag.cxx" -#endif - -using rtl::OUString; -using rtl::OString; -using rtl::OUStringBuffer; -using namespace com::sun::star; - -// The actual pointer type of mpImplLangtag that is declared void* to not -// pollute the entire code base with liblangtag. -#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p)) -#define MPLANGTAG LANGTAGCAST(mpImplLangtag) - -/** Convention to signal presence of BCP 47 language tag in a Locale's Variant - field. The Locale's Language field then will contain this ISO 639-2 - reserved for local use code. */ -#define ISO639_LANGUAGE_TAG "qlt" - - -// Helper to ensure lt_error_t is free'd -struct myLtError -{ - lt_error_t* p; - myLtError() : p(NULL) {} - ~myLtError() { if (p) lt_error_unref( p); } -}; - - -// "statics" to be returned as const reference to an empty locale and string. -namespace { -struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {}; -struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {}; -} - - -/** A reference holder for liblangtag data de/initialization, one static - instance. Currently implemented such that the first "ref" inits and dtor - (our library deinitialized) tears down. -*/ -class LiblantagDataRef -{ -public: - LiblantagDataRef(); - ~LiblantagDataRef(); - inline void incRef() - { - if (mnRef != SAL_MAX_UINT32 && !mnRef++) - setup(); - } - inline void decRef() - { - if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef) - teardown(); - } -private: - rtl::OString maDataPath; // path to liblangtag data, "|" if system - sal_uInt32 mnRef; - - void setupDataPath(); - void setup(); - void teardown(); -}; - -namespace { -struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {}; -} - -LiblantagDataRef::LiblantagDataRef() - : - mnRef(0) -{ -} - -LiblantagDataRef::~LiblantagDataRef() -{ - // When destructed we're tearing down unconditionally. - if (mnRef) - mnRef = 1; - decRef(); -} - -void LiblantagDataRef::setup() -{ - SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database"); - if (maDataPath.isEmpty()) - setupDataPath(); - lt_db_initialize(); - // Hold ref eternally. - mnRef = SAL_MAX_UINT32; -} - -void LiblantagDataRef::teardown() -{ - SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database"); - lt_db_finalize(); -} - -void LiblantagDataRef::setupDataPath() -{ - // maDataPath is assumed to be empty here. - OUString aURL("$BRAND_BASE_DIR/share/liblangtag"); - rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure - - // Check if data is in our own installation, else assume system - // installation. - OUString aData( aURL); - aData += "/language-subtag-registry.xml"; - osl::DirectoryItem aDirItem; - if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None) - { - OUString aPath; - if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None) - maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8); - } - if (maDataPath.isEmpty()) - maDataPath = "|"; // assume system - else - lt_db_set_datadir( maDataPath.getStr()); -} - -LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize ) - : - maBcp47( rBcp47LanguageTag), - mpImplLangtag( NULL), - mnLangID( LANGUAGE_DONTKNOW), - meIsValid( DECISION_DONTKNOW), - meIsIsoLocale( DECISION_DONTKNOW), - meIsIsoODF( DECISION_DONTKNOW), - meIsLiblangtagNeeded( DECISION_DONTKNOW), - mbSystemLocale( rBcp47LanguageTag.isEmpty()), - mbInitializedBcp47( !mbSystemLocale), - mbInitializedLocale( false), - mbInitializedLangID( false), - mbCachedLanguage( false), - mbCachedScript( false), - mbCachedCountry( false), - mbIsFallback( false) -{ - if (bCanonicalize) - canonicalize(); -} - - -LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale ) - : - maLocale( rLocale), - mpImplLangtag( NULL), - mnLangID( LANGUAGE_DONTKNOW), - meIsValid( DECISION_DONTKNOW), - meIsIsoLocale( DECISION_DONTKNOW), - meIsIsoODF( DECISION_DONTKNOW), - meIsLiblangtagNeeded( DECISION_DONTKNOW), - mbSystemLocale( rLocale.Language.isEmpty()), - mbInitializedBcp47( false), - mbInitializedLocale( !mbSystemLocale), - mbInitializedLangID( false), - mbCachedLanguage( false), - mbCachedScript( false), - mbCachedCountry( false), - mbIsFallback( false) -{ -} - - -LanguageTag::LanguageTag( LanguageType nLanguage ) - : - mpImplLangtag( NULL), - mnLangID( nLanguage), - meIsValid( DECISION_DONTKNOW), - meIsIsoLocale( DECISION_DONTKNOW), - meIsIsoODF( DECISION_DONTKNOW), - meIsLiblangtagNeeded( DECISION_DONTKNOW), - mbSystemLocale( nLanguage == LANGUAGE_SYSTEM), - mbInitializedBcp47( false), - mbInitializedLocale( false), - mbInitializedLangID( !mbSystemLocale), - mbCachedLanguage( false), - mbCachedScript( false), - mbCachedCountry( false), - mbIsFallback( false) -{ -} - - -LanguageTag::LanguageTag( const OUString& rLanguage, const OUString& rCountry ) - : - maLocale( rLanguage, rCountry, ""), - mpImplLangtag( NULL), - mnLangID( LANGUAGE_DONTKNOW), - meIsValid( DECISION_DONTKNOW), - meIsIsoLocale( DECISION_DONTKNOW), - meIsIsoODF( DECISION_DONTKNOW), - meIsLiblangtagNeeded( DECISION_DONTKNOW), - mbSystemLocale( rLanguage.isEmpty()), - mbInitializedBcp47( false), - mbInitializedLocale( !mbSystemLocale), - mbInitializedLangID( false), - mbCachedLanguage( false), - mbCachedScript( false), - mbCachedCountry( false), - mbIsFallback( false) -{ -} - - -LanguageTag::LanguageTag( const rtl_Locale & rLocale ) - : - maLocale( rLocale.Language, rLocale.Country, rLocale.Variant), - mpImplLangtag( NULL), - mnLangID( LANGUAGE_DONTKNOW), - meIsValid( DECISION_DONTKNOW), - meIsIsoLocale( DECISION_DONTKNOW), - meIsIsoODF( DECISION_DONTKNOW), - meIsLiblangtagNeeded( DECISION_DONTKNOW), - mbSystemLocale( maLocale.Language.isEmpty()), - mbInitializedBcp47( false), - mbInitializedLocale( !mbSystemLocale), - mbInitializedLangID( false), - mbCachedLanguage( false), - mbCachedScript( false), - mbCachedCountry( false), - mbIsFallback( false) -{ - convertFromRtlLocale(); -} - - -LanguageTag::LanguageTag( const LanguageTag & rLanguageTag ) - : - maLocale( rLanguageTag.maLocale), - maBcp47( rLanguageTag.maBcp47), - maCachedLanguage( rLanguageTag.maCachedLanguage), - maCachedScript( rLanguageTag.maCachedScript), - maCachedCountry( rLanguageTag.maCachedCountry), - mpImplLangtag( rLanguageTag.mpImplLangtag ? - lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL), - mnLangID( rLanguageTag.mnLangID), - meIsValid( rLanguageTag.meIsValid), - meIsIsoLocale( rLanguageTag.meIsIsoLocale), - meIsIsoODF( rLanguageTag.meIsIsoODF), - meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded), - mbSystemLocale( rLanguageTag.mbSystemLocale), - mbInitializedBcp47( rLanguageTag.mbInitializedBcp47), - mbInitializedLocale( rLanguageTag.mbInitializedLocale), - mbInitializedLangID( rLanguageTag.mbInitializedLangID), - mbCachedLanguage( rLanguageTag.mbCachedLanguage), - mbCachedScript( rLanguageTag.mbCachedScript), - mbCachedCountry( rLanguageTag.mbCachedCountry), - mbIsFallback( rLanguageTag.mbIsFallback) -{ - if (mpImplLangtag) - theDataRef::get().incRef(); -} - - -LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag ) -{ - maLocale = rLanguageTag.maLocale; - maBcp47 = rLanguageTag.maBcp47; - maCachedLanguage = rLanguageTag.maCachedLanguage; - maCachedScript = rLanguageTag.maCachedScript; - maCachedCountry = rLanguageTag.maCachedCountry; - mpImplLangtag = rLanguageTag.mpImplLangtag; - mpImplLangtag = rLanguageTag.mpImplLangtag ? - lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL; - mnLangID = rLanguageTag.mnLangID; - meIsValid = rLanguageTag.meIsValid; - meIsIsoLocale = rLanguageTag.meIsIsoLocale; - meIsIsoODF = rLanguageTag.meIsIsoODF; - meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded; - mbSystemLocale = rLanguageTag.mbSystemLocale; - mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47; - mbInitializedLocale = rLanguageTag.mbInitializedLocale; - mbInitializedLangID = rLanguageTag.mbInitializedLangID; - mbCachedLanguage = rLanguageTag.mbCachedLanguage; - mbCachedScript = rLanguageTag.mbCachedScript; - mbCachedCountry = rLanguageTag.mbCachedCountry; - mbIsFallback = rLanguageTag.mbIsFallback; - if (mpImplLangtag) - theDataRef::get().incRef(); - return *this; -} - - -LanguageTag::~LanguageTag() -{ - if (mpImplLangtag) - { - lt_tag_unref( MPLANGTAG); - theDataRef::get().decRef(); - } -} - - -void LanguageTag::resetVars() -{ - if (mpImplLangtag) - { - lt_tag_unref( MPLANGTAG); - mpImplLangtag = NULL; - theDataRef::get().decRef(); - } - - maLocale = lang::Locale(); - if (!maBcp47.isEmpty()) - maBcp47 = OUString(); - if (!maCachedLanguage.isEmpty()) - maCachedLanguage= OUString(); - if (!maCachedScript.isEmpty()) - maCachedScript = OUString(); - if (!maCachedCountry.isEmpty()) - maCachedCountry = OUString(); - mnLangID = LANGUAGE_DONTKNOW; - meIsValid = DECISION_DONTKNOW; - meIsIsoLocale = DECISION_DONTKNOW; - meIsIsoODF = DECISION_DONTKNOW; - meIsLiblangtagNeeded= DECISION_DONTKNOW; - mbSystemLocale = true; - mbInitializedBcp47 = false; - mbInitializedLocale = false; - mbInitializedLangID = false; - mbCachedLanguage = false; - mbCachedScript = false; - mbCachedCountry = false; - mbIsFallback = false; -} - - -void LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize ) -{ - resetVars(); - maBcp47 = rBcp47LanguageTag; - mbSystemLocale = rBcp47LanguageTag.isEmpty(); - mbInitializedBcp47 = !mbSystemLocale; - - if (bCanonicalize) - canonicalize(); -} - - -void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale ) -{ - resetVars(); - maLocale = rLocale; - mbSystemLocale = rLocale.Language.isEmpty(); - mbInitializedLocale = !mbSystemLocale; -} - - -void LanguageTag::reset( LanguageType nLanguage ) -{ - resetVars(); - mnLangID = nLanguage; - mbSystemLocale = nLanguage == LANGUAGE_SYSTEM; - mbInitializedLangID = !mbSystemLocale; -} - - -void LanguageTag::reset( const rtl_Locale & rLocale ) -{ - reset( lang::Locale( rLocale.Language, rLocale.Country, rLocale.Variant)); - convertFromRtlLocale(); -} - - -bool LanguageTag::canonicalize() -{ -#ifdef erDEBUG - // dump once - struct dumper - { - void** mpp; - dumper( void** pp ) : mpp( *pp ? NULL : pp) {} - ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); } - }; - dumper aDumper( &mpImplLangtag); -#endif - - // Side effect: have maBcp47 in any case, resolved system. - // Some methods calling canonicalize() (or not calling it due to - // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set - // meIsLiblangtagNeeded anywhere else than hereafter. - getBcp47( true ); - - // The simple cases and known locales don't need liblangtag processing, - // which also avoids loading liblangtag data on startup. - if (meIsLiblangtagNeeded == DECISION_DONTKNOW) - { - bool bTemporaryLocale = false; - bool bTemporaryLangID = false; - if (!mbInitializedLocale && !mbInitializedLangID) - { - if (mbSystemLocale) - { - mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); - mbInitializedLangID = true; - } - else - { - // Now this is getting funny.. we only have some BCP47 string - // and want to determine if parsing it would be possible - // without using liblangtag just to see if it is a simple known - // locale. - OUString aLanguage, aScript, aCountry; - Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry); - if (eExt != EXTRACTED_NONE) - { - if (eExt == EXTRACTED_LSC && aScript.isEmpty()) - { - maLocale.Language = aLanguage; - maLocale.Country = aCountry; - } - else - { - maLocale.Language = ISO639_LANGUAGE_TAG; - maLocale.Country = aCountry; - maLocale.Variant = maBcp47; - } - bTemporaryLocale = mbInitializedLocale = true; - } - } - } - if (mbInitializedLangID && !mbInitializedLocale) - { - // Do not call getLocale() here because that prefers - // convertBcp47ToLocale() which would end up in recursion via - // isIsoLocale()! - - // Prepare to verify that we have a known locale, not just an - // arbitrary MS-LangID. - convertLangToLocale(); - } - if (mbInitializedLocale) - { - if (maLocale.Variant.isEmpty()) - meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC] - else - { - if (!mbInitializedLangID) - { - convertLocaleToLang(); - if (bTemporaryLocale) - bTemporaryLangID = true; - } - if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM) - meIsLiblangtagNeeded = DECISION_NO; // known locale - } - } - if (bTemporaryLocale) - { - mbInitializedLocale = false; - maLocale = lang::Locale(); - } - if (bTemporaryLangID) - { - mbInitializedLangID = false; - mnLangID = LANGUAGE_DONTKNOW; - } - } - if (meIsLiblangtagNeeded == DECISION_NO) - { - meIsValid = DECISION_YES; // really, known must be valid ... - return true; // that's it - } - meIsLiblangtagNeeded = DECISION_YES; - SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47); - - if (!mpImplLangtag) - { - theDataRef::get().incRef(); - mpImplLangtag = lt_tag_new(); - } - - myLtError aError; - - if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p)) - { - char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p); - SAL_WARN_IF( !pTag, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47); - if (pTag) - { - OUString aOld( maBcp47); - maBcp47 = OUString::createFromAscii( pTag); - // Make the lt_tag_t follow the new string if different, which - // removes default script and such. - if (maBcp47 != aOld) - { - if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p)) - { - SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse " << maBcp47); - free( pTag); - meIsValid = DECISION_NO; - return false; - } - } - free( pTag); - meIsValid = DECISION_YES; - return true; - } - } - else - { - SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: could not parse " << maBcp47); - } - meIsValid = DECISION_NO; - return false; -} - - -void LanguageTag::convertLocaleToBcp47() -{ - if (mbSystemLocale && !mbInitializedLocale) - convertLangToLocale(); - - if (maLocale.Language == ISO639_LANGUAGE_TAG) - { - maBcp47 = maLocale.Variant; - meIsIsoLocale = DECISION_NO; - } - else - { - /* XXX NOTE: most legacy code never evaluated the Variant field, so for - * now just concatenate language and country. In case we stumbled over - * variant aware code we'd have to take care of that. */ - if (maLocale.Country.isEmpty()) - maBcp47 = maLocale.Language; - else - { - OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength()); - aBuf.append( maLocale.Language).append( '-').append( maLocale.Country); - maBcp47 = aBuf.makeStringAndClear(); - } - } - mbInitializedBcp47 = true; -} - - -void LanguageTag::convertLocaleToLang() -{ - if (mbSystemLocale) - { - mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); - } - else - { - /* FIXME: this is temporary until code base is converted to not use - * MsLangId::convert...() anymore. After that, proper new method has to - * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */ - mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale); - } - mbInitializedLangID = true; -} - - -void LanguageTag::convertBcp47ToLocale() -{ - bool bIso = isIsoLocale(); - if (bIso) - { - maLocale.Language = getLanguageFromLangtag(); - maLocale.Country = getRegionFromLangtag(); - maLocale.Variant = OUString(); - } - else - { - maLocale.Language = ISO639_LANGUAGE_TAG; - maLocale.Country = getCountry(); - maLocale.Variant = maBcp47; - } - mbInitializedLocale = true; -} - - -void LanguageTag::convertBcp47ToLang() -{ - if (mbSystemLocale) - { - mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); - } - else - { - /* FIXME: this is temporary. If we support locales that consist not - * only of language and country, e.g. added script, this probably needs - * to be adapted. */ - if (!mbInitializedLocale) - convertBcp47ToLocale(); - convertLocaleToLang(); - } - mbInitializedLangID = true; -} - - -void LanguageTag::convertLangToLocale() -{ - if (mbSystemLocale && !mbInitializedLangID) - { - mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM); - mbInitializedLangID = true; - } - /* FIXME: this is temporary until code base is converted to not use - * MsLangId::convert...() anymore. After that, proper new method has to be - * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */ - // Resolve system here! - maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true); - mbInitializedLocale = true; -} - - -void LanguageTag::convertLangToBcp47() -{ - /* FIXME: this is temporary. If we support locales that consist not only of - * language and country, e.g. added script, this probably needs to be - * adapted. */ - if (!mbInitializedLocale) - convertLangToLocale(); - convertLocaleToBcp47(); - mbInitializedBcp47 = true; -} - - -void LanguageTag::convertFromRtlLocale() -{ - // The rtl_Locale follows the Open Group Base Specification, - // 8.2 Internationalization Variables - // language[_territory][.codeset][@modifier] - // On GNU/Linux systems usually being glibc locales. - // sal/osl/unx/nlsupport.c _parse_locale() parses them into - // Language: language 2 or 3 alpha code - // Country: [territory] 2 alpha code - // Variant: [.codeset][@modifier] - // Variant effectively contains anything that follows the territory, not - // looking for '.' dot delimiter or '@' modifier content. - if (!maLocale.Variant.isEmpty()) - { - OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant, - RTL_TEXTENCODING_UTF8); - /* FIXME: let liblangtag parse this entirely with - * lt_tag_convert_from_locale() but that needs a patch to pass the - * string. */ -#if 0 - myLtError aError; - theDataRef::get().incRef(); - mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p); - maBcp47 = OStringToOUString( lt_tag_get_string( MPLANGTAG), RTL_TEXTENCODING_UTF8); - mbInitializedBcp47 = true; -#else - mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr); - if (mnLangID == LANGUAGE_DONTKNOW) - { - SAL_WARN( "i18npool.langtag", "LanguageTag(rtl_Locale) - unknown: " << aStr); - mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here - } - mbInitializedLangID = true; -#endif - maLocale = lang::Locale(); - mbInitializedLocale = false; - } -} - - -const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const -{ - if (!bResolveSystem && mbSystemLocale) - return theEmptyBcp47::get(); - if (!mbInitializedBcp47) - { - if (mbInitializedLocale) - const_cast<LanguageTag*>(this)->convertLocaleToBcp47(); - else - const_cast<LanguageTag*>(this)->convertLangToBcp47(); - } - return maBcp47; -} - - -OUString LanguageTag::getLanguageFromLangtag() -{ - OUString aLanguage; - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - canonicalize(); - if (maBcp47.isEmpty()) - return aLanguage; - if (mpImplLangtag) - { - const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG); - SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL"); - if (!pLangT) - return aLanguage; - const char* pLang = lt_lang_get_tag( pLangT); - SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL"); - if (pLang) - aLanguage = OUString::createFromAscii( pLang); - } - else - { - if (mbCachedLanguage || cacheSimpleLSC()) - aLanguage = maCachedLanguage; - } - return aLanguage; -} - - -OUString LanguageTag::getScriptFromLangtag() -{ - OUString aScript; - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - canonicalize(); - if (maBcp47.isEmpty()) - return aScript; - if (mpImplLangtag) - { - const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG); - // pScriptT==NULL is valid for default scripts - if (!pScriptT) - return aScript; - const char* pScript = lt_script_get_tag( pScriptT); - SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL"); - if (pScript) - aScript = OUString::createFromAscii( pScript); - } - else - { - if (mbCachedScript || cacheSimpleLSC()) - aScript = maCachedScript; - } - return aScript; -} - - -OUString LanguageTag::getRegionFromLangtag() -{ - OUString aRegion; - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - canonicalize(); - if (maBcp47.isEmpty()) - return aRegion; - if (mpImplLangtag) - { - const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG); - // pRegionT==NULL is valid for language only tags, rough check here - // that does not take sophisticated tags into account that actually - // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so - // that ll-CC and lll-CC actually fail. - SAL_WARN_IF( !pRegionT && - maBcp47.getLength() != 2 && maBcp47.getLength() != 3 && - maBcp47.getLength() != 7 && maBcp47.getLength() != 8, - "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL"); - if (!pRegionT) - return aRegion; - const char* pRegion = lt_region_get_tag( pRegionT); - SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL"); - if (pRegion) - aRegion = OUString::createFromAscii( pRegion); - } - else - { - if (mbCachedCountry || cacheSimpleLSC()) - aRegion = maCachedCountry; - } - return aRegion; -} - - -const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const -{ - if (!bResolveSystem && mbSystemLocale) - return theEmptyLocale::get(); - if (!mbInitializedLocale) - { - if (mbInitializedBcp47) - const_cast<LanguageTag*>(this)->convertBcp47ToLocale(); - else - const_cast<LanguageTag*>(this)->convertLangToLocale(); - } - return maLocale; -} - - -LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const -{ - if (!bResolveSystem && mbSystemLocale) - return LANGUAGE_SYSTEM; - if (!mbInitializedLangID) - { - if (mbInitializedBcp47) - const_cast<LanguageTag*>(this)->convertBcp47ToLang(); - else - const_cast<LanguageTag*>(this)->convertLocaleToLang(); - } - return mnLangID; -} - - -void LanguageTag::getIsoLanguageCountry( OUString& rLanguage, OUString& rCountry ) const -{ - if (!isIsoLocale()) - { - rLanguage = OUString(); - rCountry = OUString(); - return; - } - // After isIsoLocale() it's safe to call getLanguage() for ISO code. - rLanguage = getLanguage(); - rCountry = getCountry(); -} - - -namespace -{ - -bool isLowerAscii( sal_Unicode c ) -{ - return 'a' <= c && c <= 'z'; -} - -bool isUpperAscii( sal_Unicode c ) -{ - return 'A' <= c && c <= 'Z'; -} - -} - - -// static -bool LanguageTag::isIsoLanguage( const OUString& rLanguage ) -{ - /* TODO: ignore case? For now let's see where rubbish is used. */ - bool b2chars; - if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) && - isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) && - (b2chars || isLowerAscii( rLanguage[2]))) - return true; - SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) && - (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) || - (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag", - "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage); - return false; -} - - -// static -bool LanguageTag::isIsoCountry( const OUString& rRegion ) -{ - /* TODO: ignore case? For now let's see where rubbish is used. */ - if (rRegion.isEmpty() || - (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1]))) - return true; - SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])), - "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion); - return false; -} - - -// static -bool LanguageTag::isIsoScript( const OUString& rScript ) -{ - /* TODO: ignore case? For now let's see where rubbish is used. */ - if (rScript.isEmpty() || - (rScript.getLength() == 4 && - isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) && - isLowerAscii( rScript[2]) && isLowerAscii( rScript[3]))) - return true; - SAL_WARN_IF( rScript.getLength() == 4 && - (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) || - isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])), - "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript); - return false; -} - - -OUString LanguageTag::getLanguage() const -{ - if (!mbCachedLanguage) - { - maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag(); - mbCachedLanguage = true; - } - return maCachedLanguage; -} - - -OUString LanguageTag::getScript() const -{ - if (!mbCachedScript) - { - maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag(); - mbCachedScript = true; - } - return maCachedScript; -} - - -OUString LanguageTag::getLanguageAndScript() const -{ - OUString aLanguageScript( getLanguage()); - OUString aScript( getScript()); - if (!aScript.isEmpty()) - { - OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength()); - aBuf.append( aLanguageScript).append( '-').append( aScript); - aLanguageScript = aBuf.makeStringAndClear(); - } - return aLanguageScript; -} - - -OUString LanguageTag::getCountry() const -{ - if (!mbCachedCountry) - { - maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag(); - if (!isIsoCountry( maCachedCountry)) - maCachedCountry = OUString(); - mbCachedCountry = true; - } - return maCachedCountry; -} - - -OUString LanguageTag::getRegion() const -{ - return const_cast<LanguageTag*>(this)->getRegionFromLangtag(); -} - - -OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const -{ - OUString aRet; - if (isIsoLocale()) - { - OUString aCountry( getCountry()); - if (aCountry.isEmpty()) - aRet = getLanguage() + rEncoding; - else - aRet = getLanguage() + "_" + aCountry + rEncoding; - } - else - { - /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from - * i18npool/source/isolang/isolang.cxx or let liblangtag handle it. So - * far no code was prepared for anything else than a simple - * language_country locale so we don't loose anything here right now. - * */ - } - return aRet; -} - - -bool LanguageTag::hasScript() const -{ - if (!mbCachedScript) - getScript(); - return !maCachedScript.isEmpty(); -} - - -bool LanguageTag::cacheSimpleLSC() -{ - OUString aLanguage, aScript, aCountry; - bool bRet = (simpleExtract( maBcp47, aLanguage, aScript, aCountry) == EXTRACTED_LSC); - if (bRet) - { - maCachedLanguage = aLanguage; - maCachedScript = aScript; - maCachedCountry = aCountry; - mbCachedLanguage = mbCachedScript = mbCachedCountry = true; - } - return bRet; -} - - -bool LanguageTag::isIsoLocale() const -{ - if (meIsIsoLocale == DECISION_DONTKNOW) - { - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - const_cast<LanguageTag*>(this)->canonicalize(); - // It must be at most ll-CC or lll-CC - // Do not use getCountry() here, use getRegion() instead. - meIsIsoLocale = ((maBcp47.isEmpty() || - (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ? - DECISION_YES : DECISION_NO); - } - return meIsIsoLocale == DECISION_YES; -} - - -bool LanguageTag::isIsoODF() const -{ - if (meIsIsoODF == DECISION_DONTKNOW) - { - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - const_cast<LanguageTag*>(this)->canonicalize(); - if (!isIsoScript( getScript())) - return ((meIsIsoODF = DECISION_NO) == DECISION_YES); - // The usual case is lll-CC so simply check that first. - if (isIsoLocale()) - return ((meIsIsoODF = DECISION_YES) == DECISION_YES); - // If this is not ISO locale for which script must not exist it can - // still be ISO locale plus ISO script lll-Ssss-CC - meIsIsoODF = ((maBcp47.getLength() <= 11 && - isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ? - DECISION_YES : DECISION_NO); - } - return meIsIsoODF == DECISION_YES; -} - - -bool LanguageTag::isValidBcp47() const -{ - if (meIsValid == DECISION_DONTKNOW) - { - if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag) - const_cast<LanguageTag*>(this)->canonicalize(); - SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag", - "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid"); - } - return meIsValid == DECISION_YES; -} - - -bool LanguageTag::isSystemLocale() const -{ - return mbSystemLocale; -} - - -LanguageTag & LanguageTag::makeFallback() -{ - if (!mbIsFallback) - { - if (mbInitializedLangID) - { - LanguageType nLang1 = getLanguageType(); - LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1); - if (nLang1 != nLang2) - reset( nLang2); - } - else - { - const lang::Locale& rLocale1 = getLocale(); - lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1)); - if ( rLocale1.Language != aLocale2.Language || - rLocale1.Country != aLocale2.Country || - rLocale1.Variant != aLocale2.Variant) - reset( aLocale2); - } - mbIsFallback = true; - } - return *this; -} - - -::std::vector< OUString > LanguageTag::getFallbackStrings() const -{ - ::std::vector< OUString > aVec; - OUString aLanguage( getLanguage()); - OUString aCountry( getCountry()); - if (isIsoLocale()) - { - if (!aCountry.isEmpty()) - aVec.push_back( aLanguage + "-" + aCountry); - aVec.push_back( aLanguage); - return aVec; - } - aVec.push_back( getBcp47()); - OUString aTmp; - if (hasScript()) - { - OUString aScript( getScript()); - if (!aCountry.isEmpty()) - { - aTmp = aLanguage + "-" + aScript + "-" + aCountry; - if (aTmp != aVec[0]) - aVec.push_back( aTmp); - } - aTmp = aLanguage + "-" + aScript; - if (aTmp != aVec[0]) - aVec.push_back( aTmp); - } - if (!aCountry.isEmpty()) - { - aTmp = aLanguage + "-" + aCountry; - if (aTmp != aVec[0]) - aVec.push_back( aTmp); - } - aTmp = aLanguage; - if (aTmp != aVec[0]) - aVec.push_back( aTmp); - return aVec; -} - - -bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const -{ - // If SYSTEM is not to be resolved or either both are SYSTEM or none, we - // can use the operator==() optimization. - if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale()) - return operator==( rLanguageTag); - - // Compare full language tag strings. - return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem); -} - - -bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const -{ - if (isSystemLocale() && rLanguageTag.isSystemLocale()) - return true; // both SYSTEM - - // No need to convert to BCP47 if both Lang-IDs are available. - if (mbInitializedLangID && rLanguageTag.mbInitializedLangID) - { - // Equal if same ID and no SYSTEM is involved or both are SYSTEM. - return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale(); - } - - // Compare full language tag strings but SYSTEM unresolved. - return getBcp47( false) == rLanguageTag.getBcp47( false); -} - - -bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const -{ - return !operator==( rLanguageTag); -} - - -// static -LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47, - OUString& rLanguage, OUString& rScript, OUString& rCountry ) -{ - Extraction eRet = EXTRACTED_NONE; - const sal_Int32 nLen = rBcp47.getLength(); - const sal_Int32 nHyph1 = rBcp47.indexOf( '-'); - if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker - { - // It's f*d up but we need to recognize this. - eRet = EXTRACTED_X_JOKER; - } - else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse - { - // x-... privateuse tags MUST be known to us by definition. - eRet = EXTRACTED_X; - } - else if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll - { - rLanguage = rBcp47; - rScript = rCountry = OUString(); - eRet = EXTRACTED_LSC; - } - else if ( (nLen == 5 && nHyph1 == 2) // ll-CC - || (nLen == 6 && nHyph1 == 3)) // lll-CC - { - rLanguage = rBcp47.copy( 0, nHyph1); - rCountry = rBcp47.copy( nHyph1 + 1, 2); - rScript = OUString(); - eRet = EXTRACTED_LSC; - } - else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check - || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check - { - const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1); - if (nHyph2 == nHyph1 + 5) - { - rLanguage = rBcp47.copy( 0, nHyph1); - rScript = rBcp47.copy( nHyph1 + 1, 4); - rCountry = rBcp47.copy( nHyph2 + 1, 2); - eRet = EXTRACTED_LSC; - } - } - if (eRet == EXTRACTED_NONE) - rLanguage = rScript = rCountry = OUString(); - return eRet; -} - - -// static -::std::vector< OUString >::const_iterator LanguageTag::getFallback( - const ::std::vector< OUString > & rList, const OUString & rReference ) -{ - if (rList.empty()) - return rList.end(); - - ::std::vector< OUString >::const_iterator it; - - // Try the simple case first without constructing fallbacks. - for (it = rList.begin(); it != rList.end(); ++it) - { - if (*it == rReference) - return it; // exact match - } - - ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings()); - aFallbacks.erase( aFallbacks.begin()); // first is full BCP47, we already checked that - if (rReference != "en-US") - { - aFallbacks.push_back( "en-US"); - if (rReference != "en") - aFallbacks.push_back( "en"); - } - if (rReference != "x-default") - aFallbacks.push_back( "x-default"); - if (rReference != "x-no-translate") - aFallbacks.push_back( "x-no-translate"); - /* TODO: the original comphelper::Locale::getFallback() code had - * "x-notranslate" instead of "x-no-translate", but all .xcu files use - * "x-no-translate" and "x-notranslate" apparently was never used anywhere. - * Did that ever work? Was it supposed to work at all like this? */ - - for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb) - { - for (it = rList.begin(); it != rList.end(); ++it) - { - if (*it == *fb) - return it; // fallback found - } - } - - // Did not find anything so return something of the list, the first value - // will do as well as any other as none did match any of the possible - // fallbacks. - return rList.begin(); -} - - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/languagetag/simple-langtag.cxx b/i18npool/source/languagetag/simple-langtag.cxx deleted file mode 100644 index d96f721dbeef..000000000000 --- a/i18npool/source/languagetag/simple-langtag.cxx +++ /dev/null @@ -1,400 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ - -/** Cheap and cheesy replacement code for liblangtag on systems that do not - allow / want LGPL code or dependencies on glib. - - XXX NOTE: This code does not check language tags for validity or if they - are registered with IANA, does not canonicalize or strip default script - tags if included nor does it do any other fancy stuff that liblangtag is - capable of. It just makes depending code work without. - */ - -#include <cstdlib> -#include <cstring> -#include <cstdio> - -namespace { - -typedef int lt_bool_t; - -struct lt_error_t { - void *something; - lt_error_t() : something(NULL) {} -}; - -static void* g_malloc(size_t s) -{ - return malloc(s); -} - -static void g_free(void* p) -{ - if (p) - free(p); -} - -static void lt_error_unref(lt_error_t *error) -{ - if (error) - { - g_free( error->something); - g_free( error); - } -} - -struct my_ref -{ - sal_uInt32 mnRef; - explicit my_ref() : mnRef(1) {} - virtual ~my_ref() {} - void incRef() { ++mnRef; } - void decRef() { if (--mnRef == 0) delete this; } -}; - -struct my_t_impl : public my_ref -{ - char* mpStr; - explicit my_t_impl() : my_ref(), mpStr(NULL) {} - virtual ~my_t_impl() { g_free( mpStr); } - explicit my_t_impl( const my_t_impl& r ) - : - my_ref(), - mpStr(r.mpStr ? strdup( r.mpStr) : NULL) - { - } - my_t_impl& operator=( const my_t_impl& r ) - { - if (this == &r) - return *this; - g_free( mpStr); - mpStr = (r.mpStr ? strdup( r.mpStr) : NULL); - return *this; - } - void assign( const char* str ) - { - g_free( mpStr); - mpStr = (str ? strdup( str) : NULL); - } - void assign( const char* str, const char* stop ) - { - g_free( mpStr); - if (str && str < stop) - { - mpStr = static_cast<char*>(g_malloc( stop - str + 1)); - memcpy( mpStr, str, stop - str); - mpStr[stop - str] = 0; - } - else - mpStr = NULL; - } - void append( const char* str, const char* stop ) - { - if (str && str < stop) - { - size_t nOld = mpStr ? strlen( mpStr) : 0; - size_t nNew = nOld + (stop - str) + 1; - char* p = static_cast<char*>(g_malloc( nNew)); - if (nOld) - memcpy( p, mpStr, nOld); - memcpy( p + nOld, str, stop - str); - p[nNew-1] = 0; - g_free( mpStr); - mpStr = p; - } - } - void zero() - { - g_free( mpStr); - mpStr = NULL; - } -}; - -struct lt_lang_t : public my_t_impl -{ - explicit lt_lang_t() : my_t_impl() {} - virtual ~lt_lang_t() {} -}; - -struct lt_script_t : public my_t_impl -{ - explicit lt_script_t() : my_t_impl() {} - virtual ~lt_script_t() {} -}; - -struct lt_region_t : public my_t_impl -{ - explicit lt_region_t() : my_t_impl() {} - virtual ~lt_region_t() {} -}; - -struct lt_tag_t : public my_t_impl -{ - lt_lang_t maLanguage; - lt_script_t maScript; - lt_region_t maRegion; - explicit lt_tag_t() : my_t_impl(), maLanguage(), maScript(), maRegion() {} - virtual ~lt_tag_t() {} - explicit lt_tag_t( const lt_tag_t& r ) - : - my_t_impl( r), - maLanguage( r.maLanguage), - maScript( r.maScript), - maRegion( r.maRegion) - { - } - lt_tag_t& operator=( const lt_tag_t& r ) - { - if (this == &r) - return *this; - my_t_impl::operator=( r); - maLanguage = r.maLanguage; - maScript = r.maScript; - maRegion = r.maRegion; - return *this; - } - void assign( const char* str ) - { - maLanguage.zero(); - maScript.zero(); - maRegion.zero(); - my_t_impl::assign( str); - } -}; - -static void lt_db_initialize() { } -static void lt_db_finalize() { } -static void lt_db_set_datadir( const char* /* dir */ ) { } - -static lt_tag_t* lt_tag_new(void) -{ - return new lt_tag_t; -} - -static lt_tag_t* lt_tag_copy(lt_tag_t *tag) -{ - return (tag ? new lt_tag_t( *tag) : NULL); -} - -static void lt_tag_unref(lt_tag_t *tag) -{ - if (tag) - tag->decRef(); -} - -/** See http://tools.ietf.org/html/rfc5646 - - We are simply ignorant of grandfathered (irregular and regular) subtags and - may either bail out or accept them, sorry (or not). However, we do accept - any i-* irregular and x-* privateuse. Subtags are not checked for validity - (alpha, digit, registered, ...). - */ -static lt_bool_t lt_tag_parse(lt_tag_t *tag, - const char *tag_string, - lt_error_t **error) -{ - (void) error; - if (!tag) - return 0; - tag->assign( tag_string); - if (!tag_string) - return 0; - // In case we supported other subtags this would get more complicated. - my_t_impl* aSubtags[] = { &tag->maLanguage, &tag->maScript, &tag->maRegion, NULL }; - my_t_impl** ppSub = &aSubtags[0]; - const char* pStart = tag_string; - const char* p = pStart; - const char* pEnd = pStart + strlen( pStart); // scanning includes \0 - bool bStartLang = true; - bool bPrivate = false; - for ( ; p <= pEnd && ppSub && *ppSub; ++p) - { - if (p == pEnd || *p == '-') - { - size_t nLen = p - pStart; - if (*ppSub == &tag->maLanguage) - { - if (bStartLang) - { - bStartLang = false; - switch (nLen) - { - case 1: // irregular or privateuse - if (*pStart == 'i' || *pStart == 'x') - { - (*ppSub)->assign( pStart, p); - bPrivate = true; - } - else - return 0; // bad - break; - case 2: // ISO 639 alpha-2 - case 3: // ISO 639 alpha-3 - (*ppSub)->assign( pStart, p); - break; - case 4: // reserved for future use - return 0; // bad - break; - case 5: - case 6: - case 7: - case 8: // registered language subtag - (*ppSub++)->assign( pStart, p); - break; - default: - return 0; // bad - } - } - else - { - if (nLen > 8) - return 0; // bad - if (bPrivate) - { - // Any combination of "x" 1*("-" (2*8alphanum)) - // allowed, store first as language and return ok. - // For i-* simply assume the same. - (*ppSub)->append( pStart-1, p); - return !0; // ok - } - else if (nLen == 3) - { - // extlang subtag, 1 to 3 allowed we don't check that. - // But if it's numeric it's a region UN M.49 code - // instead and no script subtag is present, so advance. - if ('0' <= *pStart && *pStart <= '9') - { - ppSub += 2; // &tag->maRegion XXX watch this when inserting fields - --p; - continue; // for - } - else - (*ppSub)->append( pStart-1, p); - } - else - { - // Not part of language subtag, advance. - ++ppSub; - --p; - continue; // for - } - } - } - else if (*ppSub == &tag->maScript) - { - switch (nLen) - { - case 4: - // script subtag, or a (DIGIT 3alphanum) variant with - // no script and no region in which case we stop - // parsing. - if ('0' <= *pStart && *pStart <= '9') - ppSub = NULL; - else - (*ppSub++)->assign( pStart, p); - break; - case 3: - // This may be a region UN M.49 code if 3DIGIT and no - // script code present. Just check first character and - // advance. - if ('0' <= *pStart && *pStart <= '9') - { - ++ppSub; - --p; - continue; // for - } - else - return 0; // bad - break; - case 2: - // script omitted, region subtag, advance. - ++ppSub; - --p; - continue; // for - break; - case 1: - // script omitted, region omitted, extension subtag - // with singleton, stop parsing - ppSub = NULL; - break; - case 5: - case 6: - case 7: - case 8: - // script omitted, region omitted, variant subtag, stop - // parsing. - ppSub = NULL; - default: - return 0; // bad - } - } - else if (*ppSub == &tag->maRegion) - { - if (nLen == 2 || nLen == 3) - (*ppSub++)->assign( pStart, p); - else - return 0; // bad - } - pStart = p+1; - } - } - return !0; -} - -static char* lt_tag_canonicalize(lt_tag_t *tag, - lt_error_t **error) -{ - (void) error; - return tag && tag->mpStr ? strdup( tag->mpStr) : NULL; -} - -static const lt_lang_t* lt_tag_get_language(const lt_tag_t *tag) -{ - return tag && tag->maLanguage.mpStr ? &tag->maLanguage : NULL; -} - -static const lt_script_t *lt_tag_get_script(const lt_tag_t *tag) -{ - return tag && tag->maScript.mpStr ? &tag->maScript : NULL; -} - -static const lt_region_t *lt_tag_get_region(const lt_tag_t *tag) -{ - return tag && tag->maRegion.mpStr ? &tag->maRegion : NULL; -} - -static const char *lt_lang_get_tag(const lt_lang_t *lang) -{ - return lang ? lang->mpStr : NULL; -} - -static const char *lt_script_get_tag(const lt_script_t *script) -{ - return script ? script->mpStr : NULL; -} - -static const char *lt_region_get_tag(const lt_region_t *region) -{ - return region ? region->mpStr : NULL; -} - -#ifdef erDEBUG -static void lt_tag_dump(const lt_tag_t *tag) -{ - fprintf( stderr, "\n"); - fprintf( stderr, "SimpleLangtag langtag: %s\n", tag->mpStr); - fprintf( stderr, "SimpleLangtag language: %s\n", tag->maLanguage.mpStr); - fprintf( stderr, "SimpleLangtag script: %s\n", tag->maScript.mpStr); - fprintf( stderr, "SimpleLangtag region: %s\n", tag->maRegion.mpStr); -} -#endif - -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/localedata/localedata.cxx b/i18npool/source/localedata/localedata.cxx index ae2033570b33..66b51747bd1f 100644 --- a/i18npool/source/localedata/localedata.cxx +++ b/i18npool/source/localedata/localedata.cxx @@ -19,7 +19,7 @@ #include <localedata.hxx> -#include <i18npool/mslangid.hxx> +#include <i18nlangtag/mslangid.hxx> #include <rtl/ustrbuf.hxx> #include <string.h> #include <stdio.h> diff --git a/i18npool/source/nativenumber/nativenumbersupplier.cxx b/i18npool/source/nativenumber/nativenumbersupplier.cxx index c02562be385c..8de5a8cefc78 100644 --- a/i18npool/source/nativenumber/nativenumbersupplier.cxx +++ b/i18npool/source/nativenumber/nativenumbersupplier.cxx @@ -17,7 +17,7 @@ * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ -#include <i18npool/mslangid.hxx> +#include <i18nlangtag/mslangid.hxx> #include <rtl/ustrbuf.hxx> #include <sal/macros.h> #include <nativenumbersupplier.hxx> |