diff options
Diffstat (limited to 'i18npool/source/isolang')
-rw-r--r-- | i18npool/source/isolang/insys.cxx | 43 | ||||
-rw-r--r-- | i18npool/source/isolang/inunx.cxx | 150 | ||||
-rw-r--r-- | i18npool/source/isolang/inwnt.cxx | 107 | ||||
-rw-r--r-- | i18npool/source/isolang/isolang.cxx | 1105 | ||||
-rwxr-xr-x | i18npool/source/isolang/langid.pl | 424 | ||||
-rw-r--r-- | i18npool/source/isolang/lcid.awk | 171 | ||||
-rw-r--r-- | i18npool/source/isolang/makefile.mk | 69 | ||||
-rw-r--r-- | i18npool/source/isolang/mslangid.cxx | 467 |
8 files changed, 2536 insertions, 0 deletions
diff --git a/i18npool/source/isolang/insys.cxx b/i18npool/source/isolang/insys.cxx new file mode 100644 index 000000000000..65d78ef8efe4 --- /dev/null +++ b/i18npool/source/isolang/insys.cxx @@ -0,0 +1,43 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_i18npool.hxx" + +#if defined( WNT ) + +#include "inwnt.cxx" + +#elif defined( UNX ) || defined( OS2 ) + +#include "inunx.cxx" + +#else + +#error unknown platform + +#endif diff --git a/i18npool/source/isolang/inunx.cxx b/i18npool/source/isolang/inunx.cxx new file mode 100644 index 000000000000..5450f8b24c66 --- /dev/null +++ b/i18npool/source/isolang/inunx.cxx @@ -0,0 +1,150 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +// no include "precompiled_i18npool.hxx" because this file is included in insys.cxx + +#include <stdlib.h> // for getenv() +#include <stdio.h> + +#ifdef MACOSX +#include <osl/process.h> +#include <rtl/locale.h> +#include <rtl/ustring.hxx> + +#else // MACOSX +#include <rtl/string.hxx> + +#endif // MACOSX +#include <rtl/instance.hxx> +#include "i18npool/mslangid.hxx" + +// ======================================================================= + +static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; +static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; + +// ----------------------------------------------------------------------- + +// Get locale of category LC_CTYPE of environment variables +static const sal_Char* getLangFromEnvironment() +{ + static const sal_Char* pFallback = "C"; + const sal_Char *pLang = NULL; + + pLang = getenv ( "LC_ALL" ); + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_CTYPE" ); + if (! 
pLang || pLang[0] == 0) + pLang = getenv( "LANG" ); + if (! pLang || pLang[0] == 0) + pLang = pFallback; + + return pLang; +} + +// ----------------------------------------------------------------------- + +// Get locale of category LC_MESSAGES of environment variables +static const sal_Char* getUILangFromEnvironment() +{ + static const sal_Char* pFallback = "C"; + const sal_Char *pLang = NULL; + + pLang = getenv ( "LANGUAGE" ); // respect the GNU extension + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_ALL" ); + if (! pLang || pLang[0] == 0) + pLang = getenv ( "LC_MESSAGES" ); + if (! pLang || pLang[0] == 0) + pLang = getenv( "LANG" ); + if (! pLang || pLang[0] == 0) + pLang = pFallback; + + return pLang; +} + +// ----------------------------------------------------------------------- + +typedef const sal_Char * (*getLangFromEnv)(); + +static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage, + getLangFromEnv pGetLangFromEnv ) +{ + /* get the language from the user environment */ + LanguageType nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { + ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex()); + nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { +#ifdef MACOSX + rtl_Locale *procLocale; + (void) pGetLangFromEnv; /* unused */ + + if ( osl_getProcessLocale(&procLocale) == osl_Process_E_None ) + { + rtl::OUString rLang( procLocale->Language ); + rtl::OUString rCountry( procLocale->Country ); + + nLang = MsLangId::convertIsoNamesToLanguage( rLang, rCountry ); + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + rSystemLanguage = nLang; +#ifdef DEBUG + if ( rSystemLanguage == LANGUAGE_DONTKNOW ) + fprintf( stderr, "intnunx.cxx: failed to convert osl_getProcessLocale() language to system language.\n" ); +#endif + } +#else /* MACOSX */ + rtl::OString aUnxLang( (pGetLangFromEnv)() ); + nLang = MsLangId::convertUnxByteStringToLanguage( aUnxLang ); + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + rSystemLanguage = 
nLang; +#endif /* MACOSX */ + } + else { + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + } + } +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemLanguage() +{ + getPlatformSystemLanguageImpl( nImplSystemLanguage, &getLangFromEnvironment); + return nImplSystemLanguage; +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemUILanguage() +{ + getPlatformSystemLanguageImpl( nImplSystemUILanguage, &getUILangFromEnvironment); + return nImplSystemUILanguage; +} diff --git a/i18npool/source/isolang/inwnt.cxx b/i18npool/source/isolang/inwnt.cxx new file mode 100644 index 000000000000..2e07e8ea1322 --- /dev/null +++ b/i18npool/source/isolang/inwnt.cxx @@ -0,0 +1,107 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +// no include "precompiled_i18npool.hxx" because this file is included in insys.cxx + +#include <sal/config.h> + +#ifdef _MSC_VER +#pragma warning(push,1) // disable warnings within system headers +#endif +#include <windef.h> // needed by winnls.h +#include <winbase.h> // needed by winnls.h +#include <winnls.h> +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#include <rtl/instance.hxx> +#include "i18npool/mslangid.hxx" + +static LanguageType nImplSystemLanguage = LANGUAGE_DONTKNOW; +static LanguageType nImplSystemUILanguage = LANGUAGE_DONTKNOW; + +// ======================================================================= + +static LanguageType GetSVLang( LANGID nWinLangId ) +{ + // No Translation, we work with the original MS code without the SORT_ID. + // So we can get never LANG-ID's from MS, which are currently not defined + // by us. + return LanguageType( static_cast<sal_uInt16>(nWinLangId & 0xffff)); +} + +// ----------------------------------------------------------------------- + +typedef LANGID (WINAPI *getLangFromEnv)(); + +static void getPlatformSystemLanguageImpl( LanguageType& rSystemLanguage, + getLangFromEnv pGetUserDefault, getLangFromEnv pGetSystemDefault ) +{ + LanguageType nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { + ::osl::MutexGuard aGuard( ::osl::Mutex::getGlobalMutex()); + nLang = rSystemLanguage; + if ( nLang == LANGUAGE_DONTKNOW ) + { + LANGID nLangId; + + nLangId = (pGetUserDefault)(); + nLang = GetSVLang( nLangId ); + + if ( nLang == LANGUAGE_DONTKNOW ) + { + nLangId = (pGetSystemDefault)(); + nLang = GetSVLang( nLangId ); + } + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + rSystemLanguage = nLang; + } + else + OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); + } +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemLanguage() +{ + getPlatformSystemLanguageImpl( 
nImplSystemLanguage, + &GetUserDefaultLangID, &GetSystemDefaultLangID); + return nImplSystemLanguage; +} + +// ----------------------------------------------------------------------- + +LanguageType MsLangId::getPlatformSystemUILanguage() +{ + // TODO: this could be distinguished, #if(WINVER >= 0x0500) + // needs _run_ time differentiation though, not at compile time. + getPlatformSystemLanguageImpl( nImplSystemUILanguage, + &GetUserDefaultUILanguage, &GetSystemDefaultUILanguage); + return nImplSystemUILanguage; +} diff --git a/i18npool/source/isolang/isolang.cxx b/i18npool/source/isolang/isolang.cxx new file mode 100644 index 000000000000..357be80a69ea --- /dev/null +++ b/i18npool/source/isolang/isolang.cxx @@ -0,0 +1,1105 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_i18npool.hxx" +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/strbuf.hxx> + +#include "i18npool/mslangid.hxx" + +// ======================================================================= + +struct IsoLangEngEntry +{ + LanguageType mnLang; + sal_Char maCountry[3]; +}; + +struct IsoLangNoneStdEntry +{ + LanguageType mnLang; + sal_Char maLangStr[4]; + sal_Char maCountry[9]; +}; + +struct IsoLangOtherEntry +{ + LanguageType mnLang; + const sal_Char* mpLangStr; +}; + +// ----------------------------------------------------------------------- + +// Entries for languages are lower case, for countries upper case, as +// recommended by rfc4646 (obsoletes rfc3066 (obsoletes rfc1766)). +// convertIsoNamesToLanguage() is case insensitive +// +// Sort order: Most used first. +// +// The default entry for a LangID <-> ISO mapping has to be first. For +// conversion of legacy mappings one LangID can map to multiple ISO codes, and +// one ISO code combination can map to multiple LangIDs. For compatibility with +// already existing calls it can also be a sequence as follows: + +// LANGUAGE_ENGLISH, "en", "" +// LANGUAGE_ENGLISH_US, "en", "US" + +// Here, in a convertIsoNamesToLanguage() call "en-US" is converted to +// LANGUAGE_ENGLISH_US and "en" is converted to LANGUAGE_ENGLISH. A call with +// "en-ZZ" (not in table) would result in LANGUAGE_ENGLISH because the first +// entry matching the language and not having a country is returned, regardless +// of whether being sorted before or after other entries of the same language +// with some country. To obtain a _locale_ (not language only) in the order +// given, convertLocaleToLanguageWithFallback() must be called. 
+ +// If the sequence instead was + +// LANGUAGE_ENGLISH_US, "en", "US" +// LANGUAGE_ENGLISH, "en", "" + +// in a convertIsoNamesToLanguage() call "en-US" is still converted to +// LANGUAGE_ENGLISH_US, but "en" is _also_ converted to LANGUAGE_ENGLISH_US +// because no country was passed and it is the first entry to match the +// language, see code. A call with "en-ZZ" (not in table) would still result in +// LANGUAGE_ENGLISH. + +/* erAck: 2007-07-05T20:01+0200 TODO: The entire suite's "primary language + * only" usage and locale fall back should be cleaned up and made consistent. I + * strongly doubt that most callers exactly expect the behavior described. + * Currently these primary LangIDs are used literally in OOo code: + * LANGUAGE_ENGLISH LANGUAGE_CHINESE LANGUAGE_MALAY + * LANGUAGE_AZERI LANGUAGE_URDU LANGUAGE_KASHMIRI + */ + +static MsLangId::IsoLangEntry const aImplIsoLangEntries[] = +{ + // MS-LANGID codes ISO639-1/2/3 ISO3166 + { LANGUAGE_ENGLISH, "en", "" }, + { LANGUAGE_ENGLISH_US, "en", "US" }, + { LANGUAGE_ENGLISH_UK, "en", "GB" }, + { LANGUAGE_ENGLISH_AUS, "en", "AU" }, + { LANGUAGE_ENGLISH_CAN, "en", "CA" }, + { LANGUAGE_FRENCH, "fr", "FR" }, + { LANGUAGE_FRENCH, "fr", "" }, + { LANGUAGE_GERMAN, "de", "DE" }, + { LANGUAGE_ITALIAN, "it", "IT" }, + { LANGUAGE_DUTCH, "nl", "NL" }, + { LANGUAGE_SPANISH_MODERN, "es", "ES" }, + { LANGUAGE_SPANISH_DATED, "es", "ES" }, + { LANGUAGE_PORTUGUESE, "pt", "PT" }, + { LANGUAGE_PORTUGUESE_BRAZILIAN, "pt", "BR" }, + { LANGUAGE_DANISH, "da", "DK" }, + { LANGUAGE_GREEK, "el", "GR" }, + { LANGUAGE_CHINESE, "zh", "" }, + { LANGUAGE_CHINESE_SIMPLIFIED, "zh", "CN" }, + { LANGUAGE_CHINESE_TRADITIONAL, "zh", "TW" }, + { LANGUAGE_CHINESE_HONGKONG, "zh", "HK" }, + { LANGUAGE_CHINESE_SINGAPORE, "zh", "SG" }, + { LANGUAGE_CHINESE_MACAU, "zh", "MO" }, + { LANGUAGE_ENGLISH_HONG_KONG_SAR, "en", "HK" }, + { LANGUAGE_JAPANESE, "ja", "JP" }, + { LANGUAGE_KOREAN, "ko", "KR" }, + { LANGUAGE_KOREAN_JOHAB, "ko", "KR" }, + { 
LANGUAGE_USER_KOREAN_NORTH, "ko", "KP" }, + { LANGUAGE_SWEDISH, "sv", "SE" }, + { LANGUAGE_SWEDISH_FINLAND, "sv", "FI" }, + { LANGUAGE_FINNISH, "fi", "FI" }, + { LANGUAGE_RUSSIAN, "ru", "RU" }, + { LANGUAGE_TATAR, "tt", "RU" }, + { LANGUAGE_ENGLISH_NZ, "en", "NZ" }, + { LANGUAGE_ENGLISH_EIRE, "en", "IE" }, + { LANGUAGE_DUTCH_BELGIAN, "nl", "BE" }, + { LANGUAGE_FRENCH_BELGIAN, "fr", "BE" }, + { LANGUAGE_FRENCH_CANADIAN, "fr", "CA" }, + { LANGUAGE_FRENCH_SWISS, "fr", "CH" }, + { LANGUAGE_GERMAN_SWISS, "de", "CH" }, + { LANGUAGE_GERMAN_AUSTRIAN, "de", "AT" }, + { LANGUAGE_ITALIAN_SWISS, "it", "CH" }, + { LANGUAGE_ALBANIAN, "sq", "AL" }, + { LANGUAGE_ARABIC_SAUDI_ARABIA, "ar", "SA" }, + { LANGUAGE_ARABIC_EGYPT, "ar", "EG" }, + { LANGUAGE_ARABIC_UAE, "ar", "AE" }, + { LANGUAGE_ARABIC_IRAQ, "ar", "IQ" }, + { LANGUAGE_ARABIC_LIBYA, "ar", "LY" }, + { LANGUAGE_ARABIC_ALGERIA, "ar", "DZ" }, + { LANGUAGE_ARABIC_MOROCCO, "ar", "MA" }, + { LANGUAGE_ARABIC_TUNISIA, "ar", "TN" }, + { LANGUAGE_ARABIC_OMAN, "ar", "OM" }, + { LANGUAGE_ARABIC_YEMEN, "ar", "YE" }, + { LANGUAGE_ARABIC_SYRIA, "ar", "SY" }, + { LANGUAGE_ARABIC_JORDAN, "ar", "JO" }, + { LANGUAGE_ARABIC_LEBANON, "ar", "LB" }, + { LANGUAGE_ARABIC_KUWAIT, "ar", "KW" }, + { LANGUAGE_ARABIC_BAHRAIN, "ar", "BH" }, + { LANGUAGE_ARABIC_QATAR, "ar", "QA" }, + { LANGUAGE_USER_ARABIC_CHAD, "ar", "TD" }, + { LANGUAGE_USER_ARABIC_COMOROS, "ar", "KM" }, + { LANGUAGE_USER_ARABIC_DJIBOUTI, "ar", "DJ" }, + { LANGUAGE_USER_ARABIC_ERITREA, "ar", "ER" }, + { LANGUAGE_USER_ARABIC_ISRAEL, "ar", "IL" }, + { LANGUAGE_USER_ARABIC_MAURITANIA, "ar", "MR" }, + { LANGUAGE_USER_ARABIC_PALESTINE, "ar", "PS" }, + { LANGUAGE_USER_ARABIC_SOMALIA, "ar", "SO" }, + { LANGUAGE_USER_ARABIC_SUDAN, "ar", "SD" }, + { LANGUAGE_ARABIC_PRIMARY_ONLY, "ar", "" }, + { LANGUAGE_BASQUE, "eu", "" }, + { LANGUAGE_BULGARIAN, "bg", "BG" }, + { LANGUAGE_CZECH, "cs", "CZ" }, + { LANGUAGE_CZECH, "cz", "" }, + { LANGUAGE_ENGLISH_JAMAICA, "en", "JM" }, + { 
LANGUAGE_ENGLISH_CARRIBEAN, "en", "BS" }, // not 100%, because AG is Bahamas + { LANGUAGE_ENGLISH_BELIZE, "en", "BZ" }, + { LANGUAGE_ENGLISH_TRINIDAD, "en", "TT" }, + { LANGUAGE_ENGLISH_ZIMBABWE, "en", "ZW" }, + { LANGUAGE_ENGLISH_INDONESIA, "en", "ID" }, + { LANGUAGE_ESTONIAN, "et", "EE" }, + { LANGUAGE_FAEROESE, "fo", "FO" }, + { LANGUAGE_FARSI, "fa", "IR" }, + { LANGUAGE_FRENCH_LUXEMBOURG, "fr", "LU" }, + { LANGUAGE_FRENCH_MONACO, "fr", "MC" }, + { LANGUAGE_GERMAN_LUXEMBOURG, "de", "LU" }, + { LANGUAGE_GERMAN_LIECHTENSTEIN, "de", "LI" }, + { LANGUAGE_HEBREW, "he", "IL" }, // new: old was "iw" + { LANGUAGE_HEBREW, "iw", "IL" }, // old: new is "he" + { LANGUAGE_HUNGARIAN, "hu", "HU" }, + { LANGUAGE_ICELANDIC, "is", "IS" }, + { LANGUAGE_INDONESIAN, "id", "ID" }, // new: old was "in" + { LANGUAGE_INDONESIAN, "in", "ID" }, // old: new is "id" + { LANGUAGE_NORWEGIAN, "no", "NO" }, + { LANGUAGE_NORWEGIAN_BOKMAL, "nb", "NO" }, + { LANGUAGE_NORWEGIAN_NYNORSK, "nn", "NO" }, + { LANGUAGE_POLISH, "pl", "PL" }, + { LANGUAGE_RHAETO_ROMAN, "rm", "CH" }, + { LANGUAGE_ROMANIAN, "ro", "RO" }, + { LANGUAGE_ROMANIAN_MOLDOVA, "ro", "MD" }, + { LANGUAGE_SLOVAK, "sk", "SK" }, + { LANGUAGE_SLOVENIAN, "sl", "SI" }, + { LANGUAGE_SPANISH_MEXICAN, "es", "MX" }, + { LANGUAGE_SPANISH_GUATEMALA, "es", "GT" }, + { LANGUAGE_SPANISH_COSTARICA, "es", "CR" }, + { LANGUAGE_SPANISH_PANAMA, "es", "PA" }, + { LANGUAGE_SPANISH_DOMINICAN_REPUBLIC, "es", "DO" }, + { LANGUAGE_SPANISH_VENEZUELA, "es", "VE" }, + { LANGUAGE_SPANISH_COLOMBIA, "es", "CO" }, + { LANGUAGE_SPANISH_PERU, "es", "PE" }, + { LANGUAGE_SPANISH_ARGENTINA, "es", "AR" }, + { LANGUAGE_SPANISH_ECUADOR, "es", "EC" }, + { LANGUAGE_SPANISH_CHILE, "es", "CL" }, + { LANGUAGE_SPANISH_URUGUAY, "es", "UY" }, + { LANGUAGE_SPANISH_PARAGUAY, "es", "PY" }, + { LANGUAGE_SPANISH_BOLIVIA, "es", "BO" }, + { LANGUAGE_SPANISH_EL_SALVADOR, "es", "SV" }, + { LANGUAGE_SPANISH_HONDURAS, "es", "HN" }, + { LANGUAGE_SPANISH_NICARAGUA, "es", "NI" }, + { 
LANGUAGE_SPANISH_PUERTO_RICO, "es", "PR" }, + { LANGUAGE_SPANISH_UNITED_STATES, "es", "US" }, + { LANGUAGE_SPANISH_LATIN_AMERICA, "es", "" }, + { LANGUAGE_TURKISH, "tr", "TR" }, + { LANGUAGE_UKRAINIAN, "uk", "UA" }, + { LANGUAGE_VIETNAMESE, "vi", "VN" }, + { LANGUAGE_LATVIAN, "lv", "LV" }, + { LANGUAGE_MACEDONIAN, "mk", "MK" }, + { LANGUAGE_MALAY, "ms", "" }, + { LANGUAGE_MALAY_MALAYSIA, "ms", "MY" }, + { LANGUAGE_MALAY_BRUNEI_DARUSSALAM, "ms", "BN" }, + { LANGUAGE_ENGLISH_MALAYSIA, "en", "MY" }, + { LANGUAGE_THAI, "th", "TH" }, + { LANGUAGE_LITHUANIAN, "lt", "LT" }, + { LANGUAGE_LITHUANIAN_CLASSIC, "lt", "LT" }, + { LANGUAGE_CROATIAN, "hr", "HR" }, // Croatian in Croatia + { LANGUAGE_CROATIAN_BOSNIA_HERZEGOVINA, "hr", "BA" }, + { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs", "BA" }, +// { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_AND_HERZEGOVINA, "bs", "BA" }, // script codes not supported yet + { LANGUAGE_USER_SERBIAN_CYRILLIC_SERBIA, "sr", "RS" }, // Serbian Cyrillic in Serbia + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "YU" }, // legacy Serbian Cyrillic in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_CS instead, sr_CS not supported by ICU 2.6 (3.4 does) + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "CS" }, // alias to be able to integrate localizations, rsc needs it + { LANGUAGE_USER_SERBIAN_CYRILLIC_MONTENEGRO, "sr", "ME" }, + { LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "sr", "BA" }, + { LANGUAGE_SERBIAN, "sr", "" }, // SERBIAN is only LID, MS-LCID not defined (was dupe of CROATIAN) + { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sh", "RS" }, // Serbian Latin in Serbia; kludge, needed to be sr_Latn_RS instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN, "sh", "YU" }, // legacy Serbian Latin in Serbia and Montenegro (former Yugoslavia); kludge, needed to be sr_Latn_CS instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN, "sh", "CS" }, // Serbian Latin in Serbia and Montenegro; kludge, needed to be sr_Latn_CS instead, script codes not 
supported yet + { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sh", "ME" }, // Serbian Latin in Montenegro; kludge, needed to be sr_Latn_ME instead, script codes not supported yet + { LANGUAGE_SERBIAN_LATIN_BOSNIA_HERZEGOVINA, "sh", "BA" }, + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sh", "" }, // kludge, needed to be sr_Latn instead, script codes not supported yet + { LANGUAGE_ARMENIAN, "hy", "AM" }, + { LANGUAGE_AZERI, "az", "" }, + { LANGUAGE_AZERI_LATIN, "az", "AZ" }, +// { LANGUAGE_AZERI_CYRILLIC, "az", "AZ" }, // script codes not supported yet + { LANGUAGE_UZBEK_LATIN, "uz", "UZ" }, +// { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ" }, // script codes not supported yet + { LANGUAGE_BENGALI_BANGLADESH, "bn", "BD" }, + { LANGUAGE_BENGALI, "bn", "IN" }, + { LANGUAGE_BURMESE, "my", "MM" }, + { LANGUAGE_KAZAK, "kk", "KZ" }, + { LANGUAGE_ENGLISH_INDIA, "en", "IN" }, + { LANGUAGE_URDU, "ur", "" }, + { LANGUAGE_URDU_INDIA, "ur", "IN" }, + { LANGUAGE_URDU_PAKISTAN, "ur", "PK" }, + { LANGUAGE_HINDI, "hi", "IN" }, + { LANGUAGE_GUJARATI, "gu", "IN" }, + { LANGUAGE_KANNADA, "kn", "IN" }, + { LANGUAGE_ASSAMESE, "as", "IN" }, + { LANGUAGE_KASHMIRI, "ks", "" }, + { LANGUAGE_KASHMIRI_INDIA, "ks", "IN" }, + { LANGUAGE_MALAYALAM, "ml", "IN" }, + { LANGUAGE_MANIPURI, "mni", "IN" }, + { LANGUAGE_MARATHI, "mr", "IN" }, + { LANGUAGE_KONKANI, "kok", "IN" }, + { LANGUAGE_NEPALI, "ne", "NP" }, + { LANGUAGE_NEPALI_INDIA, "ne", "IN" }, + { LANGUAGE_ORIYA, "or", "IN" }, + { LANGUAGE_PUNJABI, "pa", "IN" }, + { LANGUAGE_SANSKRIT, "sa", "IN" }, + { LANGUAGE_SINDHI, "sd", "IN" }, + { LANGUAGE_TAMIL, "ta", "IN" }, + { LANGUAGE_TELUGU, "te", "IN" }, + { LANGUAGE_PUNJABI_PAKISTAN, "lah", "PK" }, // preferring "lah" over "pa" for Western Punjabi, see http://www.ethnologue.com/show_language.asp?code=PNB + { LANGUAGE_PUNJABI_PAKISTAN, "pa", "PK" }, + { LANGUAGE_SINDHI_PAKISTAN, "sd", "PK" }, + { LANGUAGE_BELARUSIAN, "be", "BY" }, + { LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default) + { LANGUAGE_CATALAN, "ca", "AD" 
}, // Andorra + { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content! + { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content! +// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back. + { LANGUAGE_FRENCH_CAMEROON, "fr", "CM" }, + { LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" }, + { LANGUAGE_FRENCH_HAITI, "fr", "HT" }, + { LANGUAGE_FRENCH_MALI, "fr", "ML" }, + { LANGUAGE_FRENCH_SENEGAL, "fr", "SN" }, + { LANGUAGE_FRENCH_ZAIRE, "fr", "CD" }, // Democratic Republic Of Congo + { LANGUAGE_FRENCH_MOROCCO, "fr", "MA" }, + { LANGUAGE_FRENCH_REUNION, "fr", "RE" }, + { LANGUAGE_FRENCH_NORTH_AFRICA, "fr", "" }, + { LANGUAGE_FRENCH_WEST_INDIES, "fr", "" }, // unknown ISO country code + { LANGUAGE_FRISIAN_NETHERLANDS, "fy", "NL" }, + { LANGUAGE_GAELIC_IRELAND, "ga", "IE" }, + { LANGUAGE_GAELIC_SCOTLAND, "gd", "GB" }, + { LANGUAGE_GALICIAN, "gl", "ES" }, + { LANGUAGE_GEORGIAN, "ka", "GE" }, + { LANGUAGE_KHMER, "km", "KH" }, + { LANGUAGE_KIRGHIZ, "ky", "KG" }, + { LANGUAGE_LAO, "lo", "LA" }, + { LANGUAGE_MALTESE, "mt", "MT" }, + { LANGUAGE_MONGOLIAN, "mn", "MN" }, // Cyrillic script + { LANGUAGE_MONGOLIAN_MONGOLIAN, "mn", "MN" }, + { LANGUAGE_RUSSIAN_MOLDOVA, "mo", "MD" }, + { LANGUAGE_SWAHILI, "sw", "KE" }, + { LANGUAGE_USER_SWAHILI_TANZANIA, "sw", "TZ" }, + { LANGUAGE_TAJIK, "tg", "TJ" }, + { LANGUAGE_TIBETAN, "bo", "CN" }, // CN politically correct? 
+ { LANGUAGE_DZONGKHA, "dz", "BT" }, + { LANGUAGE_TURKMEN, "tk", "TM" }, + { LANGUAGE_WELSH, "cy", "GB" }, + { LANGUAGE_SESOTHO, "st", "ZA" }, + { LANGUAGE_SEPEDI, "nso", "ZA" }, + { LANGUAGE_SEPEDI, "ns", "ZA" }, // fake "ns" for compatibility with existing OOo1.1.x localization to be able to read those documents + { LANGUAGE_TSONGA, "ts", "ZA" }, + { LANGUAGE_TSWANA, "tn", "ZA" }, + { LANGUAGE_ENGLISH_SAFRICA, "en", "ZA" }, + { LANGUAGE_AFRIKAANS, "af", "ZA" }, + { LANGUAGE_VENDA, "ve", "ZA" }, // default 639-1 + { LANGUAGE_VENDA, "ven", "ZA" }, // 639-2 may have been used temporarily since 2004-07-23 + { LANGUAGE_XHOSA, "xh", "ZA" }, + { LANGUAGE_ZULU, "zu", "ZA" }, + { LANGUAGE_QUECHUA_ECUADOR, "qu", "EC" }, + { LANGUAGE_QUECHUA_PERU, "qu", "PE" }, + { LANGUAGE_QUECHUA_BOLIVIA, "qu", "BO" }, // macro: quh-BO, qul-BO + { LANGUAGE_PASHTO, "ps", "AF" }, + { LANGUAGE_OROMO, "om", "ET" }, + { LANGUAGE_DHIVEHI, "dv", "MV" }, + { LANGUAGE_UIGHUR_CHINA, "ug", "CN" }, + { LANGUAGE_TIGRIGNA_ETHIOPIA, "ti", "ET" }, + { LANGUAGE_TIGRIGNA_ERITREA, "ti", "ER" }, + { LANGUAGE_AMHARIC_ETHIOPIA, "am", "ET" }, + { LANGUAGE_GUARANI_PARAGUAY, "gug", "PY" }, + { LANGUAGE_HAWAIIAN_UNITED_STATES, "haw", "US" }, + { LANGUAGE_EDO, "bin", "NG" }, + { LANGUAGE_FULFULDE_NIGERIA, "ff", "NG" }, + { LANGUAGE_HAUSA_NIGERIA, "ha", "NG" }, + { LANGUAGE_USER_HAUSA_GHANA, "ha", "GH" }, + { LANGUAGE_IGBO_NIGERIA, "ig", "NG" }, + { LANGUAGE_KANURI_NIGERIA, "kr", "NG" }, + { LANGUAGE_YORUBA, "yo", "NG" }, + { LANGUAGE_SOMALI, "so", "SO" }, + { LANGUAGE_PAPIAMENTU, "pap", "AN" }, + { LANGUAGE_USER_PAPIAMENTU_ARUBA, "pap", "AW" }, + { LANGUAGE_ENGLISH_SINGAPORE, "en", "SG" }, + { LANGUAGE_YIDDISH, "yi", "IL" }, // new: old was "ji" + { LANGUAGE_YIDDISH, "ji", "IL" }, // old: new is "yi" + { LANGUAGE_SYRIAC, "syr", "TR" }, // "TR" according to http://www.ethnologue.com/show_language.asp?code=SYC + { LANGUAGE_SINHALESE_SRI_LANKA, "si", "LK" }, + { LANGUAGE_CHEROKEE_UNITED_STATES, "chr", "US" }, + { 
LANGUAGE_INUKTITUT_LATIN_CANADA, "iu", "CA" }, +// { LANGUAGE_INUKTITUT_SYLLABICS_CANADA, "iu", "CA" }, // script codes not supported yet + { LANGUAGE_SAMI_NORTHERN_NORWAY, "se", "NO" }, + { LANGUAGE_SAMI_INARI, "smn", "FI" }, + { LANGUAGE_SAMI_LULE_NORWAY, "smj", "NO" }, + { LANGUAGE_SAMI_LULE_SWEDEN, "smj", "SE" }, + { LANGUAGE_SAMI_NORTHERN_FINLAND, "se", "FI" }, + { LANGUAGE_SAMI_NORTHERN_SWEDEN, "se", "SE" }, + { LANGUAGE_SAMI_SKOLT, "sms", "FI" }, + { LANGUAGE_SAMI_SOUTHERN_NORWAY, "sma", "NO" }, + { LANGUAGE_SAMI_SOUTHERN_SWEDEN, "sma", "SE" }, + { LANGUAGE_USER_SAMI_KILDIN_RUSSIA, "sjd", "RU" }, + { LANGUAGE_MAPUDUNGUN_CHILE, "arn", "CL" }, + { LANGUAGE_CORSICAN_FRANCE, "co", "FR" }, + { LANGUAGE_ALSATIAN_FRANCE, "gsw", "FR" }, // in fact 'gsw' is Schwyzerduetsch (Swiss German), which is a dialect of Alemannic German, as is Alsatian. They aren't distinct languages and share this code. + { LANGUAGE_YAKUT_RUSSIA, "sah", "RU" }, + { LANGUAGE_MOHAWK_CANADA, "moh", "CA" }, + { LANGUAGE_BASHKIR_RUSSIA, "ba", "RU" }, + { LANGUAGE_KICHE_GUATEMALA, "qut", "GT" }, + { LANGUAGE_DARI_AFGHANISTAN, "gbz", "AF" }, + { LANGUAGE_WOLOF_SENEGAL, "wo", "SN" }, + { LANGUAGE_FILIPINO, "fil", "PH" }, + { LANGUAGE_USER_TAGALOG, "tl", "PH" }, + { LANGUAGE_ENGLISH_PHILIPPINES, "en", "PH" }, +// { LANGUAGE_IBIBIO_NIGERIA, "nic", "NG" }, // ISO "nic" is only a collective language code + { LANGUAGE_YI, "ii", "CN" }, +// { LANGUAGE_TAMAZIGHT_LATIN, "ber", "" }, // ISO "ber" only collective! +// { LANGUAGE_TAMAZIGHT_ARABIC, "ber", "" }, // ISO "ber" only collective! 
+ { LANGUAGE_LATIN, "la", "VA" }, + { LANGUAGE_OBSOLETE_USER_LATIN, "la", "VA" }, + { LANGUAGE_USER_ESPERANTO, "eo", "" }, + { LANGUAGE_USER_INTERLINGUA, "ia", "" }, + { LANGUAGE_MAORI_NEW_ZEALAND, "mi", "NZ" }, + { LANGUAGE_OBSOLETE_USER_MAORI, "mi", "NZ" }, + { LANGUAGE_KINYARWANDA_RWANDA, "rw", "RW" }, + { LANGUAGE_OBSOLETE_USER_KINYARWANDA, "rw", "RW" }, + { LANGUAGE_UPPER_SORBIAN_GERMANY, "hsb", "DE" }, // MS maps this to 'wen-DE', which is nonsense. 'wen' is a collective language code, 'WEN' is a SIL code, see http://www.ethnologue.com/14/show_iso639.asp?code=wen and http://www.ethnologue.com/14/show_language.asp?code=WEN + { LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN,"hsb", "DE" }, + { LANGUAGE_LOWER_SORBIAN_GERMANY, "dsb", "DE" }, // MS maps this to 'wee-DE', which is nonsense. 'WEE' is a SIL code, see http://www.ethnologue.com/14/show_language.asp?code=WEE + { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE" }, + { LANGUAGE_OCCITAN_FRANCE, "oc", "FR" }, + { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR" }, + { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR" }, + { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY" }, + { LANGUAGE_USER_KURDISH_IRAQ, "ku", "IQ" }, + { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR" }, + { LANGUAGE_USER_SARDINIAN, "sc", "IT" }, // macrolanguage code + { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT" }, + { LANGUAGE_USER_SARDINIAN_GALLURESE, "sdn", "IT" }, + { LANGUAGE_USER_SARDINIAN_LOGUDORESE, "src", "IT" }, + { LANGUAGE_USER_SARDINIAN_SASSARESE, "sdc", "IT" }, + { LANGUAGE_BRETON_FRANCE, "br", "FR" }, + { LANGUAGE_OBSOLETE_USER_BRETON, "br", "FR" }, + { LANGUAGE_KALAALLISUT_GREENLAND, "kl", "GL" }, + { LANGUAGE_OBSOLETE_USER_KALAALLISUT, "kl", "GL" }, + { LANGUAGE_USER_SWAZI, "ss", "ZA" }, + { LANGUAGE_USER_NDEBELE_SOUTH, "nr", "ZA" }, + { LANGUAGE_USER_TSWANA_BOTSWANA, "tn", "BW" }, + { LANGUAGE_USER_MOORE, "mos", "BF" }, + { LANGUAGE_USER_BAMBARA, "bm", "ML" }, + { LANGUAGE_USER_AKAN, "ak", "GH" }, + { LANGUAGE_LUXEMBOURGISH_LUXEMBOURG, "lb", "LU" }, + { 
LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH, "lb", "LU" }, + { LANGUAGE_USER_FRIULIAN, "fur", "IT" }, + { LANGUAGE_USER_FIJIAN, "fj", "FJ" }, + { LANGUAGE_USER_AFRIKAANS_NAMIBIA, "af", "NA" }, + { LANGUAGE_USER_ENGLISH_NAMIBIA, "en", "NA" }, + { LANGUAGE_USER_WALLOON, "wa", "BE" }, + { LANGUAGE_USER_COPTIC, "cop", "EG" }, + { LANGUAGE_USER_GASCON, "gsc", "FR" }, + { LANGUAGE_USER_GERMAN_BELGIUM, "de", "BE" }, + { LANGUAGE_USER_CHUVASH, "cv", "RU" }, + { LANGUAGE_USER_EWE_GHANA, "ee", "GH" }, + { LANGUAGE_USER_ENGLISH_GHANA, "en", "GH" }, + { LANGUAGE_USER_SANGO, "sg", "CF" }, + { LANGUAGE_USER_GANDA, "lg", "UG" }, + { LANGUAGE_USER_LINGALA_DRCONGO, "ln", "CD" }, + { LANGUAGE_USER_LOW_GERMAN, "nds", "DE" }, + { LANGUAGE_USER_HILIGAYNON, "hil", "PH" }, + { LANGUAGE_USER_NYANJA, "ny", "MW" }, + { LANGUAGE_USER_KASHUBIAN, "csb", "PL" }, + { LANGUAGE_USER_SPANISH_CUBA, "es", "CU" }, + { LANGUAGE_USER_QUECHUA_NORTH_BOLIVIA, "qul", "BO" }, + { LANGUAGE_USER_QUECHUA_SOUTH_BOLIVIA, "quh", "BO" }, + { LANGUAGE_USER_BODO_INDIA, "brx", "IN" }, + { LANGUAGE_USER_DOGRI_INDIA, "dgo", "IN" }, + { LANGUAGE_USER_MAITHILI_INDIA, "mai", "IN" }, + { LANGUAGE_USER_SANTALI_INDIA, "sat", "IN" }, + { LANGUAGE_USER_TETUN, "tet", "ID" }, + { LANGUAGE_USER_TETUN_TIMOR_LESTE, "tet", "TL" }, + { LANGUAGE_USER_TOK_PISIN, "tpi", "PG" }, + { LANGUAGE_USER_SHUSWAP, "shs", "CA" }, + { LANGUAGE_USER_ANCIENT_GREEK, "grc", "GR" }, + { LANGUAGE_USER_ASTURIAN, "ast", "ES" }, + { LANGUAGE_USER_LATGALIAN, "ltg", "LV" }, + { LANGUAGE_USER_MAORE, "swb", "YT" }, + { LANGUAGE_USER_BUSHI, "buc", "YT" }, + { LANGUAGE_USER_TAHITIAN, "ty", "PF" }, + { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" }, + { LANGUAGE_USER_BAFIA, "ksf", "CM" }, + { LANGUAGE_USER_GIKUYU, "ki", "KE" }, + { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" }, + { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" }, + { LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information + { LANGUAGE_DONTKNOW, "", "" 
} // marks end of table +}; + +static MsLangId::IsoLangEntry aLastResortFallbackEntry = +{ LANGUAGE_ENGLISH_US, "en", "US" }; + +// ----------------------------------------------------------------------- + +// In this table are the countries which should mapped to a specific +// english language +static IsoLangEngEntry const aImplIsoLangEngEntries[] = +{ + { LANGUAGE_ENGLISH_UK, "AO" }, // Angola + { LANGUAGE_ENGLISH_UK, "BJ" }, // Benin + { LANGUAGE_ENGLISH_UK, "BW" }, // Botswana + { LANGUAGE_ENGLISH_UK, "BI" }, // Burundi + { LANGUAGE_ENGLISH_UK, "CM" }, // Cameroon + { LANGUAGE_ENGLISH_UK, "GA" }, // Gabon + { LANGUAGE_ENGLISH_UK, "GM" }, // Gambia + { LANGUAGE_ENGLISH_UK, "GH" }, // Ghana + { LANGUAGE_ENGLISH_UK, "GN" }, // Guinea + { LANGUAGE_ENGLISH_UK, "LS" }, // Lesotho + { LANGUAGE_ENGLISH_UK, "MW" }, // Malawi + { LANGUAGE_ENGLISH_UK, "MT" }, // Malta + { LANGUAGE_ENGLISH_UK, "NA" }, // Namibia + { LANGUAGE_ENGLISH_UK, "NG" }, // Nigeria + { LANGUAGE_ENGLISH_UK, "UG" }, // Uganda + { LANGUAGE_ENGLISH_UK, "ZM" }, // Zambia + { LANGUAGE_ENGLISH_UK, "ZW" }, // Zimbabwe + { LANGUAGE_ENGLISH_UK, "SZ" }, // Swaziland + { LANGUAGE_ENGLISH_UK, "NG" }, // Sierra Leone + { LANGUAGE_ENGLISH_UK, "KN" }, // Saint Kitts and Nevis + { LANGUAGE_ENGLISH_UK, "SH" }, // St. 
Helena + { LANGUAGE_ENGLISH_UK, "IO" }, // British Indian Oceanic Territory + { LANGUAGE_ENGLISH_UK, "FK" }, // Falkland Islands + { LANGUAGE_ENGLISH_UK, "GI" }, // Gibraltar + { LANGUAGE_ENGLISH_UK, "KI" }, // Kiribati + { LANGUAGE_ENGLISH_UK, "VG" }, // Virgin Islands + { LANGUAGE_ENGLISH_UK, "MU" }, // Mauritius + { LANGUAGE_ENGLISH_UK, "FJ" }, // Fiji + { LANGUAGE_ENGLISH_US, "KI" }, // Kiribati + { LANGUAGE_ENGLISH_US, "LR" }, // Liberia + { LANGUAGE_ENGLISH_US, "GU" }, // Guam + { LANGUAGE_ENGLISH_US, "MH" }, // Marshall Islands + { LANGUAGE_ENGLISH_US, "PW" }, // Palau + { LANGUAGE_ENGLISH_CARRIBEAN, "AI" }, // Anguilla + { LANGUAGE_ENGLISH_CARRIBEAN, "AG" }, // Antigua and Barbuda + { LANGUAGE_ENGLISH_CARRIBEAN, "BS" }, // Bahamas + { LANGUAGE_ENGLISH_CARRIBEAN, "BB" }, // Barbedos + { LANGUAGE_ENGLISH_CARRIBEAN, "BM" }, // Bermuda + { LANGUAGE_ENGLISH_CARRIBEAN, "KY" }, // Cayman Islands + { LANGUAGE_ENGLISH_CARRIBEAN, "GD" }, // Grenada + { LANGUAGE_ENGLISH_CARRIBEAN, "DM" }, // Dominica + { LANGUAGE_ENGLISH_CARRIBEAN, "HT" }, // Haiti + { LANGUAGE_ENGLISH_CARRIBEAN, "MS" }, // Montserrat + { LANGUAGE_ENGLISH_CARRIBEAN, "FM" }, // Micronesia + { LANGUAGE_ENGLISH_CARRIBEAN, "VC" }, // St. 
Vincent / Grenadines + { LANGUAGE_ENGLISH_CARRIBEAN, "LC" }, // Saint Lucia + { LANGUAGE_ENGLISH_CARRIBEAN, "TC" }, // Turks & Caicos Islands + { LANGUAGE_ENGLISH_CARRIBEAN, "GY" }, // Guyana + { LANGUAGE_ENGLISH_CARRIBEAN, "TT" }, // Trinidad and Tobago + { LANGUAGE_ENGLISH_AUS, "CX" }, // Christmas Islands + { LANGUAGE_ENGLISH_AUS, "CC" }, // Cocos (Keeling) Islands + { LANGUAGE_ENGLISH_AUS, "NF" }, // Norfolk Island + { LANGUAGE_ENGLISH_AUS, "PG" }, // Papua New Guinea + { LANGUAGE_ENGLISH_AUS, "SB" }, // Solomon Islands + { LANGUAGE_ENGLISH_AUS, "TV" }, // Tuvalu + { LANGUAGE_ENGLISH_AUS, "NR" }, // Nauru + { LANGUAGE_ENGLISH_NZ, "CK" }, // Cook Islands + { LANGUAGE_ENGLISH_NZ, "NU" }, // Niue + { LANGUAGE_ENGLISH_NZ, "TK" }, // Tokelau + { LANGUAGE_ENGLISH_NZ, "TO" }, // Tonga + { LANGUAGE_DONTKNOW, "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries[] = +{ + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "BOK" }, // registered subtags for "no" in rfc1766 + { LANGUAGE_NORWEGIAN_NYNORSK, "no", "NYN" }, // registered subtags for "no" in rfc1766 + { LANGUAGE_SERBIAN_LATIN, "sr", "latin" }, + { LANGUAGE_SERBIAN_CYRILLIC, "sr", "cyrillic" }, + { LANGUAGE_AZERI_LATIN, "az", "latin" }, + { LANGUAGE_AZERI_CYRILLIC, "az", "cyrillic" }, + { LANGUAGE_DONTKNOW, "", "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +// in this table are only names to find the best language +static IsoLangNoneStdEntry const aImplIsoNoneStdLangEntries2[] = +{ + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmaal" }, + { LANGUAGE_NORWEGIAN_BOKMAL, "no", "bokmal" }, + { LANGUAGE_NORWEGIAN_NYNORSK, "no", "nynorsk" }, + { LANGUAGE_DONTKNOW, "", "" } // marks end of table +}; + +// ----------------------------------------------------------------------- + +// in this table are only names to find the best language +static IsoLangOtherEntry 
const aImplOtherEntries[] = +{ + { LANGUAGE_ENGLISH_US, "c" }, + { LANGUAGE_CHINESE, "chinese" }, + { LANGUAGE_GERMAN, "german" }, + { LANGUAGE_JAPANESE, "japanese" }, + { LANGUAGE_KOREAN, "korean" }, + { LANGUAGE_ENGLISH_US, "posix" }, + { LANGUAGE_CHINESE_TRADITIONAL, "tchinese" }, + { LANGUAGE_DONTKNOW, NULL } // marks end of table +}; + +// ======================================================================= + +// static +void MsLangId::convertLanguageToIsoNames( LanguageType nLang, + rtl::OUString& rLangStr, rtl::OUString& rCountry ) +{ + if ( nLang == LANGUAGE_SYSTEM ) + nLang = MsLangId::getSystemLanguage(); + + // Search for LangID (in this table we find only defined ISO combinations) + const IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if ( pEntry->mnLang == nLang ) + { + rLangStr = rtl::OUString::createFromAscii( pEntry->maLangStr ); + rCountry = rtl::OUString::createFromAscii( pEntry->maCountry ); + return; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Search for LangID if we didn't find a specific ISO combination. + // All entries in this table are allowed for mime specifications, + // but not defined ISO combinations. 
+ const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; + do + { + if ( pNoneStdEntry->mnLang == nLang ) + { + rLangStr = rtl::OUString::createFromAscii( pNoneStdEntry->maLangStr ); + rCountry = rtl::OUString::createFromAscii( pNoneStdEntry->maCountry ); + return; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + + // not found + rLangStr = rtl::OUString(); + rCountry = rtl::OUString(); +} + +// ----------------------------------------------------------------------- + +// static +void MsLangId::convertLanguageToIsoNames( LanguageType nLang, + rtl::OString& rLangStr, rtl::OString& rCountry ) +{ + if ( nLang == LANGUAGE_SYSTEM ) + nLang = MsLangId::getSystemLanguage(); + + // Search for LangID (in this table we find only defined ISO combinations) + const IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if ( pEntry->mnLang == nLang ) + { + rLangStr = rtl::OString( pEntry->maLangStr ); + rCountry = rtl::OString( pEntry->maCountry ); + return; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Search for LangID if we didn't find a specific ISO combination. + // All entries in this table are allowed for mime specifications, + // but not defined ISO combinations. + const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; + do + { + if ( pNoneStdEntry->mnLang == nLang ) + { + rLangStr = rtl::OString( pNoneStdEntry->maLangStr ); + rCountry = rtl::OString( pNoneStdEntry->maCountry ); + return; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + + // not found + rLangStr = rtl::OString(); + rCountry = rtl::OString(); +} + +// ----------------------------------------------------------------------- + +static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( LanguageType nLang ) +{ + LanguageType nPrimary = MsLangId::getPrimaryLanguage( nLang); + + // Search for LangID and remember first lang-only. 
+ const MsLangId::IsoLangEntry* pFirstPrimary = NULL; + const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if (pEntry->mnLang == nLang) + { + if (*pEntry->maCountry) + return *pEntry; + switch (nLang) + { + // These are known to have no country assigned. + case LANGUAGE_BASQUE: + case LANGUAGE_USER_ESPERANTO: + case LANGUAGE_USER_INTERLINGUA: + return *pEntry; + default: + ; // nothing + } + } + if (!pFirstPrimary && + MsLangId::getPrimaryLanguage( pEntry->mnLang) == nPrimary) + pFirstPrimary = pEntry; + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Language not found at all => use default. + if (!pFirstPrimary) + return aLastResortFallbackEntry; + + // Search for first entry of primary language with any country. + pEntry = pFirstPrimary; + do + { + if (MsLangId::getPrimaryLanguage( pEntry->mnLang) == nLang) + { + if (*pEntry->maCountry) + return *pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + return aLastResortFallbackEntry; +} + +// static +LanguageType MsLangId::lookupFallbackLanguage( LanguageType nLang ) +{ + return lcl_lookupFallbackEntry( nLang).mnLang; +} + + +// static +::com::sun::star::lang::Locale MsLangId::lookupFallbackLocale( LanguageType nLang ) +{ + const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( nLang); + return ::com::sun::star::lang::Locale( + rtl::OUString::createFromAscii( rEntry.maLangStr), + rtl::OUString::createFromAscii( rEntry.maCountry), + rtl::OUString()); +} + +// ----------------------------------------------------------------------- + +static const MsLangId::IsoLangEntry & lcl_lookupFallbackEntry( + const ::com::sun::star::lang::Locale & rLocale ) +{ + // language is lower case in table + rtl::OUString aLowerLang = rLocale.Language.toAsciiLowerCase(); + // country is upper case in table + rtl::OUString aUpperCountry = rLocale.Country.toAsciiUpperCase(); + sal_Int32 nCountryLen = aUpperCountry.getLength(); + + // Search for locale and 
remember first lang-only. + const MsLangId::IsoLangEntry* pFirstLang = NULL; + const MsLangId::IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if (aLowerLang.equalsAscii( pEntry->maLangStr)) + { + if (*pEntry->maCountry) + { + if (nCountryLen && aUpperCountry.equalsAscii( pEntry->maCountry)) + return *pEntry; + } + else + { + switch (pEntry->mnLang) + { + // These are known to have no country assigned. + case LANGUAGE_BASQUE: + case LANGUAGE_USER_ESPERANTO: + case LANGUAGE_USER_INTERLINGUA: + return *pEntry; + default: + ; // nothing + } + } + if (!pFirstLang) + pFirstLang = pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // Language not found at all => use default. + if (!pFirstLang) + return aLastResortFallbackEntry; + + // Search for first entry of language with any country. + pEntry = pFirstLang; + do + { + if (aLowerLang.equalsAscii( pEntry->maLangStr)) + { + if (*pEntry->maCountry) + return *pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + return aLastResortFallbackEntry; +} + +// static +LanguageType MsLangId::lookupFallbackLanguage( + const ::com::sun::star::lang::Locale & rLocale ) +{ + return lcl_lookupFallbackEntry( rLocale).mnLang; +} + + +// static +::com::sun::star::lang::Locale MsLangId::lookupFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ) +{ + const MsLangId::IsoLangEntry& rEntry = lcl_lookupFallbackEntry( rLocale); + return ::com::sun::star::lang::Locale( + rtl::OUString::createFromAscii( rEntry.maLangStr), + rtl::OUString::createFromAscii( rEntry.maCountry), + rtl::OUString()); +} + +// ----------------------------------------------------------------------- + +// static +rtl::OUString MsLangId::convertLanguageToIsoString( LanguageType nLang, + sal_Unicode cSep ) +{ + rtl::OUString aLangStr; + rtl::OUString aCountry; + convertLanguageToIsoNames( nLang, aLangStr, aCountry ); + if ( aCountry.getLength() ) + { + rtl::OUStringBuffer aBuf( aLangStr); + 
aBuf.append( cSep ); + aBuf.append( aCountry ); + return aBuf.makeStringAndClear(); + } + else + return aLangStr; +} + +// ----------------------------------------------------------------------- + +// static +rtl::OString MsLangId::convertLanguageToIsoByteString( LanguageType nLang, + sal_Char cSep ) +{ + rtl::OString aLangStr; + rtl::OString aCountry; + convertLanguageToIsoNames( nLang, aLangStr, aCountry ); + if ( aCountry.getLength() ) + { + rtl::OStringBuffer aBuf( aLangStr); + aBuf.append( cSep ); + aBuf.append( aCountry ); + return aBuf.makeStringAndClear(); + } + return aLangStr; +} + +// ======================================================================= + +// static +LanguageType MsLangId::convertIsoNamesToLanguage( const rtl::OUString& rLang, + const rtl::OUString& rCountry ) +{ + // language is lower case in table + rtl::OUString aLowerLang = rLang.toAsciiLowerCase(); + // country is upper case in table + rtl::OUString aUpperCountry = rCountry.toAsciiUpperCase(); + + // first look for exact match + const IsoLangEntry* pFirstLang = NULL; + const IsoLangEntry* pEntry = aImplIsoLangEntries; + do + { + if ( aLowerLang.equalsAscii( pEntry->maLangStr ) ) + { + if ( !aUpperCountry.getLength() || + aUpperCountry.equalsAscii( pEntry->maCountry ) ) + return pEntry->mnLang; + if ( !pFirstLang ) + pFirstLang = pEntry; + else if ( !*pEntry->maCountry ) + pFirstLang = pEntry; + } + ++pEntry; + } + while ( pEntry->mnLang != LANGUAGE_DONTKNOW ); + + // some eng countries should be mapped to a specific english language + if ( aLowerLang.equalsAscii( "en" ) ) + { + const IsoLangEngEntry* pEngEntry = aImplIsoLangEngEntries; + do + { + if ( aUpperCountry.equalsAscii( pEngEntry->maCountry ) ) + return pEngEntry->mnLang; + ++pEngEntry; + } + while ( pEngEntry->mnLang != LANGUAGE_DONTKNOW ); + } + + // test for specific languages which are not used standard ISO 3166 codes + const IsoLangNoneStdEntry* pNoneStdEntry = aImplIsoNoneStdLangEntries; + do + { + if ( 
aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) + { + // The countries in this table are not all in upper case + if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) + return pNoneStdEntry->mnLang; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + pNoneStdEntry = aImplIsoNoneStdLangEntries2; + do + { + if ( aLowerLang.equalsAscii( pNoneStdEntry->maLangStr ) ) + { + // The countries in this table are not all in upper case + if ( aUpperCountry.equalsIgnoreAsciiCaseAscii( pNoneStdEntry->maCountry ) ) + return pNoneStdEntry->mnLang; + } + ++pNoneStdEntry; + } + while ( pNoneStdEntry->mnLang != LANGUAGE_DONTKNOW ); + + // If the language is correct, than we return the default language + if ( pFirstLang ) + return pFirstLang->mnLang; + + // if only the country is set, look for any entry matching the country + // (to allow reading country and language in separate steps, in any order) + if ( rCountry.getLength() && !rLang.getLength() ) + { + const IsoLangEntry* pEntry2 = aImplIsoLangEntries; + do + { + if ( aUpperCountry.equalsAscii( pEntry2->maCountry ) ) + return pEntry2->mnLang; + ++pEntry2; + } + while ( pEntry2->mnLang != LANGUAGE_DONTKNOW ); + + aLowerLang = aUpperCountry.toAsciiLowerCase(); + } + + // Now look for all other definitions, which are not standard + const IsoLangOtherEntry* pOtherEntry = aImplOtherEntries; + do + { + if ( aLowerLang.equalsAscii( pOtherEntry->mpLangStr ) ) + return pOtherEntry->mnLang; + ++pOtherEntry; + } + while ( pOtherEntry->mnLang != LANGUAGE_DONTKNOW ); + + return LANGUAGE_DONTKNOW; +} + +// ----------------------------------------------------------------------- + +// static +LanguageType MsLangId::convertIsoNamesToLanguage( const rtl::OString& rLang, + const rtl::OString& rCountry ) +{ + rtl::OUString aLang = OStringToOUString( rLang, RTL_TEXTENCODING_ASCII_US); + rtl::OUString aCountry = OStringToOUString( rCountry, RTL_TEXTENCODING_ASCII_US); + return 
convertIsoNamesToLanguage( aLang, aCountry); +} + +// ----------------------------------------------------------------------- + +// static +LanguageType MsLangId::convertIsoStringToLanguage( + const rtl::OUString& rString, sal_Unicode cSep ) +{ + rtl::OUString aLang; + rtl::OUString aCountry; + sal_Int32 nSepPos = rString.indexOf( cSep ); + if ( nSepPos >= 0 ) + { + aLang = rString.copy( 0, nSepPos ); + aCountry = rString.copy( nSepPos+1 ); + } + else + aLang = rString; + + return convertIsoNamesToLanguage( aLang, aCountry ); +} + +// ----------------------------------------------------------------------- + +// static +LanguageType MsLangId::convertIsoByteStringToLanguage( + const rtl::OString& rString, sal_Char cSep ) +{ + rtl::OString aLang; + rtl::OString aCountry; + sal_Int32 nSepPos = rString.indexOf( cSep ); + if ( nSepPos >= 0 ) + { + aLang = rString.copy( 0, nSepPos ); + aCountry = rString.copy( nSepPos+1 ); + } + else + aLang = rString; + + return convertIsoNamesToLanguage( aLang, aCountry ); +} + +// ----------------------------------------------------------------------- + +struct IsoLangGLIBCModifiersEntry +{ + LanguageType mnLang; + sal_Char maLangStr[4]; + sal_Char maCountry[3]; + sal_Char maAtString[9]; +}; + +static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = +{ + // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier + { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, + { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia + { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro + { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, + { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, + { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, + { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table +}; + +// convert a unix locale 
string into LanguageType
+//
+// rString is taken apart as lang[_COUNTRY][.codeset][@modifier]. If a
+// modifier is present, aImplIsoLangGLIBCModifiersEntries is searched first
+// for an entry with the same language and modifier whose country matches
+// (or when the string has no country). Otherwise, or if that search finds
+// nothing, the language/country pair goes to convertIsoNamesToLanguage().
+
+// static
+LanguageType MsLangId::convertUnxByteStringToLanguage(
+        const rtl::OString& rString )
+{
+    rtl::OString aLang;
+    rtl::OString aCountry;
+    rtl::OString aAtString;
+
+    sal_Int32 nLangSepPos = rString.indexOf( (sal_Char)'_' );
+    sal_Int32 nCountrySepPos = rString.indexOf( (sal_Char)'.' );
+    sal_Int32 nAtPos = rString.indexOf( (sal_Char)'@' );
+
+    // The language/country part ends at the '.', else at the '@', else at
+    // the end of the string.
+    if (nCountrySepPos < 0)
+        nCountrySepPos = nAtPos;
+    if (nCountrySepPos < 0)
+        nCountrySepPos = rString.getLength();
+
+    // Everything after '@' is the modifier.
+    if (nAtPos >= 0)
+        aAtString = rString.copy( nAtPos+1 );
+
+    // An underscore that only occurs after the end of the language/country
+    // part is not a language-country separator.
+    if ( ((nLangSepPos >= 0) && (nLangSepPos > nCountrySepPos))
+            || ((nLangSepPos < 0)) )
+    {
+        // eg. "el.sun_eu_greek", "tchinese", "es.ISO8859-15"
+        aLang = rString.copy( 0, nCountrySepPos );
+    }
+    else if ( nLangSepPos >= 0 )
+    {
+        // well formed iso names like "en_US.UTF-8", "sh_BA.ISO8859-2@bosnia"
+        aLang = rString.copy( 0, nLangSepPos );
+        aCountry = rString.copy( nLangSepPos+1, nCountrySepPos - nLangSepPos - 1);
+    }
+
+    // if there is a glibc modifier, first look for exact match in modifier table
+    if (aAtString.getLength())
+    {
+        // language is lower case in table
+        rtl::OString aLowerLang = aLang.toAsciiLowerCase();
+        // country is upper case in table
+        rtl::OString aUpperCountry = aCountry.toAsciiUpperCase();
+        const IsoLangGLIBCModifiersEntry* pGLIBCModifiersEntry = aImplIsoLangGLIBCModifiersEntries;
+        do
+        {
+            if (( aLowerLang.equals( pGLIBCModifiersEntry->maLangStr ) ) &&
+                ( aAtString.equals( pGLIBCModifiersEntry->maAtString ) ))
+            {
+                // An empty country accepts the first entry for this
+                // language and modifier.
+                if ( !aUpperCountry.getLength() ||
+                        aUpperCountry.equals( pGLIBCModifiersEntry->maCountry ) )
+                {
+                    return pGLIBCModifiersEntry->mnLang;
+                }
+            }
+            ++pGLIBCModifiersEntry;
+        }
+        while ( pGLIBCModifiersEntry->mnLang != LANGUAGE_DONTKNOW );
+    }
+
+    return convertIsoNamesToLanguage( aLang, aCountry );
+}
+
+// -----------------------------------------------------------------------
+// pass one IsoLangEntry to the outer world of the resource compiler
+
+// static 
+const MsLangId::IsoLangEntry* MsLangId::getIsoLangEntry( size_t nIndex ) +{ + if (nIndex < sizeof( aImplIsoLangEntries) / sizeof( IsoLangEntry)) + return &aImplIsoLangEntries[ nIndex]; + return 0; +} diff --git a/i18npool/source/isolang/langid.pl b/i18npool/source/isolang/langid.pl new file mode 100755 index 000000000000..8035178b7bb5 --- /dev/null +++ b/i18npool/source/isolang/langid.pl @@ -0,0 +1,424 @@ +: # -*- perl -*- vim: ft=perl +eval 'exec perl -w -S $0 ${1+"$@"}' +if 0; +#************************************************************************* +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#************************************************************************* + +# See Usage() below or invoke without arguments for short instructions. 
+# For long instructions use the source, Luke ;-)
+
+use strict;
+
+# Print the short usage instructions to STDERR.
+sub Usage()
+{
+    print STDERR
+        "\n",
+        "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
+        "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
+        "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n",
+        "msi-encodinglist.txt\n\n",
+
+        "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
+
+        "A language string will be used as a generic string match in all searched files.\n",
+        "You may enclose the language string in word delimiters,\n",
+        "e.g. \\blanguage_german\\b for a specific match.\n",
+        "If the language string expression matches more than one define,\n",
+        "e.g. as in 'german', all matching defines will be processed.\n",
+        "If the language string does not match a define or an identifier in\n",
+        "langtab.src, a generic string match of the listbox entries will be tried.\n\n",
+
+        "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
+        "decimal, hexadecimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
+        "The exact language_define of an exact match will be used in remaining lookups.\n\n",
+
+        "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
+        "for example: 'en-US' or 'de-' or '-CH',\n",
+        "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
+        "and xx and YY themselves may be regular expressions.\n",
+        "Also here a list of matches will be processed.\n\n",
+
+        "If option --single is given, only the first match will be processed.\n\n";
+}
+
+# The lookups need these variables of the OOo build environment.
+my $SOLARVERSION = $ENV{"SOLARVERSION"};
+my $INPATH = $ENV{"INPATH"};
+my $SRC_ROOT = $ENV{"SRC_ROOT"};
+my $UPDMINOREXT = $ENV{"UPDMINOREXT"};
+if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT))
+{
+    print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
+    Usage();
+    exit 1;
+}
+if 
(!defined($UPDMINOREXT)) { + $UPDMINOREXT = ''; +} +my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT"; + +my $LANGUAGE_MASK_PRIMARY = 0x03ff; + +sub getPrimaryLanguage($) +{ + my($lcid) = @_; + return $lcid & $LANGUAGE_MASK_PRIMARY; +} + +sub getSubLanguage($) +{ + my($lcid) = @_; + return $lcid >> 10; +} + +sub makeLangID($$) +{ + my( $sub, $pri) = @_; + return ($sub << 10) | $pri; +} + + +sub grepFile($$$$@) +{ + my( $regex, $path, $module, $name, @addregex) = @_; + my @result; + my $found = 0; + my $areopen = 0; + my $arecloser = ''; + my $file; + # Try module under current working directory first to catch local + # modifications. A Not yet delivered lang.h is a special case. + if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") { + $file = "./$module/inc/i18npool/lang.h"; } + else { + $file = "./$module/$name"; } + if (!($found = open( IN, $file))) + { + # Then with the given path. + $file = "$path/$module/$name"; + if (!($found = open( IN, $file))) + { + print "No $file\n"; + $file = "$path/$module.lnk/$name"; + if (!($found = open( IN, $file))) { + print "No $file.\n"; + $file = "$path/$module.link/$name"; + if (!($found = open( IN, $file))) { + print "No $file either.\n"; } + } + } + } + if ($found) + { + $found = 0; + while (my $line = <IN>) + { + if ($line =~ /$regex/) + { + if (!$found) + { + $found = 1; + print "$file:\n"; + } + chomp( $line); + print "$line\n"; + push( @result, $line); + } + elsif (@addregex) + { + # By convention first element is opener, second element is closer. 
+                if (!$areopen)
+                {
+                    if ($line =~ /$addregex[0]/)
+                    {
+                        $areopen = 1;
+                        $arecloser = $addregex[1];
+                    }
+                }
+                if ($areopen)
+                {
+                    for (my $i = 2; $i < @addregex; ++$i)
+                    {
+                        if ($line =~ /$addregex[$i]/)
+                        {
+                            if (!$found)
+                            {
+                                $found = 1;
+                                print "$file:\n";
+                            }
+                            chomp( $line);
+                            print "$line\n";
+                            push( @result, $line);
+                        }
+                    }
+                    if ($line =~ /$arecloser/)
+                    {
+                        $areopen = 0;
+                    }
+                }
+            }
+        }
+        close( IN);
+    }
+    if (!$found) {
+        print "Not found in $file\n";
+        #print "Not found in $file for $regex @addregex\n";
+    }
+    return @result;
+}
+
+
+# Parse the command line (leading --options, then either one language
+# string / LangID or a primarylanguage sublanguage pair) and run the lookups.
+sub main()
+{
+    my( $lcid, @parts, $grepdef, $options, $single);
+    $grepdef = 0;
+    $single = 0;
+    # After this loop $options is the number of leading --options, i.e. the
+    # index of the first non-option argument.
+    for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
+    {
+        if ($ARGV[$options] eq '--single') { $single = 1; }
+        else { print "Unknown option: $ARGV[$options]\n"; }
+    }
+    if (@ARGV == 1 + $options)
+    {
+        # 0x hex, 0b bin, 0 oct
+        # Convert the value argument itself; $ARGV[0] would be an option
+        # string such as --single whenever options are given.
+        if ($ARGV[$options] =~ /^0/) {
+            $lcid = oct( $ARGV[$options]); }
+        elsif ($ARGV[$options] =~ /^[0-9]/) {
+            $lcid = $ARGV[$options]; }
+        else
+        {
+            $grepdef = $ARGV[$options];
+            $lcid = 0;
+        }
+        $parts[0] = getPrimaryLanguage( $lcid);
+        $parts[1] = getSubLanguage( $lcid);
+    }
+    elsif (@ARGV == 2 + $options)
+    {
+        for (my $i = $options; $i < 2 + $options; ++$i)
+        {
+            # Store relative to the first value argument so that $parts[0]
+            # is the primary language and $parts[1] the sublanguage no
+            # matter how many options precede them.
+            if ($ARGV[$i] =~ /^0/) {
+                $parts[$i - $options] = oct( $ARGV[$i]); }
+            else {
+                $parts[$i - $options] = $ARGV[$i]; }
+        }
+        $lcid = makeLangID( $parts[1], $parts[0]);
+    }
+    else
+    {
+        Usage();
+        return 1;
+    }
+    my $modifier = "(?i)";
+    my (@resultlist, @greplist, $result);
+    # If no string was given on the command line, but value(s) were, lookup the
+    # LangID value to obtain the define identifier.
+    if ($grepdef)
+    {
+        # #define LANGUAGE_AFRIKAANS 0x0436
+        @resultlist = grepFile(
+            $modifier . '^\s*#\s*define\s+[A-Z_]*' . 
$grepdef, + $SOLENVINC, "i18npool", "lang.h", ()); + } + else + { + printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid, + $lcid, $parts[0], $parts[1]); + my $buf = sprintf( "0x%04X", $lcid); + @resultlist = grepFile( + '^\s*#\s*define\s+\w+\s+' . $buf, + $SOLENVINC, "i18npool", "lang.h", ()); + } + for $result (@resultlist) + { + # #define LANGUAGE_AFRIKAANS 0x0436 + if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) + { + push( @greplist, '\b' . $1 . '\b'); + $modifier = ""; # complete identifier now case sensitive + if ($single) { + last; } + } + } + # If the string given is of the form xx-yy lookup a language,country pair + # to obtain the define identifier. xx and yy themselfs may be regexps. + # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy' + if ($grepdef =~ /^(.*)-$/) { + $grepdef = $1 . "-.*"; } + if ($grepdef =~ /^-(.*)$/) { + $grepdef = ".*-" . $1; } + if ($grepdef =~ /^(.*)-(.*)$/) + { + my $lang = $1; + my $coun = $2; + $lang = lc($lang); + $coun = uc($coun); + # { LANGUAGE_AFRIKAANS, "af", "ZA" }, + @resultlist = grepFile( + '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,', + "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); + for $result (@resultlist) + { + if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) + { + push( @greplist, '\b' . $1 . '\b'); + $modifier = ""; # complete identifier now case sensitive + if ($single) { + last; } + } + } + $grepdef = 0; + } + if (!@greplist && $grepdef) { + push( @greplist, $grepdef); } + for $grepdef (@greplist) + { + print "\nUsing: " . $grepdef . "\n"; + + # Decimal LCID, was needed for Langpack.ulf but isn't used anymore, + # keep just in case we'd need it again. + # #define LANGUAGE_AFRIKAANS 0x0436 + @resultlist = grepFile( + $modifier . '^\s*#\s*define\s+[A-Z_]*' . 
$grepdef, + $SOLENVINC, "i18npool", "lang.h", ()); + my @lcidlist; + for $result (@resultlist) + { + # #define LANGUAGE_AFRIKAANS 0x0436 + if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) + { + push( @lcidlist, oct( $2)); + } + } + + # { LANGUAGE_AFRIKAANS, "af", "ZA" }, + @resultlist = grepFile( + $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,', + "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); + + my @langcoungreplist; + for $result (@resultlist) + { + if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) + { + my $lang = $1; + my $coun = $2; + my $loca; + if ($coun) + { + $loca = $lang . "_" . $coun; + push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); + } + else + { + $loca = $lang; + $coun = ""; + push( @langcoungreplist, '\b' . $lang . '\b'); + } + my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; + my $found; + if (!($found = open( LD, $file))) + { + $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml"; + if (!($found = open( LD, $file))) + { + $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml"; + $found = open( LD, $file); + } + } + if ($found) + { + print "Found $file:\n"; + my $on = 0; + while (my $line = <LD>) + { + if ($line =~ /<(Language|Country)>/) { + $on = 1; } + if ($on) { + print $line; } + if ($line =~ /<\/(Language|Country)>/) { + $on = 0; } + } + close( LD); + } + else { + print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; } + } + } + + # case LANGUAGE_ARABIC: + grepFile( + $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:', + "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ()); + + # With CWS 'langstatusbar' the language listbox resource file gets a new location. 
+ my $module = "svx"; + my $name = "source/dialog/langtab.src"; + if (!(-e "$SRC_ROOT/$module/$name")) { + $module = "svtools"; + $name = "source/misc/langtab.src"; + } + # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ; + # lookup define + @resultlist = grepFile( + $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;', + "$SRC_ROOT", $module, $name, ()); + # lookup string + if (!@resultlist) { + grepFile( + $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;', + "$SRC_ROOT", $module, $name, ()); } + + for my $langcoun (@langcoungreplist) + { + # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu"; + grepFile( + '^\s*Name\s*\(' . $langcoun . '\)\s*=', + "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); + + # completelangiso=af ar as-IN ... zu + grepFile( + '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', + "$SRC_ROOT", "solenv", "inc/postset.mk", + # needs a duplicated pair of backslashes to produce a literal \\ + ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$')); + + # @noMSLocaleLangs = ( "br", "bs", ... ) + grepFile( + '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '', + "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm", + ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"')); + + # af 1252 1078 # Afrikaans + grepFile( + '^\s*' . $langcoun . '', + "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ()); + } + } + return 0; +} + +main(); diff --git a/i18npool/source/isolang/lcid.awk b/i18npool/source/isolang/lcid.awk new file mode 100644 index 000000000000..b8209e7a585e --- /dev/null +++ b/i18npool/source/isolang/lcid.awk @@ -0,0 +1,171 @@ +#!/usr/bin/awk -f +# +# Utility to compare MS-LANGID definitions with those defined in ../../inc/i18npool/lang.h +# Run in i18npool/source/isolang +# +# outputs new #define LANGUAGE_... 0x... 
and also some commented out substrings +# that were matched in already existing defines. +# +# ATTENTION! The sed filter in the command line examples below assures that a +# '|' border is drawn by html2text in data tables, and nowhere else, on which +# this awk script relies. This script also heavily relies on the column layout +# encountered. Should MS decide to change their layout or their CSS names +# ("data..."), this would probably break. Should html2text decide that the last +# border="..." attribute encountered wins instead of the first, this may break +# also. +# +# sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' +# +# After html2text best if file cleaned up to _only_ contain the table entries, +# but not necessary, entries are filtered. Check output. +# +# Expects input from the saved page of one of +# +# (1) +# http://www.microsoft.com/globaldev/reference/lcid-all.mspx +# filtered through ``html2text -nobs ...'', generated table: +# blank,name,hex,dec,blank fields: +# |Afrikaans_-_South_Africa___|0436___|1078___| +# +# complete command line: +# lynx -dump -source http://www.microsoft.com/globaldev/reference/lcid-all.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# +# (2) +# http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx +# filtered through ``html2text -nobs ...'', generated table: +# blank,name,hex,dec,inputlocales,collection,blank fields: +# |Afrikaans |0436 |1078 |0436:00000409, |Basic | +# +# complete command line: +# lynx -dump -source http://www.microsoft.com/globaldev/reference/winxp/xp-lcid.mspx | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; 
s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# +# (3) +# http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp +# filtered through ``html2text -nobs ...'', generated table: +# blank,hex,locale,name,blank fields: +# |0x0436___|af-ZA___|Afrikaans_(South_Africa)___| +# +# complete command line: +# lynx -dump -source http://msdn.microsoft.com/library/en-us/intl/nls_238z.asp | sed -e 's/|/,/g; s/<TABLE/<table/g; /<table/\!b; s/\(<table[^>]*\)\(border\|BORDER\)="[0-9]*"/\1/g; s/\(<table\)\([^>]*\(class\|CLASS\)="data\)/\1 border="1"\2/g' | html2text -nobs -width 234 | awk -f lcid.awk >outfile +# +# Author: Eike Rathke <erack@sun.com>, <er@openoffice.org> +# + +BEGIN { + while ((getline < "../../inc/i18npool/lang.h") > 0) + { + if ($0 ~ /^#define[ ]*LANGUAGE_[_A-Za-z0-9]*[ ]*0x[0-9a-fA-F]/) + { + # lang[HEX]=NAME + lang[toupper(substr($3,3))] = toupper($2) + #print substr($3,3) "=" $2 + } + } + # html2text table follows + FS = "\|" + filetype = 0 + lcid_all = 1 + xp_lcid = 2 + nls_238z = 3 + filetypename[filetype] = "unknown" + filetypename[lcid_all] = "lcid_all" + filetypename[xp_lcid] = "xp_lcid" + filetypename[nls_238z] = "nls_238z" + namefield[lcid_all] = 2 + namefield[xp_lcid] = 2 + namefield[nls_238z] = 4 + hexfield[lcid_all] = 3 + hexfield[xp_lcid] = 3 + hexfield[nls_238z] = 2 + locfield[lcid_all] = 0 + locfield[xp_lcid] = 0 + locfield[nls_238z] = 3 +} + +(NF < 5) { next } + +!filetype { + if (NF == 5) + { + if ($2 ~ /^0x/) + filetype = nls_238z + else if ($2 ~ /^Afrikaans/) + filetype = lcid_all + } + else if (NF == 7) + filetype = xp_lcid + if (!filetype) + next + name = namefield[filetype] + hex = hexfield[filetype] + loc = locfield[filetype] +} + +{ + gsub( /^[^:]*:/, "", $name) + gsub( /\..*/, "", $name) + gsub( /(^[ _]+)|([ _]+$)/, "", $hex) + gsub( /(^[ _]+)|([ _]+$)/, "", $name) + if (loc) + gsub( /(^[ _]+)|([ _]+$)/, "", $loc) +} + +($hex ~ /^0x/) { $hex = substr( $hex, 3) } 
+ +# if only 464 instead of 0464, make it match lang.h +(length($hex) < 4) { $hex = "0" $hex } + +($hex !~ /^[0-9a-fA-F][0-9a-fA-F]*$/) { filtered[$hex] = $0; next } + +# all[HEX]=string +{ all[toupper($hex)] = $name } + +(loc) { comment[toupper($hex)] = " /* " $loc " */" } + +# new hex: newlang[HEX]=string +!(toupper($hex) in lang) { newlang[toupper($hex)] = $name } + +END { + if (!filetype) + { + print "No file type recognized." >>"/dev/stderr" + exit(1) + } + print "// assuming " filetypename[filetype] " file" + # every new language + for (x in newlang) + { + printf( "xxxxxxx LANGUAGE_%-26s 0x%s%s\n", newlang[x], x, comment[x]) + n = split(newlang[x],arr,/[^A-Za-z0-9]/) + def = "" + for (i=1; i<=n; ++i) + { + if (length(arr[i])) + { + # each identifier word of the language name + if (def) + def = def "_" + aup = toupper(arr[i]) + def = def aup + for (l in lang) + { + # contained in already existing definitions? + if (lang[l] ~ aup) + printf( "// %-50s %s\n", arr[i] ": " lang[l], l) + } + } + } + printf( "#define LANGUAGE_%-26s 0x%s\n", def, x) + } + print "\n// --- reverse check follows ----------------------------------\n" + for (x in lang) + { + if (!(x in all)) + print "// not in input file: " x " " lang[x] + } + print "\n// --- filtered table entries follow (if any) -----------------\n" + for (x in filtered) + print "// filtered: " x " " filtered[x] +} diff --git a/i18npool/source/isolang/makefile.mk b/i18npool/source/isolang/makefile.mk new file mode 100644 index 000000000000..b8f01460f67b --- /dev/null +++ b/i18npool/source/isolang/makefile.mk @@ -0,0 +1,69 @@ +#************************************************************************* +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. 
+# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#************************************************************************* + +PRJ=..$/.. + +PRJNAME=i18npool +TARGET=i18nisolang + +# --- Settings ----------------------------------------------------- + +.INCLUDE : settings.mk +.INCLUDE : $(PRJ)$/version.mk +.INCLUDE : $(PRJ)$/util$/makefile.pmk + +# --- Files -------------------------------------------------------- + +SLOFILES= $(SLO)$/insys.obj \ + $(SLO)$/isolang.obj \ + $(SLO)$/mslangid.obj + +SHL1TARGET= $(ISOLANG_TARGET)$(ISOLANG_MAJOR)$(COMID) +SHL1IMPLIB= i$(ISOLANG_TARGET) + +DEF1DEPN= $(MISC)$/$(SHL1TARGET).flt +SHL1DEF= $(MISC)$/$(SHL1TARGET).def +DEF1NAME= $(SHL1TARGET) +DEFLIB1NAME= $(SHL1TARGET) + +SHL1OBJS= $(SLOFILES) + +LIB1TARGET= $(SLB)$/$(SHL1TARGET).lib +LIB1OBJFILES=$(SHL1OBJS) + +SHL1STDLIBS= \ + $(SALLIB) + +# --- Targets ------------------------------------------------------ + +.INCLUDE : target.mk + +$(MISC)$/$(SHL1TARGET).flt: makefile.mk + @echo ------------------------------ + @echo Making: $@ + @echo CLEAR_THE_FILE > $@ + @echo __CT >> $@ diff --git a/i18npool/source/isolang/mslangid.cxx b/i18npool/source/isolang/mslangid.cxx new file mode 100644 index 000000000000..a03d24e6fcdb --- /dev/null +++ b/i18npool/source/isolang/mslangid.cxx @@ 
-0,0 +1,467 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_i18npool.hxx" +#include <sal/config.h> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <com/sun/star/i18n/ScriptType.hpp> + +#include "i18npool/mslangid.hxx" + + +LanguageType MsLangId::nConfiguredSystemLanguage = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredSystemUILanguage = LANGUAGE_SYSTEM; + +LanguageType MsLangId::nConfiguredWesternFallback = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredAsianFallback = LANGUAGE_SYSTEM; +LanguageType MsLangId::nConfiguredComplexFallback = LANGUAGE_SYSTEM; + +// static +void MsLangId::setConfiguredSystemLanguage( LanguageType nLang ) +{ + nConfiguredSystemLanguage = nLang; +} + + +// static +void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang ) +{ + nConfiguredSystemUILanguage = nLang; +} + +// static +void MsLangId::setConfiguredWesternFallback( LanguageType nLang ) +{ + nConfiguredWesternFallback = nLang; +} + +// static +void MsLangId::setConfiguredAsianFallback( LanguageType nLang ) +{ + nConfiguredAsianFallback = nLang; +} + +// static +void MsLangId::setConfiguredComplexFallback( LanguageType nLang ) +{ + nConfiguredComplexFallback = nLang; +} + +// static +inline LanguageType MsLangId::simplifySystemLanguages( LanguageType nLang ) +{ + switch (nLang) + { + case LANGUAGE_PROCESS_OR_USER_DEFAULT : + case LANGUAGE_SYSTEM_DEFAULT : + case LANGUAGE_SYSTEM : + nLang = LANGUAGE_SYSTEM; + break; + default: + ; // nothing + } + return nLang; +} + + +// static +LanguageType MsLangId::getRealLanguageWithoutConfig( LanguageType nLang ) +{ + switch (simplifySystemLanguages( nLang)) + { + case LANGUAGE_SYSTEM : + nLang = getSystemLanguage(); + break; + case LANGUAGE_NONE : + nLang = getSystemUILanguage(); + break; + default: + /* TODO: would this be useful here? 
*/ + //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); + ; // nothing + } + if (nLang == LANGUAGE_DONTKNOW) + nLang = LANGUAGE_ENGLISH_US; + return nLang; +} + + +// static +LanguageType MsLangId::getRealLanguage( LanguageType nLang ) +{ + switch (simplifySystemLanguages( nLang)) + { + case LANGUAGE_SYSTEM : + if (nConfiguredSystemLanguage == LANGUAGE_SYSTEM) + nLang = getSystemLanguage(); + else + nLang = nConfiguredSystemLanguage; + break; + case LANGUAGE_NONE : + if (nConfiguredSystemUILanguage == LANGUAGE_SYSTEM) + nLang = getSystemUILanguage(); + else + nLang = nConfiguredSystemUILanguage; + break; + default: + /* TODO: would this be useful here? */ + //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); + ; // nothing + } + if (nLang == LANGUAGE_DONTKNOW) + nLang = LANGUAGE_ENGLISH_US; + return nLang; +} + + +// static +LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType ) +{ + if (nLang == LANGUAGE_NONE) + return nLang; + + nLang = getRealLanguage(nLang); + if (nType != ::com::sun::star::i18n::ScriptType::WEAK && getScriptType(nLang) != nType) + { + switch(nType) + { + case ::com::sun::star::i18n::ScriptType::ASIAN: + if (nConfiguredAsianFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_CHINESE_SIMPLIFIED; + else + nLang = nConfiguredAsianFallback; + break; + case ::com::sun::star::i18n::ScriptType::COMPLEX: + if (nConfiguredComplexFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_HINDI; + else + nLang = nConfiguredComplexFallback; + break; + default: + if (nConfiguredWesternFallback == LANGUAGE_SYSTEM) + nLang = LANGUAGE_ENGLISH_US; + else + nLang = nConfiguredWesternFallback; + break; + } + } + return nLang; +} + +// static +void MsLangId::convertLanguageToLocale( LanguageType nLang, + ::com::sun::star::lang::Locale & rLocale ) +{ + if (rLocale.Variant.getLength()) + rLocale.Variant = rtl::OUString(); + convertLanguageToIsoNames( nLang, rLocale.Language, rLocale.Country); +} + + +// static 
+::com::sun::star::lang::Locale MsLangId::convertLanguageToLocale( + LanguageType nLang, bool bResolveSystem ) +{ + ::com::sun::star::lang::Locale aLocale; + if (!bResolveSystem && simplifySystemLanguages( nLang) == LANGUAGE_SYSTEM) + ; // nothing => empty locale + else + { + // Still resolve LANGUAGE_DONTKNOW if resolving is not requested, + // but not LANGUAGE_NONE or others. + if (bResolveSystem || nLang == LANGUAGE_DONTKNOW) + nLang = MsLangId::getRealLanguage( nLang); + convertLanguageToLocale( nLang, aLocale); + } + return aLocale; +} + + +// static +LanguageType MsLangId::convertLocaleToLanguage( + const ::com::sun::star::lang::Locale& rLocale ) +{ + // empty language => LANGUAGE_SYSTEM + if (rLocale.Language.getLength() == 0) + return LANGUAGE_SYSTEM; + + LanguageType nRet = convertIsoNamesToLanguage( rLocale.Language, + rLocale.Country); + if (nRet == LANGUAGE_DONTKNOW) + nRet = LANGUAGE_SYSTEM; + + return nRet; +} + + +// static +LanguageType MsLangId::convertLocaleToLanguageWithFallback( + const ::com::sun::star::lang::Locale & rLocale ) +{ + // empty language => LANGUAGE_SYSTEM + if (rLocale.Language.getLength() == 0) + return lookupFallbackLanguage( LANGUAGE_SYSTEM); + + return lookupFallbackLanguage( rLocale); +} + + +// static +::com::sun::star::lang::Locale MsLangId::convertLanguageToLocaleWithFallback( + LanguageType nLang ) +{ + return lookupFallbackLocale( MsLangId::getRealLanguage( nLang)); +} + + +// static +::com::sun::star::lang::Locale MsLangId::getFallbackLocale( + const ::com::sun::star::lang::Locale & rLocale ) +{ + // empty language => LANGUAGE_SYSTEM + if (rLocale.Language.getLength() == 0) + return convertLanguageToLocaleWithFallback( LANGUAGE_SYSTEM); + + return lookupFallbackLocale( rLocale); +} + + +// static +LanguageType MsLangId::getFallbackLanguage( LanguageType nLang ) +{ + return lookupFallbackLanguage( MsLangId::getRealLanguage( nLang)); +} + + +// static +bool MsLangId::isRightToLeft( LanguageType nLang ) +{ + switch( nLang 
& LANGUAGE_MASK_PRIMARY ) + { + case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_HEBREW & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_YIDDISH & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_URDU & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_FARSI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_KASHMIRI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_SINDHI & LANGUAGE_MASK_PRIMARY : + case LANGUAGE_UIGHUR_CHINA & LANGUAGE_MASK_PRIMARY : + return true; + + default: + break; + } + return false; +} + + +// static +bool MsLangId::hasForbiddenCharacters( LanguageType nLang ) +{ + switch (nLang & LANGUAGE_MASK_PRIMARY) + { + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_JAPANESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KOREAN & LANGUAGE_MASK_PRIMARY: + return true; + default: + break; + } + return false; +} + + +// static +bool MsLangId::needsSequenceChecking( LanguageType nLang ) +{ + switch (nLang & LANGUAGE_MASK_PRIMARY) + { + case LANGUAGE_BURMESE & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_KHMER & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_LAO & LANGUAGE_MASK_PRIMARY: + case LANGUAGE_THAI & LANGUAGE_MASK_PRIMARY: + return true; + default: + break; + } + return false; +} + + +// static +sal_Int16 MsLangId::getScriptType( LanguageType nLang ) +{ + sal_Int16 nScript; + switch( nLang ) + { + // CJK + // all LANGUAGE_CHINESE_... are caught below + case LANGUAGE_JAPANESE: + case LANGUAGE_KOREAN: + case LANGUAGE_KOREAN_JOHAB: + case LANGUAGE_USER_KOREAN_NORTH: + nScript = ::com::sun::star::i18n::ScriptType::ASIAN; + break; + + // CTL + // all LANGUAGE_ARABIC_... 
are caught below + case LANGUAGE_AMHARIC_ETHIOPIA: + case LANGUAGE_ASSAMESE: + case LANGUAGE_BENGALI: + case LANGUAGE_BENGALI_BANGLADESH: + case LANGUAGE_BURMESE: + case LANGUAGE_FARSI: + case LANGUAGE_HEBREW: + case LANGUAGE_YIDDISH: + case LANGUAGE_MARATHI: + case LANGUAGE_PUNJABI: + case LANGUAGE_GUJARATI: + case LANGUAGE_HINDI: + case LANGUAGE_KANNADA: + case LANGUAGE_KASHMIRI: + case LANGUAGE_KASHMIRI_INDIA: + case LANGUAGE_KHMER: + case LANGUAGE_LAO: + case LANGUAGE_MALAYALAM: + case LANGUAGE_MANIPURI: + case LANGUAGE_MONGOLIAN_MONGOLIAN: + case LANGUAGE_NEPALI: + case LANGUAGE_NEPALI_INDIA: + case LANGUAGE_ORIYA: + case LANGUAGE_SANSKRIT: + case LANGUAGE_SINDHI: + case LANGUAGE_SINDHI_PAKISTAN: + case LANGUAGE_SINHALESE_SRI_LANKA: + case LANGUAGE_SYRIAC: + case LANGUAGE_TAMIL: + case LANGUAGE_TELUGU: + case LANGUAGE_THAI: + case LANGUAGE_TIBETAN: + case LANGUAGE_DZONGKHA: + case LANGUAGE_URDU: + case LANGUAGE_URDU_PAKISTAN: + case LANGUAGE_URDU_INDIA: + case LANGUAGE_USER_KURDISH_IRAQ: + case LANGUAGE_USER_KURDISH_IRAN: + case LANGUAGE_DHIVEHI: + case LANGUAGE_USER_BODO_INDIA: + case LANGUAGE_USER_DOGRI_INDIA: + case LANGUAGE_USER_MAITHILI_INDIA: + case LANGUAGE_UIGHUR_CHINA: + nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; + break; + +// currently not knowing scripttype - defaulted to LATIN: +/* +#define LANGUAGE_ARMENIAN 0x042B +#define LANGUAGE_INDONESIAN 0x0421 +#define LANGUAGE_KAZAK 0x043F +#define LANGUAGE_KONKANI 0x0457 +#define LANGUAGE_MACEDONIAN 0x042F +#define LANGUAGE_TATAR 0x0444 +*/ + + default: + switch ( nLang & LANGUAGE_MASK_PRIMARY ) + { + // CJK catcher + case LANGUAGE_CHINESE & LANGUAGE_MASK_PRIMARY: + nScript = ::com::sun::star::i18n::ScriptType::ASIAN; + break; + // CTL catcher + case LANGUAGE_ARABIC_SAUDI_ARABIA & LANGUAGE_MASK_PRIMARY: + nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; + break; + // Western (actually not necessarily Latin but also Cyrillic, for example) + default: + nScript = 
::com::sun::star::i18n::ScriptType::LATIN; + } + break; + } + return nScript; +} + + +// static +LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang ) +{ + switch (nLang) + { + default: + break; // nothing + case LANGUAGE_OBSOLETE_USER_LATIN: + nLang = LANGUAGE_LATIN; + break; + case LANGUAGE_OBSOLETE_USER_MAORI: + nLang = LANGUAGE_MAORI_NEW_ZEALAND; + break; + case LANGUAGE_OBSOLETE_USER_KINYARWANDA: + nLang = LANGUAGE_KINYARWANDA_RWANDA; + break; + case LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN: + nLang = LANGUAGE_UPPER_SORBIAN_GERMANY; + break; + case LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN: + nLang = LANGUAGE_LOWER_SORBIAN_GERMANY; + break; + case LANGUAGE_OBSOLETE_USER_OCCITAN: + nLang = LANGUAGE_OCCITAN_FRANCE; + break; + case LANGUAGE_OBSOLETE_USER_BRETON: + nLang = LANGUAGE_BRETON_FRANCE; + break; + case LANGUAGE_OBSOLETE_USER_KALAALLISUT: + nLang = LANGUAGE_KALAALLISUT_GREENLAND; + break; + case LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH: + nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG; + break; + + // The following are not strictly obsolete but should be mapped to a + // replacement locale when encountered. + + // no_NO is an alias for nb_NO + case LANGUAGE_NORWEGIAN: + nLang = LANGUAGE_NORWEGIAN_BOKMAL; + break; + + // #i94435# A Spanish variant that differs only in collation details we + // do not support. + case LANGUAGE_SPANISH_DATED: + nLang = LANGUAGE_SPANISH_MODERN; + break; + + // Do not use ca-XV for document content. + /* TODO: remove in case we implement BCP47 language tags. */ + case LANGUAGE_USER_CATALAN_VALENCIAN: + nLang = LANGUAGE_CATALAN; + break; + } + return nLang; +} |