From 6a826ddc4ee40a9727131cd4b13365bf6ae16319 Mon Sep 17 00:00:00 2001 From: Eike Rathke Date: Mon, 18 Nov 2013 21:07:43 +0100 Subject: cleaned up ISO code usage for Kurdish, fdo#63460 * instead of the 'ku' macrolanguage code use proper ISO 639-3 codes and use 'Latn' script with 'kmr' * use MS-LCID 0x0492 for Central Kurdish (Iraq) [ckb-IQ] * added Southern Kurdish (Iraq) [sdh-IQ] Change-Id: Iaee8be98d0659a0e7bbf041e60025dd1f771066f --- i18nlangtag/qa/cppunit/test_languagetag.cxx | 8 +++++ i18nlangtag/source/isolang/isolang.cxx | 21 +++++++++---- i18nlangtag/source/isolang/mslangid.cxx | 20 +++++++++++- i18nlangtag/source/languagetag/languagetag.cxx | 43 ++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 7 deletions(-) (limited to 'i18nlangtag') diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx index 17217fd08497..8946969b2562 100644 --- a/i18nlangtag/qa/cppunit/test_languagetag.cxx +++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx @@ -625,6 +625,14 @@ static bool checkMapping( const OUString rStr1, const OUString& rStr2 ) if (rStr1 == "yi-Hebr-IL" ) return rStr2 == "yi-IL"; if (rStr1 == "ha-NG" ) return rStr2 == "ha-Latn-NG"; if (rStr1 == "ha-GH" ) return rStr2 == "ha-Latn-GH"; + if (rStr1 == "ku-Arab-IQ" ) return rStr2 == "ckb-IQ"; + if (rStr1 == "ku-Arab" ) return rStr2 == "ckb"; + if (rStr1 == "kmr-TR" ) return rStr2 == "kmr-Latn-TR"; + if (rStr1 == "ku-TR" ) return rStr2 == "kmr-Latn-TR"; + if (rStr1 == "kmr-SY" ) return rStr2 == "kmr-Latn-SY"; + if (rStr1 == "ku-SY" ) return rStr2 == "kmr-Latn-SY"; + if (rStr1 == "ku-IQ" ) return rStr2 == "ckb-IQ"; + if (rStr1 == "ku-IR" ) return rStr2 == "ckb-IR"; return rStr1 == rStr2; } diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx index f9f34a54949e..5505540730ab 100644 --- a/i18nlangtag/source/isolang/isolang.cxx +++ b/i18nlangtag/source/isolang/isolang.cxx @@ -506,10 +506,17 @@ static IsoLanguageCountryEntry 
const aImplIsoLangEntries[] = { LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN,"dsb", "DE", 0 }, { LANGUAGE_OCCITAN_FRANCE, "oc", "FR", 0 }, { LANGUAGE_OBSOLETE_USER_OCCITAN, "oc", "FR", 0 }, - { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR", 0 }, - { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY", 0 }, - { LANGUAGE_USER_KURDISH_IRAQ, "ku", "IQ", 0 }, - { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR", 0 }, + { LANGUAGE_USER_KURDISH_TURKEY, "kmr", "TR", kSAME }, + { LANGUAGE_USER_KURDISH_TURKEY, "ku", "TR", kSAME }, + { LANGUAGE_USER_KURDISH_SYRIA, "kmr", "SY", kSAME }, + { LANGUAGE_USER_KURDISH_SYRIA, "ku", "SY", kSAME }, + { LANGUAGE_KURDISH_ARABIC_IRAQ, "ckb", "IQ", 0 }, + { LANGUAGE_KURDISH_ARABIC_IRAQ, "ku", "IQ", kSAME }, + { LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ, "ku", "IQ", LANGUAGE_KURDISH_ARABIC_IRAQ }, + { LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ, "sdh", "IQ", 0 }, + { LANGUAGE_USER_KURDISH_IRAN, "ckb", "IR", 0 }, + { LANGUAGE_USER_KURDISH_IRAN, "ku", "IR", kSAME }, + { LANGUAGE_KURDISH_ARABIC_LSO, "ckb", "" , 0 }, { LANGUAGE_USER_SARDINIAN, "sc", "IT", 0 }, // macrolanguage code { LANGUAGE_USER_SARDINIAN_CAMPIDANESE, "sro", "IT", 0 }, { LANGUAGE_USER_SARDINIAN_GALLURESE, "sdn", "IT", 0 }, @@ -697,8 +704,10 @@ static IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] = { LANGUAGE_LATIN_LSO, "la-Latn", "" , kSAME }, // MS, though Latn is suppress-script { LANGUAGE_TAI_NUA_CHINA, "tdd-Tale", "CN", 0 }, // MS reserved { LANGUAGE_LU_CHINA, "khb-Talu", "CN", 0 }, // MS reserved - { LANGUAGE_KURDISH_ARABIC_IRAQ, "ku-Arab", "IQ", 0 }, // macrolanguage code, MS - { LANGUAGE_KURDISH_ARABIC_LSO, "ku-Arab", "" , 0 }, // macrolanguage code + { LANGUAGE_KURDISH_ARABIC_IRAQ, "ku-Arab", "IQ", kSAME }, // macrolanguage code, MS + { LANGUAGE_KURDISH_ARABIC_LSO, "ku-Arab", "" , kSAME }, // macrolanguage code, MS + { LANGUAGE_USER_KURDISH_TURKEY, "kmr-Latn", "TR", 0 }, + { LANGUAGE_USER_KURDISH_SYRIA, "kmr-Latn", "SY", 0 }, { LANGUAGE_PUNJABI_PAKISTAN, "pnb-Arab", "PK", 0 }, { 
LANGUAGE_PUNJABI_ARABIC_LSO, "pnb-Arab", "" , 0 }, { LANGUAGE_PUNJABI_PAKISTAN, "pa-Arab", "PK", 0 }, // MS, incorrect diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx index 3964c6fd2ed2..d44256de4117 100644 --- a/i18nlangtag/source/isolang/mslangid.cxx +++ b/i18nlangtag/source/isolang/mslangid.cxx @@ -216,6 +216,18 @@ bool MsLangId::isRightToLeft( LanguageType nLang ) default: break; } + switch (nLang) + { + case LANGUAGE_USER_KURDISH_IRAN: + case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ: + case LANGUAGE_KURDISH_ARABIC_IRAQ: + case LANGUAGE_KURDISH_ARABIC_LSO: + case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ: + return true; + + default: + break; + } return false; } @@ -323,7 +335,10 @@ sal_Int16 MsLangId::getScriptType( LanguageType nLang ) case LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA: case LANGUAGE_MONGOLIAN_MONGOLIAN_LSO: case LANGUAGE_USER_KURDISH_IRAN: - case LANGUAGE_USER_KURDISH_IRAQ: + case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ: + case LANGUAGE_KURDISH_ARABIC_IRAQ: + case LANGUAGE_KURDISH_ARABIC_LSO: + case LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ: case LANGUAGE_USER_KYRGYZ_CHINA: nScript = ::com::sun::star::i18n::ScriptType::COMPLEX; break; @@ -507,6 +522,9 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo case LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO: nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO; break; + case LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ: + nLang = LANGUAGE_KURDISH_ARABIC_IRAQ; + break; // The following are not strictly obsolete but should be mapped to a // replacement locale when encountered. 
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index b0a8f4d58517..60e8c1280f55 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -2073,6 +2073,11 @@ LanguageTag & LanguageTag::makeFallback() } +/* TODO: maybe this now could take advantage of the mnOverride field in + * isolang.cxx entries and search for kSAME instead of hardcoded special + * fallbacks. Though iterating through those tables would be slower and even + * then there would be some special cases, but we wouldn't lack entries that + * were missed out. */ ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const { ::std::vector< OUString > aVec; @@ -2111,6 +2116,36 @@ LanguageTag & LanguageTag::makeFallback() aVec.insert( aVec.end(), aRep.begin(), aRep.end()); // Already includes 'ca' language fallback. } + else if (aLanguage == "ku") + { + if (aCountry == "TR" || aCountry == "SY") + { + aVec.push_back( "kmr-Latn-" + aCountry); + aVec.push_back( "kmr-" + aCountry); + aVec.push_back( "kmr-Latn"); + aVec.push_back( "kmr"); + aVec.push_back( aLanguage); + } + else if (aCountry == "IQ" || aCountry == "IR") + { + aVec.push_back( "ckb-" + aCountry); + aVec.push_back( "ckb"); + } + } + else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY")) + { + aVec.push_back( "ku-Latn-" + aCountry); + aVec.push_back( "ku-" + aCountry); + aVec.push_back( aLanguage); + aVec.push_back( "ku"); + } + else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR")) + { + aVec.push_back( "ku-Arab-" + aCountry); + aVec.push_back( "ku-" + aCountry); + aVec.push_back( aLanguage); + // not 'ku' only, that was used for Latin script + } else aVec.push_back( aLanguage); } @@ -2174,6 +2209,8 @@ LanguageTag & LanguageTag::makeFallback() } else if (aLanguage == "pi" && aScript == "Latn") aVec.push_back( "pli"); // a special case for Pali dictionary, see fdo#41599 + 
else if (aLanguage == "kmr" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY")) + aVec.push_back( "ku-" + aCountry); } if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant) { @@ -2184,10 +2221,16 @@ LanguageTag & LanguageTag::makeFallback() aTmp = aLanguage + "-" + aScript; if (aTmp != maBcp47) aVec.push_back( aTmp); + // 'sh' actually denoted a script, so have it here instead of appended // at the end as language-only. if (aLanguage == "sr" && aScript == "Latn") aVec.push_back( "sh"); + else if (aLanguage == "ku" && aScript == "Arab") + aVec.push_back( "ckb"); + // 'ku' only denoted Latin script + else if (aLanguage == "kmr" && aScript == "Latn" && aCountry.isEmpty()) + aVec.push_back( "ku"); } bool bHaveLanguageVariant = false; if (!aCountry.isEmpty()) -- cgit v1.2.3