diff options
Diffstat (limited to 'i18npool/source/isolang')
-rw-r--r-- | i18npool/source/isolang/isolang.cxx | 61 | ||||
-rwxr-xr-x | i18npool/source/isolang/langid.pl | 55 | ||||
-rw-r--r-- | i18npool/source/isolang/mslangid.cxx | 10 |
3 files changed, 108 insertions, 18 deletions
diff --git a/i18npool/source/isolang/isolang.cxx b/i18npool/source/isolang/isolang.cxx index 9d80bf7050e6..357be80a69ea 100644 --- a/i18npool/source/isolang/isolang.cxx +++ b/i18npool/source/isolang/isolang.cxx @@ -287,8 +287,9 @@ static MsLangId::IsoLangEntry const aImplIsoLangEntries[] = { LANGUAGE_BELARUSIAN, "be", "BY" }, { LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default) { LANGUAGE_CATALAN, "ca", "AD" }, // Andorra - { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; old workaround for UI localization only, do not use in document content! Kept just in case.. - { LANGUAGE_USER_CATALAN_VALENCIAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; for UI localization, use in document content on own risk! + { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content! + { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content! +// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back. { LANGUAGE_FRENCH_CAMEROON, "fr", "CM" }, { LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" }, { LANGUAGE_FRENCH_HAITI, "fr", "HT" }, @@ -457,6 +458,9 @@ static MsLangId::IsoLangEntry const aImplIsoLangEntries[] = { LANGUAGE_USER_TAHITIAN, "ty", "PF" }, { LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" }, { LANGUAGE_USER_BAFIA, "ksf", "CM" }, + { LANGUAGE_USER_GIKUYU, "ki", "KE" }, + { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" }, + { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" }, { LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information { LANGUAGE_DONTKNOW, "", "" } // marks end of table }; @@ -1005,6 +1009,28 @@ LanguageType MsLangId::convertIsoByteStringToLanguage( } // ----------------------------------------------------------------------- + +struct IsoLangGLIBCModifiersEntry +{ + LanguageType mnLang; + sal_Char maLangStr[4]; + sal_Char maCountry[3]; + sal_Char maAtString[9]; +}; + +static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = +{ + // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier + { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, + { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia + { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro + { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro + { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, + { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, + { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, + { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table +}; + // convert a unix locale string into LanguageType // static @@ -1013,15 +1039,20 @@ LanguageType MsLangId::convertUnxByteStringToLanguage( { rtl::OString aLang; rtl::OString aCountry; + rtl::OString aAtString; sal_Int32 nLangSepPos = rString.indexOf( (sal_Char)'_' ); sal_Int32 nCountrySepPos = rString.indexOf( (sal_Char)'.' ); + sal_Int32 nAtPos = rString.indexOf( (sal_Char)'@' ); if (nCountrySepPos < 0) - nCountrySepPos = rString.indexOf( (sal_Char)'@' ); + nCountrySepPos = nAtPos; if (nCountrySepPos < 0) nCountrySepPos = rString.getLength(); + if (nAtPos >= 0) + aAtString = rString.copy( nAtPos+1 ); + if ( ((nLangSepPos >= 0) && (nLangSepPos > nCountrySepPos)) || ((nLangSepPos < 0)) ) { @@ -1035,6 +1066,30 @@ LanguageType MsLangId::convertUnxByteStringToLanguage( aCountry = rString.copy( nLangSepPos+1, nCountrySepPos - nLangSepPos - 1); } + // if there is a glibc modifier, first look for exact match in modifier table + if (aAtString.getLength()) + { + // language is lower case in table + rtl::OString aLowerLang = aLang.toAsciiLowerCase(); + // country is upper case in table + rtl::OString aUpperCountry = aCountry.toAsciiUpperCase(); + const IsoLangGLIBCModifiersEntry* pGLIBCModifiersEntry = aImplIsoLangGLIBCModifiersEntries; + do + { + if (( aLowerLang.equals( pGLIBCModifiersEntry->maLangStr ) ) && + ( aAtString.equals( pGLIBCModifiersEntry->maAtString ) )) + { + if ( !aUpperCountry.getLength() || + aUpperCountry.equals( pGLIBCModifiersEntry->maCountry ) ) + { + return pGLIBCModifiersEntry->mnLang; + } + } + ++pGLIBCModifiersEntry; + } + while ( pGLIBCModifiersEntry->mnLang != LANGUAGE_DONTKNOW ); + } + return convertIsoNamesToLanguage( aLang, aCountry ); } diff --git a/i18npool/source/isolang/langid.pl b/i18npool/source/isolang/langid.pl index 06883279345b..8035178b7bb5 100755 --- a/i18npool/source/isolang/langid.pl +++ b/i18npool/source/isolang/langid.pl @@ -39,7 +39,8 @@ sub Usage() "\n", "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n", "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n", - "listbox entries, postset.mk and file_ooo.scp registry name.\n\n", + "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n", + "msi-encodinglist.txt\n\n", "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n", @@ -105,7 +106,8 @@ sub grepFile($$$$@) my( $regex, $path, $module, $name, @addregex) = @_; my @result; my $found = 0; - my $arefound = ''; + my $areopen = 0; + my $arecloser = ''; my $file; # Try module under current working directory first to catch local # modifications. A Not yet delivered lang.h is a special case. @@ -145,17 +147,22 @@ sub grepFile($$$$@) print "$line\n"; push( @result, $line); } - else + elsif (@addregex) { - for my $re (@addregex) + # By convention first element is opener, second element is closer. + if (!$areopen) { - if ($re ne $arefound && $line =~ /$re/) + if ($line =~ /$addregex[0]/) { - if ($arefound eq '') - { - $arefound = $re; - } - else + $areopen = 1; + $arecloser = $addregex[1]; + } + } + if ($areopen) + { + for (my $i = 2; $i < @addregex; ++$i) + { + if ($line =~ /$addregex[$i]/) { if (!$found) { @@ -167,13 +174,19 @@ sub grepFile($$$$@) push( @result, $line); } } + if ($line =~ /$arecloser/) + { + $areopen = 0; + } } } } close( IN); } if (!$found) { - print "Not found in $file\n"; } + print "Not found in $file\n"; + #print "Not found in $file for $regex @addregex\n"; + } return @result; } @@ -317,13 +330,13 @@ sub main() if ($coun) { $loca = $lang . "_" . $coun; - push( @langcoungreplist, $lang . '(-' . $coun . ')?'); + push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); } else { $loca = $lang; $coun = ""; - push( @langcoungreplist, $lang); + push( @langcoungreplist, '\b' . $lang . '\b'); } my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; my $found; @@ -385,12 +398,24 @@ sub main() grepFile( '^\s*Name\s*\(' . $langcoun . '\)\s*=', "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); + # completelangiso=af ar as-IN ... zu grepFile( - '^\s*completelangiso\s*[= ](.{2,3}(-..)?)*' . $langcoun . '', + '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', "$SRC_ROOT", "solenv", "inc/postset.mk", # needs a duplicated pair of backslashes to produce a literal \\ - ('^\s*completelangiso\s*=', '^\s*' . $langcoun . '\s*\\\\*$')); + ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$')); + + # @noMSLocaleLangs = ( "br", "bs", ... ) + grepFile( + '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '', + "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm", + ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"')); + + # af 1252 1078 # Afrikaans + grepFile( + '^\s*' . $langcoun . '', + "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ()); } } return 0; diff --git a/i18npool/source/isolang/mslangid.cxx b/i18npool/source/isolang/mslangid.cxx index 8e9dddff872b..da9da0ff8646 100644 --- a/i18npool/source/isolang/mslangid.cxx +++ b/i18npool/source/isolang/mslangid.cxx @@ -102,6 +102,8 @@ LanguageType MsLangId::getRealLanguageWithoutConfig( LanguageType nLang ) nLang = getSystemUILanguage(); break; default: + /* TODO: would this be useful here? */ + //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); ; // nothing } if (nLang == LANGUAGE_DONTKNOW) @@ -128,6 +130,8 @@ LanguageType MsLangId::getRealLanguage( LanguageType nLang ) nLang = nConfiguredSystemUILanguage; break; default: + /* TODO: would this be useful here? */ + //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang); ; // nothing } if (nLang == LANGUAGE_DONTKNOW) @@ -451,6 +455,12 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang ) case LANGUAGE_SPANISH_DATED: nLang = LANGUAGE_SPANISH_MODERN; break; + + // Do not use ca-XV for document content. + /* TODO: remove in case we implement BCP47 language tags. */ + case LANGUAGE_USER_CATALAN_VALENCIAN: + nLang = LANGUAGE_CATALAN; + break; } return nLang; } |