summaryrefslogtreecommitdiff
path: root/i18npool/source/isolang
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/isolang')
-rw-r--r-- i18npool/source/isolang/isolang.cxx 61
-rwxr-xr-x i18npool/source/isolang/langid.pl 55
-rw-r--r-- i18npool/source/isolang/mslangid.cxx 10
3 files changed, 108 insertions, 18 deletions
diff --git a/i18npool/source/isolang/isolang.cxx b/i18npool/source/isolang/isolang.cxx
index 9d80bf7050e6..357be80a69ea 100644
--- a/i18npool/source/isolang/isolang.cxx
+++ b/i18npool/source/isolang/isolang.cxx
@@ -287,8 +287,9 @@ static MsLangId::IsoLangEntry const aImplIsoLangEntries[] =
{ LANGUAGE_BELARUSIAN, "be", "BY" },
{ LANGUAGE_CATALAN, "ca", "ES" }, // Spain (default)
{ LANGUAGE_CATALAN, "ca", "AD" }, // Andorra
- { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; old workaround for UI localization only, do not use in document content! Kept just in case..
- { LANGUAGE_USER_CATALAN_VALENCIAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; for UI localization, use in document content on own risk!
+ { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "XV" }, // XV: ISO 3166 user-assigned; workaround for UI localization only, do not use in document content!
+ { LANGUAGE_CATALAN, "qcv", "ES" }, // qcv: ISO 639-3 reserved-for-local-use; UI localization quirk only, do not use in document content!
+// { LANGUAGE_USER_CATALAN_VALENCIAN, "ca", "ES" }, // In case MS format files escaped into the wild, map them back.
{ LANGUAGE_FRENCH_CAMEROON, "fr", "CM" },
{ LANGUAGE_FRENCH_COTE_D_IVOIRE, "fr", "CI" },
{ LANGUAGE_FRENCH_HAITI, "fr", "HT" },
@@ -457,6 +458,9 @@ static MsLangId::IsoLangEntry const aImplIsoLangEntries[] =
{ LANGUAGE_USER_TAHITIAN, "ty", "PF" },
{ LANGUAGE_USER_MALAGASY_PLATEAU, "plt", "MG" },
{ LANGUAGE_USER_BAFIA, "ksf", "CM" },
+ { LANGUAGE_USER_GIKUYU, "ki", "KE" },
+ { LANGUAGE_USER_RUSYN_UKRAINE, "rue", "UA" },
+ { LANGUAGE_USER_RUSYN_SLOVAKIA, "rue", "SK" },
{ LANGUAGE_NONE, "zxx", "" }, // added to ISO 639-2 on 2006-01-11: Used to declare the absence of linguistic information
{ LANGUAGE_DONTKNOW, "", "" } // marks end of table
};
@@ -1005,6 +1009,28 @@ LanguageType MsLangId::convertIsoByteStringToLanguage(
}
// -----------------------------------------------------------------------
+
+struct IsoLangGLIBCModifiersEntry // one row of the glibc-modifier table: language + country + "@modifier" -> LanguageType
+{
+ LanguageType mnLang; // value returned by the lookup when this row matches; LANGUAGE_DONTKNOW marks the end of the table
+ sal_Char maLangStr[4]; // language code, lower case in the table; room for 3 characters plus the terminating NUL
+ sal_Char maCountry[3]; // country code, upper case in the table; room for 2 characters plus NUL, "" when the row has no country
+ sal_Char maAtString[9]; // text following '@' in the locale string; 9 bytes fit "cyrillic" (8 characters) plus NUL
+};
+
+static IsoLangGLIBCModifiersEntry const aImplIsoLangGLIBCModifiersEntries[] = // scanned front to back by convertUnxByteStringToLanguage when the locale string carries an '@' modifier
+{
+ // MS-LANGID codes ISO639-1/2/3 ISO3166 glibc modifier
+ { LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA, "bs", "BA", "cyrillic" }, // bs_BA@cyrillic
+ { LANGUAGE_USER_SERBIAN_LATIN_SERBIA, "sr", "RS", "latin" }, // Serbian Latin in Serbia
+ { LANGUAGE_SERBIAN_LATIN, "sr", "CS", "latin" }, // Serbian Latin in Serbia and Montenegro
+ { LANGUAGE_USER_SERBIAN_LATIN_MONTENEGRO, "sr", "ME", "latin" }, // Serbian Latin in Montenegro
+ { LANGUAGE_SERBIAN_LATIN_NEUTRAL, "sr", "", "latin" }, // language-only row (empty country). NOTE(review): the lookup accepts the first row with matching language and modifier when the input has no country, so "sr@latin" appears to hit the sr/RS row above first - confirm this row is reachable
+ { LANGUAGE_AZERI_CYRILLIC, "az", "AZ", "cyrillic" }, // az_AZ@cyrillic
+ { LANGUAGE_UZBEK_CYRILLIC, "uz", "UZ", "cyrillic" }, // uz_UZ@cyrillic
+ { LANGUAGE_DONTKNOW, "", "", "" } // marks end of table
+};
+
// convert a unix locale string into LanguageType
// static
@@ -1013,15 +1039,20 @@ LanguageType MsLangId::convertUnxByteStringToLanguage(
{
rtl::OString aLang;
rtl::OString aCountry;
+ rtl::OString aAtString;
sal_Int32 nLangSepPos = rString.indexOf( (sal_Char)'_' );
sal_Int32 nCountrySepPos = rString.indexOf( (sal_Char)'.' );
+ sal_Int32 nAtPos = rString.indexOf( (sal_Char)'@' );
if (nCountrySepPos < 0)
- nCountrySepPos = rString.indexOf( (sal_Char)'@' );
+ nCountrySepPos = nAtPos;
if (nCountrySepPos < 0)
nCountrySepPos = rString.getLength();
+ if (nAtPos >= 0)
+ aAtString = rString.copy( nAtPos+1 );
+
if ( ((nLangSepPos >= 0) && (nLangSepPos > nCountrySepPos))
|| ((nLangSepPos < 0)) )
{
@@ -1035,6 +1066,30 @@ LanguageType MsLangId::convertUnxByteStringToLanguage(
aCountry = rString.copy( nLangSepPos+1, nCountrySepPos - nLangSepPos - 1);
}
+ // if there is a glibc modifier, first look for exact match in modifier table
+ if (aAtString.getLength())
+ {
+ // language is lower case in table
+ rtl::OString aLowerLang = aLang.toAsciiLowerCase();
+ // country is upper case in table
+ rtl::OString aUpperCountry = aCountry.toAsciiUpperCase();
+ const IsoLangGLIBCModifiersEntry* pGLIBCModifiersEntry = aImplIsoLangGLIBCModifiersEntries;
+ do
+ {
+ if (( aLowerLang.equals( pGLIBCModifiersEntry->maLangStr ) ) &&
+ ( aAtString.equals( pGLIBCModifiersEntry->maAtString ) ))
+ {
+ if ( !aUpperCountry.getLength() ||
+ aUpperCountry.equals( pGLIBCModifiersEntry->maCountry ) )
+ {
+ return pGLIBCModifiersEntry->mnLang;
+ }
+ }
+ ++pGLIBCModifiersEntry;
+ }
+ while ( pGLIBCModifiersEntry->mnLang != LANGUAGE_DONTKNOW );
+ }
+
return convertIsoNamesToLanguage( aLang, aCountry );
}
diff --git a/i18npool/source/isolang/langid.pl b/i18npool/source/isolang/langid.pl
index 06883279345b..8035178b7bb5 100755
--- a/i18npool/source/isolang/langid.pl
+++ b/i18npool/source/isolang/langid.pl
@@ -39,7 +39,8 @@ sub Usage()
"\n",
"langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
"isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
- "listbox entries, postset.mk and file_ooo.scp registry name.\n\n",
+ "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n",
+ "msi-encodinglist.txt\n\n",
"Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
@@ -105,7 +106,8 @@ sub grepFile($$$$@)
my( $regex, $path, $module, $name, @addregex) = @_;
my @result;
my $found = 0;
- my $arefound = '';
+ my $areopen = 0;
+ my $arecloser = '';
my $file;
# Try module under current working directory first to catch local
# modifications. A Not yet delivered lang.h is a special case.
@@ -145,17 +147,22 @@ sub grepFile($$$$@)
print "$line\n";
push( @result, $line);
}
- else
+ elsif (@addregex)
{
- for my $re (@addregex)
+ # By convention first element is opener, second element is closer.
+ if (!$areopen)
{
- if ($re ne $arefound && $line =~ /$re/)
+ if ($line =~ /$addregex[0]/)
{
- if ($arefound eq '')
- {
- $arefound = $re;
- }
- else
+ $areopen = 1;
+ $arecloser = $addregex[1];
+ }
+ }
+ if ($areopen)
+ {
+ for (my $i = 2; $i < @addregex; ++$i)
+ {
+ if ($line =~ /$addregex[$i]/)
{
if (!$found)
{
@@ -167,13 +174,19 @@ sub grepFile($$$$@)
push( @result, $line);
}
}
+ if ($line =~ /$arecloser/)
+ {
+ $areopen = 0;
+ }
}
}
}
close( IN);
}
if (!$found) {
- print "Not found in $file\n"; }
+ print "Not found in $file\n";
+ #print "Not found in $file for $regex @addregex\n";
+ }
return @result;
}
@@ -317,13 +330,13 @@ sub main()
if ($coun)
{
$loca = $lang . "_" . $coun;
- push( @langcoungreplist, $lang . '(-' . $coun . ')?');
+ push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?');
}
else
{
$loca = $lang;
$coun = "";
- push( @langcoungreplist, $lang);
+ push( @langcoungreplist, '\b' . $lang . '\b');
}
my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml";
my $found;
@@ -385,12 +398,24 @@ sub main()
grepFile(
'^\s*Name\s*\(' . $langcoun . '\)\s*=',
"$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());
+
# completelangiso=af ar as-IN ... zu
grepFile(
- '^\s*completelangiso\s*[= ](.{2,3}(-..)?)*' . $langcoun . '',
+ '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '',
"$SRC_ROOT", "solenv", "inc/postset.mk",
# needs a duplicated pair of backslashes to produce a literal \\
- ('^\s*completelangiso\s*=', '^\s*' . $langcoun . '\s*\\\\*$'));
+ ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$'));
+
+ # @noMSLocaleLangs = ( "br", "bs", ... )
+ grepFile(
+ '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '',
+ "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm",
+ ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"'));
+
+ # af 1252 1078 # Afrikaans
+ grepFile(
+ '^\s*' . $langcoun . '',
+ "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ());
}
}
return 0;
diff --git a/i18npool/source/isolang/mslangid.cxx b/i18npool/source/isolang/mslangid.cxx
index 8e9dddff872b..da9da0ff8646 100644
--- a/i18npool/source/isolang/mslangid.cxx
+++ b/i18npool/source/isolang/mslangid.cxx
@@ -102,6 +102,8 @@ LanguageType MsLangId::getRealLanguageWithoutConfig( LanguageType nLang )
nLang = getSystemUILanguage();
break;
default:
+ /* TODO: would this be useful here? */
+ //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang);
; // nothing
}
if (nLang == LANGUAGE_DONTKNOW)
@@ -128,6 +130,8 @@ LanguageType MsLangId::getRealLanguage( LanguageType nLang )
nLang = nConfiguredSystemUILanguage;
break;
default:
+ /* TODO: would this be useful here? */
+ //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang);
; // nothing
}
if (nLang == LANGUAGE_DONTKNOW)
@@ -451,6 +455,12 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang )
case LANGUAGE_SPANISH_DATED:
nLang = LANGUAGE_SPANISH_MODERN;
break;
+
+ // Do not use ca-XV for document content.
+ /* TODO: remove in case we implement BCP47 language tags. */
+ case LANGUAGE_USER_CATALAN_VALENCIAN:
+ nLang = LANGUAGE_CATALAN;
+ break;
}
return nLang;
}