summaryrefslogtreecommitdiff
path: root/l10ntools
diff options
context:
space:
mode:
author: David Tardon <dtardon@redhat.com>, 2012-07-15 11:38:07 +0200
committer: David Tardon <dtardon@redhat.com>, 2012-07-17 14:06:52 +0200
commit: 4b592ce754e578a347490341caecc1bc45f67242 (patch)
tree: a25dce68615e277f12827d061e79da086522cf15 /l10ntools
parent: 4fea92fe5389ba4de593f5e991870cf595b516e1 (diff)
move ulfconv to l10ntools
This allows us to drop dependency on setup_native everywhere. Change-Id: Ib033f8d5953682379c6c2ab53d5cf221e9d8cfec
Diffstat (limited to 'l10ntools')
-rw-r--r-- l10ntools/Executable_ulfconv.mk 21
-rw-r--r-- l10ntools/Module_l10ntools.mk 2
-rw-r--r-- l10ntools/Package_ulfconv.mk 14
-rw-r--r-- l10ntools/source/ulfconv/msi-encodinglist.txt 152
-rw-r--r-- l10ntools/source/ulfconv/ulfconv.cxx 361
5 files changed, 550 insertions, 0 deletions
diff --git a/l10ntools/Executable_ulfconv.mk b/l10ntools/Executable_ulfconv.mk
new file mode 100644
index 000000000000..8ed5bb0e5f2c
--- /dev/null
+++ b/l10ntools/Executable_ulfconv.mk
@@ -0,0 +1,21 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Executable_Executable,ulfconv))
+
+$(eval $(call gb_Executable_use_libraries,ulfconv,\
+ sal \
+ $(gb_STDLIBS) \
+))
+
+$(eval $(call gb_Executable_add_exception_objects,ulfconv,\
+ l10ntools/source/ulfconv/ulfconv \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/l10ntools/Module_l10ntools.mk b/l10ntools/Module_l10ntools.mk
index c149db078c77..e3c11344a0c9 100644
--- a/l10ntools/Module_l10ntools.mk
+++ b/l10ntools/Module_l10ntools.mk
@@ -29,6 +29,7 @@ $(eval $(call gb_Module_Module,l10ntools))
$(eval $(call gb_Module_add_targets,l10ntools,\
Executable_helpex \
Executable_idxdict \
+ Executable_ulfconv \
Executable_ulfex \
Executable_gsicheck \
Executable_cfgex \
@@ -41,6 +42,7 @@ $(eval $(call gb_Module_add_targets,l10ntools,\
Library_helplinker \
Package_inc \
Package_scripts \
+ Package_ulfconv \
))
ifneq ($(SOLAR_JAVA),)
diff --git a/l10ntools/Package_ulfconv.mk b/l10ntools/Package_ulfconv.mk
new file mode 100644
index 000000000000..41337b26cb6e
--- /dev/null
+++ b/l10ntools/Package_ulfconv.mk
@@ -0,0 +1,14 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Package_Package,l10ntools_ulfconv,$(SRCDIR)/l10ntools/source/ulfconv))
+
+$(eval $(call gb_Package_add_file,l10ntools_ulfconv,bin/msi-encodinglist.txt,msi-encodinglist.txt))
+
+# vim: set noet sw=4 ts=4:
diff --git a/l10ntools/source/ulfconv/msi-encodinglist.txt b/l10ntools/source/ulfconv/msi-encodinglist.txt
new file mode 100644
index 000000000000..1fd3cb96e70d
--- /dev/null
+++ b/l10ntools/source/ulfconv/msi-encodinglist.txt
@@ -0,0 +1,152 @@
+# Syntax: language ANSI-Codepage LCID
+# comment lines begin with hash
+af 1252 1078 # Afrikaans
+am 0 1118 # Amharic
+ar 1256 1025
+ar-SA 1256 1025
+as 0 1101 # Assamese
+as-IN 0 1101 # Assamese
+ast 1252 1610
+be 1251 1059 # Belarusian
+be-BY 1251 1059
+bg 1251 1026 # Bulgarian
+bn 0 2117 # Bengali
+bn-BD 0 2117 # Bengali Bangladesh
+bn-IN 0 1093 # Bengali India
+bo 0 2121
+br 1252 1150 # Breton
+brx 0 1603 # Bodo (India)
+bs 0 5146 # bosnian
+ca 1252 1027 # Catalan
+ca-XV 1252 32771 # Catalan Valencian
+cs 1250 1029 # Czech
+cy 1252 1106 # Welsh
+da 1252 1030
+de 1252 1031
+dgo 0 1604 # Dogri (India)
+dz 0 2129 # Dzongkha (same ID as tibetan bhutan (s. i40713))
+el 1253 1032
+en-GB 1252 2057
+en-US 1252 1033
+en-ZA 1252 7177
+eo 0 1553 # Esperanto
+es 1252 1034
+et 1257 1061
+eu 1252 1069 # Basque
+fa 0 1065 # Farsi
+fi 1252 1035
+fo 1252 1080 # Faroese
+fr 1252 1036
+fr-CA 1252 3084
+fur 0 1585
+ga 0 2108 # Irish
+gd 0 1084 # Gaelic (Scotland)
+gl 1252 1110 # Galician
+gu 0 1095 # Gujarati
+gu-IN 0 1095 # Gujarati
+he 1255 1037
+hi 0 1081
+hr 1250 1050 # Croatian
+ht 1252 1626 # Haitian
+hu 1250 1038
+hy 0 1067 # Armenian
+id 1252 1057 # Indonesian
+is 1252 1039 # Icelandic
+it 1252 1040
+ja 932 1041
+jbo 0 1624
+ka 0 1079 # Georgian
+kab 0 1625
+kk 0 1087
+km 0 1107 # Khmer
+kn 0 1099 # Kannada
+kn-IN 0 1099 # Kannada
+ko 949 1042
+kok 0 1111 # Konkani
+ks 0 1120 # Kashmiri
+ku 0 1574
+ky 0 1088 # Kyrgyz
+ky-CN 0 1640 # Kyrgyz (China)
+lb 1252 1134
+lo 0 1108 # Lao
+lt 1257 1063 # Lithuanian
+lv 1257 1062 # Latvian
+mai 0 1605 # Maithili (India)
+mk 1251 1071 # Macedonian
+ml 0 1100
+ml-IN 0 1100
+mn 0 1104 # Mongolian
+mni 0 1112 # Manipuri
+mn-TR 0 2128 # Mongolian Classical/traditional
+mr 0 1102 # Marathi
+mr-IN 0 1102
+ms 0 1086 # Malay (Malaysian)
+mt 0 1082 # Maltese
+my 0 1109 # Burmese
+nb 1252 1044
+ne 0 1121 # Nepali
+nl 1252 1043
+nn 1252 2068
+no 1252 1044
+nr 0 1580 # Ndebele South
+nso 0 1132
+ny 0 1598
+oc 1252 1154 # Occitan-lengadocian
+om 0 2162
+or 0 1096 # Oriya
+or-IN 0 1096
+pa-IN 0 1094 # Punjabi
+pap 0 2171
+pl 1250 1045
+ps 0 2171
+pt 1252 2070
+pt-BR 1252 1046
+pt-PT 1252 2070
+qtz 1252 1033 # key id pseudo language
+rm 0 1047 # Raeto-Romance
+ro 1250 1048 # Romanian
+ru 1251 1049
+rw 0 1569 # Kinyarwanda
+sa-IN 0 1103 # Sanskrit
+sat 0 1606 # Santali
+sb 0 1070 # Sorbian
+sc 0 3047
+sd 0 1113 # Sindhi
+sh 1250 2074 # Serbian Latin
+si 0 2133
+sk 1250 1051 # Slovak
+sl 1250 1060 # Slovenian
+sq 1250 1052 # Albanian
+sr 1251 3098 # Serbian Cyrillic
+sr-SP 1251 3098 # Serbian Cyrillic
+ss 0 1579 # Swazi
+st 0 1072 # Southern Sotho, Sutu
+sv 1252 1053
+sw 1252 1089 # Swahili
+sw-TZ 1252 1089 # Swahili
+so 0 1143
+ta 0 1097 # Tamil
+ta-IN 0 1097 # Tamil
+te 0 1098
+te-IN 0 1098
+tg 0 1064 # Tajik
+th 874 1054
+ti 0 1139 # Tigrinya
+ti-ER 0 1139 # Tigrinya
+tn 0 1074 # Setsuana
+tr 1254 1055 # Turkish
+ts 0 1073 # Tsonga
+tk 0 1090
+tt 1251 1092 # Tatar
+ug 0 1152
+uk 1251 1058 # Ukrainian
+ur 1256 1056 # Urdu
+ur-IN 0 2080
+uz 0 1091 # Uzbek (Latin)
+ve 0 1075 # Venda
+vi 1258 1066 # Vietnamese
+xh 0 1076 # Xhosa
+yi 0 1085 # Yiddish
+zh-CN 936 2052
+zh-TW 950 1028
+zu 0 1077 # Zulu
diff --git a/l10ntools/source/ulfconv/ulfconv.cxx b/l10ntools/source/ulfconv/ulfconv.cxx
new file mode 100644
index 000000000000..1643b330d776
--- /dev/null
+++ b/l10ntools/source/ulfconv/ulfconv.cxx
@@ -0,0 +1,361 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sal/alloca.h>
+#include <sal/macros.h>
+
+#include <rtl/ustring.hxx>
+
+#include <map>
+#include <string>
+
+/*****************************************************************************
+ * typedefs
+ *
+ * EncodingMap associates a language string with the rtl_TextEncoding that
+ * read_encoding_table() found for it.
+ *
+ * _pair is one row of a static lookup table: a string key and the text
+ * encoding it stands for.  Tables of _pair are searched with _pair_search(),
+ * which orders rows using _pair_compare().
+ *****************************************************************************/
+
+typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
+
+struct _pair {
+ const char *key; // lookup key (the codepage column of the encoding table)
+ rtl_TextEncoding value; // encoding selected by that key
+};
+
+static int _pair_compare (const char *key, const _pair *pair);
+static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
+
+
+const _pair _ms_encoding_list[] = { // bisected by _pair_search(): rows must stay sorted as _pair_compare() orders them
+ { "0", RTL_TEXTENCODING_UTF8 }, // codepage "0" in the encoding table selects UTF-8
+ { "1250", RTL_TEXTENCODING_MS_1250 },
+ { "1251", RTL_TEXTENCODING_MS_1251 },
+ { "1252", RTL_TEXTENCODING_MS_1252 },
+ { "1253", RTL_TEXTENCODING_MS_1253 },
+ { "1254", RTL_TEXTENCODING_MS_1254 },
+ { "1255", RTL_TEXTENCODING_MS_1255 },
+ { "1256", RTL_TEXTENCODING_MS_1256 },
+ { "1257", RTL_TEXTENCODING_MS_1257 },
+ { "1258", RTL_TEXTENCODING_MS_1258 },
+ { "874", RTL_TEXTENCODING_MS_874 }, // keys are compared as strings, not numbers, hence "874" after "1258"
+ { "932", RTL_TEXTENCODING_MS_932 },
+ { "936", RTL_TEXTENCODING_MS_936 },
+ { "949", RTL_TEXTENCODING_MS_949 },
+ { "950", RTL_TEXTENCODING_MS_950 }
+};
+
+
+/*****************************************************************************
+ * fgets replacement that also works with unix line ends on Windows
+ *
+ * Reads characters from fp into s until a '\n' has been stored, n-1
+ * characters have been stored, or getc() reports EOF.  The result is always
+ * NUL terminated and keeps the '\n' if one was read.
+ *
+ * Returns s when at least one character was stored, NULL otherwise (this
+ * includes n <= 1, in which case nothing is read at all).
+ *****************************************************************************/
+
+char * my_fgets(char *s, int n, FILE *fp)
+{
+    int len = 0;
+
+    while( len < n-1 )
+    {
+        const int ch = getc(fp);
+        if( ch == EOF )
+            break;
+
+        s[len++] = (char) ch;
+
+        /* the line end is part of the result, but nothing after it */
+        if( ch == '\n' )
+            break;
+    }
+
+    /* nothing stored: end of input (or no room in the buffer) */
+    if( len <= 0 )
+        return NULL;
+
+    s[len] = '\0';
+    return s;
+}
+
+/*****************************************************************************
+ * ordering callback for the binary search in _pair_search
+ *
+ * Compares key with pair->key via rtl_str_compareIgnoreAsciiCase and passes
+ * that function's result through unchanged: negative, zero or positive.
+ *****************************************************************************/
+
+static int
+_pair_compare (const char *key, const _pair *pair)
+{
+    return rtl_str_compareIgnoreAsciiCase( key, pair->key );
+}
+
+/*****************************************************************************
+ * binary search on encoding tables
+ *
+ * Looks for key among the first `member` rows of base, which have to be
+ * sorted in the order _pair_compare defines.  Returns the matching row, or
+ * NULL when there is no match or an argument is NULL / the table is empty.
+ *****************************************************************************/
+
+static const _pair*
+_pair_search (const char *key, const _pair *base, unsigned int member )
+{
+    /* nothing to search in, or nothing to search for */
+    if ( key == NULL || base == NULL || member == 0 )
+        return NULL;
+
+    unsigned int first = 0;         /* first row still in question */
+    unsigned int last = member;     /* one past the last such row */
+
+    while ( first < last )
+    {
+        const unsigned int middle = (first + last) / 2;
+        const _pair *candidate = base + middle;
+        const int order = _pair_compare( key, candidate );
+
+        if ( order == 0 )
+            return candidate;
+
+        /* keep only the half that can still contain key */
+        if ( order < 0 )
+            last = middle;
+        else
+            first = middle + 1;
+    }
+
+    return NULL;
+}
+
+
+/************************************************************************
+ * read_encoding_table
+ *
+ * Reads the table `file` (syntax: language ANSI-codepage LCID, one entry
+ * per line, lines starting with '#' are comments) and adds a
+ * language -> text encoding entry to aEncodingMap for every line whose
+ * codepage column is listed in _ms_encoding_list.  Anything after the
+ * codepage column is ignored.
+ *
+ * Lines that are blank, start with white space, or carry no codepage
+ * column are skipped.
+ *
+ * If the file cannot be opened, an error is printed to stderr and the
+ * program exits with status 2.
+ ************************************************************************/
+
+void read_encoding_table(char * file, EncodingMap& aEncodingMap)
+{
+    FILE * fp = fopen(file, "r");
+    if ( ! fp ) {
+        fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
+        exit(2);
+    }
+
+    const unsigned int members = SAL_N_ELEMENTS( _ms_encoding_list );
+
+    char buffer[512];
+    while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
+
+        // strip comment lines
+        if ( buffer[0] == '#' )
+            continue;
+
+        // find end of language string; never scan beyond the terminator.
+        // isspace() is only defined for unsigned char values (and EOF),
+        // hence the casts.
+        char * cp = buffer;
+        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
+            ++cp;
+
+        // no language (blank line / leading white space) or nothing
+        // following the language: there is no codepage to look up
+        if ( cp == buffer || *cp == '\0' )
+            continue;
+        *cp++ = '\0';
+
+        // find start of codepage string ('\0' is not white space, so this
+        // stops at the end of the line at the latest)
+        while ( isspace((unsigned char) *cp) )
+            ++cp;
+        char * codepage = cp;
+
+        // find end of codepage string
+        while ( *cp != '\0' && ! isspace((unsigned char) *cp) )
+            ++cp;
+        *cp = '\0';
+
+        // find the correct mapping for codepage; an empty or unknown
+        // codepage simply yields no entry
+        const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
+
+        if ( encoding != NULL ) {
+            const std::string language(buffer);
+            aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
+        }
+    }
+
+    fclose(fp);
+}
+
+/************************************************************************
+ * print_legacy_mixed
+ *
+ * Writes aString to ostream converted to the encoding that aEncodingMap
+ * holds for `language`.  If the map has no entry for that language,
+ * nothing is written to ostream and a warning goes to stderr instead.
+ ************************************************************************/
+
+void print_legacy_mixed(
+    FILE * ostream,
+    const rtl::OUString& aString,
+    const std::string& language,
+    EncodingMap& aEncodingMap)
+{
+    const EncodingMap::iterator entry = aEncodingMap.find(language);
+
+    if ( entry == aEncodingMap.end() ) {
+        fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
+        return;
+    }
+
+    const rtl::OString aConverted( OUStringToOString(aString, entry->second) );
+    fputs(aConverted.getStr(), ostream);
+}
+
+/************************************************************************
+ * print_java_style
+ *
+ * Writes aString to ostream one UTF-16 code unit at a time: units below
+ * 128 are written as a single character, all others as a backslash-u
+ * escape followed by four lower case hex digits.
+ ************************************************************************/
+
+void print_java_style(FILE * ostream, const rtl::OUString& aString)
+{
+    const sal_Unicode * pos = aString.getStr();
+    const sal_Unicode * const end = pos + aString.getLength();
+
+    for ( ; pos != end; ++pos ) {
+        const sal_Unicode uc = *pos;
+
+        if ( uc >= 128 ) {
+            // high byte first, then low byte
+            fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
+            continue;
+        }
+
+        fprintf(ostream, "%c", (char) uc);
+    }
+}
+
+/************************************************************************
+ * main
+ *
+ * Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]
+ *
+ * Copies the input (the given ulf file, or stdin) to the output (the -o
+ * file, or stdout).  Lines of the form
+ *     <lang> = "<UTF-8 text>"
+ * are rewritten: without an encoding table the text is printed with
+ * print_java_style, otherwise with print_legacy_mixed.  All other lines,
+ * including lines that lack the closing quote, are copied unchanged.
+ *
+ * Exits with status 2 on usage errors or when a file cannot be opened.
+ ************************************************************************/
+
+int main( int argc, char * const argv[] )
+{
+    EncodingMap aEncodingMap;
+
+    FILE *istream = stdin;
+    FILE *ostream = stdout;
+
+    char *outfile = NULL;
+
+    int errflg = 0;
+    int argi;
+
+    for( argi=1; argi < argc; argi++ )
+    {
+        const char *arg = argv[argi];
+
+        /* an option is exactly "-x".  arg[1] is tested before arg[2] so
+           that for a lone "-" the byte behind its terminator is never
+           read; such an argument ends option processing. */
+        if( arg[0] != '-' || arg[1] == '\0' || arg[2] != '\0' )
+            break;
+
+        switch( arg[1] ) {
+        case 'o':
+        case 't':
+            if ( argi+1 >= argc || argv[argi+1][0] == '-' )
+            {
+                fprintf(stderr, "Option -%c requires an operand\n", arg[1]);
+                errflg++;
+                break;
+            }
+
+            ++argi;
+            if ( arg[1] == 'o' )
+                outfile = argv[argi];
+            else
+                read_encoding_table(argv[argi], aEncodingMap);
+            break;
+        default:
+            fprintf(stderr, "Unrecognized option: -%c\n", arg[1]);
+            errflg++;
+        }
+    }
+
+    if (errflg) {
+        fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
+        exit(2);
+    }
+
+    /* read from the given input file instead of stdin */
+    if ( argi < argc )
+    {
+        istream = fopen(argv[argi], "r");
+        if ( istream == NULL ) {
+            fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
+            exit(2);
+        }
+    }
+
+    /* open output file if any */
+    if ( outfile )
+    {
+        ostream = fopen(outfile, "w");
+        if ( ostream == NULL ) {
+            fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
+            fclose(istream);
+            exit(2);
+        }
+    }
+
+    /* read line by line from the input */
+    char buffer[65536];
+    while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
+
+        /* only handle lines containing  = "  and a closing quote behind
+           it; strrchr returns NULL if that quote is missing, and such a
+           line is passed through like any other non-matching line */
+        char * cp = strstr(buffer, " = \"");
+        const char * end = cp ? strrchr(cp + 4, '\"') : NULL;
+        if ( cp == NULL || end == NULL ) {
+            fputs(buffer, ostream);
+            continue;
+        }
+        cp += 4;
+
+        /* find end of lang string; the line contains a blank, so this
+           terminates.  isspace() needs an unsigned char value. */
+        size_t n = 0;
+        while ( ! isspace((unsigned char) buffer[n]) )
+            ++n;
+        const std::string lang(buffer, n);
+
+        rtl::OUString aString;
+        rtl_string2UString( &aString.pData, cp, static_cast< sal_Int32 >(end - cp),
+            RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
+
+        fprintf(ostream, "%s = \"", lang.c_str());
+
+        if ( aEncodingMap.empty() ) {
+            print_java_style(ostream, aString);
+        } else {
+            print_legacy_mixed(ostream, aString, lang, aEncodingMap);
+        }
+
+        fprintf(ostream, "\"\n");
+    }
+
+    fclose(ostream);
+    fclose(istream);
+
+    return 0;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */