diff options
Diffstat (limited to 'l10ntools/source/gsiconv.cxx')
-rw-r--r-- | l10ntools/source/gsiconv.cxx | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/l10ntools/source/gsiconv.cxx b/l10ntools/source/gsiconv.cxx new file mode 100644 index 000000000000..e127e55482da --- /dev/null +++ b/l10ntools/source/gsiconv.cxx @@ -0,0 +1,372 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2008 by Sun Microsystems, Inc. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * $RCSfile: gsiconv.cxx,v $ + * $Revision: 1.8 $ + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_l10ntools.hxx" +#include <stdio.h> +#include <tools/fsys.hxx> +#include <tools/stream.hxx> + +// local includes +#include "utf8conv.hxx" + +#define GSI_FILE_UNKNOWN 0x0000 +#define GSI_FILE_OLDSTYLE 0x0001 +#define GSI_FILE_L10NFRAMEWORK 0x0002 + +/*****************************************************************************/ +USHORT GetGSIFileType( SvStream &rStream ) +/*****************************************************************************/ +{ + USHORT nFileType = GSI_FILE_UNKNOWN; + + ULONG nPos( rStream.Tell()); + rStream.Seek( STREAM_SEEK_TO_BEGIN ); + + ByteString sLine; + while( !rStream.IsEof() && !sLine.Len()) + rStream.ReadLine( sLine ); + + if( sLine.Len()) { + if( sLine.Search( "($$)" ) != STRING_NOTFOUND ) + nFileType = GSI_FILE_OLDSTYLE; + else + nFileType = GSI_FILE_L10NFRAMEWORK; + } + + rStream.Seek( nPos ); + + return nFileType; +} + +/*****************************************************************************/ +ByteString GetGSILineId( const ByteString &rLine, USHORT nFileType ) +/*****************************************************************************/ +{ + ByteString sId; + switch ( nFileType ) { + case GSI_FILE_OLDSTYLE: + sId = rLine; + sId.SearchAndReplaceAll( "($$)", "\t" ); + sId = sId.GetToken( 0, '\t' ); + break; + + case GSI_FILE_L10NFRAMEWORK: + sId = rLine.GetToken( 0, '\t' ); + sId += "\t"; + sId += rLine.GetToken( 1, '\t' ); + sId += "\t"; + sId += rLine.GetToken( 4, '\t' ); + sId += "\t"; + sId += rLine.GetToken( 5, '\t' ); + break; + } + return sId; +} + +/*****************************************************************************/ +ByteString GetGSILineLangId( const ByteString &rLine, USHORT nFileType ) +/*****************************************************************************/ +{ + ByteString sLangId; + switch ( nFileType ) { + case GSI_FILE_OLDSTYLE: + sLangId = rLine; + sLangId.SearchAndReplaceAll( "($$)", "\t" ); + sLangId = sLangId.GetToken( 2, '\t' ); + break; + + case GSI_FILE_L10NFRAMEWORK: + sLangId = rLine.GetToken( 9, '\t' ); + break; + } + return sLangId; +} + +/*****************************************************************************/ +void ConvertGSILine( BOOL bToUTF8, ByteString &rLine, + rtl_TextEncoding nEncoding, USHORT nFileType ) +/*****************************************************************************/ +{ + switch ( nFileType ) { + case GSI_FILE_OLDSTYLE: + if ( bToUTF8 ) + rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding ); + else + rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding ); + break; + + case GSI_FILE_L10NFRAMEWORK: { + ByteString sConverted; + for ( USHORT i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) { + ByteString sToken = rLine.GetToken( i, '\t' ); + if (( i > 9 ) && ( i < 14 )) { + if( bToUTF8 ) + sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding ); + else + sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding ); + } + if ( i ) + sConverted += "\t"; + sConverted += sToken; + } + rLine = sConverted; + } + break; + } +} + +/*****************************************************************************/ +void Help() +/*****************************************************************************/ +{ + fprintf( stdout, "\n" ); + fprintf( stdout, "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n" ); + fprintf( stdout, "===============================================\n" ); + fprintf( stdout, "\n" ); + fprintf( stdout, "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n" ); + fprintf( stdout, "\n" ); + fprintf( stdout, "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n" ); + fprintf( stdout, "Switches: -t => conversion from charset to UTF-8\n" ); + fprintf( stdout, " -f => conversion from UTF-8 to charset\n" ); + fprintf( stdout, " -p n => creates several files with ca. n lines\n" ); + fprintf( stdout, "\n" ); + fprintf( stdout, "Allowed charsets:\n" ); + fprintf( stdout, " MS_932 => Japanese\n" ); + fprintf( stdout, " MS_936 => Chinese Simplified\n" ); + fprintf( stdout, " MS_949 => Korean\n" ); + fprintf( stdout, " MS_950 => Chinese Traditional\n" ); + fprintf( stdout, " MS_1250 => East Europe\n" ); + fprintf( stdout, " MS_1251 => Cyrillic\n" ); + fprintf( stdout, " MS_1252 => West Europe\n" ); + fprintf( stdout, " MS_1253 => Greek\n" ); + fprintf( stdout, " MS_1254 => Turkish\n" ); + fprintf( stdout, " MS_1255 => Hebrew\n" ); + fprintf( stdout, " MS_1256 => Arabic\n" ); + fprintf( stdout, "\n" ); + fprintf( stdout, "Allowed langids:\n" ); + fprintf( stdout, " 1 => ENGLISH_US\n" ); + fprintf( stdout, " 3 => PORTUGUESE \n" ); + fprintf( stdout, " 4 => GERMAN_DE (new german style)\n" ); + fprintf( stdout, " 7 => RUSSIAN\n" ); + fprintf( stdout, " 30 => GREEK\n" ); + fprintf( stdout, " 31 => DUTCH\n" ); + fprintf( stdout, " 33 => FRENCH\n" ); + fprintf( stdout, " 34 => SPANISH\n" ); + fprintf( stdout, " 35 => FINNISH\n" ); + fprintf( stdout, " 36 => HUNGARIAN\n" ); + fprintf( stdout, " 39 => ITALIAN\n" ); + fprintf( stdout, " 42 => CZECH\n" ); + fprintf( stdout, " 44 => ENGLISH (UK)\n" ); + fprintf( stdout, " 45 => DANISH\n" ); + fprintf( stdout, " 46 => SWEDISH\n" ); + fprintf( stdout, " 47 => NORWEGIAN\n" ); + fprintf( stdout, " 49 => GERMAN (old german style)\n" ); + fprintf( stdout, " 55 => PORTUGUESE_BRAZILIAN\n" ); + fprintf( stdout, " 81 => JAPANESE\n" ); + fprintf( stdout, " 82 => KOREAN\n" ); + fprintf( stdout, " 86 => CHINESE_SIMPLIFIED\n" ); + fprintf( stdout, " 88 => CHINESE_TRADITIONAL\n" ); + fprintf( stdout, " 90 => TURKISH\n" ); + fprintf( stdout, " 96 => ARABIC\n" ); + fprintf( stdout, " 97 => HEBREW\n" ); + fprintf( stdout, "\n" ); +} + +/*****************************************************************************/ +#if defined(UNX) || defined(OS2) +int main( int argc, char *argv[] ) +#else +int _cdecl main( int argc, char *argv[] ) +#endif +/*****************************************************************************/ +{ + if (( argc != 5 ) && ( argc != 4 )) { + Help(); + exit ( 0 ); + } + + if ( argc == 4 ) { + if ( ByteString( argv[ 1 ] ) == "-p" ) { + + DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US )); + if ( !aSource.Exists()) { + fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); + exit ( 2 ); + } + + DirEntry aOutput( aSource ); + + String sBase = aOutput.GetBase(); + String sExt = aOutput.GetExtension(); + + String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ); + SvFileStream aGSI( sGSI, STREAM_STD_READ ); + if ( !aGSI.IsOpen()) { + fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); + exit ( 3 ); + } + + USHORT nFileType( GetGSIFileType( aGSI )); + + ULONG nMaxLines = (ULONG) ByteString( argv[ 2 ] ).ToInt64(); + if ( !nMaxLines ) { + fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" ); + exit ( 3 ); + } + + ByteString sGSILine; + ByteString sOldId; + ULONG nLine = 0; + ULONG nOutputFile = 1; + + String sOutput( sBase ); + sOutput += String( "_", RTL_TEXTENCODING_ASCII_US ); + sOutput += String::CreateFromInt64( nOutputFile ); + if ( sExt.Len()) { + sOutput += String( ".", RTL_TEXTENCODING_ASCII_US ); + sOutput += sExt; + } + nOutputFile ++; + + aOutput.SetName( sOutput ); + SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); + + while ( !aGSI.IsEof()) { + + aGSI.ReadLine( sGSILine ); + ByteString sId( GetGSILineId( sGSILine, nFileType )); + + nLine++; + + if (( nLine >= nMaxLines ) && ( sId != sOldId )) { + aOutputStream.Close(); + + ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding()); + sText += " with "; + sText += ByteString::CreateFromInt64( nLine ); + sText += " lines written."; + + fprintf( stdout, "%s\n", sText.GetBuffer()); + String sOutput1( sBase ); + sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US ); + sOutput1 += String::CreateFromInt64( nOutputFile ); + if ( sExt.Len()) { + sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US ); + sOutput1 += sExt; + } + nOutputFile ++; + + aOutput.SetName( sOutput1 ); + + aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); + nLine = 0; + } + + aOutputStream.WriteLine( sGSILine ); + + sOldId = sId; + } + + aGSI.Close(); + aOutputStream.Close(); + + ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US ); + sText += " with "; + sText += ByteString::CreateFromInt64( nLine ); + sText += " lines written."; + } + else { + Help(); + exit( 1 ); + } + } + else { + if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) { + rtl_TextEncoding nEncoding; + + ByteString sCurLangId( argv[ 2 ] ); + + ByteString sCharset( argv[ 3 ] ); + sCharset.ToUpperAscii(); + + if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932; + else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936; + else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949; + else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950; + else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250; + else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251; + else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252; + else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253; + else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254; + else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255; + else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256; + else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257; + else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8; + + else { + Help(); + exit ( 1 ); + } + + DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US )); + if ( !aSource.Exists()) { + fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); + exit ( 2 ); + } + + String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ); + SvFileStream aGSI( sGSI, STREAM_STD_READ ); + if ( !aGSI.IsOpen()) { + fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); + exit ( 3 ); + } + USHORT nFileType( GetGSIFileType( aGSI )); + + ByteString sGSILine; + while ( !aGSI.IsEof()) { + + aGSI.ReadLine( sGSILine ); + ByteString sLangId( GetGSILineLangId( sGSILine, nFileType )); + if ( sLangId == sCurLangId ) + ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType ); + + fprintf( stdout, "%s\n", sGSILine.GetBuffer()); + } + + aGSI.Close(); + } + else { + Help(); + exit( 1 ); + } + } + return 0; +} |