#!@INTLTOOL_PERL@ -w # # The Intltool Message Merger # # Copyright (C) 2000, 2003 Free Software Foundation. # Copyright (C) 2000, 2001 Eazel, Inc # # Intltool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # version 2 published by the Free Software Foundation. # # Intltool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # # Authors: Maciej Stachowiak # Kenneth Christiansen # Darin Adler # # Proper XML UTF-8'ification written by Cyrille Chepelov # ## Release information my $PROGRAM = "intltool-merge"; my $PACKAGE = "intltool"; my $VERSION = "0.27.2"; ## Loaded modules use strict; use Getopt::Long; use Text::Wrap; ## Scalars used by the option stuff my $HELP_ARG = 0; my $VERSION_ARG = 0; my $BA_STYLE_ARG = 0; my $XML_STYLE_ARG = 0; my $KEYS_STYLE_ARG = 0; my $DESKTOP_STYLE_ARG = 0; my $SCHEMAS_STYLE_ARG = 0; my $RFC822DEB_STYLE_ARG = 0; my $QUIET_ARG = 0; my $PASS_THROUGH_ARG = 0; my $UTF8_ARG = 0; my $cache_file; ## Handle options GetOptions ( "help" => \$HELP_ARG, "version" => \$VERSION_ARG, "quiet|q" => \$QUIET_ARG, "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility "ba-style|b" => \$BA_STYLE_ARG, "xml-style|x" => \$XML_STYLE_ARG, "keys-style|k" => \$KEYS_STYLE_ARG, "desktop-style|d" => \$DESKTOP_STYLE_ARG, "schemas-style|s" => \$SCHEMAS_STYLE_ARG, "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, "pass-through|p" => \$PASS_THROUGH_ARG, "utf8|u" => \$UTF8_ARG, "cache|c=s" => \$cache_file ) or &error; my $PO_DIR; my $FILE; my $OUTFILE; my %po_files_by_lang = (); my %translations = (); my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv"; # Use this instead of \w for XML files to handle more possible characters. my $w = "[-A-Za-z0-9._:]"; # XML quoted string contents my $q = "[^\\\"]*"; ## Check for options. if ($VERSION_ARG) { &print_version; } elsif ($HELP_ARG) { &print_help; } elsif ($BA_STYLE_ARG && @ARGV > 2) { &preparation; &print_message; &ba_merge_translations; &finalize; } elsif ($XML_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &xml_merge_translations; &finalize; } elsif ($KEYS_STYLE_ARG && @ARGV > 2) { &utf8_sanity_check; &preparation; &print_message; &keys_merge_translations; &finalize; } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) { &preparation; &print_message; &desktop_merge_translations; &finalize; } elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) { &preparation; &print_message; &schemas_merge_translations; &finalize; } elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) { &preparation; &print_message; &rfc822deb_merge_translations; &finalize; } else { &print_help; } exit; ## Sub for printing release information sub print_version { print <<_EOF_; ${PROGRAM} (${PACKAGE}) ${VERSION} Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. Copyright (C) 2000-2003 Free Software Foundation, Inc. Copyright (C) 2000-2001 Eazel, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. _EOF_ exit; } ## Sub for printing usage information sub print_help { print <<_EOF_; Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE Generates an output file that includes some localized attributes from an untranslated source file. Mandatory options: (exactly one must be specified) -b, --ba-style includes translations in the bonobo-activation style -d, --desktop-style includes translations in the desktop style -k, --keys-style includes translations in the keys style -s, --schemas-style includes translations in the schemas style -r, --rfc822deb-style includes translations in the RFC822 style -x, --xml-style includes translations in the standard xml style Other options: -u, --utf8 convert all strings to UTF-8 before merging -p, --pass-through use strings as found in .po files, without conversion (STRONGLY unrecommended with -x) -c, --cache=FILE specify cache file name (usually \$top_builddir/po/.intltool-merge-cache) -q, --quiet suppress most messages --help display this help and exit --version output version information and exit Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") or send email to . _EOF_ exit; } ## Sub for printing error messages sub print_error { print STDERR "Try `${PROGRAM} --help' for more information.\n"; exit; } sub print_message { print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; } sub preparation { $PO_DIR = $ARGV[0]; $FILE = $ARGV[1]; $OUTFILE = $ARGV[2]; &gather_po_files; &get_translation_database; } # General-purpose code for looking up translations in .po files sub po_file2lang { my ($tmp) = @_; $tmp =~ s/^.*\/(.*)\.po$/$1/; return $tmp; } sub gather_po_files { for my $po_file (glob "$PO_DIR/*.po") { $po_files_by_lang{po_file2lang($po_file)} = $po_file; } } sub get_local_charset { my ($encoding) = @_; my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias"; # seek character encoding aliases in charset.alias (glib) if (open CHARSET_ALIAS, $alias_file) { while () { next if /^\#/; return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i) } close CHARSET_ALIAS; } # if not found, return input string return $encoding; } sub get_po_encoding { my ($in_po_file) = @_; my $encoding = ""; open IN_PO_FILE, $in_po_file or die; while () { ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) { $encoding = $1; last; } } close IN_PO_FILE; if (!$encoding) { print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; $encoding = "ISO-8859-1"; } system ("$iconv -f $encoding -t UTF-8 /dev/null"); if ($?) { $encoding = get_local_charset($encoding); } return $encoding } sub utf8_sanity_check { if (!$UTF8_ARG) { if (!$PASS_THROUGH_ARG) { $PASS_THROUGH_ARG="1"; } } } sub get_translation_database { if ($cache_file) { &get_cached_translation_database; } else { &create_translation_database; } } sub get_newest_po_age { my $newest_age; foreach my $file (values %po_files_by_lang) { my $file_age = -M $file; $newest_age = $file_age if !$newest_age || $file_age < $newest_age; } $newest_age = 0 if !$newest_age; return $newest_age; } sub create_cache { print "Generating and caching the translation database\n" unless $QUIET_ARG; &create_translation_database; open CACHE, ">$cache_file" || die; print CACHE join "\x01", %translations; close CACHE; } sub load_cache { print "Found cached translation database\n" unless $QUIET_ARG; my $contents; open CACHE, "<$cache_file" || die; { local $/; $contents = ; } close CACHE; %translations = split "\x01", $contents; } sub get_cached_translation_database { my $cache_file_age = -M $cache_file; if (defined $cache_file_age) { if ($cache_file_age <= &get_newest_po_age) { &load_cache; return; } print "Found too-old cached translation database\n" unless $QUIET_ARG; } &create_cache; } sub create_translation_database { for my $lang (keys %po_files_by_lang) { my $po_file = $po_files_by_lang{$lang}; if ($UTF8_ARG) { my $encoding = get_po_encoding ($po_file); if (lc $encoding eq "utf-8") { open PO_FILE, "<$po_file"; } else { print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; } } else { open PO_FILE, "<$po_file"; } my $nextfuzzy = 0; my $inmsgid = 0; my $inmsgstr = 0; my $msgid = ""; my $msgstr = ""; while () { $nextfuzzy = 1 if /^#, fuzzy/; if (/^msgid "((\\.|[^\\])*)"/ ) { $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; $msgid = ""; $msgstr = ""; if ($nextfuzzy) { $inmsgid = 0; } else { $msgid = unescape_po_string($1); $inmsgid = 1; } $inmsgstr = 0; $nextfuzzy = 0; } if (/^msgstr "((\\.|[^\\])*)"/) { $msgstr = unescape_po_string($1); $inmsgstr = 1; $inmsgid = 0; } if (/^"((\\.|[^\\])*)"/) { $msgid .= unescape_po_string($1) if $inmsgid; $msgstr .= unescape_po_string($1) if $inmsgstr; } } $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; } } sub finalize { } sub unescape_one_sequence { my ($sequence) = @_; return "\\" if $sequence eq "\\\\"; return "\"" if $sequence eq "\\\""; return "\n" if $sequence eq "\\n"; # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal), # \xXX (hex) and has a comment saying they want to handle \u and \U. return $sequence; } sub unescape_po_string { my ($string) = @_; $string =~ s/(\\.)/unescape_one_sequence($1)/eg; return $string; } ## NOTE: deal with < - < but not > - > because it seems its ok to have ## > in the entity. For further info please look at #84738. sub entity_decode { local ($_) = @_; s/'/'/g; # ' s/"/"/g; # " s/&/&/g; s/</ 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60) { # the ($_ > 127) should probably be removed return "&#" . $_ . ";"; } else { return chr $_; } } sub entity_encoded_translation { my ($lang, $string) = @_; my $translation = $translations{$lang, $string}; return $string if !$translation; return entity_encode ($translation); } ## XML (bonobo-activation specific) merge code sub ba_merge_translations { my $source; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) { print OUTPUT $1; my $node = $2 . "\n"; my @strings = (); $_ = $node; while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { push @strings, entity_decode($3); } print OUTPUT; my %langs; for my $string (@strings) { for my $lang (keys %po_files_by_lang) { $langs{$lang} = 1 if $translations{$lang, $string}; } } for my $lang (sort keys %langs) { $_ = $node; s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; print OUTPUT; } } print OUTPUT $source; close OUTPUT; } ## XML (non-bonobo-activation) merge code sub xml_merge_translations { my $source; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">$OUTFILE" or die; # FIXME: support attribute translations # Empty nodes never need translation, so unmark all of them. # For example, <_foo/> is just replaced by . $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; # Support for <_foo>blah style translations. while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) { print OUTPUT $1; my $spaces = $2; my $tag = $3; my $string = $4; print OUTPUT "$spaces<$tag>$string\n"; $string =~ s/\s+/ /g; $string =~ s/^ //; $string =~ s/ $//; $string = entity_decode($string); for my $lang (sort keys %po_files_by_lang) { my $translation = $translations{$lang, $string}; next if !$translation; $translation = entity_encode($translation); print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation\n"; } } print OUTPUT $source; close OUTPUT; } sub keys_merge_translations { open INPUT, "<${FILE}" or die; open OUTPUT, ">${OUTFILE}" or die; while () { if (s/^(\s*)_(\w+=(.*))/$1$2/) { my $string = $3; print OUTPUT; my $non_translated_line = $_; for my $lang (sort keys %po_files_by_lang) { my $translation = $translations{$lang, $string}; next if !$translation; $_ = $non_translated_line; s/(\w+)=.*/[$lang]$1=$translation/; print OUTPUT; } } else { print OUTPUT; } } close OUTPUT; close INPUT; } sub desktop_merge_translations { open INPUT, "<${FILE}" or die; open OUTPUT, ">${OUTFILE}" or die; while () { if (s/^(\s*)_(\w+=(.*))/$1$2/) { my $string = $3; print OUTPUT; my $non_translated_line = $_; for my $lang (sort keys %po_files_by_lang) { my $translation = $translations{$lang, $string}; next if !$translation; $_ = $non_translated_line; s/(\w+)=.*/${1}[$lang]=$translation/; print OUTPUT; } } else { print OUTPUT; } } close OUTPUT; close INPUT; } sub schemas_merge_translations { my $source; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">$OUTFILE" or die; # FIXME: support attribute translations # Empty nodes never need translation, so unmark all of them. # For example, <_foo/> is just replaced by . $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; while ($source =~ s/ (.*?) (\s+)((\s*) (\s*(.*?)\s*<\/default>)?(\s*) (\s*(.*?)\s*<\/short>)?(\s*) (\s*(.*?)\s*<\/long>)?(\s*) <\/locale>) //sx) { print OUTPUT $1; my $locale_start_spaces = $2 ? $2 : ''; my $default_spaces = $4 ? $4 : ''; my $short_spaces = $7 ? $7 : ''; my $long_spaces = $10 ? $10 : ''; my $locale_end_spaces = $13 ? $13 : ''; my $c_default_block = $3 ? $3 : ''; my $default_string = $6 ? $6 : ''; my $short_string = $9 ? $9 : ''; my $long_string = $12 ? $12 : ''; $c_default_block =~ s/default>\[.*?\]/default>/s; print OUTPUT "$locale_start_spaces$c_default_block"; $default_string =~ s/\s+/ /g; $default_string = entity_decode($default_string); $short_string =~ s/\s+/ /g; $short_string = entity_decode($short_string); $long_string =~ s/\s+/ /g; $long_string = entity_decode($long_string); for my $lang (sort keys %po_files_by_lang) { my $default_translation = $translations{$lang, $default_string}; my $short_translation = $translations{$lang, $short_string}; my $long_translation = $translations{$lang, $long_string}; next if (!$default_translation && !$short_translation && !$long_translation); print OUTPUT "\n$locale_start_spaces"; print OUTPUT "$default_spaces"; if ($default_translation) { $default_translation = entity_encode($default_translation); print OUTPUT "$default_translation"; } print OUTPUT "$short_spaces"; if ($short_translation) { $short_translation = entity_encode($short_translation); print OUTPUT "$short_translation"; } print OUTPUT "$long_spaces"; if ($long_translation) { $long_translation = entity_encode($long_translation); print OUTPUT "$long_translation"; } print OUTPUT "$locale_end_spaces"; } } print OUTPUT $source; close OUTPUT; } sub rfc822deb_merge_translations { my $source; $Text::Wrap::huge = 'overflow'; { local $/; # slurp mode open INPUT, "<$FILE" or die "can't open $FILE: $!"; $source = ; close INPUT; } open OUTPUT, ">${OUTFILE}" or die; while ($source =~ /(^|\n+)(_)?([^:_\n]+)(:\s*)(.*?)(?=\n[\S\n]|$)/sg) { my $sep = $1; my $non_translated_line = $3.$4; my $string = $5; my $is_translatable = defined($2); # Remove [] dummy strings $string =~ s/\[\s[^\[\]]*\]$//; $non_translated_line .= $string; print OUTPUT $sep.$non_translated_line; if ($is_translatable) { my @str_list = rfc822deb_split($string); for my $lang (sort keys %po_files_by_lang) { my $is_translated = 1; my $str_translated = ''; my $first = 1; for my $str (@str_list) { my $translation = $translations{$lang, $str}; if (!$translation) { $is_translated = 0; last; } # $translation may also contain [] dummy # strings, mostly to indicate an empty string $translation =~ s/\[\s[^\[\]]*\]$//; if ($first) { $str_translated .= Text::Tabs::expand($translation) . "\n"; } else { $str_translated .= Text::Tabs::expand( Text::Wrap::wrap(' ', ' ', $translation)) . "\n .\n"; } $first = 0; # To fix some problems with Text::Wrap::wrap $str_translated =~ s/(\n )+\n/\n .\n/g; } next unless $is_translated; $str_translated =~ s/\n \.\n$//; $str_translated =~ s/\s+$//; $_ = $non_translated_line; s/^(\w+):\s*.*/$sep${1}-$lang: $str_translated/s; print OUTPUT; } } } print OUTPUT "\n"; close OUTPUT; close INPUT; } sub rfc822deb_split { # Debian defines a special way to deal with rfc822-style files: # when a value contain newlines, it consists of # 1. a short form (first line) # 2. a long description, all lines begin with a space, # and paragraphs are separated by a single dot on a line # This routine returns an array of all paragraphs, and reformat # them. my $text = shift; $text =~ s/^ //mg; return ($text) if $text !~ /\n/; $text =~ s/([^\n]*)\n//; my @list = ($1); my $str = ''; for my $line (split (/\n/, $text)) { chomp $line; $line =~ /\s+$/; if ($line =~ /^\.$/) { # New paragraph $str =~ s/\s*$//; push(@list, $str); $str = ''; } elsif ($line =~ /^\s/) { # Line which must not be reformatted $str .= "\n" if length ($str) && $str !~ /\n$/; $str .= $line."\n"; } else { # Continuation line, remove newline $str .= " " if length ($str) && $str !~ /[\n ]$/; $str .= $line; } } $str =~ s/\s*$//; push(@list, $str) if length ($str); return @list; }