intltool-merge

   1 #!/scratchbox/tools/bin/perl -w
   2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
   3
   4 #
   5 #  The Intltool Message Merger
   6 #
   7 #  Copyright (C) 2000, 2003 Free Software Foundation.
   8 #  Copyright (C) 2000, 2001 Eazel, Inc
   9 #
  10 #  Intltool is free software; you can redistribute it and/or
  11 #  modify it under the terms of the GNU General Public License
  12 #  version 2 published by the Free Software Foundation.
  13 #
  14 #  Intltool is distributed in the hope that it will be useful,
  15 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 #  General Public License for more details.
  18 #
  19 #  You should have received a copy of the GNU General Public License
  20 #  along with this program; if not, write to the Free Software
  21 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 #
  23 #  As a special exception to the GNU General Public License, if you
  24 #  distribute this file as part of a program that contains a
  25 #  configuration script generated by Autoconf, you may include it under
  26 #  the same distribution terms that you use for the rest of that program.
  27 #
  28 #  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
  29 #            Kenneth Christiansen <kenneth@gnu.org>
  30 #            Darin Adler <darin@bentspoon.com>
  31 #
  32 #  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  33 #
  34
  35 ## Release information
  36 my $PROGRAM = "intltool-merge";
  37 my $PACKAGE = "intltool";
  38 my $VERSION = "0.35.0";
  39
  40 ## Loaded modules
  41 use strict;
  42 use Getopt::Long;
  43 use Text::Wrap;
  44 use File::Basename;
  45
  46 my $must_end_tag      = -1;
  47 my $last_depth        = -1;
  48 my $translation_depth = -1;
  49 my @tag_stack = ();
  50 my @entered_tag = ();
  51 my @translation_strings = ();
  52 my $leading_space = "";
  53
  54 ## Scalars used by the option stuff
  55 my $HELP_ARG = 0;
  56 my $VERSION_ARG = 0;
  57 my $BA_STYLE_ARG = 0;
  58 my $XML_STYLE_ARG = 0;
  59 my $KEYS_STYLE_ARG = 0;
  60 my $DESKTOP_STYLE_ARG = 0;
  61 my $SCHEMAS_STYLE_ARG = 0;
  62 my $RFC822DEB_STYLE_ARG = 0;
  63 my $QUIET_ARG = 0;
  64 my $PASS_THROUGH_ARG = 0;
  65 my $UTF8_ARG = 0;
  66 my $MULTIPLE_OUTPUT = 0;
  67 my $cache_file;
  68
  69 ## Handle options
  70 GetOptions
  71 (
  72  "help" => \$HELP_ARG,
  73  "version" => \$VERSION_ARG,
  74  "quiet|q" => \$QUIET_ARG,
  75  "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  76  "ba-style|b" => \$BA_STYLE_ARG,
  77  "xml-style|x" => \$XML_STYLE_ARG,
  78  "keys-style|k" => \$KEYS_STYLE_ARG,
  79  "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  80  "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  81  "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
  82  "pass-through|p" => \$PASS_THROUGH_ARG,
  83  "utf8|u" => \$UTF8_ARG,
  84  "multiple-output|m" => \$MULTIPLE_OUTPUT,
  85  "cache|c=s" => \$cache_file
  86  ) or &error;
  87
  88 my $PO_DIR;
  89 my $FILE;
  90 my $OUTFILE;
  91
  92 my %po_files_by_lang = ();
  93 my %translations = ();
  94 my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";
  95 my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
  96
  97 # Use this instead of \w for XML files to handle more possible characters.
  98 my $w = "[-A-Za-z0-9._:]";
  99
 100 # XML quoted string contents
 101 my $q = "[^\\\"]*";
 102
 103 ## Check for options.
 104
 105 if ($VERSION_ARG)
 106 {
 107         &print_version;
 108 }
 109 elsif ($HELP_ARG)
 110 {
 111         &print_help;
 112 }
 113 elsif ($BA_STYLE_ARG && @ARGV > 2)
 114 {
 115         &utf8_sanity_check;
 116         &preparation;
 117         &print_message;
 118         &ba_merge_translations;
 119         &finalize;
 120 }
 121 elsif ($XML_STYLE_ARG && @ARGV > 2)
 122 {
 123         &utf8_sanity_check;
 124         &preparation;
 125         &print_message;
 126         &xml_merge_output;
 127         &finalize;
 128 }
 129 elsif ($KEYS_STYLE_ARG && @ARGV > 2)
 130 {
 131         &utf8_sanity_check;
 132         &preparation;
 133         &print_message;
 134         &keys_merge_translations;
 135         &finalize;
 136 }
 137 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
 138 {
 139         &utf8_sanity_check;
 140         &preparation;
 141         &print_message;
 142         &desktop_merge_translations;
 143         &finalize;
 144 }
 145 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
 146 {
 147         &utf8_sanity_check;
 148         &preparation;
 149         &print_message;
 150         &schemas_merge_translations;
 151         &finalize;
 152 }
 153 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
 154 {
 155         &preparation;
 156         &print_message;
 157         &rfc822deb_merge_translations;
 158         &finalize;
 159 }
 160 else
 161 {
 162         &print_help;
 163 }
 164
 165 exit;
 166
 167 ## Sub for printing release information
 168 sub print_version
 169 {
 170     print <<_EOF_;
 171 ${PROGRAM} (${PACKAGE}) ${VERSION}
 172 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
 173
 174 Copyright (C) 2000-2003 Free Software Foundation, Inc.
 175 Copyright (C) 2000-2001 Eazel, Inc.
 176 This is free software; see the source for copying conditions.  There is NO
 177 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 178 _EOF_
 179     exit;
 180 }
 181
 182 ## Sub for printing usage information
 183 sub print_help
 184 {
 185     print <<_EOF_;
 186 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
 187 Generates an output file that includes some localized attributes from an
 188 untranslated source file.
 189
 190 Mandatory options: (exactly one must be specified)
 191   -b, --ba-style         includes translations in the bonobo-activation style
 192   -d, --desktop-style    includes translations in the desktop style
 193   -k, --keys-style       includes translations in the keys style
 194   -s, --schemas-style    includes translations in the schemas style
 195   -r, --rfc822deb-style  includes translations in the RFC822 style
 196   -x, --xml-style        includes translations in the standard xml style
 197
 198 Other options:
 199   -u, --utf8             convert all strings to UTF-8 before merging
 200                          (default for everything except RFC822 style)
 201   -p, --pass-through     deprecated, does nothing and issues a warning
 202   -m, --multiple-output  output one localized file per locale, instead of
 203                          a single file containing all localized elements
 204   -c, --cache=FILE       specify cache file name
 205                          (usually \$top_builddir/po/.intltool-merge-cache)
 206   -q, --quiet            suppress most messages
 207       --help             display this help and exit
 208       --version          output version information and exit
 209
 210 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
 211 or send email to <xml-i18n-tools\@gnome.org>.
 212 _EOF_
 213     exit;
 214 }
 215
 216
 217 ## Sub for printing error messages
 218 sub print_error
 219 {
 220     print STDERR "Try `${PROGRAM} --help' for more information.\n";
 221     exit;
 222 }
 223
 224
 225 sub print_message
 226 {
 227     print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
 228 }
 229
 230
 231 sub preparation
 232 {
 233     $PO_DIR = $ARGV[0];
 234     $FILE = $ARGV[1];
 235     $OUTFILE = $ARGV[2];
 236
 237     &gather_po_files;
 238     &get_translation_database;
 239 }
 240
 241 # General-purpose code for looking up translations in .po files
 242
 243 sub po_file2lang
 244 {
 245     my ($tmp) = @_;
 246     $tmp =~ s/^.*\/(.*)\.po$/$1/;
 247     return $tmp;
 248 }
 249
 250 sub gather_po_files
 251 {
 252     for my $po_file (glob "$PO_DIR/*.po") {
 253         $po_files_by_lang{po_file2lang($po_file)} = $po_file;
 254     }
 255 }
 256
 257 sub get_local_charset
 258 {
 259     my ($encoding) = @_;
 260     my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
 261
 262     # seek character encoding aliases in charset.alias (glib)
 263
 264     if (open CHARSET_ALIAS, $alias_file)
 265     {
 266         while (<CHARSET_ALIAS>)
 267         {
 268             next if /^\#/;
 269             return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
 270         }
 271
 272         close CHARSET_ALIAS;
 273     }
 274
 275     # if not found, return input string
 276
 277     return $encoding;
 278 }
 279
 280 sub get_po_encoding
 281 {
 282     my ($in_po_file) = @_;
 283     my $encoding = "";
 284
 285     open IN_PO_FILE, $in_po_file or die;
 286     while (<IN_PO_FILE>)
 287     {
 288         ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
 289         if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
 290         {
 291             $encoding = $1;
 292             last;
 293         }
 294     }
 295     close IN_PO_FILE;
 296
 297     if (!$encoding)
 298     {
 299         print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
 300         $encoding = "ISO-8859-1";
 301     }
 302
 303     system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
 304     if ($?) {
 305         $encoding = get_local_charset($encoding);
 306     }
 307
 308     return $encoding
 309 }
 310
 311 sub utf8_sanity_check
 312 {
 313     print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
 314     $UTF8_ARG = 1;
 315 }
 316
 317 sub get_translation_database
 318 {
 319     if ($cache_file) {
 320         &get_cached_translation_database;
 321     } else {
 322         &create_translation_database;
 323     }
 324 }
 325
 326 sub get_newest_po_age
 327 {
 328     my $newest_age;
 329
 330     foreach my $file (values %po_files_by_lang)
 331     {
 332         my $file_age = -M $file;
 333         $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
 334     }
 335
 336     $newest_age = 0 if !$newest_age;
 337
 338     return $newest_age;
 339 }
 340
 341 sub create_cache
 342 {
 343     print "Generating and caching the translation database\n" unless $QUIET_ARG;
 344
 345     &create_translation_database;
 346
 347     open CACHE, ">$cache_file" || die;
 348     print CACHE join "\x01", %translations;
 349     close CACHE;
 350 }
 351
 352 sub load_cache
 353 {
 354     print "Found cached translation database\n" unless $QUIET_ARG;
 355
 356     my $contents;
 357     open CACHE, "<$cache_file" || die;
 358     {
 359         local $/;
 360         $contents = <CACHE>;
 361     }
 362     close CACHE;
 363     %translations = split "\x01", $contents;
 364 }
 365
 366 sub get_cached_translation_database
 367 {
 368     my $cache_file_age = -M $cache_file;
 369     if (defined $cache_file_age)
 370     {
 371         if ($cache_file_age <= &get_newest_po_age)
 372         {
 373             &load_cache;
 374             return;
 375         }
 376         print "Found too-old cached translation database\n" unless $QUIET_ARG;
 377     }
 378
 379     &create_cache;
 380 }
 381
 382 sub create_translation_database
 383 {
 384     for my $lang (keys %po_files_by_lang)
 385     {
 386         my $po_file = $po_files_by_lang{$lang};
 387
 388         if ($UTF8_ARG)
 389         {
 390             my $encoding = get_po_encoding ($po_file);
 391
 392             if (lc $encoding eq "utf-8")
 393             {
 394                 open PO_FILE, "<$po_file";
 395             }
 396             else
 397             {
 398                 print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
 399
 400                 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
 401             }
 402         }
 403         else
 404         {
 405             open PO_FILE, "<$po_file";
 406         }
 407
 408         my $nextfuzzy = 0;
 409         my $inmsgid = 0;
 410         my $inmsgstr = 0;
 411         my $msgid = "";
 412         my $msgstr = "";
 413
 414         while (<PO_FILE>)
 415         {
 416             $nextfuzzy = 1 if /^#, fuzzy/;
 417
 418             if (/^msgid "((\\.|[^\\])*)"/ )
 419             {
 420                 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 421                 $msgid = "";
 422                 $msgstr = "";
 423
 424                 if ($nextfuzzy) {
 425                     $inmsgid = 0;
 426                 } else {
 427                     $msgid = unescape_po_string($1);
 428                     $inmsgid = 1;
 429                 }
 430                 $inmsgstr = 0;
 431                 $nextfuzzy = 0;
 432             }
 433
 434             if (/^msgstr "((\\.|[^\\])*)"/)
 435             {
 436                 $msgstr = unescape_po_string($1);
 437                 $inmsgstr = 1;
 438                 $inmsgid = 0;
 439             }
 440
 441             if (/^"((\\.|[^\\])*)"/)
 442             {
 443                 $msgid .= unescape_po_string($1) if $inmsgid;
 444                 $msgstr .= unescape_po_string($1) if $inmsgstr;
 445             }
 446         }
 447         $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 448     }
 449 }
 450
 451 sub finalize
 452 {
 453 }
 454
 455 sub unescape_one_sequence
 456 {
 457     my ($sequence) = @_;
 458
 459     return "\\" if $sequence eq "\\\\";
 460     return "\"" if $sequence eq "\\\"";
 461     return "\n" if $sequence eq "\\n";
 462     return "\r" if $sequence eq "\\r";
 463     return "\t" if $sequence eq "\\t";
 464     return "\b" if $sequence eq "\\b";
 465     return "\f" if $sequence eq "\\f";
 466     return "\a" if $sequence eq "\\a";
 467     return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
 468
 469     return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
 470     return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
 471
 472     # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
 473
 474     return $sequence;
 475 }
 476
 477 sub unescape_po_string
 478 {
 479     my ($string) = @_;
 480
 481     $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
 482
 483     return $string;
 484 }
 485
 486 ## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
 487 ## > in the entity. For further info please look at #84738.
 488 sub entity_decode
 489 {
 490     local ($_) = @_;
 491
 492     s/&apos;/'/g; # '
 493     s/&quot;/"/g; # "
 494     s/&amp;/&/g;
 495     s/&lt;/</g;
 496
 497     return $_;
 498 }
 499
 500 # entity_encode: (string)
 501 #
 502 # Encode the given string to XML format (encode '<' etc).
 503
 504 sub entity_encode
 505 {
 506     my ($pre_encoded) = @_;
 507
 508     my @list_of_chars = unpack ('C*', $pre_encoded);
 509
 510     # with UTF-8 we only encode minimalistic
 511     return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
 512 }
 513
 514 sub entity_encode_int_minimalist
 515 {
 516     return "&quot;" if $_ == 34;
 517     return "&amp;" if $_ == 38;
 518     return "&apos;" if $_ == 39;
 519     return "&lt;" if $_ == 60;
 520     return chr $_;
 521 }
 522
 523 sub entity_encoded_translation
 524 {
 525     my ($lang, $string) = @_;
 526
 527     my $translation = $translations{$lang, $string};
 528     return $string if !$translation;
 529     return entity_encode ($translation);
 530 }
 531
 532 ## XML (bonobo-activation specific) merge code
 533
 534 sub ba_merge_translations
 535 {
 536     my $source;
 537
 538     {
 539        local $/; # slurp mode
 540        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 541        $source = <INPUT>;
 542        close INPUT;
 543     }
 544
 545     open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
 546     # Binmode so that selftest works ok if using a native Win32 Perl...
 547     binmode (OUTPUT) if $^O eq 'MSWin32';
 548
 549     while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
 550     {
 551         print OUTPUT $1;
 552
 553         my $node = $2 . "\n";
 554
 555         my @strings = ();
 556         $_ = $node;
 557         while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
 558              push @strings, entity_decode($3);
 559         }
 560         print OUTPUT;
 561
 562         my %langs;
 563         for my $string (@strings)
 564         {
 565             for my $lang (keys %po_files_by_lang)
 566             {
 567                 $langs{$lang} = 1 if $translations{$lang, $string};
 568             }
 569         }
 570
 571         for my $lang (sort keys %langs)
 572         {
 573             $_ = $node;
 574             s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
 575             s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
 576             print OUTPUT;
 577         }
 578     }
 579
 580     print OUTPUT $source;
 581
 582     close OUTPUT;
 583 }
 584
 585
 586 ## XML (non-bonobo-activation) merge code
 587
 588
 589 # Process tag attributes
 590 #   Only parameter is a HASH containing attributes -> values mapping
 591 sub getAttributeString
 592 {
 593     my $sub = shift;
 594     my $do_translate = shift || 0;
 595     my $language = shift || "";
 596     my $result = "";
 597     my $translate = shift;
 598     foreach my $e (reverse(sort(keys %{ $sub }))) {
 599         my $key    = $e;
 600         my $string = $sub->{$e};
 601         my $quote = '"';
 602
 603         $string =~ s/^[\s]+//;
 604         $string =~ s/[\s]+$//;
 605
 606         if ($string =~ /^'.*'$/)
 607         {
 608             $quote = "'";
 609         }
 610         $string =~ s/^['"]//g;
 611         $string =~ s/['"]$//g;
 612
 613         if ($do_translate && $key =~ /^_/) {
 614             $key =~ s|^_||g;
 615             if ($language) {
 616                 # Handle translation
 617                 my $decode_string = entity_decode($string);
 618                 my $translation = $translations{$language, $decode_string};
 619                 if ($translation) {
 620                     $translation = entity_encode($translation);
 621                     $string = $translation;
 622                 }
 623                 $$translate = 2;
 624             } else {
 625                  $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
 626             }
 627         }
 628
 629         $result .= " $key=$quote$string$quote";
 630     }
 631     return $result;
 632 }
 633
 634 # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
 635 sub getXMLstring
 636 {
 637     my $ref = shift;
 638     my $spacepreserve = shift || 0;
 639     my @list = @{ $ref };
 640     my $result = "";
 641
 642     my $count = scalar(@list);
 643     my $attrs = $list[0];
 644     my $index = 1;
 645
 646     $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 647     $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
 648
 649     while ($index < $count) {
 650         my $type = $list[$index];
 651         my $content = $list[$index+1];
 652         if (! $type ) {
 653             # We've got CDATA
 654             if ($content) {
 655                 # lets strip the whitespace here, and *ONLY* here
 656                 $content =~ s/\s+/ /gs if (!$spacepreserve);
 657                 $result .= $content;
 658             }
 659         } elsif ( "$type" ne "1" ) {
 660             # We've got another element
 661             $result .= "<$type";
 662             $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
 663             if ($content) {
 664                 my $subresult = getXMLstring($content, $spacepreserve);
 665                 if ($subresult) {
 666                     $result .= ">".$subresult . "</$type>";
 667                 } else {
 668                     $result .= "/>";
 669                 }
 670             } else {
 671                 $result .= "/>";
 672             }
 673         }
 674         $index += 2;
 675     }
 676     return $result;
 677 }
 678
 679 # Translate list of nodes if necessary
 680 sub translate_subnodes
 681 {
 682     my $fh = shift;
 683     my $content = shift;
 684     my $language = shift || "";
 685     my $singlelang = shift || 0;
 686     my $spacepreserve = shift || 0;
 687
 688     my @nodes = @{ $content };
 689
 690     my $count = scalar(@nodes);
 691     my $index = 0;
 692     while ($index < $count) {
 693         my $type = $nodes[$index];
 694         my $rest = $nodes[$index+1];
 695         if ($singlelang) {
 696             my $oldMO = $MULTIPLE_OUTPUT;
 697             $MULTIPLE_OUTPUT = 1;
 698             traverse($fh, $type, $rest, $language, $spacepreserve);
 699             $MULTIPLE_OUTPUT = $oldMO;
 700         } else {
 701             traverse($fh, $type, $rest, $language, $spacepreserve);
 702         }
 703         $index += 2;
 704     }
 705 }
 706
 707 sub isWellFormedXmlFragment
 708 {
 709     my $ret = eval 'require XML::Parser';
 710     if(!$ret) {
 711         die "You must have XML::Parser installed to run $0\n\n";
 712     }
 713
 714     my $fragment = shift;
 715     return 0 if (!$fragment);
 716
 717     $fragment = "<root>$fragment</root>";
 718     my $xp = new XML::Parser(Style => 'Tree');
 719     my $tree = 0;
 720     eval { $tree = $xp->parse($fragment); };
 721     return $tree;
 722 }
 723
 724 sub traverse
 725 {
 726     my $fh = shift;
 727     my $nodename = shift;
 728     my $content = shift;
 729     my $language = shift || "";
 730     my $spacepreserve = shift || 0;
 731
 732     if (!$nodename) {
 733         if ($content =~ /^[\s]*$/) {
 734             $leading_space .= $content;
 735         }
 736         print $fh $content;
 737     } else {
 738         # element
 739         my @all = @{ $content };
 740         my $attrs = shift @all;
 741         my $translate = 0;
 742         my $outattr = getAttributeString($attrs, 1, $language, \$translate);
 743
 744         if ($nodename =~ /^_/) {
 745             $translate = 1;
 746             $nodename =~ s/^_//;
 747         }
 748         my $lookup = '';
 749
 750         $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
 751         $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 752
 753         print $fh "<$nodename", $outattr;
 754         if ($translate) {
 755             $lookup = getXMLstring($content, $spacepreserve);
 756             if (!$spacepreserve) {
 757                 $lookup =~ s/^\s+//s;
 758                 $lookup =~ s/\s+$//s;
 759             }
 760
 761             if ($lookup || $translate == 2) {
 762                 my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
 763                 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
 764                     $translation = $lookup if (!$translation);
 765                     print $fh " xml:lang=\"", $language, "\"" if $language;
 766                     print $fh ">";
 767                     if ($translate == 2) {
 768                         translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
 769                     } else {
 770                         print $fh $translation;
 771                     }
 772                     print $fh "</$nodename>";
 773
 774                     return; # this means there will be no same translation with xml:lang="$language"...
 775                             # if we want them both, just remove this "return"
 776                 } else {
 777                     print $fh ">";
 778                     if ($translate == 2) {
 779                         translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
 780                     } else {
 781                         print $fh $lookup;
 782                     }
 783                     print $fh "</$nodename>";
 784                 }
 785             } else {
 786                 print $fh "/>";
 787             }
 788
 789             for my $lang (sort keys %po_files_by_lang) {
 790                     if ($MULTIPLE_OUTPUT && $lang ne "$language") {
 791                         next;
 792                     }
 793                     if ($lang) {
 794                         # Handle translation
 795                         #
 796                         my $translate = 0;
 797                         my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
 798                         my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
 799                         if ($translate && !$translation) {
 800                             $translation = $lookup;
 801                         }
 802
 803                         if ($translation || $translate) {
 804                             print $fh "\n";
 805                             $leading_space =~ s/.*\n//g;
 806                             print $fh $leading_space;
 807                             print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
 808                             if ($translate == 2) {
 809                                translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
 810                             } else {
 811                                 print $fh $translation;
 812                             }
 813                             print $fh "</$nodename>";
 814                         }
 815                     }
 816             }
 817
 818         } else {
 819             my $count = scalar(@all);
 820             if ($count > 0) {
 821                 print $fh ">";
 822                 my $index = 0;
 823                 while ($index < $count) {
 824                     my $type = $all[$index];
 825                     my $rest = $all[$index+1];
 826                     traverse($fh, $type, $rest, $language, $spacepreserve);
 827                     $index += 2;
 828                 }
 829                 print $fh "</$nodename>";
 830             } else {
 831                 print $fh "/>";
 832             }
 833         }
 834     }
 835 }
 836
 837 sub intltool_tree_comment
 838 {
 839     my $expat = shift;
 840     my $data  = shift;
 841     my $clist = $expat->{Curlist};
 842     my $pos   = $#$clist;
 843
 844     push @$clist, 1 => $data;
 845 }
 846
 847 sub intltool_tree_cdatastart
 848 {
 849     my $expat    = shift;
 850     my $clist = $expat->{Curlist};
 851     my $pos   = $#$clist;
 852
 853     push @$clist, 0 => $expat->original_string();
 854 }
 855
 856 sub intltool_tree_cdataend
 857 {
 858     my $expat    = shift;
 859     my $clist = $expat->{Curlist};
 860     my $pos   = $#$clist;
 861
 862     $clist->[$pos] .= $expat->original_string();
 863 }
 864
 865 sub intltool_tree_char
 866 {
 867     my $expat = shift;
 868     my $text  = shift;
 869     my $clist = $expat->{Curlist};
 870     my $pos   = $#$clist;
 871
 872     # Use original_string so that we retain escaped entities
 873     # in CDATA sections.
 874     #
 875     if ($pos > 0 and $clist->[$pos - 1] eq '0') {
 876         $clist->[$pos] .= $expat->original_string();
 877     } else {
 878         push @$clist, 0 => $expat->original_string();
 879     }
 880 }
 881
 882 sub intltool_tree_start
 883 {
 884     my $expat    = shift;
 885     my $tag      = shift;
 886     my @origlist = ();
 887
 888     # Use original_string so that we retain escaped entities
 889     # in attribute values.  We must convert the string to an
 890     # @origlist array to conform to the structure of the Tree
 891     # Style.
 892     #
 893     my @original_array = split /\x/, $expat->original_string();
 894     my $source         = $expat->original_string();
 895
 896     # Remove leading tag.
 897     #
 898     $source =~ s|^\s*<\s*(\S+)||s;
 899
 900     # Grab attribute key/value pairs and push onto @origlist array.
 901     #
 902     while ($source)
 903     {
 904        if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
 905        {
 906            $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
 907            push @origlist, $1;
 908            push @origlist, '"' . $2 . '"';
 909        }
 910        elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
 911        {
 912            $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
 913            push @origlist, $1;
 914            push @origlist, "'" . $2 . "'";
 915        }
 916        else
 917        {
 918            last;
 919        }
 920     }
 921
 922     my $ol = [ { @origlist } ];
 923
 924     push @{ $expat->{Lists} }, $expat->{Curlist};
 925     push @{ $expat->{Curlist} }, $tag => $ol;
 926     $expat->{Curlist} = $ol;
 927 }
 928
 929 sub readXml
 930 {
 931     my $filename = shift || return;
 932     if(!-f $filename) {
 933         die "ERROR Cannot find filename: $filename\n";
 934     }
 935
 936     my $ret = eval 'require XML::Parser';
 937     if(!$ret) {
 938         die "You must have XML::Parser installed to run $0\n\n";
 939     }
 940     my $xp = new XML::Parser(Style => 'Tree');
 941     $xp->setHandlers(Char => \&intltool_tree_char);
 942     $xp->setHandlers(Start => \&intltool_tree_start);
 943     $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
 944     $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
 945     my $tree = $xp->parsefile($filename);
 946
 947 # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
 948 # would be:
 949 # [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
 950 # 0, "Howdy",  ref, [{}]], 0, "do" ] ]
 951
 952     return $tree;
 953 }
 954
 955 sub print_header
 956 {
 957     my $infile = shift;
 958     my $fh = shift;
 959     my $source;
 960
 961     if(!-f $infile) {
 962         die "ERROR Cannot find filename: $infile\n";
 963     }
 964
 965     print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
 966     {
 967         local $/;
 968         open DOCINPUT, "<${FILE}" or die;
 969         $source = <DOCINPUT>;
 970         close DOCINPUT;
 971     }
 972     if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
 973     {
 974         print $fh "$1\n";
 975     }
 976     elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
 977     {
 978         print $fh "$1\n";
 979     }
 980 }
 981
 982 sub parseTree
 983 {
 984     my $fh        = shift;
 985     my $ref       = shift;
 986     my $language  = shift || "";
 987
 988     my $name = shift @{ $ref };
 989     my $cont = shift @{ $ref };
 990
 991     while (!$name || "$name" eq "1") {
 992         $name = shift @{ $ref };
 993         $cont = shift @{ $ref };
 994     }
 995
 996     my $spacepreserve = 0;
 997     my $attrs = @{$cont}[0];
 998     $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 999
1000     traverse($fh, $name, $cont, $language, $spacepreserve);
1001 }
1002
1003 sub xml_merge_output
1004 {
1005     my $source;
1006
1007     if ($MULTIPLE_OUTPUT) {
1008         for my $lang (sort keys %po_files_by_lang) {
1009             if ( ! -e $lang ) {
1010                 mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
1011             }
1012             open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1013             binmode (OUTPUT) if $^O eq 'MSWin32';
1014             my $tree = readXml($FILE);
1015             print_header($FILE, \*OUTPUT);
1016             parseTree(\*OUTPUT, $tree, $lang);
1017             close OUTPUT;
1018             print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1019         }
1020     }
1021     open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
1022     binmode (OUTPUT) if $^O eq 'MSWin32';
1023     my $tree = readXml($FILE);
1024     print_header($FILE, \*OUTPUT);
1025     parseTree(\*OUTPUT, $tree);
1026     close OUTPUT;
1027     print "CREATED $OUTFILE\n" unless $QUIET_ARG;
1028 }
1029
1030 sub keys_merge_translations
1031 {
1032     open INPUT, "<${FILE}" or die;
1033     open OUTPUT, ">${OUTFILE}" or die;
1034     binmode (OUTPUT) if $^O eq 'MSWin32';
1035
1036     while (<INPUT>)
1037     {
1038         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1039         {
1040             my $string = $3;
1041
1042             print OUTPUT;
1043
1044             my $non_translated_line = $_;
1045
1046             for my $lang (sort keys %po_files_by_lang)
1047             {
1048                 my $translation = $translations{$lang, $string};
1049                 next if !$translation;
1050
1051                 $_ = $non_translated_line;
1052                 s/(\w+)=.*/[$lang]$1=$translation/;
1053                 print OUTPUT;
1054             }
1055         }
1056         else
1057         {
1058             print OUTPUT;
1059         }
1060     }
1061
1062     close OUTPUT;
1063     close INPUT;
1064 }
1065
1066 sub desktop_merge_translations
1067 {
1068     open INPUT, "<${FILE}" or die;
1069     open OUTPUT, ">${OUTFILE}" or die;
1070     binmode (OUTPUT) if $^O eq 'MSWin32';
1071
1072     while (<INPUT>)
1073     {
1074         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1075         {
1076             my $string = $3;
1077
1078             print OUTPUT;
1079
1080             my $non_translated_line = $_;
1081
1082             for my $lang (sort keys %po_files_by_lang)
1083             {
1084                 my $translation = $translations{$lang, $string};
1085                 next if !$translation;
1086
1087                 $_ = $non_translated_line;
1088                 s/(\w+)=.*/${1}[$lang]=$translation/;
1089                 print OUTPUT;
1090             }
1091         }
1092         else
1093         {
1094             print OUTPUT;
1095         }
1096     }
1097
1098     close OUTPUT;
1099     close INPUT;
1100 }
1101
1102 sub schemas_merge_translations
1103 {
1104     my $source;
1105
1106     {
1107        local $/; # slurp mode
1108        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1109        $source = <INPUT>;
1110        close INPUT;
1111     }
1112
1113     open OUTPUT, ">$OUTFILE" or die;
1114     binmode (OUTPUT) if $^O eq 'MSWin32';
1115
1116     # FIXME: support attribute translations
1117
1118     # Empty nodes never need translation, so unmark all of them.
1119     # For example, <_foo/> is just replaced by <foo/>.
1120     $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1121
1122     while ($source =~ s/
1123                         (.*?)
1124                         (\s+)(<locale\ name="C">(\s*)
1125                             (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1126                             (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1127                             (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1128                         <\/locale>)
1129                        //sx)
1130     {
1131         print OUTPUT $1;
1132
1133         my $locale_start_spaces = $2 ? $2 : '';
1134         my $default_spaces = $4 ? $4 : '';
1135         my $short_spaces = $7 ? $7 : '';
1136         my $long_spaces = $10 ? $10 : '';
1137         my $locale_end_spaces = $13 ? $13 : '';
1138         my $c_default_block = $3 ? $3 : '';
1139         my $default_string = $6 ? $6 : '';
1140         my $short_string = $9 ? $9 : '';
1141         my $long_string = $12 ? $12 : '';
1142
1143         print OUTPUT "$locale_start_spaces$c_default_block";
1144
1145         $default_string =~ s/\s+/ /g;
1146         $default_string = entity_decode($default_string);
1147         $short_string =~ s/\s+/ /g;
1148         $short_string = entity_decode($short_string);
1149         $long_string =~ s/\s+/ /g;
1150         $long_string = entity_decode($long_string);
1151
1152         for my $lang (sort keys %po_files_by_lang)
1153         {
1154             my $default_translation = $translations{$lang, $default_string};
1155             my $short_translation = $translations{$lang, $short_string};
1156             my $long_translation  = $translations{$lang, $long_string};
1157
1158             next if (!$default_translation && !$short_translation &&
1159                      !$long_translation);
1160
1161             print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1162
1163         print OUTPUT "$default_spaces";
1164
1165         if ($default_translation)
1166         {
1167             $default_translation = entity_encode($default_translation);
1168             print OUTPUT "<default>$default_translation</default>";
1169         }
1170
1171             print OUTPUT "$short_spaces";
1172
1173             if ($short_translation)
1174             {
1175                         $short_translation = entity_encode($short_translation);
1176                         print OUTPUT "<short>$short_translation</short>";
1177             }
1178
1179             print OUTPUT "$long_spaces";
1180
1181             if ($long_translation)
1182             {
1183                         $long_translation = entity_encode($long_translation);
1184                         print OUTPUT "<long>$long_translation</long>";
1185             }
1186
1187             print OUTPUT "$locale_end_spaces</locale>";
1188         }
1189     }
1190
1191     print OUTPUT $source;
1192
1193     close OUTPUT;
1194 }
1195
1196 sub rfc822deb_merge_translations
1197 {
1198     my %encodings = ();
1199     for my $lang (keys %po_files_by_lang) {
1200         $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1201     }
1202
1203     my $source;
1204
1205     $Text::Wrap::huge = 'overflow';
1206     $Text::Wrap::break = qr/\n|\s(?=\S)/;
1207
1208     {
1209        local $/; # slurp mode
1210        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1211        $source = <INPUT>;
1212        close INPUT;
1213     }
1214
1215     open OUTPUT, ">${OUTFILE}" or die;
1216     binmode (OUTPUT) if $^O eq 'MSWin32';
1217
1218     while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1219     {
1220             my $sep = $1;
1221             my $non_translated_line = $3.$4;
1222             my $string = $5;
1223             my $underscore = length($2);
1224             next if $underscore eq 0 && $non_translated_line =~ /^#/;
1225             #  Remove [] dummy strings
1226             my $stripped = $string;
1227             $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1228             $stripped =~ s/\[\s[^\[\]]*\]$//;
1229             $non_translated_line .= $stripped;
1230
1231             print OUTPUT $sep.$non_translated_line;
1232
1233             if ($underscore)
1234             {
1235                 my @str_list = rfc822deb_split($underscore, $string);
1236
1237                 for my $lang (sort keys %po_files_by_lang)
1238                 {
1239                     my $is_translated = 1;
1240                     my $str_translated = '';
1241                     my $first = 1;
1242
1243                     for my $str (@str_list)
1244                     {
1245                         my $translation = $translations{$lang, $str};
1246
1247                         if (!$translation)
1248                         {
1249                             $is_translated = 0;
1250                             last;
1251                         }
1252
1253                         #  $translation may also contain [] dummy
1254                         #  strings, mostly to indicate an empty string
1255                         $translation =~ s/\[\s[^\[\]]*\]$//;
1256
1257                         if ($first)
1258                         {
1259                             if ($underscore eq 2)
1260                             {
1261                                 $str_translated .= $translation;
1262                             }
1263                             else
1264                             {
1265                                 $str_translated .=
1266                                     Text::Tabs::expand($translation) .
1267                                     "\n";
1268                             }
1269                         }
1270                         else
1271                         {
1272                             if ($underscore eq 2)
1273                             {
1274                                 $str_translated .= ', ' . $translation;
1275                             }
1276                             else
1277                             {
1278                                 $str_translated .= Text::Tabs::expand(
1279                                     Text::Wrap::wrap(' ', ' ', $translation)) .
1280                                     "\n .\n";
1281                             }
1282                         }
1283                         $first = 0;
1284
1285                         #  To fix some problems with Text::Wrap::wrap
1286                         $str_translated =~ s/(\n )+\n/\n .\n/g;
1287                     }
1288                     next unless $is_translated;
1289
1290                     $str_translated =~ s/\n \.\n$//;
1291                     $str_translated =~ s/\s+$//;
1292
1293                     $_ = $non_translated_line;
1294                     s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1295                     print OUTPUT;
1296                 }
1297             }
1298     }
1299     print OUTPUT "\n";
1300
1301     close OUTPUT;
1302     close INPUT;
1303 }
1304
1305 sub rfc822deb_split
1306 {
1307     # Debian defines a special way to deal with rfc822-style files:
1308     # when a value contain newlines, it consists of
1309     #   1.  a short form (first line)
1310     #   2.  a long description, all lines begin with a space,
1311     #       and paragraphs are separated by a single dot on a line
1312     # This routine returns an array of all paragraphs, and reformat
1313     # them.
1314     # When first argument is 2, the string is a comma separated list of
1315     # values.
1316     my $type = shift;
1317     my $text = shift;
1318     $text =~ s/^[ \t]//mg;
1319     return (split(/, */, $text, 0)) if $type ne 1;
1320     return ($text) if $text !~ /\n/;
1321
1322     $text =~ s/([^\n]*)\n//;
1323     my @list = ($1);
1324     my $str = '';
1325
1326     for my $line (split (/\n/, $text))
1327     {
1328         chomp $line;
1329         if ($line =~ /^\.\s*$/)
1330         {
1331             #  New paragraph
1332             $str =~ s/\s*$//;
1333             push(@list, $str);
1334             $str = '';
1335         }
1336         elsif ($line =~ /^\s/)
1337         {
1338             #  Line which must not be reformatted
1339             $str .= "\n" if length ($str) && $str !~ /\n$/;
1340             $line =~ s/\s+$//;
1341             $str .= $line."\n";
1342         }
1343         else
1344         {
1345             #  Continuation line, remove newline
1346             $str .= " " if length ($str) && $str !~ /\n$/;
1347             $str .= $line;
1348         }
1349     }
1350
1351     $str =~ s/\s*$//;
1352     push(@list, $str) if length ($str);
1353
1354     return @list;
1355 }
1356