idk
[openDashboard.git] / intltool-merge
blob60b3daef30deecf9bfec91a14793d018294c1d1a
1 #!/usr/bin/perl -w
2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
5 # The Intltool Message Merger
7 # Copyright (C) 2000, 2003 Free Software Foundation.
8 # Copyright (C) 2000, 2001 Eazel, Inc
10 # Intltool is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU General Public License
12 # version 2 published by the Free Software Foundation.
14 # Intltool is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # As a special exception to the GNU General Public License, if you
24 # distribute this file as part of a program that contains a
25 # configuration script generated by Autoconf, you may include it under
26 # the same distribution terms that you use for the rest of that program.
28 # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
29 # Kenneth Christiansen <kenneth@gnu.org>
30 # Darin Adler <darin@bentspoon.com>
32 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
## Release information
my ($PROGRAM, $PACKAGE, $VERSION) = ("intltool-merge", "intltool", "0.35.5");

## Loaded modules
use strict;
use Getopt::Long;
use Text::Wrap;
use File::Basename;

## File-scoped state for the XML merge code.
## NOTE(review): of these, only $leading_space is referenced by the code
## visible in this file section; confirm before removing the others.
my ($must_end_tag, $last_depth, $translation_depth) = (-1, -1, -1);
my @tag_stack = ();
my @entered_tag = ();
my @translation_strings = ();
my $leading_space = "";

## Scalars used by the option stuff (all default to "not given")
my (
    $HELP_ARG,          $VERSION_ARG,      $BA_STYLE_ARG,
    $XML_STYLE_ARG,     $KEYS_STYLE_ARG,   $DESKTOP_STYLE_ARG,
    $SCHEMAS_STYLE_ARG, $RFC822DEB_STYLE_ARG, $QUOTED_STYLE_ARG,
    $QUIET_ARG,         $PASS_THROUGH_ARG, $UTF8_ARG,
    $MULTIPLE_OUTPUT,
) = (0) x 13;
my $cache_file;
## Handle options
# Every switch stores into the matching *_ARG scalar declared above.
# GetOptions returns false when the command line cannot be parsed; in
# that case print the "--help" hint through print_error (the previous
# code called a non-existent sub named "error").
GetOptions
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "quoted-style" => \$QUOTED_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or print_error();
# Positional arguments, filled in by preparation().
my ($PO_DIR, $FILE, $OUTFILE);

# Language code -> .po path, and "$lang$;$msgid" -> msgstr.
my %po_files_by_lang = ();
my %translations = ();

# External converter and the platform's null device.
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";
my $devnull = ($^O eq 'MSWin32') ? 'NUL:' : '/dev/null';

# Use this instead of \w for XML files to handle more possible characters.
my $w = '[-A-Za-z0-9._:]';

# XML quoted string contents
my $q = '[^\\"]*';
## Check for options.
# --version and --help win over everything else.  Otherwise the first
# requested style (in the order listed below) is run, provided at least
# three positional arguments are present; anything else prints the help.
if ($VERSION_ARG)
{
    print_version();
}
elsif ($HELP_ARG)
{
    print_help();
}
else
{
    # [ flag, merge routine, run utf8_sanity_check first? ]
    my @styles = (
        [ $BA_STYLE_ARG,        \&ba_merge_translations,        1 ],
        [ $XML_STYLE_ARG,       \&xml_merge_output,             1 ],
        [ $KEYS_STYLE_ARG,      \&keys_merge_translations,      1 ],
        [ $DESKTOP_STYLE_ARG,   \&desktop_merge_translations,   1 ],
        [ $SCHEMAS_STYLE_ARG,   \&schemas_merge_translations,   1 ],
        [ $RFC822DEB_STYLE_ARG, \&rfc822deb_merge_translations, 0 ],
        [ $QUOTED_STYLE_ARG,    \&quoted_merge_translations,    1 ],
    );
    my ($chosen) = grep { $_->[0] } @styles;

    if ($chosen && @ARGV > 2)
    {
        my (undef, $merge, $force_utf8) = @{ $chosen };
        utf8_sanity_check() if $force_utf8;
        preparation();
        print_message();
        $merge->();
        finalize();
    }
    else
    {
        print_help();
    }
}

exit;
## Sub for printing release information
# Prints the program name, package, version and copyright notice to
# STDOUT and then terminates the script.
sub print_version
{
    my $banner = <<_EOF_;
${PROGRAM} (${PACKAGE}) ${VERSION}
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.

Copyright (C) 2000-2003 Free Software Foundation, Inc.
Copyright (C) 2000-2001 Eazel, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
_EOF_
    print $banner;
    exit;
}
## Sub for printing usage information
# Prints the usage text to STDOUT and then terminates the script.
sub print_help
{
    my $usage = <<_EOF_;
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
Generates an output file that includes some localized attributes from an
untranslated source file.

Mandatory options: (exactly one must be specified)
-b, --ba-style includes translations in the bonobo-activation style
-d, --desktop-style includes translations in the desktop style
-k, --keys-style includes translations in the keys style
-s, --schemas-style includes translations in the schemas style
-r, --rfc822deb-style includes translations in the RFC822 style
--quoted-style includes translations in the quoted string style
-x, --xml-style includes translations in the standard xml style

Other options:
-u, --utf8 convert all strings to UTF-8 before merging
(default for everything except RFC822 style)
-p, --pass-through deprecated, does nothing and issues a warning
-m, --multiple-output output one localized file per locale, instead of
a single file containing all localized elements
-c, --cache=FILE specify cache file name
(usually \$top_builddir/po/.intltool-merge-cache)
-q, --quiet suppress most messages
--help display this help and exit
--version output version information and exit

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
_EOF_
    print $usage;
    exit;
}
## Sub for printing error messages
# Prints a hint pointing at --help on STDERR and terminates the script
# with a non-zero status, so callers of the tool (make, shell scripts)
# can tell that the invocation failed.
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}
# Announce the output file on STDOUT unless --quiet was given.
sub print_message
{
    if (!$QUIET_ARG)
    {
        print "Merging translations into $OUTFILE.\n";
    }
}
# Take PO_DIRECTORY, FILENAME and OUTPUT_FILE from the first three
# positional arguments, then collect the .po files and load (or build)
# the translation database.
sub preparation
{
    ($PO_DIR, $FILE, $OUTFILE) = @ARGV[0 .. 2];

    gather_po_files();
    get_translation_database();
}
# General-purpose code for looking up translations in .po files

# Map a path such as "po/de.po" to its language code "de".  A string
# that does not look like ".../NAME.po" is returned unchanged.
sub po_file2lang
{
    my ($path) = @_;
    (my $lang = $path) =~ s{^.*/(.*)\.po$}{$1};
    return $lang;
}
# Record every "$PO_DIR/*.po" file in %po_files_by_lang, keyed by the
# language code derived from its file name.
sub gather_po_files
{
    $po_files_by_lang{ po_file2lang($_) } = $_ for glob "$PO_DIR/*.po";
}
# Look up a local alias for $encoding in glib's charset.alias file
# (path taken from $G_CHARSET_ALIAS, default /usr/local/lib/charset.alias).
#
# Returns the first word of the first non-comment line whose second
# word equals $encoding (case-insensitively); if the file cannot be
# opened or has no such line, returns $encoding unchanged.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/local/lib/charset.alias";
    my $alias;

    # seek character encoding aliases in charset.alias (glib)
    if (open my $alias_fh, '<', $alias_file)
    {
        while (my $line = <$alias_fh>)
        {
            next if $line =~ /^\#/;

            # \Q..\E: the encoding name is data, not a pattern ("." must
            # not act as a wildcard).
            if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
            {
                $alias = $1;
                last;
            }
        }

        # Always reached now, including after a successful match.
        close $alias_fh;
    }

    # if not found, return input string
    return defined $alias ? $alias : $encoding;
}
# Determine the character encoding declared in a .po file.
#
# Scans the file for the "Content-Type: ...; charset=NAME\n" header line
# and returns NAME.  When no such line exists, warns (unless --quiet)
# and assumes ISO-8859-1.  If $iconv does not accept the name, the name
# is passed through get_local_charset to look for a local alias.
# Dies when the file cannot be opened.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open my $po_fh, '<', $in_po_file
        or die "can't open $in_po_file: $!";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        ## "_" and "." are accepted too (e.g. Shift_JIS), matching the
        ## character class used for alias names in get_local_charset.
        if ($line =~ /Content-Type\:.*charset=([-._a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Probe iconv with empty input; a non-zero status means it does not
    # know this encoding name.
    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
    if ($?) {
        $encoding = get_local_charset($encoding);
    }

    return $encoding;
}
# Warn on STDERR if the removed --pass-through option was given, and
# switch UTF-8 conversion on unconditionally.
sub utf8_sanity_check
{
    if ($PASS_THROUGH_ARG)
    {
        print STDERR "Warning: option --pass-through has been removed.\n";
    }
    $UTF8_ARG = 1;
}
# Fill %translations: through the cache when --cache was given,
# otherwise directly from the .po files.
sub get_translation_database
{
    return $cache_file
        ? get_cached_translation_database()
        : create_translation_database();
}
# Return the smallest -M age (days since last modification, relative to
# script start) among all known .po files, or 0 when there are none.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang)
    {
        my $file_age = -M $file;

        # -M yields undef when the file cannot be stat'ed; ignore it.
        next unless defined $file_age;

        # Compare with defined(): an age of exactly 0 is a real value
        # and must not be mistaken for "nothing seen yet".
        $newest_age = $file_age
            if !defined $newest_age || $file_age < $newest_age;
    }

    return defined $newest_age ? $newest_age : 0;
}
# Build the translation database from the .po files and write it to
# $cache_file as one string of keys and values joined by \x01.
# Dies when the cache file cannot be written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # "or", not "||": with "||" the die was attached to the file name
    # string and could never fire.
    open my $cache_fh, '>', $cache_file
        or die "can't write $cache_file: $!";
    print {$cache_fh} join("\x01", %translations);
    close $cache_fh
        or die "can't write $cache_file: $!";
}
# Replace %translations with the contents of $cache_file (the \x01
# separated key/value string written by create_cache).
# Dies when the cache file cannot be read.
sub load_cache
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    # "or", not "||": with "||" the die was attached to the file name
    # string and could never fire.
    open my $cache_fh, '<', $cache_file
        or die "can't read $cache_file: $!";
    my $contents = do { local $/; <$cache_fh> };   # slurp
    close $cache_fh;

    %translations = split "\x01", $contents;
}
# Use the cache file when it exists and is at least as recent as the
# newest .po file; otherwise (re)generate it.
sub get_cached_translation_database
{
    my $cache_file_age = -M $cache_file;
    my $cache_exists = defined $cache_file_age;

    if ($cache_exists && $cache_file_age <= get_newest_po_age())
    {
        load_cache();
        return;
    }

    print "Found too-old cached translation database\n"
        if $cache_exists && !$QUIET_ARG;

    create_cache();
}
# Parse every .po file in %po_files_by_lang and store each non-fuzzy,
# non-empty translation as $translations{$lang, $msgid} = $msgstr.
#
# With $UTF8_ARG set, files whose declared encoding is not UTF-8 are
# read through "$iconv -f ENCODING -t UTF-8".  A file that cannot be
# opened is reported on STDERR and skipped.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang)
    {
        my $po_file = $po_files_by_lang{$lang};
        my $po_fh;
        my $opened;

        if ($UTF8_ARG)
        {
            my $encoding = get_po_encoding ($po_file);

            if (lc $encoding eq "utf-8")
            {
                $opened = open $po_fh, '<', $po_file;
            }
            else
            {
                print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

                # Same shell command string as before, read as a pipe.
                $opened = open $po_fh, "$iconv -f $encoding -t UTF-8 $po_file|";
            }
        }
        else
        {
            $opened = open $po_fh, '<', $po_file;
        }

        if (!$opened)
        {
            warn "Warning: cannot read $po_file: $!\n";
            next;
        }

        my $nextfuzzy = 0;   # a "#, fuzzy" flag precedes the next msgid
        my $inmsgid = 0;     # continuation lines belong to msgid
        my $inmsgstr = 0;    # continuation lines belong to msgstr
        my $msgid = "";
        my $msgstr = "";

        while (my $line = <$po_fh>)
        {
            $nextfuzzy = 1 if $line =~ /^#, fuzzy/;

            if ($line =~ /^msgid "((\\.|[^\\]+)*)"/ )
            {
                my $raw = $1;

                # A new entry starts: flush the one collected so far.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($raw);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if ($line =~ /^msgstr "((\\.|[^\\]+)*)"/)
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            if ($line =~ /^"((\\.|[^\\]+)*)"/)
            {
                my $raw = $1;
                $msgid .= unescape_po_string($raw) if $inmsgid;
                $msgstr .= unescape_po_string($raw) if $inmsgstr;
            }
        }
        close $po_fh;

        # Flush the last entry of the file.
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
    }
}
# Hook called after each merge routine; it currently does nothing.
sub finalize
{
    return;
}
# Translate one backslash escape as found in a .po string (for example
# the two characters "\" "n") into the character it stands for.
# Handles the named escapes listed below, \xHH and \OOO; any other
# sequence is returned unchanged.
sub unescape_one_sequence
{
    my ($sequence) = @_;

    my %named = (
        "\\\\" => "\\",
        "\\\"" => "\"",
        "\\n"  => "\n",
        "\\r"  => "\r",
        "\\t"  => "\t",
        "\\b"  => "\b",
        "\\f"  => "\f",
        "\\a"  => "\a",
        "\\v"  => chr(11),   # vertical tab, see ascii(7)
    );
    return $named{$sequence} if exists $named{$sequence};

    if ($sequence =~ /\\x([0-9a-fA-F]{2})/)
    {
        return chr(hex($1));
    }
    if ($sequence =~ /\\([0-7]{3})/)
    {
        return chr(oct($1));
    }

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $sequence;
}
# Return a copy of $string with every backslash escape (\xHH, \OOO or
# backslash plus one character) replaced via unescape_one_sequence.
sub unescape_po_string
{
    my ($string) = @_;

    my $escape = qr/\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\./;
    $string =~ s/($escape)/unescape_one_sequence($1)/eg;

    return $string;
}
## NOTE: deal with < - &lt; but not > - &gt; because it seems its ok to have
## > in the entity. For further info please look at #84738.
#
# Return a copy of the argument with &apos; &quot; &amp; and &lt;
# replaced by the characters they stand for.  All four are handled in
# one pass so that the output of one replacement is never decoded
# again ("&amp;lt;" yields "&lt;", not "<").
sub entity_decode
{
    my ($text) = @_;

    my %char_for = (
        apos => "'",
        quot => '"',
        amp  => '&',
        lt   => '<',
    );
    $text =~ s/&(apos|quot|amp|lt);/$char_for{$1}/g;

    return $text;
}
# entity_encode: (string)
#
# Encode the given string to XML format (encode '<' etc).
# The string is split into unsigned character values with unpack 'C*'
# and each value is mapped by entity_encode_int_minimalist, which
# reads it from $_.
sub entity_encode
{
    my ($pre_encoded) = @_;

    # with UTF-8 we only encode minimalistic
    my $encoded = '';
    for (unpack ('C*', $pre_encoded))
    {
        $encoded .= entity_encode_int_minimalist();
    }
    return $encoded;
}
# Map the character code held in $_ to its XML text: the four codes
# 34 ("), 38 (&), 39 (') and 60 (<) become entities, every other code
# becomes the character itself.
sub entity_encode_int_minimalist
{
    my %entity_for = (
        34 => "&quot;",
        38 => "&amp;",
        39 => "&apos;",
        60 => "&lt;",
    );
    return exists $entity_for{$_} ? $entity_for{$_} : chr $_;
}
# Return the entity-encoded translation of $string for $lang, or
# $string itself (unchanged) when no translation is stored.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};
    return $translation ? entity_encode ($translation) : $string;
}
## XML (bonobo-activation specific) merge code

# Copy $FILE to $OUTFILE.  Every tag that carries attributes is written
# once with the leading "_" removed from marked attribute names, and
# then once more per language that has a translation for any marked
# value: in those copies the name="..." value gets a "-LANG" suffix
# and marked values are replaced by their translations.
sub ba_merge_translations
{
    my $source = do {
        local $/; # slurp mode
        open INPUT, "<$FILE" or die "can't open $FILE: $!";
        my $everything = <INPUT>;
        close INPUT;
        $everything;
    };

    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
    # Binmode so that selftest works ok if using a native Win32 Perl...
    binmode (OUTPUT) if $^O eq 'MSWin32';

    # Each pass consumes the text up to and including the next tag.
    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
    {
        print OUTPUT $1;

        my $node = $2 . "\n";

        # Unmark the attributes one by one, remembering their values.
        my @strings = ();
        $_ = $node;
        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
            push @strings, entity_decode($3);
        }
        print OUTPUT;

        # Languages that translate at least one of the values.
        my %langs = map { $_ => 1 }
                    grep {
                        my $lang = $_;
                        scalar grep { $translations{$lang, $_} } @strings;
                    } keys %po_files_by_lang;

        for my $lang (sort keys %langs)
        {
            $_ = $node;
            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print OUTPUT;
        }
    }

    # Whatever follows the last tag.
    print OUTPUT $source;

    close OUTPUT;
}
597 ## XML (non-bonobo-activation) merge code
# Process tag attributes
#
# Arguments:
#   $sub          HASH ref, attribute name -> value (value still quoted)
#   $do_translate true to honour the "_" marker on attribute names
#   $language     language to translate marked values into ("" = none)
#   $translate    optional SCALAR ref; set to 2 when a marked attribute
#                 is seen
# Returns the attributes as one string of ' name="value"' items, in
# reverse-sorted name order.
sub getAttributeString
{
    my ($sub, $do_translate, $language, $translate) = @_;
    $do_translate ||= 0;
    $language ||= "";
    my $result = "";

    foreach my $name (reverse sort keys %{ $sub }) {
        my $key = $name;
        my $string = $sub->{$name};

        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;

        # Keep the quote character the source used.
        my $quote = ($string =~ /^'.*'$/) ? "'" : '"';
        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;
            if (!$language) {
                $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
            } else {
                # Handle translation
                my $translation = $translations{$language, entity_decode($string)};
                $string = entity_encode($translation) if $translation;
                $$translate = 2;
            }
        }

        $result .= " $key=$quote$string$quote";
    }
    return $result;
}
# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
#
# $ref is a content list [ \%attrs, type, content, type, content, ... ].
# Text (false type) is appended, with runs of whitespace collapsed to
# one space unless space is preserved; entries of type "1" are
# skipped; any other type is an element and is serialised recursively.
# $spacepreserve is the inherited setting and is overridden by an
# xml:space attribute on this node.
sub getXMLstring
{
    my ($ref, $spacepreserve) = @_;
    $spacepreserve ||= 0;
    my @list = @{ $ref };
    my $attrs = $list[0];
    my $result = "";

    if (exists $attrs->{"xml:space"}) {
        my $mode = $attrs->{"xml:space"};
        $spacepreserve = 1 if $mode =~ /^["']?preserve["']?$/;
        $spacepreserve = 0 if $mode =~ /^["']?default["']?$/;
    }

    for (my $index = 1; $index < @list; $index += 2) {
        my ($type, $content) = @list[$index, $index + 1];

        if (!$type) {
            # We've got CDATA
            next unless $content;
            # lets strip the whitespace here, and *ONLY* here
            $content =~ s/\s+/ /gs if (!$spacepreserve);
            $result .= $content;
            next;
        }

        next if "$type" eq "1";

        # We've got another element
        $result .= "<$type";
        $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
        my $subresult = $content ? getXMLstring($content, $spacepreserve) : '';
        $result .= $subresult ? ">" . $subresult . "</$type>" : "/>";
    }
    return $result;
}
# Translate list of nodes if necessary
#
# Runs traverse on every (type, content) pair of @$content.  When
# $singlelang is true, $MULTIPLE_OUTPUT is forced to 1 for the duration
# of each call and put back afterwards.
sub translate_subnodes
{
    my ($fh, $content, $language, $singlelang, $spacepreserve) = @_;
    $language ||= "";
    $singlelang ||= 0;
    $spacepreserve ||= 0;

    my @nodes = @{ $content };

    for (my $index = 0; $index < @nodes; $index += 2) {
        my ($type, $rest) = @nodes[$index, $index + 1];

        my $oldMO = $MULTIPLE_OUTPUT;
        $MULTIPLE_OUTPUT = 1 if $singlelang;
        traverse($fh, $type, $rest, $language, $spacepreserve);
        $MULTIPLE_OUTPUT = $oldMO;
    }
}
# Check whether $fragment parses as XML once wrapped in <root>...</root>.
# Returns the XML::Parser tree (a true value) on success and 0 for an
# empty or unparsable fragment.  Dies when XML::Parser is missing.
sub isWellFormedXmlFragment
{
    my ($fragment) = @_;

    eval 'require XML::Parser'
        or die "You must have XML::Parser installed to run $0\n\n";

    return 0 if (!$fragment);

    my $xp = XML::Parser->new(Style => 'Tree');
    my $tree = 0;
    # A parse error leaves $tree at 0.
    eval { $tree = $xp->parse("<root>$fragment</root>"); };
    return $tree;
}
# Write one node of the XML::Parser-style tree to $fh, merging in
# translations.
#
# Arguments:
#   $fh            output file handle
#   $nodename      element name, or a false value for a text node
#   $content       text (text node) or [ \%attrs, type, content, ... ]
#   $language      language of this output ("" = none)
#   $spacepreserve inherited xml:space setting
#
# A text node is printed as is; if it is whitespace only it is also
# appended to $leading_space, which is used to indent the per-language
# copies.  An element is "translatable" when its name starts with "_"
# ($translate == 1) or getAttributeString reported a marked attribute
# ($translate == 2).  Translatable elements are printed unmarked and
# followed by one xml:lang copy per language; with $MULTIPLE_OUTPUT only
# the copy for $language is produced.  Other elements are printed and
# their children traversed recursively.
sub traverse
{
    my $fh = shift;
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";
    my $spacepreserve = shift || 0;

    if (!$nodename) {
        if ($content =~ /^[\s]*$/) {
            $leading_space .= $content;
        }
        print $fh $content;
    } else {
        # element
        my @all = @{ $content };
        my $attrs = shift @all;
        my $translate = 0;
        my $outattr = getAttributeString($attrs, 1, $language, \$translate);

        if ($nodename =~ /^_/) {
            $translate = 1;
            $nodename =~ s/^_//;
        }
        my $lookup = '';

        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

        print $fh "<$nodename", $outattr;
        if ($translate) {
            $lookup = getXMLstring($content, $spacepreserve);
            if (!$spacepreserve) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }

            if ($lookup || $translate == 2) {
                # Only accept a translation that is well-formed XML.
                # Declared unconditionally: "my ... if COND" has
                # undefined behaviour and may keep a stale value.
                my $candidate = $translations{$language, $lookup};
                my $translation = isWellFormedXmlFragment($candidate) ? $candidate : undef;
                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
                    $translation = $lookup if (!$translation);
                    print $fh " xml:lang=\"", $language, "\"" if $language;
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $translation;
                    }
                    print $fh "</$nodename>";

                    return; # this means there will be no same translation with xml:lang="$language"...
                            # if we want them both, just remove this "return"
                } else {
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $lookup;
                    }
                    print $fh "</$nodename>";
                }
            } else {
                print $fh "/>";
            }

            for my $lang (sort keys %po_files_by_lang) {
                if ($MULTIPLE_OUTPUT && $lang ne "$language") {
                    next;
                }
                if ($lang) {
                    # Handle translation
                    #
                    my $translate = 0;
                    my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
                    my $candidate = $translations{$lang, $lookup};
                    my $translation = isWellFormedXmlFragment($candidate) ? $candidate : undef;
                    if ($translate && !$translation) {
                        $translation = $lookup;
                    }

                    if ($translation || $translate) {
                        print $fh "\n";
                        # Keep only the text after the last newline.
                        $leading_space =~ s/.*\n//g;
                        print $fh $leading_space;
                        print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
                        if ($translate == 2) {
                            translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
                        } else {
                            print $fh $translation;
                        }
                        print $fh "</$nodename>";
                    }
                }
            }

        } else {
            my $count = scalar(@all);
            if ($count > 0) {
                print $fh ">";
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language, $spacepreserve);
                    $index += 2;
                }
                print $fh "</$nodename>";
            } else {
                print $fh "/>";
            }
        }
    }
}
# Comment handler in the style of the XML::Parser tree builders:
# appends the pair (1, comment text) to the current content list.
sub intltool_tree_comment
{
    my ($expat, $data) = @_;

    push @{ $expat->{Curlist} }, 1 => $data;
}
# CdataStart handler: opens a new text entry (type 0) in the current
# content list, holding the original source text of the CDATA opener.
sub intltool_tree_cdatastart
{
    my ($expat) = @_;

    push @{ $expat->{Curlist} }, 0 => $expat->original_string();
}
# CdataEnd handler: appends the original source text of the CDATA
# terminator to the last entry of the current content list.
sub intltool_tree_cdataend
{
    my ($expat) = @_;

    $expat->{Curlist}[-1] .= $expat->original_string();
}
# Char handler: stores character data in the current content list.
# When the previous entry is already text (type 0) the data is appended
# to it, otherwise a new (0, text) pair is pushed.
sub intltool_tree_char
{
    my ($expat) = @_;
    my $clist = $expat->{Curlist};
    my $last = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    my $chunk = $expat->original_string();

    if ($last > 0 and $clist->[$last - 1] eq '0') {
        $clist->[$last] .= $chunk;
    } else {
        push @$clist, 0 => $chunk;
    }
}
# Start handler: opens a new element in the tree.
#
# The attribute hash is rebuilt from the original source text of the
# start tag instead of the values expat decoded, and each value keeps
# its surrounding quote characters.  The new element's content list
# becomes the current list; the previous one is pushed on
# $expat->{Lists}.
sub intltool_tree_start
{
    my $expat = shift;
    my $tag = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    my $source = $expat->original_string();

    # Remove leading tag.
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    # Each branch matches and removes in one step, so a pass either
    # shortens $source or ends the loop.
    while ($source)
    {
        if ($source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s)
        {
            push @origlist, $1, '"' . $2 . '"';
        }
        elsif ($source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s)
        {
            push @origlist, $1, "'" . $2 . "'";
        }
        else
        {
            last;
        }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}
# Parse $filename with XML::Parser in Tree style, using the
# intltool_tree_* handlers for Char, Start, CdataStart and CdataEnd,
# and return the resulting tree.  Returns nothing when called without
# a file name; dies when the file or XML::Parser is missing.
sub readXml
{
    my $filename = shift || return;
    die "ERROR Cannot find filename: $filename\n" unless -f $filename;

    eval 'require XML::Parser'
        or die "You must have XML::Parser installed to run $0\n\n";

    my $xp = XML::Parser->new(Style => 'Tree');
    $xp->setHandlers(
        Char       => \&intltool_tree_char,
        Start      => \&intltool_tree_start,
        CdataStart => \&intltool_tree_cdatastart,
        CdataEnd   => \&intltool_tree_cdataend,
    );

    # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
    # would be:
    # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
    # 0, "Howdy", ref, [{}]], 0, "do" ] ]
    return $xp->parsefile($filename);
}
# Write the XML declaration (always UTF-8) to $fh, followed by the
# DOCTYPE declaration of $infile if it has one.  A DOCTYPE with an
# internal subset ("[...]") is copied whole.
# Dies when $infile does not exist or cannot be opened.
sub print_header
{
    my $infile = shift;
    my $fh = shift;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};

    # Read the file that was passed in (and checked above) rather than
    # the global $FILE.
    my $source = do {
        local $/;   # slurp mode
        open my $doc_fh, '<', $infile
            or die "can't open $infile: $!";
        my $everything = <$doc_fh>;
        close $doc_fh;
        $everything;
    };

    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
    {
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}
# Write a whole parsed document to $fh.  Leading (name, content) pairs
# are shifted off @$ref until one with a real element name is found
# (a false name or "1" is skipped); that element is then handed to
# traverse.  Note that @$ref is consumed in the process.
sub parseTree
{
    my ($fh, $ref, $language) = @_;
    $language ||= "";

    my ($name, $cont);
    do {
        $name = shift @{ $ref };
        $cont = shift @{ $ref };
    } while (!$name || "$name" eq "1");

    my $attrs = $cont->[0];
    my $spacepreserve =
        ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))
        ? 1 : 0;

    traverse($fh, $name, $cont, $language, $spacepreserve);
}
# Merge translations into an XML file.  With $MULTIPLE_OUTPUT one file
# "LANG/$OUTFILE" is written per language (creating the directory if
# needed); after that, $OUTFILE itself is written in every case.
sub xml_merge_output
{
    # Parse $FILE afresh and write it to $path for $lang (may be undef).
    my $write_merged = sub {
        my ($path, $lang) = @_;

        open OUTPUT, ">$path" or die "Cannot open $path: $!\n";
        binmode (OUTPUT) if $^O eq 'MSWin32';
        my $tree = readXml($FILE);
        print_header($FILE, \*OUTPUT);
        parseTree(\*OUTPUT, $tree, $lang);
        close OUTPUT;
        print "CREATED $path\n" unless $QUIET_ARG;
    };

    if ($MULTIPLE_OUTPUT) {
        for my $lang (sort keys %po_files_by_lang) {
            if ( ! -d $lang ) {
                mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
            }
            $write_merged->("$lang/$OUTFILE", $lang);
        }
    }

    $write_merged->($OUTFILE);
}
# Copy $FILE to $OUTFILE line by line.  A line of the form "_key=value"
# is written as "key=value", followed by one "[LANG]key=translation"
# line for each language that translates the value.
sub keys_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    while (<INPUT>)
    {
        if (!s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            # Not a marked line: copy verbatim.
            print OUTPUT;
            next;
        }

        my $string = $3;
        print OUTPUT;

        my $non_translated_line = $_;

        for my $lang (sort keys %po_files_by_lang)
        {
            my $translation = $translations{$lang, $string};
            next if !$translation;

            (my $localized = $non_translated_line) =~ s/(\w+)=.*/[$lang]$1=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}
# Copy $FILE to $OUTFILE line by line.  A line of the form "_key=value"
# is written as "key=value", followed by one "key[LANG]=translation"
# line for each language that translates the value.
sub desktop_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    while (<INPUT>)
    {
        if (!s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            # Not a marked line: copy verbatim.
            print OUTPUT;
            next;
        }

        my $string = $3;
        print OUTPUT;

        my $non_translated_line = $_;

        for my $lang (sort keys %po_files_by_lang)
        {
            my $translation = $translations{$lang, $string};
            next if !$translation;

            (my $localized = $non_translated_line) =~ s/(\w+)=.*/${1}[$lang]=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}
# Copy $FILE to $OUTFILE, adding after every <locale name="C"> block one
# <locale name="LANG"> block per language that translates its
# <default>, <short> or <long> text.  The whitespace found between the
# parts of the C block is reused for the generated blocks.
sub schemas_merge_translations
{
    my $source = do {
        local $/; # slurp mode
        open INPUT, "<$FILE" or die "can't open $FILE: $!";
        my $everything = <INPUT>;
        close INPUT;
        $everything;
    };

    open OUTPUT, ">$OUTFILE" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Collapse whitespace and decode entities, as done for lookup keys.
    my $as_lookup_key = sub {
        my ($text) = @_;
        $text =~ s/\s+/ /g;
        return entity_decode($text);
    };

    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx)
    {
        # Copy the captures at once; false ones become ''.
        my $before = $1;
        my ($locale_start_spaces, $c_default_block,
            $default_spaces, $default_string,
            $short_spaces,   $short_string,
            $long_spaces,    $long_string,
            $locale_end_spaces)
            = map { $_ ? $_ : '' } ($2, $3, $4, $6, $7, $9, $10, $12, $13);

        print OUTPUT $before;
        print OUTPUT "$locale_start_spaces$c_default_block";

        $default_string = $as_lookup_key->($default_string);
        $short_string   = $as_lookup_key->($short_string);
        $long_string    = $as_lookup_key->($long_string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $default_translation = $translations{$lang, $default_string};
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation = $translations{$lang, $long_string};

            next unless $default_translation || $short_translation
                     || $long_translation;

            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";

            print OUTPUT "$default_spaces";
            if ($default_translation)
            {
                print OUTPUT "<default>" . entity_encode($default_translation) . "</default>";
            }

            print OUTPUT "$short_spaces";
            if ($short_translation)
            {
                print OUTPUT "<short>" . entity_encode($short_translation) . "</short>";
            }

            print OUTPUT "$long_spaces";
            if ($long_translation)
            {
                print OUTPUT "<long>" . entity_encode($long_translation) . "</long>";
            }

            print OUTPUT "$locale_end_spaces</locale>";
        }
    }

    print OUTPUT $source;

    close OUTPUT;
}
# Merge translations into an RFC822-style file.
#
# Every "Field: value" entry of $FILE is copied to $OUTFILE with leading
# underscores removed from the field name.  For a field marked with one
# or two underscores, a "Field-LANG.ENCODING: ..." entry is added for
# each language that translates every piece of the value (pieces as
# returned by rfc822deb_split).  Two underscores mean the value is a
# comma separated list.
sub rfc822deb_merge_translations
{
    my %encodings = map {
        $_ => ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$_}))
    } keys %po_files_by_lang;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    my $source = do {
        local $/; # slurp mode
        open INPUT, "<$FILE" or die "can't open $FILE: $!";
        my $everything = <INPUT>;
        close INPUT;
        $everything;
    };

    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
        my ($sep, $marks, $field, $colon, $string) = ($1, $2, $3, $4, $5);
        my $non_translated_line = $field . $colon;
        my $underscore = length($marks);
        next if $underscore == 0 && $non_translated_line =~ /^#/;

        # Remove [] dummy strings
        my $stripped = $string;
        $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore == 2;
        $stripped =~ s/\[\s[^\[\]]*\]$//;
        $non_translated_line .= $stripped;

        print OUTPUT $sep.$non_translated_line;

        next unless $underscore;

        my @str_list = rfc822deb_split($underscore, $string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $is_translated = 1;
            my $str_translated = '';
            my $first = 1;

            for my $str (@str_list)
            {
                my $translation = $translations{$lang, $str};

                if (!$translation)
                {
                    $is_translated = 0;
                    last;
                }

                # $translation may also contain [] dummy
                # strings, mostly to indicate an empty string
                $translation =~ s/\[\s[^\[\]]*\]$//;

                if ($underscore == 2)
                {
                    # List value: items joined by ", ".
                    $str_translated .= $first ? $translation : ', ' . $translation;
                }
                elsif ($first)
                {
                    # Short form on the first line.
                    $str_translated .=
                        Text::Tabs::expand($translation) .
                        "\n";
                }
                else
                {
                    # Wrapped paragraph, ended by a "." line.
                    $str_translated .= Text::Tabs::expand(
                        Text::Wrap::wrap(' ', ' ', $translation)) .
                        "\n .\n";
                }
                $first = 0;

                # To fix some problems with Text::Wrap::wrap
                $str_translated =~ s/(\n )+\n/\n .\n/g;
            }
            next unless $is_translated;

            $str_translated =~ s/\n \.\n$//;
            $str_translated =~ s/\s+$//;

            $_ = $non_translated_line;
            s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
            print OUTPUT;
        }
    }
    print OUTPUT "\n";

    close OUTPUT;
    close INPUT;
}
# Debian defines a special way to deal with rfc822-style files:
# when a value contain newlines, it consists of
#   1.  a short form (first line)
#   2.  a long description, all lines begin with a space,
#       and paragraphs are separated by a single dot on a line
# This routine returns an array of all paragraphs, and reformat
# them.
# When first argument is 2, the string is a comma separated list of
# values.
sub rfc822deb_split
{
    my ($type, $text) = @_;

    # Drop one leading blank or tab from every line.
    $text =~ s/^[ \t]//mg;
    return (split(/, */, $text, 0)) if $type ne 1;
    return ($text) if $text !~ /\n/;

    # The first line is the short form.
    $text =~ s/([^\n]*)\n//;
    my @list = ($1);
    my $str = '';

    for my $line (split (/\n/, $text))
    {
        chomp $line;

        if ($line =~ /^\.\s*$/)
        {
            # New paragraph
            $str =~ s/\s*$//;
            push(@list, $str);
            $str = '';
            next;
        }

        my $needs_break = length ($str) && $str !~ /\n$/;

        if ($line =~ /^\s/)
        {
            # Line which must not be reformatted
            $str .= "\n" if $needs_break;
            $line =~ s/\s+$//;
            $str .= $line."\n";
        }
        else
        {
            # Continuation line, remove newline
            $str .= " " if $needs_break;
            $str .= $line;
        }
    }

    $str =~ s/\s*$//;
    push(@list, $str) if length ($str);

    return @list;
}
# Translate the contents of a quoted string: \" is unescaped before the
# lookup, the untranslated text is used when $lang has no translation,
# and double quotes are escaped again in the result.
sub quoted_translation
{
    my ($lang, $string) = @_;

    (my $unescaped = $string) =~ s/\\\"/\"/g;

    my $translation = $translations{$lang, $unescaped} || $unescaped;

    $translation =~ s/\"/\\\"/g;
    return $translation;
}
# For every language, write "LANG/$OUTFILE" (creating the directory if
# needed) as a copy of $FILE in which each double-quoted string has
# been passed through quoted_translation.  Only works together with
# --multiple-output; otherwise prints a message and exits with status 1.
sub quoted_merge_translations
{
    unless ($MULTIPLE_OUTPUT) {
        print "Quoted only supports Multiple Output.\n";
        exit(1);
    }

    for my $lang (sort keys %po_files_by_lang) {
        if ( ! -d $lang ) {
            mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
        }

        open INPUT, "<${FILE}" or die;
        open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
        binmode (OUTPUT) if $^O eq 'MSWin32';

        while (<INPUT>)
        {
            s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . &quoted_translation($lang, $1) . "\""/ge;
            print OUTPUT;
        }

        close OUTPUT;
        close INPUT;
    }
}