Bug 2505 - Add commented use warnings where missing in the misc/ directory
[koha.git] / misc / translator / tmpl_process3.pl
blobdbbe18e9b398d66fb7a2da759ce5640d17cdc983
1 #!/usr/bin/perl
2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
7 =head1 NAME
9 tmpl_process3.pl - Alternative version of tmpl_process.pl
10 using gettext-compatible translation files
12 =cut
14 use strict;
15 #use warnings; FIXME - Bug 2505
16 use Getopt::Long;
17 use Locale::PO;
18 use File::Temp qw( :POSIX );
19 use TmplTokenizer;
20 use VerboseWarnings qw( :warn :die );
22 ###############################################################################
24 use vars qw( @in_files $in_dir $str_file $out_dir $quiet );
25 use vars qw( @excludes $exclude_regex );
26 use vars qw( $recursive_p );
27 use vars qw( $pedantic_p );
28 use vars qw( $href );
29 use vars qw( $type ); # file extension (DOS form without the dot) to match
30 use vars qw( $charset_in $charset_out );
32 ###############################################################################
34 sub find_translation ($) {
35 my($s) = @_;
36 my $key = $s;
37 if ($s =~ /\S/s) {
38 $key = TmplTokenizer::string_canon($key);
39 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
40 $key = TmplTokenizer::quote_po($key);
42 return defined $href->{$key}
43 && !$href->{$key}->fuzzy
44 && length Locale::PO->dequote($href->{$key}->msgstr)?
45 Locale::PO->dequote($href->{$key}->msgstr): $s;
48 sub text_replace_tag ($$) {
49 my($t, $attr) = @_;
50 my $it;
51 # value [tag=input], meta
52 my $tag = lc($1) if $t =~ /^<(\S+)/s;
53 my $translated_p = 0;
54 for my $a ('alt', 'content', 'title', 'value','label') {
55 if ($attr->{$a}) {
56 next if $a eq 'label' && $tag ne 'optgroup';
57 next if $a eq 'content' && $tag ne 'meta';
58 next if $a eq 'value' && ($tag ne 'input'
59 || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio|text)$/)); # FIXME
60 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
61 if ($val =~ /\S/s) {
62 my $s = find_translation($val);
63 if ($attr->{$a}->[1] ne $s) { #FIXME
64 $attr->{$a}->[1] = $s; # FIXME
65 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
66 $translated_p = 1;
71 if ($translated_p) {
72 $it = "<$tag"
73 . join('', map {
74 sprintf(' %s=%s', $_, $attr->{$_}->[2]) #FIXME
75 } sort {
76 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
77 } keys %$attr)
78 . '>';
79 } else {
80 $it = $t;
82 return $it;
85 sub text_replace (**) {
86 my($h, $output) = @_;
87 for (;;) {
88 my $s = TmplTokenizer::next_token $h;
89 last unless defined $s;
90 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
91 if ($kind eq TmplTokenType::TEXT) {
92 print $output find_translation($t);
93 } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) {
94 my $fmt = find_translation($s->form);
95 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
96 $_ = $_[0];
97 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
98 $kind == TmplTokenType::TAG && %$attr?
99 text_replace_tag($t, $attr): $t });
100 } elsif ($kind eq TmplTokenType::TAG && %$attr) {
101 print $output text_replace_tag($t, $attr);
102 } elsif ($s->has_js_data) {
103 for my $t (@{$s->js_data}) {
104 # FIXME for this whole block
105 if ($t->[0]) {
106 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
107 $t->[2];
108 } else {
109 print $output $t->[1];
112 } elsif (defined $t) {
113 print $output $t;
118 sub listfiles ($$$) {
119 my($dir, $type, $action) = @_;
120 my @it = ();
121 if (opendir(DIR, $dir)) {
122 my @dirent = readdir DIR; # because DIR is shared when recursing
123 closedir DIR;
124 for my $dirent (@dirent) {
125 my $path = "$dir/$dirent";
126 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
127 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
129 } elsif (-f $path) {
130 push @it, $path if (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
131 } elsif (-d $path && $recursive_p) {
132 push @it, listfiles($path, $type, $action);
135 } else {
136 warn_normal "$dir: $!", undef;
138 return @it;
141 ###############################################################################
143 sub mkdir_recursive ($) {
144 my($dir) = @_;
145 local($`, $&, $', $1);
146 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
147 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
148 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
149 if (!-d $dir) {
150 print STDERR "Making directory $dir..." unless $quiet;
151 # creates with rwxrwxr-x permissions
152 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
156 ###############################################################################
158 sub usage ($) {
159 my($exitcode) = @_;
160 my $h = $exitcode? *STDERR: *STDOUT;
161 print $h <<EOF;
162 Usage: $0 create [OPTION]
163 or: $0 update [OPTION]
164 or: $0 install [OPTION]
165 or: $0 --help
166 Create or update PO files from templates, or install translated templates.
168 -i, --input=SOURCE Get or update strings from SOURCE file.
169 SOURCE is a directory if -r is also specified.
170 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
171 --pedantic-warnings Issue warnings even for detected problems
172 which are likely to be harmless
173 -r, --recursive SOURCE in the -i option is a directory
174 -s, --str-file=FILE Specify FILE as the translation (po) file
175 for input (install) or output (create, update)
176 -x, --exclude=REGEXP Exclude files matching the given REGEXP
177 --help Display this help and exit
178 -q, --quiet no output to screen (except for errors)
180 The -o option is ignored for the "create" and "update" actions.
181 Try `perldoc $0 for perhaps more information.
183 exit($exitcode);
186 ###############################################################################
188 sub usage_error (;$) {
189 for my $msg (split(/\n/, $_[0])) {
190 print STDERR "$msg\n";
192 print STDERR "Try `$0 --help for more information.\n";
193 exit(-1);
196 ###############################################################################
198 GetOptions(
199 'input|i=s' => \@in_files,
200 'outputdir|o=s' => \$out_dir,
201 'recursive|r' => \$recursive_p,
202 'str-file|s=s' => \$str_file,
203 'exclude|x=s' => \@excludes,
204 'quiet|q' => \$quiet,
205 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
206 'help' => \&usage,
207 ) || usage_error;
209 VerboseWarnings::set_application_name $0;
210 VerboseWarnings::set_pedantic_mode $pedantic_p;
212 # keep the buggy Locale::PO quiet if it says stupid things
213 $SIG{__WARN__} = sub {
214 my($s) = @_;
215 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
218 my $action = shift or usage_error('You must specify an ACTION.');
219 usage_error('You must at least specify input and string list filenames.')
220 if !@in_files || !defined $str_file;
222 # Type match defaults to *.tmpl plus *.inc if not specified
223 $type = "tmpl|inc|xsl" if !defined($type);
225 # Check the inputs for being files or directories
226 for my $input (@in_files) {
227 usage_error("$input: Input must be a file or directory.\n"
228 . "(Symbolic links are not supported at the moment)")
229 unless -d $input || -f $input;;
232 # Generates the global exclude regular expression
233 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
235 # Generate the list of input files if a directory is specified
236 if (-d $in_files[0]) {
237 die "If you specify a directory as input, you must specify only it.\n"
238 if @in_files > 1;
240 # input is a directory, generates list of files to process
241 $in_dir = $in_files[0];
242 $in_dir =~ s/\/$//; # strips the trailing / if any
243 @in_files = listfiles($in_dir, $type, $action);
244 } else {
245 for my $input (@in_files) {
246 die "You cannot specify input files and directories at the same time.\n"
247 unless -f $input;
251 # restores the string list from file
252 $href = Locale::PO->load_file_ashash($str_file);
254 # guess the charsets. HTML::Templates defaults to iso-8859-1
255 if (defined $href) {
256 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
257 $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
258 $charset_in = $charset_out;
259 warn "Charset in/out: ".$charset_out;
260 # for my $msgid (keys %$href) {
261 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
262 # my $candidate = TmplTokenizer::charset_canon $2;
263 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
264 # if defined $charset_in && $charset_in ne $candidate;
265 # $charset_in = $candidate;
270 # set our charset in to UTF-8
271 if (!defined $charset_in) {
272 $charset_in = TmplTokenizer::charset_canon 'UTF-8';
273 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
275 # set our charset out to UTF-8
276 if (!defined $charset_out) {
277 $charset_out = TmplTokenizer::charset_canon 'UTF-8';
278 warn "Warning: Charset Out defaulting to $charset_out\n";
280 my $xgettext = './xgettext.pl'; # actual text extractor script
281 my $st;
283 if ($action eq 'create') {
284 # updates the list. As the list is empty, every entry will be added
285 if (!-s $str_file) {
286 warn "Removing empty file $str_file\n";
287 unlink $str_file || die "$str_file: $!\n";
289 die "$str_file: Output file already exists\n" if -f $str_file;
290 my($tmph1, $tmpfile1) = tmpnam();
291 my($tmph2, $tmpfile2) = tmpnam();
292 close $tmph2; # We just want a name
293 # Generate the temporary file that acts as <MODULE>/POTFILES.in
294 for my $input (@in_files) {
295 print $tmph1 "$input\n";
297 close $tmph1;
298 warn "I $charset_in O $charset_out";
299 # Generate the specified po file ($str_file)
300 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
301 (defined $charset_in? ('-I', $charset_in): ()),
302 (defined $charset_out? ('-O', $charset_out): ())
304 # Run msgmerge so that the pot file looks like a real pot file
305 # We need to help msgmerge a bit by pre-creating a dummy po file that has
306 # the headers and the "" msgid & msgstr. It will fill in the rest.
307 if ($st == 0) {
308 # Merge the temporary "pot file" with the specified po file ($str_file)
309 # FIXME: msgmerge(1) is a Unix dependency
310 # FIXME: need to check the return value
311 unless (-f $str_file) {
312 local(*INPUT, *OUTPUT);
313 open(INPUT, "<$tmpfile2");
314 open(OUTPUT, ">$str_file");
315 while (<INPUT>) {
316 print OUTPUT;
317 last if /^\n/s;
319 close INPUT;
320 close OUTPUT;
322 $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
323 } else {
324 error_normal "Text extraction failed: $xgettext: $!\n", undef;
325 error_additional "Will not run msgmerge\n", undef;
327 # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
328 # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
330 } elsif ($action eq 'update') {
331 my($tmph1, $tmpfile1) = tmpnam();
332 my($tmph2, $tmpfile2) = tmpnam();
333 close $tmph2; # We just want a name
334 # Generate the temporary file that acts as <MODULE>/POTFILES.in
335 for my $input (@in_files) {
336 print $tmph1 "$input\n";
338 close $tmph1;
339 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
340 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
341 '--po-mode',
342 (defined $charset_in? ('-I', $charset_in): ()),
343 (defined $charset_out? ('-O', $charset_out): ()));
344 if ($st == 0) {
345 # Merge the temporary "pot file" with the specified po file ($str_file)
346 # FIXME: msgmerge(1) is a Unix dependency
347 # FIXME: need to check the return value
348 $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
349 } else {
350 error_normal "Text extraction failed: $xgettext: $!\n", undef;
351 error_additional "Will not run msgmerge\n", undef;
353 # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
354 # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
356 } elsif ($action eq 'install') {
357 if(!defined($out_dir)) {
358 usage_error("You must specify an output directory when using the install method.");
361 if ($in_dir eq $out_dir) {
362 warn "You must specify a different input and output directory.\n";
363 exit -1;
366 # Make sure the output directory exists
367 # (It will auto-create it, but for compatibility we should not)
368 -d $out_dir || die "$out_dir: The directory does not exist\n";
370 # Try to open the file, because Locale::PO doesn't check :-/
371 open(INPUT, "<$str_file") || die "$str_file: $!\n";
372 close INPUT;
374 # creates the new tmpl file using the new translation
375 for my $input (@in_files) {
376 die "Assertion failed"
377 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
378 # print "$input / $type\n";
379 if (!defined $type || $input =~ /\.(?:$type)$/) {
380 my $h = TmplTokenizer->new( $input );
381 $h->set_allow_cformat( 1 );
382 VerboseWarnings::set_input_file_name $input;
384 my $target = $out_dir . substr($input, length($in_dir));
385 my $targetdir = $` if $target =~ /[^\/]+$/s;
386 mkdir_recursive($targetdir) unless -d $targetdir;
387 print STDERR "Creating $target...\n" unless $quiet;
388 open( OUTPUT, ">$target" ) || die "$target: $!\n";
389 text_replace( $h, *OUTPUT );
390 close OUTPUT;
391 } else {
392 # just copying the file
393 my $target = $out_dir . substr($input, length($in_dir));
394 my $targetdir = $` if $target =~ /[^\/]+$/s;
395 mkdir_recursive($targetdir) unless -d $targetdir;
396 system("cp -f $input $target");
397 print STDERR "Copying $input...\n" unless $quiet;
401 } else {
402 usage_error('Unknown action specified.');
405 if ($st == 0) {
406 printf "The %s seems to be successful.\n", $action unless $quiet;
407 } else {
408 printf "%s FAILED.\n", "\u$action" unless $quiet;
410 exit 0;
412 ###############################################################################
414 =head1 SYNOPSIS
416 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
418 =head1 DESCRIPTION
420 This is an alternative version of the tmpl_process.pl script,
421 using standard gettext-style PO files. While there still might
422 be changes made to the way it extracts strings, at this moment
423 it should be stable enough for general use; it is already being
424 used for the Chinese and Polish translations.
426 Currently, the create, update, and install actions have all been
427 reimplemented and seem to work.
429 =head2 Features
431 =over
433 =item -
435 Translation files in standard Uniforum PO format.
436 All standard tools including all gettext tools,
437 plus PO file editors like kbabel(1) etc.
438 can be used.
440 =item -
442 Minor changes in whitespace in source templates
443 do not generally require strings to be re-translated.
445 =item -
447 Able to handle <TMPL_VAR> variables in the templates;
448 <TMPL_VAR> variables are usually extracted in proper context,
449 represented by a short %s placeholder.
451 =item -
453 Able to handle text input and radio button INPUT elements
454 in the templates; these INPUT elements are also usually
455 extracted in proper context,
456 represented by a short %S or %p placeholder.
458 =item -
460 Automatic comments in the generated PO files to provide
461 even more context (line numbers, and the names and types
462 of the variables).
464 =item -
466 The %I<n>$s (or %I<n>$p, etc.) notation can be used
467 for change the ordering of the variables,
468 if such a reordering is required for correct translation.
470 =item -
472 If a particular <TMPL_VAR> should not appear in the
473 translation, it can be suppressed with the %0.0s notation.
475 =item -
477 Using the PO format also means translators can add their
478 own comments in the translation files, if necessary.
480 =item -
482 Create, update, and install actions are all based on the
483 same scanner module. This ensures that update and install
484 have the same idea of what is a translatable string;
485 attribute names in tags, for example, will not be
486 accidentally translated.
488 =back
490 =head1 NOTES
492 Anchors are represented by an <AI<n>> notation.
493 The meaning of this non-standard notation might not be obvious.
495 The create action calls xgettext.pl to do the actual work;
496 the update action calls xgettext.pl and msgmerge(1) to do the
497 actual work.
499 =head1 BUGS
501 xgettext.pl must be present in the current directory; the
502 msgmerge(1) command must also be present in the search path.
503 The script currently does not check carefully whether these
504 dependent commands are present.
506 Locale::PO(3) has a lot of bugs. It can neither parse nor
507 generate GNU PO files properly; a couple of workarounds have
508 been written in TmplTokenizer and more is likely to be needed
509 (e.g., to get rid of the "Strange line" warning for #~).
511 This script may not work in Windows.
513 There are probably some other bugs too, since this has not been
514 tested very much.
516 =head1 SEE ALSO
518 xgettext.pl,
519 TmplTokenizer.pm,
520 msgmerge(1),
521 Locale::PO(3),
522 translator_doc.txt
524 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms
526 =cut