Bug 6328 follow-up, fixes Undefined subroutine &C4::Circulation::Delta_Days
[koha.git] / misc / translator / tmpl_process3.pl
blobcb69117e7cd8ca8b7dc7b556559a1d8c4fc0e746
1 #!/usr/bin/perl
2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
7 =head1 NAME
9 tmpl_process3.pl - Alternative version of tmpl_process.pl
10 using gettext-compatible translation files
12 =cut
14 use strict;
15 #use warnings; FIXME - Bug 2505
16 use Getopt::Long;
17 use Locale::PO;
18 use File::Temp qw( :POSIX );
19 use TmplTokenizer;
20 use VerboseWarnings qw( :warn :die );
22 ###############################################################################
24 use vars qw( @in_files $in_dir $str_file $out_dir $quiet );
25 use vars qw( @excludes $exclude_regex );
26 use vars qw( $recursive_p );
27 use vars qw( $pedantic_p );
28 use vars qw( $href );
29 use vars qw( $type ); # file extension (DOS form without the dot) to match
30 use vars qw( $charset_in $charset_out );
32 ###############################################################################
34 sub find_translation ($) {
35 my($s) = @_;
36 my $key = $s;
37 if ($s =~ /\S/s) {
38 $key = TmplTokenizer::string_canon($key);
39 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
40 $key = TmplTokenizer::quote_po($key);
42 if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){
43 if ($s =~ /^(\s+)/){
44 return $1 . Locale::PO->dequote($href->{$key}->msgstr);
46 else {
47 return Locale::PO->dequote($href->{$key}->msgstr);
50 else {
51 return $s;
55 sub text_replace_tag ($$) {
56 my($t, $attr) = @_;
57 my $it;
59 # value [tag=input], meta
60 my $tag = lc($1) if $t =~ /^<(\S+)/s;
61 my $translated_p = 0;
62 for my $a ('alt', 'content', 'title', 'value','label') {
63 if ($attr->{$a}) {
64 next if $a eq 'label' && $tag ne 'optgroup';
65 next if $a eq 'content' && $tag ne 'meta';
66 next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio|text)$/)); # FIXME
68 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
69 if ($val =~ /\S/s) {
70 my $s = find_translation($val);
71 if ($attr->{$a}->[1] ne $s) { #FIXME
72 $attr->{$a}->[1] = $s; # FIXME
73 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
74 $translated_p = 1;
79 if ($translated_p) {
80 $it = "<$tag"
81 . join('', map { if ($_ ne '/'){
82 sprintf(' %s="%s"', $_, $attr->{$_}->[1]);
84 else {
85 sprintf(' %s',$_);
88 } sort {
89 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
90 } keys %$attr);
91 $it .= '>';
93 else {
94 $it = $t;
96 return $it;
99 sub text_replace (**) {
100 my($h, $output) = @_;
101 for (;;) {
102 my $s = TmplTokenizer::next_token $h;
103 last unless defined $s;
104 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
105 if ($kind eq C4::TmplTokenType::TEXT) {
106 print $output find_translation($t);
107 } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
108 my $fmt = find_translation($s->form);
109 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
110 $_ = $_[0];
111 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
112 $kind == C4::TmplTokenType::TAG && %$attr?
113 text_replace_tag($t, $attr): $t });
114 } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
115 print $output text_replace_tag($t, $attr);
116 } elsif ($s->has_js_data) {
117 for my $t (@{$s->js_data}) {
118 # FIXME for this whole block
119 if ($t->[0]) {
120 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
121 $t->[2];
122 } else {
123 print $output $t->[1];
126 } elsif (defined $t) {
127 # Quick fix to bug 4472
128 $t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/ ;
129 print $output $t;
134 sub listfiles ($$$) {
135 my($dir, $type, $action) = @_;
136 my @it = ();
137 if (opendir(DIR, $dir)) {
138 my @dirent = readdir DIR; # because DIR is shared when recursing
139 closedir DIR;
140 for my $dirent (@dirent) {
141 my $path = "$dir/$dirent";
142 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
143 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
145 } elsif (-f $path) {
146 push @it, $path if (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
147 } elsif (-d $path && $recursive_p) {
148 push @it, listfiles($path, $type, $action);
151 } else {
152 warn_normal "$dir: $!", undef;
154 return @it;
157 ###############################################################################
159 sub mkdir_recursive ($) {
160 my($dir) = @_;
161 local($`, $&, $', $1);
162 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
163 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
164 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
165 if (!-d $dir) {
166 print STDERR "Making directory $dir..." unless $quiet;
167 # creates with rwxrwxr-x permissions
168 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
172 ###############################################################################
174 sub usage ($) {
175 my($exitcode) = @_;
176 my $h = $exitcode? *STDERR: *STDOUT;
177 print $h <<EOF;
178 Usage: $0 create [OPTION]
179 or: $0 update [OPTION]
180 or: $0 install [OPTION]
181 or: $0 --help
182 Create or update PO files from templates, or install translated templates.
184 -i, --input=SOURCE Get or update strings from SOURCE file.
185 SOURCE is a directory if -r is also specified.
186 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
187 --pedantic-warnings Issue warnings even for detected problems
188 which are likely to be harmless
189 -r, --recursive SOURCE in the -i option is a directory
190 -s, --str-file=FILE Specify FILE as the translation (po) file
191 for input (install) or output (create, update)
192 -x, --exclude=REGEXP Exclude files matching the given REGEXP
193 --help Display this help and exit
194 -q, --quiet no output to screen (except for errors)
196 The -o option is ignored for the "create" and "update" actions.
197 Try `perldoc $0 for perhaps more information.
199 exit($exitcode);
202 ###############################################################################
204 sub usage_error (;$) {
205 for my $msg (split(/\n/, $_[0])) {
206 print STDERR "$msg\n";
208 print STDERR "Try `$0 --help for more information.\n";
209 exit(-1);
212 ###############################################################################
214 GetOptions(
215 'input|i=s' => \@in_files,
216 'outputdir|o=s' => \$out_dir,
217 'recursive|r' => \$recursive_p,
218 'str-file|s=s' => \$str_file,
219 'exclude|x=s' => \@excludes,
220 'quiet|q' => \$quiet,
221 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
222 'help' => \&usage,
223 ) || usage_error;
225 VerboseWarnings::set_application_name $0;
226 VerboseWarnings::set_pedantic_mode $pedantic_p;
228 # keep the buggy Locale::PO quiet if it says stupid things
229 $SIG{__WARN__} = sub {
230 my($s) = @_;
231 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
234 my $action = shift or usage_error('You must specify an ACTION.');
235 usage_error('You must at least specify input and string list filenames.')
236 if !@in_files || !defined $str_file;
238 # Type match defaults to *.tt plus *.inc if not specified
239 $type = "tt|inc|xsl" if !defined($type);
241 # Check the inputs for being files or directories
242 for my $input (@in_files) {
243 usage_error("$input: Input must be a file or directory.\n"
244 . "(Symbolic links are not supported at the moment)")
245 unless -d $input || -f $input;;
248 # Generates the global exclude regular expression
249 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
251 # Generate the list of input files if a directory is specified
252 if (-d $in_files[0]) {
253 die "If you specify a directory as input, you must specify only it.\n"
254 if @in_files > 1;
256 # input is a directory, generates list of files to process
257 $in_dir = $in_files[0];
258 $in_dir =~ s/\/$//; # strips the trailing / if any
259 @in_files = listfiles($in_dir, $type, $action);
260 } else {
261 for my $input (@in_files) {
262 die "You cannot specify input files and directories at the same time.\n"
263 unless -f $input;
267 # restores the string list from file
268 $href = Locale::PO->load_file_ashash($str_file);
270 # guess the charsets. HTML::Templates defaults to iso-8859-1
271 if (defined $href) {
272 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
273 $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
274 $charset_in = $charset_out;
275 warn "Charset in/out: ".$charset_out;
276 # for my $msgid (keys %$href) {
277 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
278 # my $candidate = TmplTokenizer::charset_canon $2;
279 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
280 # if defined $charset_in && $charset_in ne $candidate;
281 # $charset_in = $candidate;
285 # BUG6464: check consistency of PO messages
286 # - count number of '%s' in msgid and msgstr
287 for my $msg ( values %$href ) {
288 my $id_count = split(/%s/, $msg->{msgid}) - 1;
289 my $str_count = split(/%s/, $msg->{msgstr}) - 1;
290 next if $id_count == $str_count ||
291 $msg->{msgstr} eq '""' ||
292 grep { /fuzzy/ } @{$msg->{_flags}};
293 warn_normal
294 "unconsistent %s count: ($id_count/$str_count):\n" .
295 " line: " . $msg->{loaded_line_number} . "\n" .
296 " msgid: " . $msg->{msgid} . "\n" .
297 " msgstr: " . $msg->{msgstr} . "\n", undef;
301 # set our charset in to UTF-8
302 if (!defined $charset_in) {
303 $charset_in = TmplTokenizer::charset_canon 'UTF-8';
304 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
306 # set our charset out to UTF-8
307 if (!defined $charset_out) {
308 $charset_out = TmplTokenizer::charset_canon 'UTF-8';
309 warn "Warning: Charset Out defaulting to $charset_out\n";
311 my $xgettext = './xgettext.pl'; # actual text extractor script
312 my $st;
314 if ($action eq 'create') {
315 # updates the list. As the list is empty, every entry will be added
316 if (!-s $str_file) {
317 warn "Removing empty file $str_file\n";
318 unlink $str_file || die "$str_file: $!\n";
320 die "$str_file: Output file already exists\n" if -f $str_file;
321 my($tmph1, $tmpfile1) = tmpnam();
322 my($tmph2, $tmpfile2) = tmpnam();
323 close $tmph2; # We just want a name
324 # Generate the temporary file that acts as <MODULE>/POTFILES.in
325 for my $input (@in_files) {
326 print $tmph1 "$input\n";
328 close $tmph1;
329 warn "I $charset_in O $charset_out";
330 # Generate the specified po file ($str_file)
331 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
332 (defined $charset_in? ('-I', $charset_in): ()),
333 (defined $charset_out? ('-O', $charset_out): ())
335 # Run msgmerge so that the pot file looks like a real pot file
336 # We need to help msgmerge a bit by pre-creating a dummy po file that has
337 # the headers and the "" msgid & msgstr. It will fill in the rest.
338 if ($st == 0) {
339 # Merge the temporary "pot file" with the specified po file ($str_file)
340 # FIXME: msgmerge(1) is a Unix dependency
341 # FIXME: need to check the return value
342 unless (-f $str_file) {
343 local(*INPUT, *OUTPUT);
344 open(INPUT, "<$tmpfile2");
345 open(OUTPUT, ">$str_file");
346 while (<INPUT>) {
347 print OUTPUT;
348 last if /^\n/s;
350 close INPUT;
351 close OUTPUT;
353 $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
354 } else {
355 error_normal "Text extraction failed: $xgettext: $!\n", undef;
356 error_additional "Will not run msgmerge\n", undef;
358 # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
359 # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
361 } elsif ($action eq 'update') {
362 my($tmph1, $tmpfile1) = tmpnam();
363 my($tmph2, $tmpfile2) = tmpnam();
364 close $tmph2; # We just want a name
365 # Generate the temporary file that acts as <MODULE>/POTFILES.in
366 for my $input (@in_files) {
367 print $tmph1 "$input\n";
369 close $tmph1;
370 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
371 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
372 '--po-mode',
373 (defined $charset_in? ('-I', $charset_in): ()),
374 (defined $charset_out? ('-O', $charset_out): ()));
375 if ($st == 0) {
376 # Merge the temporary "pot file" with the specified po file ($str_file)
377 # FIXME: msgmerge(1) is a Unix dependency
378 # FIXME: need to check the return value
379 $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
380 } else {
381 error_normal "Text extraction failed: $xgettext: $!\n", undef;
382 error_additional "Will not run msgmerge\n", undef;
384 # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
385 # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
387 } elsif ($action eq 'install') {
388 if(!defined($out_dir)) {
389 usage_error("You must specify an output directory when using the install method.");
392 if ($in_dir eq $out_dir) {
393 warn "You must specify a different input and output directory.\n";
394 exit -1;
397 # Make sure the output directory exists
398 # (It will auto-create it, but for compatibility we should not)
399 -d $out_dir || die "$out_dir: The directory does not exist\n";
401 # Try to open the file, because Locale::PO doesn't check :-/
402 open(INPUT, "<$str_file") || die "$str_file: $!\n";
403 close INPUT;
405 # creates the new tmpl file using the new translation
406 for my $input (@in_files) {
407 die "Assertion failed"
408 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
409 # print "$input / $type\n";
410 if (!defined $type || $input =~ /\.(?:$type)$/) {
411 my $h = TmplTokenizer->new( $input );
412 $h->set_allow_cformat( 1 );
413 VerboseWarnings::set_input_file_name $input;
415 my $target = $out_dir . substr($input, length($in_dir));
416 my $targetdir = $` if $target =~ /[^\/]+$/s;
417 mkdir_recursive($targetdir) unless -d $targetdir;
418 print STDERR "Creating $target...\n" unless $quiet;
419 open( OUTPUT, ">$target" ) || die "$target: $!\n";
420 text_replace( $h, *OUTPUT );
421 close OUTPUT;
422 } else {
423 # just copying the file
424 my $target = $out_dir . substr($input, length($in_dir));
425 my $targetdir = $` if $target =~ /[^\/]+$/s;
426 mkdir_recursive($targetdir) unless -d $targetdir;
427 system("cp -f $input $target");
428 print STDERR "Copying $input...\n" unless $quiet;
432 } else {
433 usage_error('Unknown action specified.');
436 if ($st == 0) {
437 printf "The %s seems to be successful.\n", $action unless $quiet;
438 } else {
439 printf "%s FAILED.\n", "\u$action" unless $quiet;
441 exit 0;
443 ###############################################################################
445 =head1 SYNOPSIS
447 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
449 =head1 DESCRIPTION
451 This is an alternative version of the tmpl_process.pl script,
452 using standard gettext-style PO files. While there still might
453 be changes made to the way it extracts strings, at this moment
454 it should be stable enough for general use; it is already being
455 used for the Chinese and Polish translations.
457 Currently, the create, update, and install actions have all been
458 reimplemented and seem to work.
460 =head2 Features
462 =over
464 =item -
466 Translation files in standard Uniforum PO format.
467 All standard tools including all gettext tools,
468 plus PO file editors like kbabel(1) etc.
469 can be used.
471 =item -
473 Minor changes in whitespace in source templates
474 do not generally require strings to be re-translated.
476 =item -
478 Able to handle <TMPL_VAR> variables in the templates;
479 <TMPL_VAR> variables are usually extracted in proper context,
480 represented by a short %s placeholder.
482 =item -
484 Able to handle text input and radio button INPUT elements
485 in the templates; these INPUT elements are also usually
486 extracted in proper context,
487 represented by a short %S or %p placeholder.
489 =item -
491 Automatic comments in the generated PO files to provide
492 even more context (line numbers, and the names and types
493 of the variables).
495 =item -
497 The %I<n>$s (or %I<n>$p, etc.) notation can be used
498 for change the ordering of the variables,
499 if such a reordering is required for correct translation.
501 =item -
503 If a particular <TMPL_VAR> should not appear in the
504 translation, it can be suppressed with the %0.0s notation.
506 =item -
508 Using the PO format also means translators can add their
509 own comments in the translation files, if necessary.
511 =item -
513 Create, update, and install actions are all based on the
514 same scanner module. This ensures that update and install
515 have the same idea of what is a translatable string;
516 attribute names in tags, for example, will not be
517 accidentally translated.
519 =back
521 =head1 NOTES
523 Anchors are represented by an <AI<n>> notation.
524 The meaning of this non-standard notation might not be obvious.
526 The create action calls xgettext.pl to do the actual work;
527 the update action calls xgettext.pl and msgmerge(1) to do the
528 actual work.
530 =head1 BUGS
532 xgettext.pl must be present in the current directory; the
533 msgmerge(1) command must also be present in the search path.
534 The script currently does not check carefully whether these
535 dependent commands are present.
537 Locale::PO(3) has a lot of bugs. It can neither parse nor
538 generate GNU PO files properly; a couple of workarounds have
539 been written in TmplTokenizer and more is likely to be needed
540 (e.g., to get rid of the "Strange line" warning for #~).
542 This script may not work in Windows.
544 There are probably some other bugs too, since this has not been
545 tested very much.
547 =head1 SEE ALSO
549 xgettext.pl,
550 TmplTokenizer.pm,
551 msgmerge(1),
552 Locale::PO(3),
553 translator_doc.txt
555 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms
557 =cut