2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
12 tmpl_process3.pl - Alternative version of tmpl_process.pl
13 using gettext-compatible translation files
18 #use warnings; FIXME - Bug 2505
22 use File
::Temp
qw( :POSIX );
24 use VerboseWarnings
qw( :warn :die );
26 ###############################################################################
28 use vars
qw( @in_dirs @filenames @match @nomatch $str_file $out_dir $quiet );
29 use vars qw( @excludes $exclude_regex );
30 use vars qw( $recursive_p );
31 use vars qw( $pedantic_p );
33 use vars qw( $type ); # file extension (DOS form without the dot) to match
34 use vars qw( $charset_in $charset_out );
36 ###############################################################################
38 sub find_translation ($) {
42 $key = TmplTokenizer::string_canon($key);
43 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
44 $key = TmplTokenizer::quote_po($key);
46 if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){
48 return $1 . Locale::PO->dequote($href->{$key}->msgstr);
51 return Locale::PO->dequote($href->{$key}->msgstr);
59 sub text_replace_tag ($$) {
64 # value [tag=input], meta
65 my $tag = lc($1) if $t =~ /^<(\S+)/s;
67 for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
69 next if $a eq 'label' && $tag ne 'optgroup';
70 next if $a eq 'content' && $tag ne 'meta';
71 next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio)$/)); # FIXME
73 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
75 # for selected attributes replace '[%..%]' with '%s' and remember matches
76 if ( $a =~ /title|value|alt|content|placeholder/ ) {
77 while ( $val =~ s/(\[\%.*?\%\])/\%s/ ) {
82 # find translation for transformed attributes
83 my $s = find_translation($val);
84 # replace '%s' with original content (in order) on translated string, this is fragile!
85 if ( $a =~ /title|value|alt|content|placeholder/ and @ttvar ) {
87 my $var = shift @ttvar;
91 if ($attr->{$a}->[1] ne $s) { #FIXME
92 $attr->{$a}->[1] = $s; # FIXME
93 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
101 . join('', map { if ($_ ne '/'){
102 sprintf(' %s="%s"', $_, $attr->{$_}->[1]);
109 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
110 || $a cmp $b # Sort attributes BZ 22236
120 sub text_replace (**) {
121 my($h, $output) = @_;
123 my $s = TmplTokenizer::next_token $h;
124 last unless defined $s;
125 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
126 if ($kind eq C4::TmplTokenType::TEXT) {
127 print $output find_translation($t);
128 } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
129 my $fmt = find_translation($s->form);
130 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
132 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
133 $kind == C4::TmplTokenType::TAG && %$attr?
134 text_replace_tag($t, $attr): $t });
135 } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
136 print $output text_replace_tag($t, $attr);
137 } elsif ($s->has_js_data) {
138 for my $t (@{$s->js_data}) {
139 # FIXME for this whole block
141 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
144 print $output $t->[1];
147 } elsif (defined $t) {
148 # Quick fix to bug 4472
149 $t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/ ;
156 my($dir, $type, $action) = @_;
157 my $filenames = join ('|', @filenames); # used to update strings from this file
158 my $match = join ('|', @match); # use only this files
159 my $nomatch = join ('|', @nomatch); # do no use this files
161 if (opendir(DIR, $dir)) {
162 my @dirent = readdir DIR; # because DIR is shared when recursing
164 for my $dirent (@dirent) {
165 my $path = "$dir/$dirent";
166 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
167 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
170 my $basename = fileparse( $path );
172 if ( not @filenames or $basename =~ /($filenames)/i )
173 and ( not @match or $basename =~ /($match)/i ) # files to include
174 and ( not @nomatch or $basename !~ /($nomatch)/i ) # files not to include
175 and (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
176 } elsif (-d $path && $recursive_p) {
177 push @it, listfiles($path, $type, $action);
181 warn_normal "$dir: $!", undef;
186 ###############################################################################
188 sub mkdir_recursive ($) {
190 local($`, $&, $', $1);
191 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
192 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
193 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
195 print STDERR "Making directory $dir...\n" unless $quiet;
196 # creates with rwxrwxr-x permissions
197 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
201 ###############################################################################
205 my $h = $exitcode? *STDERR: *STDOUT;
207 Usage: $0 create [OPTION]
208 or: $0 update [OPTION]
209 or: $0 install [OPTION]
211 Create or update PO files from templates, or install translated templates.
213 -i, --input=SOURCE Get or update strings from SOURCE directory(s).
214 On create or update can have multiple values.
215 On install only one value.
216 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
217 --pedantic-warnings Issue warnings even for detected problems
218 which are likely to be harmless
219 -r, --recursive SOURCE in the -i option is a directory
220 -f, --filename=FILE FILE is a specific filename or part of it.
221 If given, only these files will be processed.
222 On update only relevant strings will be updated.
223 -m, --match=FILE FILE is a specific filename or part of it.
224 If given, only these files will be processed.
225 -n, --nomatch=FILE FILE is a specific filename or part of it.
226 If given, these files will not be processed.
227 -s, --str-file=FILE Specify FILE as the translation (po) file
228 for input (install) or output (create, update)
229 -x, --exclude=REGEXP Exclude dirs matching the given REGEXP
230 --help Display this help and exit
231 -q, --quiet no output to screen (except for errors)
233 The -o option is ignored for the "create" and "update" actions.
234 Try `perldoc $0` for perhaps more information.
239 ###############################################################################
241 sub usage_error
(;$) {
242 for my $msg (split(/\n/, $_[0])) {
243 print STDERR
"$msg\n";
245 print STDERR
"Try `$0 --help for more information.\n";
249 ###############################################################################
252 'input|i=s' => \
@in_dirs,
253 'filename|f=s' => \
@filenames,
254 'match|m=s' => \
@match,
255 'nomatch|n=s' => \
@nomatch,
256 'outputdir|o=s' => \
$out_dir,
257 'recursive|r' => \
$recursive_p,
258 'str-file|s=s' => \
$str_file,
259 'exclude|x=s' => \
@excludes,
260 'quiet|q' => \
$quiet,
261 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
265 VerboseWarnings
::set_application_name
$0;
266 VerboseWarnings
::set_pedantic_mode
$pedantic_p;
268 # keep the buggy Locale::PO quiet if it says stupid things
269 $SIG{__WARN__
} = sub {
271 print STDERR
$s unless $s =~ /^Strange line in [^:]+: #~/s
274 my $action = shift or usage_error
('You must specify an ACTION.');
275 usage_error
('You must at least specify input and string list filenames.')
276 if !@in_dirs || !defined $str_file;
278 # Type match defaults to *.tt plus *.inc if not specified
279 $type = "tt|inc|xsl|xml|def" if !defined($type);
281 # Check the inputs for being directories
282 for my $in_dir ( @in_dirs ) {
283 usage_error
("$in_dir: Input must be a directory.\n"
284 . "(Symbolic links are not supported at the moment)")
288 # Generates the global exclude regular expression
289 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
292 # Generate the list of input files if a directory is specified
293 # input is a directory, generates list of files to process
295 for my $fn ( @filenames ) {
296 die "You cannot specify input files and directories at the same time.\n"
299 for my $in_dir ( @in_dirs ) {
300 $in_dir =~ s/\/$//; # strips the trailing / if any
301 @in_files = ( @in_files, listfiles
($in_dir, $type, $action));
304 # restores the string list from file
305 $href = Locale
::PO
->load_file_ashash($str_file);
307 # guess the charsets. HTML::Templates defaults to iso-8859-1
309 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
310 $charset_out = TmplTokenizer
::charset_canon
$2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
311 $charset_in = $charset_out;
312 # for my $msgid (keys %$href) {
313 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
314 # my $candidate = TmplTokenizer::charset_canon $2;
315 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
316 # if defined $charset_in && $charset_in ne $candidate;
317 # $charset_in = $candidate;
321 # BUG6464: check consistency of PO messages
322 # - count number of '%s' in msgid and msgstr
323 for my $msg ( values %$href ) {
324 my $id_count = split(/%s/, $msg->{msgid
}) - 1;
325 my $str_count = split(/%s/, $msg->{msgstr
}) - 1;
326 next if $id_count == $str_count ||
327 $msg->{msgstr
} eq '""' ||
328 grep { /fuzzy/ } @
{$msg->{_flags
}};
330 "unconsistent %s count: ($id_count/$str_count):\n" .
331 " line: " . $msg->{loaded_line_number
} . "\n" .
332 " msgid: " . $msg->{msgid
} . "\n" .
333 " msgstr: " . $msg->{msgstr
} . "\n", undef;
337 # set our charset in to UTF-8
338 if (!defined $charset_in) {
339 $charset_in = TmplTokenizer
::charset_canon
'UTF-8';
340 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n" unless ( $quiet );
342 # set our charset out to UTF-8
343 if (!defined $charset_out) {
344 $charset_out = TmplTokenizer
::charset_canon
'UTF-8';
345 warn "Warning: Charset Out defaulting to $charset_out\n" unless ( $quiet );
347 my $xgettext = './xgettext.pl'; # actual text extractor script
350 if ($action eq 'create') {
351 # updates the list. As the list is empty, every entry will be added
353 warn "Removing empty file $str_file\n" unless ( $quiet );
354 unlink $str_file || die "$str_file: $!\n";
356 die "$str_file: Output file already exists\n" if -f
$str_file;
357 my($tmph1, $tmpfile1) = tmpnam
();
358 my($tmph2, $tmpfile2) = tmpnam
();
359 close $tmph2; # We just want a name
360 # Generate the temporary file that acts as <MODULE>/POTFILES.in
361 for my $input (@in_files) {
362 print $tmph1 "$input\n";
365 warn "I $charset_in O $charset_out" unless ( $quiet );
366 # Generate the specified po file ($str_file)
367 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
368 (defined $charset_in?
('-I', $charset_in): ()),
369 (defined $charset_out?
('-O', $charset_out): ())
371 # Run msgmerge so that the pot file looks like a real pot file
372 # We need to help msgmerge a bit by pre-creating a dummy po file that has
373 # the headers and the "" msgid & msgstr. It will fill in the rest.
375 # Merge the temporary "pot file" with the specified po file ($str_file)
376 # FIXME: msgmerge(1) is a Unix dependency
377 # FIXME: need to check the return value
378 unless (-f
$str_file) {
379 local(*INPUT
, *OUTPUT
);
380 open(INPUT
, "<$tmpfile2");
381 open(OUTPUT
, ">$str_file");
389 $st = system("msgmerge ".($quiet?
'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
391 error_normal
"Text extraction failed: $xgettext: $!\n", undef;
392 error_additional
"Will not run msgmerge\n", undef;
394 unlink $tmpfile1 || warn_normal
"$tmpfile1: unlink failed: $!\n", undef;
395 unlink $tmpfile2 || warn_normal
"$tmpfile2: unlink failed: $!\n", undef;
397 } elsif ($action eq 'update') {
398 my($tmph1, $tmpfile1) = tmpnam
();
399 my($tmph2, $tmpfile2) = tmpnam
();
400 close $tmph2; # We just want a name
401 # Generate the temporary file that acts as <MODULE>/POTFILES.in
402 for my $input (@in_files) {
403 print $tmph1 "$input\n";
406 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
407 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
409 (defined $charset_in?
('-I', $charset_in): ()),
410 (defined $charset_out?
('-O', $charset_out): ()));
412 # Merge the temporary "pot file" with the specified po file ($str_file)
413 # FIXME: msgmerge(1) is a Unix dependency
414 # FIXME: need to check the return value
416 my ($tmph3, $tmpfile3) = tmpnam
();
417 $st = system("msgcat $str_file $tmpfile2 > $tmpfile3");
418 $st = system("msgmerge ".($quiet?
'-q':'')." -s $str_file $tmpfile3 -o - | msgattrib --no-obsolete -o $str_file")
421 $st = system("msgmerge ".($quiet?
'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
424 error_normal
"Text extraction failed: $xgettext: $!\n", undef;
425 error_additional
"Will not run msgmerge\n", undef;
427 unlink $tmpfile1 || warn_normal
"$tmpfile1: unlink failed: $!\n", undef;
428 unlink $tmpfile2 || warn_normal
"$tmpfile2: unlink failed: $!\n", undef;
430 } elsif ($action eq 'install') {
431 if(!defined($out_dir)) {
432 usage_error
("You must specify an output directory when using the install method.");
435 if ( scalar @in_dirs > 1 ) {
436 usage_error
("You must specify only one input directory when using the install method.");
439 my $in_dir = shift @in_dirs;
441 if ($in_dir eq $out_dir) {
442 warn "You must specify a different input and output directory.\n";
446 # Make sure the output directory exists
447 # (It will auto-create it, but for compatibility we should not)
448 -d
$out_dir || die "$out_dir: The directory does not exist\n";
450 # Try to open the file, because Locale::PO doesn't check :-/
451 open(INPUT
, "<$str_file") || die "$str_file: $!\n";
454 # creates the new tmpl file using the new translation
455 for my $input (@in_files) {
456 die "Assertion failed"
457 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
459 my $target = $out_dir . substr($input, length($in_dir));
460 my $targetdir = $` if $target =~ /[^\/]+$/s;
462 if (!defined $type || $input =~ /\.(?:$type)$/) {
463 my $h = TmplTokenizer->new( $input );
464 $h->set_allow_cformat( 1 );
465 VerboseWarnings::set_input_file_name $input;
466 mkdir_recursive($targetdir) unless -d $targetdir;
467 print STDERR "Creating $target...\n" unless $quiet;
468 open( OUTPUT, ">$target" ) || die "$target: $!\n";
469 text_replace( $h, *OUTPUT );
472 # just copying the file
473 mkdir_recursive($targetdir) unless -d $targetdir;
474 system("cp -f $input $target");
475 print STDERR "Copying $input...\n" unless $quiet;
480 usage_error('Unknown action specified.');
484 printf "The %s seems to be successful.\n", $action unless $quiet;
486 printf "%s FAILED.\n", "\u$action" unless $quiet;
490 ###############################################################################
494 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
498 This is an alternative version of the tmpl_process.pl script,
499 using standard gettext-style PO files. While there still might
500 be changes made to the way it extracts strings, at this moment
501 it should be stable enough for general use; it is already being
502 used for the Chinese and Polish translations.
504 Currently, the create, update, and install actions have all been
505 reimplemented and seem to work.
513 Translation files in standard Uniforum PO format.
514 All standard tools including all gettext tools,
515 plus PO file editors like kbabel(1) etc.
520 Minor changes in whitespace in source templates
521 do not generally require strings to be re-translated.
525 Able to handle <TMPL_VAR> variables in the templates;
526 <TMPL_VAR> variables are usually extracted in proper context,
527 represented by a short %s placeholder.
531 Able to handle text input and radio button INPUT elements
532 in the templates; these INPUT elements are also usually
533 extracted in proper context,
534 represented by a short %S or %p placeholder.
538 Automatic comments in the generated PO files to provide
539 even more context (line numbers, and the names and types
544 The %I<n>$s (or %I<n>$p, etc.) notation can be used
545 for change the ordering of the variables,
546 if such a reordering is required for correct translation.
550 If a particular <TMPL_VAR> should not appear in the
551 translation, it can be suppressed with the %0.0s notation.
555 Using the PO format also means translators can add their
556 own comments in the translation files, if necessary.
560 Create, update, and install actions are all based on the
561 same scanner module. This ensures that update and install
562 have the same idea of what is a translatable string;
563 attribute names in tags, for example, will not be
564 accidentally translated.
570 Anchors are represented by an <AI<n>> notation.
571 The meaning of this non-standard notation might not be obvious.
573 The create action calls xgettext.pl to do the actual work;
574 the update action calls xgettext.pl, msgmerge(1) and msgattrib(1)
575 to do the actual work.
579 xgettext.pl must be present in the current directory; both
580 msgmerge(1) and msgattrib(1) must also be present in the search path.
581 The script currently does not check carefully whether these
582 dependent commands are present.
584 Locale::PO(3) has a lot of bugs. It can neither parse nor
585 generate GNU PO files properly; a couple of workarounds have
586 been written in TmplTokenizer and more is likely to be needed
587 (e.g., to get rid of the "Strange line" warning for #~).
589 This script may not work in Windows.
591 There are probably some other bugs too, since this has not been
602 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms