the showlog script
[kugel-rb.git] / tools / genlang
blobbe7d791afe264e32e6caba00e877182c385950f5
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 by Daniel Stenberg
# binary version for the binary lang file
my $langversion = 3; # 3 was the latest one used in the v1 format

# A note for future users and readers: The original v1 language system allowed
# the build to create and use a different language than english built-in. We
# removed that feature from our build-system, but the build scripts still had
# the ability. But, starting now, this ability is no longer provided since I
# figured it was boring and unnecessary to write support for now since we
# don't use it anymore.

# Without a language file argument there is nothing to do: show the help text.
# The text mirrors the option checks done further down: -e is demanded for
# -b, -o and -u, and -t is demanded for every mode except -u.
if(!$ARGV[0]) {
    print <<MOO
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
;
    exit;
}
# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file, and at the end of each phrase compare:
#  A) all source strings; if anything changed, a comment about it is output
#  B) the desc fields
#
# 3) output the phrase together with the comments from above
# 4) find the phrases that the translated version didn't have, and output
#    the english version of those
# The single-letter globals below are filled in by perl's -s switch parsing
# (see the #! line); give them readable names.
my $prefix = $p;
my $binary = $b;
my $update = $u;

my $english = $e;
my $voiceout = $o;

# Count how many of the mutually exclusive output modes were requested.
my $check = 0;
for my $mode ($binary, $prefix, $update, $voiceout) {
    $check++ if $mode;
}

if($check > 1) {
    print "Please use only one of -p, -u, -o and -b\n";
    exit;
}
if(!$check) {
    print "Please use at least one of -p, -u, -o and -b\n";
    exit;
}
if(!$english && ($binary || $update || $voiceout)) {
    # these three modes all need the english file as reference
    print "Please use -e too when you use -b, -o or -u\n";
    exit;
}

my $target = $t;
if(!$update && !$target) {
    # only the update mode works without a specific target
    print "Please specify a target (with -t)!\n";
    exit;
}
my $verbose=$v;

my %id; # string to num hash
my @idnum; # num to string array

my %source; # id string to source phrase hash
my %dest; # id string to dest phrase hash
my %voice; # id string to voice phrase hash

my $input = $ARGV[0];

# $m names the handler sub for the section currently being parsed, @m is the
# stack of the enclosing sections' handlers.
my @m;
my $m="blank";
# Check whether $string matches the shell-style wildcard $pattern.
#
#   $string  - the text to test (a target name)
#   $pattern - wildcard pattern; '*' matches any run of characters (including
#              none) and '?' matches exactly one character, everything else is
#              taken literally
#
# Returns true when the WHOLE string matches the pattern.
sub match {
    my ($string, $pattern)=@_;

    # Neutralize all regex metacharacters first, then turn the (now escaped)
    # wildcards into their regex counterparts.
    my $regex = quotemeta($pattern);
    $regex =~ s/\\\*/.*/g;
    $regex =~ s/\\\?/./g;

    # Anchor at both ends so that "recorder" does not match "fmrecorder".
    return ($string =~ /^$regex\z/);
}
# Handler used while no section is open: ignores whatever it is given.
sub blank {
    return;
}
# Name/value pairs collected from the header section.
my %head;

# Section handler: remember one "name: value" line of the header section.
# The first argument (the full input line) is not used.
sub header {
    my (undef, $key, $value) = @_;
    $head{$key} = $value;
}
# Name/value pairs of the phrase that is currently being parsed.
my %phrase;

# Section handler: remember one "name: value" line of the phrase section.
# The first argument (the full input line) is not used.
sub phrase {
    my (undef, $key, $value) = @_;
    $phrase{$key} = $value;
}
# Common worker for the <source>, <dest> and <voice> section handlers.
#
#   $debug  - label of the calling section, only useful for debug prints
#   $strref - reference to the scalar that receives the value
#   $full   - the complete input line (unused)
#   $n      - comma separated list of target patterns from the line
#   $v      - the value given for those targets
#
# As soon as any of the colon separated parts of the global $target matches
# one of the patterns, the value is stored through $strref and returned.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    for my $wanted (split(" *, *", $n)) {
        for my $part (split(":", $target)) {
            next if !match($part, $wanted);

            $$strref = $v;
            return $v;
        }
    }
}
# Source string picked for the current phrase and target.
my $src;

# Section handler for lines inside <source>.
sub source {
    return parsetarget("src", \$src, @_);
}
# Translated string picked for the current phrase and target.
my $dest;

# Section handler for lines inside <dest>.
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# Voice string picked for the current phrase and target.
my $voice;

# Section handler for lines inside <voice>.
sub voice {
    return parsetarget("voice", \$voice, @_);
}
my %idmap;   # phrase id string => id number, in the english file's order
my %english; # phrase id string => the text between <phrase> and </phrase>
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id

    # three-argument open: the file name can never be taken for a mode
    open(my $eng, '<', $english) or die "can't open $english: $!";
    my @phrase;
    my $id;
    while(<$eng>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {
            # this is the end of a phrase, add it to the english hash
            $english{$id}=join("", @phrase);
            undef @phrase;
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
        }

        # NOTE(review): $id is not reset between phrases, so a phrase without
        # an id: line overwrites the text stored for the previous id.
        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $id=$1;
            # voice-only entries get a different range
            if($id =~ /^VOICE_/) {
                # Assign an ID number to this entry
                $idmap{$id}=$vidnum;
                $vidnum++;
            }
            else {
                # Assign an ID number to this entry
                $idmap{$id}=$idnum;
                $idnum++;
            }
        }
    }
    close($eng);
}
# Compare an english phrase with its translated counterpart and print the
# updated translated phrase to the currently selected output handle.
#
#   $idstr  - id string of the phrase (not used by the comparison itself)
#   $engref - reference to the english phrase lines, WITHOUT trailing newlines
#   $locref - reference to the translated phrase lines, WITH trailing newlines
#
# The desc field and the <source> section are compared. Where they differ,
# the english version is output and the previous translated version is kept
# as a "###" comment. All other lines are output untouched. The caller's
# arrays are not modified.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);
    my $insource=0;

    # Pick up the desc and the contents of <source> from the english phrase
    for my $eline (@$engref) {
        if($eline =~ /^ *desc: (.*)/) {
            $edesc=$1;
        }
        elsif($eline =~ / *\<source\>/i) {
            $insource=1;
        }
        elsif($insource) {
            if($eline =~ / *\<\/source\>/i) {
                last;
            }
            $esource .= "$eline\n";
        }
    }

    my @show; # lines waiting to be output

    $insource = 0;
    for my $orig (@$locref) {
        # work on a copy: the loop variable aliases the caller's array element
        my $line = $orig;

        if($line =~ /^ *desc: (.*)/) {
            $ldesc=$1;
            if($edesc ne $ldesc) {
                $line = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @show, $line;
        }
        elsif($line =~ / *\<source\>/i) {
            $insource=1;
            push @show, $line;
        }
        elsif($insource) {
            if($line =~ / *\<\/source\>/i) {
                $insource = 0;
                # flush everything up to and including the <source> tag
                print @show;
                if($esource ne $lsource) {
                    print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                    for(split("\n", $lsource)) {
                        print "### $_\n";
                    }
                    print $esource;
                }
                else {
                    print $lsource;
                }
                undef @show; # start over

                push @show, $line;
            }
            else {
                $lsource .= "$line";
            }
        }
        else {
            push @show, $line;
        }
    }

    print @show;
}
my $idcount=0; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#
# The file is read line by line. Tags like <phrase> or <source> switch the
# current section handler ($m, with the enclosing handlers stacked in @m),
# and every "name: value" line is handed to that handler. When </phrase> is
# seen the collected phrase is registered (or, in update mode, compared with
# the english version).
#

open(my $lang, '<', $input) or
    die "couldn't read language file named $input: $!\n";
my @phrase; # all lines of the phrase currently being read
my $line=0; # input line counter, used in warnings
while(<$lang>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    my $ll = $_;

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/) {
                        print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                        $voice='""';
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }
                }
                # forget everything about this phrase before the next starts
                undef $src;
                undef $dest;
                undef $voice;
                undef %phrase;

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # $m holds the NAME of the handler sub for the current section, so
        # this is a call by symbolic reference (header, phrase, source, ...)
        &$m($_, $name, $val);
    }
}
close($lang);
# Update mode: every english phrase that is still non-empty in %english was
# never seen in the translated file, so output the english version of it.
if($update) {
    # Walk the phrases in the order of their english id numbers rather than
    # in random hash order, so that repeated runs give identical output.
    my @ordered = sort {
        (($idmap{$a} || 0) <=> ($idmap{$b} || 0)) || ($a cmp $b)
    } keys %english;

    for my $idstr (@ordered) {
        if($english{$idstr}) {
            print "###\n",
            "### This phrase below was not present in the translated file\n",
            "<phrase>\n";
            print $english{$idstr};
            print "</phrase>\n";
        }
    }
}
if($prefix) {
    # We create a .c and .h file: the header gets an enum with all phrase ids,
    # the source file gets all destination strings as one concatenated,
    # \0-separated string.

    open(my $hfile, '>', "$prefix.h") or
        die "couldn't create file $prefix.h\n";
    open(my $cfile, '>', "$prefix.c") or
        die "couldn't create file $prefix.c\n";

    print $hfile <<MOO
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO
    ;

    print $cfile <<MOO
/* This file was automaticly generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO
    ;

    # Output the ID names for the enum in the header file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf $hfile (" %s,\n", $name);
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print $hfile <<MOO
 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 /* --- below this follows voice-only strings --- */
 VOICEONLY_DELIMITER = 0x8000,
MOO
    ;

    # Output the voice-only ID names for the enum in the header file
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf $hfile (" %s,\n", $name);
    }

    # Output end of enum
    print $hfile "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf $cfile (" %s\n", $dest);
    }

    # Output end of string chunk
    print $cfile <<MOO
;
/* end of generated string list */
MOO
    ;

    # buffered write errors (disk full etc) only show up when closing
    close($hfile) or die "couldn't write file $prefix.h\n";
    close($cfile) or die "couldn't write file $prefix.c\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Can't create $binary";
    binmode $outf;
    printf $outf ("\x1a%c", $langversion); # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $engid = $idmap{$name};

            if(!defined $engid) {
                # Writing this phrase anyway would store it with id 0 and
                # thereby clash with the real first phrase.
                print STDERR "Warning: $name is not present in the english file, skipped\n";
                next;
            }

            # record layout: id high byte, id low byte, string, terminating 0
            printf $outf ("%c%c%s\x00", ($engid>>8), ($engid&0xff), $dest);
        }
    }

    # buffered write errors (disk full etc) only show up when closing
    close($outf) or die "Can't write $binary";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english id number => index of the phrase in the translated file

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount-1)));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $i (@ids) {
        my $name=$idnum[$i]; # get the ID

        # phrases without a voice string are left out
        next if !$voice{$name};

        # Now, make sure we get the number from the english sort order:
        my $engid = $idmap{$name};

        # a phrase the english file does not know has no place in the output
        next if !defined $engid;

        $engl[$engid] = $i;
    }

    # Second pass: output in the english id order
    for my $i (@ids) {

        my $o = $engl[$i];

        if(!defined $o) {
            # No translated phrase maps to this number. Without this check
            # the undefined index would silently address element 0 and the
            # first phrase would be output once more under this number.
            print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
            next;
        }

        my $name=$idnum[$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$i\nid: $name\nvoice: $dest\n";
    }
}
# Verbose mode: finish with the number of scanned ids and the header fields.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for my $name (sort keys %head) {
        # the name comes from the input file and must not be used as (part
        # of) the format string, a '%' in it would be taken as a conversion
        printf "%s: %s\n", $name, $head{$name};
    }
}