Remove redundant remove call, which caused problems in the simulator.
[kugel-rb.git] / tools / genlang
blob9be77a603084d3a7ba11847bea2dea69b9a8cb7e
#!/usr/bin/perl -s
#             __________               __   ___.
#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
#                     \/            \/     \/    \/            \/
# $Id$
#
# Copyright (C) 2006 - 2007 by Daniel Stenberg
#
# Version byte written into the header of binary (.lng) language files.
my $langversion = 4; # 3 was the latest one used in the v1 format
# A note for future users and readers: The original v1 language system allowed
# the build to create and use a built-in language other than English. We
# removed that feature from our build system, but the build scripts still had
# the ability. Starting now, this ability is no longer provided, since I
# figured it was boring and unnecessary to write support for something we
# don't use anymore.
# Without a language file argument there is nothing to do: print the usage
# text and stop. The option names shown here correspond to the variables that
# perl's -s switch fills in ($p, $b, $u, $e, $t, $i, $o and $v).
if(!$ARGV[0]) {
    print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}
# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file, for each end of phrase, compare:
#  A) all source strings; if there's any change, a comment about it should
#     be output
#  B) the desc fields
#
# 3) output the phrase with the comments from above
# 4) check which phrases the translated version didn't have, and spit out
#    the english version of those
# Copy the -s command line switches into named variables and validate the
# combination. Every failed check prints a hint and exits.
my $prefix = $p;
my $binary = $b;
my $update = $u;

my $english = $e;
my $voiceout = $o;

# Exactly one of the four operating modes must be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);

if($check > 1) {
    print "Please use only one of -p, -u, -o and -b\n";
    exit;
}
if(!$check) {
    print "Please use at least one of -p, -u, -o and -b\n";
    exit;
}

# All modes except -p need the english master file.
if(($binary || $update || $voiceout) && !$english) {
    print "Please use -e too when you use -b, -o or -u\n";
    exit;
}

my $target_id = $i;
if($binary && !$target_id) {
    print "Please specify a target id number (with -i)!\n";
    exit;
}

# Update mode works on all targets at once, so only that mode may omit -t.
my $target = $t;
if(!$target && !$update) {
    print "Please specify a target (with -t)!\n";
    exit;
}
my $verbose=$v;
# State collected while parsing the translated language file.
my %id; # string to num hash
my @idnum; # num to string array

my %source; # id string to source phrase hash
my %dest; # id string to dest phrase hash
my %voice; # id string to voice phrase hash

my $input = $ARGV[0];

# @m is the stack of enclosing tag names and $m the name of the current one.
# $m doubles as the name of the handler sub called for each "name: value"
# line, which is why the initial value is the name of the no-op sub blank().
my @m;
my $m="blank";
# Return a copy of the given string with leading and trailing whitespace
# removed.
sub trim {
    my ($string) = @_;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}
# Glob-style match of $string against $pattern.
#
# In $pattern, '*' matches any run of characters (including none) and '?'
# matches exactly one character. Every other character is matched literally,
# so regex metacharacters in a pattern cannot change its meaning. The whole
# string has to match. Returns true on a match.
sub match {
    my ($string, $pattern) = @_;

    # Translate the glob one character at a time so that only the two
    # wildcards get a regex meaning.
    my $regex = join("",
                     map { $_ eq "*" ? ".*" : $_ eq "?" ? "." : quotemeta($_) }
                     split(//, $pattern));

    return ($string =~ /^$regex\z/);
}
# Handler for "name: value" lines that appear outside of any known section.
# Such lines are ignored.
sub blank {
    # nothing to do
    return;
}
# Values collected from the <header> section, keyed by field name.
my %head;

# Handler for a "name: value" line inside <header>: remember the value under
# its name. $full is the complete input line and is not used.
sub header {
    my ($full, $n, $v) = @_;
    $head{$n} = $v;
}
# Fields of the <phrase> currently being parsed (id, desc, ...), keyed by
# field name.
my %phrase;

# Handler for a "name: value" line directly inside <phrase>: remember the
# value under its name. $full is the complete input line and is not used.
sub phrase {
    my ($full, $n, $v) = @_;
    $phrase{$n} = $v;
}
# Shared worker for the source/dest/voice handlers.
#
# $n is the left-hand side of a "targets: value" line: a comma separated list
# of target patterns. $target (from -t) is a colon separated list of names for
# the target being built. If any name matches any pattern, $v is stored
# through $strref and returned; otherwise nothing is stored.
#
# $debug (a label for the calling section) and $full (the complete input
# line) are accepted for the callers' convenience but not used.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;

    for my $test (split(" *, *", $n)) {
        for my $part (split(":", $target)) {
            if(match($part, $test)) {
                $$strref = $v;
                return $v;
            }
        }
    }
    return;
}
# Source string of the current phrase for the selected target.
my $src;

# Handler for lines inside <source>: keep the value if the line applies to
# the selected target.
sub source {
    parsetarget("src", \$src, @_);
}
# Translated (destination) string of the current phrase for the selected
# target.
my $dest;

# Handler for lines inside <dest>: keep the value if the line applies to the
# selected target.
sub dest {
    parsetarget("dest", \$dest, @_);
}
# Voice string of the current phrase for the selected target.
my $voice;

# Handler for lines inside <voice>: keep the value if the line applies to the
# selected target.
sub voice {
    parsetarget("voice", \$voice, @_);
}
my %idmap;   # phrase id string => id number in english file order
my %english; # phrase id string => full text of the english phrase
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id
    open(my $eng, "<", $english) || die "can't open $english: $!";
    my @phrase;     # lines of the phrase being read
    my $id;         # id of the phrase, set only once we know it is used
    my $maybeid;    # most recently seen "id:" value
    my $withindest; # true while inside <dest> ... </dest>
    my $deststr;    # dest string of this phrase for the selected target
    while(<$eng>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {

            # if id is something, we count and store this phrase
            if($id) {
                # voice-only entries get a different range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap{$id}=$vidnum;
                    $vidnum++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap{$id}=$idnum;
                    $idnum++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
            if($_ =~ /^ *\<dest\>/i) {
                $withindest=1;
                $deststr="";
            }
            elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
                $withindest=0;

                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                }
                # otherwise the phrase is not used for this target and $id
                # stays empty so that no number is assigned to it
            }
            elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($_, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        # checked for every line, whatever branch handled it above
        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
        }
    }
    close($eng);
}
# Compare an english phrase with its translated counterpart and print the
# updated translated phrase to stdout.
#
#   $idstr   id of the phrase (not used in the output)
#   $engref  reference to the lines of the english phrase, without newlines
#   $locref  reference to the lines of the translated phrase, with newlines
#
# The desc field and the <source> section are checked. Where they differ, the
# english version replaces the translated one and the previous text is kept
# as '###' comment lines. Everything else is printed unchanged.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);
    my $mode=0; # true while inside a <source> section

    # Pick the desc and the <source> lines out of the english phrase.
    for my $l (@$engref) {
        if($l =~ /^ *#/) {
            # comment
            next;
        }
        if($l =~ /^ *desc: (.*)/) {
            $edesc=$1;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                last;
            }
            $esource .= "$l\n";
        }
    }

    my @show; # output lines held back until we know what precedes them

    $mode = 0;
    for my $orig (@$locref) {
        my $l = $orig; # work on a copy, leave the caller's array alone
        if($l =~ /^ *desc: (.*)/) {
            $ldesc=$1;
            if(trim($edesc) ne trim($ldesc)) {
                $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @show, $l;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
            push @show, $l;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                # End of the translated <source>: flush what we have and
                # then output either the english or the translated lines.
                $mode = 0;
                print @show;
                if(trim($esource) ne trim($lsource)) {
                    print "### The <source> section differs from the english!\n",
                          "### the previously used one is commented below:\n";
                    for(split("\n", $lsource)) {
                        print "### $_\n";
                    }
                    print $esource;
                }
                else {
                    print $lsource;
                }
                undef @show; # start over

                push @show, $l;
            }
            else {
                $lsource .= "$l";
            }
        }
        else {
            push @show, $l;
        }
    }

    print @show;
}
my $idcount; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#

open(my $lang, "<", $input) ||
    die "couldn't read language file named $input: $!\n";
my @phrase;   # lines of the phrase being read
my $line = 0; # input line number, used in warnings
while(<$lang>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    my $ll = $_;

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/) {
                        print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                        $voice='""';
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }

                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # $m holds the name of the enclosing tag; the sub with that name
        # (header, phrase, source, dest, voice or blank) handles the line.
        &$m($_, $name, $val);
    }
}
close($lang);
# In update mode every english phrase that was found in the translated file
# has been cleared from %english by now. Whatever is left was missing from
# the translation, so output the english version of it. The ids are sorted
# to make the output order reproducible between runs.
if($update) {
    for(sort keys %english) {
        if($english{$_}) {
            print "###\n",
                  "### This phrase below was not present in the translated file\n",
                  "<phrase>\n";
            print $english{$_};
            print "</phrase>\n";
        }
    }
}
# Produce the output for the selected mode: C source and header (-p), binary
# language file (-b) or voice listing (-o). Update mode (-u) has already
# written its output at this point.
if($prefix) {
    # We create a .c and .h file

    open(my $hfile, ">", "$prefix.h") ||
        die "couldn't create file $prefix.h\n";
    open(my $cfile, ">", "$prefix.c") ||
        die "couldn't create file $prefix.c\n";

    print {$hfile} <<MOO;
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO

    print {$cfile} <<MOO;
/* This file was automaticly generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO

    # Output the ID names for the enum in the header file
    for my $index (0 .. $idcount - 1) {
        my $name=$idnum[$index]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} (" %s, /* %d */\n", $name, $index);
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print {$hfile} <<MOO;
 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 /* --- below this follows voice-only strings --- */
 VOICEONLY_DELIMITER = 0x8000,
MOO

    # Output the voice-only ID names for the enum in the header file
    for my $index (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$index]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} (" %s,\n", $name);
    }

    # Output end of enum
    print {$hfile} "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $index (0 .. $idcount - 1) {
        my $name=$idnum[$index]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf {$cfile} (" %s\n", $dest);
    }

    # Output end of string chunk; the lone ';' ends the C array initializer
    print {$cfile} <<MOO;
;
/* end of generated string list */
MOO

    close($hfile) || die "couldn't write file $prefix.h: $!\n";
    close($cfile) || die "couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, ">", $binary) or die "Can't create $binary: $!";
    binmode $outf;
    printf {$outf} ("\x1a%c%c", $langversion, $target_id); # magic lang file header

    # loop over the target phrases
    for my $index (0 .. $idcount - 1) {
        my $name=$idnum[$index]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $english_id = $idmap{$name};

            # two byte id, high byte first, then the zero terminated string
            printf {$outf} ("%c%c%s\x00", ($english_id>>8), ($english_id&0xff), $dest);
        }
    }

    # buffered write errors only show up when the file is closed
    close($outf) or die "Can't write $binary: $!";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english id number => id number in the translated file

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount-1)));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $index (@ids) {
        my $name=$idnum[$index]; # get the ID

        if($voice{$name}) {
            # Now, make sure we get the number from the english sort order:
            my $english_id = $idmap{$name};

            if(length($english_id)) {
                $engl[$english_id] = $index;
            }
        }
        else {
            # not used, mark it so
            $engl[$index] = -1
        }
    }

    # Output in english id order
    for my $index (@ids) {

        my $o = $engl[$index];

        if(($o < 0) || !length($o)) {
            print "#$index\nid: NOT_USED_$index\nvoice: \"\"\n";
            next;
        }

        my $name=$idnum[$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$index ($o)\nid: $name\nvoice: $dest\n";
    }
}
# With -v, finish with the number of scanned string IDs and a dump of the
# fields read from the <header> section.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for(keys %head) {
        # the key is passed as an argument, never as part of the format, so
        # a '%' in a header name cannot be taken for a conversion
        printf "%s: %s\n", $_, $head{$_};
    }
}