Fix red
[kugel-rb.git] / tools / genlang
blob45171290d63a6e9ab0c438d16c4672ffe339ad78
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
# Version number stored in the header of generated binary (.lng) files.
# The v1 format ended at version 3.
my $langversion = 4;

# Historical note: the v1 language system could build with a built-in
# language other than English. The build system stopped using that, and
# from this version on the tool itself no longer supports it either.

# Numeric value for each name accepted in a phrase's "user:" field.
my %user2num = (
    'core' => 1,
);
# Without an input file there is nothing to do: show the usage text and quit.
# Every switch documented here is read further down as a global set by
# perl's -s option parsing ($p, $b, $u, $e, $s, $t, $i, $o and $v).
if(!$ARGV[0]) {
    print <<'MOO';
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create the files lang.h and lang_core.c in the directory
    [prefix].

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you'll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o, -u or -s.

 -s
    Sort the Update language file in the same order as the strings in the
    English file.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}
72 # How update works:
74 # 1) scan the english file, keep the whole <phrase> for each phrase.
75 # 2) read the translated file, for each end of phrase, compare:
76 # A) all source strings, if there's any change there should be a comment about
77 # it output
78 # B) the desc fields
80 # 3) output the phrase with the comments from above
81 # 4) check which phrases that the translated version didn't have, and spit out
82 # the english version of those
# Copy the switches that perl's -s option parsing stored in package globals
# into named lexicals, then validate the combination. Every rejected
# invocation exits with a non-zero status so that a calling build notices.
my $prefix = $p;
my $binary = $b;
my $update = $u;
my $sortfile = $s;

my $english = $e;
my $voiceout = $o;

# Number of mutually exclusive operating modes that were requested.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);

if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
    exit 1;
}
if(!$check) {
    print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
    exit 1;
}

# Every mode except -p needs the English master file.
if(($binary || $update || $voiceout || $sortfile) && !$english) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit 1;
}

my $target_id = $i;
if($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit 1;
}

# Only -u and -s work without a target.
my $target = $t;
if(!$target && !$update && !$sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}
my $verbose=$v;
# Tables filled while the translated language file is parsed.
my %id;          # id string => id number
my @idnum;       # id number => id string

my %allphrases;  # id string => complete <phrase> text, used for sorting (-s)
my %source;      # id string => source phrase
my %dest;        # id string => dest phrase
my %voice;       # id string => voice phrase

# The language file to process.
my $input = $ARGV[0];

# Section tracking for the parser: $m names the handler sub for the section
# being read, @m holds the names of the enclosing sections.
my @m;
my $m = "blank";
# Return a copy of the given string with leading and trailing whitespace
# removed. The caller's value is not modified.
sub trim {
    my ($string) = @_;
    for ($string) {
        s/^\s+//;
        s/\s+$//;
    }
    return $string;
}
# Test a string against a simple wildcard pattern.
#
#   $string  - the text to test
#   $pattern - wildcard pattern: '*' matches any run of characters (including
#              none), '?' matches exactly one character, and everything else
#              matches itself literally
#
# The pattern has to cover the whole string. Returns true on a match.
sub match {
    my ($string, $pattern)=@_;

    # Escape everything first so that characters such as '.', '+' or '('
    # in the pattern are not interpreted as regex syntax, then turn the
    # (now escaped) wildcards into their regex equivalents.
    my $regex = quotemeta($pattern);
    $regex =~ s/\\\*/.*/g;
    $regex =~ s/\\\?/./g;

    return ($string =~ /^$regex\z/);
}
# Handler used while the parser is outside any known section; it ignores
# its arguments and does nothing.
sub blank {
    return;
}
# Name => value pairs collected by header().
my %head;

# Section handler: record one "name: value" pair in %head.
# Arguments are the full input line (unused), the name and the value.
sub header {
    my (undef, $key, $value) = @_;
    return $head{$key} = $value;
}
# Name => value pairs of the phrase currently being parsed.
my %phrase;

# Section handler: record one "name: value" pair in %phrase.
# Arguments are the full input line (unused), the name and the value.
sub phrase {
    my (undef, $key, $value) = @_;
    return $phrase{$key} = $value;
}
# Decide whether a "targets: value" line applies to the selected target.
#
#   $debug  - label naming the calling section (unused)
#   $strref - reference to the scalar that receives the value
#   $full   - the complete input line (unused)
#   $n      - comma separated list of target wildcard patterns
#   $v      - the value belonging to those targets
#
# Each colon separated part of the global $target is tried against each
# pattern with match(). On the first hit $v is stored through $strref and
# returned; without a hit $$strref is left alone.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    foreach my $wildcard (split(" *, *", $n)) {
        foreach my $part (split(":", $target)) {
            next unless match($part, $wildcard);
            $$strref = $v;
            return $v;
        }
    }
}
# Source string of the current phrase for the selected target.
my $src;

# Section handler for <source>: store the value in $src when the line
# applies to the selected target.
sub source {
    return parsetarget("src", \$src, @_);
}
# Dest (translated) string of the current phrase for the selected target.
my $dest;

# Section handler for <dest>: store the value in $dest when the line
# applies to the selected target.
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# Voice string of the current phrase for the selected target.
my $voice;

# Section handler for <voice>: store the value in $voice when the line
# applies to the selected target.
sub voice {
    return parsetarget("voice", \$voice, @_);
}
my %idmap;   # id string => id number, assigned in English file order
my %english; # id string => text of the English phrase (without the tags)
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but
    # for -u it is convenient.
    #
    # Besides %idmap and %english this also fills the package global
    # %sortorder (id string => position in the English file), which is read
    # again when sorted output is produced.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id

    # Three-argument open: the file name comes from the command line and
    # must never be interpreted as an open mode or a pipe.
    open(my $eng_fh, '<', $english)
        or die "Error: can't open $english: $!";

    my @phrase;      # lines of the phrase being collected
    my $id;          # id of the phrase, set only once it is known to be used
    my $maybeid;     # most recent "id:" value seen
    my $user;
    my $maybeuser;   # most recent "user:" value seen
    my $withindest;  # true while inside <dest> ... </dest>
    my $deststr;     # dest string that matched the selected target
    my $numphrases = 0;
    while(<$eng_fh>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {

            # if id is something, when we count and store this phrase
            if($id) {
                # voice-only entries get a difference range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap{$id}=$vidnum;
                    $vidnum++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap{$id}=$idnum;
                    $idnum++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
            if($_ =~ /^ *\<dest\>/i) {
                $withindest=1;
                $deststr="";
            }
            elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
                $withindest=0;

                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                    $user = $user2num{$maybeuser};
                }
                else {
                    # the phrase has no usable dest for this target: skip it
                }
            }
            elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($_, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
            $sortorder{$maybeid}=$numphrases++;
        }
        if($_ =~ /^ *user: ([^ \t\n]+)/i) {
            $maybeuser=$1;
        }
    }
    close($eng_fh);
}
# Compare an English phrase with its translated counterpart and print the
# updated translated phrase to stdout.
#
#   $idstr  - id of the phrase (unused)
#   $engref - reference to the lines of the English phrase, without newlines
#   $locref - reference to the lines of the translated phrase, with newlines
#
# When the 'desc' field or the <source> section differs from the English
# one, the English version is printed and the previous translated version
# is kept as '###' comment lines. A differing desc line is also rewritten
# in the caller's array.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($eng_desc, $loc_desc);
    my ($eng_source, $loc_source);

    # Pass 1: pick the desc and the <source> section out of the English text.
    my $in_source = 0;
    ENGLISH:
    foreach my $eline (@$engref) {
        next ENGLISH if $eline =~ /^ *#/;   # comment
        if($eline =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next ENGLISH;
        }
        if($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next ENGLISH;
        }
        next ENGLISH unless $in_source;
        last ENGLISH if $eline =~ / *\<\/source\>/i;
        $eng_source .= "$eline\n";
    }

    # Pass 2: walk the translated phrase. Lines are queued in @pending and
    # flushed when the end of the <source> section is reached and once more
    # at the very end.
    my @pending;
    $in_source = 0;
    foreach my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n desc: $eng_desc\n"
                if trim($eng_desc) ne trim($loc_desc);
            push @pending, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
        }
        elsif(!$in_source) {
            push @pending, $lline;
        }
        elsif($lline =~ / *\<\/source\>/i) {
            $in_source = 0;
            print @pending;
            if(trim($eng_source) eq trim($loc_source)) {
                print $loc_source;
            }
            else {
                print "### The <source> section differs from the english!\n",
                      "### the previously used one is commented below:\n";
                print "### $_\n" for split("\n", $loc_source);
                print $eng_source;
            }
            @pending = ($lline); # start over with the closing tag
        }
        else {
            $loc_source .= $lline;
        }
    }

    print @pending;
}
my $idcount; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers
my $line = 0; # current line number in $input, used in warnings

#
# Now start the scanning of the selected language string
#
# The file is read line by line. An opening tag pushes the current section
# name on @m and makes the tag name the new $m; a closing tag pops it again.
# "name: value" lines are handed to the sub whose name is in $m. At each
# </phrase> the collected data is stored, compared or queued, depending on
# the selected mode.
#

# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode or a pipe.
open(my $lang_fh, '<', $input)
    or die "Error: couldn't read language file named $input: $!\n";
my @phrase;     # raw lines of the phrase being read
my $header = 1; # true until the first line that is neither comment nor blank
while(<$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line - output it if it's part of the header
        if ($header and ($update || $sortfile)) {
            print($_);
        }
        next;
    }
    $header = 0;

    my $ll = $_;

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif($sortfile) {
                    $allphrases{$idstr}=join('',@phrase);
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
                        print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
                        $voice='""';
                    }
                    if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
                        print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
                    }

                    my $user = $user2num{trim($phrase{'user'})};
                    if(!$user) {
                        print STDERR "$input:$line:1: warning: unknown user!\n";
                        $user = 1;
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                        print "user: $user\n";
                    }

                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef $user;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # $m holds the NAME of the handler sub for the current section
        # (blank, header, phrase, source, dest or voice); call it by name.
        &$m($_, $name, $val);
    }
}
close($lang_fh);
if($update) {
    # Whatever still has text in %english was never seen in the translated
    # file (phrases that were found got their entry cleared while the file
    # was parsed). Emit those phrases in English file order, using the
    # positions recorded in %sortorder, so that the output is reproducible
    # instead of following perl's arbitrary hash order.
    my @missing = sort {
        ($sortorder{$a} <=> $sortorder{$b}) || ($a cmp $b)
    } grep { $english{$_} } keys %english;

    for my $idstr (@missing) {
        print "###\n",
              "### This phrase below was not present in the translated file\n",
              "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}
# -s mode: print the collected phrases ordered by the positions stored in
# %sortorder.
if ($sortfile) {
    my @ordered = sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases;
    print @allphrases{@ordered};
}
# Produce the output of the selected mode:
#   -p  C header and source with the built-in strings
#   -b  binary language file
#   -o  id:/voice: listing
# All output files are opened with three-argument open on lexical handles
# and their close is checked, since buffered write errors only show up there.
if($prefix) {
    # We create a .c and .h file
    my $hpath = "$prefix/lang.h";
    my $cpath = "$prefix/lang_core.c";

    open(my $hfile, '>', $hpath)
        or die "Error: couldn't create file $prefix/lang.h\n";
    open(my $cfile, '>', $cpath)
        or die "Error: couldn't create file $prefix/lang_core.c\n";

    # get header file name (the path with every directory part removed)
    my $headername = "$prefix/lang.h";
    $headername =~ s/(.*\/)*//;

    print $hfile <<MOO
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO
    ;

    print $cfile <<MOO
/* This file was automatically generated using genlang, the strings come
   from "$input" */

#include "$headername"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO
    ;

    # Output the ID names for the enum in the header file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf $hfile (" %s, /* %d */\n", $name, $i-1);
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print $hfile <<MOO
LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
/* --- below this follows voice-only strings --- */
VOICEONLY_DELIMITER = 0x8000,
MOO
    ;

    # Output the ID names for the enum in the header file
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf $hfile (" %s,\n", $name);
    }

    # Output end of enum
    print $hfile "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf $cfile (" %s\n", $dest);
    }

    # Output end of string chunk
    print $cfile <<MOO
;
/* end of generated string list */
MOO
    ;

    close($hfile) or die "Error: couldn't write file $hpath: $!\n";
    close($cfile) or die "Error: couldn't write file $cpath: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Error: Can't create $binary";
    binmode $outf;
    printf $outf ("\x1a%c%c", $langversion, $target_id); # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $idnum = $idmap{$name};

            # each entry: id high byte, id low byte, string, NUL terminator
            printf $outf ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
        }
    }

    close($outf) or die "Error: couldn't write file $binary: $!\n";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # English id number => id number in the translated file

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount-1)));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $i (@ids) {
        my $name=$idnum[$i]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $idnum = $idmap{$name};

            if(length($idnum)) {
                $engl[$idnum] = $i;
            }
            else {
                # not used, mark it so
                $engl[$i] = -1
            }
        }
    }

    for my $i (@ids) {

        my $o = $engl[$i];

        if(($o < 0) || !length($o)) {
            print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
            next;
        }

        my $name=$idnum[$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$i ($o)\nid: $name\nvoice: $dest\n";
    }
}
# Verbose mode: report how many regular IDs were counted and dump the
# name/value pairs collected in %head.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for my $key (keys %head) {
        # The key is passed as an argument, never as part of the format
        # string, so a '%' in a header name cannot be taken as a directive.
        printf("%s: %s\n", $key, $head{$key});
    }
}