Make the greylib text output functions handle unicode. Fixes non-working umlauts...
[kugel-rb.git] / tools / genlang
blobf4eb698a8faa158ab4d7e846beaa7a4a9141a633
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
# Binary version number written into the header of a binary lang file.
my $langversion = 4; # 3 was the latest one used in the v1 format

# A note for future users and readers: The original v1 language system allowed
# the build to create and use a different language than english built-in. We
# removed that feature from our build-system, but the build scripts still had
# the ability. But, starting now, this ability is no longer provided since I
# figured it was boring and unnecessary to write support for now since we
# don't use it anymore.

# Without a language file argument there is nothing to work on: show the
# usage text and stop. Every option description is listed under the switch
# it belongs to.
if(!$ARGV[0]) {
    print <<MOO
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you'll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -u or -s.

 -s
    Sort the Update language file in the same order as the strings in the
    English file.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    ;
    exit;
}
# How update works:
#
# 1) Scan the english file and keep the whole <phrase> for each phrase.
# 2) Read the translated file and, at the end of each phrase, compare:
#    A) all source strings; if anything changed, a comment about it is output
#    B) the desc fields
#
# 3) Output the phrase together with the comments from above.
# 4) Check which phrases the translated version did not have, and output the
#    english version of those.
# Command line switches. perl's -s option (given on the #! line) stores each
# -x / -x=value switch in the package variable $x before the script runs.
my $prefix = $p;
my $binary = $b;
my $update = $u;
my $sortfile = $s;

my $english = $e;
my $voiceout = $o;

# Exactly one operating mode must be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);

# Usage errors are reported on STDERR with a non-zero exit status: the -u, -s
# and -o modes write their result to stdout, so a message printed there would
# end up inside the generated output and the caller could not detect the
# failure.
if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
    exit 1;
}
if(!$check) {
    print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
    exit 1;
}

if(($binary || $update || $voiceout || $sortfile) && !$english) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit 1;
}

my $target_id = $i;
if($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit 1;
}

my $target = $t;
if(!$target && !$update && !$sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}
my $verbose=$v;

my %id;         # string to num hash
my @idnum;      # num to string array

my %allphrases; # For sorting - a hash of the <phrase> elements, by id string
my %source;     # id string to source phrase hash
my %dest;       # id string to dest phrase hash
my %voice;      # id string to voice phrase hash

my $input = $ARGV[0];

# Parser state for the translated file: $m names the handler sub for the tag
# currently being parsed, @m is the stack of the enclosing tags' handlers.
my @m;
my $m="blank";
# Return a copy of the given string with leading and trailing whitespace
# removed. The caller's value is not modified.
sub trim {
    my ($string) = @_;
    for ($string) {
        s/^\s+//;
        s/\s+$//;
    }
    return $string;
}
# Glob-style match of $string against $pattern.
#
# Only two wildcards are supported: '*' matches any run of characters
# (including none) and '?' matches exactly one character. Every other
# character of the pattern is matched literally, so names containing regex
# metacharacters such as '.', '+' or '(' cannot change the meaning of the
# match or abort the script with a regex compile error.
#
# The whole string has to match. Returns true on a match, false otherwise.
sub match {
    my ($string, $pattern)=@_;

    # split with a capturing group keeps the wildcards as separate pieces, so
    # each piece is either a wildcard or plain text to be quoted
    my $regex = join('',
                     map { $_ eq '*' ? '.*' : $_ eq '?' ? '.' : quotemeta($_) }
                     split(/([*?])/, $pattern));

    return ($string =~ /^$regex\z/);
}
# Handler used while the parser is not inside any known tag: it accepts the
# usual handler arguments and deliberately ignores them.
sub blank {
    return;
}
# Name/value pairs collected by the header() handler.
my %head;

# Tag handler: remember the value $v under the name $n in %head.
# $full is the complete input line; it is not used here.
sub header {
    my ($full, $n, $v) = @_;
    return $head{$n} = $v;
}
# Name/value pairs (id, desc, ...) of the phrase currently being parsed.
my %phrase;

# Tag handler: remember the value $v under the name $n in %phrase.
# $full is the complete input line; it is not used here.
sub phrase {
    my ($full, $n, $v) = @_;
    return $phrase{$n} = $v;
}
# Shared worker for the source/dest/voice handlers.
#
#  $debug  - label of the calling handler (only useful for debug prints)
#  $strref - reference to the scalar that receives the value on a match
#  $full   - the complete input line (unused)
#  $n      - comma separated list of target patterns from the lang file
#  $v      - the value that belongs to those patterns
#
# Every pattern in $n is tried, in order, against each colon separated part
# of the global $target. On the first hit $v is stored through $strref and
# returned; when nothing matches, the referenced scalar is left untouched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;

    my @wanted = split(":", $target);

    for my $candidate (split(" *, *", $n)) {
        for my $part (@wanted) {
            next unless(match($part, $candidate));

            $$strref = $v;
            return $v;
        }
    }
}
# Source string of the current phrase for the selected target.
my $src;

# Tag handler for lines inside <source>: stores the value in $src when the
# line's target list matches the selected target.
sub source {
    return parsetarget("src", \$src, @_);
}
# Destination (translated) string of the current phrase for the selected
# target.
my $dest;

# Tag handler for lines inside <dest>: stores the value in $dest when the
# line's target list matches the selected target.
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# Voice string of the current phrase for the selected target.
my $voice;

# Tag handler for lines inside <voice>: stores the value in $voice when the
# line's target list matches the selected target.
sub voice {
    return parsetarget("voice", \$voice, @_);
}
my %idmap;   # id string to id number, in english file order
my %english; # id string to the complete text of the english phrase
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id

    # Three-argument open: the file name is never interpreted as an open
    # mode or a command, whatever characters it contains.
    open(ENG, "<", $english) || die "Error: can't open $english: $!\n";
    my @phrase;
    my $id;
    my $maybeid;
    my $withindest;
    my $deststr;        # dest string of the current phrase for our target
    my $numphrases = 0;
    while(<ENG>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {

            # if id is something, when we count and store this phrase
            if($id) {
                # voice-only entries get a difference range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap{$id}=$vidnum;
                    $vidnum++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap{$id}=$idnum;
                    $idnum++;
                    # print STDERR "DEST: bumped idnum to $idnum\n";
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
            if($_ =~ /^ *\<dest\>/i) {
                $withindest=1;
                $deststr="";
            }
            elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
                $withindest=0;

                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                    # print "DEST: use this id $id\n";
                }
                else {
                    # print "skip $maybeid for $name\n";
                }
            }
            elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($_, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        # Remember the id of the phrase being read, and the position of the
        # phrase within the english file (used when sorting with -s).
        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
            $sortorder{$maybeid}=$numphrases++;
        }
    }
    close(ENG);
}
# Compare one english phrase with its translated counterpart and print the
# updated translated phrase to stdout.
#
#  $idstr  - id string of the phrase (not used by the comparison itself)
#  $engref - reference to the lines of the english phrase, without newlines
#  $locref - reference to the lines of the translated phrase, with newlines
#
# The desc field and the <source> section are compared, ignoring surrounding
# whitespace. Where they differ, the english version is output and the text
# previously used by the translation is kept as '###' comments.
sub compare {
    my ($idstr, $engref, $locref) = @_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);

    # Collect the desc and the <source> body of the english phrase.
    my $in_source = 0;
    for my $eline (@$engref) {
        # comment
        next if($eline =~ /^ *#/);

        if($eline =~ /^ *desc: (.*)/) {
            $edesc = $1;
        }
        elsif($eline =~ / *\<source\>/i) {
            $in_source = 1;
        }
        elsif($in_source) {
            last if($eline =~ / *\<\/source\>/i);
            $esource .= "$eline\n";
        }
    }

    # Walk the translated phrase. Lines are queued in @show and flushed
    # when the <source> section ends and once more at the very end.
    my @show;

    $in_source = 0;
    for my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $ldesc = $1;
            if(trim($edesc) ne trim($ldesc)) {
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @show, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @show, $lline;
        }
        elsif(!$in_source) {
            push @show, $lline;
        }
        elsif($lline !~ / *\<\/source\>/i) {
            # still inside <source>: gather the translated file's version
            $lsource .= $lline;
        }
        else {
            # </source> reached: output what is queued, then the section body
            $in_source = 0;
            print @show;
            if(trim($esource) ne trim($lsource)) {
                print "### The <source> section differs from the english!\n",
                      "### the previously used one is commented below:\n";
                print "### $_\n" for(split("\n", $lsource));
                print $esource;
            }
            else {
                print $lsource;
            }

            # start over, beginning with the closing tag itself
            @show = ($lline);
        }
    }

    print @show;
}
my $idcount; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#

# Three-argument open: the file name is never interpreted as an open mode or
# a command, whatever characters it contains.
open(LANG, "<", $input) || die "Error: couldn't read language file named $input: $!\n";
my @phrase;     # all lines of the phrase currently being read
my $header = 1; # true until the first non-comment, non-empty line
my $line = 0;   # current line number, used in the warnings
while(<LANG>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line - output it if it's part of the header
        if ($header and ($update || $sortfile)) {
            print($_);
        }
        next;
    }
    $header = 0;

    my $ll = $_;

    # print "M: $m\n";

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;
        # print "P: $part\n";

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it\n";
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif($sortfile) {
                    $allphrases{$idstr}=join('',@phrase);
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
                        print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
                        $voice='""';
                    }
                    if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
                        print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }

                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        # clear it, so that what remains in %english
                        # afterwards are the phrases this file lacks
                        $english{$idstr}="";
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # Call the handler sub named after the enclosing tag (phrase, source,
        # dest, voice, ...). NOTE(review): the sub is looked up by name, so a
        # tag without a sub of the same name aborts the script here.
        &$m($_, $name, $val);
    }
}
close(LANG);
if($update) {
    # Whatever is still non-empty in %english was never seen in the
    # translated file: append the english version of those phrases. They are
    # output in english file order (with the id string as tie-breaker), so
    # that repeated runs produce identical output.
    for(sort { ($sortorder{$a} <=> $sortorder{$b}) || ($a cmp $b) }
        keys %english) {
        if($english{$_}) {
            print "###\n",
                  "### This phrase below was not present in the translated file\n",
                  "<phrase>\n";
            print $english{$_};
            print "</phrase>\n";
        }
    }
}
# -s: output the collected phrases in the order their ids have in the english
# file.
if ($sortfile) {
    my @ordered = sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases;
    for my $idstr (@ordered) {
        print $allphrases{$idstr};
    }
}
if($prefix) {
    # We create a .c and .h file

    # Three-argument open: the file names are never interpreted as an open
    # mode or a command, whatever characters $prefix contains.
    open(HFILE, ">", "$prefix.h") ||
        die "Error: couldn't create file $prefix.h: $!\n";
    open(CFILE, ">", "$prefix.c") ||
        die "Error: couldn't create file $prefix.c: $!\n";

    print HFILE <<MOO
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO
    ;

    print CFILE <<MOO
/* This file was automatically generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO
    ;

    # Output the ID names for the enum in the header file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf HFILE ("    %s, /* %d */\n", $name, $i-1);
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print HFILE <<MOO
    LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
    /* --- below this follows voice-only strings --- */
    VOICEONLY_DELIMITER = 0x8000,
MOO
    ;

    # Output the ID names of the voice-only strings for the enum
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf HFILE ("    %s,\n", $name);
    }

    # Output end of enum
    print HFILE "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf CFILE ("    %s\n", $dest);
    }

    # Output end of string chunk
    print CFILE <<MOO
;
/* end of generated string list */
MOO
    ;

    # Buffered write errors (disk full etc.) only show up at close time, so
    # the result is checked.
    close(HFILE) || die "Error: couldn't write file $prefix.h: $!\n";
    close(CFILE) || die "Error: couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    # Three-argument open: the file name is never interpreted as an open
    # mode or a command, whatever characters it contains.
    open(OUTF, ">", $binary) or die "Error: Can't create $binary: $!\n";
    binmode OUTF;
    printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $english_id = $idmap{$name};

            # one record: id number (high byte first), the string, a
            # terminating zero byte
            printf OUTF ("%c%c%s\x00", ($english_id>>8), ($english_id&0xff), $dest);
        }
    }

    # Buffered write errors (disk full etc.) only show up at close time, so
    # the result is checked.
    close(OUTF) or die "Error: couldn't write $binary: $!\n";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    # @engl is indexed by id number and holds the index (into @idnum) of the
    # phrase to output for that id number, or -1 for "not used"
    my @engl;

    # All id numbers of the translated language file, in its order: first the
    # normal ones, then the voice-only range
    my @ids = (0 .. ($idcount-1), 0x8000 .. ($voiceid-1));

    for my $index (@ids) {
        my $name = $idnum[$index];      # get the ID

        if($voice{$name}) {
            # Now, make sure we get the number from the english sort order:
            my $english_id = $idmap{$name};

            $engl[$english_id] = $index if(length($english_id));
        }
        else {
            # not used, mark it so
            $engl[$index] = -1;
        }
    }

    for my $index (@ids) {
        my $from = $engl[$index];

        unless(($from >= 0) && length($from)) {
            print "#$index\nid: NOT_USED_$index\nvoice: \"\"\n";
            next;
        }

        my $name = $idnum[$from];       # get the ID
        my $spoken = $voice{$name};     # get the destination voice string

        print "#$index ($from)\nid: $name\nvoice: $spoken\n";
    }
}
# -v: finish with a summary of what was scanned.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    # The names are plain data and must not be used as part of a printf
    # format: a '%' in a name would be taken as a conversion. Sorted for a
    # stable output order.
    for my $name (sort keys %head) {
        print "$name: $head{$name}\n";
    }
}