Remove tabs.
[kugel-rb.git] / tools / genlang
blobc8ccbd16d751c366889e340123cfb7fa84b58721
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
13 # See apps/language.c (TODO: Use common include for both)
14 # Cookie and binary version for the binary lang file
# First byte written to the header of the binary (.lng) file.
15 my $LANGUAGE_COOKIE = 0x1a;
# Second byte written to the header of the binary file.
16 my $LANGUAGE_VERSION = 0x06;
# Bit OR-ed into the options byte of the header when the 'rtl' option is set.
17 my $LANGUAGE_FLAG_RTL = 0x01;
# Number of bytes in the file header: cookie, version, target id, options.
19 my $HEADER_SIZE = 4;
# Number of bytes in each per-user subheader: string count, size and file
# offset, written as two bytes each.
20 my $SUBHEADER_SIZE = 6;
22 # A note for future users and readers: The original v1 language system allowed
23 # the build to create and use a different language than english built-in. We
24 # removed that feature from our build-system, but the build scripts still had
25 # the ability. But, starting now, this ability is no longer provided since I
26 # figured it was boring and unnecessary to write support for now since we
27 # don't use it anymore.
# Without a language file argument, print the usage text and stop.
# NOTE(review): several option headings appear to be missing from the text
# below (descriptions without a preceding switch line) - compare against the
# original file before relying on it.
30 if(!$ARGV[0]) {
31 print <<MOO
32 Usage: genlang [options] <langv2 file>
34 -p=<prefix>
35 Make the tool create a [prefix].c and [prefix].h file.
37 -b=<outfile>
38 Make the tool create a binary language (.lng) file named [outfile].
39 The use of this option requires that you also use -e, -t and -i.
42 Update language file. Given the translated file and the most recent english
43 file, you\'ll get an updated version sent to stdout. Suitable action to do
44 when you intend to update a translation.
46 -e=<english lang file>
47 Point out the english (original source) file, to use that as master
48 language template. Used in combination with -b, -u or -s.
51 Sort the Update language file in the same order as the strings in the
52 English file.
54 -t=<target>
55 Specify which target you want the translations/phrases for. Required when
56 -b or -p is used.
58 The target can in fact be specified as numerous different strings,
59 separated with colons. This will make genlang to use all the specified
60 strings when searching for a matching phrase.
62 -i=<target id>
63 The target id number, needed for -b.
66 Voice mode output. Outputs all id: and voice: lines for the given target!
69 Enables verbose (debug) output.
70 MOO
72 exit;
75 # How update works:
77 # 1) scan the english file, keep the whole <phrase> for each phrase.
78 # 2) read the translated file, for each end of phrase, compare:
79 # A) all source strings, if there's any change there should be a comment about
80 # it output
81 # B) the desc fields
83 # 3) output the phrase with the comments from above
84 # 4) check which phrases the translated version didn't have, and spit out
85 # the english version of those
# Command-line switches. The script runs under "perl -s", which stores every
# "-x=value" switch in the package variable $x before the script body runs.
my $prefix = $p;     # -p: create the .c and .h files
my $binary = $b;     # -b: create a binary language file
my $update = $u;     # -u: update a translation against the english file
my $sortfile = $s;   # -s: sort a translation in english order

my $english = $e;    # -e: the english master file
my $voiceout = $o;   # -o: voice mode output

# Exactly one operating mode has to be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);

# Every usage error below exits with a non-zero status so that a calling
# build system notices the failure instead of carrying on without output.
if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
    exit 1;
}
if(!$check) {
    print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
    exit 1;
}

if(($binary || $update || $voiceout || $sortfile) && !$english) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit 1;
}

my $target_id = $i;
if($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit 1;
}

my $target = $t;
if(!$target && !$update && !$sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}
# -t may hold several target strings separated by colons
my @target_parts = defined($target) ? split(':', $target) : ();

# Directory of the binary output file; english.list is kept next to it.
# Default to the current directory so that an output name without any
# directory part does not end up referring to "/english.list".
my $binpath = ".";
if ($binary =~ m|(.*)/[^/]+|) {
    $binpath = $1;
}

my $verbose=$v;
# Tables filled in while the translated language file is scanned.
133 my %id; # string to num hash
# indexed as $idnum[user number][id number]
134 my @idnum; # num to string array
136 my %allphrases; # For sorting - id string to complete <phrase> text hash
137 my %source; # id string to source phrase hash
138 my %dest; # id string to dest phrase hash
139 my %voice; # id string to voice phrase hash
# user name to user number; 'core' is always user 0
141 my %users =
142 ('core' => 0);
# the language file to scan
144 my $input = $ARGV[0];
# @m is the stack of enclosing section names and $m the current section name;
# $m is also the name of the sub that gets called for each "name: value" line.
146 my @m;
147 my $m="blank";
# Return a copy of the given string with all leading and trailing
# whitespace removed.
sub trim {
    my ($text) = @_;
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
# Handler used while no section is open: ignores its arguments and does
# nothing.
sub blank {
    return;
}
# name => value pairs collected by header()
my %head;

# Section handler: store a value under its name in %head.
# Arguments: the complete input line (unused), the name, the value.
sub header {
    my (undef, $key, $value) = @_;
    return $head{$key} = $value;
}
# name => value pairs of the phrase currently being read
my %phrase;

# Section handler: store a value under its name in %phrase.
# Arguments: the complete input line (unused), the name, the value.
sub phrase {
    my (undef, $key, $value) = @_;
    return $phrase{$key} = $value;
}
# name => value pairs collected by options()
my %options;

# Section handler: store a value under its name in %options.
# Arguments: the complete input line (unused), the name, the value.
sub options {
    my (undef, $key, $value) = @_;
    return $options{$key} = $value;
}
# Match a comma separated list of target patterns against the targets asked
# for on the command line (@target_parts) and store the value on the first
# hit.
#
# Arguments:
#   $debug  - label for the kind of string, only of use for debug prints
#   $strref - reference to the scalar that receives the value on a match
#   $full   - the complete input line (unused)
#   $n      - the pattern list; '*' matches any run of characters and '?'
#             matches a single character
#   $v      - the value to store when one of the patterns matches
#
# Returns the stored value on a match and nothing otherwise, in which case
# the referenced scalar is left untouched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;

    for my $glob (split(" *, *", $n)) {
        # Quote the pattern first so that '*' and '?' are the only
        # wildcards; every other character has to match literally.
        my $pattern = quotemeta($glob);
        $pattern =~ s/\\\*/.*/g;
        $pattern =~ s/\\\?/./g;

        for my $part (@target_parts) {
            if($part =~ /^$pattern\z/) {
                $$strref = $v;
                return $v;
            }
        }
    }
    return;
}
# source string selected for our target within the current phrase
my $src;

# Section handler: hand the line over to parsetarget(), which stores the
# value in $src when the target pattern matches.
sub source {
    my @fields = @_;
    return parsetarget("src", \$src, @fields);
}
# dest string selected for our target within the current phrase
my $dest;

# Section handler: hand the line over to parsetarget(), which stores the
# value in $dest when the target pattern matches.
sub dest {
    my @fields = @_;
    return parsetarget("dest", \$dest, @fields);
}
# voice string selected for our target within the current phrase
my $voice;

# Section handler: hand the line over to parsetarget(), which stores the
# value in $voice when the target pattern matches.
sub voice {
    my @fields = @_;
    return parsetarget("voice", \$voice, @fields);
}
# Tell whether $file1 was modified more recently than $file2.
# Returns 1 when the modification time of $file1 is greater than the one of
# $file2 and 0 otherwise.
sub file_is_newer {
    my ($file1, $file2) = @_;

    # element 9 of the stat list is the modification time
    my $mtime1 = (stat $file1)[9];
    my $mtime2 = (stat $file2)[9];

    return ($mtime1 > $mtime2) ? 1 : 0;
}
# NOTE(review): the rest of the file indexes $idmap[...]{...}, which is the
# array @idmap and not this hash - confirm which one is intended.
224 my %idmap;
# id string to english phrase text (or just 1 when read from english.list)
225 my %english;
# Scan the english file first whenever one was given with -e.
226 if($english) {
227 readenglish();
# Scan the english master file before the translated file is read.
#
# Fills in:
#   $idmap[user]{id string} - the id number the english file assigns
#   %english                - id string to the complete phrase text
#   %sortorder              - id string to position within the english file
#   %users                  - user name to user number, for new users
#
# When a binary file is being created and "$binpath/english.list" is newer
# than the english file, the id numbers are read from that list instead and
# %english only records which ids exist.
#
# Takes no arguments and returns nothing; dies when a file cannot be opened.
sub readenglish {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my @idnum = ((0));       # start with a true number
    my @vidnum = ((0x8000)); # first voice id

    if ($binary and file_is_newer("$binpath/english.list", $english)) {
        open(my $list, '<', "$binpath/english.list") ||
            die "Error: can't open $binpath/english.list";
        while (my $entry = <$list>) {
            # drop the newline so that the id number is a clean number
            chomp $entry;
            my ($user, $id, $value) = split ':', $entry;
            $idmap[$user]{$id} = $value;
            $english{$id} = 1;
        }
        close $list;

        return;
    }

    open(my $eng, '<', $english) || die "Error: can't open $english";
    my @phrase;
    my $id;
    my $maybeid;
    my $user;
    my $withindest;
    my $numphrases = 0;
    my $numusers = 1; # core is already in the users map

    while(my $line = <$eng>) {

        # get rid of DOS newlines
        $line =~ tr/\r//d;

        if($line =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($line =~ /\<\/phrase\>/) {

            # if id is something, then we count and store this phrase
            if($id) {
                # Every user has its own pair of counters. Start them on
                # first use so that users other than the first one also get
                # a defined number, and voice ids in the voice range.
                $idnum[$user] = 0 unless defined $idnum[$user];
                $vidnum[$user] = 0x8000 unless defined $vidnum[$user];

                # voice-only entries get a different range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id}=$vidnum[$user];
                    $vidnum[$user]++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id}=$idnum[$user];
                    $idnum[$user]++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($line ne "\n") {
            # gather everything related to this phrase
            push @phrase, $line;
            if($line =~ /^ *\<dest\>/i) {
                $withindest=1;
                $deststr="";
            }
            elsif($withindest && ($line =~ /^ *\<\/dest\>/i)) {
                $withindest=0;

                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                }
                # otherwise this phrase is skipped for our target
            }
            elsif($withindest && ($line =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($line, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        if($line =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
            $sortorder{$maybeid}=$numphrases++;
        }
        if($line =~ /^ *user: ([^ \t\n]+)/i) {
            $user = $users{$1};
            if(!(defined $user)) {
                $user = ++$numusers;
                $users{$1} = $user;
            }
        }
    }
    close($eng);
}
341 # a function that compares the english phrase with the translated one.
342 # compare source strings and desc
344 # Then output the updated version!
# Print the translated phrase, updated against the english one.
#
# Arguments:
#   $idstr  - the id string of the phrase (unused)
#   $engref - reference to the lines of the english phrase
#   $locref - reference to the lines of the translated phrase
#
# A desc line that differs from the english one is replaced by the english
# desc, preceded by a note and the old desc as comments. A <source> section
# that differs is replaced by the english one in the same way. Everything
# else is printed as it is. Note that a replaced desc line is also changed
# in the array $locref refers to.
sub compare {
    my ($idstr, $engref, $locref) = @_;
    my ($eng_desc, $eng_source);
    my ($loc_desc, $loc_source);

    # First pass: get the english desc and the body of its <source> section.
    my $in_source = 0;
    for my $eline (@$engref) {
        next if $eline =~ /^ *#/; # comment

        if($eline =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next;
        }
        if($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next;
        }
        next unless $in_source;
        last if $eline =~ / *\<\/source\>/i;
        $eng_source .= "$eline\n";
    }

    # Second pass: walk the translated phrase and output it piece by piece.
    my @pending; # lines waiting to be printed
    $in_source = 0;
    for my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            if(trim($eng_desc) ne trim($loc_desc)) {
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n desc: $eng_desc\n";
            }
            push @pending, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
        }
        elsif(!$in_source) {
            push @pending, $lline;
        }
        elsif($lline !~ / *\<\/source\>/i) {
            # a line within the translated <source> section
            $loc_source .= "$lline";
        }
        else {
            # the <source> section ends here, flush what we have so far
            $in_source = 0;
            print @pending;
            if(trim($eng_source) ne trim($loc_source)) {
                print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                print "### $_\n" for split("\n", $loc_source);
                print $eng_source;
            }
            else {
                print $loc_source;
            }
            @pending = ($lline); # start over with the closing tag
        }
    }

    print @pending;
}
# Per-user counters used while the translated file is scanned.
418 my @idcount; # counter for lang ID numbers
419 my @voiceid; # counter for voice-only ID numbers
# One pair of counters is appended for every known user; the voice counter
# starts at 0x8001.
421 for (keys %users) {
422 push @idcount, 0;
423 push @voiceid, 0x8001;
427 # Now start the scanning of the selected language string
# Main scan of the language file given on the command line. The file is read
# line by line: tags open and close sections, "name: value" lines are handed
# to the sub named after the current section, and at each </phrase> the
# collected phrase is checked, numbered and stored (or compared against the
# english phrase in update mode).
430 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
431 my @phrase;
432 my $header = 1;
433 my $langoptions = 0;
435 while(<LANG>) {
437 $line++;
439 # get rid of DOS newlines
440 $_ =~ tr/\r//d;
442 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
443 # comment or empty line - output it if it's part of the header
444 if ($header and ($update || $sortfile)) {
445 print($_);
447 next;
449 $header = 0;
451 my $ll = $_;
453 # print "M: $m\n";
455 push @phrase, $ll;
457 # this is an XML-lookalike tag
458 if (/^(<|[^\"<]+<)([^>]*)>/) {
459 my $part = $2;
460 # print "P: $part\n";
462 if($part =~ /^\//) {
463 # this was a closing tag
465 if($part eq "/phrase") {
466 # closing the phrase
468 my $idstr = $phrase{'id'};
469 my $idnum;
471 if($binary && !$english{$idstr}) {
472 # $idstr doesn't exist for english, skip it\n";
474 elsif($dest =~ /^none\z/i) {
475 # "none" as dest (without quotes) means that this entire
476 # phrase is to be ignored
478 elsif($sortfile) {
479 $allphrases{$idstr}=join('',@phrase);
481 elsif(!$update) {
482 # we don't do the fully detailed analysis when we "update"
483 # since we don't do it for a particular target etc
485 # allow the keyword 'deprecated' to be used on dest and
486 # voice strings to mark that as deprecated. It will then
487 # be replaced with "".
489 $dest =~ s/^deprecate(|d)\z/\"\"/i;
490 $voice =~ s/^deprecate(|d)\z/\"\"/i;
492 # basic syntax error alerts, if there are no quotes we
493 # will assume an empty string was intended
494 if($dest !~ /^\"/) {
495 print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
496 $dest='""';
498 if($src !~ /^\"/) {
499 print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
500 $src='""';
502 if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
503 print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
504 $voice='""';
506 if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
507 print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
# Map the user name of the phrase to its user number.
510 my $userstr = trim($phrase{'user'});
511 my $user = $users{$userstr};
512 if ($userstr eq "") {
513 print STDERR "$input:$line:1: warning: missing user!\n";
514 $user = $users{"core"};
516 elsif(!(defined $user)) {
517 if($english) {
518 print STDERR "$input:$line:1: warning: user was not found in $english!\n";
519 $user = keys %users; # set to an invalid user so it won't be added
521 else {
522 # we found a new user, add it to the usermap
# NOTE(review): $numusers here is not the lexical declared inside
# readenglish() - confirm the numbering it produces is the intended one.
523 $user = ++$numusers;
524 $users{$userstr} = $user;
528 # Use the ID name to figure out which id number range we
529 # should use for this phrase. Voice-only strings are
530 # separated.
532 if($idstr =~ /^VOICE/) {
533 $idnum = $voiceid[$user]++;
535 else {
536 $idnum = $idcount[$user]++;
539 $id{$idstr} = $idnum;
540 $idnum[$user][$idnum]=$idstr;
542 $source{$idstr}=$src;
543 $dest{$idstr}=$dest;
544 $voice{$idstr}=$voice;
546 if($verbose) {
547 print "id: $phrase{id} ($idnum)\n";
548 print "source: $src\n";
549 print "dest: $dest\n";
550 print "voice: $voice\n";
551 print "user: $user\n";
# Reset the per-phrase state before the next phrase starts.
554 undef $src;
555 undef $dest;
556 undef $voice;
557 undef $user;
558 undef %phrase;
561 if($update) {
562 my $e = $english{$idstr};
564 if($e) {
565 # compare original english with this!
566 my @eng = split("\n", $english{$idstr});
568 compare($idstr, \@eng, \@phrase);
# Emptied entries are how the code after this loop tells which english
# phrases were present in the translated file.
570 $english{$idstr}=""; # clear it
572 else {
573 print "### $idstr: The phrase is not used. Skipped\n";
576 undef @phrase;
577 } # end of </phrase>
578 elsif($part eq "/options") {
579 # closing the options
580 if ($options{'rtl'}) {
581 $langoptions |= $LANGUAGE_FLAG_RTL;
583 } # end of </options>
585 # starts with a slash, this _ends_ this section
586 $m = pop @m; # get back old value, the previous level's tag
587 next;
588 } # end of tag close
590 # This is an opening (sub) tag
592 push @m, $m; # store old value
593 $m = $part;
594 next;
# A "name: value" line: call the sub whose name is the current section name
# ($m), passing the whole line, the name and the value.
597 if(/^ *([^:]+): *(.*)/) {
598 my ($name, $val)=($1, $2);
599 &$m($_, $name, $val);
602 close(LANG);
# In update mode every english phrase that is still non-empty at this point
# was not found in the translated file; output it with a marker in front.
if($update) {
    for my $idstr (keys %english) {
        next unless $english{$idstr};
        print "###\n",
            "### This phrase below was not present in the translated file\n",
            "<phrase>\n",
            $english{$idstr},
            "</phrase>\n";
    }
}
# In sort mode, output the collected phrases in the order their ids appear
# in the english file.
if ($sortfile) {
    my @ordered = sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases;
    print $allphrases{$_} for @ordered;
}
# -p mode: write $prefix/lang.h (an enum with one entry per id of the 'core'
# user) and $prefix/lang_core.c (the dest strings of those ids).
623 if($prefix) {
624 # We create a .c and .h file
626 open(HFILE_CORE, ">$prefix/lang.h") ||
627 die "Error: couldn't create file $prefix/lang.h\n";
628 open(CFILE_CORE, ">$prefix/lang_core.c") ||
629 die "Error: couldn't create file $prefix/lang_core.c\n";
631 # get header file name
632 $headername = "$prefix/lang.h";
# strip the directory part, leaving the bare file name for the #include
633 $headername =~ s/(.*\/)*//;
635 print HFILE_CORE <<MOO
636 /* This file was automatically generated using genlang */
638 * The str() macro/functions is how to access strings that might be
639 * translated. Use it like str(MACRO) and expect a string to be
640 * returned!
642 #define str(x) language_strings[x]
644 /* this is the array for holding the string pointers.
645 It will be initialized at runtime. */
646 extern unsigned char *language_strings[];
647 /* this contains the concatenation of all strings, separated by \\0 chars */
648 extern const unsigned char core_language_builtin[];
650 /* The enum below contains all available strings */
651 enum \{
655 print CFILE_CORE <<MOO
656 /* This file was automatically generated using genlang, the strings come
657 from "$input" */
659 #include "$headername"
661 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
662 const unsigned char core_language_builtin[] =
666 # Output the ID names for the enum in the header file
667 my $i;
668 for $i (0 .. $idcount[$users{"core"}]-1) {
669 my $name=$idnum[$users{"core"}][$i]; # get the ID name
671 $name =~ tr/\"//d; # cut off the quotes
673 printf HFILE_CORE (" %s, /* %d */\n", $name, $i);
676 # Output separation marker for last string ID and the upcoming voice IDs
678 print HFILE_CORE <<MOO
679 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
680 /* --- below this follows voice-only strings --- */
681 VOICEONLY_DELIMITER = 0x8000,
685 # Output the ID names for the enum in the header file
# voice-only ids are numbered from 0x8001 upwards
686 for $i (0x8001 .. ($voiceid[$users{"core"}]-1)) {
687 my $name=$idnum[$users{"core"}][$i]; # get the ID name
689 $name =~ tr/\"//d; # cut off the quotes
691 printf HFILE_CORE (" %s, /* 0x%x */\n", $name, $i);
694 # Output end of enum
695 print HFILE_CORE "\n};\n/* end of generated enum list */\n";
697 # Output the target phrases for the source file
698 for $i (0 .. $idcount[$users{"core"}]-1) {
699 my $name=$idnum[$users{"core"}][$i]; # get the ID
700 my $dest = $dest{$name}; # get the destination phrase
702 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
704 if(!$dest) {
705 # this is just to be on the safe side
706 $dest = '"\0"';
709 printf CFILE_CORE (" %s\n", $dest);
712 # Output end of string chunk
713 print CFILE_CORE <<MOO
715 /* end of generated string list */
719 close(HFILE_CORE);
720 close(CFILE_CORE);
721 } # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Error: Can't create $binary";
    binmode $outf;
    # magic lang file header
    printf {$outf} ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION,
                    $target_id, $langoptions);

    # The per-user arrays are indexed by user number, so walk the numbers
    # stored in %users (in a stable order) and not the user names.
    my @user_numbers = sort { $a <=> $b } values %users;

    # output the number of strings for each user, together with the size of
    # the user's string data and its offset within the file
    my $foffset = $HEADER_SIZE + $SUBHEADER_SIZE * keys(%users);
    for my $user (@user_numbers) {
        my $count = $idcount[$user] || 0;
        my $size = 0;
        for my $n (0 .. $count-1) {
            # The quoted string is two characters longer than what gets
            # written, which makes up for the two id bytes; the extra one
            # is the terminating zero byte.
            $size += length(trim($dest{$idnum[$user][$n]})) + 1;
        }
        printf {$outf} ("%c%c%c%c%c%c", ($count >> 8), ($count & 0xff),
                        ($size >> 8), ($size & 0xff),
                        ($foffset >> 8), ($foffset & 0xff));
        $foffset += $size;
    }

    for my $user (@user_numbers) {
        my $count = $idcount[$user] || 0;
        # loop over the target phrases
        for my $n (0 .. $count-1) {
            my $name = $idnum[$user][$n]; # get the ID
            my $dest = $dest{$name};      # get the destination phrase

            if($dest) {
                $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

                # Now, make sure we get the number from the english sort order:
                my $english_id = $idmap[$user]{$name};

                printf {$outf} ("%c%c%s\x00", ($english_id>>8),
                                ($english_id&0xff), $dest);
            }
        }
    }

    # buffered write errors only show up when the file is closed
    close($outf) or die "Error: Can't write $binary";
}
764 elsif($voiceout) {
765 # voice output requested, display id: and voice: strings in a v1-like
766 # fashion
# maps an english id number to the id number used in the translated file,
# or -1 for an id without an english number
768 my @engl;
770 # This loops over the strings in the translated language file order
# Only the ids of the 'core' user are handled here.
771 my @ids = ((0 .. ($idcount[$users{"core"}]-1)));
772 push @ids, (0x8000 .. ($voiceid[$users{"core"}]-1));
774 #for my $id (@ids) {
775 # print "$id\n";
# First pass: build the @engl mapping.
778 for $i (@ids) {
779 my $name=$idnum[$users{"core"}][$i]; # get the ID
780 my $dest = $voice{$name}; # get the destination voice string
782 if($dest) {
783 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
785 # Now, make sure we get the number from the english sort order:
786 $idnum = $idmap[$users{"core"}]{$name};
788 if(length($idnum)) {
789 $engl[$idnum] = $i;
791 #print "Input index $i output index $idnum\n";
793 else {
794 # not used, mark it so
795 $engl[$i] = -1
# Second pass: print one id:/voice: pair per number, with a NOT_USED
# placeholder for every number that has no phrase mapped to it.
800 for my $i (@ids) {
802 my $o = $engl[$i];
804 if(($o < 0) || !length($o)) {
805 print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
806 next;
809 my $name=$idnum[$users{"core"}][$o]; # get the ID
810 my $dest = $voice{$name}; # get the destination voice string
812 print "#$i ($o)\nid: $name\nvoice: $dest\n";
# With -v, finish by printing the number of scanned ID strings followed by
# everything stored in %head.
if($verbose) {
    my $num_str = 0;
    $num_str += $idcount[$_] for keys %users;

    printf("%d ID strings scanned\n", $num_str);

    print "* head *\n";
    for my $field (keys %head) {
        printf "$field: %s\n", $head{$field};
    }
}
# When a binary file was created and there is no readable english.list next
# to it yet, save the english id numbers there, one "user:id:number" line
# per id, so that later runs can read them back instead of scanning the
# english file again.
if ($binary and !-r "$binpath/english.list") {
    open(my $englist, '>', "$binpath/english.list") ||
        die "Failed creating $binpath/english.list";

    # @idmap is indexed by user number, so walk the numbers stored in
    # %users and not the user names. The number is also what gets written,
    # since the reading side uses that field as the index again.
    for my $user (sort { $a <=> $b } values %users) {
        my $ids = $idmap[$user] or next;
        for my $id (keys %$ids) {
            print {$englist} "$user:$id:$ids->{$id}\n";
        }
    }

    # buffered write errors only show up when the file is closed
    close($englist) or die "Failed writing $binpath/english.list";
}