Add a center flag, next to the rtl flag, for viewports. That results in any text...
[kugel-rb.git] / tools / genlang
blob44dfcc6fdfe236b39d07954341ffe22199c32c3f
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
13 # See apps/language.c (TODO: Use common include for both)
14 # Cookie and binary version for the binary lang file
# These two values are written as the first two bytes of the header of the
# binary language file produced in -b mode.
15 my $LANGUAGE_COOKIE = 0x1a;
16 my $LANGUAGE_VERSION = 0x05;
# Option bit that is OR-ed into the options byte of the binary header when
# the <options> section of the language file sets 'rtl'.
17 my $LANGUAGE_FLAG_RTL = 0x01;
19 # A note for future users and readers: The original v1 language system allowed
20 # the build to create and use a different language than english built-in. We
21 # removed that feature from our build-system, but the build scripts still had
22 # the ability. But, starting now, this ability is no longer provided since I
23 # figured it was boring and unnecessary to write support for now since we
24 # don't use it anymore.
# Maps the name found in a phrase's "user:" field to a number. Only 'core'
# is defined; the main scan warns about any other name and falls back to 1.
26 my %user2num =
27 ('core' => 1);
# Without a language file argument, print the usage text and exit.
# NOTE(review): the text below says -p creates [prefix].c and [prefix].h,
# while the -p code path in this script opens $prefix/lang.h and
# $prefix/lang_core.c - confirm which one is intended.
30 if(!$ARGV[0]) {
31 print <<MOO
32 Usage: genlang [options] <langv2 file>
34 -p=<prefix>
35 Make the tool create a [prefix].c and [prefix].h file.
37 -b=<outfile>
38 Make the tool create a binary language (.lng) file named [outfile].
39 The use of this option requires that you also use -e, -t and -i.
42 Update language file. Given the translated file and the most recent english
43 file, you\'ll get an updated version sent to stdout. Suitable action to do
44 when you intend to update a translation.
46 -e=<english lang file>
47 Point out the english (original source) file, to use that as master
48 language template. Used in combination with -b, -u or -s.
51 Sort the Update language file in the same order as the strings in the
52 English file.
54 -t=<target>
55 Specify which target you want the translations/phrases for. Required when
56 -b or -p is used.
58 The target can in fact be specified as numerous different strings,
59 separated with colons. This will make genlang to use all the specified
60 strings when searching for a matching phrase.
62 -i=<target id>
63 The target id number, needed for -b.
66 Voice mode output. Outputs all id: and voice: lines for the given target!
69 Enables verbose (debug) output.
70 MOO
72 exit;
75 # How update works:
77 # 1) scan the english file, keep the whole <phrase> for each phrase.
78 # 2) read the translated file, for each end of phrase, compare:
79 # A) all source strings; if any of them changed, a comment about it
80 # is output
81 # B) the desc fields
83 # 3) output the phrase with the comments from above
84 # 4) check which phrases the translated version didn't have, and spit out
85 # the english version of those
# Pick up the command line switches. The #! line runs perl with -s, which is
# what populates $p, $b, $u, $s, $e, $o, $i, $t and $v from the -x=value
# style arguments shown in the usage text.
88 my $prefix = $p;
89 my $binary = $b;
90 my $update = $u;
91 my $sortfile = $s;
93 my $english = $e;
94 my $voiceout = $o;
# Number of output modes requested; exactly one of them must be given.
96 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);
98 if($check > 1) {
99 print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
100 exit;
102 if(!$check) {
103 print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
104 exit;
# Every mode except -p needs the english file as well.
108 if(($binary || $update || $voiceout || $sortfile) && !$english) {
109 print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
110 exit;
113 my $target_id = $i;
114 if($binary && !$target_id) {
115 print STDERR "Please specify a target id number (with -i)!\n";
116 exit;
# A target is required for every mode except -u and -s.
119 my $target = $t;
120 if(!$target && !$update && !$sortfile) {
121 print STDERR "Please specify a target (with -t)!\n";
122 exit;
124 my $verbose=$v;
126 my %id; # string to num hash
127 my @idnum; # num to string array
129 my %allphrases; # For sorting - a hash of the <phrase> texts, keyed by id string
130 my %source; # id string to source phrase hash
131 my %dest; # id string to dest phrase hash
132 my %voice; # id string to voice phrase hash
134 my $input = $ARGV[0];
# $m holds the name of the sub that handles "name: value" lines for the
# section currently being read (it is called by name as &$m(...)); @m is the
# stack of handler names of the enclosing sections.
136 my @m;
137 my $m="blank";
# Return a copy of the given string with leading and trailing whitespace
# removed. The caller's variable is left untouched.
sub trim {
    my ($string) = @_;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}
# Test whether $string matches the glob-like $pattern in its entirety.
#
# In the pattern, '*' stands for any run of characters and '?' for exactly
# one character. Everything else in the pattern is used as regular
# expression syntax as-is (nothing is escaped).
#
# Returns the result of the anchored regex match.
sub match {
    my ($string, $pattern)=@_;

    # translate the glob wildcards into their regex equivalents
    $pattern =~ s/\*/.*/g;
    $pattern =~ s/\?/./g;

    return ($string =~ /^$pattern\z/);
}
# Handler that ignores its arguments. "blank" is the initial value of $m,
# so this is what "name: value" lines are dispatched to while no section
# tag has been opened.
sub blank {
    # nothing to do
}
# Name/value pairs collected from the <header> section.
my %head;

# Handler for "name: value" lines inside <header>.
#   $full - the complete input line (unused)
#   $n    - the name part
#   $v    - the value part
# Stores the value in %head under the given name.
sub header {
    my ($full, $n, $v)=@_;
    $head{$n}=$v;
}
# Name/value pairs (id, desc, user, ...) of the <phrase> currently being read.
my %phrase;

# Handler for "name: value" lines directly inside <phrase>.
#   $full - the complete input line (unused)
#   $n    - the name part
#   $v    - the value part
# Stores the value in %phrase under the given name.
sub phrase {
    my ($full, $n, $v)=@_;
    $phrase{$n}=$v;
}
# Name/value pairs collected from the <options> section.
my %options;

# Handler for "name: value" lines inside <options>.
#   $full - the complete input line (unused)
#   $n    - the name part
#   $v    - the value part
# Stores the value in %options under the given name.
sub options {
    my ($full, $n, $v)=@_;
    $options{$n}=$v;
}
# Shared worker for the <source>, <dest> and <voice> handlers.
#
#   $debug  - label of the calling section; only used by the commented-out
#             debug print
#   $strref - reference to the scalar that receives the value on a match
#   $full   - the complete input line (unused)
#   $n      - comma-separated list of target patterns from the left-hand
#             side of the line
#   $v      - the value from the right-hand side of the line
#
# Each pattern is tried against every colon-separated part of the global
# $target using match(). On the first hit the value is stored through
# $strref and returned. When nothing matches, $$strref is left untouched
# and no explicit value is returned.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;
    my $string;
    my @all= split(" *, *", $n);
    my $test;
    for $test (@all) {
#       print "TEST ($debug) $target for $test\n";
        for my $part (split(":", $target)) {
            if(match($part, $test)) {
                $string = $v;
#               print "MATCH: $test => $v\n";
                $$strref = $string;
                return $string;
            }
        }
    }
}
# Source string selected for the current target in the phrase being read.
my $src;

# Handler for "target: value" lines inside <source>; stores the value in
# $src when the target pattern matches.
sub source {
    parsetarget("src", \$src, @_);
}
# Dest string selected for the current target in the phrase being read.
my $dest;

# Handler for "target: value" lines inside <dest>; stores the value in
# $dest when the target pattern matches.
sub dest {
    parsetarget("dest", \$dest, @_);
}
# Voice string selected for the current target in the phrase being read.
my $voice;

# Handler for "target: value" lines inside <voice>; stores the value in
# $voice when the target pattern matches.
sub voice {
    parsetarget("voice", \$voice, @_);
}
# Filled while scanning the english file: %idmap maps an id string to its
# number, %english maps an id string to the joined lines of that phrase
# (without the <phrase> and </phrase> lines and without empty lines).
210 my %idmap;
211 my %english;
212 if($english) {
213 # For the cases where the english file needs to be scanned/read, we do
214 # it before we read the translated file. For -b it isn't necessary, but for
215 # -u it is convenient.
217 my $idnum=0; # start with a true number
218 my $vidnum=0x8000; # first voice id
219 open(ENG, "<$english") || die "Error: can't open $english";
220 my @phrase;
221 my $id;
222 my $maybeid;
223 my $user;
224 my $maybeuser;
225 my $withindest;
226 my $numphrases = 0;
227 while(<ENG>) {
229 # get rid of DOS newlines
230 $_ =~ s/\r//g;
232 if($_ =~ /^ *\<phrase\>/) {
233 # this is the start of a phrase
235 elsif($_ =~ /^ *\<\/phrase\>/) {
237 # if id is something, then we count and store this phrase
238 if($id) {
239 # voice-only entries get a different range
240 if($id =~ /^VOICE_/) {
241 # Assign an ID number to this entry
242 $idmap{$id}=$vidnum;
243 $vidnum++;
245 else {
246 # Assign an ID number to this entry
247 $idmap{$id}=$idnum;
248 $idnum++;
249 # print STDERR "DEST: bumped idnum to $idnum\n";
252 # this is the end of a phrase, add it to the english hash
253 $english{$id}=join("", @phrase);
255 undef @phrase;
256 $id="";
258 elsif($_ ne "\n") {
259 # gather everything related to this phrase
260 push @phrase, $_;
261 if($_ =~ /^ *\<dest\>/i) {
262 $withindest=1;
263 $deststr="";
265 elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
266 $withindest=0;
# The phrase only gets an id (and is therefore counted and stored at
# </phrase>) when a dest string was found for our target and it is not
# "none", or when running in update mode.
268 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
269 # we unconditionally always use all IDs when the "update"
270 # feature is used
271 $id = $maybeid;
272 $user = $user2num{$maybeuser};
273 # print "DEST: use this id $id\n";
275 else {
276 # print "skip $maybeid for $name\n";
279 elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
280 my ($name, $val)=($1, $2);
281 $dest=""; # in case it is left untouched for when the
282 # model name isn't "our"
283 dest($_, $name, $val);
285 if($dest) {
286 # Store the current dest string. If this target matches
287 # multiple strings, it will get updated several times.
288 $deststr = $dest;
# Remember the most recent id: and user: values; the id also records the
# position of the phrase in the english file for the -s sort.
293 if($_ =~ /^ *id: ([^ \t\n]+)/i) {
294 $maybeid=$1;
295 $sortorder{$maybeid}=$numphrases++;
297 if($_ =~ /^ *user: ([^ \t\n]+)/i) {
298 $maybeuser=$1;
301 close(ENG);
# Compare an english phrase with its translated counterpart and print the
# updated translated phrase to stdout.
#
#   $idstr  - id string of the phrase (unused)
#   $engref - reference to the lines of the english phrase, without
#             trailing newlines
#   $locref - reference to the lines of the translated phrase, with their
#             trailing newlines
#
# The 'desc' field and the <source> section are taken from the english
# phrase. When the translated version differs (ignoring surrounding
# whitespace), the previous translated text is kept as '###' comments in
# front of the english text. All other lines are printed unchanged.
#
# Note that a differing desc line is overwritten in the caller's array,
# since the loop variable aliases the array elements.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);
    my $mode=0;

    # Pass 1: pull the desc and the <source> body out of the english phrase
    for my $l (@$engref) {
        if($l =~ /^ *#/) {
            # comment
            next;
        }
        if($l =~ /^ *desc: (.*)/) {
            $edesc=$1;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                last;
            }
            $esource .= "$l\n";
        }
    }

    # lines waiting to be printed
    my @show;

    # Pass 2: walk the translated phrase and emit it, patched where needed
    $mode = 0;
    for my $l (@$locref) {
        if($l =~ /^ *desc: (.*)/) {
            $ldesc=$1;
            if(trim($edesc) ne trim($ldesc)) {
                $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @show, $l;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
            push @show, $l;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                # end of the translated <source>: flush what we have so far,
                # then print either the english or the translated body
                $mode = 0;
                print @show;
                if(trim($esource) ne trim($lsource)) {
                    print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                    for(split("\n", $lsource)) {
                        print "### $_\n";
                    }
                    print $esource;
                }
                else {
                    print $lsource;
                }
                undef @show; # start over

                push @show, $l;
            }
            else {
                $lsource .= "$l";
            }
        }
        else {
            push @show, $l;
        }
    }

    print @show;
}
381 my $idcount; # counter for lang ID numbers
382 my $voiceid=0x8000; # counter for voice-only ID numbers
385 # Now start the scanning of the selected language string
# The language file is the first non-switch argument. It is read line by
# line; tags open and close sections, and "name: value" lines inside a
# section are dispatched to the sub named after that section.
388 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
389 my @phrase;
# $header stays true until the first line that is neither a comment nor
# empty. $langoptions collects the option bits for the binary file header.
390 my $header = 1;
391 my $langoptions = 0;
393 while(<LANG>) {
395 $line++;
397 # get rid of DOS newlines
398 $_ =~ s/\r//g;
400 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
401 # comment or empty line - output it if it's part of the header
402 if ($header and ($update || $sortfile)) {
403 print($_);
405 next;
407 $header = 0;
409 my $ll = $_;
411 # print "M: $m\n";
# collect the raw lines of what is currently being read
413 push @phrase, $ll;
415 # this is an XML-lookalike tag
416 if (/^(<|[^\"<]+<)([^>]*)>/) {
417 my $part = $2;
418 # print "P: $part\n";
420 if($part =~ /^\//) {
421 # this was a closing tag
423 if($part eq "/phrase") {
424 # closing the phrase
426 my $idstr = $phrase{'id'};
427 my $idnum;
429 if($binary && !$english{$idstr}) {
430 # $idstr doesn't exist for english, skip it\n";
432 elsif($dest =~ /^none\z/i) {
433 # "none" as dest (without quotes) means that this entire
434 # phrase is to be ignored
436 elsif($sortfile) {
437 $allphrases{$idstr}=join('',@phrase);
439 elsif(!$update) {
440 # we don't do the fully detailed analysis when we "update"
441 # since we don't do it for a particular target etc
443 # allow the keyword 'deprecated' to be used on dest and
444 # voice strings to mark that as deprecated. It will then
445 # be replaced with "".
447 $dest =~ s/^deprecate(|d)\z/\"\"/i;
448 $voice =~ s/^deprecate(|d)\z/\"\"/i;
450 # basic syntax error alerts, if there are no quotes we
451 # will assume an empty string was intended
452 if($dest !~ /^\"/) {
453 print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
454 $dest='""';
456 if($src !~ /^\"/) {
457 print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
458 $src='""';
460 if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
461 print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
462 $voice='""';
464 if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
465 print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
468 my $user = $user2num{trim($phrase{'user'})};
469 if(!$user) {
470 print STDERR "$input:$line:1: warning: unknown user!\n";
471 $user = 1;
474 # Use the ID name to figure out which id number range we
475 # should use for this phrase. Voice-only strings are
476 # separated.
478 if($idstr =~ /^VOICE/) {
479 $idnum = $voiceid++;
481 else {
482 $idnum = $idcount++;
# record the phrase in both directions (name <-> number) together with
# the strings that were selected for our target
485 $id{$idstr} = $idnum;
486 $idnum[$idnum]=$idstr;
488 $source{$idstr}=$src;
489 $dest{$idstr}=$dest;
490 $voice{$idstr}=$voice;
492 if($verbose) {
493 print "id: $phrase{id} ($idnum)\n";
494 print "source: $src\n";
495 print "dest: $dest\n";
496 print "voice: $voice\n";
497 print "user: $user\n";
# reset the per-phrase state before the next phrase is read
500 undef $src;
501 undef $dest;
502 undef $voice;
503 undef $user;
504 undef %phrase;
# In update mode, print the phrase merged with its english counterpart and
# mark the english phrase as handled by emptying its entry.
507 if($update) {
508 my $e = $english{$idstr};
510 if($e) {
511 # compare original english with this!
512 my @eng = split("\n", $english{$idstr});
514 compare($idstr, \@eng, \@phrase);
516 $english{$idstr}=""; # clear it
518 else {
519 print "### $idstr: The phrase is not used. Skipped\n";
522 } # end of </phrase>
523 elsif($part eq "/options") {
524 # closing the options
525 if ($options{'rtl'}) {
526 $langoptions |= $LANGUAGE_FLAG_RTL;
528 } # end of </options>
# NOTE(review): this reset follows the </options> branch, so it appears to
# run for every closing tag rather than only for </phrase>. If so, @phrase
# would only hold the lines since the previous closing tag by the time
# </phrase> is handled - verify that -u and -s still see whole phrases.
530 undef @phrase;
532 # starts with a slash, this _ends_ this section
533 $m = pop @m; # get back old value, the previous level's tag
534 next;
535 } # end of tag close
537 # This is an opening (sub) tag
539 push @m, $m; # store old value
540 $m = $part;
541 next;
# A "name: value" line: hand it to the sub named after the current section.
544 if(/^ *([^:]+): *(.*)/) {
545 my ($name, $val)=($1, $2);
546 &$m($_, $name, $val);
549 close(LANG);
# Update mode: every english phrase whose entry is still non-empty was not
# matched by a phrase in the translated file, so print the english version.
551 if($update) {
552 my $any=0;
553 for(keys %english) {
554 if($english{$_}) {
555 print "###\n",
556 "### This phrase below was not present in the translated file\n",
557 "<phrase>\n";
558 print $english{$_};
559 print "</phrase>\n";
# Sort mode: print the collected phrases in the order in which their ids
# appeared in the english file.
564 if ($sortfile) {
565 for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
566 print $allphrases{$_};
# -p mode: write a header file with the enum of string ids and a C file
# with the built-in strings. $prefix is used as a directory here: the files
# are $prefix/lang.h and $prefix/lang_core.c.
570 if($prefix) {
571 # We create a .c and .h file
573 open(HFILE, ">$prefix/lang.h") ||
574 die "Error: couldn't create file $prefix/lang.h\n";
575 open(CFILE, ">$prefix/lang_core.c") ||
576 die "Error: couldn't create file $prefix/lang_core.c\n";
578 # get header file name
579 $headername = "$prefix/lang.h";
580 $headername =~ s/(.*\/)*//;
582 print HFILE <<MOO
583 /* This file was automatically generated using genlang */
585 * The str() macro/functions is how to access strings that might be
586 * translated. Use it like str(MACRO) and expect a string to be
587 * returned!
589 #define str(x) language_strings[x]
591 /* this is the array for holding the string pointers.
592 It will be initialized at runtime. */
593 extern unsigned char *language_strings[];
594 /* this contains the concatenation of all strings, separated by \\0 chars */
595 extern const unsigned char language_builtin[];
597 /* The enum below contains all available strings */
598 enum \{
602 print CFILE <<MOO
603 /* This file was automaticly generated using genlang, the strings come
604 from "$input" */
606 #include "$headername"
608 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
609 const unsigned char language_builtin[] =
613 # Output the ID names for the enum in the header file
614 my $i;
# the regular ids occupy the numbers 0 .. $idcount-1
615 for $i (1 .. $idcount) {
616 my $name=$idnum[$i - 1]; # get the ID name
618 $name =~ s/\"//g; # cut off the quotes
620 printf HFILE (" %s, /* %d */\n", $name, $i-1);
623 # Output separation marker for last string ID and the upcoming voice IDs
625 print HFILE <<MOO
626 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
627 /* --- below this follows voice-only strings --- */
628 VOICEONLY_DELIMITER = 0x8000,
632 # Output the ID names for the enum in the header file
# the voice-only ids occupy the numbers 0x8000 .. $voiceid-1
633 for $i (0x8000 .. ($voiceid-1)) {
634 my $name=$idnum[$i]; # get the ID name
636 $name =~ s/\"//g; # cut off the quotes
638 printf HFILE (" %s,\n", $name);
641 # Output end of enum
642 print HFILE "\n};\n/* end of generated enum list */\n";
644 # Output the target phrases for the source file
645 for $i (1 .. $idcount) {
646 my $name=$idnum[$i - 1]; # get the ID
647 my $dest = $dest{$name}; # get the destination phrase
649 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
651 if(!$dest) {
652 # this is just to be on the safe side
653 $dest = '"\0"';
656 printf CFILE (" %s\n", $dest);
659 # Output end of string chunk
660 print CFILE <<MOO
662 /* end of generated string list */
666 close(HFILE);
667 close(CFILE);
668 } # end of the c/h file generation
# -b mode: write the binary language file. It starts with a four byte
# header (cookie, version, target id, option bits). Each phrase that has a
# dest string then follows as its english id number in two bytes, high byte
# first, and the unquoted dest string terminated by a zero byte.
669 elsif($binary) {
670 # Creation of a binary lang file was requested
672 # We must first scan the english file to get the correct order of the id
673 # numbers used there, as that is what sets the id order for all language
674 # files. The english file is scanned before the translated file was
675 # scanned.
677 open(OUTF, ">$binary") or die "Error: Can't create $binary";
678 binmode OUTF;
679 printf OUTF ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id,
680 $langoptions); # magic lang file header
682 # loop over the target phrases
683 for $i (1 .. $idcount) {
684 my $name=$idnum[$i - 1]; # get the ID
685 my $dest = $dest{$name}; # get the destination phrase
687 if($dest) {
688 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
690 # Now, make sure we get the number from the english sort order:
691 $idnum = $idmap{$name};
693 printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
# -o mode: print "id:" and "voice:" lines to stdout. The first loop builds
# @engl, which maps an english id number to the number the phrase got in
# the translated file. The second loop prints one entry per number and
# emits a NOT_USED_<n> placeholder where no phrase is mapped.
697 elsif($voiceout) {
698 # voice output requested, display id: and voice: strings in a v1-like
699 # fashion
701 my @engl;
703 # This loops over the strings in the translated language file order
704 my @ids = ((0 .. ($idcount-1)));
705 push @ids, (0x8000 .. ($voiceid-1));
707 #for my $id (@ids) {
708 # print "$id\n";
711 for $i (@ids) {
712 my $name=$idnum[$i]; # get the ID
713 my $dest = $voice{$name}; # get the destination voice string
715 if($dest) {
# the unquoted copy in $dest is not used again within this loop
716 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
718 # Now, make sure we get the number from the english sort order:
719 $idnum = $idmap{$name};
721 if(length($idnum)) {
722 $engl[$idnum] = $i;
724 #print "Input index $i output index $idnum\n";
726 else {
727 # not used, mark it so
728 $engl[$i] = -1
733 for my $i (@ids) {
735 my $o = $engl[$i];
737 if(($o < 0) || !length($o)) {
738 print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
739 next;
742 my $name=$idnum[$o]; # get the ID
743 my $dest = $voice{$name}; # get the destination voice string
745 print "#$i ($o)\nid: $name\nvoice: $dest\n";
# With -v, finish by printing the number of regular ids that were assigned
# and the name/value pairs collected from the <header> section.
751 if($verbose) {
752 printf("%d ID strings scanned\n", $idcount);
754 print "* head *\n";
755 for(keys %head) {
756 printf "$_: %s\n", $head{$_};