Final changes to the recording naming code, this should make jhMikeS happy ;)
[Rockbox.git] / tools / genlang
blobf213057a3ce0bc64ef01515407cc57b9258d3b6d
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
13 # binary version for the binary lang file
14 my $langversion = 3; # 3 was the latest one used in the v1 format
16 # A note for future users and readers: The original v1 language system allowed
17 # the build to create and use a different language than english built-in. We
18 # removed that feature from our build-system, but the build scripts still had
19 # the ability. But, starting now, this ability is no longer provided since I
20 # figured it was boring and unnecessary to write support for now since we
21 # don't use it anymore.
# Without an input file there is nothing to do: show the usage text and stop.
# The text is kept in sync with the option checks further down: -e is
# demanded for -b, -o and -u, and -t for every mode except -u.
if(!$ARGV[0]) {
    print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}
61 # How update works:
63 # 1) scan the english file, keep the whole <phrase> for each phrase.
64 # 2) read the translated file, for each end of phrase, compare:
65 # A) all source strings, if there's any change there should be a comment about
66 # it output
67 # B) the desc fields
69 # 3) output the phrase with the comments from above
70 # 4) check which phrases that the translated version didn't have, and spit out
71 # the english version of those
# Pick up the command line switches. The script is started with perl -s, so
# every -x=value switch has already been stored in the package variable $x.
my ($prefix, $binary, $update) = ($p, $b, $u);

my $english  = $e;
my $voiceout = $o;

# Number of output modes that were requested; exactly one is allowed.
my $check = grep { $_ } ($binary, $prefix, $update, $voiceout);

if($check > 1) {
    print "Please use only one of -p, -u, -o and -b\n";
    exit;
}
unless($check) {
    print "Please use at least one of -p, -u, -o and -b\n";
    exit;
}
if(!$english && ($binary || $update || $voiceout)) {
    print "Please use -e too when you use -b, -o or -u\n";
    exit;
}

# Every mode except update works on the phrases of one specific target.
my $target = $t;
unless($target || $update) {
    print "Please specify a target (with -t)!\n";
    exit;
}
my $verbose=$v;

my %id;     # string to num hash
my @idnum;  # num to string array

my %source; # id string to source phrase hash
my %dest;   # id string to dest phrase hash
my %voice;  # id string to voice phrase hash

my $input = $ARGV[0];

my @m;          # stack of the enclosing tag names
my $m="blank";  # name of the handler sub for the current section
# Glob-style comparison: returns true when $string matches $pattern from
# start to end. Within $pattern a '*' stands for any run of characters
# (including none at all) and a '?' for exactly one character; all other
# characters only match themselves.
sub match {
    my ($string, $pattern)=@_;

    my %wildcard = ('*' => '.*', '?' => '.');

    # Translate the glob piece by piece. Everything that is not a wildcard is
    # quoted so that regex operators in a name ('.', '+', ...) stay literal.
    my $regex = join("", map {
        exists $wildcard{$_} ? $wildcard{$_} : quotemeta($_)
    } split(/([*?])/, $pattern));

    return ($string =~ /^$regex\z/);
}
# Default section handler ($m starts out as "blank"): "name: value" lines
# seen while it is active are deliberately ignored.
sub blank {
    return;
}
# Name/value pairs collected by header().
my %head;

# Section handler, called as header(line, name, value): stores the value
# under its name in %head and returns the stored value. The complete line is
# not used.
sub header {
    my (undef, $key, $value) = @_;
    return $head{$key} = $value;
}
# Name/value pairs of the phrase that is currently being read (its 'id'
# entry is looked up when the phrase is closed).
my %phrase;

# Section handler, called as phrase(line, name, value): stores the value
# under its name in %phrase and returns the stored value. The complete line
# is not used.
sub phrase {
    my (undef, $key, $value) = @_;
    return $phrase{$key} = $value;
}
# Common worker for the source/dest/voice handlers.
#
#   $debug   label of the calling handler (only of use for debug prints)
#   $strref  reference to the scalar that receives the value on a match
#   $full    the complete input line (unused)
#   $n       comma separated list of target patterns in front of the colon
#   $v       the value after the colon
#
# When any pattern in $n matches any of the colon separated names in the
# global $target, $v is stored through $strref and returned. Without a match
# the referenced scalar is left untouched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    my @wanted = split(":", $target);

    for my $pattern (split(" *, *", $n)) {
        for my $part (@wanted) {
            next unless match($part, $pattern);

            $$strref = $v;
            return $v;
        }
    }
}
# Value of the most recent source line that matched the selected target.
my $src;

# Section handler, called as source(line, name, value): hands the line over
# to parsetarget(), which fills in $src when the line applies to the target.
sub source {
    my @line = @_;
    return parsetarget("src", \$src, @line);
}
# Value of the most recent dest line that matched the selected target.
my $dest;

# Section handler, called as dest(line, name, value): hands the line over to
# parsetarget(), which fills in $dest when the line applies to the target.
sub dest {
    my @line = @_;
    return parsetarget("dest", \$dest, @line);
}
# Value of the most recent voice line that matched the selected target.
my $voice;

# Section handler, called as voice(line, name, value): hands the line over
# to parsetarget(), which fills in $voice when the line applies to the target.
sub voice {
    my @line = @_;
    return parsetarget("voice", \$voice, @line);
}
my %idmap;   # id string to the ID number it has in the english file
my %english; # id string to the complete text of the english phrase
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0;       # next ID number for ordinary phrases
    my $vidnum=0x8000; # next ID number for voice-only phrases
    open(my $engfh, '<', $english) || die "can't open $english: $!";
    my @phrase;        # lines of the phrase currently being read
    my $id;            # ID of the current phrase, once it is known to be used
    my $maybeid;       # ID from the latest "id:" line
    my $withindest;    # true while inside a <dest> section
    while(<$engfh>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {

            # if id is something, then we count and store this phrase
            if($id) {
                # voice-only entries get a different range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap{$id}=$vidnum;
                    $vidnum++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap{$id}=$idnum;
                    $idnum++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
            if($_ =~ /^ *\<dest\>/i) {
                $withindest=1;
            }
            elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
                $withindest=0;
            }
            elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($_, $name, $val);

                if($update || ($dest && ($dest !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                }
            }
        }

        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
        }
    }
    close($engfh);
}
# Compare one translated phrase with its english original and print the
# (possibly annotated) translated phrase.
#
#   $idstr   ID of the phrase (not used in here)
#   $engref  reference to the english phrase lines, without newlines
#   $locref  reference to the translated phrase lines, with newlines
#
# A translated desc line that differs from the english one is replaced by
# the english desc, preceded by ### comments that show the old text. A
# differing <source> section gets the same treatment. Everything else is
# printed as it was read.
sub compare {
    my ($idstr, $engref, $locref)=@_;

    # First pass: fetch the desc and the <source> lines of the english phrase
    my ($edesc, $esource);
    my $insource = 0;
    for my $eline (@$engref) {
        if($eline =~ /^ *desc: (.*)/) {
            $edesc = $1;
            next;
        }
        if($eline =~ / *\<source\>/i) {
            $insource = 1;
            next;
        }
        next unless $insource;
        last if($eline =~ / *\<\/source\>/i);
        $esource .= "$eline\n";
    }

    # Second pass: walk the translated phrase and print it piece by piece
    my ($ldesc, $lsource);
    my @pending; # lines that are waiting to be printed
    $insource = 0;
    for my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $ldesc = $1;
            # note: this assignment writes through to the caller's array
            $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n"
                if($edesc ne $ldesc);
            push @pending, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $insource = 1;
            push @pending, $lline;
        }
        elsif(!$insource) {
            push @pending, $lline;
        }
        elsif($lline !~ / *\<\/source\>/i) {
            # still collecting the translated file's <source> lines
            $lsource .= $lline;
        }
        else {
            # the <source> section is complete, flush what we have so far
            $insource = 0;
            print @pending;
            if($esource eq $lsource) {
                print $lsource;
            }
            else {
                print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                print "### $_\n" for(split("\n", $lsource));
                print $esource;
            }
            @pending = ($lline); # start over with the closing tag
        }
    }

    print @pending;
}
my $idcount = 0;    # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers
my $line = 0;       # current line number of $input, used in the warnings

#
# Now start the scanning of the selected language string
#
# The file is read line by line. A tag line switches the current handler sub
# (its name is kept in $m, the enclosing ones on the @m stack), "name: value"
# lines are passed to the current handler, and the actual work is done when a
# </phrase> tag closes a phrase.
#

open(my $langfh, '<', $input) ||
    die "couldn't read language file named $input: $!\n";
my @phrase; # all lines of the phrase currently being read
while(<$langfh>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    my $ll = $_;

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/) {
                        print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                        $voice='""';
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }
                }

                # forget everything that was collected for this phrase
                undef $src;
                undef $dest;
                undef $voice;
                undef %phrase;

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # call the handler sub that is named like the current tag
        &$m($_, $name, $val);
    }
}
close($langfh);
# Update mode: every english phrase that was found in the translated file
# has been cleared from %english by now. What is left was missing from the
# translation and gets appended in its english form. The phrases are sorted
# by their english ID number (ID name as tie-breaker) so that the output is
# the same on every run instead of following the random hash order.
if($update) {
    my @missing = grep { $english{$_} } keys %english;

    for my $idstr (sort { $idmap{$a} <=> $idmap{$b} or $a cmp $b } @missing) {
        print "###\n",
            "### This phrase below was not present in the translated file\n",
            "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}
if($prefix) {
    # We create a .c and .h file: the header gets an enum with all ID names,
    # the C file gets all target strings as one \0 separated constant.

    open(my $hfile, '>', "$prefix.h") ||
        die "couldn't create file $prefix.h: $!\n";
    open(my $cfile, '>', "$prefix.c") ||
        die "couldn't create file $prefix.c: $!\n";

    print {$hfile} <<MOO;
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO

    print {$cfile} <<MOO;
/* This file was automaticly generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO

    # Output the ID names for the enum in the header file
    for my $i (0 .. $idcount - 1) {
        my $name = $idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        print {$hfile} " $name,\n";
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print {$hfile} <<MOO;
 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 /* --- below this follows voice-only strings --- */
 VOICEONLY_DELIMITER = 0x8000,
MOO

    # Output the voice-only ID names for the enum in the header file
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name = $idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        print {$hfile} " $name,\n";
    }

    # Output end of enum
    print {$hfile} "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (0 .. $idcount - 1) {
        my $name = $idnum[$i];     # get the ID
        my $string = $dest{$name}; # get the destination phrase

        $string =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$string) {
            # this is just to be on the safe side
            $string = '"\0"';
        }

        print {$cfile} " $string\n";
    }

    # Output end of string chunk
    print {$cfile} <<MOO;
;
/* end of generated string list */
MOO

    # buffered write errors only show up when the files are closed
    close($hfile) || die "couldn't write file $prefix.h: $!\n";
    close($cfile) || die "couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Can't create $binary: $!";
    binmode $outf;
    print {$outf} "\x1a", chr($langversion); # magic lang file header

    # loop over the target phrases
    for my $i (0 .. $idcount - 1) {
        my $name = $idnum[$i];     # get the ID
        my $string = $dest{$name}; # get the destination phrase

        next unless $string;

        $string =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

        # Now, make sure we get the number from the english sort order:
        my $engnum = $idmap{$name};

        # one record: the ID as two bytes, high byte first, followed by the
        # \0 terminated string
        print {$outf} pack("n", $engnum), $string, "\x00";
    }

    # buffered write errors only show up when the file is closed
    close($outf) or die "Can't write $binary: $!";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english ID number to ID number in the translated file

    # This loops over the strings in the translated language file order
    my @ids = (0 .. ($idcount-1));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $i (@ids) {
        my $name = $idnum[$i]; # get the ID

        next unless $voice{$name};

        # Now, make sure we get the number from the english sort order. A
        # phrase without an english number must not be stored at all, an
        # undefined index would end up overwriting slot 0.
        my $engnum = $idmap{$name};
        if(defined $engnum) {
            $engl[$engnum] = $i;
        }
    }

    for my $i (@ids) {

        my $o = $engl[$i];

        if(!defined $o) {
            # No phrase maps to this number. Say so, instead of falling back
            # to index 0 and repeating the first phrase here.
            print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
            next;
        }

        my $name = $idnum[$o];    # get the ID
        my $string = $voice{$name}; # get the destination voice string

        print "#$i\nid: $name\nvoice: $string\n";
    }
}
# Verbose mode: finish with the number of scanned IDs and a dump of all the
# name/value pairs that were stored in %head.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for my $key (keys %head) {
        # the key is passed as an argument; as part of the format string a
        # '%' in it would be taken for a conversion
        printf "%s: %s\n", $key, $head{$key};
    }
}