Don't close http connection manually, the destructor will do that anyway. Fixes some...
[Rockbox.git] / tools / genlang
blob4c0a9d65649a5b123dc5f12e9ef7528cf6bf6aeb
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
13 # binary version for the binary lang file; it is written as the second
# byte of the file header when a binary language file is produced (-b)
14 my $langversion = 4; # 3 was the latest one used in the v1 format
16 # A note for future users and readers: The original v1 language system allowed
17 # the build to create and use a different language than english built-in. We
18 # removed that feature from our build-system, but the build scripts still had
19 # the ability. But, starting now, this ability is no longer provided since I
20 # figured it was boring and unnecessary to write support for now since we
21 # don't use it anymore.
# No input file was named on the command line: print the usage text and stop.
# NOTE(review): the option values ($p, $b, $u, $e, $t, $i, $o, $v) are
# presumably filled in by perl's -s switch on the #! line -- confirm.
23 if(!$ARGV[0]) {
24 print <<MOO
25 Usage: genlang [options] <langv2 file>
27 -p=<prefix>
28 Make the tool create a [prefix].c and [prefix].h file.
30 -b=<outfile>
31 Make the tool create a binary language (.lng) file named [outfile].
32 The use of this option requires that you also use -e, -t and -i.
35 Update language file. Given the translated file and the most recent english
36 file, you\'ll get an updated version sent to stdout. Suitable action to do
37 when you intend to update a translation.
39 -e=<english lang file>
40 Point out the english (original source) file, to use that as master
41 language template. Used in combination with -b or -u.
43 -t=<target>
44 Specify which target you want the translations/phrases for. Required when
45 -b or -p is used.
47 The target can in fact be specified as numerous different strings,
48 separated with colons. This will make genlang to use all the specified
49 strings when searching for a matching phrase.
51 -i=<target id>
52 The target id number, needed for -b.
55 Voice mode output. Outputs all id: and voice: lines for the given target!
58 Enables verbose (debug) output.
59 MOO
61 exit;
64 # How update works:
66 # 1) scan the english file, keep the whole <phrase> for each phrase.
67 # 2) read the translated file, for each end of phrase, compare:
68 # A) all source strings; if anything changed, a comment about the change is
69 # output
70 # B) the desc fields
72 # 3) output the phrase with the comments from above
73 # 4) check which phrases the translated version didn't have, and output the
74 # English version of those
# Copy the single-letter command line switches into descriptively named
# variables.
77 my $prefix = $p;
78 my $binary = $b;
79 my $update = $u;
81 my $english = $e;
82 my $voiceout = $o;
# Count how many of the four output modes were requested; exactly one of
# them must be selected.
84 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
86 if($check > 1) {
87 print "Please use only one of -p, -u, -o and -b\n";
88 exit;
90 if(!$check) {
91 print "Please use at least one of -p, -u, -o and -b\n";
92 exit;
# The binary, update and voice modes all need the english master file.
95 if(($binary || $update || $voiceout) && !$english) {
96 print "Please use -e too when you use -b, -o or -u\n";
97 exit;
# The target id is only mandatory for binary output.
100 my $target_id = $i;
101 if($binary && !$target_id) {
102 print "Please specify a target id number (with -i)!\n";
103 exit;
# Every mode except update needs a target to select phrases for.
106 my $target = $t;
107 if(!$target && !$update) {
108 print "Please specify a target (with -t)!\n";
109 exit;
111 my $verbose=$v;
113 my %id; # string to num hash
114 my @idnum; # num to string array
116 my %source; # id string to source phrase hash
117 my %dest; # id string to dest phrase hash
118 my %voice; # id string to voice phrase hash
120 my $input = $ARGV[0];
# @m is the stack of enclosing tag names and $m is the name of the handler
# sub for the section currently being read; "blank" is the do-nothing
# handler used outside any tag.
122 my @m;
123 my $m="blank";
# Test whether $string matches the wildcard expression $pattern.
#
# Parameters:
#   $string  - the text to test (a target name)
#   $pattern - wildcard expression where '*' stands for any run of
#              characters, including none, and '?' for exactly one character
#
# Returns the result of the anchored regex match: true when the whole of
# $string is matched by $pattern, false otherwise.
#
# Only '*' and '?' are translated; every other character of $pattern is
# handed to the regex engine unchanged.
sub match {
    my ($string, $pattern)=@_;

    # Translate '?' first so the '.*' generated for '*' below is never
    # touched by the '?' substitution. (The earlier '.?*' replacement ended
    # up as '..*', which made '*' demand at least one character.)
    $pattern =~ s/\?/./g;
    $pattern =~ s/\*/.*/g;

    return ($string =~ /^$pattern\z/);
}
# Do-nothing handler: it is the initial value of $m, so "name: value" lines
# that appear outside of any tag are silently ignored.
sub blank {
    return;
}
# Values collected from "name: value" lines handled by header(), keyed by name.
my %head;

# Handler for one "name: value" line: remember the value under its name.
# Arguments are the full input line (not used), the name and the value.
sub header {
    my (undef, $key, $value) = @_;
    $head{$key} = $value;
}
# Fields of the phrase currently being read (e.g. its id), keyed by name.
my %phrase;

# Handler for one "name: value" line: record the field for the current
# phrase. Arguments are the full input line (not used), the name and the
# value.
sub phrase {
    my (undef, $key, $value) = @_;
    $phrase{$key} = $value;
}
# Shared worker for the source/dest/voice handlers.
#
# Arguments:
#   $debug  - label of the calling section (only useful for debug prints)
#   $strref - reference to the scalar that receives the chosen string
#   $full   - the complete input line (not used)
#   $n      - comma separated list of target patterns from the line
#   $v      - the string that belongs to those patterns
#
# $target may hold several names separated by colons. As soon as one of them
# matches one of the patterns in $n, $v is stored through $strref and
# returned. When nothing matches, $$strref is left untouched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;

    my @wanted = split(":", $target);

    foreach my $candidate (split(" *, *", $n)) {
        foreach my $mine (@wanted) {
            next unless match($mine, $candidate);

            $$strref = $v;
            return $v;
        }
    }
}
# Source string selected for the current phrase and target.
my $src;

# Handler for lines inside a <source> section: keep the string in $src when
# the line applies to our target.
sub source {
    my @fields = @_;
    return parsetarget("src", \$src, @fields);
}
# Destination (translated) string selected for the current phrase and target.
my $dest;

# Handler for lines inside a <dest> section: keep the string in $dest when
# the line applies to our target.
sub dest {
    my @fields = @_;
    return parsetarget("dest", \$dest, @fields);
}
# Voice string selected for the current phrase and target.
my $voice;

# Handler for lines inside a <voice> section: keep the string in $voice when
# the line applies to our target.
sub voice {
    my @fields = @_;
    return parsetarget("voice", \$voice, @fields);
}
# Scan the english master file (when -e was given). For every phrase that is
# kept, %idmap receives its id number and %english the phrase text with
# blank lines removed.
183 my %idmap;
184 my %english;
185 if($english) {
186 # For the cases where the english file needs to be scanned/read, we do
187 # it before we read the translated file. For -b it isn't necessary, but for
188 # -u it is convenient.
190 my $idnum=0; # start with a true number
191 my $vidnum=0x8000; # first voice id
192 open(ENG, "<$english") || die "can't open $english";
193 my @phrase;
194 my $id;
195 my $maybeid;
196 my $withindest;
197 while(<ENG>) {
199 # get rid of DOS newlines
200 $_ =~ s/\r//g;
202 if($_ =~ /^ *\<phrase\>/) {
203 # this is the start of a phrase
205 elsif($_ =~ /^ *\<\/phrase\>/) {
207 # if id is something, then we count and store this phrase
208 if($id) {
209 # voice-only entries get a different range
210 if($id =~ /^VOICE_/) {
211 # Assign an ID number to this entry
212 $idmap{$id}=$vidnum;
213 $vidnum++;
215 else {
216 # Assign an ID number to this entry
217 $idmap{$id}=$idnum;
218 $idnum++;
219 # print STDERR "DEST: bumped idnum to $idnum\n";
222 # this is the end of a phrase, add it to the english hash
223 $english{$id}=join("", @phrase);
# reset the per-phrase state before the next <phrase> starts
225 undef @phrase;
226 $id="";
228 elsif($_ ne "\n") {
229 # gather everything related to this phrase
230 push @phrase, $_;
231 if($_ =~ /^ *\<dest\>/i) {
232 $withindest=1;
# NOTE(review): $deststr is not declared with "my" anywhere in the visible
# code, so it is a package variable
233 $deststr="";
235 elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
236 $withindest=0;
# the phrase is kept when updating, or when our target has a dest string
# other than "none"
238 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
239 # we unconditionally always use all IDs when the "update"
240 # feature is used
241 $id = $maybeid;
242 # print "DEST: use this id $id\n";
244 else {
245 # print "skip $maybeid for $name\n";
248 elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
249 my ($name, $val)=($1, $2);
250 $dest=""; # in case it is left untouched for when the
251 # model name isn't "our"
# dest() only sets $dest when the line applies to the selected target
252 dest($_, $name, $val);
254 if($dest) {
255 # Store the current dest string. If this target matches
256 # multiple strings, it will get updated several times.
257 $deststr = $dest;
# remember the most recently seen id: value; it only becomes $id once the
# closing </dest> decides that the phrase is used
262 if($_ =~ /^ *id: ([^ \t\n]+)/i) {
263 $maybeid=$1;
266 close(ENG);
# Merge one translated phrase with its english master and print the updated
# phrase on the currently selected output handle.
#
# Arguments:
#   $idstr  - id of the phrase (not used by the comparison itself)
#   $engref - reference to the lines of the english phrase; the <source>
#             lines get a newline appended here, so they are expected to
#             arrive without one
#   $locref - reference to the lines of the translated phrase, newlines
#             included
#
# What gets compared is the desc field and the contents of the <source>
# section. When either differs, the english text is emitted and the text
# previously used by the translation is kept as "###" comment lines. All
# other lines of the translated phrase are printed unchanged.
sub compare {
    my ($idstr, $engref, $locref) = @_;

    # Pass 1: pick the desc and the <source> body out of the english phrase
    my ($eng_desc, $eng_source);
    my $in_source = 0;

  ENGLISH:
    for my $eline (@$engref) {
        # comment lines never take part in the comparison
        next ENGLISH if $eline =~ /^ *#/;

        if ($eline =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next ENGLISH;
        }
        if ($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next ENGLISH;
        }
        next ENGLISH unless $in_source;

        # nothing after the end of the source section is of interest
        last ENGLISH if $eline =~ / *\<\/source\>/i;

        $eng_source .= "$eline\n";
    }

    # Pass 2: walk the translated phrase, queueing lines in @pending until
    # the end of its <source> section tells us what to print for it
    my ($loc_desc, $loc_source);
    my @pending;

    $in_source = 0;
    for my $lline (@$locref) {
        if ($lline =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            if ($eng_desc ne $loc_desc) {
                # replace the line with a note, the old desc and the new one
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n desc: $eng_desc\n";
            }
            push @pending, $lline;
        }
        elsif ($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
        }
        elsif (!$in_source) {
            push @pending, $lline;
        }
        elsif ($lline !~ / *\<\/source\>/i) {
            # still inside the section, keep collecting
            $loc_source .= "$lline";
        }
        else {
            # end of the section: flush the queue, then the section body
            $in_source = 0;
            print @pending;

            if ($eng_source eq $loc_source) {
                print $loc_source;
            }
            else {
                print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                print "### $_\n" for split("\n", $loc_source);
                print $eng_source;
            }

            # start a fresh queue with the closing tag itself
            @pending = ($lline);
        }
    }

    print @pending;
}
# Read the input language file line by line. Tag lines push and pop the
# name of the current section, "name: value" lines are passed to the sub
# named after that section, and each </phrase> completes one phrase.
346 my $idcount; # counter for lang ID numbers
347 my $voiceid=0x8000; # counter for voice-only ID numbers
350 # Now start the scanning of the selected language string
353 open(LANG, "<$input") || die "couldn't read language file named $input\n";
354 my @phrase;
355 while(<LANG>) {
# input line counter, used in the warnings printed below
357 $line++;
359 # get rid of DOS newlines
360 $_ =~ s/\r//g;
362 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
363 # comment or empty line
364 next;
367 my $ll = $_;
369 # print "M: $m\n";
# keep every remaining line of the phrase; compare() needs them for update
371 push @phrase, $ll;
373 # this is an XML-lookalike tag
374 if (/^(<|[^\"<]+<)([^>]*)>/) {
375 my $part = $2;
376 # print "P: $part\n";
378 if($part =~ /^\//) {
379 # this was a closing tag
381 if($part eq "/phrase") {
382 # closing the phrase
384 my $idstr = $phrase{'id'};
385 my $idnum;
387 if($binary && !$english{$idstr}) {
388 # $idstr doesn't exist for english, skip it\n";
390 elsif($dest =~ /^none\z/i) {
391 # "none" as dest (without quotes) means that this entire
392 # phrase is to be ignored
394 elsif(!$update) {
395 # we don't do the fully detailed analysis when we "update"
396 # since we don't do it for a particular target etc
398 # allow the keyword 'deprecated' to be used on dest and
399 # voice strings to mark that as deprecated. It will then
400 # be replaced with "".
402 $dest =~ s/^deprecate(|d)\z/\"\"/i;
403 $voice =~ s/^deprecate(|d)\z/\"\"/i;
405 # basic syntax error alerts, if there are no quotes we
406 # will assume an empty string was intended
407 if($dest !~ /^\"/) {
408 print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
409 $dest='""';
411 if($src !~ /^\"/) {
412 print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
413 $src='""';
415 if($voice !~ /^\"/) {
416 print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
417 $voice='""';
420 # Use the ID name to figure out which id number range we
421 # should use for this phrase. Voice-only strings are
422 # separated.
424 if($idstr =~ /^VOICE/) {
425 $idnum = $voiceid++;
427 else {
428 $idnum = $idcount++;
# record the phrase in both directions (name <-> number) and keep its strings
431 $id{$idstr} = $idnum;
432 $idnum[$idnum]=$idstr;
434 $source{$idstr}=$src;
435 $dest{$idstr}=$dest;
436 $voice{$idstr}=$voice;
438 if($verbose) {
439 print "id: $phrase{id} ($idnum)\n";
440 print "source: $src\n";
441 print "dest: $dest\n";
442 print "voice: $voice\n";
# forget the per-phrase values so that nothing leaks into the next phrase
445 undef $src;
446 undef $dest;
447 undef $voice;
448 undef %phrase;
451 if($update) {
452 my $e = $english{$idstr};
454 if($e) {
455 # compare original english with this!
456 my @eng = split("\n", $english{$idstr});
458 compare($idstr, \@eng, \@phrase);
# an emptied entry marks the phrase as handled; what is still non-empty
# after the loop was missing from the translated file
460 $english{$idstr}=""; # clear it
462 else {
463 print "### $idstr: The phrase is not used. Skipped\n";
466 undef @phrase;
468 } # end of </phrase>
470 # starts with a slash, this _ends_ this section
471 $m = pop @m; # get back old value, the previous level's tag
472 next;
473 } # end of tag close
475 # This is an opening (sub) tag
477 push @m, $m; # store old value
478 $m = $part;
479 next;
482 if(/^ *([^:]+): *(.*)/) {
483 my ($name, $val)=($1, $2);
# $m holds the name of the current section; this calls the sub of that name
# (a symbolic reference), e.g. phrase, source, dest, voice or blank
484 &$m($_, $name, $val);
487 close(LANG);
# Update mode: append every english phrase the translated file did not have.
# Entries of %english that were matched while reading the translation have
# been emptied by then, so anything still non-empty is missing.
#
# The phrases are emitted in the order of their %idmap numbers, which
# follow the order of the english file within each of the two id ranges.
# Walking the hash directly would give an arbitrary order that can differ
# from run to run.
if($update) {
    for my $missing (sort { $idmap{$a} <=> $idmap{$b} } keys %english) {
        # skip phrases that were already output with the translation
        next unless $english{$missing};

        print "###\n",
            "### This phrase below was not present in the translated file\n",
            "<phrase>\n";
        print $english{$missing};
        print "</phrase>\n";
    }
}
# -p mode: write $prefix.h, holding an enum with one entry per phrase id
# (regular ids first, voice-only ids from 0x8000 on), and $prefix.c, holding
# all destination strings as one array initializer with a \0 after each.
502 if($prefix) {
503 # We create a .c and .h file
505 open(HFILE, ">$prefix.h") ||
506 die "couldn't create file $prefix.h\n";
507 open(CFILE, ">$prefix.c") ||
508 die "couldn't create file $prefix.c\n";
510 print HFILE <<MOO
511 /* This file was automatically generated using genlang */
513 * The str() macro/functions is how to access strings that might be
514 * translated. Use it like str(MACRO) and expect a string to be
515 * returned!
517 #define str(x) language_strings[x]
519 /* this is the array for holding the string pointers.
520 It will be initialized at runtime. */
521 extern unsigned char *language_strings[];
522 /* this contains the concatenation of all strings, separated by \\0 chars */
523 extern const unsigned char language_builtin[];
525 /* The enum below contains all available strings */
526 enum \{
530 print CFILE <<MOO
531 /* This file was automaticly generated using genlang, the strings come
532 from "$input" */
534 #include "$prefix.h"
536 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
537 const unsigned char language_builtin[] =
541 # Output the ID names for the enum in the header file
542 my $i;
543 for $i (1 .. $idcount) {
544 my $name=$idnum[$i - 1]; # get the ID name
546 $name =~ s/\"//g; # cut off the quotes
548 printf HFILE (" %s, /* %d */\n", $name, $i-1);
551 # Output separation marker for last string ID and the upcoming voice IDs
553 print HFILE <<MOO
554 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
555 /* --- below this follows voice-only strings --- */
556 VOICEONLY_DELIMITER = 0x8000,
560 # Output the voice-only ID names for the enum in the header file
561 for $i (0x8000 .. ($voiceid-1)) {
562 my $name=$idnum[$i]; # get the ID name
564 $name =~ s/\"//g; # cut off the quotes
566 printf HFILE (" %s,\n", $name);
569 # Output end of enum
570 print HFILE "\n};\n/* end of generated enum list */\n";
572 # Output the target phrases for the source file
573 for $i (1 .. $idcount) {
574 my $name=$idnum[$i - 1]; # get the ID
575 my $dest = $dest{$name}; # get the destination phrase
577 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
579 if(!$dest) {
580 # this is just to be on the safe side
581 $dest = '"\0"';
584 printf CFILE (" %s\n", $dest);
587 # Output end of string chunk
588 print CFILE <<MOO
590 /* end of generated string list */
594 close(HFILE);
595 close(CFILE);
596 } # end of the c/h file generation
# -b mode: write the binary language file. It starts with a three byte
# header (0x1a, the format version, the target id) followed by one record
# per phrase that has a destination string.
597 elsif($binary) {
598 # Creation of a binary lang file was requested
600 # We must first scan the english file to get the correct order of the id
601 # numbers used there, as that is what sets the id order for all language
602 # files. The english file is scanned before the translated file was
603 # scanned.
605 open(OUTF, ">$binary") or die "Can't create $binary";
606 binmode OUTF;
607 printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
609 # loop over the target phrases
610 for $i (1 .. $idcount) {
611 my $name=$idnum[$i - 1]; # get the ID
612 my $dest = $dest{$name}; # get the destination phrase
614 if($dest) {
615 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
617 # Now, make sure we get the number from the english sort order:
618 $idnum = $idmap{$name};
# record layout: id high byte, id low byte, the string, a terminating NUL
620 printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
# -o mode: print an id:/voice: pair for every id slot, ordered by the id
# numbers taken from the english file.
624 elsif($voiceout) {
625 # voice output requested, display id: and voice: strings in a v1-like
626 # fashion
# @engl is indexed by english id number and holds the matching index into
# @idnum for the translated file, or -1 for an entry marked as not used
628 my @engl;
630 # This loops over the strings in the translated language file order
631 my @ids = ((0 .. ($idcount-1)));
632 push @ids, (0x8000 .. ($voiceid-1));
634 #for my $id (@ids) {
635 # print "$id\n";
638 for $i (@ids) {
639 my $name=$idnum[$i]; # get the ID
640 my $dest = $voice{$name}; # get the destination voice string
642 if($dest) {
643 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
645 # Now, make sure we get the number from the english sort order:
646 $idnum = $idmap{$name};
648 if(length($idnum)) {
649 $engl[$idnum] = $i;
651 #print "Input index $i output index $idnum\n";
653 else {
654 # not used, mark it so
655 $engl[$i] = -1
# second pass: walk the id slots in order and print each one; slots that
# were marked -1 or never filled get a NOT_USED_ placeholder entry
660 for my $i (@ids) {
662 my $o = $engl[$i];
664 if(($o < 0) || !length($o)) {
665 print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
666 next;
669 my $name=$idnum[$o]; # get the ID
670 my $dest = $voice{$name}; # get the destination voice string
672 print "#$i ($o)\nid: $name\nvoice: $dest\n";
# Verbose mode: report how many regular ids were scanned and dump all the
# name/value pairs that were collected in %head.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for my $name (keys %head) {
        # The name is passed as an argument rather than pasted into the
        # format string, so a '%' in a name cannot be read as a conversion.
        printf("%s: %s\n", $name, $head{$name});
    }
}