extend note and warn macro not to fail when outside par mode. Now they also set a...
[Rockbox.git] / tools / genlang2
blob7d82f21b31110721b5bade1414fb05019f6b9c23
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 by Daniel Stenberg
13 # binary version for the binary lang file
14 my $langversion = 2; # 2 is the latest one used in the v1 format
16 # A note for future users and readers: The original v1 language system allowed
17 # the build to create and use a different language than english built-in. We
18 # removed that feature from our build-system, but the build scripts still had
19 # the ability. But, starting now, this ability is no longer provided since I
20 # figured it was boring and unnecessary to write support for now since we
21 # don't use it anymore.
# No language file was given as the first non-switch argument: print the
# usage text below and stop. The bare exit ends the script with status 0.
23 if(!$ARGV[0]) {
24 print <<MOO
25 Usage: genlang2 [options] <langv2 file>
27 -p=<prefix>
28 Make the tool create a [prefix].c and [prefix].h file.
30 -b=<outfile>
31 Make the tool create a binary language (.lng) file namaed [outfile].
32 The use of this option requires that you also use -e.
35 Update language file. Given the translated file and the most recent english
36 file, you\'ll get an updated version sent to stdout. Suitable action to do
37 when you intend to update a translation.
39 -e=<english lang file>
40 Point out the english (original source) file, to use that as master
41 language template. Used in combination with -b or -u.
43 -t=<target>
44 Specify which target you want the translations/phrases for. Required when
45 -b or -p is used.
48 Enables verbose (debug) output.
49 MOO
51 exit;
54 # How update works:
56 # 1) scan the english file, keep the whole <phrase> for each phrase.
57 # 2) read the translated file, for each end of phrase, compare:
58 # A) all source strings, if there's any change there should be a comment about
59 # it output
60 # B) the desc fields
62 # 3) output the phrase with the comments from above
63 # 4) check which phrases the translated version didn't have, and spit out
64 # the english version of those
# Command line switches. The script is started with "perl -s", so a switch
# such as -p=foo arrives here as the package variable $p.
67 my $prefix = $p;
68 my $binary = $b;
69 my $update = $u;
71 my $english = $e;
# Count how many of the three mutually exclusive modes were requested.
73 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0);
75 if($check > 1) {
76 print "Please use only one of -p, -u and -b\n";
77 exit;
79 if(!$check) {
80 print "Please use at least one of -p, -u and -b\n";
81 exit;
# Both -b and -u need the english master file.
83 if(($binary || $update) && !$english) {
84 print "Please use -e too when you use -b or -u\n";
85 exit;
# A target is mandatory for every mode except -u.
88 my $target = $t;
89 if(!$target && !$update) {
90 print "Please specify a target (with -t)!\n";
91 exit;
93 my $verbose=$v;
# Tables filled in while the language file is scanned.
95 my %id; # string to num hash
96 my @idnum; # num to string array
98 my %source; # id string to source phrase hash
99 my %dest; # id string to dest phrase hash
100 my %voice; # id string to voice phrase hash
103 my $input = $ARGV[0];
# $m holds the name of the sub that is called for each "name: value" line;
# it starts out as "blank". @m is the stack of the enclosing sections' names,
# pushed on an opening tag and popped on a closing tag by the scan loop.
105 my @m;
106 my $m="blank";
# Test whether $string matches the shell-style wildcard $pattern.
#
#   $string   - the text to test (the selected target name)
#   $pattern  - wildcard pattern: '*' matches any run of characters,
#               including none; '?' matches exactly one character; every
#               other character only matches itself
#
# Returns a true value when the WHOLE string matches the pattern.
sub match {
    my ($string, $pattern)=@_;

    # Translate the pattern one character at a time. This way the regex
    # emitted for '*' can never be re-processed by the '?' rule (which used
    # to turn '*' into "one or more characters"), and regex metacharacters
    # in a target name are taken literally.
    my %wildcard = ("*" => ".*", "?" => ".");
    my $regex = join("",
                     map { exists $wildcard{$_} ? $wildcard{$_} : quotemeta($_) }
                     split(//, $pattern));

    # Anchor at both ends so that "recorder" does not match "recorderv2".
    return ($string =~ /\A$regex\z/);
}
# Handler used while no section is open ($m starts out as "blank"):
# "name: value" lines seen there are deliberately ignored.
sub blank {
    return;
}
# Fields collected by header(), keyed by field name.
my %head;

# Section handler: remember one "name: value" pair.
#   arguments: the full input line (unused), the field name, the field value
#   returns:   the stored value
sub header {
    my (undef, $field, $value) = @_;
    return $head{$field} = $value;
}
# Fields of the phrase currently being read (id, desc, ...), keyed by name.
my %phrase;

# Section handler: remember one "name: value" pair of the current phrase.
#   arguments: the full input line (unused), the field name, the field value
#   returns:   the stored value
sub phrase {
    my (undef, $field, $value) = @_;
    return $phrase{$field} = $value;
}
# Shared worker behind the source(), dest() and voice() handlers.
#
#   $debug   - label of the calling section (not used by any live code)
#   $strref  - reference to the scalar that receives the selected value
#   $full    - the complete input line (unused)
#   $n       - comma separated list of target patterns
#   $v       - the value that belongs to those targets
#
# Every pattern is tried against the file-level $target. The result is $v
# when at least one pattern matched and undef otherwise; it is written
# through $strref only when it is true, and it is always returned.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    my @patterns = split(" *, *", $n);
    my $hits     = grep { match($target, $_) } @patterns;
    my $string   = $hits ? $v : undef;

    $$strref = $string if($string);

    return $string;
}
# Value selected for the current phrase by source() below.
my $src;

# Handler for "targets: value" lines while the section named "source" is open.
sub source {
    return parsetarget("src", \$src, @_);
}
# Value selected for the current phrase by dest() below.
my $dest;

# Handler for "targets: value" lines while the section named "dest" is open.
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# Value selected for the current phrase by voice() below.
my $voice;

# Handler for "targets: value" lines while the section named "voice" is open.
sub voice {
    return parsetarget("voice", \$voice, @_);
}
my %english; # id string => text of the english phrase (all lines inside <phrase>)

# id string => ID number, numbered in the order the IDs appear in the english
# file. This has to live at file scope: the binary (-b) output further down
# looks the numbers up here, and a declaration inside the if-block below
# would leave that lookup with an empty package variable.
my %idmap;

if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    open(my $eng_fh, "<", $english) || die "can't open $english: $!";
    my @phrase;
    my $id;
    while(<$eng_fh>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {
            # this is the end of a phrase, add it to the english hash
            $english{$id}=join("", @phrase);
            undef @phrase;
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
        }

        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $id=$1;
            # Skip voice-only entries; they get no number in %idmap
            if($id =~ /^VOICE_/) {
                next;
            }

            # Assign an ID number to this entry
            $idmap{$id}=$idnum;
            $idnum++;
        }
    }
    close($eng_fh);
}
# Compare one translated phrase against its english original and print the
# updated phrase to the currently selected output handle.
#
#   $idstr   - the phrase ID (not used inside the function)
#   $engref  - ref to the english phrase lines, WITHOUT trailing newlines
#   $locref  - ref to the translated phrase lines, WITH trailing newlines
#
# Two things are compared: the "desc:" field and the body of <source>.
# Where they differ the english version is printed and the previously used
# text is kept as "###" comment lines. Everything else is printed as it
# was. A rewritten desc line is also stored back into the caller's array.
sub compare {
    my ($idstr, $engref, $locref)=@_;

    # Pass 1: pick the desc text and the <source> body out of the english
    my ($edesc, $esource);
    my $in_source = 0;

    for my $eline (@$engref) {
        if($eline =~ /^ *desc: (.*)/) {
            $edesc = $1;
            next;
        }
        if($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next;
        }
        next if(!$in_source);
        last if($eline =~ / *\<\/source\>/i);

        $esource .= "$eline\n";
    }

    # Pass 2: walk the translated phrase and print it with corrections
    my ($ldesc, $lsource);
    my @pending; # lines collected but not yet printed

    $in_source = 0;
    for my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $ldesc = $1;
            if($edesc ne $ldesc) {
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @pending, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
        }
        elsif(!$in_source) {
            push @pending, $lline;
        }
        elsif($lline =~ / *\<\/source\>/i) {
            # end of the translated <source>: flush what we have, then
            # print either the english or the unchanged source body
            $in_source = 0;
            print @pending;
            if($esource ne $lsource) {
                print "### The <source> section differs from the english!\n",
                      "### the previously used one is commented below:\n";
                print "### $_\n" for split("\n", $lsource);
                print $esource;
            }
            else {
                print $lsource;
            }
            @pending = ($lline); # start over with the closing tag
        }
        else {
            $lsource .= "$lline";
        }
    }

    print @pending;
}
my $idcount=0; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#
# The file is read line by line. A line holding a <tag> opens a section and
# makes the sub named like the tag the current handler ($m); a </tag> line
# restores the previous handler. Any other "name: value" line is passed to
# the current handler. When </phrase> is reached, the values the handlers
# collected for the phrase are stored in the id/source/dest/voice tables.
#

# Without this check a missing input file would silently produce empty output
open(my $lang_fh, "<", $input) || die "can't open $input: $!";
my @phrase; # the raw lines of the phrase being read, used by -u
while(<$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    push @phrase, $_;

    # this is an XML-lookalike tag
    if(/ *<([^>]*)>/) {
        my $part = $1;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($dest =~ /^none\z/i) {
                    # "none" as dest means that this entire phrase is to be
                    # ignored
                    print "dest is NONE!\n";
                }
                else {

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }
                }

                # Forget the per-phrase state for ignored phrases too.
                # Otherwise a "none" dest would stick and hide the next
                # phrase whenever that phrase has no dest line matching
                # the target.
                undef $src;
                undef $dest;
                undef $voice;
                undef %phrase;

                if($update) {
                    if($english{$idstr}) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;  # the tag name is the name of the handler sub
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # call the handler sub whose name is stored in $m
        &$m($_, $name, $val);
    }
}
close($lang_fh);
if($update) {
    # Whatever is still non-empty in %english was never seen in the
    # translated file (matched phrases were cleared while scanning), so
    # append the english version of those phrases. The IDs are sorted to
    # make the output identical from run to run; plain hash order is not.
    for my $idstr (sort keys %english) {
        next if(!$english{$idstr});

        print "###\n",
              "### This phrase below was not present in the translated file\n",
              "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}
# -p mode: write [prefix].h (the enum of all string IDs plus declarations)
# and [prefix].c (the built-in strings, each one terminated by \0).
413 if($prefix) {
414 # We create a .c and .h file
# NOTE(review): neither open() result is checked; on failure the prints
# below go to unopened handles.
416 open(HFILE, ">$prefix.h");
417 open(CFILE, ">$prefix.c");
419 print HFILE <<MOO
420 /* This file was automatically generated using genlang2 */
422 * The str() macro/functions is how to access strings that might be
423 * translated. Use it like str(MACRO) and expect a string to be
424 * returned!
426 #define str(x) language_strings[x]
428 /* this is the array for holding the string pointers.
429 It will be initialized at runtime. */
430 extern unsigned char *language_strings[];
431 /* this contains the concatenation of all strings, separated by \\0 chars */
432 extern const unsigned char language_builtin[];
434 /* The enum below contains all available strings */
435 enum {
439 print CFILE <<MOO
440 /* This file was automaticly generated using genlang2, the strings come
441 from "$input" */
443 #include "$prefix.h"
445 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
446 const unsigned char language_builtin[] =
450 # Output the ID names for the enum in the header file
451 my $i;
452 for $i (1 .. $idcount) {
453 my $name=$idnum[$i - 1]; # get the ID name
455 $name =~ s/\"//g; # cut off the quotes
457 printf HFILE (" %s,\n", $name);
460 # Output separation marker for last string ID and the upcoming voice IDs
462 print HFILE <<MOO
463 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
464 /* --- below this follows voice-only strings --- */
465 VOICEONLY_DELIMITER = 0x8000,
469 # Output the ID names for the enum in the header file
# (this loop covers the voice-only IDs, which are numbered from 0x8000)
# NOTE(review): this is a second "my $i" after the one declared above.
470 my $i;
471 for $i (0x8000 .. ($voiceid-1)) {
472 my $name=$idnum[$i]; # get the ID name
474 $name =~ s/\"//g; # cut off the quotes
476 printf HFILE (" %s,\n", $name);
479 # Output end of enum
480 print HFILE "\n};\n/* end of generated enum list */\n";
482 # Output the target phrases for the source file
483 for $i (1 .. $idcount) {
484 my $name=$idnum[$i - 1]; # get the ID
485 my $dest = $dest{$name}; # get the destination phrase
487 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
489 if(!$dest) {
490 # this is just to be on the safe side
491 $dest = '"\0"';
494 printf CFILE (" %s\n", $dest);
497 # Output end of string chunk
498 print CFILE <<MOO
500 /* end of generated string list */
504 close(HFILE);
505 close(CFILE);
506 } # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.
    #
    # File layout: the two header bytes 0x1a and $langversion, followed by
    # one record per translated string: ID high byte, ID low byte, the
    # string without its surrounding quotes, and a terminating zero byte.

    open(my $out_fh, ">", $binary) or die "Can't create $binary: $!";
    binmode $out_fh;
    printf {$out_fh} ("\x1a%c", $langversion); # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $engid = $idmap{$name};

            printf {$out_fh} ("%c%c%s\x00", ($engid>>8), ($engid&0xff), $dest);

            # This trace used to test an undefined $debug and print an
            # undefined $value; it now follows -v and shows the string.
            if($verbose) {
                printf("%02x => %s\n", $engid, $dest);
            }
        }
    }

    # buffered write errors only show up when the file is closed
    close($out_fh) or die "Can't write $binary: $!";
}
539 if($verbose) {
540 printf("%d ID strings scanned\n", $idcount);
542 print "* head *\n";
543 for(keys %head) {
544 printf "$_: %s\n", $head{$_};