FS#11950: Update Czech language file.
[maemo-rb.git] / tools / genlang
blob22d020fe5a3af1ac3e13002540dc5516971f51ae
#!/usr/bin/perl -s
#             __________               __   ___.
#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
#                     \/            \/     \/    \/            \/
# $Id$
#
# Copyright (C) 2006 - 2008 by Daniel Stenberg
#
# Values that describe the binary lang file layout.
# See apps/language.c (TODO: Use common include for both)

# The cookie and the format version open the binary lang file header; the
# RTL flag is or'ed into the options byte of that header.
my ($LANGUAGE_COOKIE, $LANGUAGE_VERSION) = (0x1a, 0x06);
my $LANGUAGE_FLAG_RTL = 0x01;

# Number of bytes in the file header and in each per-user subheader.
my ($HEADER_SIZE, $SUBHEADER_SIZE) = (4, 6);
# A note for future users and readers: The original v1 language system allowed
# the build to create and use a built-in language other than English. We
# removed that feature from our build system, but the build scripts still had
# the ability. Starting now, this ability is no longer provided, since I
# figured it was boring and unnecessary to write support for something we
# don't use anymore.
# Without a language file argument there is nothing to work on: print the
# usage text and stop.
#
# The option list below names every switch the script reads ($p, $b, $u, $e,
# $s, $t, $i, $o and $v) and states the requirements the option checks
# further down actually enforce.
if(!$ARGV[0]) {
    print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create the files [prefix]/lang.h and [prefix]/lang_core.c.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you'll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o, -u or -s.

 -s
    Sort the language file so that its phrases come in the same order as the
    strings in the English file.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}
# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file and, at the end of each phrase, compare:
#    A) all source strings; if anything changed, a comment about it is
#       output
#    B) the desc fields
#
# 3) output the phrase with the comments from above
# 4) check which phrases the translated version didn't have, and spit out
#    the english version of those
# Pick up the command line switches. The script is started with "perl -s",
# so a switch such as -p=foo arrives in the package variable $p.
my $prefix = $p;
my $binary = $b;
my $update = $u;
my $sortfile = $s;

my $english = $e;
my $voiceout = $o;

# Exactly one of the five operating modes must be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);

# Every usage error below ends with a non-zero exit status so that a calling
# build notices the failure instead of carrying on without any output.
if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
    exit 1;
}
if(!$check) {
    print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
    exit 1;
}

if(($binary || $update || $voiceout || $sortfile) && !$english) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit 1;
}

my $target_id = $i;
if($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit 1;
}

# A target is needed by every mode except update and sort.
my $target = $t;
if(!$target && !$update && !$sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}
my $verbose=$v;

my %id; # string to num hash
my @idnum; # num to string array

my %allphrases; # For sorting - an array of the <phrase> elements
my %source; # id string to source phrase hash
my %dest; # id string to dest phrase hash
my %voice; # id string to voice phrase hash

# user name to user number; the 'core' user always has number 0
my %users =
    ('core' => 0);

my $input = $ARGV[0];

# While the language file is scanned, $m holds the name of the handler sub
# for the innermost open tag and @m the handlers of the enclosing tags.
my @m;
my $m="blank";
# Return a copy of the given string with the whitespace at its start and at
# its end removed. The caller's string is left untouched.
sub trim {
    my ($text) = @_;
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
# Tell whether $string matches the glob-style $pattern as a whole.
#
# In the pattern '*' stands for any run of characters (including none) and
# '?' for exactly one character. Every other character only matches itself:
# it is escaped before the regex is built, so characters like '.', '+' or
# '(' in a pattern can neither act as regex syntax nor abort the script
# with a regex compile error.
#
# Returns true on a match, false otherwise.
sub match {
    my ($string, $pattern)=@_;

    my %wildcard = ('*' => '.*', '?' => '.');

    # translate the glob into a regex one character at a time
    my $regex = join('',
                     map { exists $wildcard{$_} ? $wildcard{$_} : quotemeta($_) }
                     split(//, $pattern));

    return ($string =~ /^$regex\z/);
}
# Handler that is active while no tag is open ($m starts out as "blank"):
# "name: value" lines seen in that state are ignored.
sub blank {
    return;
}
# name => value pairs collected by header()
my %head;

# Handler for "name: value" lines inside a <header> tag: remember the value
# under its name. The first argument (the complete line) is not used.
sub header {
    my (undef, $key, $value) = @_;
    return $head{$key} = $value;
}
# name => value pairs of the phrase that is currently being read
my %phrase;

# Handler for "name: value" lines directly inside a <phrase> tag: remember
# the value under its name. The first argument (the complete line) is not
# used.
sub phrase {
    my (undef, $key, $value) = @_;
    return $phrase{$key} = $value;
}
# name => value pairs collected by options()
my %options;

# Handler for "name: value" lines inside an <options> tag: remember the
# value under its name. The first argument (the complete line) is not used.
sub options {
    my (undef, $key, $value) = @_;
    return $options{$key} = $value;
}
# Shared worker behind the source(), dest() and voice() handlers.
#
# Arguments:
#   1. a label naming the calling handler (only of use while debugging)
#   2. reference to the scalar that receives the selected value
#   3. the complete input line (not used)
#   4. the name part of the line: one or more target patterns, separated
#      by commas
#   5. the value part of the line
#
# Each target pattern is tried, in order, against every colon separated part
# of the global $target. On the first hit the value is stored through the
# reference and returned. Without a hit the referenced scalar stays as it
# was.
sub parsetarget {
    my ($label, $result_ref, $whole_line, $patterns, $value)=@_;

    my @wanted = split(":", $target);

    for my $pattern (split(" *, *", $patterns)) {
        # print "TEST ($label) $target for $pattern\n";
        for my $candidate (@wanted) {
            next unless match($candidate, $pattern);

            # print "MATCH: $pattern => $value\n";
            $$result_ref = $value;
            return $value;
        }
    }
}
# source string picked for our target in the phrase being read
my $src;

# Handler for "target: value" lines inside a <source> tag: hands the line to
# parsetarget(), which stores the value in $src when the target matches.
sub source {
    return parsetarget("src", \$src, @_);
}
# dest (translated) string picked for our target in the phrase being read
my $dest;

# Handler for "target: value" lines inside a <dest> tag: hands the line to
# parsetarget(), which stores the value in $dest when the target matches.
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# voice string picked for our target in the phrase being read
my $voice;

# Handler for "target: value" lines inside a <voice> tag: hands the line to
# parsetarget(), which stores the value in $voice when the target matches.
sub voice {
    return parsetarget("voice", \$voice, @_);
}
my %idmap;     # NOTE(review): not referenced by any visible code, which only
               # ever uses the array @idmap below; kept in case code outside
               # this view relies on it
my @idmap;     # indexed by user number; each element is a hash that maps a
               # phrase id string to the id number it has in the english file
my %english;   # phrase id string => the lines of the english phrase
my %sortorder; # phrase id string => position of the phrase in the english
               # file
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    # Next free id number per user number. Only user 0 ('core') gets a start
    # value here.
    # NOTE(review): other users therefore start from undef - confirm that
    # this is intended.
    my @next_id = (0);            # start with a true number
    my @next_voice_id = (0x8000); # first voice id

    open(my $eng_fh, '<', $english)
        or die "Error: can't open $english: $!";

    my @phrase;         # lines of the phrase being read
    my $id;             # id of the phrase, once we know it is used
    my $maybeid;        # id seen in the phrase, not yet known to be used
    my $user;           # user number given by the latest "user:" line
    my $withindest;     # true while inside <dest> ... </dest>
    my $deststr;        # dest string that matched our target
    my $numphrases = 0;
    my $numusers = 1;   # core is already in the users map

    while(my $line = <$eng_fh>) {

        # get rid of DOS newlines
        $line =~ s/\r//g;

        if($line =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($line =~ /^ *\<\/phrase\>/) {

            # if id is something, we count and store this phrase
            if($id) {
                # voice-only entries get a different range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id} = $next_voice_id[$user];
                    $next_voice_id[$user]++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id} = $next_id[$user];
                    $next_id[$user]++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id} = join("", @phrase);
            }
            undef @phrase;
            $id = "";
        }
        elsif($line ne "\n") {
            # gather everything related to this phrase
            push @phrase, $line;

            if($line =~ /^ *\<dest\>/i) {
                $withindest = 1;
                $deststr = "";
            }
            elsif($withindest && ($line =~ /^ *\<\/dest\>/i)) {
                $withindest = 0;

                # we unconditionally always use all IDs when the "update"
                # feature is used; otherwise the phrase only counts when it
                # has a dest string for our target that isn't "none"
                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    $id = $maybeid;
                }
            }
            elsif($withindest && ($line =~ / *([^:]+): *(.*)/)) {
                my ($name, $val) = ($1, $2);

                # reset first, since dest() leaves $dest untouched when the
                # model name isn't "ours"
                $dest = "";
                dest($line, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        if($line =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid = $1;
            $sortorder{$maybeid} = $numphrases++;
        }
        if($line =~ /^ *user: ([^ \t\n]+)/i) {
            my $username = $1;
            $user = $users{$username};
            if(!(defined $user)) {
                # first time we see this user, give it the next number
                $user = ++$numusers;
                $users{$username} = $user;
            }
        }
    }
    close($eng_fh);
}
# Compare one translated phrase with its english counterpart and print the
# updated phrase.
#
# Arguments:
#   $idstr  - id string of the phrase (not used)
#   $engref - reference to the lines of the english phrase; a newline is put
#             back after each line that is collected here
#   $locref - reference to the lines of the translated phrase, which are
#             used exactly as they are
#
# The desc field and the <source> section are checked. Where the translated
# phrase differs from the english one, the english text takes its place and
# the text used before is kept in '###' comment lines. All other lines are
# printed unchanged.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($eng_desc, $eng_source);
    my $in_source = 0;

    # collect the desc and the <source> lines of the english phrase
    for my $eng_line (@$engref) {
        # skip comments
        next if($eng_line =~ /^ *#/);

        if($eng_line =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next;
        }
        if($eng_line =~ / *\<source\>/i) {
            $in_source = 1;
            next;
        }
        next unless $in_source;

        # nothing after the source section is of interest
        last if($eng_line =~ / *\<\/source\>/i);

        $eng_source .= "$eng_line\n";
    }

    my ($loc_desc, $loc_source);
    my @pending; # lines waiting to be printed

    $in_source = 0;
    for my $loc_line (@$locref) {
        if($loc_line =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            if(trim($eng_desc) ne trim($loc_desc)) {
                $loc_line = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n desc: $eng_desc\n";
            }
            push @pending, $loc_line;
            next;
        }
        if($loc_line =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $loc_line;
            next;
        }
        if(!$in_source) {
            push @pending, $loc_line;
            next;
        }
        if($loc_line !~ / *\<\/source\>/i) {
            # a line within the source section
            $loc_source .= $loc_line;
            next;
        }

        # This is the end of the source section: print everything up to
        # here, followed by the source lines to use.
        $in_source = 0;
        print @pending;
        if(trim($eng_source) eq trim($loc_source)) {
            print $loc_source;
        }
        else {
            print "### The <source> section differs from the english!\n",
                  "### the previously used one is commented below:\n";
            print "### $_\n" for split("\n", $loc_source);
            print $eng_source;
        }

        # start over, with the closing tag as the first line
        @pending = ($loc_line);
    }

    print @pending;
}
# One counter per known user: the next ordinary id number and the next
# voice-only id number.
my @idcount = (0) x scalar(keys %users);      # counter for lang ID numbers
my @voiceid = (0x8001) x scalar(keys %users); # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#

open(my $lang_fh, '<', $input)
    or die "Error: couldn't read language file named $input: $!\n";
my @phrase;          # the lines of the phrase being read
my $header = 1;      # true until the first line that is no comment/blank
my $langoptions = 0; # option flags for the binary file header
my $line = 0;        # current line number, used in the warnings

while(my $text = <$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $text =~ s/\r//g;

    if($text =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line - output it if it's part of the header
        if ($header and ($update || $sortfile)) {
            print($text);
        }
        next;
    }
    $header = 0;

    # print "M: $m\n";

    push @phrase, $text;

    # this is an XML-lookalike tag
    if($text =~ /^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;
        # print "P: $part\n";

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif($sortfile) {
                    $allphrases{$idstr} = join('', @phrase);
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
                        $dest = '""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
                        $src = '""';
                    }
                    if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
                        print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
                        $voice = '""';
                    }
                    if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
                        print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
                    }

                    my $userstr = trim($phrase{'user'});
                    my $user = $users{$userstr};
                    if ($userstr eq "") {
                        print STDERR "$input:$line:1: warning: missing user!\n";
                        $user = $users{"core"};
                    }
                    elsif(!(defined $user)) {
                        if($english) {
                            print STDERR "$input:$line:1: warning: user was not found in $english!\n";
                            $user = keys %users; # set to an invalid user so it won't be added
                        }
                        else {
                            # we found a new user, add it to the usermap
                            $user = ++$numusers;
                            $users{$userstr} = $user;
                        }
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid[$user]++;
                    }
                    else {
                        $idnum = $idcount[$user]++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$user][$idnum] = $idstr;

                    $source{$idstr} = $src;
                    $dest{$idstr} = $dest;
                    $voice{$idstr} = $voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                        print "user: $user\n";
                    }

                    # forget everything about this phrase before the next one
                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef $user;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        # clear it, so that the phrase isn't reported as
                        # missing once the whole file has been read
                        $english{$idstr} = "";
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;
            } # end of </phrase>
            elsif($part eq "/options") {
                # closing the options
                if ($options{'rtl'}) {
                    $langoptions |= $LANGUAGE_FLAG_RTL;
                }
            } # end of </options>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if($text =~ /^ *([^:]+): *(.*)/) {
        my ($name, $val) = ($1, $2);

        # $m holds the name of the innermost open tag; call the sub that
        # carries the same name
        &$m($text, $name, $val);
    }
}
close($lang_fh);
if($update) {
    # Every english phrase that was found in the translated file has been
    # set to "" while that file was scanned. What is still non-empty is
    # missing from the translation. The missing phrases are printed in the
    # order they have in the english file, so that the output is the same
    # from one run to the next.
    my @missing = grep { $english{$_} } keys %english;

    for my $idstr (sort { $sortorder{$a} <=> $sortorder{$b} or $a cmp $b } @missing) {
        print "###\n",
              "### This phrase below was not present in the translated file\n",
              "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}

if ($sortfile) {
    # print the collected phrases in the order of the english file
    for my $idstr (sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
        print $allphrases{$idstr};
    }
}
if($prefix) {
    # We create a .c and .h file

    open(my $hfile_core, '>', "$prefix/lang.h")
        or die "Error: couldn't create file $prefix/lang.h: $!\n";
    open(my $cfile_core, '>', "$prefix/lang_core.c")
        or die "Error: couldn't create file $prefix/lang_core.c: $!\n";

    # get header file name: the path with all directories cut off
    my $headername = "$prefix/lang.h";
    $headername =~ s/(.*\/)*//;

    print {$hfile_core} <<MOO;
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]
/* this is the array for holding the string pointers.
It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char core_language_builtin[];
/* The enum below contains all available strings */
enum \{
MOO

    print {$cfile_core} <<MOO;
/* This file was automatically generated using genlang, the strings come
from "$input" */
#include "$headername"
unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char core_language_builtin[] =
MOO

    my $core = $users{"core"};

    # Output the ID names for the enum in the header file
    for my $index (0 .. $idcount[$core]-1) {
        my $name = $idnum[$core][$index]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile_core} (" %s, /* %d */\n", $name, $index);
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print {$hfile_core} <<MOO;
LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
/* --- below this follows voice-only strings --- */
VOICEONLY_DELIMITER = 0x8000,
MOO

    # Output the voice-only ID names for the enum in the header file
    for my $index (0x8001 .. ($voiceid[$core]-1)) {
        my $name = $idnum[$core][$index]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile_core} (" %s, /* 0x%x */\n", $name, $index);
    }

    # Output end of enum
    print {$hfile_core} "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $index (0 .. $idcount[$core]-1) {
        my $name = $idnum[$core][$index]; # get the ID
        my $string = $dest{$name}; # get the destination phrase

        $string =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$string) {
            # this is just to be on the safe side
            $string = '"\0"';
        }

        printf {$cfile_core} (" %s\n", $string);
    }

    # Output end of string chunk. The ';' ends the definition of
    # core_language_builtin[].
    print {$cfile_core} <<MOO;
;
/* end of generated string list */
MOO

    # write errors may not show up before the files get closed
    close($hfile_core)
        or die "Error: couldn't write file $prefix/lang.h: $!\n";
    close($cfile_core)
        or die "Error: couldn't write file $prefix/lang_core.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Error: Can't create $binary: $!";
    binmode $outf;
    printf {$outf} ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id,
                    $langoptions); # magic lang file header

    # @idcount, @idnum and @idmap are indexed by user number, so we walk over
    # the numbers in %users and not over its keys, which are the user names.
    # NOTE(review): the subheaders are written in ascending user number
    # order - confirm that the reader of the file expects that.
    my @user_numbers = sort { $a <=> $b } values %users;

    # output the number of strings for each user
    my $foffset = $HEADER_SIZE + $SUBHEADER_SIZE * scalar(@user_numbers);
    for my $user (@user_numbers) {
        my $count = $idcount[$user] || 0;
        my $size = 0;
        for my $n (0 .. $count-1) {
            $size += length(trim($dest{$idnum[$user][$n]})) + 1;
        }
        printf {$outf} ("%c%c%c%c%c%c", ($count >> 8), ($count & 0xff),
                        ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
        $foffset += $size;
    }

    for my $user (@user_numbers) {
        my $count = $idcount[$user] || 0;

        # loop over the target phrases
        for my $n (0 .. $count-1) {
            my $name = $idnum[$user][$n]; # get the ID
            my $string = $dest{$name}; # get the destination phrase

            if($string) {
                $string =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

                # Now, make sure we get the number from the english sort order:
                my $english_id = $idmap[$user]{$name};

                printf {$outf} ("%c%c%s\x00", ($english_id>>8), ($english_id&0xff), $string);
            }
        }
    }

    # write errors may not show up before the file gets closed
    close($outf) or die "Error: Can't write $binary: $!";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my $core = $users{"core"};

    # maps an english id number to the index the phrase has in the
    # translated file, or to -1 when the phrase has no voice string
    my @engl;

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount[$core]-1)));
    push @ids, (0x8000 .. ($voiceid[$core]-1));

    for my $index (@ids) {
        my $name = $idnum[$core][$index]; # get the ID
        my $string = $voice{$name}; # get the destination voice string

        if($string) {
            # Now, make sure we get the number from the english sort order:
            my $english_id = $idmap[$core]{$name};

            if(length($english_id)) {
                $engl[$english_id] = $index;
            }
            #print "Input index $index output index $english_id\n";
        }
        else {
            # not used, mark it so
            $engl[$index] = -1;
        }
    }

    for my $index (@ids) {

        my $o = $engl[$index];

        if(($o < 0) || !length($o)) {
            print "#$index\nid: NOT_USED_$index\nvoice: \"\"\n";
            next;
        }

        my $name = $idnum[$core][$o]; # get the ID
        my $string = $voice{$name}; # get the destination voice string

        print "#$index ($o)\nid: $name\nvoice: $string\n";
    }
}
# With -v, finish with a short summary of what was read.
if($verbose) {
    my $num_str = 0;

    # @idcount is indexed by user number, so add up over the numbers stored
    # in %users and not over its keys, which are the user names
    for my $user (values %users) {
        $num_str += $idcount[$user] || 0;
    }

    printf("%d ID strings scanned\n", $num_str);

    print "* head *\n";
    for my $name (keys %head) {
        # the name is passed as an argument, so that a '%' in it can't be
        # taken for a format directive
        printf("%s: %s\n", $name, $head{$name});
    }
}