PP: move DEBUG_CANCEL definition to .c
[maemo-rb.git] / tools / genlang
blob6139bd18103791514bc26dcee6358f137c251518
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
13 # See apps/language.c (TODO: Use common include for both)
14 # Cookie and binary version for the binary lang file
# The cookie and version are emitted as the first two bytes of a binary
# language file by the -b output code.
15 my $LANGUAGE_COOKIE = 0x1a;
16 my $LANGUAGE_VERSION = 0x06;
# Bit OR-ed into the options byte of the binary header when the language
# file's <options> section sets "rtl".
17 my $LANGUAGE_FLAG_RTL = 0x01;
# Sizes in bytes of the file header and of each per-user subheader; used to
# compute the string-data offsets stored in the subheaders.
19 my $HEADER_SIZE = 4;
20 my $SUBHEADER_SIZE = 6;
22 # A note for future users and readers: The original v1 language system allowed
23 # the build to create and use a different language than english built-in. We
24 # removed that feature from our build-system, but the build scripts still had
25 # the ability. But, starting now, this ability is no longer provided since I
26 # figured it was boring and unnecessary to write support for now since we
27 # don't use it anymore.
# Without an input file there is nothing to do: show the usage text on
# stdout and stop. Every switch documented here is read through perl's -s
# switch parsing (see the #! line), so each one must be given as -x or
# -x=value before the input file name.
if (!$ARGV[0]) {
    print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create the files lang.h and lang_core.c in the directory
    [prefix].

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you'll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o, -u or -s.

 -s
    Sort the Update language file in the same order as the strings in the
    English file.

 -t=<target>
    Specify which target you want the translations/phrases for. Required
    unless -u or -s is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}
75 # How update works:
77 # 1) scan the english file, keep the whole <phrase> for each phrase.
78 # 2) read the translated file, for each end of phrase, compare:
79 # A) all source strings, if there's any change there should be a comment about
80 # it output
81 # B) the desc fields
83 # 3) output the phrase with the comments from above
84 # 4) check which phrases that the translated version didn't have, and spit out
85 # the english version of those
# Copy the switch variables set by "perl -s" into descriptive names.
my $prefix   = $p;
my $binary   = $b;
my $update   = $u;
my $sortfile = $s;

my $english  = $e;
my $voiceout = $o;

# Exactly one operating mode must be selected; count how many were given.
my $check = grep { $_ } ($binary, $prefix, $update, $voiceout, $sortfile);

if ($check != 1) {
    print STDERR ($check > 1
                  ? "Please use only one of -p, -u, -o, -b and -s\n"
                  : "Please use at least one of -p, -u, -o, -b and -s\n");
    exit;
}

# Every mode except -p needs the english master file.
if (!$english && ($binary || $update || $voiceout || $sortfile)) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit;
}

# A binary language file carries the target id in its header.
my $target_id = $i;
if ($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit;
}

# Update and sort work on whole phrases, all other modes pick strings for a
# specific target.
my $target = $t;
unless ($target || $update || $sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit;
}
# Build up a regex which can be applied to target wildcard lists. We only need
# to support prefix matches, so a target parameter of foo:bar can be expanded
# to the regex "\*|f\*|fo\*|foo\*|foo|b\*|ba\*|bar\*|bar" and applied to the
# wildcard list (plus end-of-string or commas on either side). The regex
# engine should discard any duplicates generated for us in the process of
# constructing the state machine, so we don't bother to check.
#
# Every piece taken from the -t argument is passed through quotemeta so that
# a target name containing regex metacharacters (".", "+", "(" ...) is matched
# literally instead of being interpreted as a pattern.
my $target_regex = "(?:^|,) *(?:\\*";
foreach my $target_part (split ':', $target) {
    # every non-empty prefix of the part, followed by a literal '*'
    foreach my $len (1 .. length $target_part) {
        $target_regex .= "|" . quotemeta(substr($target_part, 0, $len)) . "\\*";
    }
    # ... and the complete part on its own
    $target_regex .= "|" . quotemeta($target_part);
}
$target_regex .= ") *(?:,|\$)";
$target_regex = qr/$target_regex/;
# Directory part of the -b output file; the english.list cache file is kept
# in that directory. When the output name has no directory component we use
# the current directory, because an empty string would turn
# "$binpath/english.list" into "/english.list" in the filesystem root.
my $binpath = ".";
if (defined $binary && $binary =~ m|(.*)/[^/]+|) {
    $binpath = $1;
}
# -v switch: print debug information while parsing and a summary at the end.
147 my $verbose=$v;
149 my %id; # id string to id number hash
150 my @idnum; # id number to id string, indexed as $idnum[user][number]
152 my %allphrases; # For sorting - a hash of id string to complete <phrase> text
153 my %source; # id string to source phrase hash
154 my %dest; # id string to dest phrase hash
155 my %voice; # id string to voice phrase hash
# Maps a phrase "user:" name to the index used in the per-user arrays.
157 my %users =
158 ('core' => 0);
# The language file to process (first non-switch command line argument).
160 my $input = $ARGV[0];
# @m is the stack of handler names for the enclosing tags, $m is the name of
# the sub that receives "name: value" lines of the tag currently open. It
# starts as "blank", which ignores everything.
162 my @m;
163 my $m="blank";
# Return a copy of the given string with leading and trailing whitespace
# removed. The caller's variable is not modified.
sub trim {
    my ($string) = @_;
    $string =~ s/^\s+|\s+$//g;
    return $string;
}
# Handler used while no tag is open (the initial value of $m): "name: value"
# lines are silently ignored.
sub blank {
    return;
}
# Collected "name: value" pairs seen while the "header" handler is active.
my %head;

# Handler for "name: value" lines; called through $m with
# (whole line, name, value). Stores the value under its name and returns it.
sub header {
    my (undef, $key, $value) = @_;
    return $head{$key} = $value;
}
# Fields (id, desc, user, ...) of the phrase currently being parsed; the main
# loop clears this hash at each </phrase>.
my %phrase;

# Handler for "name: value" lines directly inside a <phrase> tag; called
# through $m with (whole line, name, value). Stores the value under its name
# and returns it.
sub phrase {
    my (undef, $key, $value) = @_;
    return $phrase{$key} = $value;
}
# Language-wide settings collected from the <options> section.
my %options;

# Handler for "name: value" lines inside an <options> tag; called through $m
# with (whole line, name, value). Stores the value under its name and
# returns it.
sub options {
    my (undef, $key, $value) = @_;
    return $options{$key} = $value;
}
# Shared worker for the source/dest/voice handlers.
#
#   $debug   label of the calling handler (not used at present)
#   $strref  reference to the scalar that receives the selected string
#   $full    the complete input line (not used at present)
#   $n       the target wildcard list in front of the colon
#   $v       the string after the colon
#
# When the wildcard list matches the target given with -t, the string is
# stored through $strref and returned. Otherwise the referenced scalar is
# left untouched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;
    if ($n =~ $target_regex) {
        ${$strref} = $v;
        return $v;
    }
}
# Source string selected for our target in the phrase being parsed.
my $src;

# Handler for lines inside <source>: keep the string if its target list
# matches ours. Called with (whole line, target list, string).
sub source {
    return parsetarget("src", \$src, @_);
}
# Destination (translated) string selected for our target in the phrase being
# parsed.
my $dest;

# Handler for lines inside <dest>: keep the string if its target list matches
# ours. Called with (whole line, target list, string).
sub dest {
    return parsetarget("dest", \$dest, @_);
}
# Voice string selected for our target in the phrase being parsed.
my $voice;

# Handler for lines inside <voice>: keep the string if its target list
# matches ours. Called with (whole line, target list, string).
sub voice {
    return parsetarget("voice", \$voice, @_);
}
# Return 1 when $file1 was modified more recently than $file2, 0 otherwise.
# The comparison uses the modification time reported by stat (field 9).
sub file_is_newer {
    my ($file1, $file2) = @_;

    my $mtime1 = (stat $file1)[9];
    my $mtime2 = (stat $file2)[9];

    return ($mtime1 > $mtime2) ? 1 : 0;
}
# NOTE(review): the rest of the file only ever writes and reads
# $idmap[user]{id}, i.e. the array @idmap, not this hash - confirm whether the
# declaration was meant to be "my @idmap".
229 my %idmap;
# English phrases by id string: the phrase text when parsed from the english
# file, or just 1 when the ids were loaded from the english.list cache.
230 my %english;
# The english file (-e) is read before the translated file is scanned.
231 if($english) {
232 readenglish();
# Read the english master file given with -e.
#
# For the cases where the english file needs to be scanned/read, we do it
# before we read the translated file. For -b it isn't necessary, but for -u
# it is convenient.
#
# Fills in:
#   $idmap[user]{id}  the id number each phrase gets in the english order
#   %english          id string -> text of the phrase (or 1, see below)
#   %sortorder        id string -> position of the phrase in the file
#   %users            "user:" name -> user index, for users first seen here
#
# In -b mode, if "$binpath/english.list" is newer than the english file, the
# id numbers are loaded from that cache instead and %english only records
# which ids exist.
sub readenglish {
    my @idnum  = (0);        # next normal id per user, user 0 is core
    my @vidnum = (0x8000);   # next voice-only id per user

    if ($binary and file_is_newer("$binpath/english.list", $english)) {
        open(my $list, '<', "$binpath/english.list")
            or die "Error: can't open $binpath/english.list: $!";
        while (my $entry = <$list>) {
            # strip the line ending so the id number is stored clean
            chomp $entry;
            my ($user, $id, $value) = split ':', $entry;
            next unless defined $value;   # skip blank or malformed lines
            # NOTE(review): $user is the user *name* as written by the cache
            # writer; used as an array index it evaluates to 0 - confirm.
            $idmap[$user]{$id} = $value;
            $english{$id} = 1;
        }
        close $list;

        return;
    }

    open(my $eng, '<', $english) or die "Error: can't open $english: $!";
    my @phrase;          # lines of the phrase being collected
    my $id;              # id to store the phrase under, empty = skip it
    my $maybeid;         # id: line seen, used once the dest is known
    my $user;            # user index of the most recent user: line
    my $withindest;      # true between <dest> and </dest>
    my $deststr;         # dest string matching our target, if any
    my $numphrases = 0;
    my $numusers = 1;    # core is already in the users map

    while (my $line = <$eng>) {

        # get rid of DOS newlines
        $line =~ tr/\r//d;

        if ($line =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif ($line =~ /\<\/phrase\>/) {

            # if id is something, when we count and store this phrase
            if ($id) {
                # Counters of users other than core start out undefined;
                # give them the same starting values as core.
                $idnum[$user]  = 0      unless defined $idnum[$user];
                $vidnum[$user] = 0x8000 unless defined $vidnum[$user];

                # voice-only entries get a difference range
                if ($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id} = $vidnum[$user];
                    $vidnum[$user]++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id} = $idnum[$user];
                    $idnum[$user]++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id} = join("", @phrase);
            }
            undef @phrase;
            $id = "";
        }
        elsif ($line ne "\n") {
            # gather everything related to this phrase
            push @phrase, $line;
            if ($line =~ /^ *\<dest\>/i) {
                $withindest = 1;
                $deststr = "";
            }
            elsif ($withindest && ($line =~ /^ *\<\/dest\>/i)) {
                $withindest = 0;

                # we unconditionally always use all IDs when the "update"
                # feature is used, otherwise only phrases that have a dest
                # for our target which isn't "none"
                if ($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    $id = $maybeid;
                }
            }
            elsif ($withindest && ($line =~ / *([^:]+): *(.*)/)) {
                my ($name, $val) = ($1, $2);
                $dest = ""; # in case it is left untouched for when the
                            # model name isn't "our"
                dest($line, $name, $val);

                if ($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        if ($line =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid = $1;
            $sortorder{$maybeid} = $numphrases++;
        }
        if ($line =~ /^ *user: ([^ \t\n]+)/i) {
            $user = $users{$1};
            if (!(defined $user)) {
                $user = ++$numusers;
                $users{$1} = $user;
            }
        }
    }
    close($eng);
}
# Compare one translated phrase with its english original and print the
# updated phrase to the currently selected output handle.
#
#   $idstr   id of the phrase (not used at present)
#   $engref  reference to the english phrase lines, without line endings
#   $locref  reference to the translated phrase lines, with line endings
#
# Only the desc field and the <source> section are compared. Where they
# differ, the english version is put in place and the previous translation
# file content is kept as "###" comment lines. Everything else is printed
# unchanged. A replaced desc line is also written back into @$locref.
sub compare {
    my ($idstr, $engref, $locref) = @_;

    # whitespace-insensitive comparison helper
    my $strip = sub {
        my ($text) = @_;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        return $text;
    };

    # Pass 1: pick the desc and the <source> lines out of the english phrase.
    my ($eng_desc, $eng_source);
    my $in_source = 0;
    ENGLISH:
    for my $line (@{$engref}) {
        next ENGLISH if $line =~ /^ *#/;    # comment
        if ($line =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next ENGLISH;
        }
        if ($line =~ / *\<source\>/i) {
            $in_source = 1;
            next ENGLISH;
        }
        next ENGLISH unless $in_source;
        last ENGLISH if $line =~ / *\<\/source\>/i;
        $eng_source .= "$line\n";
    }

    # Pass 2: walk the translated phrase, holding back output in @pending
    # until we know whether the <source> section has to be replaced.
    my ($loc_desc, $loc_source);
    my @pending;
    $in_source = 0;
    for my $line (@{$locref}) {
        if ($line =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            if ($strip->($eng_desc) ne $strip->($loc_desc)) {
                $line = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n desc: $eng_desc\n";
            }
            push @pending, $line;
            next;
        }
        if ($line =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $line;
            next;
        }
        if (!$in_source) {
            push @pending, $line;
            next;
        }
        if ($line !~ / *\<\/source\>/i) {
            # still inside the translated file's <source> section
            $loc_source .= "$line";
            next;
        }

        # </source> reached: flush what we held back, then the source lines
        $in_source = 0;
        print @pending;
        if ($strip->($eng_source) eq $strip->($loc_source)) {
            print $loc_source;
        }
        else {
            print "### The <source> section differs from the english!\n",
                  "### the previously used one is commented below:\n";
            print "### $_\n" for split("\n", $loc_source);
            print $eng_source;
        }
        @pending = ($line);    # start over with the closing tag
    }

    print @pending;
}
# One counter per user known so far: @idcount hands out the normal lang ID
# numbers (from 0), @voiceid the voice-only ID numbers (from 0x8001).
my @idcount = (0)      x scalar(keys %users);
my @voiceid = (0x8001) x scalar(keys %users);
432 # Now start the scanning of the selected language string
#
# The translated file is read line by line. A tag line pushes the current
# handler name on @m and makes the tag name the new handler ($m); a closing
# tag pops it again. Any other "name: value" line is passed to the sub named
# by $m (header, phrase, options, source, dest, voice or blank). All
# per-phrase work happens when </phrase> is seen.
435 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
436 my @phrase;
437 my $header = 1;
438 my $langoptions = 0;
440 while(<LANG>) {
442 $line++;
444 # get rid of DOS newlines
445 $_ =~ tr/\r//d;
447 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
448 # comment or empty line - output it if it's part of the header
449 if ($header and ($update || $sortfile)) {
450 print($_);
452 next;
# the first line that is neither comment nor blank ends the file header
454 $header = 0;
456 my $ll = $_;
458 # print "M: $m\n";
# collect the raw lines of the phrase for the update and sort modes
460 push @phrase, $ll;
462 # this is an XML-lookalike tag
463 if (/^(<|[^\"<]+<)([^>]*)>/) {
464 my $part = $2;
465 # print "P: $part\n";
467 if($part =~ /^\//) {
468 # this was a closing tag
470 if($part eq "/phrase") {
471 # closing the phrase
473 my $idstr = $phrase{'id'};
474 my $idnum;
476 if($binary && !$english{$idstr}) {
477 # $idstr doesn't exist for english, skip it\n";
479 elsif($dest =~ /^none\z/i) {
480 # "none" as dest (without quotes) means that this entire
481 # phrase is to be ignored
483 elsif($sortfile) {
# sort mode only remembers the whole phrase text; it is printed later
484 $allphrases{$idstr}=join('',@phrase);
486 elsif(!$update) {
487 # we don't do the fully detailed analysis when we "update"
488 # since we don't do it for a particular target etc
490 # allow the keyword 'deprecated' to be used on dest and
491 # voice strings to mark that as deprecated. It will then
492 # be replaced with "".
494 $dest =~ s/^deprecate(|d)\z/\"\"/i;
495 $voice =~ s/^deprecate(|d)\z/\"\"/i;
497 # basic syntax error alerts, if there are no quotes we
498 # will assume an empty string was intended
499 if($dest !~ /^\"/) {
500 print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
501 $dest='""';
503 if($src !~ /^\"/) {
504 print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
505 $src='""';
507 if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
508 print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
509 $voice='""';
511 if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
512 print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
# work out which user (index into the per-user arrays) owns this phrase
515 my $userstr = trim($phrase{'user'});
516 my $user = $users{$userstr};
517 if ($userstr eq "") {
518 print STDERR "$input:$line:1: warning: missing user!\n";
519 $user = $users{"core"};
521 elsif(!(defined $user)) {
522 if($english) {
523 print STDERR "$input:$line:1: warning: user was not found in $english!\n";
# "keys %users" in scalar context is the number of known users
524 $user = keys %users; # set to an invalid user so it won't be added
526 else {
527 # we found a new user, add it to the usermap
# NOTE(review): $numusers here is not the lexical of the same name inside
# readenglish(); at file level it looks like an undeclared global that
# starts out undefined - confirm.
528 $user = ++$numusers;
529 $users{$userstr} = $user;
533 # Use the ID name to figure out which id number range we
534 # should use for this phrase. Voice-only strings are
535 # separated.
537 if($idstr =~ /^VOICE/) {
538 $idnum = $voiceid[$user]++;
540 else {
541 $idnum = $idcount[$user]++;
# remember the phrase in both directions and keep its strings
544 $id{$idstr} = $idnum;
545 $idnum[$user][$idnum]=$idstr;
547 $source{$idstr}=$src;
548 $dest{$idstr}=$dest;
549 $voice{$idstr}=$voice;
551 if($verbose) {
552 print "id: $phrase{id} ($idnum)\n";
553 print "source: $src\n";
554 print "dest: $dest\n";
555 print "voice: $voice\n";
556 print "user: $user\n";
# reset the per-phrase state filled in by the handler subs
559 undef $src;
560 undef $dest;
561 undef $voice;
562 undef $user;
563 undef %phrase;
566 if($update) {
567 my $e = $english{$idstr};
569 if($e) {
570 # compare original english with this!
571 my @eng = split("\n", $english{$idstr});
573 compare($idstr, \@eng, \@phrase);
# an emptied entry marks the phrase as handled; entries that still hold
# text after the loop are phrases missing from the translation
575 $english{$idstr}=""; # clear it
577 else {
578 print "### $idstr: The phrase is not used. Skipped\n";
581 undef @phrase;
582 } # end of </phrase>
583 elsif($part eq "/options") {
584 # closing the options
585 if ($options{'rtl'}) {
586 $langoptions |= $LANGUAGE_FLAG_RTL;
588 } # end of </options>
590 # starts with a slash, this _ends_ this section
591 $m = pop @m; # get back old value, the previous level's tag
592 next;
593 } # end of tag close
595 # This is an opening (sub) tag
597 push @m, $m; # store old value
598 $m = $part;
599 next;
# not a tag: hand "name: value" to the sub named after the current tag
602 if(/^ *([^:]+): *(.*)/) {
603 my ($name, $val)=($1, $2);
604 &$m($_, $name, $val);
607 close(LANG);
# Update mode, final step: every english phrase whose text is still present
# in %english was not found in the translated file, so output the english
# version of it.
if ($update) {
    for my $missing (grep { $english{$_} } keys %english) {
        print "###\n",
              "### This phrase below was not present in the translated file\n",
              "<phrase>\n",
              $english{$missing},
              "</phrase>\n";
    }
}
# Sort mode: print the collected phrases in the order in which their ids
# appear in the english file.
if ($sortfile) {
    my @ordered = sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases;
    print @allphrases{@ordered};
}
# -p mode: write lang.h (the enum of string ids plus declarations) and
# lang_core.c (the built-in strings of the "core" user) into directory $prefix.
628 if($prefix) {
629 # We create a .c and .h file
631 open(HFILE_CORE, ">$prefix/lang.h") ||
632 die "Error: couldn't create file $prefix/lang.h\n";
633 open(CFILE_CORE, ">$prefix/lang_core.c") ||
634 die "Error: couldn't create file $prefix/lang_core.c\n";
636 # get header file name
637 $headername = "$prefix/lang.h";
638 $headername =~ s/(.*\/)*//;
640 print HFILE_CORE <<MOO
641 /* This file was automatically generated using genlang */
643 * The str() macro/functions is how to access strings that might be
644 * translated. Use it like str(MACRO) and expect a string to be
645 * returned!
647 #define str(x) language_strings[x]
649 /* this is the array for holding the string pointers.
650 It will be initialized at runtime. */
651 extern unsigned char *language_strings[];
652 /* this contains the concatenation of all strings, separated by \\0 chars */
653 extern const unsigned char core_language_builtin[];
655 /* The enum below contains all available strings */
656 enum \{
660 print CFILE_CORE <<MOO
661 /* This file was automatically generated using genlang, the strings come
662 from "$input" */
664 #include "$headername"
666 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
667 const unsigned char core_language_builtin[] =
671 # Output the ID names for the enum in the header file
# (normal ids are numbered from 0 in the order they appeared in the input)
672 my $i;
673 for $i (0 .. $idcount[$users{"core"}]-1) {
674 my $name=$idnum[$users{"core"}][$i]; # get the ID name
676 $name =~ tr/\"//d; # cut off the quotes
678 printf HFILE_CORE (" %s, /* %d */\n", $name, $i);
681 # Output separation marker for last string ID and the upcoming voice IDs
683 print HFILE_CORE <<MOO
684 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
685 /* --- below this follows voice-only strings --- */
686 VOICEONLY_DELIMITER = 0x8000,
690 # Output the ID names for the enum in the header file
# (voice-only ids start at 0x8001, right after VOICEONLY_DELIMITER)
691 for $i (0x8001 .. ($voiceid[$users{"core"}]-1)) {
692 my $name=$idnum[$users{"core"}][$i]; # get the ID name
694 $name =~ tr/\"//d; # cut off the quotes
696 printf HFILE_CORE (" %s, /* 0x%x */\n", $name, $i);
699 # Output end of enum
700 print HFILE_CORE "\n};\n/* end of generated enum list */\n";
702 # Output the target phrases for the source file
703 for $i (0 .. $idcount[$users{"core"}]-1) {
704 my $name=$idnum[$users{"core"}][$i]; # get the ID
705 my $dest = $dest{$name}; # get the destination phrase
707 $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
709 if(!$dest) {
710 # this is just to be on the safe side
711 $dest = '"\0"';
714 printf CFILE_CORE (" %s\n", $dest);
717 # Output end of string chunk
718 print CFILE_CORE <<MOO
720 /* end of generated string list */
724 close(HFILE_CORE);
725 close(CFILE_CORE);
726 } # end of the c/h file generation
# -b mode: write the binary language file.
# Layout as produced below: a 4-byte header (cookie, version, target id,
# options), one 6-byte subheader per user (string count, data size and file
# offset, each as two bytes with the high byte first), then for every
# string two id bytes, the text and a terminating NUL.
727 elsif($binary) {
728 # Creation of a binary lang file was requested
730 # We must first scan the english file to get the correct order of the id
731 # numbers used there, as that is what sets the id order for all language
732 # files. The english file is scanned before the translated file was
733 # scanned.
735 open(OUTF, ">$binary") or die "Error: Can't create $binary";
736 binmode OUTF;
737 printf OUTF ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id,
738 $langoptions); # magic lang file header
740 # output the number of strings for each user
# NOTE(review): the loops below use the user *name* from keys %users as
# array index; a name such as "core" evaluates to 0 in numeric context,
# so this looks right only for the core user - confirm.
741 my $foffset = $HEADER_SIZE + $SUBHEADER_SIZE * keys(%users);
742 for (keys %users) {
743 my $size;
744 for $n (0 .. $idcount[$_]-1) {
# the dest string still has its two quotes here, so length + 1 equals
# the bytes written per string below: 2 id bytes + text + NUL
745 $size += length(trim($dest{$idnum[$_][$n]})) + 1;
747 printf OUTF ("%c%c%c%c%c%c", ($idcount[$_] >> 8), ($idcount[$_] & 0xff),
748 ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
749 $foffset += $size;
752 for (keys %users) {
753 # loop over the target phrases
754 for $n (0 .. $idcount[$_]-1) {
755 my $name=$idnum[$_][$n]; # get the ID
756 my $dest = $dest{$name}; # get the destination phrase
758 if($dest) {
759 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
761 # Now, make sure we get the number from the english sort order:
762 $idnum = $idmap[$_]{$name};
764 printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
# -o mode: print "id:" / "voice:" pairs for the core user, one entry per id
# number, arranged by the id numbers taken from the english file.
769 elsif($voiceout) {
770 # voice output requested, display id: and voice: strings in a v1-like
771 # fashion
# @engl maps an english id number to the id number the same phrase got in
# the translated file, or -1 when the phrase has no voice string
773 my @engl;
775 # This loops over the strings in the translated language file order
776 my @ids = ((0 .. ($idcount[$users{"core"}]-1)));
777 push @ids, (0x8000 .. ($voiceid[$users{"core"}]-1));
779 #for my $id (@ids) {
780 # print "$id\n";
783 for $i (@ids) {
784 my $name=$idnum[$users{"core"}][$i]; # get the ID
785 my $dest = $voice{$name}; # get the destination voice string
787 if($dest) {
788 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
790 # Now, make sure we get the number from the english sort order:
791 $idnum = $idmap[$users{"core"}]{$name};
793 if(length($idnum)) {
794 $engl[$idnum] = $i;
796 #print "Input index $i output index $idnum\n";
798 else {
799 # not used, mark it so
800 $engl[$i] = -1
# second pass: walk the id numbers again and print what was mapped to each;
# slots without a mapping get a NOT_USED placeholder
805 for my $i (@ids) {
807 my $o = $engl[$i];
809 if(($o < 0) || !length($o)) {
810 print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
811 next;
814 my $name=$idnum[$users{"core"}][$o]; # get the ID
815 my $dest = $voice{$name}; # get the destination voice string
817 print "#$i ($o)\nid: $name\nvoice: $dest\n";
# Verbose mode: print how many ID strings were scanned in total and dump the
# collected header fields.
if ($verbose) {
    my $num_str = 0;

    # @idcount is indexed by user index (the values of %users), not by the
    # user name; a name used as an index would always select element 0.
    for my $user (values %users) {
        $num_str += $idcount[$user] if defined $idcount[$user];
    }

    printf("%d ID strings scanned\n", $num_str);

    print "* head *\n";
    for my $key (keys %head) {
        # keep the key out of the format string, it may contain '%'
        printf("%s: %s\n", $key, $head{$key});
    }
}
# After a -b run that had to parse the english file, save the english id
# numbers as "user:id:number" lines in english.list next to the binary, so
# that later runs can load them from there instead of parsing again.
if ($binary and !file_is_newer("$binpath/english.list", $english)) {
    my $listfile = "$binpath/english.list";

    # three-argument open: the path is never interpreted as an open mode
    open(my $list, '>', $listfile)
        or die "Failed creating $listfile: $!";
    for my $user (keys %users) {
        for my $id (keys %{$idmap[$user]}) {
            print {$list} "$user:$id:$idmap[$user]{$id}\n";
        }
    }
    # buffered write errors only show up when the file is closed
    close($list) or die "Failed writing $listfile: $!";
}