3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
13 # See apps/language.c (TODO: Use common include for both)
14 # Cookie and binary version for the binary lang file
# First byte of the binary language (.lng) file header.
15 my $LANGUAGE_COOKIE = 0x1a;
# First byte of the binary voicestring file header.
16 my $VOICE_COOKIE = 0x9a;
# Format version, written as the second header byte of both binary files.
17 my $LANGUAGE_VERSION = 0x06;
# Bit OR'ed into the language options byte when the 'rtl' option is set.
18 my $LANGUAGE_FLAG_RTL = 0x01;
# Bytes in each per-user subheader: string count, size and file offset,
# each written as two bytes (high byte first).
21 my $SUBHEADER_SIZE = 6;
23 # A note for future users and readers: The original v1 language system allowed
24 # the build to create and use a different language than english built-in. We
25 # removed that feature from our build-system, but the build scripts still had
26 # the ability. Starting now, this ability is no longer provided, since I
27 # figured it was boring and unnecessary to keep supporting something we
28 # don't use anymore.
33 Usage: genlang [options] <langv2 file>
36 Make the tool create a [prefix].c and [prefix].h file.
39 Make the tool create a binary language (.lng) file named [outfile].
40 The use of this option requires that you also use -e, -t and -i.
43 Create binary voicestring file named [outfile]. Works like -b and can be
47 Update language file. Given the translated file and the most recent english
48 file, you\'ll get an updated version sent to stdout. Suitable action to do
49 when you intend to update a translation.
51 -e=<english lang file>
52 Point out the english (original source) file, to use that as master
53 language template. Used in combination with -b, -u or -s.
56 Sort the Update language file in the same order as the strings in the
60 Specify which target you want the translations/phrases for. Required when
63 The target can in fact be specified as numerous different strings,
64 separated with colons. This will make genlang to use all the specified
65 strings when searching for a matching phrase.
68 The target id number, needed for -b.
71 Voice mode output. Outputs all id: and voice: lines for the given target!
74 Enables verbose (debug) output.
82 # 1) scan the english file, keep the whole <phrase> for each phrase.
83 # 2) read the translated file, for each end of phrase, compare:
84 # A) all source strings, if there's any change there should be a comment about
88 # 3) output the phrase with the comments from above
89 # 4) check which phrases the translated version didn't have, and spit out
90 # the english version of those
# Count how many output modes were requested on the command line.
# $binary and $binvoice weigh .5 each, so asking for both still adds up to
# a single mode; $prefix, $update, $voiceout and $sortfile weigh 1 each.
# NOTE(review): the comparisons made against $check are not visible here;
# presumably more than 1 means conflicting modes and 0 means none - confirm.
102 my $check = ($binary?
.5:0) + ($prefix?
1:0) + ($update?
1:0) + ($voiceout?
1:0) + ($sortfile?
1:0) + ($binvoice?
.5:0);
105 print STDERR
"Please use only one of -p, -u, -o, -b, -c and -s\n";
# The options that select an output mode are -p, -u, -o, -c, -b and -s.
# -e only names the english template file and never selects a mode, so it
# must not be offered here as a way to satisfy this check.
109 print STDERR
"Please use at least one of -p, -u, -o, -c, -b and -s\n";
114 if(($binary || $update || $voiceout || $sortfile) && !$english) {
115 print STDERR
"Please use -e too when you use -b, -o, -u or -s\n";
120 if($binary && !$target_id) {
121 print STDERR
"Please specify a target id number (with -i)!\n";
126 if(!$target && !$update && !$sortfile) {
127 print STDERR
"Please specify a target (with -t)!\n";
131 # Build up a regex which can be applied to target wildcard lists. We only need
132 # to support prefix matches, so a target parameter of foo:bar can be expanded
133 # to the regex "\*|f\*|fo\*|foo|b\*|ba\*|bar" and applied to the wildcard list
134 # (plus end-of-string or commas on either side). The regex engine should
135 # discard any duplicates generated for us in the process of constructing the
136 # state machine, so we don't bother to check.
# Start the pattern: the match must begin at start-of-string or after a
# comma, optional spaces, then an alternation whose first branch is a
# literal '*'.
137 my $target_regex = "(?:^|,) *(?:\\*";
# $target holds one or more colon-separated target strings.
138 foreach my $target_part (split ':', $target) {
# Add every leading substring of the part (lengths 1 up to and including
# the full length), each followed by a literal '*'.
139 for (my $c=1; $c<=length $target_part; $c++) {
140 my $partial = substr $target_part, 0, $c;
# NOTE(review): $partial is interpolated into the pattern as-is (no
# quotemeta), so regex metacharacters in a target string would be treated
# as pattern syntax - confirm target names never contain any.
141 $target_regex .= "|$partial\\*";
# Add the exact target string itself (again unescaped).
143 $target_regex .= "|$target_part";
# Close the alternation: optional spaces, then a comma or end-of-string.
145 $target_regex .= ") *(?:,|\$)";
# Compile once; the compiled pattern is what gets matched against names.
146 $target_regex = qr/$target_regex/;
149 if ($binary =~ m
|(.*)/[^/]+|) {
# Lookup tables filled while the language file is parsed.
155 my %id; # string to num hash
156 my @idnum; # num to string array
158 my %allphrases; # For sorting - id string to full <phrase> text hash
159 my %source; # id string to source phrase hash
160 my %dest; # id string to dest phrase hash
161 my %voice; # id string to voice phrase hash
166 my $input = $ARGV[0];
184 my ($full, $n, $v)=@_;
190 my ($full, $n, $v)=@_;
196 my ($full, $n, $v)=@_;
201 my ($debug, $strref, $full, $n, $v)=@_;
203 if ($n =~ $target_regex) {
212 parsetarget
("src", \
$src, @_);
217 parsetarget
("dest", \
$dest, @_);
222 parsetarget
("voice", \
$voice, @_);
226 my ($file1, $file2) = @_;
228 my @s1 = stat $file1;
229 my @s2 = stat $file2;
231 return 1 if ($s1[9] > $s2[9]);
242 # For the cases where the english file needs to be scanned/read, we do
243 # it before we read the translated file. For -b it isn't necessary, but for
244 # -u it is convenient.
246 my @idnum = ((0)); # start with a true number
247 my @vidnum = ((0x8000)); # first voice id
250 if ($binary and file_is_newer
("$binpath/english.list", $english)) {
251 open(ENG
, "<$binpath/english.list") ||
252 die "Error: can't open $binpath/english.list";
254 my ($user, $id, $value) = split ':', $_;
255 $idmap[$user]{$id} = $value;
263 open(ENG
, "<$english") || die "Error: can't open $english";
270 my $numusers = 1; # core is already in the users map
274 # get rid of DOS newlines
277 if($_ =~ /^ *\<phrase\>/) {
278 # this is the start of a phrase
280 elsif($_ =~ /\<\/phrase\
>/) {
282 # if id is something, then we count and store this phrase
284 # voice-only entries get a different range
285 if($id =~ /^VOICE_/) {
286 # Assign an ID number to this entry
287 $idmap[$user]{$id}=$vidnum[$user];
291 # Assign an ID number to this entry
292 $idmap[$user]{$id}=$idnum[$user];
294 # print STDERR "DEST: bumped idnum to $idnum[$user]\n";
297 # this is the end of a phrase, add it to the english hash
298 $english{$id}=join("", @phrase);
304 # gather everything related to this phrase
306 if($_ =~ /^ *\<dest\>/i) {
310 elsif($withindest && ($_ =~ /^ *\<\/dest\
>/i
)) {
313 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
314 # we unconditionally always use all IDs when the "update"
317 # print "DEST: use this id $id\n";
320 # print "skip $maybeid for $name\n";
323 elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
324 my ($name, $val)=($1, $2);
325 $dest=""; # in case it is left untouched for when the
326 # model name isn't "our"
327 dest
($_, $name, $val);
330 # Store the current dest string. If this target matches
331 # multiple strings, it will get updated several times.
337 if($_ =~ /^ *id: ([^ \t\n]+)/i) {
339 $sortorder{$maybeid}=$numphrases++;
341 if($_ =~ /^ *user: ([^ \t\n]+)/i) {
343 if(!(defined $user)) {
352 # a function that compares the english phrase with the translated one.
353 # compare source strings and desc
355 # Then output the updated version!
357 my ($idstr, $engref, $locref)=@_;
359 my ($esource, $lsource);
362 for my $l (@
$engref) {
367 if($l =~ /^ *desc: (.*)/) {
370 elsif($l =~ / *\<source\>/i) {
374 if($l =~ / *\<\/source\
>/i
) {
385 for my $l (@
$locref) {
386 if($l =~ /^ *desc: (.*)/) {
388 if(trim
($edesc) ne trim
($ldesc)) {
389 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
393 elsif($l =~ / *\<source\>/i) {
398 if($l =~ / *\<\/source\
>/i
) {
401 if(trim
($esource) ne trim
($lsource)) {
402 print "### The <source> section differs from the english!\n",
403 "### the previously used one is commented below:\n";
404 for(split("\n", $lsource)) {
412 undef @show; # start over
429 my @idcount; # counter for lang ID numbers
430 my @voiceid; # counter for voice-only ID numbers
434 push @voiceid, 0x8001;
438 # Now start the scanning of the selected language string
441 open(LANG
, "<$input") || die "Error: couldn't read language file named $input\n";
450 # get rid of DOS newlines
453 if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
454 # comment or empty line - output it if it's part of the header
455 if ($header and ($update || $sortfile)) {
468 # this is an XML-lookalike tag
469 if (/^(<|[^\"<]+<)([^>]*)>/) {
471 # print "P: $part\n";
474 # this was a closing tag
476 if($part eq "/phrase") {
479 my $idstr = $phrase{'id'};
482 if($binary && !$english{$idstr}) {
483 # $idstr doesn't exist for english, skip it
485 elsif($dest =~ /^none\z/i) {
486 # "none" as dest (without quotes) means that this entire
487 # phrase is to be ignored
490 $allphrases{$idstr}=join('',@phrase);
493 # we don't do the fully detailed analysis when we "update"
494 # since we don't do it for a particular target etc
496 # allow the keyword 'deprecated' to be used on dest and
497 # voice strings to mark that as deprecated. It will then
498 # be replaced with "".
500 $dest =~ s/^deprecate(|d)\z/\"\"/i;
501 $voice =~ s/^deprecate(|d)\z/\"\"/i;
503 # basic syntax error alerts, if there are no quotes we
504 # will assume an empty string was intended
506 print STDERR
"$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
510 print STDERR
"$input:$line:1: warning: source before line lacks quotes ($src)!\n";
513 if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
514 print STDERR
"$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
517 if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
518 print STDERR
"$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
521 my $userstr = trim
($phrase{'user'});
522 my $user = $users{$userstr};
523 if ($userstr eq "") {
524 print STDERR
"$input:$line:1: warning: missing user!\n";
525 $user = $users{"core"};
527 elsif(!(defined $user)) {
529 print STDERR
"$input:$line:1: warning: user was not found in $english!\n";
530 $user = keys %users; # set to an invalid user so it won't be added
533 # we found a new user, add it to the usermap
535 $users{$userstr} = $user;
539 # Use the ID name to figure out which id number range we
540 # should use for this phrase. Voice-only strings are
543 if($idstr =~ /^VOICE/) {
544 $idnum = $voiceid[$user]++;
547 $idnum = $idcount[$user]++;
550 $id{$idstr} = $idnum;
551 $idnum[$user][$idnum]=$idstr;
553 $source{$idstr}=$src;
555 $voice{$idstr}=$voice;
558 print "id: $phrase{id} ($idnum)\n";
559 print "source: $src\n";
560 print "dest: $dest\n";
561 print "voice: $voice\n";
562 print "user: $user\n";
573 my $e = $english{$idstr};
576 # compare original english with this!
577 my @eng = split("\n", $english{$idstr});
579 compare
($idstr, \
@eng, \
@phrase);
581 $english{$idstr}=""; # clear it
584 print "### $idstr: The phrase is not used. Skipped\n";
589 elsif($part eq "/options") {
590 # closing the options
591 if ($options{'rtl'}) {
592 $langoptions |= $LANGUAGE_FLAG_RTL;
594 } # end of </options>
596 # starts with a slash, this _ends_ this section
597 $m = pop @m; # get back old value, the previous level's tag
601 # This is an opening (sub) tag
603 push @m, $m; # store old value
608 if(/^ *([^:]+): *(.*)/) {
609 my ($name, $val)=($1, $2);
610 &$m($_, $name, $val);
620 "### This phrase below was not present in the translated file\n",
629 for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
630 print $allphrases{$_};
635 # We create a .c and .h file
637 open(HFILE_CORE
, ">$prefix/lang.h") ||
638 die "Error: couldn't create file $prefix/lang.h\n";
639 open(CFILE_CORE
, ">$prefix/lang_core.c") ||
640 die "Error: couldn't create file $prefix/lang_core.c\n";
642 # get header file name
643 $headername = "$prefix/lang.h";
644 $headername =~ s/(.*\/)*//;
646 print HFILE_CORE
<<MOO
647 /* This file was automatically generated using genlang */
649 * The str() macro/functions is how to access strings that might be
650 * translated. Use it like str(MACRO) and expect a string to be
653 #define str(x) language_strings[x]
655 /* this is the array for holding the string pointers.
656 It will be initialized at runtime. */
657 extern unsigned char *language_strings[];
658 /* this contains the concatenation of all strings, separated by \\0 chars */
659 extern const unsigned char core_language_builtin[];
661 /* The enum below contains all available strings */
666 print CFILE_CORE
<<MOO
667 /* This file was automatically generated using genlang, the strings come
670 #include "$headername"
672 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
673 const unsigned char core_language_builtin[] =
677 # Output the ID names for the enum in the header file
679 for $i (0 .. $idcount[$users{"core"}]-1) {
680 my $name=$idnum[$users{"core"}][$i]; # get the ID name
682 $name =~ tr/\"//d; # cut off the quotes
684 printf HFILE_CORE
(" %s, /* %d */\n", $name, $i);
687 # Output separation marker for last string ID and the upcoming voice IDs
689 print HFILE_CORE
<<MOO
690 LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
691 /* --- below this follows voice-only strings --- */
692 VOICEONLY_DELIMITER = 0x8000,
696 # Output the ID names for the enum in the header file
697 for $i (0x8001 .. ($voiceid[$users{"core"}]-1)) {
698 my $name=$idnum[$users{"core"}][$i]; # get the ID name
700 $name =~ tr/\"//d; # cut off the quotes
702 printf HFILE_CORE
(" %s, /* 0x%x */\n", $name, $i);
706 print HFILE_CORE
"\n};\n/* end of generated enum list */\n";
708 # Output the target phrases for the source file
709 for $i (0 .. $idcount[$users{"core"}]-1) {
710 my $name=$idnum[$users{"core"}][$i]; # get the ID
711 my $dest = $dest{$name}; # get the destination phrase
713 $dest =~ s
:\"$:\\0\":; # insert a \0 before the second quote
716 # this is just to be on the safe side
720 printf CFILE_CORE
(" %s\n", $dest);
723 # Output end of string chunk
724 print CFILE_CORE
<<MOO
726 /* end of generated string list */
732 } # end of the c/h file generation
733 elsif($binary || $binvoice) {
734 # Creation of a binary lang file was requested
736 # We must first scan the english file to get the correct order of the id
737 # numbers used there, as that is what sets the id order for all language
738 # files. The english file is scanned before the translated file was
742 open(OUTF
, ">$binary") or die "Error: Can't create $binary";
744 printf OUTF
("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id,
745 $langoptions); # magic lang file header
# Create the binary voicestring file. On failure, report the name of the
# voice file itself ($binvoice), not the name of the .lng file ($binary).
748 open(OUTV
, ">$binvoice") or die "Error: Can't create $binvoice";
750 printf OUTV
("%c%c%c%c", $VOICE_COOKIE, $LANGUAGE_VERSION, $target_id,
751 $langoptions); # magic lang file header
754 # output the number of strings for each user
755 my $foffset = $HEADER_SIZE + $SUBHEADER_SIZE * keys(%users);
758 for $n (0 .. $idcount[$_]-1) {
759 $size += length(trim
($dest{$idnum[$_][$n]})) + 1;
762 printf OUTF
("%c%c%c%c%c%c", ($idcount[$_] >> 8), ($idcount[$_] & 0xff),
763 ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
766 printf OUTV
("%c%c%c%c%c%c", ($idcount[$_] >> 8), ($idcount[$_] & 0xff),
767 ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
773 # loop over the target phrases
774 # This loops over the strings in the translated language file order
775 my @ids = ((0 .. ($idcount[$_]-1)));
776 push @ids, (0x8000 .. ($voiceid[$_]-1));
778 my $name=$idnum[$_][$n]; # get the ID
779 my $dest = $dest{$name}; # get the destination phrase
780 my $voice = $voice{$name}; # get the destination voice string
782 if($dest && $n < 0x8000 && $binary) {
783 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
785 # Now, make sure we get the number from the english sort order:
786 $idnum = $idmap[$_]{$name};
788 printf OUTF
("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
790 if($voice && $binvoice) {
791 $voice =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
792 # Now, make sure we get the number from the english sort order:
793 $idnum = $idmap[$_]{$name};
794 printf OUTV
("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $voice);
806 # voice output requested, display id: and voice: strings in a v1-like
811 # This loops over the strings in the translated language file order
812 my @ids = ((0 .. ($idcount[$users{"core"}]-1)));
813 push @ids, (0x8000 .. ($voiceid[$users{"core"}]-1));
820 my $name=$idnum[$users{"core"}][$i]; # get the ID
821 my $dest = $voice{$name}; # get the destination voice string
824 $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
826 # Now, make sure we get the number from the english sort order:
827 $idnum = $idmap[$users{"core"}]{$name};
832 #print "Input index $i output index $idnum\n";
835 # not used, mark it so
845 if(($o < 0) || !length($o)) {
847 print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
850 print "#$i\nid: VOICE_NOT_USED_$i\nvoice: \"\"\n";
855 my $name=$idnum[$users{"core"}][$o]; # get the ID
856 my $dest = $voice{$name}; # get the destination voice string
858 print "#$i ($o)\nid: $name\nvoice: $dest\n";
868 $num_str += $idcount[$_];
871 printf("%d ID strings scanned\n", $num_str);
875 printf "$_: %s\n", $head{$_};
879 if ($binary and !file_is_newer
("$binpath/english.list", $english)) {
880 open(ENGLIST
, ">$binpath/english.list") ||
881 die "Failed creating $binpath/english.list";
882 for my $user (keys %users) {
883 for my $id (keys %{$idmap[$user]}) {
884 print ENGLIST
"$user:$id:$idmap[$user]{$id}\n";