tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
# Version number of the binary language file format. It is written as the
# second byte of the header of every binary (.lng) file this tool creates.
# 3 was the latest one used in the v1 format.
my $langversion = 4;
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file named [outfile].
  32     The use of this option requires that you also use -e, -t and -i.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -i=<target id>
  52     The target id number, needed for -b.
  53
  54  -o
  55     Voice mode output. Outputs all id: and voice: lines for the given target!
  56
  57  -v
  58     Enables verbose (debug) output.
  59 MOO
  60 ;
  61     exit;
  62 }
  63
# How update works:
#
# 1) Scan the english file and keep the whole <phrase> for each phrase.
# 2) Read the translated file and, at the end of each phrase, compare it with
#    the english one:
#  A) all source strings; if anything changed, a comment about it is output
#  B) the desc fields
#
# 3) Output the phrase together with the comments from above.
# 4) Check which phrases the translated version didn't have, and output
#    the english version of those.
#
  76
  77 my $prefix = $p;
  78 my $binary = $b;
  79 my $update = $u;
  80
  81 my $english = $e;
  82 my $voiceout = $o;
  83
  84 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  85
  86 if($check > 1) {
  87     print "Please use only one of -p, -u, -o and -b\n";
  88     exit;
  89 }
  90 if(!$check) {
  91     print "Please use at least one of -p, -u, -o and -b\n";
  92     exit;
  93 }
  94
  95 if(($binary || $update || $voiceout) && !$english) {
  96     print "Please use -e too when you use -b, -o or -u\n";
  97     exit;
  98 }
  99
 100 my $target_id = $i;
 101 if($binary && !$target_id) {
 102     print "Please specify a target id number (with -i)!\n";
 103     exit;
 104 }
 105
 106 my $target = $t;
 107 if(!$target && !$update) {
 108     print "Please specify a target (with -t)!\n";
 109     exit;
 110 }
 111 my $verbose=$v;
 112
 113 my %id; # string to num hash
 114 my @idnum; # num to string array
 115
 116 my %source; # id string to source phrase hash
 117 my %dest; # id string to dest phrase hash
 118 my %voice; # id string to voice phrase hash
 119
 120 my $input = $ARGV[0];
 121
 122 my @m;
 123 my $m="blank";
 124
 125 sub match {
 126     my ($string, $pattern)=@_;
 127
 128     $pattern =~ s/\*/.?*/g;
 129     $pattern =~ s/\?/./g;
 130
 131     return ($string =~ /^$pattern\z/);
 132 }
 133
 134 sub blank {
 135     # nothing to do
 136 }
 137
 138 my %head;
 139 sub header {
 140     my ($full, $n, $v)=@_;
 141     $head{$n}=$v;
 142 }
 143
 144 my %phrase;
 145 sub phrase {
 146     my ($full, $n, $v)=@_;
 147     $phrase{$n}=$v;
 148 }
 149
 150 sub parsetarget {
 151     my ($debug, $strref, $full, $n, $v)=@_;
 152     my $string;
 153     my @all= split(" *, *", $n);
 154     my $test;
 155     for $test (@all) {
 156 #        print "TEST ($debug) $target for $test\n";
 157         for my $part (split(":", $target)) {
 158             if(match($part, $test)) {
 159                 $string = $v;
 160 #                print "MATCH: $test => $v\n";
 161                 $$strref = $string;
 162                 return $string;
 163             }
 164         }
 165     }
 166 }
 167
 168 my $src;
 169 sub source {
 170     parsetarget("src", \$src, @_);
 171 }
 172
 173 my $dest;
 174 sub dest {
 175     parsetarget("dest", \$dest, @_);
 176 }
 177
 178 my $voice;
 179 sub voice {
 180     parsetarget("voice", \$voice, @_);
 181 }
 182
 183 my %idmap;
 184 my %english;
 185 if($english) {
 186     # For the cases where the english file needs to be scanned/read, we do
 187     # it before we read the translated file. For -b it isn't necessary, but for
 188     # -u it is convenient.
 189
 190     my $idnum=0; # start with a true number
 191     my $vidnum=0x8000; # first voice id
 192     open(ENG, "<$english") || die "can't open $english";
 193     my @phrase;
 194     my $id;
 195     my $maybeid;
 196     my $withindest;
 197     while(<ENG>) {
 198
 199         # get rid of DOS newlines
 200         $_ =~ s/\r//g;
 201
 202         if($_ =~ /^ *\<phrase\>/) {
 203             # this is the start of a phrase
 204         }
 205         elsif($_ =~ /^ *\<\/phrase\>/) {
 206
 207             # if id is something, when we count and store this phrase
 208             if($id) {
 209                 # voice-only entries get a difference range
 210                 if($id =~ /^VOICE_/) {
 211                     # Assign an ID number to this entry
 212                     $idmap{$id}=$vidnum;
 213                     $vidnum++;
 214                 }
 215                 else {
 216                     # Assign an ID number to this entry
 217                     $idmap{$id}=$idnum;
 218                     $idnum++;
 219    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 220                 }
 221
 222                 # this is the end of a phrase, add it to the english hash
 223                 $english{$id}=join("", @phrase);
 224             }
 225             undef @phrase;
 226             $id="";
 227         }
 228         elsif($_ ne "\n") {
 229             # gather everything related to this phrase
 230             push @phrase, $_;
 231             if($_ =~ /^ *\<dest\>/i) {
 232                 $withindest=1;
 233                 $deststr="";
 234             }
 235             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 236                 $withindest=0;
 237
 238                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
 239                     # we unconditionally always use all IDs when the "update"
 240                     # feature is used
 241                     $id = $maybeid;
 242     #                print "DEST: use this id $id\n";
 243                 }
 244                 else {
 245     #                print "skip $maybeid for $name\n";
 246                 }
 247             }
 248             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 249                 my ($name, $val)=($1, $2);
 250                 $dest=""; # in case it is left untouched for when the
 251                 # model name isn't "our"
 252                 dest($_, $name, $val);
 253
 254                 if($dest) {
 255                     # Store the current dest string. If this target matches
 256                     # multiple strings, it will get updated several times.
 257                     $deststr = $dest;
 258                 }
 259             }
 260         }
 261
 262         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 263             $maybeid=$1;
 264         }
 265     }
 266     close(ENG);
 267 }
 268
 269 # a function that compares the english phrase with the translated one.
 270 # compare source strings and desc
 271
 272 # Then output the updated version!
 273 sub compare {
 274     my ($idstr, $engref, $locref)=@_;
 275     my ($edesc, $ldesc);
 276     my ($esource, $lsource);
 277     my $mode=0;
 278
 279     for my $l (@$engref) {
 280         if($l =~ /^ *#/) {
 281             # comment
 282             next;
 283         }
 284         if($l =~ /^ *desc: (.*)/) {
 285             $edesc=$1;
 286         }
 287         elsif($l =~ / *\<source\>/i) {
 288             $mode=1;
 289         }
 290         elsif($mode) {
 291             if($l =~ / *\<\/source\>/i) {
 292                 last;
 293             }
 294             $esource .= "$l\n";
 295         }
 296     }
 297
 298     my @show;
 299     my @source;
 300
 301     $mode = 0;
 302     for my $l (@$locref) {
 303         if($l =~ /^ *desc: (.*)/) {
 304             $ldesc=$1;
 305             if($edesc ne $ldesc) {
 306                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 307             }
 308             push @show, $l;
 309         }
 310         elsif($l =~ / *\<source\>/i) {
 311             $mode=1;
 312             push @show, $l;
 313         }
 314         elsif($mode) {
 315             if($l =~ / *\<\/source\>/i) {
 316                 $mode = 0;
 317                 print @show;
 318                 if($esource ne $lsource) {
 319                     print "### The <source> section differs from the english!\n",
 320                     "### the previously used one is commented below:\n";
 321                     for(split("\n", $lsource)) {
 322                         print "### $_\n";
 323                     }
 324                     print $esource;
 325                 }
 326                 else {
 327                     print $lsource;
 328                 }
 329                 undef @show; # start over
 330
 331                 push @show, $l;
 332             }
 333             else {
 334                 $lsource .= "$l";
 335             }
 336         }
 337         else {
 338             push @show, $l;
 339         }
 340     }
 341
 342
 343     print @show;
 344 }
 345
 346 my $idcount;        # counter for lang ID numbers
 347 my $voiceid=0x8000; # counter for voice-only ID numbers
 348
 349 #
 350 # Now start the scanning of the selected language string
 351 #
 352
 353 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 354 my @phrase;
 355 while(<LANG>) {
 356
 357     $line++;
 358
 359     # get rid of DOS newlines
 360     $_ =~ s/\r//g;
 361
 362     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 363         # comment or empty line
 364         next;
 365     }
 366
 367     my $ll = $_;
 368
 369     # print "M: $m\n";
 370
 371     push @phrase, $ll;
 372
 373     # this is an XML-lookalike tag
 374     if (/^(<|[^\"<]+<)([^>]*)>/) {
 375         my $part = $2;
 376         # print "P: $part\n";
 377
 378         if($part =~ /^\//) {
 379             # this was a closing tag
 380
 381             if($part eq "/phrase") {
 382                 # closing the phrase
 383
 384                 my $idstr = $phrase{'id'};
 385                 my $idnum;
 386
 387                 if($binary && !$english{$idstr}) {
 388                     # $idstr doesn't exist for english, skip it\n";
 389                 }
 390                 elsif($dest =~ /^none\z/i) {
 391                     # "none" as dest (without quotes) means that this entire
 392                     # phrase is to be ignored
 393                 }
 394                 elsif(!$update) {
 395                     # we don't do the fully detailed analysis when we "update"
 396                     # since we don't do it for a particular target etc
 397
 398                     # allow the keyword 'deprecated' to be used on dest and
 399                     # voice strings to mark that as deprecated. It will then
 400                     # be replaced with "".
 401
 402                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 403                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 404
 405                     # basic syntax error alerts, if there are no quotes we
 406                     # will assume an empty string was intended
 407                     if($dest !~ /^\"/) {
 408                         print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
 409                         $dest='""';
 410                     }
 411                     if($src !~ /^\"/) {
 412                         print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
 413                         $src='""';
 414                     }
 415                     if($voice !~ /^\"/) {
 416                         print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
 417                         $voice='""';
 418                     }
 419
 420                     # Use the ID name to figure out which id number range we
 421                     # should use for this phrase. Voice-only strings are
 422                     # separated.
 423
 424                     if($idstr =~ /^VOICE/) {
 425                         $idnum = $voiceid++;
 426                     }
 427                     else {
 428                         $idnum = $idcount++;
 429                     }
 430
 431                     $id{$idstr} = $idnum;
 432                     $idnum[$idnum]=$idstr;
 433
 434                     $source{$idstr}=$src;
 435                     $dest{$idstr}=$dest;
 436                     $voice{$idstr}=$voice;
 437
 438                     if($verbose) {
 439                         print "id: $phrase{id} ($idnum)\n";
 440                         print "source: $src\n";
 441                         print "dest: $dest\n";
 442                         print "voice: $voice\n";
 443                     }
 444
 445                     undef $src;
 446                     undef $dest;
 447                     undef $voice;
 448                     undef %phrase;
 449                 }
 450
 451                 if($update) {
 452                     my $e = $english{$idstr};
 453
 454                     if($e) {
 455                         # compare original english with this!
 456                         my @eng = split("\n", $english{$idstr});
 457
 458                         compare($idstr, \@eng, \@phrase);
 459
 460                         $english{$idstr}=""; # clear it
 461                     }
 462                     else {
 463                         print "### $idstr: The phrase is not used. Skipped\n";
 464                     }
 465                 }
 466                 undef @phrase;
 467
 468             } # end of </phrase>
 469
 470             # starts with a slash, this _ends_ this section
 471             $m = pop @m; # get back old value, the previous level's tag
 472             next;
 473         } # end of tag close
 474
 475         # This is an opening (sub) tag
 476
 477         push @m, $m; # store old value
 478         $m = $part;
 479         next;
 480     }
 481
 482     if(/^ *([^:]+): *(.*)/) {
 483         my ($name, $val)=($1, $2);
 484         &$m($_, $name, $val);
 485     }
 486 }
 487 close(LANG);
 488
 489 if($update) {
 490     my $any=0;
 491     for(keys %english) {
 492         if($english{$_}) {
 493             print "###\n",
 494             "### This phrase below was not present in the translated file\n",
 495             "<phrase>\n";
 496             print $english{$_};
 497             print "</phrase>\n";
 498         }
 499     }
 500 }
 501
 502 if($prefix) {
 503     # We create a .c and .h file
 504
 505     open(HFILE, ">$prefix.h") ||
 506         die "couldn't create file $prefix.h\n";
 507     open(CFILE, ">$prefix.c") ||
 508         die "couldn't create file $prefix.c\n";
 509
 510     print HFILE <<MOO
 511 /* This file was automatically generated using genlang */
 512 /*
 513  * The str() macro/functions is how to access strings that might be
 514  * translated. Use it like str(MACRO) and expect a string to be
 515  * returned!
 516  */
 517 #define str(x) language_strings[x]
 518
 519 /* this is the array for holding the string pointers.
 520    It will be initialized at runtime. */
 521 extern unsigned char *language_strings[];
 522 /* this contains the concatenation of all strings, separated by \\0 chars */
 523 extern const unsigned char language_builtin[];
 524
 525 /* The enum below contains all available strings */
 526 enum \{
 527 MOO
 528     ;
 529
 530     print CFILE <<MOO
 531 /* This file was automaticly generated using genlang, the strings come
 532    from "$input" */
 533
 534 #include "$prefix.h"
 535
 536 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 537 const unsigned char language_builtin[] =
 538 MOO
 539 ;
 540
 541     # Output the ID names for the enum in the header file
 542     my $i;
 543     for $i (1 .. $idcount) {
 544         my $name=$idnum[$i - 1]; # get the ID name
 545
 546         $name =~ s/\"//g; # cut off the quotes
 547
 548         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
 549     }
 550
 551 # Output separation marker for last string ID and the upcoming voice IDs
 552
 553     print HFILE <<MOO
 554     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 555     /* --- below this follows voice-only strings --- */
 556     VOICEONLY_DELIMITER = 0x8000,
 557 MOO
 558     ;
 559
 560 # Output the ID names for the enum in the header file
 561     for $i (0x8000 .. ($voiceid-1)) {
 562         my $name=$idnum[$i]; # get the ID name
 563
 564         $name =~ s/\"//g; # cut off the quotes
 565
 566         printf HFILE ("    %s,\n", $name);
 567     }
 568
 569     # Output end of enum
 570     print HFILE "\n};\n/* end of generated enum list */\n";
 571
 572     # Output the target phrases for the source file
 573     for $i (1 .. $idcount) {
 574         my $name=$idnum[$i - 1]; # get the ID
 575         my $dest = $dest{$name}; # get the destination phrase
 576
 577         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 578
 579         if(!$dest) {
 580             # this is just to be on the safe side
 581             $dest = '"\0"';
 582         }
 583
 584         printf CFILE ("    %s\n", $dest);
 585     }
 586
 587 # Output end of string chunk
 588     print CFILE <<MOO
 589 ;
 590 /* end of generated string list */
 591 MOO
 592 ;
 593
 594     close(HFILE);
 595     close(CFILE);
 596 } # end of the c/h file generation
 597 elsif($binary) {
 598     # Creation of a binary lang file was requested
 599
 600     # We must first scan the english file to get the correct order of the id
 601     # numbers used there, as that is what sets the id order for all language
 602     # files. The english file is scanned before the translated file was
 603     # scanned.
 604
 605     open(OUTF, ">$binary") or die "Can't create $binary";
 606     binmode OUTF;
 607     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 608
 609     # loop over the target phrases
 610     for $i (1 .. $idcount) {
 611         my $name=$idnum[$i - 1]; # get the ID
 612         my $dest = $dest{$name}; # get the destination phrase
 613
 614         if($dest) {
 615             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 616
 617             # Now, make sure we get the number from the english sort order:
 618             $idnum = $idmap{$name};
 619
 620             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 621         }
 622     }
 623 }
 624 elsif($voiceout) {
 625     # voice output requested, display id: and voice: strings in a v1-like
 626     # fashion
 627
 628     my @engl;
 629
 630     # This loops over the strings in the translated language file order
 631     my @ids = ((0 .. ($idcount-1)));
 632     push @ids, (0x8000 .. ($voiceid-1));
 633
 634     #for my $id (@ids) {
 635     #    print "$id\n";
 636     #}
 637
 638     for $i (@ids) {
 639         my $name=$idnum[$i]; # get the ID
 640         my $dest = $voice{$name}; # get the destination voice string
 641
 642         if($dest) {
 643             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 644
 645             # Now, make sure we get the number from the english sort order:
 646             $idnum = $idmap{$name};
 647
 648             if(length($idnum)) {
 649                 $engl[$idnum] = $i;
 650
 651                 #print "Input index $i output index $idnum\n";
 652             }
 653             else {
 654                 # not used, mark it so
 655                 $engl[$i] = -1
 656             }
 657
 658         }
 659     }
 660     for my $i (@ids) {
 661
 662         my $o = $engl[$i];
 663
 664         if(($o < 0) || !length($o)) {
 665             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
 666             next;
 667         }
 668
 669         my $name=$idnum[$o]; # get the ID
 670         my $dest = $voice{$name}; # get the destination voice string
 671
 672         print "#$i ($o)\nid: $name\nvoice: $dest\n";
 673     }
 674
 675 }
 676
 677
 678 if($verbose) {
 679     printf("%d ID strings scanned\n", $idcount);
 680
 681     print "* head *\n";
 682     for(keys %head) {
 683         printf "$_: %s\n", $head{$_};
 684     }
 685 }
 686