tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 4; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file named [outfile].
  32     The use of this option requires that you also use -e, -t and -i.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -i=<target id>
  52     The target id number, needed for -b.
  53
  54  -o
  55     Voice mode output. Outputs all id: and voice: lines for the given target!
  56
  57  -v
  58     Enables verbose (debug) output.
  59 MOO
  60 ;
  61     exit;
  62 }
  63
# How update works:
#
# 1) Scan the english file and keep the whole <phrase> for each phrase.
# 2) Read the translated file and, at the end of each phrase, compare:
#  A) all source strings; if anything changed, a comment about it is
#     output
#  B) the desc fields
#
# 3) Output the phrase together with the comments from above.
# 4) Check which phrases the translated version didn't have, and output
#    the english version of those.
#
  76
  77 my $prefix = $p;
  78 my $binary = $b;
  79 my $update = $u;
  80
  81 my $english = $e;
  82 my $voiceout = $o;
  83
  84 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  85
  86 if($check > 1) {
  87     print "Please use only one of -p, -u, -o and -b\n";
  88     exit;
  89 }
  90 if(!$check) {
  91     print "Please use at least one of -p, -u, -o and -b\n";
  92     exit;
  93 }
  94
  95 if(($binary || $update || $voiceout) && !$english) {
  96     print "Please use -e too when you use -b, -o or -u\n";
  97     exit;
  98 }
  99
 100 my $target_id = $i;
 101 if($binary && !$target_id) {
 102     print "Please specify a target id number (with -i)!\n";
 103     exit;
 104 }
 105
 106 my $target = $t;
 107 if(!$target && !$update) {
 108     print "Please specify a target (with -t)!\n";
 109     exit;
 110 }
 111 my $verbose=$v;
 112
 113 my %id; # string to num hash
 114 my @idnum; # num to string array
 115
 116 my %source; # id string to source phrase hash
 117 my %dest; # id string to dest phrase hash
 118 my %voice; # id string to voice phrase hash
 119
 120 my $input = $ARGV[0];
 121
 122 my @m;
 123 my $m="blank";
 124
 125 sub match {
 126     my ($string, $pattern)=@_;
 127
 128     $pattern =~ s/\*/.?*/g;
 129     $pattern =~ s/\?/./g;
 130
 131     return ($string =~ /^$pattern\z/);
 132 }
 133
 134 sub blank {
 135     # nothing to do
 136 }
 137
 138 my %head;
 139 sub header {
 140     my ($full, $n, $v)=@_;
 141     $head{$n}=$v;
 142 }
 143
 144 my %phrase;
 145 sub phrase {
 146     my ($full, $n, $v)=@_;
 147     $phrase{$n}=$v;
 148 }
 149
 150 sub parsetarget {
 151     my ($debug, $strref, $full, $n, $v)=@_;
 152     my $string;
 153     my @all= split(" *, *", $n);
 154     my $test;
 155     for $test (@all) {
 156 #        print "TEST ($debug) $target for $test\n";
 157         for my $part (split(":", $target)) {
 158             if(match($part, $test)) {
 159                 $string = $v;
 160 #                print "MATCH: $test => $v\n";
 161                 $$strref = $string;
 162                 return $string;
 163             }
 164         }
 165     }
 166 }
 167
 168 my $src;
 169 sub source {
 170     parsetarget("src", \$src, @_);
 171 }
 172
 173 my $dest;
 174 sub dest {
 175     parsetarget("dest", \$dest, @_);
 176 }
 177
 178 my $voice;
 179 sub voice {
 180     parsetarget("voice", \$voice, @_);
 181 }
 182
 183 my %idmap;
 184 my %english;
 185 if($english) {
 186     # For the cases where the english file needs to be scanned/read, we do
 187     # it before we read the translated file. For -b it isn't necessary, but for
 188     # -u it is convenient.
 189
 190     my $idnum=0; # start with a true number
 191     my $vidnum=0x8000; # first voice id
 192     open(ENG, "<$english") || die "can't open $english";
 193     my @phrase;
 194     my $id;
 195     my $maybeid;
 196     my $withindest;
 197     while(<ENG>) {
 198
 199         # get rid of DOS newlines
 200         $_ =~ s/\r//g;
 201
 202         if($_ =~ /^ *\<phrase\>/) {
 203             # this is the start of a phrase
 204         }
 205         elsif($_ =~ /^ *\<\/phrase\>/) {
 206
 207             # if id is something, when we count and store this phrase
 208             if($id) {
 209                 # voice-only entries get a difference range
 210                 if($id =~ /^VOICE_/) {
 211                     # Assign an ID number to this entry
 212                     $idmap{$id}=$vidnum;
 213                     $vidnum++;
 214                 }
 215                 else {
 216                     # Assign an ID number to this entry
 217                     $idmap{$id}=$idnum;
 218                     $idnum++;
 219    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 220                 }
 221
 222                 # this is the end of a phrase, add it to the english hash
 223                 $english{$id}=join("", @phrase);
 224             }
 225             undef @phrase;
 226             $id="";
 227         }
 228         elsif($_ ne "\n") {
 229             # gather everything related to this phrase
 230             push @phrase, $_;
 231             if($_ =~ /^ *\<dest\>/i) {
 232                 $withindest=1;
 233             }
 234             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 235                 $withindest=0;
 236             }
 237             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 238                 my ($name, $val)=($1, $2);
 239                 $dest=""; # in case it is left untouched for when the
 240                 # model name isn't "our"
 241                 dest($_, $name, $val);
 242
 243    #             print STDERR "DEST: \"$dest\" for $name / $id\n";
 244
 245                 if($update || ($dest && ($dest !~ /^none\z/i))) {
 246                     # we unconditionally always use all IDs when the "update"
 247                     # feature is used
 248                     $id = $maybeid;
 249    #                 print STDERR "DEST: use this id $id\n";
 250                 }
 251             }
 252         }
 253
 254         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 255             $maybeid=$1;
 256         }
 257     }
 258     close(ENG);
 259 }
 260
 261 # a function that compares the english phrase with the translated one.
 262 # compare source strings and desc
 263
 264 # Then output the updated version!
 265 sub compare {
 266     my ($idstr, $engref, $locref)=@_;
 267     my ($edesc, $ldesc);
 268     my ($esource, $lsource);
 269     my $mode=0;
 270
 271     for my $l (@$engref) {
 272         if($l =~ /^ *#/) {
 273             # comment
 274             next;
 275         }
 276         if($l =~ /^ *desc: (.*)/) {
 277             $edesc=$1;
 278         }
 279         elsif($l =~ / *\<source\>/i) {
 280             $mode=1;
 281         }
 282         elsif($mode) {
 283             if($l =~ / *\<\/source\>/i) {
 284                 last;
 285             }
 286             $esource .= "$l\n";
 287         }
 288     }
 289
 290     my @show;
 291     my @source;
 292
 293     $mode = 0;
 294     for my $l (@$locref) {
 295         if($l =~ /^ *desc: (.*)/) {
 296             $ldesc=$1;
 297             if($edesc ne $ldesc) {
 298                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 299             }
 300             push @show, $l;
 301         }
 302         elsif($l =~ / *\<source\>/i) {
 303             $mode=1;
 304             push @show, $l;
 305         }
 306         elsif($mode) {
 307             if($l =~ / *\<\/source\>/i) {
 308                 $mode = 0;
 309                 print @show;
 310                 if($esource ne $lsource) {
 311                     print "### The <source> section differs from the english!\n",
 312                     "### the previously used one is commented below:\n";
 313                     for(split("\n", $lsource)) {
 314                         print "### $_\n";
 315                     }
 316                     print $esource;
 317                 }
 318                 else {
 319                     print $lsource;
 320                 }
 321                 undef @show; # start over
 322
 323                 push @show, $l;
 324             }
 325             else {
 326                 $lsource .= "$l";
 327             }
 328         }
 329         else {
 330             push @show, $l;
 331         }
 332     }
 333
 334
 335     print @show;
 336 }
 337
 338 my $idcount;        # counter for lang ID numbers
 339 my $voiceid=0x8000; # counter for voice-only ID numbers
 340
 341 #
 342 # Now start the scanning of the selected language string
 343 #
 344
 345 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 346 my @phrase;
 347 while(<LANG>) {
 348
 349     $line++;
 350
 351     # get rid of DOS newlines
 352     $_ =~ s/\r//g;
 353
 354     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 355         # comment or empty line
 356         next;
 357     }
 358
 359     my $ll = $_;
 360
 361     # print "M: $m\n";
 362
 363     push @phrase, $ll;
 364
 365     # this is an XML-lookalike tag
 366     if (/^(<|[^\"<]+<)([^>]*)>/) {
 367         my $part = $2;
 368         # print "P: $part\n";
 369
 370         if($part =~ /^\//) {
 371             # this was a closing tag
 372
 373             if($part eq "/phrase") {
 374                 # closing the phrase
 375
 376                 my $idstr = $phrase{'id'};
 377                 my $idnum;
 378
 379                 if($binary && !$english{$idstr}) {
 380                     # $idstr doesn't exist for english, skip it\n";
 381                 }
 382                 elsif($dest =~ /^none\z/i) {
 383                     # "none" as dest (without quotes) means that this entire
 384                     # phrase is to be ignored
 385                 }
 386                 elsif(!$update) {
 387                     # we don't do the fully detailed analysis when we "update"
 388                     # since we don't do it for a particular target etc
 389
 390                     # allow the keyword 'deprecated' to be used on dest and
 391                     # voice strings to mark that as deprecated. It will then
 392                     # be replaced with "".
 393
 394                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 395                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 396
 397                     # basic syntax error alerts, if there are no quotes we
 398                     # will assume an empty string was intended
 399                     if($dest !~ /^\"/) {
 400                         print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
 401                         $dest='""';
 402                     }
 403                     if($src !~ /^\"/) {
 404                         print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
 405                         $src='""';
 406                     }
 407                     if($voice !~ /^\"/) {
 408                         print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
 409                         $voice='""';
 410                     }
 411
 412                     # Use the ID name to figure out which id number range we
 413                     # should use for this phrase. Voice-only strings are
 414                     # separated.
 415
 416                     if($idstr =~ /^VOICE/) {
 417                         $idnum = $voiceid++;
 418                     }
 419                     else {
 420                         $idnum = $idcount++;
 421                     }
 422
 423                     $id{$idstr} = $idnum;
 424                     $idnum[$idnum]=$idstr;
 425
 426                     $source{$idstr}=$src;
 427                     $dest{$idstr}=$dest;
 428                     $voice{$idstr}=$voice;
 429
 430                     if($verbose) {
 431                         print "id: $phrase{id} ($idnum)\n";
 432                         print "source: $src\n";
 433                         print "dest: $dest\n";
 434                         print "voice: $voice\n";
 435                     }
 436
 437                     undef $src;
 438                     undef $dest;
 439                     undef $voice;
 440                     undef %phrase;
 441                 }
 442
 443                 if($update) {
 444                     my $e = $english{$idstr};
 445
 446                     if($e) {
 447                         # compare original english with this!
 448                         my @eng = split("\n", $english{$idstr});
 449
 450                         compare($idstr, \@eng, \@phrase);
 451
 452                         $english{$idstr}=""; # clear it
 453                     }
 454                     else {
 455                         print "### $idstr: The phrase is not used. Skipped\n";
 456                     }
 457                 }
 458                 undef @phrase;
 459
 460             } # end of </phrase>
 461
 462             # starts with a slash, this _ends_ this section
 463             $m = pop @m; # get back old value, the previous level's tag
 464             next;
 465         } # end of tag close
 466
 467         # This is an opening (sub) tag
 468
 469         push @m, $m; # store old value
 470         $m = $part;
 471         next;
 472     }
 473
 474     if(/^ *([^:]+): *(.*)/) {
 475         my ($name, $val)=($1, $2);
 476         &$m($_, $name, $val);
 477     }
 478 }
 479 close(LANG);
 480
 481 if($update) {
 482     my $any=0;
 483     for(keys %english) {
 484         if($english{$_}) {
 485             print "###\n",
 486             "### This phrase below was not present in the translated file\n",
 487             "<phrase>\n";
 488             print $english{$_};
 489             print "</phrase>\n";
 490         }
 491     }
 492 }
 493
 494 if($prefix) {
 495     # We create a .c and .h file
 496
 497     open(HFILE, ">$prefix.h") ||
 498         die "couldn't create file $prefix.h\n";
 499     open(CFILE, ">$prefix.c") ||
 500         die "couldn't create file $prefix.c\n";
 501
 502     print HFILE <<MOO
 503 /* This file was automatically generated using genlang */
 504 /*
 505  * The str() macro/functions is how to access strings that might be
 506  * translated. Use it like str(MACRO) and expect a string to be
 507  * returned!
 508  */
 509 #define str(x) language_strings[x]
 510
 511 /* this is the array for holding the string pointers.
 512    It will be initialized at runtime. */
 513 extern unsigned char *language_strings[];
 514 /* this contains the concatenation of all strings, separated by \\0 chars */
 515 extern const unsigned char language_builtin[];
 516
 517 /* The enum below contains all available strings */
 518 enum \{
 519 MOO
 520     ;
 521
 522     print CFILE <<MOO
 523 /* This file was automaticly generated using genlang, the strings come
 524    from "$input" */
 525
 526 #include "$prefix.h"
 527
 528 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 529 const unsigned char language_builtin[] =
 530 MOO
 531 ;
 532
 533     # Output the ID names for the enum in the header file
 534     my $i;
 535     for $i (1 .. $idcount) {
 536         my $name=$idnum[$i - 1]; # get the ID name
 537
 538         $name =~ s/\"//g; # cut off the quotes
 539
 540         printf HFILE ("    %s,\n", $name);
 541     }
 542
 543 # Output separation marker for last string ID and the upcoming voice IDs
 544
 545     print HFILE <<MOO
 546     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 547     /* --- below this follows voice-only strings --- */
 548     VOICEONLY_DELIMITER = 0x8000,
 549 MOO
 550     ;
 551
 552 # Output the ID names for the enum in the header file
 553     for $i (0x8000 .. ($voiceid-1)) {
 554         my $name=$idnum[$i]; # get the ID name
 555
 556         $name =~ s/\"//g; # cut off the quotes
 557
 558         printf HFILE ("    %s,\n", $name);
 559     }
 560
 561     # Output end of enum
 562     print HFILE "\n};\n/* end of generated enum list */\n";
 563
 564     # Output the target phrases for the source file
 565     for $i (1 .. $idcount) {
 566         my $name=$idnum[$i - 1]; # get the ID
 567         my $dest = $dest{$name}; # get the destination phrase
 568
 569         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 570
 571         if(!$dest) {
 572             # this is just to be on the safe side
 573             $dest = '"\0"';
 574         }
 575
 576         printf CFILE ("    %s\n", $dest);
 577     }
 578
 579 # Output end of string chunk
 580     print CFILE <<MOO
 581 ;
 582 /* end of generated string list */
 583 MOO
 584 ;
 585
 586     close(HFILE);
 587     close(CFILE);
 588 } # end of the c/h file generation
 589 elsif($binary) {
 590     # Creation of a binary lang file was requested
 591
 592     # We must first scan the english file to get the correct order of the id
 593     # numbers used there, as that is what sets the id order for all language
 594     # files. The english file is scanned before the translated file was
 595     # scanned.
 596
 597     open(OUTF, ">$binary") or die "Can't create $binary";
 598     binmode OUTF;
 599     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 600
 601     # loop over the target phrases
 602     for $i (1 .. $idcount) {
 603         my $name=$idnum[$i - 1]; # get the ID
 604         my $dest = $dest{$name}; # get the destination phrase
 605
 606         if($dest) {
 607             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 608
 609             # Now, make sure we get the number from the english sort order:
 610             $idnum = $idmap{$name};
 611
 612             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 613         }
 614     }
 615 }
 616 elsif($voiceout) {
 617     # voice output requested, display id: and voice: strings in a v1-like
 618     # fashion
 619
 620     my @engl;
 621
 622     # This loops over the strings in the translated language file order
 623     my @ids = ((0 .. ($idcount-1)));
 624     push @ids, (0x8000 .. ($voiceid-1));
 625
 626     #for my $id (@ids) {
 627     #    print "$id\n";
 628     #}
 629
 630     for $i (@ids) {
 631         my $name=$idnum[$i]; # get the ID
 632         my $dest = $voice{$name}; # get the destination voice string
 633
 634         if($dest) {
 635             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 636
 637             # Now, make sure we get the number from the english sort order:
 638             $idnum = $idmap{$name};
 639
 640             $engl[$idnum] = $i;
 641
 642            # print "Input index $i output index $idnum\n";
 643
 644         }
 645     }
 646     for my $i (@ids) {
 647
 648         my $o = $engl[$i];
 649
 650         my $name=$idnum[$o]; # get the ID
 651         my $dest = $voice{$name}; # get the destination voice string
 652
 653         print "#$i\nid: $name\nvoice: $dest\n";
 654     }
 655
 656 }
 657
 658
 659 if($verbose) {
 660     printf("%d ID strings scanned\n", $idcount);
 661
 662     print "* head *\n";
 663     for(keys %head) {
 664         printf "$_: %s\n", $head{$_};
 665     }
 666 }
 667