tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
  11 #
  12
# binary version for the binary lang file
# This number is written into the header of the binary (.lng) file, right
# after the leading 0x1a byte, when -b is used.
my $langversion = 4; # 3 was the latest one used in the v1 format
  15
# A note for future users and readers: the original v1 language system
# allowed the build to create and use a built-in language other than english.
# That feature was removed from our build system, although the build scripts
# were still able to do it. Starting with this version the ability is no
# longer provided: we don't use it anymore, so writing support for it was
# considered unnecessary.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file named [outfile].
  32     The use of this option requires that you also use -e, -t and -i.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -i=<target id>
  52     The target id number, needed for -b.
  53
  54  -o
  55     Voice mode output. Outputs all id: and voice: lines for the given target!
  56
  57  -v
  58     Enables verbose (debug) output.
  59 MOO
  60 ;
  61     exit;
  62 }
  63
  64 # How update works:
  65 #
  66 # 1) scan the english file, keep the whole <phrase> for each phrase.
  67 # 2) read the translated file, for each end of phrase, compare:
  68 #  A) all source strings, if there's any change there should be a comment about
  69 #     it output
  70 #  B) the desc fields
  71 #
  72 # 3) output the phrase with the comments from above
  73 # 4) check which phrases that the translated version didn't have, and spit out
  74 #    the english version of those
  75 #
  76
  77 my $prefix = $p;
  78 my $binary = $b;
  79 my $update = $u;
  80
  81 my $english = $e;
  82 my $voiceout = $o;
  83
  84 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  85
  86 if($check > 1) {
  87     print "Please use only one of -p, -u, -o and -b\n";
  88     exit;
  89 }
  90 if(!$check) {
  91     print "Please use at least one of -p, -u, -o and -b\n";
  92     exit;
  93 }
  94
  95 if(($binary || $update || $voiceout) && !$english) {
  96     print "Please use -e too when you use -b, -o or -u\n";
  97     exit;
  98 }
  99
 100 my $target_id = $i;
 101 if($binary && !$target_id) {
 102     print "Please specify a target id number (with -i)!\n";
 103     exit;
 104 }
 105
 106 my $target = $t;
 107 if(!$target && !$update) {
 108     print "Please specify a target (with -t)!\n";
 109     exit;
 110 }
 111 my $verbose=$v;
 112
 113 my %id; # string to num hash
 114 my @idnum; # num to string array
 115
 116 my %source; # id string to source phrase hash
 117 my %dest; # id string to dest phrase hash
 118 my %voice; # id string to voice phrase hash
 119
 120 my $input = $ARGV[0];
 121
 122 my @m;
 123 my $m="blank";
 124
 125 sub trim {
 126     my ($string) = @_;
 127     $string =~ s/^\s+//;
 128     $string =~ s/\s+$//;
 129     return $string;
 130 }
 131
 132 sub match {
 133     my ($string, $pattern)=@_;
 134
 135     $pattern =~ s/\*/.?*/g;
 136     $pattern =~ s/\?/./g;
 137
 138     return ($string =~ /^$pattern\z/);
 139 }
 140
 141 sub blank {
 142     # nothing to do
 143 }
 144
 145 my %head;
 146 sub header {
 147     my ($full, $n, $v)=@_;
 148     $head{$n}=$v;
 149 }
 150
 151 my %phrase;
 152 sub phrase {
 153     my ($full, $n, $v)=@_;
 154     $phrase{$n}=$v;
 155 }
 156
 157 sub parsetarget {
 158     my ($debug, $strref, $full, $n, $v)=@_;
 159     my $string;
 160     my @all= split(" *, *", $n);
 161     my $test;
 162     for $test (@all) {
 163 #        print "TEST ($debug) $target for $test\n";
 164         for my $part (split(":", $target)) {
 165             if(match($part, $test)) {
 166                 $string = $v;
 167 #                print "MATCH: $test => $v\n";
 168                 $$strref = $string;
 169                 return $string;
 170             }
 171         }
 172     }
 173 }
 174
 175 my $src;
 176 sub source {
 177     parsetarget("src", \$src, @_);
 178 }
 179
 180 my $dest;
 181 sub dest {
 182     parsetarget("dest", \$dest, @_);
 183 }
 184
 185 my $voice;
 186 sub voice {
 187     parsetarget("voice", \$voice, @_);
 188 }
 189
 190 my %idmap;
 191 my %english;
 192 if($english) {
 193     # For the cases where the english file needs to be scanned/read, we do
 194     # it before we read the translated file. For -b it isn't necessary, but for
 195     # -u it is convenient.
 196
 197     my $idnum=0; # start with a true number
 198     my $vidnum=0x8000; # first voice id
 199     open(ENG, "<$english") || die "Error: can't open $english";
 200     my @phrase;
 201     my $id;
 202     my $maybeid;
 203     my $withindest;
 204     while(<ENG>) {
 205
 206         # get rid of DOS newlines
 207         $_ =~ s/\r//g;
 208
 209         if($_ =~ /^ *\<phrase\>/) {
 210             # this is the start of a phrase
 211         }
 212         elsif($_ =~ /^ *\<\/phrase\>/) {
 213
 214             # if id is something, when we count and store this phrase
 215             if($id) {
 216                 # voice-only entries get a difference range
 217                 if($id =~ /^VOICE_/) {
 218                     # Assign an ID number to this entry
 219                     $idmap{$id}=$vidnum;
 220                     $vidnum++;
 221                 }
 222                 else {
 223                     # Assign an ID number to this entry
 224                     $idmap{$id}=$idnum;
 225                     $idnum++;
 226    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 227                 }
 228
 229                 # this is the end of a phrase, add it to the english hash
 230                 $english{$id}=join("", @phrase);
 231             }
 232             undef @phrase;
 233             $id="";
 234         }
 235         elsif($_ ne "\n") {
 236             # gather everything related to this phrase
 237             push @phrase, $_;
 238             if($_ =~ /^ *\<dest\>/i) {
 239                 $withindest=1;
 240                 $deststr="";
 241             }
 242             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 243                 $withindest=0;
 244
 245                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
 246                     # we unconditionally always use all IDs when the "update"
 247                     # feature is used
 248                     $id = $maybeid;
 249     #                print "DEST: use this id $id\n";
 250                 }
 251                 else {
 252     #                print "skip $maybeid for $name\n";
 253                 }
 254             }
 255             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 256                 my ($name, $val)=($1, $2);
 257                 $dest=""; # in case it is left untouched for when the
 258                 # model name isn't "our"
 259                 dest($_, $name, $val);
 260
 261                 if($dest) {
 262                     # Store the current dest string. If this target matches
 263                     # multiple strings, it will get updated several times.
 264                     $deststr = $dest;
 265                 }
 266             }
 267         }
 268
 269         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 270             $maybeid=$1;
 271         }
 272     }
 273     close(ENG);
 274 }
 275
 276 # a function that compares the english phrase with the translated one.
 277 # compare source strings and desc
 278
 279 # Then output the updated version!
 280 sub compare {
 281     my ($idstr, $engref, $locref)=@_;
 282     my ($edesc, $ldesc);
 283     my ($esource, $lsource);
 284     my $mode=0;
 285
 286     for my $l (@$engref) {
 287         if($l =~ /^ *#/) {
 288             # comment
 289             next;
 290         }
 291         if($l =~ /^ *desc: (.*)/) {
 292             $edesc=$1;
 293         }
 294         elsif($l =~ / *\<source\>/i) {
 295             $mode=1;
 296         }
 297         elsif($mode) {
 298             if($l =~ / *\<\/source\>/i) {
 299                 last;
 300             }
 301             $esource .= "$l\n";
 302         }
 303     }
 304
 305     my @show;
 306     my @source;
 307
 308     $mode = 0;
 309     for my $l (@$locref) {
 310         if($l =~ /^ *desc: (.*)/) {
 311             $ldesc=$1;
 312             if(trim($edesc) ne trim($ldesc)) {
 313                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 314             }
 315             push @show, $l;
 316         }
 317         elsif($l =~ / *\<source\>/i) {
 318             $mode=1;
 319             push @show, $l;
 320         }
 321         elsif($mode) {
 322             if($l =~ / *\<\/source\>/i) {
 323                 $mode = 0;
 324                 print @show;
 325                 if(trim($esource) ne trim($lsource)) {
 326                     print "### The <source> section differs from the english!\n",
 327                     "### the previously used one is commented below:\n";
 328                     for(split("\n", $lsource)) {
 329                         print "### $_\n";
 330                     }
 331                     print $esource;
 332                 }
 333                 else {
 334                     print $lsource;
 335                 }
 336                 undef @show; # start over
 337
 338                 push @show, $l;
 339             }
 340             else {
 341                 $lsource .= "$l";
 342             }
 343         }
 344         else {
 345             push @show, $l;
 346         }
 347     }
 348
 349
 350     print @show;
 351 }
 352
 353 my $idcount;        # counter for lang ID numbers
 354 my $voiceid=0x8000; # counter for voice-only ID numbers
 355
 356 #
 357 # Now start the scanning of the selected language string
 358 #
 359
 360 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
 361 my @phrase;
 362 while(<LANG>) {
 363
 364     $line++;
 365
 366     # get rid of DOS newlines
 367     $_ =~ s/\r//g;
 368
 369     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 370         # comment or empty line
 371         next;
 372     }
 373
 374     my $ll = $_;
 375
 376     # print "M: $m\n";
 377
 378     push @phrase, $ll;
 379
 380     # this is an XML-lookalike tag
 381     if (/^(<|[^\"<]+<)([^>]*)>/) {
 382         my $part = $2;
 383         # print "P: $part\n";
 384
 385         if($part =~ /^\//) {
 386             # this was a closing tag
 387
 388             if($part eq "/phrase") {
 389                 # closing the phrase
 390
 391                 my $idstr = $phrase{'id'};
 392                 my $idnum;
 393
 394                 if($binary && !$english{$idstr}) {
 395                     # $idstr doesn't exist for english, skip it\n";
 396                 }
 397                 elsif($dest =~ /^none\z/i) {
 398                     # "none" as dest (without quotes) means that this entire
 399                     # phrase is to be ignored
 400                 }
 401                 elsif(!$update) {
 402                     # we don't do the fully detailed analysis when we "update"
 403                     # since we don't do it for a particular target etc
 404
 405                     # allow the keyword 'deprecated' to be used on dest and
 406                     # voice strings to mark that as deprecated. It will then
 407                     # be replaced with "".
 408
 409                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 410                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 411
 412                     # basic syntax error alerts, if there are no quotes we
 413                     # will assume an empty string was intended
 414                     if($dest !~ /^\"/) {
 415                         print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
 416                         $dest='""';
 417                     }
 418                     if($src !~ /^\"/) {
 419                         print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
 420                         $src='""';
 421                     }
 422                     if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
 423                         print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
 424                         $voice='""';
 425                     }
 426
 427                     # Use the ID name to figure out which id number range we
 428                     # should use for this phrase. Voice-only strings are
 429                     # separated.
 430
 431                     if($idstr =~ /^VOICE/) {
 432                         $idnum = $voiceid++;
 433                     }
 434                     else {
 435                         $idnum = $idcount++;
 436                     }
 437
 438                     $id{$idstr} = $idnum;
 439                     $idnum[$idnum]=$idstr;
 440
 441                     $source{$idstr}=$src;
 442                     $dest{$idstr}=$dest;
 443                     $voice{$idstr}=$voice;
 444
 445                     if($verbose) {
 446                         print "id: $phrase{id} ($idnum)\n";
 447                         print "source: $src\n";
 448                         print "dest: $dest\n";
 449                         print "voice: $voice\n";
 450                     }
 451
 452                     undef $src;
 453                     undef $dest;
 454                     undef $voice;
 455                     undef %phrase;
 456                 }
 457
 458                 if($update) {
 459                     my $e = $english{$idstr};
 460
 461                     if($e) {
 462                         # compare original english with this!
 463                         my @eng = split("\n", $english{$idstr});
 464
 465                         compare($idstr, \@eng, \@phrase);
 466
 467                         $english{$idstr}=""; # clear it
 468                     }
 469                     else {
 470                         print "### $idstr: The phrase is not used. Skipped\n";
 471                     }
 472                 }
 473                 undef @phrase;
 474
 475             } # end of </phrase>
 476
 477             # starts with a slash, this _ends_ this section
 478             $m = pop @m; # get back old value, the previous level's tag
 479             next;
 480         } # end of tag close
 481
 482         # This is an opening (sub) tag
 483
 484         push @m, $m; # store old value
 485         $m = $part;
 486         next;
 487     }
 488
 489     if(/^ *([^:]+): *(.*)/) {
 490         my ($name, $val)=($1, $2);
 491         &$m($_, $name, $val);
 492     }
 493 }
 494 close(LANG);
 495
 496 if($update) {
 497     my $any=0;
 498     for(keys %english) {
 499         if($english{$_}) {
 500             print "###\n",
 501             "### This phrase below was not present in the translated file\n",
 502             "<phrase>\n";
 503             print $english{$_};
 504             print "</phrase>\n";
 505         }
 506     }
 507 }
 508
 509 if($prefix) {
 510     # We create a .c and .h file
 511
 512     open(HFILE, ">$prefix.h") ||
 513         die "Error: couldn't create file $prefix.h\n";
 514     open(CFILE, ">$prefix.c") ||
 515         die "Error: couldn't create file $prefix.c\n";
 516
 517     print HFILE <<MOO
 518 /* This file was automatically generated using genlang */
 519 /*
 520  * The str() macro/functions is how to access strings that might be
 521  * translated. Use it like str(MACRO) and expect a string to be
 522  * returned!
 523  */
 524 #define str(x) language_strings[x]
 525
 526 /* this is the array for holding the string pointers.
 527    It will be initialized at runtime. */
 528 extern unsigned char *language_strings[];
 529 /* this contains the concatenation of all strings, separated by \\0 chars */
 530 extern const unsigned char language_builtin[];
 531
 532 /* The enum below contains all available strings */
 533 enum \{
 534 MOO
 535     ;
 536
 537     print CFILE <<MOO
 538 /* This file was automaticly generated using genlang, the strings come
 539    from "$input" */
 540
 541 #include "$prefix.h"
 542
 543 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 544 const unsigned char language_builtin[] =
 545 MOO
 546 ;
 547
 548     # Output the ID names for the enum in the header file
 549     my $i;
 550     for $i (1 .. $idcount) {
 551         my $name=$idnum[$i - 1]; # get the ID name
 552
 553         $name =~ s/\"//g; # cut off the quotes
 554
 555         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
 556     }
 557
 558 # Output separation marker for last string ID and the upcoming voice IDs
 559
 560     print HFILE <<MOO
 561     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 562     /* --- below this follows voice-only strings --- */
 563     VOICEONLY_DELIMITER = 0x8000,
 564 MOO
 565     ;
 566
 567 # Output the ID names for the enum in the header file
 568     for $i (0x8000 .. ($voiceid-1)) {
 569         my $name=$idnum[$i]; # get the ID name
 570
 571         $name =~ s/\"//g; # cut off the quotes
 572
 573         printf HFILE ("    %s,\n", $name);
 574     }
 575
 576     # Output end of enum
 577     print HFILE "\n};\n/* end of generated enum list */\n";
 578
 579     # Output the target phrases for the source file
 580     for $i (1 .. $idcount) {
 581         my $name=$idnum[$i - 1]; # get the ID
 582         my $dest = $dest{$name}; # get the destination phrase
 583
 584         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 585
 586         if(!$dest) {
 587             # this is just to be on the safe side
 588             $dest = '"\0"';
 589         }
 590
 591         printf CFILE ("    %s\n", $dest);
 592     }
 593
 594 # Output end of string chunk
 595     print CFILE <<MOO
 596 ;
 597 /* end of generated string list */
 598 MOO
 599 ;
 600
 601     close(HFILE);
 602     close(CFILE);
 603 } # end of the c/h file generation
 604 elsif($binary) {
 605     # Creation of a binary lang file was requested
 606
 607     # We must first scan the english file to get the correct order of the id
 608     # numbers used there, as that is what sets the id order for all language
 609     # files. The english file is scanned before the translated file was
 610     # scanned.
 611
 612     open(OUTF, ">$binary") or die "Error: Can't create $binary";
 613     binmode OUTF;
 614     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 615
 616     # loop over the target phrases
 617     for $i (1 .. $idcount) {
 618         my $name=$idnum[$i - 1]; # get the ID
 619         my $dest = $dest{$name}; # get the destination phrase
 620
 621         if($dest) {
 622             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 623
 624             # Now, make sure we get the number from the english sort order:
 625             $idnum = $idmap{$name};
 626
 627             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 628         }
 629     }
 630 }
 631 elsif($voiceout) {
 632     # voice output requested, display id: and voice: strings in a v1-like
 633     # fashion
 634
 635     my @engl;
 636
 637     # This loops over the strings in the translated language file order
 638     my @ids = ((0 .. ($idcount-1)));
 639     push @ids, (0x8000 .. ($voiceid-1));
 640
 641     #for my $id (@ids) {
 642     #    print "$id\n";
 643     #}
 644
 645     for $i (@ids) {
 646         my $name=$idnum[$i]; # get the ID
 647         my $dest = $voice{$name}; # get the destination voice string
 648
 649         if($dest) {
 650             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 651
 652             # Now, make sure we get the number from the english sort order:
 653             $idnum = $idmap{$name};
 654
 655             if(length($idnum)) {
 656                 $engl[$idnum] = $i;
 657
 658                 #print "Input index $i output index $idnum\n";
 659             }
 660             else {
 661                 # not used, mark it so
 662                 $engl[$i] = -1
 663             }
 664
 665         }
 666     }
 667     for my $i (@ids) {
 668
 669         my $o = $engl[$i];
 670
 671         if(($o < 0) || !length($o)) {
 672             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
 673             next;
 674         }
 675
 676         my $name=$idnum[$o]; # get the ID
 677         my $dest = $voice{$name}; # get the destination voice string
 678
 679         print "#$i ($o)\nid: $name\nvoice: $dest\n";
 680     }
 681
 682 }
 683
 684
 685 if($verbose) {
 686     printf("%d ID strings scanned\n", $idcount);
 687
 688     print "* head *\n";
 689     for(keys %head) {
 690         printf "$_: %s\n", $head{$_};
 691     }
 692 }
 693