tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 3; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file namaed [outfile].
  32     The use of this option requires that you also use -e.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -o
  52     Voice mode output. Outputs all id: and voice: lines for the given target!
  53
  54  -v
  55     Enables verbose (debug) output.
  56 MOO
  57 ;
  58     exit;
  59 }
  60
  61 # How update works:
  62 #
  63 # 1) scan the english file, keep the whole <phrase> for each phrase.
  64 # 2) read the translated file, for each end of phrase, compare:
  65 #  A) all source strings, if there's any change there should be a comment about
  66 #     it output
  67 #  B) the desc fields
  68 #
  69 # 3) output the phrase with the comments from above
  70 # 4) check which phrases that the translated version didn't have, and spit out
  71 #    the english version of those
  72 #
  73
  74 my $prefix = $p;
  75 my $binary = $b;
  76 my $update = $u;
  77
  78 my $english = $e;
  79 my $voiceout = $o;
  80
  81 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  82
  83 if($check > 1) {
  84     print "Please use only one of -p, -u, -o and -b\n";
  85     exit;
  86 }
  87 if(!$check) {
  88     print "Please use at least one of -p, -u, -o and -b\n";
  89     exit;
  90 }
  91 if(($binary || $update || $voiceout) && !$english) {
  92     print "Please use -e too when you use -b, -o or -u\n";
  93     exit;
  94 }
  95
  96 my $target = $t;
  97 if(!$target && !$update) {
  98     print "Please specify a target (with -t)!\n";
  99     exit;
 100 }
 101 my $verbose=$v;
 102
 103 my %id; # string to num hash
 104 my @idnum; # num to string array
 105
 106 my %source; # id string to source phrase hash
 107 my %dest; # id string to dest phrase hash
 108 my %voice; # id string to voice phrase hash
 109
 110 my $input = $ARGV[0];
 111
 112 my @m;
 113 my $m="blank";
 114
 115 sub match {
 116     my ($string, $pattern)=@_;
 117
 118     $pattern =~ s/\*/.?*/g;
 119     $pattern =~ s/\?/./g;
 120
 121     return ($string =~ /^$pattern\z/);
 122 }
 123
 124 sub blank {
 125     # nothing to do
 126 }
 127
 128 my %head;
 129 sub header {
 130     my ($full, $n, $v)=@_;
 131     $head{$n}=$v;
 132 }
 133
 134 my %phrase;
 135 sub phrase {
 136     my ($full, $n, $v)=@_;
 137     $phrase{$n}=$v;
 138 }
 139
 140 sub parsetarget {
 141     my ($debug, $strref, $full, $n, $v)=@_;
 142     my $string;
 143     my @all= split(" *, *", $n);
 144     my $test;
 145     for $test (@all) {
 146 #        print "TEST ($debug) $target for $test\n";
 147         for my $part (split(":", $target)) {
 148             if(match($part, $test)) {
 149                 $string = $v;
 150 #                print "MATCH: $test => $v\n";
 151                 $$strref = $string;
 152                 return $string;
 153             }
 154         }
 155     }
 156 }
 157
 158 my $src;
 159 sub source {
 160     parsetarget("src", \$src, @_);
 161 }
 162
 163 my $dest;
 164 sub dest {
 165     parsetarget("dest", \$dest, @_);
 166 }
 167
 168 my $voice;
 169 sub voice {
 170     parsetarget("voice", \$voice, @_);
 171 }
 172
 173 my %idmap;
 174 my %english;
 175 if($english) {
 176     # For the cases where the english file needs to be scanned/read, we do
 177     # it before we read the translated file. For -b it isn't necessary, but for
 178     # -u it is convenient.
 179
 180     my $idnum=0; # start with a true number
 181     my $vidnum=0x8000; # first voice id
 182     open(ENG, "<$english") || die "can't open $english";
 183     my @phrase;
 184     my $id;
 185     my $maybeid;
 186     my $withindest;
 187     while(<ENG>) {
 188
 189         # get rid of DOS newlines
 190         $_ =~ s/\r//g;
 191
 192         if($_ =~ /^ *\<phrase\>/) {
 193             # this is the start of a phrase
 194         }
 195         elsif($_ =~ /^ *\<\/phrase\>/) {
 196
 197             # if id is something, when we count and store this phrase
 198             if($id) {
 199                 # voice-only entries get a difference range
 200                 if($id =~ /^VOICE_/) {
 201                     # Assign an ID number to this entry
 202                     $idmap{$id}=$vidnum;
 203                     $vidnum++;
 204                 }
 205                 else {
 206                     # Assign an ID number to this entry
 207                     $idmap{$id}=$idnum;
 208                     $idnum++;
 209                     print STDERR "DEST: bumped idnum to $idnum\n";
 210                 }
 211
 212                 # this is the end of a phrase, add it to the english hash
 213                 $english{$id}=join("", @phrase);
 214             }
 215             undef @phrase;
 216             $id="";
 217         }
 218         elsif($_ ne "\n") {
 219             # gather everything related to this phrase
 220             push @phrase, $_;
 221             if($_ =~ /^ *\<dest\>/i) {
 222                 $withindest=1;
 223             }
 224             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 225                 $withindest=0;
 226             }
 227             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 228                 my ($name, $val)=($1, $2);
 229                 $dest=""; # in case it is left untouched for when the
 230                 # model name isn't "our"
 231                 dest($_, $name, $val);
 232
 233                 print STDERR "DEST: \"$dest\" for $name / $id\n";
 234
 235                 if($update || ($dest && ($dest !~ /^none\z/i))) {
 236                     # we unconditionally always use all IDs when the "update"
 237                     # feature is used
 238                     $id = $maybeid;
 239                     print STDERR "DEST: use this id $id\n";
 240                 }
 241             }
 242         }
 243
 244         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 245             $maybeid=$1;
 246         }
 247     }
 248     close(ENG);
 249 }
 250
 251 # a function that compares the english phrase with the translated one.
 252 # compare source strings and desc
 253
 254 # Then output the updated version!
 255 sub compare {
 256     my ($idstr, $engref, $locref)=@_;
 257     my ($edesc, $ldesc);
 258     my ($esource, $lsource);
 259     my $mode=0;
 260
 261     for my $l (@$engref) {
 262         if($l =~ /^ *desc: (.*)/) {
 263             $edesc=$1;
 264         }
 265         elsif($l =~ / *\<source\>/i) {
 266             $mode=1;
 267         }
 268         elsif($mode) {
 269             if($l =~ / *\<\/source\>/i) {
 270                 last;
 271             }
 272             $esource .= "$l\n";
 273         }
 274     }
 275
 276     my @show;
 277     my @source;
 278
 279     $mode = 0;
 280     for my $l (@$locref) {
 281         if($l =~ /^ *desc: (.*)/) {
 282             $ldesc=$1;
 283             if($edesc ne $ldesc) {
 284                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 285             }
 286             push @show, $l;
 287         }
 288         elsif($l =~ / *\<source\>/i) {
 289             $mode=1;
 290             push @show, $l;
 291         }
 292         elsif($mode) {
 293             if($l =~ / *\<\/source\>/i) {
 294                 $mode = 0;
 295                 print @show;
 296                 if($esource ne $lsource) {
 297                     print "### The <source> section differs from the english!\n",
 298                     "### the previously used one is commented below:\n";
 299                     for(split("\n", $lsource)) {
 300                         print "### $_\n";
 301                     }
 302                     print $esource;
 303                 }
 304                 else {
 305                     print $lsource;
 306                 }
 307                 undef @show; # start over
 308
 309                 push @show, $l;
 310             }
 311             else {
 312                 $lsource .= "$l";
 313             }
 314         }
 315         else {
 316             push @show, $l;
 317         }
 318     }
 319
 320
 321     print @show;
 322 }
 323
 324 my $idcount;        # counter for lang ID numbers
 325 my $voiceid=0x8000; # counter for voice-only ID numbers
 326
 327 #
 328 # Now start the scanning of the selected language string
 329 #
 330
 331 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 332 my @phrase;
 333 while(<LANG>) {
 334
 335     $line++;
 336
 337     # get rid of DOS newlines
 338     $_ =~ s/\r//g;
 339
 340     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 341         # comment or empty line
 342         next;
 343     }
 344
 345     my $ll = $_;
 346
 347     # print "M: $m\n";
 348
 349     push @phrase, $ll;
 350
 351     # this is an XML-lookalike tag
 352     if (/^(<|[^\"<]+<)([^>]*)>/) {
 353         my $part = $2;
 354         # print "P: $part\n";
 355
 356         if($part =~ /^\//) {
 357             # this was a closing tag
 358
 359             if($part eq "/phrase") {
 360                 # closing the phrase
 361
 362                 my $idstr = $phrase{'id'};
 363                 my $idnum;
 364
 365                 if($dest =~ /^none\z/i) {
 366                     # "none" as dest (without quotes) means that this entire
 367                     # phrase is to be ignored
 368                 }
 369                 elsif(!$update) {
 370                     # we don't do the fully detailed analysis when we "update"
 371                     # since we don't do it for a particular target etc
 372
 373                     # allow the keyword 'deprecated' to be used on dest and
 374                     # voice strings to mark that as deprecated. It will then
 375                     # be replaced with "".
 376
 377                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 378                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 379
 380                     # basic syntax error alerts, if there are no quotes we
 381                     # will assume an empty string was intended
 382                     if($dest !~ /^\"/) {
 383                         print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
 384                         $dest='""';
 385                     }
 386                     if($src !~ /^\"/) {
 387                         print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
 388                         $src='""';
 389                     }
 390                     if($voice !~ /^\"/) {
 391                         print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
 392                         $voice='""';
 393                     }
 394
 395                     # Use the ID name to figure out which id number range we
 396                     # should use for this phrase. Voice-only strings are
 397                     # separated.
 398
 399                     if($idstr =~ /^VOICE/) {
 400                         $idnum = $voiceid++;
 401                     }
 402                     else {
 403                         $idnum = $idcount++;
 404                     }
 405
 406                     $id{$idstr} = $idnum;
 407                     $idnum[$idnum]=$idstr;
 408
 409                     $source{$idstr}=$src;
 410                     $dest{$idstr}=$dest;
 411                     $voice{$idstr}=$voice;
 412
 413                     if($verbose) {
 414                         print "id: $phrase{id} ($idnum)\n";
 415                         print "source: $src\n";
 416                         print "dest: $dest\n";
 417                         print "voice: $voice\n";
 418                     }
 419
 420                     undef $src;
 421                     undef $dest;
 422                     undef $voice;
 423                     undef %phrase;
 424                 }
 425
 426                 if($update) {
 427                     my $e = $english{$idstr};
 428
 429                     if($e) {
 430                         # compare original english with this!
 431                         my @eng = split("\n", $english{$idstr});
 432
 433                         compare($idstr, \@eng, \@phrase);
 434
 435                         $english{$idstr}=""; # clear it
 436                     }
 437                     else {
 438                         print "### $idstr: The phrase is not used. Skipped\n";
 439                     }
 440                 }
 441                 undef @phrase;
 442
 443             } # end of </phrase>
 444
 445             # starts with a slash, this _ends_ this section
 446             $m = pop @m; # get back old value, the previous level's tag
 447             next;
 448         } # end of tag close
 449
 450         # This is an opening (sub) tag
 451
 452         push @m, $m; # store old value
 453         $m = $part;
 454         next;
 455     }
 456
 457     if(/^ *([^:]+): *(.*)/) {
 458         my ($name, $val)=($1, $2);
 459         &$m($_, $name, $val);
 460     }
 461 }
 462 close(LANG);
 463
 464 if($update) {
 465     my $any=0;
 466     for(keys %english) {
 467         if($english{$_}) {
 468             print "###\n",
 469             "### This phrase below was not present in the translated file\n",
 470             "<phrase>\n";
 471             print $english{$_};
 472             print "</phrase>\n";
 473         }
 474     }
 475 }
 476
 477 if($prefix) {
 478     # We create a .c and .h file
 479
 480     open(HFILE, ">$prefix.h") ||
 481         die "couldn't create file $prefix.h\n";
 482     open(CFILE, ">$prefix.c") ||
 483         die "couldn't create file $prefix.c\n";
 484
 485     print HFILE <<MOO
 486 /* This file was automatically generated using genlang */
 487 /*
 488  * The str() macro/functions is how to access strings that might be
 489  * translated. Use it like str(MACRO) and expect a string to be
 490  * returned!
 491  */
 492 #define str(x) language_strings[x]
 493
 494 /* this is the array for holding the string pointers.
 495    It will be initialized at runtime. */
 496 extern unsigned char *language_strings[];
 497 /* this contains the concatenation of all strings, separated by \\0 chars */
 498 extern const unsigned char language_builtin[];
 499
 500 /* The enum below contains all available strings */
 501 enum \{
 502 MOO
 503     ;
 504
 505     print CFILE <<MOO
 506 /* This file was automaticly generated using genlang, the strings come
 507    from "$input" */
 508
 509 #include "$prefix.h"
 510
 511 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 512 const unsigned char language_builtin[] =
 513 MOO
 514 ;
 515
 516     # Output the ID names for the enum in the header file
 517     my $i;
 518     for $i (1 .. $idcount) {
 519         my $name=$idnum[$i - 1]; # get the ID name
 520
 521         $name =~ s/\"//g; # cut off the quotes
 522
 523         printf HFILE ("    %s,\n", $name);
 524     }
 525
 526 # Output separation marker for last string ID and the upcoming voice IDs
 527
 528     print HFILE <<MOO
 529     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 530     /* --- below this follows voice-only strings --- */
 531     VOICEONLY_DELIMITER = 0x8000,
 532 MOO
 533     ;
 534
 535 # Output the ID names for the enum in the header file
 536     for $i (0x8000 .. ($voiceid-1)) {
 537         my $name=$idnum[$i]; # get the ID name
 538
 539         $name =~ s/\"//g; # cut off the quotes
 540
 541         printf HFILE ("    %s,\n", $name);
 542     }
 543
 544     # Output end of enum
 545     print HFILE "\n};\n/* end of generated enum list */\n";
 546
 547     # Output the target phrases for the source file
 548     for $i (1 .. $idcount) {
 549         my $name=$idnum[$i - 1]; # get the ID
 550         my $dest = $dest{$name}; # get the destination phrase
 551
 552         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 553
 554         if(!$dest) {
 555             # this is just to be on the safe side
 556             $dest = '"\0"';
 557         }
 558
 559         printf CFILE ("    %s\n", $dest);
 560     }
 561
 562 # Output end of string chunk
 563     print CFILE <<MOO
 564 ;
 565 /* end of generated string list */
 566 MOO
 567 ;
 568
 569     close(HFILE);
 570     close(CFILE);
 571 } # end of the c/h file generation
 572 elsif($binary) {
 573     # Creation of a binary lang file was requested
 574
 575     # We must first scan the english file to get the correct order of the id
 576     # numbers used there, as that is what sets the id order for all language
 577     # files. The english file is scanned before the translated file was
 578     # scanned.
 579
 580     open(OUTF, ">$binary") or die "Can't create $binary";
 581     binmode OUTF;
 582     printf OUTF ("\x1a%c", $langversion); # magic lang file header
 583
 584     # loop over the target phrases
 585     for $i (1 .. $idcount) {
 586         my $name=$idnum[$i - 1]; # get the ID
 587         my $dest = $dest{$name}; # get the destination phrase
 588
 589         if($dest) {
 590             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 591
 592             # Now, make sure we get the number from the english sort order:
 593             $idnum = $idmap{$name};
 594
 595             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 596         }
 597     }
 598 }
 599 elsif($voiceout) {
 600     # voice output requested, display id: and voice: strings in a v1-like
 601     # fashion
 602
 603     my @engl;
 604
 605     # This loops over the strings in the translated language file order
 606     my @ids = ((0 .. ($idcount-1)));
 607     push @ids, (0x8000 .. ($voiceid-1));
 608
 609     #for my $id (@ids) {
 610     #    print "$id\n";
 611     #}
 612
 613     for $i (@ids) {
 614         my $name=$idnum[$i]; # get the ID
 615         my $dest = $voice{$name}; # get the destination voice string
 616
 617         if($dest) {
 618             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 619
 620             # Now, make sure we get the number from the english sort order:
 621             $idnum = $idmap{$name};
 622
 623             $engl[$idnum] = $i;
 624
 625            # print "Input index $i output index $idnum\n";
 626
 627         }
 628     }
 629     for my $i (@ids) {
 630
 631         my $o = $engl[$i];
 632
 633         my $name=$idnum[$o]; # get the ID
 634         my $dest = $voice{$name}; # get the destination voice string
 635
 636         print "#$i\nid: $name\nvoice: $dest\n";
 637     }
 638
 639 }
 640
 641
 642 if($verbose) {
 643     printf("%d ID strings scanned\n", $idcount);
 644
 645     print "* head *\n";
 646     for(keys %head) {
 647         printf "$_: %s\n", $head{$_};
 648     }
 649 }
 650