tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
  # Version byte written into the header of binary language files.
  my $langversion = 3; # 3 was the latest one used in the v1 format

  # A note for future users and readers: the original v1 language system
  # allowed the build to use a built-in language other than english. That
  # feature was removed from the build system, and since nothing uses it
  # anymore this script does not provide it either.

  # Without a language file on the command line there is nothing to process:
  # print the usage text and stop.
  if(!$ARGV[0]) {
      # The text below states the same requirements that the option checks
      # further down enforce: -e is needed for -b, -o and -u, and -t is
      # needed for every mode except -u.
      print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Required in combination with -b, -o or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
      exit;
  }
  60
  61 # How update works:
  62 #
  63 # 1) scan the english file, keep the whole <phrase> for each phrase.
  64 # 2) read the translated file, for each end of phrase, compare:
  65 #  A) all source strings, if there's any change there should be a comment about
  66 #     it output
  67 #  B) the desc fields
  68 #
  69 # 3) output the phrase with the comments from above
  70 # 4) check which phrases the translated version lacks, and output the
  71 #    english version of those
  72 #
  73
  # Command line switches. The interpreter is started with -s (see the #!
  # line), so a switch such as -p=foo or -u shows up in the package variable
  # of the same name ($p, $u, ...).
  my $prefix = $p;   # -p: base name of the .c/.h pair to generate
  my $binary = $b;   # -b: name of the binary language file to write
  my $update = $u;   # -u: update a translation against the english file
  my $english = $e;  # -e: the english (master) language file
  my $voiceout = $o; # -o: voice mode output
  my $target = $t;   # -t: colon separated list of target names
  my $verbose = $v;  # -v: verbose (debug) output

  # Number of operating modes selected; exactly one is allowed.
  my $check = grep { $_ } ($binary, $prefix, $update, $voiceout);

  # All complaints go to stderr and end the run with a failing exit status.
  # The -u and -o modes write their result to stdout, so a message printed
  # there would end up inside the generated output, and an exit status of
  # zero would hide the mistake from the calling build.
  if($check > 1) {
      print STDERR "Please use only one of -p, -u, -o and -b\n";
      exit 1;
  }
  if(!$check) {
      print STDERR "Please use at least one of -p, -u, -o and -b\n";
      exit 1;
  }
  if(($binary || $update || $voiceout) && !$english) {
      print STDERR "Please use -e too when you use -b, -o or -u\n";
      exit 1;
  }
  if(!$target && !$update) {
      print STDERR "Please specify a target (with -t)!\n";
      exit 1;
  }

  my %id; # string to num hash
  my @idnum; # num to string array

  my %source; # id string to source phrase hash
  my %dest; # id string to dest phrase hash
  my %voice; # id string to voice phrase hash

  # The language file to process.
  my $input = $ARGV[0];

  # While the language file is scanned, $m holds the name of the sub that
  # handles "name: value" lines of the current section and @m is the stack
  # of the handler names of the enclosing sections.
  my @m;
  my $m="blank";
 114
 # Test STRING against a shell-style wildcard PATTERN.
 #
 # In the pattern '*' stands for any run of characters, including none, and
 # '?' stands for exactly one character. Every other character is taken
 # literally. The pattern has to cover the whole string.
 #
 # Returns true when the string matches.
 sub match {
     my ($string, $pattern)=@_;

     # Escape everything first so that characters such as '.' or '+' in a
     # target name are not interpreted as regex syntax, then turn the two
     # (now backslash-escaped) wildcards into their regex equivalents.
     $pattern = quotemeta($pattern);
     $pattern =~ s/\\\*/.*/g;
     $pattern =~ s/\\\?/./g;

     return ($string =~ /^$pattern\z/);
 }
 123
 # Handler used while no section is open ($m starts out as "blank"):
 # "name: value" lines found there are ignored.
 sub blank {
     return;
 }
 127
 # Values collected by header(), keyed by field name. Printed in verbose mode.
 my %head;

 # Section handler for "name: value" lines inside a <header> section.
 # Arguments: the full line (unused), the field name and the field value.
 # Stores the value under its name and returns it.
 sub header {
     my (undef, $field, $value) = @_;
     return $head{$field} = $value;
 }
 133
 # Fields of the phrase currently being read (for example its id), keyed
 # by field name.
 my %phrase;

 # Section handler for "name: value" lines directly inside a <phrase>.
 # Arguments: the full line (unused), the field name and the field value.
 # Stores the value under its name and returns it.
 sub phrase {
     my (undef, $field, $value) = @_;
     return $phrase{$field} = $value;
 }
 139
 # Pick the value of a "targets: value" line if it applies to the wanted
 # target.
 #
 # Arguments:
 #   $label   - name of the section, only of use for debugging
 #   $slot    - reference to the scalar that receives the value
 #   $full    - the complete input line (unused)
 #   $names   - comma separated list of target name patterns
 #   $value   - the value found after the colon
 #
 # Each colon separated part of the global $target is tried against each
 # pattern, in pattern order. On the first match the value is stored through
 # $slot and returned. Without a match the slot is left untouched.
 sub parsetarget {
     my ($label, $slot, $full, $names, $value)=@_;

     my @wanted = split(":", $target);

     for my $pattern (split(" *, *", $names)) {
         for my $part (@wanted) {
             next unless match($part, $pattern);

             $$slot = $value;
             return $value;
         }
     }
 }
 157
 # The <source> string selected for the wanted target in the current phrase.
 my $src;

 # Section handler for lines inside <source>: lets parsetarget() store the
 # value in $src when the line applies to the wanted target.
 sub source {
     return parsetarget("src", \$src, @_);
 }
 162
 # The <dest> string selected for the wanted target in the current phrase.
 my $dest;

 # Section handler for lines inside <dest>: lets parsetarget() store the
 # value in $dest when the line applies to the wanted target.
 sub dest {
     return parsetarget("dest", \$dest, @_);
 }
 167
 # The <voice> string selected for the wanted target in the current phrase.
 my $voice;

 # Section handler for lines inside <voice>: lets parsetarget() store the
 # value in $voice when the line applies to the wanted target.
 sub voice {
     return parsetarget("voice", \$voice, @_);
 }
 172
 my %idmap;   # id string to id number, numbered in english file order
 my %english; # id string to the complete text of the english phrase
 if($english) {
     # The english file is read before the translated file. It decides
     # which number every phrase id gets (%idmap, used by the -b and -o
     # output) and it provides the reference text that -u compares against
     # (%english).

     my $idnum=0; # start with a true number
     my $vidnum=0x8000; # first voice id

     # Three-argument open with a lexical handle: the file name comes
     # straight from the command line and must never be taken as a mode.
     open(my $eng, '<', $english) || die "can't open $english: $!";

     my @phrase;     # the lines of the phrase being read
     my $id;         # id of the phrase, set once the phrase is known to be used
     my $maybeid;    # the most recently seen "id:" value
     my $withindest; # true while inside a <dest> section
     while(my $line = <$eng>) {

         # get rid of DOS newlines
         $line =~ s/\r//g;

         if($line =~ /^ *\<phrase\>/) {
             # this is the start of a phrase
         }
         elsif($line =~ /^ *\<\/phrase\>/) {

             # if id is something, then we count and store this phrase
             if($id) {
                 # voice-only entries get a different number range
                 if($id =~ /^VOICE_/) {
                     $idmap{$id}=$vidnum++;
                 }
                 else {
                     $idmap{$id}=$idnum++;
                 }

                 # this is the end of a phrase, add it to the english hash
                 $english{$id}=join("", @phrase);
             }
             undef @phrase;
             $id="";
         }
         elsif($line ne "\n") {
             # gather everything related to this phrase
             push @phrase, $line;

             if($line =~ /^ *\<dest\>/i) {
                 $withindest=1;
             }
             elsif($withindest && ($line =~ /^ *\<\/dest\>/i)) {
                 $withindest=0;
             }
             elsif($withindest && ($line =~ / *([^:]+): *(.*)/)) {
                 my ($name, $val)=($1, $2);

                 # dest() only touches $dest when the line applies to the
                 # wanted target, so clear it first
                 $dest="";
                 dest($line, $name, $val);

                 if($update || ($dest && ($dest !~ /^none\z/i))) {
                     # The phrase is used for this target. When the
                     # "update" feature is used, all IDs are used
                     # unconditionally.
                     $id = $maybeid;
                 }
             }
         }

         # checked last, and on every line, so the id is known by the time
         # the <dest> section of the same phrase is reached
         if($line =~ /^ *id: ([^ \t\n]+)/i) {
             $maybeid=$1;
         }
     }
     close($eng);
 }
 250
 # Print one translated phrase, brought up to date with its english version.
 #
 # Arguments:
 #   $idstr  - id of the phrase (unused)
 #   $engref - reference to the lines of the english phrase, without newlines
 #   $locref - reference to the lines of the translated phrase, with newlines
 #
 # The 'desc' field and the <source> section are compared. Where they
 # differ, the english text is printed and the text previously used by the
 # translation is kept as "###" comment lines. All other lines are printed
 # as they are. A replaced desc line is also written back into @$locref.
 sub compare {
     my ($idstr, $engref, $locref)=@_;
     my ($edesc, $esource);
     my $insource = 0;

     # First pass: fetch the desc and the <source> body of the english phrase
     for my $eline (@$engref) {
         if($eline =~ /^ *desc: (.*)/) {
             $edesc = $1;
             next;
         }
         if($eline =~ / *\<source\>/i) {
             $insource = 1;
             next;
         }
         next unless $insource;
         last if $eline =~ / *\<\/source\>/i;

         $esource .= "$eline\n";
     }

     # Second pass: walk the translated phrase and print it piece by piece
     my ($ldesc, $lsource);
     my @pending; # lines waiting to be printed

     $insource = 0;
     for my $lline (@$locref) {
         if($lline =~ /^ *desc: (.*)/) {
             $ldesc = $1;
             if($edesc ne $ldesc) {
                 $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
             }
             push @pending, $lline;
         }
         elsif($lline =~ / *\<source\>/i) {
             $insource = 1;
             push @pending, $lline;
         }
         elsif(!$insource) {
             push @pending, $lline;
         }
         elsif($lline !~ / *\<\/source\>/i) {
             # a line of the translated file's own <source> section
             $lsource .= "$lline";
         }
         else {
             # End of the <source> section: print everything up to it,
             # followed by the source text to use.
             $insource = 0;
             print @pending;

             if($esource eq $lsource) {
                 print $lsource;
             }
             else {
                 print "### The <source> section differs from the english!\n",
                 "### the previously used one is commented below:\n";
                 print "### $_\n" for split("\n", $lsource);
                 print $esource;
             }

             # start over with the closing tag
             @pending = ($lline);
         }
     }

     print @pending;
 }
 323
 my $idcount = 0;    # counter for lang ID numbers
 my $voiceid=0x8000; # counter for voice-only ID numbers

 #
 # Now start the scanning of the selected language file
 #

 # Three-argument open with a lexical handle: the file name comes straight
 # from the command line and must never be taken as an open mode.
 open(my $lang, '<', $input) ||
     die "couldn't read language file named $input: $!\n";

 my @phrase_lines; # the kept lines of the phrase being read, for compare()
 my $line = 0;     # number of the line being read, used in the warnings
 while(<$lang>) {

     $line++;

     # get rid of DOS newlines
     $_ =~ s/\r//g;

     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
         # comment or empty line
         next;
     }

     push @phrase_lines, $_;

     # this is an XML-lookalike tag
     if (/^(<|[^\"<]+<)([^>]*)>/) {
         my $part = $2;

         if($part !~ /^\//) {
             # This is an opening (sub) tag: remember the handler of the
             # enclosing section and make the tag name the current handler.
             push @m, $m;
             $m = $part;
             next;
         }

         # this was a closing tag

         if($part eq "/phrase") {
             # closing the phrase

             my $idstr = $phrase{'id'};

             if($binary && !$english{$idstr}) {
                 # $idstr doesn't exist for english, skip it
             }
             elsif($dest =~ /^none\z/i) {
                 # "none" as dest (without quotes) means that this entire
                 # phrase is to be ignored
             }
             elsif(!$update) {
                 # we don't do the fully detailed analysis when we "update"
                 # since we don't do it for a particular target etc

                 # allow the keyword 'deprecated' to be used on dest and
                 # voice strings to mark that as deprecated. It will then
                 # be replaced with "".

                 $dest =~ s/^deprecate(|d)\z/\"\"/i;
                 $voice =~ s/^deprecate(|d)\z/\"\"/i;

                 # basic syntax error alerts, if there are no quotes we
                 # will assume an empty string was intended
                 if($dest !~ /^\"/) {
                     print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                     $dest='""';
                 }
                 if($src !~ /^\"/) {
                     print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                     $src='""';
                 }
                 if($voice !~ /^\"/) {
                     print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                     $voice='""';
                 }

                 # Use the ID name to figure out which id number range we
                 # should use for this phrase. Voice-only strings are
                 # separated.
                 my $idnum = ($idstr =~ /^VOICE/) ? $voiceid++ : $idcount++;

                 $id{$idstr} = $idnum;
                 $idnum[$idnum]=$idstr;

                 $source{$idstr}=$src;
                 $dest{$idstr}=$dest;
                 $voice{$idstr}=$voice;

                 if($verbose) {
                     print "id: $phrase{id} ($idnum)\n";
                     print "source: $src\n";
                     print "dest: $dest\n";
                     print "voice: $voice\n";
                 }
             }

             if($update) {
                 if($english{$idstr}) {
                     # compare original english with this!
                     my @eng = split("\n", $english{$idstr});

                     compare($idstr, \@eng, \@phrase_lines);

                     # clear it, so that what is left in %english after the
                     # scan are the phrases the translation did not have
                     $english{$idstr}="";
                 }
                 else {
                     print "### $idstr: The phrase is not used. Skipped\n";
                 }
             }

             # Forget everything about this phrase, whichever way it was
             # handled above. A skipped phrase must not hand its strings
             # (for example a dest of "none") or its id over to a following
             # phrase that lacks them.
             undef $src;
             undef $dest;
             undef $voice;
             undef %phrase;
             undef @phrase_lines;

         } # end of </phrase>

         # starts with a slash, this _ends_ this section
         $m = pop @m; # get back old value, the previous level's tag
         next;
     }

     if(/^ *([^:]+): *(.*)/) {
         my ($name, $val)=($1, $2);
         # $m holds the name of the sub that handles the current section
         &$m($_, $name, $val);
     }
 }
 close($lang);
 466
 # Final step of an update: output the english version of every phrase that
 # the translated file did not have.
 if($update) {
     # Phrases that were found in the translation had their %english entry
     # cleared during the scan, so the non-empty entries are the missing
     # ones. They are sorted by their english id number to get the same
     # output, in english file order, on every run; plain hash order is
     # random.
     my @missing = sort { $idmap{$a} <=> $idmap{$b} }
                   grep { $english{$_} } keys %english;

     for my $idstr (@missing) {
         print "###\n",
         "### This phrase below was not present in the translated file\n",
         "<phrase>\n";
         print $english{$idstr};
         print "</phrase>\n";
     }
 }
 479
 # -p mode: write [prefix].h with the enum of all string IDs and [prefix].c
 # with the built-in strings.
 if($prefix) {
     # We create a .c and .h file

     # Three-argument open with lexical handles, so that the prefix given on
     # the command line can never be taken as part of the open mode.
     open(my $hfile, '>', "$prefix.h") ||
         die "couldn't create file $prefix.h: $!\n";
     open(my $cfile, '>', "$prefix.c") ||
         die "couldn't create file $prefix.c: $!\n";

     print {$hfile} <<MOO;
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO

     print {$cfile} <<MOO;
/* This file was automaticly generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO

     # Output the ID names for the enum in the header file
     for my $num (0 .. $idcount - 1) {
         my $name=$idnum[$num]; # get the ID name

         $name =~ s/\"//g; # cut off the quotes

         print {$hfile} "    $name,\n";
     }

     # Output separation marker for last string ID and the upcoming voice IDs
     print {$hfile} <<MOO;
    LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
    /* --- below this follows voice-only strings --- */
    VOICEONLY_DELIMITER = 0x8000,
MOO

     # Output the voice-only ID names for the enum in the header file
     for my $num (0x8000 .. ($voiceid-1)) {
         my $name=$idnum[$num]; # get the ID name

         $name =~ s/\"//g; # cut off the quotes

         print {$hfile} "    $name,\n";
     }

     # Output end of enum
     print {$hfile} "\n};\n/* end of generated enum list */\n";

     # Output the target phrases for the source file
     for my $num (0 .. $idcount - 1) {
         my $name=$idnum[$num]; # get the ID
         my $string = $dest{$name}; # get the destination phrase

         $string =~ s:\"$:\\0\":; # insert a \0 before the second quote

         if(!$string) {
             # this is just to be on the safe side
             $string = '"\0"';
         }

         print {$cfile} "    $string\n";
     }

     # Output end of string chunk
     print {$cfile} <<MOO;
;
/* end of generated string list */
MOO

     # Buffered write errors (a full disk, for instance) only show up when
     # the file is closed, so the result of close is checked.
     close($hfile) || die "couldn't write file $prefix.h: $!\n";
     close($cfile) || die "couldn't write file $prefix.c: $!\n";
 } # end of the c/h file generation
 elsif($binary) {
     # Creation of a binary lang file was requested
     #
     # The file starts with the byte 0x1a and the language version byte.
     # After that follows one record per phrase: the english id number as
     # two bytes, high byte first, then the string and a terminating zero
     # byte.
     #
     # The id numbers come from %idmap, which was filled when the english
     # file was scanned, because the english file sets the id order for all
     # language files.

     open(my $outf, '>', $binary) or die "Can't create $binary: $!";
     binmode $outf;
     print {$outf} "\x1a", chr($langversion); # magic lang file header

     # loop over the target phrases, in translated file order
     for my $num (0 .. $idcount - 1) {
         my $name=$idnum[$num]; # get the ID
         my $string = $dest{$name}; # get the destination phrase

         next unless $string;

         $string =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

         # "n" packs the english id number as 16 bits, high byte first
         print {$outf} pack("n", $idmap{$name}), $string, "\x00";
     }

     # Buffered write errors only show up at close time; without this check
     # a truncated language file would go unnoticed.
     close($outf) or die "Can't write $binary: $!";
 }
 elsif($voiceout) {
     # voice output requested, display id: and voice: strings in a v1-like
     # fashion, one entry per id number in english order

     my @engl; # english id number to index into the translated @idnum

     # All id numbers that were handed out while scanning the translated
     # file: the regular range followed by the voice-only range.
     my @ids = ((0 .. ($idcount-1)));
     push @ids, (0x8000 .. ($voiceid-1));

     # First map every translated phrase to its english id number
     for my $num (@ids) {
         my $name=$idnum[$num]; # get the ID

         next unless $voice{$name};

         # Now, make sure we get the number from the english sort order.
         # A phrase that the english file does not have gets no slot; an
         # undefined number must not be used as index, since that would
         # overwrite slot 0.
         my $engnum = $idmap{$name};
         next unless defined $engnum;

         $engl[$engnum] = $num;
     }

     # Then output the slots in english order
     for my $num (@ids) {
         my $o = $engl[$num];

         if(!defined $o) {
             # No translated phrase belongs to this english number. Output
             # a placeholder with an empty voice string; looking up
             # $idnum[undef] would repeat the first phrase here instead.
             print "#$num\nid: NOT_USED_$num\nvoice: \"\"\n";
             next;
         }

         my $name=$idnum[$o]; # get the ID
         my $string = $voice{$name}; # get the destination voice string

         print "#$num\nid: $name\nvoice: $string\n";
     }

 }
 643
 644
 # Verbose mode: finish with the number of IDs and the header fields that
 # were found in the language file.
 if($verbose) {
     printf("%d ID strings scanned\n", $idcount);

     print "* head *\n";
     # The field name is data from the language file, so it is printed as
     # plain text and never used as part of a printf format. Sorting the
     # names gives the same output on every run.
     for my $name (sort keys %head) {
         print "$name: $head{$name}\n";
     }
 }
 653