tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
  11 #
  12
  13 # Format version number stored in the header of binary (.lng) language files
  14 my $langversion = 4; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a built-in language other than english. We
  18 # removed that feature from our build system, although the build scripts
  19 # still had the ability. Starting now, this ability is no longer provided:
  20 # I figured it was boring and unnecessary to write support for something we
  21 # don't use anymore.
  22 # No language file argument left on the command line: print usage and quit.
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file named [outfile].
  32     The use of this option requires that you also use -e, -t and -i.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b, -u or -s.
  42
  43  -s
  44     Sort the Update language file in the same order as the strings in the
  45     English file.
  46
  47  -t=<target>
  48     Specify which target you want the translations/phrases for. Required when
  49     -b or -p is used.
  50
  51     The target can in fact be specified as numerous different strings,
  52     separated with colons. This will make genlang to use all the specified
  53     strings when searching for a matching phrase.
  54
  55  -i=<target id>
  56     The target id number, needed for -b.
  57
  58  -o
  59     Voice mode output. Outputs all id: and voice: lines for the given target!
  60
  61  -v
  62     Enables verbose (debug) output.
  63 MOO
  64 ;
  65     exit; # plain exit, so the exit status is 0
  66 }
  67
  68 # How update works:
  69 #
  70 # 1) scan the english file, keep the whole <phrase> for each phrase.
  71 # 2) read the translated file, for each end of phrase, compare:
  72 #  A) all source strings; if anything changed, a comment about it is
  73 #     output
  74 #  B) the desc fields
  75 #
  76 # 3) output the phrase with the comments from above
  77 # 4) check which phrases the translated version didn't have, and spit out
  78 #    the english version of those
  79 #
  80
  81 # Mode and option switches, as set up by the -s flag on the #! line.
  82 my ($prefix, $binary, $update, $sortfile) = ($p, $b, $u, $s);
  83 my ($english, $voiceout) = ($e, $o);
  84 my ($target_id, $target, $verbose) = ($i, $t, $v);
  85
  86 # Count how many of the mutually exclusive modes were asked for.
  87 my $check = grep { $_ } ($binary, $prefix, $update, $voiceout, $sortfile);
  88
  89 # Exactly one mode has to be selected.
  90 if($check != 1) {
  91     if($check) {
  92         print "Please use only one of -p, -u, -o, -b and -s\n";
  93     }
  94     else {
  95         print "Please use at least one of -p, -u, -o, -b and -s\n";
  96     }
  97     exit;
  98 }
  99
 100 # Every mode except -p needs the english master file.
 101 if(!$english && ($binary || $update || $voiceout || $sortfile)) {
 102     print "Please use -e too when you use -b, -o, -u or -s\n";
 103     exit;
 104 }
 105
 106 # The binary file header carries the numeric target id.
 107 if($binary && !$target_id) {
 108     print "Please specify a target id number (with -i)!\n";
 109     exit;
 110 }
 111
 112 # Only -u and -s can run without a target.
 113 unless($target || $update || $sortfile) {
 114     print "Please specify a target (with -t)!\n";
 115     exit;
 116 }
 117
 118
 119 my %id; # string to num hash
 120 my @idnum; # num to string array
 121 # The four hashes below are all keyed by the phrase id string:
 122 my %allphrases;  # For sorting (-s): the complete text of each <phrase>
 123 my %source; # id string to source phrase hash
 124 my %dest; # id string to dest phrase hash
 125 my %voice; # id string to voice phrase hash
 126 # Name of the language file to process
 127 my $input = $ARGV[0];
 128 # Tag tracking for the parser: @m is the stack of enclosing tag names
 129 my @m;
 130 my $m="blank"; # current tag name; the sub with this name gets each "name: value" line
 131 # Return a copy of the given string without leading and trailing whitespace.
 132 sub trim {
 133     my $text = shift;
 134     # one substitution handles both ends of the string
 135     $text =~ s/^\s+|\s+$//g;
 136     return $text;
 137 }
 138 # Shell-style wildcard match: '*' is any run of characters (also none) and
 139 # '?' is exactly one character. Returns true if $pattern covers all of $string.
 140 sub match {
 141     my ($string, $pattern)=@_;
 142     # Translate the pattern one character at a time; everything that is not a
 143     # wildcard is escaped so that it can only ever match itself.
 144     $pattern = join('', map { $_ eq '*' ? '.*' : $_ eq '?' ? '.' : quotemeta($_) } split(//, $pattern));
 145     return ($string =~ /^$pattern\z/);
 146 }
 147 # Default line handler (the initial value of $m): ignores its arguments.
 148 sub blank {
 149     return;
 150 }
 151 # Line handler "header": records a name/value pair in %head.
 152 my %head;
 153 sub header {
 154     my (undef, $key, $value) = @_;
 155     return $head{$key} = $value;
 156 }
 157 # Line handler "phrase": records a name/value pair (e.g. the id) in %phrase.
 158 my %phrase;
 159 sub phrase {
 160     my (undef, $field, $text) = @_;
 161     return $phrase{$field} = $text;
 162 }
 163 # Shared worker for the source/dest/voice line handlers.
 164 #
 165 # $n holds one or more comma separated target patterns and $v the string
 166 # that belongs to them. If any pattern matches any of the colon separated
 167 # parts of $target, $v is stored through $strref and returned. Without a
 168 # match nothing is stored. $debug and $full are accepted but not used.
 169 sub parsetarget {
 170     my ($debug, $strref, $full, $n, $v)=@_;
 171     my @wanted = split(":", $target);
 172     for my $pattern (split(" *, *", $n)) {
 173         for my $part (@wanted) {
 174             next unless match($part, $pattern);
 175             # first hit wins
 176             $$strref = $v;
 177             return $v;
 178         }
 179     }
 180 }
 181 # Line handler "source": the string for our target ends up in $src.
 182 my $src;
 183 sub source {
 184     return parsetarget("src", \$src, @_);
 185 }
 186 # Line handler "dest": the string for our target ends up in $dest.
 187 my $dest;
 188 sub dest {
 189     return parsetarget("dest", \$dest, @_);
 190 }
 191 # Line handler "voice": the string for our target ends up in $voice.
 192 my $voice;
 193 sub voice {
 194     return parsetarget("voice", \$voice, @_);
 195 }
 196
 197 my %idmap; # phrase id string -> id number, assigned in english file order
 198 my %english; # phrase id string -> the lines between <phrase> and </phrase>
 199 if($english) {
 200     # For the cases where the english file needs to be scanned/read, we do
 201     # it before we read the translated file. For -b it isn't necessary, but for
 202     # -u it is convenient.
 203     # The scan also fills %sortorder: the position of each id in this file.
 204     my $idnum=0; # next id number for ordinary phrases
 205     my $vidnum=0x8000; # first voice id
 206     open(ENG, "<$english") || die "Error: can't open $english";
 207     my @phrase; # lines of the phrase currently being read
 208     my $id; # id of the current phrase, set only once the phrase is known to be used
 209     my $maybeid; # id from the most recent "id:" line
 210     my $withindest; # true while inside a <dest> section
 211     my $numphrases = 0; # number of "id:" lines seen so far
 212     while(<ENG>) {
 213
 214         # get rid of DOS newlines
 215         $_ =~ s/\r//g;
 216
 217         if($_ =~ /^ *\<phrase\>/) {
 218             # this is the start of a phrase
 219         }
 220         elsif($_ =~ /^ *\<\/phrase\>/) {
 221
 222             # if id is set, count and store this phrase
 223             if($id) {
 224                 # voice-only entries get a different range
 225                 if($id =~ /^VOICE_/) {
 226                     # Assign an ID number to this entry
 227                     $idmap{$id}=$vidnum;
 228                     $vidnum++;
 229                 }
 230                 else {
 231                     # Assign an ID number to this entry
 232                     $idmap{$id}=$idnum;
 233                     $idnum++;
 234    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 235                 }
 236
 237                 # this is the end of a phrase, add it to the english hash
 238                 $english{$id}=join("", @phrase);
 239             }
 240             undef @phrase;
 241             $id=""; # the next phrase has to earn its id again
 242         }
 243         elsif($_ ne "\n") {
 244             # gather everything related to this phrase
 245             push @phrase, $_;
 246             if($_ =~ /^ *\<dest\>/i) {
 247                 $withindest=1;
 248                 $deststr="";
 249             }
 250             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 251                 $withindest=0;
 252                 # $deststr is the dest string our target got, if any
 253                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
 254                     # we unconditionally always use all IDs when the "update"
 255                     # feature is used
 256                     $id = $maybeid;
 257     #                print "DEST: use this id $id\n";
 258                 }
 259                 else {
 260     #                print "skip $maybeid for $name\n";
 261                 }
 262             }
 263             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 264                 my ($name, $val)=($1, $2);
 265                 $dest=""; # in case it is left untouched for when the
 266                 # model name isn't "our"
 267                 dest($_, $name, $val);
 268
 269                 if($dest) {
 270                     # Store the current dest string. If this target matches
 271                     # multiple strings, it will get updated several times.
 272                     $deststr = $dest;
 273                 }
 274             }
 275         }
 276         # remember the id and its position; it becomes $id at </dest> if used
 277         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 278             $maybeid=$1;
 279             $sortorder{$maybeid}=$numphrases++;
 280         }
 281     }
 282     close(ENG);
 283 }
 284
 285 # Compare one english phrase with its translated counterpart and print the
 286 # updated translated phrase to stdout. $engref and $locref are references to
 287 # the english and the translated lines; the desc and the <source> section
 288 # are taken from english whenever they differ. $idstr is not used here.
 289 sub compare {
 290     my ($idstr, $engref, $locref)=@_;
 291     my ($edesc, $ldesc);
 292     my ($esource, $lsource);
 293     my $mode=0; # true while inside a <source> section
 294     # Pass 1: pick the desc and the <source> lines out of the english phrase
 295     for my $l (@$engref) {
 296         if($l =~ /^ *#/) {
 297             # comment
 298             next;
 299         }
 300         if($l =~ /^ *desc: (.*)/) {
 301             $edesc=$1;
 302         }
 303         elsif($l =~ / *\<source\>/i) {
 304             $mode=1;
 305         }
 306         elsif($mode) {
 307             if($l =~ / *\<\/source\>/i) {
 308                 last;
 309             }
 310             $esource .= "$l\n"; # the caller split on newlines, so add one back
 311         }
 312     }
 313     # Pass 2: walk the translated phrase and print it piece by piece
 314     my @show; # lines waiting to be printed
 315     my @source; # never used
 316
 317     $mode = 0;
 318     for my $l (@$locref) {
 319         if($l =~ /^ *desc: (.*)/) {
 320             $ldesc=$1;
 321             if(trim($edesc) ne trim($ldesc)) {
 322                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 323             }
 324             push @show, $l;
 325         }
 326         elsif($l =~ / *\<source\>/i) {
 327             $mode=1;
 328             push @show, $l;
 329         }
 330         elsif($mode) {
 331             if($l =~ / *\<\/source\>/i) {
 332                 $mode = 0;
 333                 print @show; # everything up to and including <source>
 334                 if(trim($esource) ne trim($lsource)) {
 335                     print "### The <source> section differs from the english!\n",
 336                     "### the previously used one is commented below:\n";
 337                     for(split("\n", $lsource)) {
 338                         print "### $_\n";
 339                     }
 340                     print $esource;
 341                 }
 342                 else {
 343                     print $lsource;
 344                 }
 345                 undef @show; # start over
 346                 # the closing </source> line opens the next batch
 347                 push @show, $l;
 348             }
 349             else {
 350                 $lsource .= "$l"; # these lines still carry their newline
 351             }
 352         }
 353         else {
 354             push @show, $l;
 355         }
 356     }
 357
 358     # print what followed the <source> section
 359     print @show;
 360 }
 361
 362 my $idcount;        # counter for lang ID numbers
 363 my $voiceid=0x8000; # counter for voice-only ID numbers
 364
 365 #
 366 # Now start the scanning of the selected language file
 367 #
 368
 369 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
 370 my @phrase; # raw lines of the phrase currently being read
 371 my $header = 1; # true until the first line that is neither comment nor empty
 372 while(<LANG>) {
 373
 374     $line++; # current input line number, used in the warnings below
 375
 376     # get rid of DOS newlines
 377     $_ =~ s/\r//g;
 378
 379     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 380         # comment or empty line - output it if it's part of the header
 381         if ($header and ($update || $sortfile)) {
 382             print($_);
 383         }
 384         next;
 385     }
 386     $header = 0;
 387
 388     my $ll = $_;
 389
 390     # print "M: $m\n";
 391
 392     push @phrase, $ll;
 393
 394     # this is an XML-lookalike tag
 395     if (/^(<|[^\"<]+<)([^>]*)>/) {
 396         my $part = $2; # tag name, with a leading slash for a closing tag
 397         # print "P: $part\n";
 398
 399         if($part =~ /^\//) {
 400             # this was a closing tag
 401
 402             if($part eq "/phrase") {
 403                 # closing the phrase
 404
 405                 my $idstr = $phrase{'id'};
 406                 my $idnum;
 407
 408                 if($binary && !$english{$idstr}) {
 409                     # $idstr doesn't exist for english, skip it
 410                 }
 411                 elsif($dest =~ /^none\z/i) {
 412                     # "none" as dest (without quotes) means that this entire
 413                     # phrase is to be ignored
 414                 }
 415                 elsif($sortfile) {
 416                     $allphrases{$idstr}=join('',@phrase);
 417                 }
 418                 elsif(!$update) {
 419                     # we don't do the fully detailed analysis when we "update"
 420                     # since we don't do it for a particular target etc
 421
 422                     # allow the keyword 'deprecated' to be used on dest and
 423                     # voice strings to mark that as deprecated. It will then
 424                     # be replaced with "".
 425
 426                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 427                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 428
 429                     # basic syntax error alerts, if there are no quotes we
 430                     # will assume an empty string was intended
 431                     if($dest !~ /^\"/) {
 432                         print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
 433                         $dest='""';
 434                     }
 435                     if($src !~ /^\"/) {
 436                         print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
 437                         $src='""';
 438                     }
 439                     if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
 440                         print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
 441                         $voice='""';
 442                     }
 443
 444                     # Use the ID name to figure out which id number range we
 445                     # should use for this phrase. Voice-only strings are
 446                     # separated.
 447
 448                     if($idstr =~ /^VOICE/) {
 449                         $idnum = $voiceid++;
 450                     }
 451                     else {
 452                         $idnum = $idcount++;
 453                     }
 454
 455                     $id{$idstr} = $idnum;
 456                     $idnum[$idnum]=$idstr; # reverse mapping: id number to id string
 457
 458                     $source{$idstr}=$src;
 459                     $dest{$idstr}=$dest;
 460                     $voice{$idstr}=$voice;
 461
 462                     if($verbose) {
 463                         print "id: $phrase{id} ($idnum)\n";
 464                         print "source: $src\n";
 465                         print "dest: $dest\n";
 466                         print "voice: $voice\n";
 467                     }
 468                     # NOTE(review): the per-phrase state is only reset in this branch
 469                     undef $src;
 470                     undef $dest;
 471                     undef $voice;
 472                     undef %phrase;
 473                 }
 474
 475                 if($update) {
 476                     my $e = $english{$idstr};
 477
 478                     if($e) {
 479                         # compare original english with this!
 480                         my @eng = split("\n", $english{$idstr});
 481
 482                         compare($idstr, \@eng, \@phrase);
 483                         # emptied entries are not reported as missing later on
 484                         $english{$idstr}=""; # clear it
 485                     }
 486                     else {
 487                         print "### $idstr: The phrase is not used. Skipped\n";
 488                     }
 489                 }
 490                 undef @phrase;
 491
 492             } # end of </phrase>
 493
 494             # starts with a slash, this _ends_ this section
 495             $m = pop @m; # get back old value, the previous level's tag
 496             next;
 497         } # end of tag close
 498
 499         # This is an opening (sub) tag
 500
 501         push @m, $m; # store old value
 502         $m = $part;
 503         next;
 504     }
 505     # a "name: value" line: hand it to the sub named after the current tag
 506     if(/^ *([^:]+): *(.*)/) {
 507         my ($name, $val)=($1, $2);
 508         &$m($_, $name, $val);
 509     }
 510 }
 511 close(LANG);
 512 # Update mode: finally append every english phrase the translation lacked.
 513 if($update) {
 514     # entries were emptied as they were matched; keep english file order
 515     for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %english) {
 516         if($english{$_}) {
 517             print "###\n",
 518             "### This phrase below was not present in the translated file\n",
 519             "<phrase>\n";
 520             print $english{$_};
 521             print "</phrase>\n";
 522         }
 523     }
 524 }
 525
 526 if ($sortfile) {
 527     # print the collected phrases in the order their ids have in the english file
 528     my @ordered = sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases;
 529     print @allphrases{@ordered};
 530 }
 531
 532 if($prefix) {
 533     # We create a .c and .h file
 534     # (three-argument open: the prefix is only ever used as a file name)
 535     open(HFILE, '>', "$prefix.h") ||
 536         die "Error: couldn't create file $prefix.h\n";
 537     open(CFILE, '>', "$prefix.c") ||
 538         die "Error: couldn't create file $prefix.c\n";
 539
 540     print HFILE <<MOO
 541 /* This file was automatically generated using genlang */
 542 /*
 543  * The str() macro/functions is how to access strings that might be
 544  * translated. Use it like str(MACRO) and expect a string to be
 545  * returned!
 546  */
 547 #define str(x) language_strings[x]
 548
 549 /* this is the array for holding the string pointers.
 550    It will be initialized at runtime. */
 551 extern unsigned char *language_strings[];
 552 /* this contains the concatenation of all strings, separated by \\0 chars */
 553 extern const unsigned char language_builtin[];
 554
 555 /* The enum below contains all available strings */
 556 enum \{
 557 MOO
 558     ;
 559
 560     print CFILE <<MOO
 561 /* This file was automaticly generated using genlang, the strings come
 562    from "$input" */
 563
 564 #include "$prefix.h"
 565
 566 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 567 const unsigned char language_builtin[] =
 568 MOO
 569 ;
 570
 571     # Output the ID names for the enum in the header file
 572     my $i;
 573     for $i (1 .. $idcount) {
 574         my $name=$idnum[$i - 1]; # get the ID name
 575
 576         $name =~ s/\"//g; # cut off the quotes
 577
 578         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
 579     }
 580
 581 # Output separation marker for last string ID and the upcoming voice IDs
 582
 583     print HFILE <<MOO
 584     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 585     /* --- below this follows voice-only strings --- */
 586     VOICEONLY_DELIMITER = 0x8000,
 587 MOO
 588     ;
 589
 590 # Output the voice-only ID names for the enum in the header file
 591     for $i (0x8000 .. ($voiceid-1)) {
 592         my $name=$idnum[$i]; # get the ID name
 593
 594         $name =~ s/\"//g; # cut off the quotes
 595
 596         printf HFILE ("    %s,\n", $name);
 597     }
 598
 599     # Output end of enum
 600     print HFILE "\n};\n/* end of generated enum list */\n";
 601
 602     # Output the target phrases for the source file
 603     for $i (1 .. $idcount) {
 604         my $name=$idnum[$i - 1]; # get the ID
 605         my $dest = $dest{$name}; # get the destination phrase
 606
 607         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 608
 609         if(!$dest) {
 610             # this is just to be on the safe side
 611             $dest = '"\0"';
 612         }
 613
 614         printf CFILE ("    %s\n", $dest);
 615     }
 616
 617 # Output end of string chunk
 618     print CFILE <<MOO
 619 ;
 620 /* end of generated string list */
 621 MOO
 622 ;
 623     # buffered write errors only show up when the file is closed
 624     close(HFILE) || die "Error: couldn't write file $prefix.h\n";
 625     close(CFILE) || die "Error: couldn't write file $prefix.c\n";
 626 } # end of the c/h file generation
 627 elsif($binary) {
 628     # Creation of a binary lang file was requested
 629     #
 630     # The id numbers written to the file are the ones assigned while the
 631     # english file was scanned (%idmap), as that is what sets the id order for
 632     # all language files. The english file was scanned before the translated
 633     # file.
 634
 635     open(OUTF, '>', $binary) or die "Error: Can't create $binary";
 636     binmode OUTF;
 637     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 638
 639     # loop over the target phrases, in the order of the translated file
 640     for $i (1 .. $idcount) {
 641         my $name=$idnum[$i - 1]; # get the ID
 642         my $dest = $dest{$name}; # get the destination phrase
 643         if($dest) {
 644             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 645             # Now, make sure we get the number from the english sort order:
 646             $idnum = $idmap{$name};
 647             # one record: id as two bytes (high byte first), string, NUL byte
 648             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 649         }
 650     }
 651     # buffered write errors only show up when the file is closed
 652     close(OUTF) or die "Error: Can't write $binary";
 653 }
 654 elsif($voiceout) {
 655     # voice output requested, display id: and voice: strings in a v1-like
 656     # fashion
 657     # @engl: index is the english id number, value the id number in this file
 658     my @engl;
 659
 660     # All id numbers handed out for the translated file: ordinary, then voice-only
 661     my @ids = ((0 .. ($idcount-1)));
 662     push @ids, (0x8000 .. ($voiceid-1));
 663
 664     #for my $id (@ids) {
 665     #    print "$id\n";
 666     #}
 667     # Pass 1: fill @engl for every id that has a voice string
 668     for $i (@ids) {
 669         my $name=$idnum[$i]; # get the ID
 670         my $dest = $voice{$name}; # get the destination voice string
 671
 672         if($dest) {
 673             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes (result not used in this loop)
 674
 675             # Now, make sure we get the number from the english sort order:
 676             $idnum = $idmap{$name};
 677
 678             if(length($idnum)) {
 679                 $engl[$idnum] = $i;
 680
 681                 #print "Input index $i output index $idnum\n";
 682             }
 683             else {
 684                 # not used, mark it so
 685                 $engl[$i] = -1
 686             }
 687
 688         }
 689     }
 690     for my $i (@ids) {
 691         # Pass 2: print one entry per id number, with a placeholder for unused ones
 692         my $o = $engl[$i];
 693
 694         if(($o < 0) || !length($o)) {
 695             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
 696             next;
 697         }
 698
 699         my $name=$idnum[$o]; # get the ID
 700         my $dest = $voice{$name}; # get the destination voice string
 701
 702         print "#$i ($o)\nid: $name\nvoice: $dest\n";
 703     }
 704
 705 }
 706
 707 # Debug summary for -v: phrase count and the collected header fields.
 708 if($verbose) {
 709     printf("%d ID strings scanned\n", $idcount);
 710
 711     print "* head *\n";
 712     for(keys %head) {
 713         printf "%s: %s\n", $_, $head{$_}; # keys are file data, never a format string
 714     }
 715 }
 716