tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 3; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file namaed [outfile].
  32     The use of this option requires that you also use -e.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -o
  52     Voice mode output. Outputs all id: and voice: lines for the given target!
  53
  54  -v
  55     Enables verbose (debug) output.
  56 MOO
  57 ;
  58     exit;
  59 }
  60
# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file, and at the end of each phrase compare:
#  A) all source strings; if anything changed, a comment about it is added
#     to the output
#  B) the desc fields
#
# 3) output the phrase with the comments from above
# 4) check which phrases the translated version didn't have, and spit out
#    the english version of those
#
  73
  74 my $prefix = $p;
  75 my $binary = $b;
  76 my $update = $u;
  77
  78 my $english = $e;
  79 my $voiceout = $o;
  80
  81 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  82
  83 if($check > 1) {
  84     print "Please use only one of -p, -u, -o and -b\n";
  85     exit;
  86 }
  87 if(!$check) {
  88     print "Please use at least one of -p, -u, -o and -b\n";
  89     exit;
  90 }
  91 if(($binary || $update || $voiceout) && !$english) {
  92     print "Please use -e too when you use -b, -o or -u\n";
  93     exit;
  94 }
  95
  96 my $target = $t;
  97 if(!$target && !$update) {
  98     print "Please specify a target (with -t)!\n";
  99     exit;
 100 }
 101 my $verbose=$v;
 102
 103 my %id; # string to num hash
 104 my @idnum; # num to string array
 105
 106 my %source; # id string to source phrase hash
 107 my %dest; # id string to dest phrase hash
 108 my %voice; # id string to voice phrase hash
 109
 110 my $input = $ARGV[0];
 111
 112 my @m;
 113 my $m="blank";
 114
 115 sub match {
 116     my ($string, $pattern)=@_;
 117
 118     $pattern =~ s/\*/.?*/g;
 119     $pattern =~ s/\?/./g;
 120
 121     return ($string =~ /^$pattern\z/);
 122 }
 123
 124 sub blank {
 125     # nothing to do
 126 }
 127
 128 my %head;
 129 sub header {
 130     my ($full, $n, $v)=@_;
 131     $head{$n}=$v;
 132 }
 133
 134 my %phrase;
 135 sub phrase {
 136     my ($full, $n, $v)=@_;
 137     $phrase{$n}=$v;
 138 }
 139
 140 sub parsetarget {
 141     my ($debug, $strref, $full, $n, $v)=@_;
 142     my $string;
 143     my @all= split(" *, *", $n);
 144     my $test;
 145     for $test (@all) {
 146 #        print "TEST ($debug) $target for $test\n";
 147         for my $part (split(":", $target)) {
 148             if(match($part, $test)) {
 149                 $string = $v;
 150 #                print "MATCH: $test => $v\n";
 151                 $$strref = $string;
 152                 return $string;
 153             }
 154         }
 155     }
 156 }
 157
 158 my $src;
 159 sub source {
 160     parsetarget("src", \$src, @_);
 161 }
 162
 163 my $dest;
 164 sub dest {
 165     parsetarget("dest", \$dest, @_);
 166 }
 167
 168 my $voice;
 169 sub voice {
 170     parsetarget("voice", \$voice, @_);
 171 }
 172
 173 my %idmap;
 174 my %english;
 175 if($english) {
 176     # For the cases where the english file needs to be scanned/read, we do
 177     # it before we read the translated file. For -b it isn't necessary, but for
 178     # -u it is convenient.
 179
 180     my $idnum=0; # start with a true number
 181     my $vidnum=0x8000; # first voice id
 182     open(ENG, "<$english") || die "can't open $english";
 183     my @phrase;
 184     my $id;
 185     my $maybeid;
 186     my $withindest;
 187     while(<ENG>) {
 188
 189         # get rid of DOS newlines
 190         $_ =~ s/\r//g;
 191
 192         if($_ =~ /^ *\<phrase\>/) {
 193             # this is the start of a phrase
 194         }
 195         elsif($_ =~ /^ *\<\/phrase\>/) {
 196
 197             # if id is something, when we count and store this phrase
 198             if($id) {
 199                 # voice-only entries get a difference range
 200                 if($id =~ /^VOICE_/) {
 201                     # Assign an ID number to this entry
 202                     $idmap{$id}=$vidnum;
 203                     $vidnum++;
 204                 }
 205                 else {
 206                     # Assign an ID number to this entry
 207                     $idmap{$id}=$idnum;
 208                     $idnum++;
 209                     print STDERR "DEST: bumped idnum to $idnum\n";
 210                 }
 211
 212                 # this is the end of a phrase, add it to the english hash
 213                 $english{$id}=join("", @phrase);
 214             }
 215             undef @phrase;
 216             $id="";
 217         }
 218         elsif($_ ne "\n") {
 219             # gather everything related to this phrase
 220             push @phrase, $_;
 221             if($_ =~ /^ *\<dest\>/i) {
 222                 $withindest=1;
 223             }
 224             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 225                 $withindest=0;
 226             }
 227             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 228                 my ($name, $val)=($1, $2);
 229                 $dest=""; # in case it is left untouched for when the
 230                 # model name isn't "our"
 231                 dest($_, $name, $val);
 232
 233                 print STDERR "DEST: \"$dest\" for $name / $id\n";
 234
 235                 if($dest && ($dest ne "none")) {
 236                     $id = $maybeid;
 237                     print STDERR "DEST: use this id $id\n";
 238                 }
 239             }
 240         }
 241
 242         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 243             $maybeid=$1;
 244         }
 245     }
 246     close(ENG);
 247 }
 248
 249 # a function that compares the english phrase with the translated one.
 250 # compare source strings and desc
 251
 252 # Then output the updated version!
 253 sub compare {
 254     my ($idstr, $engref, $locref)=@_;
 255     my ($edesc, $ldesc);
 256     my ($esource, $lsource);
 257     my $mode=0;
 258
 259     for my $l (@$engref) {
 260         if($l =~ /^ *desc: (.*)/) {
 261             $edesc=$1;
 262         }
 263         elsif($l =~ / *\<source\>/i) {
 264             $mode=1;
 265         }
 266         elsif($mode) {
 267             if($l =~ / *\<\/source\>/i) {
 268                 last;
 269             }
 270             $esource .= "$l\n";
 271         }
 272     }
 273
 274     my @show;
 275     my @source;
 276
 277     $mode = 0;
 278     for my $l (@$locref) {
 279         if($l =~ /^ *desc: (.*)/) {
 280             $ldesc=$1;
 281             if($edesc ne $ldesc) {
 282                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 283             }
 284             push @show, $l;
 285         }
 286         elsif($l =~ / *\<source\>/i) {
 287             $mode=1;
 288             push @show, $l;
 289         }
 290         elsif($mode) {
 291             if($l =~ / *\<\/source\>/i) {
 292                 $mode = 0;
 293                 print @show;
 294                 if($esource ne $lsource) {
 295                     print "### The <source> section differs from the english!\n",
 296                     "### the previously used one is commented below:\n";
 297                     for(split("\n", $lsource)) {
 298                         print "### $_\n";
 299                     }
 300                     print $esource;
 301                 }
 302                 else {
 303                     print $lsource;
 304                 }
 305                 undef @show; # start over
 306
 307                 push @show, $l;
 308             }
 309             else {
 310                 $lsource .= "$l";
 311             }
 312         }
 313         else {
 314             push @show, $l;
 315         }
 316     }
 317
 318
 319     print @show;
 320 }
 321
 322 my $idcount;        # counter for lang ID numbers
 323 my $voiceid=0x8000; # counter for voice-only ID numbers
 324
 325 #
 326 # Now start the scanning of the selected language string
 327 #
 328
 329 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 330 my @phrase;
 331 while(<LANG>) {
 332
 333     $line++;
 334
 335     # get rid of DOS newlines
 336     $_ =~ s/\r//g;
 337
 338     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 339         # comment or empty line
 340         next;
 341     }
 342
 343     my $ll = $_;
 344
 345     # print "M: $m\n";
 346
 347     push @phrase, $ll;
 348
 349     # this is an XML-lookalike tag
 350     if (/^(<|[^\"<]+<)([^>]*)>/) {
 351         my $part = $2;
 352         # print "P: $part\n";
 353
 354         if($part =~ /^\//) {
 355             # this was a closing tag
 356
 357             if($part eq "/phrase") {
 358                 # closing the phrase
 359
 360                 my $idstr = $phrase{'id'};
 361                 my $idnum;
 362
 363                 if($dest =~ /^none\z/i) {
 364                     # "none" as dest (without quotes) means that this entire
 365                     # phrase is to be ignored
 366                 }
 367                 elsif(!$update) {
 368                     # we don't do the fully detailed analysis when we "update"
 369                     # since we don't do it for a particular target etc
 370
 371                     # allow the keyword 'deprecated' to be used on dest and
 372                     # voice strings to mark that as deprecated. It will then
 373                     # be replaced with "".
 374
 375                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 376                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 377
 378                     # basic syntax error alerts, if there are no quotes we
 379                     # will assume an empty string was intended
 380                     if($dest !~ /^\"/) {
 381                         print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
 382                         $dest='""';
 383                     }
 384                     if($src !~ /^\"/) {
 385                         print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
 386                         $src='""';
 387                     }
 388                     if($voice !~ /^\"/) {
 389                         print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
 390                         $voice='""';
 391                     }
 392
 393                     # Use the ID name to figure out which id number range we
 394                     # should use for this phrase. Voice-only strings are
 395                     # separated.
 396
 397                     if($idstr =~ /^VOICE/) {
 398                         $idnum = $voiceid++;
 399                     }
 400                     else {
 401                         $idnum = $idcount++;
 402                     }
 403
 404                     $id{$idstr} = $idnum;
 405                     $idnum[$idnum]=$idstr;
 406
 407                     $source{$idstr}=$src;
 408                     $dest{$idstr}=$dest;
 409                     $voice{$idstr}=$voice;
 410
 411                     if($verbose) {
 412                         print "id: $phrase{id} ($idnum)\n";
 413                         print "source: $src\n";
 414                         print "dest: $dest\n";
 415                         print "voice: $voice\n";
 416                     }
 417
 418                     undef $src;
 419                     undef $dest;
 420                     undef $voice;
 421                     undef %phrase;
 422                 }
 423
 424                 if($update) {
 425                     my $e = $english{$idstr};
 426
 427                     if($e) {
 428                         # compare original english with this!
 429                         my @eng = split("\n", $english{$idstr});
 430
 431                         compare($idstr, \@eng, \@phrase);
 432
 433                         $english{$idstr}=""; # clear it
 434                     }
 435                     else {
 436                         print "### $idstr: The phrase is not used. Skipped\n";
 437                     }
 438                 }
 439                 undef @phrase;
 440
 441             } # end of </phrase>
 442
 443             # starts with a slash, this _ends_ this section
 444             $m = pop @m; # get back old value, the previous level's tag
 445             next;
 446         } # end of tag close
 447
 448         # This is an opening (sub) tag
 449
 450         push @m, $m; # store old value
 451         $m = $part;
 452         next;
 453     }
 454
 455     if(/^ *([^:]+): *(.*)/) {
 456         my ($name, $val)=($1, $2);
 457         &$m($_, $name, $val);
 458     }
 459 }
 460 close(LANG);
 461
 462 if($update) {
 463     my $any=0;
 464     for(keys %english) {
 465         if($english{$_}) {
 466             print "###\n",
 467             "### This phrase below was not present in the translated file\n",
 468             "<phrase>\n";
 469             print $english{$_};
 470             print "</phrase>\n";
 471         }
 472     }
 473 }
 474
 475 if($prefix) {
 476     # We create a .c and .h file
 477
 478     open(HFILE, ">$prefix.h") ||
 479         die "couldn't create file $prefix.h\n";
 480     open(CFILE, ">$prefix.c") ||
 481         die "couldn't create file $prefix.c\n";
 482
 483     print HFILE <<MOO
 484 /* This file was automatically generated using genlang */
 485 /*
 486  * The str() macro/functions is how to access strings that might be
 487  * translated. Use it like str(MACRO) and expect a string to be
 488  * returned!
 489  */
 490 #define str(x) language_strings[x]
 491
 492 /* this is the array for holding the string pointers.
 493    It will be initialized at runtime. */
 494 extern unsigned char *language_strings[];
 495 /* this contains the concatenation of all strings, separated by \\0 chars */
 496 extern const unsigned char language_builtin[];
 497
 498 /* The enum below contains all available strings */
 499 enum \{
 500 MOO
 501     ;
 502
 503     print CFILE <<MOO
 504 /* This file was automaticly generated using genlang, the strings come
 505    from "$input" */
 506
 507 #include "$prefix.h"
 508
 509 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 510 const unsigned char language_builtin[] =
 511 MOO
 512 ;
 513
 514     # Output the ID names for the enum in the header file
 515     my $i;
 516     for $i (1 .. $idcount) {
 517         my $name=$idnum[$i - 1]; # get the ID name
 518
 519         $name =~ s/\"//g; # cut off the quotes
 520
 521         printf HFILE ("    %s,\n", $name);
 522     }
 523
 524 # Output separation marker for last string ID and the upcoming voice IDs
 525
 526     print HFILE <<MOO
 527     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 528     /* --- below this follows voice-only strings --- */
 529     VOICEONLY_DELIMITER = 0x8000,
 530 MOO
 531     ;
 532
 533 # Output the ID names for the enum in the header file
 534     for $i (0x8000 .. ($voiceid-1)) {
 535         my $name=$idnum[$i]; # get the ID name
 536
 537         $name =~ s/\"//g; # cut off the quotes
 538
 539         printf HFILE ("    %s,\n", $name);
 540     }
 541
 542     # Output end of enum
 543     print HFILE "\n};\n/* end of generated enum list */\n";
 544
 545     # Output the target phrases for the source file
 546     for $i (1 .. $idcount) {
 547         my $name=$idnum[$i - 1]; # get the ID
 548         my $dest = $dest{$name}; # get the destination phrase
 549
 550         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 551
 552         if(!$dest) {
 553             # this is just to be on the safe side
 554             $dest = '"\0"';
 555         }
 556
 557         printf CFILE ("    %s\n", $dest);
 558     }
 559
 560 # Output end of string chunk
 561     print CFILE <<MOO
 562 ;
 563 /* end of generated string list */
 564 MOO
 565 ;
 566
 567     close(HFILE);
 568     close(CFILE);
 569 } # end of the c/h file generation
 570 elsif($binary) {
 571     # Creation of a binary lang file was requested
 572
 573     # We must first scan the english file to get the correct order of the id
 574     # numbers used there, as that is what sets the id order for all language
 575     # files. The english file is scanned before the translated file was
 576     # scanned.
 577
 578     open(OUTF, ">$binary") or die "Can't create $binary";
 579     binmode OUTF;
 580     printf OUTF ("\x1a%c", $langversion); # magic lang file header
 581
 582     # loop over the target phrases
 583     for $i (1 .. $idcount) {
 584         my $name=$idnum[$i - 1]; # get the ID
 585         my $dest = $dest{$name}; # get the destination phrase
 586
 587         if($dest) {
 588             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 589
 590             # Now, make sure we get the number from the english sort order:
 591             $idnum = $idmap{$name};
 592
 593             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 594         }
 595     }
 596 }
 597 elsif($voiceout) {
 598     # voice output requested, display id: and voice: strings in a v1-like
 599     # fashion
 600
 601     my @engl;
 602
 603     # This loops over the strings in the translated language file order
 604     my @ids = ((0 .. ($idcount-1)));
 605     push @ids, (0x8000 .. ($voiceid-1));
 606
 607     #for my $id (@ids) {
 608     #    print "$id\n";
 609     #}
 610
 611     for $i (@ids) {
 612         my $name=$idnum[$i]; # get the ID
 613         my $dest = $voice{$name}; # get the destination voice string
 614
 615         if($dest) {
 616             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 617
 618             # Now, make sure we get the number from the english sort order:
 619             $idnum = $idmap{$name};
 620
 621             $engl[$idnum] = $i;
 622
 623            # print "Input index $i output index $idnum\n";
 624
 625         }
 626     }
 627     for my $i (@ids) {
 628
 629         my $o = $engl[$i];
 630
 631         my $name=$idnum[$o]; # get the ID
 632         my $dest = $voice{$name}; # get the destination voice string
 633
 634         print "#$i\nid: $name\nvoice: $dest\n";
 635     }
 636
 637 }
 638
 639
 640 if($verbose) {
 641     printf("%d ID strings scanned\n", $idcount);
 642
 643     print "* head *\n";
 644     for(keys %head) {
 645         printf "$_: %s\n", $head{$_};
 646     }
 647 }
 648