tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 3; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file namaed [outfile].
  32     The use of this option requires that you also use -e.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -o
  52     Voice mode output. Outputs all id: and voice: lines for the given target!
  53
  54  -v
  55     Enables verbose (debug) output.
  56 MOO
  57 ;
  58     exit;
  59 }
  60
  61 # How update works:
  62 #
  63 # 1) scan the english file, keep the whole <phrase> for each phrase.
  64 # 2) read the translated file, for each end of phrase, compare:
  65 #  A) all source strings, if there's any change there should be a comment about
  66 #     it output
  67 #  B) the desc fields
  68 #
  69 # 3) output the phrase with the comments from above
  70 # 4) check which phrases that the translated version didn't have, and spit out
  71 #    the english version of those
  72 #
  73
  74 my $prefix = $p;
  75 my $binary = $b;
  76 my $update = $u;
  77
  78 my $english = $e;
  79 my $voiceout = $o;
  80
  81 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  82
  83 if($check > 1) {
  84     print "Please use only one of -p, -u, -o and -b\n";
  85     exit;
  86 }
  87 if(!$check) {
  88     print "Please use at least one of -p, -u, -o and -b\n";
  89     exit;
  90 }
  91 if(($binary || $update || $voiceout) && !$english) {
  92     print "Please use -e too when you use -b, -o or -u\n";
  93     exit;
  94 }
  95
  96 my $target = $t;
  97 if(!$target && !$update) {
  98     print "Please specify a target (with -t)!\n";
  99     exit;
 100 }
 101 my $verbose=$v;
 102
 103 my %id; # string to num hash
 104 my @idnum; # num to string array
 105
 106 my %source; # id string to source phrase hash
 107 my %dest; # id string to dest phrase hash
 108 my %voice; # id string to voice phrase hash
 109
 110 my $input = $ARGV[0];
 111
 112 my @m;
 113 my $m="blank";
 114
 115 sub match {
 116     my ($string, $pattern)=@_;
 117
 118     $pattern =~ s/\*/.?*/g;
 119     $pattern =~ s/\?/./g;
 120
 121     return ($string =~ $pattern);
 122 }
 123
 124 sub blank {
 125     # nothing to do
 126 }
 127
 128 my %head;
 129 sub header {
 130     my ($full, $n, $v)=@_;
 131     $head{$n}=$v;
 132 }
 133
 134 my %phrase;
 135 sub phrase {
 136     my ($full, $n, $v)=@_;
 137     $phrase{$n}=$v;
 138 }
 139
 140 sub parsetarget {
 141     my ($debug, $strref, $full, $n, $v)=@_;
 142     my $string;
 143     my @all= split(" *, *", $n);
 144     my $test;
 145     for $test (@all) {
 146 #        print "TEST ($debug) $target for $test\n";
 147         for my $part (split(":", $target)) {
 148             if(match($part, $test)) {
 149                 $string = $v;
 150 #                print "MATCH: $test => $v\n";
 151                 $$strref = $string;
 152                 return $string;
 153             }
 154         }
 155     }
 156 }
 157
 158 my $src;
 159 sub source {
 160     parsetarget("src", \$src, @_);
 161 }
 162
 163 my $dest;
 164 sub dest {
 165     parsetarget("dest", \$dest, @_);
 166 }
 167
 168 my $voice;
 169 sub voice {
 170     parsetarget("voice", \$voice, @_);
 171 }
 172
 173 my %idmap;
 174 my %english;
 175 if($english) {
 176     # For the cases where the english file needs to be scanned/read, we do
 177     # it before we read the translated file. For -b it isn't necessary, but for
 178     # -u it is convenient.
 179
 180     my $idnum=0; # start with a true number
 181     my $vidnum=0x8000; # first voice id
 182     open(ENG, "<$english") || die "can't open $english";
 183     my @phrase;
 184     my $id;
 185     while(<ENG>) {
 186
 187         # get rid of DOS newlines
 188         $_ =~ s/\r//g;
 189
 190         if($_ =~ /^ *\<phrase\>/) {
 191             # this is the start of a phrase
 192         }
 193         elsif($_ =~ /^ *\<\/phrase\>/) {
 194             # this is the end of a phrase, add it to the english hash
 195             $english{$id}=join("", @phrase);
 196             undef @phrase;
 197         }
 198         elsif($_ ne "\n") {
 199             # gather everything related to this phrase
 200             push @phrase, $_;
 201         }
 202
 203         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 204             $id=$1;
 205             # voice-only entries get a difference range
 206             if($id =~ /^VOICE_/) {
 207                 # Assign an ID number to this entry
 208                 $idmap{$id}=$vidnum;
 209                 $vidnum++;
 210             }
 211             else {
 212                 # Assign an ID number to this entry
 213                 $idmap{$id}=$idnum;
 214                 $idnum++;
 215             }
 216         }
 217     }
 218     close(ENG);
 219 }
 220
 221 # a function that compares the english phrase with the translated one.
 222 # compare source strings and desc
 223
 224 # Then output the updated version!
 225 sub compare {
 226     my ($idstr, $engref, $locref)=@_;
 227     my ($edesc, $ldesc);
 228     my ($esource, $lsource);
 229     my $mode=0;
 230
 231     for my $l (@$engref) {
 232         if($l =~ /^ *desc: (.*)/) {
 233             $edesc=$1;
 234         }
 235         elsif($l =~ / *\<source\>/i) {
 236             $mode=1;
 237         }
 238         elsif($mode) {
 239             if($l =~ / *\<\/source\>/i) {
 240                 last;
 241             }
 242             $esource .= "$l\n";
 243         }
 244     }
 245
 246     my @show;
 247     my @source;
 248
 249     $mode = 0;
 250     for my $l (@$locref) {
 251         if($l =~ /^ *desc: (.*)/) {
 252             $ldesc=$1;
 253             if($edesc ne $ldesc) {
 254                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 255             }
 256             push @show, $l;
 257         }
 258         elsif($l =~ / *\<source\>/i) {
 259             $mode=1;
 260             push @show, $l;
 261         }
 262         elsif($mode) {
 263             if($l =~ / *\<\/source\>/i) {
 264                 $mode = 0;
 265                 print @show;
 266                 if($esource ne $lsource) {
 267                     print "### The <source> section differs from the english!\n",
 268                     "### the previously used one is commented below:\n";
 269                     for(split("\n", $lsource)) {
 270                         print "### $_\n";
 271                     }
 272                     print $esource;
 273                 }
 274                 else {
 275                     print $lsource;
 276                 }
 277                 undef @show; # start over
 278
 279                 push @show, $l;
 280             }
 281             else {
 282                 $lsource .= "$l";
 283             }
 284         }
 285         else {
 286             push @show, $l;
 287         }
 288     }
 289
 290
 291     print @show;
 292 }
 293
 294 my $idcount;        # counter for lang ID numbers
 295 my $voiceid=0x8000; # counter for voice-only ID numbers
 296
 297 #
 298 # Now start the scanning of the selected language string
 299 #
 300
 301 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 302 my @phrase;
 303 while(<LANG>) {
 304
 305     $line++;
 306
 307     # get rid of DOS newlines
 308     $_ =~ s/\r//g;
 309
 310     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 311         # comment or empty line
 312         next;
 313     }
 314
 315     my $ll = $_;
 316
 317     # print "M: $m\n";
 318
 319     push @phrase, $ll;
 320
 321     # this is an XML-lookalike tag
 322     if (/^(<|[^\"<]+<)([^>]*)>/) {
 323         my $part = $2;
 324         # print "P: $part\n";
 325
 326         if($part =~ /^\//) {
 327             # this was a closing tag
 328
 329             if($part eq "/phrase") {
 330                 # closing the phrase
 331
 332                 my $idstr = $phrase{'id'};
 333                 my $idnum;
 334
 335                 if($dest =~ /^none\z/i) {
 336                     # "none" as dest (without quotes) means that this entire
 337                     # phrase is to be ignored
 338                 }
 339                 else {
 340                     # allow the keyword 'deprecated' to be used on dest and
 341                     # voice strings to mark that as deprecated. It will then
 342                     # be replaced with "".
 343
 344                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 345                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 346
 347                     # Use the ID name to figure out which id number range we
 348                     # should use for this phrase. Voice-only strings are
 349                     # separated.
 350
 351                     # basic syntax error alerts, if there are no quotes we
 352                     # will assume an empty string was intended
 353                     if($dest !~ /^\"/) {
 354                         print STDERR "Warning: dest before line $line lacks quotes!\n";
 355                         $dest='""';
 356                     }
 357                     if($src !~ /^\"/) {
 358                         print STDERR "Warning: source before line $line lacks quotes!\n";
 359                         $src='""';
 360                     }
 361                     if($voice !~ /^\"/) {
 362                         print STDERR "Warning: voice before line $line lacks quotes!\n";
 363                         $voice='""';
 364                     }
 365
 366                     if($idstr =~ /^VOICE/) {
 367                         $idnum = $voiceid++;
 368                     }
 369                     else {
 370                         $idnum = $idcount++;
 371                     }
 372
 373                     $id{$idstr} = $idnum;
 374                     $idnum[$idnum]=$idstr;
 375
 376                     $source{$idstr}=$src;
 377                     $dest{$idstr}=$dest;
 378                     $voice{$idstr}=$voice;
 379
 380                     if($verbose) {
 381                         print "id: $phrase{id} ($idnum)\n";
 382                         print "source: $src\n";
 383                         print "dest: $dest\n";
 384                         print "voice: $voice\n";
 385                     }
 386
 387                     undef $src;
 388                     undef $dest;
 389                     undef $voice;
 390                     undef %phrase;
 391                 }
 392
 393                 if($update) {
 394                     my $e = $english{$idstr};
 395
 396                     if($e) {
 397                         # compare original english with this!
 398                         my @eng = split("\n", $english{$idstr});
 399
 400                         compare($idstr, \@eng, \@phrase);
 401
 402                         $english{$idstr}=""; # clear it
 403                     }
 404                     else {
 405                         print "### $idstr: The phrase is not used. Skipped\n";
 406                     }
 407                 }
 408                 undef @phrase;
 409
 410             } # end of </phrase>
 411
 412             # starts with a slash, this _ends_ this section
 413             $m = pop @m; # get back old value, the previous level's tag
 414             next;
 415         } # end of tag close
 416
 417         # This is an opening (sub) tag
 418
 419         push @m, $m; # store old value
 420         $m = $part;
 421         next;
 422     }
 423
 424     if(/^ *([^:]+): *(.*)/) {
 425         my ($name, $val)=($1, $2);
 426         &$m($_, $name, $val);
 427     }
 428 }
 429 close(LANG);
 430
 431 if($update) {
 432     my $any=0;
 433     for(keys %english) {
 434         if($english{$_}) {
 435             print "###\n",
 436             "### This phrase below was not present in the translated file\n",
 437             "<phrase>\n";
 438             print $english{$_};
 439             print "</phrase>\n";
 440         }
 441     }
 442 }
 443
 444 if($prefix) {
 445     # We create a .c and .h file
 446
 447     open(HFILE, ">$prefix.h") ||
 448         die "couldn't create file $prefix.h\n";
 449     open(CFILE, ">$prefix.c") ||
 450         die "couldn't create file $prefix.c\n";
 451
 452     print HFILE <<MOO
 453 /* This file was automatically generated using genlang */
 454 /*
 455  * The str() macro/functions is how to access strings that might be
 456  * translated. Use it like str(MACRO) and expect a string to be
 457  * returned!
 458  */
 459 #define str(x) language_strings[x]
 460
 461 /* this is the array for holding the string pointers.
 462    It will be initialized at runtime. */
 463 extern unsigned char *language_strings[];
 464 /* this contains the concatenation of all strings, separated by \\0 chars */
 465 extern const unsigned char language_builtin[];
 466
 467 /* The enum below contains all available strings */
 468 enum \{
 469 MOO
 470     ;
 471
 472     print CFILE <<MOO
 473 /* This file was automaticly generated using genlang, the strings come
 474    from "$input" */
 475
 476 #include "$prefix.h"
 477
 478 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 479 const unsigned char language_builtin[] =
 480 MOO
 481 ;
 482
 483     # Output the ID names for the enum in the header file
 484     my $i;
 485     for $i (1 .. $idcount) {
 486         my $name=$idnum[$i - 1]; # get the ID name
 487
 488         $name =~ s/\"//g; # cut off the quotes
 489
 490         printf HFILE ("    %s,\n", $name);
 491     }
 492
 493 # Output separation marker for last string ID and the upcoming voice IDs
 494
 495     print HFILE <<MOO
 496     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 497     /* --- below this follows voice-only strings --- */
 498     VOICEONLY_DELIMITER = 0x8000,
 499 MOO
 500     ;
 501
 502 # Output the ID names for the enum in the header file
 503     for $i (0x8000 .. ($voiceid-1)) {
 504         my $name=$idnum[$i]; # get the ID name
 505
 506         $name =~ s/\"//g; # cut off the quotes
 507
 508         printf HFILE ("    %s,\n", $name);
 509     }
 510
 511     # Output end of enum
 512     print HFILE "\n};\n/* end of generated enum list */\n";
 513
 514     # Output the target phrases for the source file
 515     for $i (1 .. $idcount) {
 516         my $name=$idnum[$i - 1]; # get the ID
 517         my $dest = $dest{$name}; # get the destination phrase
 518
 519         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 520
 521         if(!$dest) {
 522             # this is just to be on the safe side
 523             $dest = '"\0"';
 524         }
 525
 526         printf CFILE ("    %s\n", $dest);
 527     }
 528
 529 # Output end of string chunk
 530     print CFILE <<MOO
 531 ;
 532 /* end of generated string list */
 533 MOO
 534 ;
 535
 536     close(HFILE);
 537     close(CFILE);
 538 } # end of the c/h file generation
 539 elsif($binary) {
 540     # Creation of a binary lang file was requested
 541
 542     # We must first scan the english file to get the correct order of the id
 543     # numbers used there, as that is what sets the id order for all language
 544     # files. The english file is scanned before the translated file was
 545     # scanned.
 546
 547     open(OUTF, ">$binary") or die "Can't create $binary";
 548     binmode OUTF;
 549     printf OUTF ("\x1a%c", $langversion); # magic lang file header
 550
 551     # loop over the target phrases
 552     for $i (1 .. $idcount) {
 553         my $name=$idnum[$i - 1]; # get the ID
 554         my $dest = $dest{$name}; # get the destination phrase
 555
 556         if($dest) {
 557             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 558
 559             # Now, make sure we get the number from the english sort order:
 560             $idnum = $idmap{$name};
 561
 562             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 563         }
 564     }
 565 }
 566 elsif($voiceout) {
 567     # voice output requested, display id: and voice: strings in a v1-like
 568     # fashion
 569
 570     my @engl;
 571
 572     # This loops over the strings in the translated language file order
 573     my @ids = ((0 .. ($idcount-1)));
 574     push @ids, (0x8000 .. ($voiceid-1));
 575
 576     #for my $id (@ids) {
 577     #    print "$id\n";
 578     #}
 579
 580     for $i (@ids) {
 581         my $name=$idnum[$i]; # get the ID
 582         my $dest = $voice{$name}; # get the destination voice string
 583
 584         if($dest) {
 585             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 586
 587             # Now, make sure we get the number from the english sort order:
 588             $idnum = $idmap{$name};
 589
 590             $engl[$idnum] = $i;
 591
 592            # print "Input index $i output index $idnum\n";
 593
 594         }
 595     }
 596     for my $i (@ids) {
 597
 598         my $o = $engl[$i];
 599
 600         my $name=$idnum[$o]; # get the ID
 601         my $dest = $voice{$name}; # get the destination voice string
 602
 603         print "#$i\nid: $name\nvoice: $dest\n";
 604     }
 605
 606 }
 607
 608
 609 if($verbose) {
 610     printf("%d ID strings scanned\n", $idcount);
 611
 612     print "* head *\n";
 613     for(keys %head) {
 614         printf "$_: %s\n", $head{$_};
 615     }
 616 }
 617