tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2007 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 3; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file namaed [outfile].
  32     The use of this option requires that you also use -e.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47     The target can in fact be specified as numerous different strings,
  48     separated with colons. This will make genlang to use all the specified
  49     strings when searching for a matching phrase.
  50
  51  -o
  52     Voice mode output. Outputs all id: and voice: lines for the given target!
  53
  54  -v
  55     Enables verbose (debug) output.
  56 MOO
  57 ;
  58     exit;
  59 }
  60
  61 # How update works:
  62 #
  63 # 1) scan the english file, keep the whole <phrase> for each phrase.
  64 # 2) read the translated file, for each end of phrase, compare:
  65 #  A) all source strings, if there's any change there should be a comment about
  66 #     it output
  67 #  B) the desc fields
  68 #
  69 # 3) output the phrase with the comments from above
# 4) check which phrases the translated version didn't have, and output
#    the english version of those
  72 #
  73
  74 my $prefix = $p;
  75 my $binary = $b;
  76 my $update = $u;
  77
  78 my $english = $e;
  79 my $voiceout = $o;
  80
  81 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
  82
  83 if($check > 1) {
  84     print "Please use only one of -p, -u, -o and -b\n";
  85     exit;
  86 }
  87 if(!$check) {
  88     print "Please use at least one of -p, -u, -o and -b\n";
  89     exit;
  90 }
  91 if(($binary || $update || $voiceout) && !$english) {
  92     print "Please use -e too when you use -b, -o or -u\n";
  93     exit;
  94 }
  95
  96 my $target = $t;
  97 if(!$target && !$update) {
  98     print "Please specify a target (with -t)!\n";
  99     exit;
 100 }
 101 my $verbose=$v;
 102
 103 my %id; # string to num hash
 104 my @idnum; # num to string array
 105
 106 my %source; # id string to source phrase hash
 107 my %dest; # id string to dest phrase hash
 108 my %voice; # id string to voice phrase hash
 109
 110 my $input = $ARGV[0];
 111
 112 my @m;
 113 my $m="blank";
 114
 115 sub match {
 116     my ($string, $pattern)=@_;
 117
 118     $pattern =~ s/\*/.?*/g;
 119     $pattern =~ s/\?/./g;
 120
 121     return ($string =~ /^$pattern\z/);
 122 }
 123
 124 sub blank {
 125     # nothing to do
 126 }
 127
 128 my %head;
 129 sub header {
 130     my ($full, $n, $v)=@_;
 131     $head{$n}=$v;
 132 }
 133
 134 my %phrase;
 135 sub phrase {
 136     my ($full, $n, $v)=@_;
 137     $phrase{$n}=$v;
 138 }
 139
 140 sub parsetarget {
 141     my ($debug, $strref, $full, $n, $v)=@_;
 142     my $string;
 143     my @all= split(" *, *", $n);
 144     my $test;
 145     for $test (@all) {
 146 #        print "TEST ($debug) $target for $test\n";
 147         for my $part (split(":", $target)) {
 148             if(match($part, $test)) {
 149                 $string = $v;
 150 #                print "MATCH: $test => $v\n";
 151                 $$strref = $string;
 152                 return $string;
 153             }
 154         }
 155     }
 156 }
 157
 158 my $src;
 159 sub source {
 160     parsetarget("src", \$src, @_);
 161 }
 162
 163 my $dest;
 164 sub dest {
 165     parsetarget("dest", \$dest, @_);
 166 }
 167
 168 my $voice;
 169 sub voice {
 170     parsetarget("voice", \$voice, @_);
 171 }
 172
 173 my %idmap;
 174 my %english;
 175 if($english) {
 176     # For the cases where the english file needs to be scanned/read, we do
 177     # it before we read the translated file. For -b it isn't necessary, but for
 178     # -u it is convenient.
 179
 180     my $idnum=0; # start with a true number
 181     my $vidnum=0x8000; # first voice id
 182     open(ENG, "<$english") || die "can't open $english";
 183     my @phrase;
 184     my $id;
 185     while(<ENG>) {
 186
 187         # get rid of DOS newlines
 188         $_ =~ s/\r//g;
 189
 190         if($_ =~ /^ *\<phrase\>/) {
 191             # this is the start of a phrase
 192         }
 193         elsif($_ =~ /^ *\<\/phrase\>/) {
 194             # this is the end of a phrase, add it to the english hash
 195             $english{$id}=join("", @phrase);
 196             undef @phrase;
 197         }
 198         elsif($_ ne "\n") {
 199             # gather everything related to this phrase
 200             push @phrase, $_;
 201         }
 202
 203         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 204             $id=$1;
 205             # voice-only entries get a difference range
 206             if($id =~ /^VOICE_/) {
 207                 # Assign an ID number to this entry
 208                 $idmap{$id}=$vidnum;
 209                 $vidnum++;
 210             }
 211             else {
 212                 # Assign an ID number to this entry
 213                 $idmap{$id}=$idnum;
 214                 $idnum++;
 215             }
 216         }
 217     }
 218     close(ENG);
 219 }
 220
 221 # a function that compares the english phrase with the translated one.
 222 # compare source strings and desc
 223
 224 # Then output the updated version!
 225 sub compare {
 226     my ($idstr, $engref, $locref)=@_;
 227     my ($edesc, $ldesc);
 228     my ($esource, $lsource);
 229     my $mode=0;
 230
 231     for my $l (@$engref) {
 232         if($l =~ /^ *desc: (.*)/) {
 233             $edesc=$1;
 234         }
 235         elsif($l =~ / *\<source\>/i) {
 236             $mode=1;
 237         }
 238         elsif($mode) {
 239             if($l =~ / *\<\/source\>/i) {
 240                 last;
 241             }
 242             $esource .= "$l\n";
 243         }
 244     }
 245
 246     my @show;
 247     my @source;
 248
 249     $mode = 0;
 250     for my $l (@$locref) {
 251         if($l =~ /^ *desc: (.*)/) {
 252             $ldesc=$1;
 253             if($edesc ne $ldesc) {
 254                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 255             }
 256             push @show, $l;
 257         }
 258         elsif($l =~ / *\<source\>/i) {
 259             $mode=1;
 260             push @show, $l;
 261         }
 262         elsif($mode) {
 263             if($l =~ / *\<\/source\>/i) {
 264                 $mode = 0;
 265                 print @show;
 266                 if($esource ne $lsource) {
 267                     print "### The <source> section differs from the english!\n",
 268                     "### the previously used one is commented below:\n";
 269                     for(split("\n", $lsource)) {
 270                         print "### $_\n";
 271                     }
 272                     print $esource;
 273                 }
 274                 else {
 275                     print $lsource;
 276                 }
 277                 undef @show; # start over
 278
 279                 push @show, $l;
 280             }
 281             else {
 282                 $lsource .= "$l";
 283             }
 284         }
 285         else {
 286             push @show, $l;
 287         }
 288     }
 289
 290
 291     print @show;
 292 }
 293
 294 my $idcount;        # counter for lang ID numbers
 295 my $voiceid=0x8000; # counter for voice-only ID numbers
 296
 297 #
 298 # Now start the scanning of the selected language string
 299 #
 300
 301 open(LANG, "<$input") || die "couldn't read language file named $input\n";
 302 my @phrase;
 303 while(<LANG>) {
 304
 305     $line++;
 306
 307     # get rid of DOS newlines
 308     $_ =~ s/\r//g;
 309
 310     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 311         # comment or empty line
 312         next;
 313     }
 314
 315     my $ll = $_;
 316
 317     # print "M: $m\n";
 318
 319     push @phrase, $ll;
 320
 321     # this is an XML-lookalike tag
 322     if (/^(<|[^\"<]+<)([^>]*)>/) {
 323         my $part = $2;
 324         # print "P: $part\n";
 325
 326         if($part =~ /^\//) {
 327             # this was a closing tag
 328
 329             if($part eq "/phrase") {
 330                 # closing the phrase
 331
 332                 my $idstr = $phrase{'id'};
 333                 my $idnum;
 334
 335                 if($dest =~ /^none\z/i) {
 336                     # "none" as dest (without quotes) means that this entire
 337                     # phrase is to be ignored
 338                 }
 339                 elsif(!$update) {
 340                     # we don't do the fully detailed analysis when we "update"
 341                     # since we don't do it for a particular target etc
 342
 343                     # allow the keyword 'deprecated' to be used on dest and
 344                     # voice strings to mark that as deprecated. It will then
 345                     # be replaced with "".
 346
 347                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 348                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 349
 350                     # basic syntax error alerts, if there are no quotes we
 351                     # will assume an empty string was intended
 352                     if($dest !~ /^\"/) {
 353                         print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
 354                         $dest='""';
 355                     }
 356                     if($src !~ /^\"/) {
 357                         print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
 358                         $src='""';
 359                     }
 360                     if($voice !~ /^\"/) {
 361                         print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
 362                         $voice='""';
 363                     }
 364
 365                     # Use the ID name to figure out which id number range we
 366                     # should use for this phrase. Voice-only strings are
 367                     # separated.
 368
 369                     if($idstr =~ /^VOICE/) {
 370                         $idnum = $voiceid++;
 371                     }
 372                     else {
 373                         $idnum = $idcount++;
 374                     }
 375
 376                     $id{$idstr} = $idnum;
 377                     $idnum[$idnum]=$idstr;
 378
 379                     $source{$idstr}=$src;
 380                     $dest{$idstr}=$dest;
 381                     $voice{$idstr}=$voice;
 382
 383                     if($verbose) {
 384                         print "id: $phrase{id} ($idnum)\n";
 385                         print "source: $src\n";
 386                         print "dest: $dest\n";
 387                         print "voice: $voice\n";
 388                     }
 389
 390                     undef $src;
 391                     undef $dest;
 392                     undef $voice;
 393                     undef %phrase;
 394                 }
 395
 396                 if($update) {
 397                     my $e = $english{$idstr};
 398
 399                     if($e) {
 400                         # compare original english with this!
 401                         my @eng = split("\n", $english{$idstr});
 402
 403                         compare($idstr, \@eng, \@phrase);
 404
 405                         $english{$idstr}=""; # clear it
 406                     }
 407                     else {
 408                         print "### $idstr: The phrase is not used. Skipped\n";
 409                     }
 410                 }
 411                 undef @phrase;
 412
 413             } # end of </phrase>
 414
 415             # starts with a slash, this _ends_ this section
 416             $m = pop @m; # get back old value, the previous level's tag
 417             next;
 418         } # end of tag close
 419
 420         # This is an opening (sub) tag
 421
 422         push @m, $m; # store old value
 423         $m = $part;
 424         next;
 425     }
 426
 427     if(/^ *([^:]+): *(.*)/) {
 428         my ($name, $val)=($1, $2);
 429         &$m($_, $name, $val);
 430     }
 431 }
 432 close(LANG);
 433
 434 if($update) {
 435     my $any=0;
 436     for(keys %english) {
 437         if($english{$_}) {
 438             print "###\n",
 439             "### This phrase below was not present in the translated file\n",
 440             "<phrase>\n";
 441             print $english{$_};
 442             print "</phrase>\n";
 443         }
 444     }
 445 }
 446
 447 if($prefix) {
 448     # We create a .c and .h file
 449
 450     open(HFILE, ">$prefix.h") ||
 451         die "couldn't create file $prefix.h\n";
 452     open(CFILE, ">$prefix.c") ||
 453         die "couldn't create file $prefix.c\n";
 454
 455     print HFILE <<MOO
 456 /* This file was automatically generated using genlang */
 457 /*
 458  * The str() macro/functions is how to access strings that might be
 459  * translated. Use it like str(MACRO) and expect a string to be
 460  * returned!
 461  */
 462 #define str(x) language_strings[x]
 463
 464 /* this is the array for holding the string pointers.
 465    It will be initialized at runtime. */
 466 extern unsigned char *language_strings[];
 467 /* this contains the concatenation of all strings, separated by \\0 chars */
 468 extern const unsigned char language_builtin[];
 469
 470 /* The enum below contains all available strings */
 471 enum \{
 472 MOO
 473     ;
 474
 475     print CFILE <<MOO
 476 /* This file was automaticly generated using genlang, the strings come
 477    from "$input" */
 478
 479 #include "$prefix.h"
 480
 481 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 482 const unsigned char language_builtin[] =
 483 MOO
 484 ;
 485
 486     # Output the ID names for the enum in the header file
 487     my $i;
 488     for $i (1 .. $idcount) {
 489         my $name=$idnum[$i - 1]; # get the ID name
 490
 491         $name =~ s/\"//g; # cut off the quotes
 492
 493         printf HFILE ("    %s,\n", $name);
 494     }
 495
 496 # Output separation marker for last string ID and the upcoming voice IDs
 497
 498     print HFILE <<MOO
 499     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 500     /* --- below this follows voice-only strings --- */
 501     VOICEONLY_DELIMITER = 0x8000,
 502 MOO
 503     ;
 504
 505 # Output the ID names for the enum in the header file
 506     for $i (0x8000 .. ($voiceid-1)) {
 507         my $name=$idnum[$i]; # get the ID name
 508
 509         $name =~ s/\"//g; # cut off the quotes
 510
 511         printf HFILE ("    %s,\n", $name);
 512     }
 513
 514     # Output end of enum
 515     print HFILE "\n};\n/* end of generated enum list */\n";
 516
 517     # Output the target phrases for the source file
 518     for $i (1 .. $idcount) {
 519         my $name=$idnum[$i - 1]; # get the ID
 520         my $dest = $dest{$name}; # get the destination phrase
 521
 522         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 523
 524         if(!$dest) {
 525             # this is just to be on the safe side
 526             $dest = '"\0"';
 527         }
 528
 529         printf CFILE ("    %s\n", $dest);
 530     }
 531
 532 # Output end of string chunk
 533     print CFILE <<MOO
 534 ;
 535 /* end of generated string list */
 536 MOO
 537 ;
 538
 539     close(HFILE);
 540     close(CFILE);
 541 } # end of the c/h file generation
 542 elsif($binary) {
 543     # Creation of a binary lang file was requested
 544
 545     # We must first scan the english file to get the correct order of the id
 546     # numbers used there, as that is what sets the id order for all language
 547     # files. The english file is scanned before the translated file was
 548     # scanned.
 549
 550     open(OUTF, ">$binary") or die "Can't create $binary";
 551     binmode OUTF;
 552     printf OUTF ("\x1a%c", $langversion); # magic lang file header
 553
 554     # loop over the target phrases
 555     for $i (1 .. $idcount) {
 556         my $name=$idnum[$i - 1]; # get the ID
 557         my $dest = $dest{$name}; # get the destination phrase
 558
 559         if($dest) {
 560             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 561
 562             # Now, make sure we get the number from the english sort order:
 563             $idnum = $idmap{$name};
 564
 565             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 566         }
 567     }
 568 }
 569 elsif($voiceout) {
 570     # voice output requested, display id: and voice: strings in a v1-like
 571     # fashion
 572
 573     my @engl;
 574
 575     # This loops over the strings in the translated language file order
 576     my @ids = ((0 .. ($idcount-1)));
 577     push @ids, (0x8000 .. ($voiceid-1));
 578
 579     #for my $id (@ids) {
 580     #    print "$id\n";
 581     #}
 582
 583     for $i (@ids) {
 584         my $name=$idnum[$i]; # get the ID
 585         my $dest = $voice{$name}; # get the destination voice string
 586
 587         if($dest) {
 588             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 589
 590             # Now, make sure we get the number from the english sort order:
 591             $idnum = $idmap{$name};
 592
 593             $engl[$idnum] = $i;
 594
 595            # print "Input index $i output index $idnum\n";
 596
 597         }
 598     }
 599     for my $i (@ids) {
 600
 601         my $o = $engl[$i];
 602
 603         my $name=$idnum[$o]; # get the ID
 604         my $dest = $voice{$name}; # get the destination voice string
 605
 606         print "#$i\nid: $name\nvoice: $dest\n";
 607     }
 608
 609 }
 610
 611
 612 if($verbose) {
 613     printf("%d ID strings scanned\n", $idcount);
 614
 615     print "* head *\n";
 616     for(keys %head) {
 617         printf "$_: %s\n", $head{$_};
 618     }
 619 }
 620