tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 4; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file named [outfile].
  32     The use of this option requires that you also use -e, -t and -i.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b, -u or -s.
  42
  43  -s
  44     Sort the Update language file in the same order as the strings in the
  45     English file.
  46
  47  -t=<target>
  48     Specify which target you want the translations/phrases for. Required when
  49     -b or -p is used.
  50
  51     The target can in fact be specified as numerous different strings,
  52     separated with colons. This will make genlang to use all the specified
  53     strings when searching for a matching phrase.
  54
  55  -i=<target id>
  56     The target id number, needed for -b.
  57
  58  -o
  59     Voice mode output. Outputs all id: and voice: lines for the given target!
  60
  61  -v
  62     Enables verbose (debug) output.
  63 MOO
  64 ;
  65     exit;
  66 }
  67
  68 # How update works:
  69 #
  70 # 1) scan the english file, keep the whole <phrase> for each phrase.
  71 # 2) read the translated file, for each end of phrase, compare:
  72 #  A) all source strings, if there's any change there should be a comment about
  73 #     it output
  74 #  B) the desc fields
  75 #
  76 # 3) output the phrase with the comments from above
  77 # 4) check which phrases that the translated version didn't have, and spit out
  78 #    the english version of those
  79 #
  80
  81 my $prefix = $p;
  82 my $binary = $b;
  83 my $update = $u;
  84 my $sortfile = $s;
  85
  86 my $english = $e;
  87 my $voiceout = $o;
  88
  89 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);
  90
  91 if($check > 1) {
  92     print "Please use only one of -p, -u, -o, -b and -s\n";
  93     exit;
  94 }
  95 if(!$check) {
  96     print "Please use at least one of -p, -u, -o, -b and -s\n";
  97     exit;
  98 }
  99
 100
 101 if(($binary || $update || $voiceout || $sortfile) && !$english) {
 102     print "Please use -e too when you use -b, -o, -u or -s\n";
 103     exit;
 104 }
 105
 106 my $target_id = $i;
 107 if($binary && !$target_id) {
 108     print "Please specify a target id number (with -i)!\n";
 109     exit;
 110 }
 111
 112 my $target = $t;
 113 if(!$target && !$update && !$sortfile) {
 114     print "Please specify a target (with -t)!\n";
 115     exit;
 116 }
 117 my $verbose=$v;
 118
 119 my %id; # string to num hash
 120 my @idnum; # num to string array
 121
 122 my %allphrases;  # For sorting - an array of the <phrase> elements
 123 my %source; # id string to source phrase hash
 124 my %dest; # id string to dest phrase hash
 125 my %voice; # id string to voice phrase hash
 126
 127 my $input = $ARGV[0];
 128
 129 my @m;
 130 my $m="blank";
 131
 132 sub trim {
 133     my ($string) = @_;
 134     $string =~ s/^\s+//;
 135     $string =~ s/\s+$//;
 136     return $string;
 137 }
 138
 139 sub match {
 140     my ($string, $pattern)=@_;
 141
 142     $pattern =~ s/\*/.*/g;
 143     $pattern =~ s/\?/./g;
 144
 145     return ($string =~ /^$pattern\z/);
 146 }
 147
 148 sub blank {
 149     # nothing to do
 150 }
 151
 152 my %head;
 153 sub header {
 154     my ($full, $n, $v)=@_;
 155     $head{$n}=$v;
 156 }
 157
 158 my %phrase;
 159 sub phrase {
 160     my ($full, $n, $v)=@_;
 161     $phrase{$n}=$v;
 162 }
 163
 164 sub parsetarget {
 165     my ($debug, $strref, $full, $n, $v)=@_;
 166     my $string;
 167     my @all= split(" *, *", $n);
 168     my $test;
 169     for $test (@all) {
 170 #        print "TEST ($debug) $target for $test\n";
 171         for my $part (split(":", $target)) {
 172             if(match($part, $test)) {
 173                 $string = $v;
 174 #                print "MATCH: $test => $v\n";
 175                 $$strref = $string;
 176                 return $string;
 177             }
 178         }
 179     }
 180 }
 181
 182 my $src;
 183 sub source {
 184     parsetarget("src", \$src, @_);
 185 }
 186
 187 my $dest;
 188 sub dest {
 189     parsetarget("dest", \$dest, @_);
 190 }
 191
 192 my $voice;
 193 sub voice {
 194     parsetarget("voice", \$voice, @_);
 195 }
 196
 197 my %idmap;
 198 my %english;
 199 if($english) {
 200     # For the cases where the english file needs to be scanned/read, we do
 201     # it before we read the translated file. For -b it isn't necessary, but for
 202     # -u it is convenient.
 203
 204     my $idnum=0; # start with a true number
 205     my $vidnum=0x8000; # first voice id
 206     open(ENG, "<$english") || die "Error: can't open $english";
 207     my @phrase;
 208     my $id;
 209     my $maybeid;
 210     my $withindest;
 211     my $numphrases = 0;
 212     while(<ENG>) {
 213
 214         # get rid of DOS newlines
 215         $_ =~ s/\r//g;
 216
 217         if($_ =~ /^ *\<phrase\>/) {
 218             # this is the start of a phrase
 219         }
 220         elsif($_ =~ /^ *\<\/phrase\>/) {
 221
 222             # if id is something, when we count and store this phrase
 223             if($id) {
 224                 # voice-only entries get a difference range
 225                 if($id =~ /^VOICE_/) {
 226                     # Assign an ID number to this entry
 227                     $idmap{$id}=$vidnum;
 228                     $vidnum++;
 229                 }
 230                 else {
 231                     # Assign an ID number to this entry
 232                     $idmap{$id}=$idnum;
 233                     $idnum++;
 234    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 235                 }
 236
 237                 # this is the end of a phrase, add it to the english hash
 238                 $english{$id}=join("", @phrase);
 239             }
 240             undef @phrase;
 241             $id="";
 242         }
 243         elsif($_ ne "\n") {
 244             # gather everything related to this phrase
 245             push @phrase, $_;
 246             if($_ =~ /^ *\<dest\>/i) {
 247                 $withindest=1;
 248                 $deststr="";
 249             }
 250             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 251                 $withindest=0;
 252
 253                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
 254                     # we unconditionally always use all IDs when the "update"
 255                     # feature is used
 256                     $id = $maybeid;
 257     #                print "DEST: use this id $id\n";
 258                 }
 259                 else {
 260     #                print "skip $maybeid for $name\n";
 261                 }
 262             }
 263             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 264                 my ($name, $val)=($1, $2);
 265                 $dest=""; # in case it is left untouched for when the
 266                 # model name isn't "our"
 267                 dest($_, $name, $val);
 268
 269                 if($dest) {
 270                     # Store the current dest string. If this target matches
 271                     # multiple strings, it will get updated several times.
 272                     $deststr = $dest;
 273                 }
 274             }
 275         }
 276
 277         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 278             $maybeid=$1;
 279             $sortorder{$maybeid}=$numphrases++;
 280         }
 281     }
 282     close(ENG);
 283 }
 284
 285 # a function that compares the english phrase with the translated one.
 286 # compare source strings and desc
 287
 288 # Then output the updated version!
 289 sub compare {
 290     my ($idstr, $engref, $locref)=@_;
 291     my ($edesc, $ldesc);
 292     my ($esource, $lsource);
 293     my $mode=0;
 294
 295     for my $l (@$engref) {
 296         if($l =~ /^ *#/) {
 297             # comment
 298             next;
 299         }
 300         if($l =~ /^ *desc: (.*)/) {
 301             $edesc=$1;
 302         }
 303         elsif($l =~ / *\<source\>/i) {
 304             $mode=1;
 305         }
 306         elsif($mode) {
 307             if($l =~ / *\<\/source\>/i) {
 308                 last;
 309             }
 310             $esource .= "$l\n";
 311         }
 312     }
 313
 314     my @show;
 315     my @source;
 316
 317     $mode = 0;
 318     for my $l (@$locref) {
 319         if($l =~ /^ *desc: (.*)/) {
 320             $ldesc=$1;
 321             if(trim($edesc) ne trim($ldesc)) {
 322                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 323             }
 324             push @show, $l;
 325         }
 326         elsif($l =~ / *\<source\>/i) {
 327             $mode=1;
 328             push @show, $l;
 329         }
 330         elsif($mode) {
 331             if($l =~ / *\<\/source\>/i) {
 332                 $mode = 0;
 333                 print @show;
 334                 if(trim($esource) ne trim($lsource)) {
 335                     print "### The <source> section differs from the english!\n",
 336                     "### the previously used one is commented below:\n";
 337                     for(split("\n", $lsource)) {
 338                         print "### $_\n";
 339                     }
 340                     print $esource;
 341                 }
 342                 else {
 343                     print $lsource;
 344                 }
 345                 undef @show; # start over
 346
 347                 push @show, $l;
 348             }
 349             else {
 350                 $lsource .= "$l";
 351             }
 352         }
 353         else {
 354             push @show, $l;
 355         }
 356     }
 357
 358
 359     print @show;
 360 }
 361
 362 my $idcount;        # counter for lang ID numbers
 363 my $voiceid=0x8000; # counter for voice-only ID numbers
 364
 365 #
 366 # Now start the scanning of the selected language string
 367 #
 368
 369 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
 370 my @phrase;
 371 my $header = 1;
 372 while(<LANG>) {
 373
 374     $line++;
 375
 376     # get rid of DOS newlines
 377     $_ =~ s/\r//g;
 378
 379     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 380         # comment or empty line - output it if it's part of the header
 381         if ($header and ($update || $sortfile)) {
 382             print($_);
 383         }
 384         next;
 385     }
 386     $header = 0;
 387
 388     my $ll = $_;
 389
 390     # print "M: $m\n";
 391
 392     push @phrase, $ll;
 393
 394     # this is an XML-lookalike tag
 395     if (/^(<|[^\"<]+<)([^>]*)>/) {
 396         my $part = $2;
 397         # print "P: $part\n";
 398
 399         if($part =~ /^\//) {
 400             # this was a closing tag
 401
 402             if($part eq "/phrase") {
 403                 # closing the phrase
 404
 405                 my $idstr = $phrase{'id'};
 406                 my $idnum;
 407
 408                 if($binary && !$english{$idstr}) {
 409                     # $idstr doesn't exist for english, skip it\n";
 410                 }
 411                 elsif($dest =~ /^none\z/i) {
 412                     # "none" as dest (without quotes) means that this entire
 413                     # phrase is to be ignored
 414                 }
 415                 elsif($sortfile) {
 416                     $allphrases{$idstr}=join('',@phrase);
 417                 }
 418                 elsif(!$update) {
 419                     # we don't do the fully detailed analysis when we "update"
 420                     # since we don't do it for a particular target etc
 421
 422                     # allow the keyword 'deprecated' to be used on dest and
 423                     # voice strings to mark that as deprecated. It will then
 424                     # be replaced with "".
 425
 426                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 427                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 428
 429                     # basic syntax error alerts, if there are no quotes we
 430                     # will assume an empty string was intended
 431                     if($dest !~ /^\"/) {
 432                         print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
 433                         $dest='""';
 434                     }
 435                     if($src !~ /^\"/) {
 436                         print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
 437                         $src='""';
 438                     }
 439                     if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
 440                         print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
 441                         $voice='""';
 442                     }
 443                     if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
 444                         print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
 445                     }
 446
 447                     # Use the ID name to figure out which id number range we
 448                     # should use for this phrase. Voice-only strings are
 449                     # separated.
 450
 451                     if($idstr =~ /^VOICE/) {
 452                         $idnum = $voiceid++;
 453                     }
 454                     else {
 455                         $idnum = $idcount++;
 456                     }
 457
 458                     $id{$idstr} = $idnum;
 459                     $idnum[$idnum]=$idstr;
 460
 461                     $source{$idstr}=$src;
 462                     $dest{$idstr}=$dest;
 463                     $voice{$idstr}=$voice;
 464
 465                     if($verbose) {
 466                         print "id: $phrase{id} ($idnum)\n";
 467                         print "source: $src\n";
 468                         print "dest: $dest\n";
 469                         print "voice: $voice\n";
 470                     }
 471
 472                     undef $src;
 473                     undef $dest;
 474                     undef $voice;
 475                     undef %phrase;
 476                 }
 477
 478                 if($update) {
 479                     my $e = $english{$idstr};
 480
 481                     if($e) {
 482                         # compare original english with this!
 483                         my @eng = split("\n", $english{$idstr});
 484
 485                         compare($idstr, \@eng, \@phrase);
 486
 487                         $english{$idstr}=""; # clear it
 488                     }
 489                     else {
 490                         print "### $idstr: The phrase is not used. Skipped\n";
 491                     }
 492                 }
 493                 undef @phrase;
 494
 495             } # end of </phrase>
 496
 497             # starts with a slash, this _ends_ this section
 498             $m = pop @m; # get back old value, the previous level's tag
 499             next;
 500         } # end of tag close
 501
 502         # This is an opening (sub) tag
 503
 504         push @m, $m; # store old value
 505         $m = $part;
 506         next;
 507     }
 508
 509     if(/^ *([^:]+): *(.*)/) {
 510         my ($name, $val)=($1, $2);
 511         &$m($_, $name, $val);
 512     }
 513 }
 514 close(LANG);
 515
 516 if($update) {
 517     my $any=0;
 518     for(keys %english) {
 519         if($english{$_}) {
 520             print "###\n",
 521             "### This phrase below was not present in the translated file\n",
 522             "<phrase>\n";
 523             print $english{$_};
 524             print "</phrase>\n";
 525         }
 526     }
 527 }
 528
 529 if ($sortfile) {
 530     for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
 531          print $allphrases{$_};
 532     }
 533 }
 534
 535 if($prefix) {
 536     # We create a .c and .h file
 537
 538     open(HFILE, ">$prefix.h") ||
 539         die "Error: couldn't create file $prefix.h\n";
 540     open(CFILE, ">$prefix.c") ||
 541         die "Error: couldn't create file $prefix.c\n";
 542
 543     print HFILE <<MOO
 544 /* This file was automatically generated using genlang */
 545 /*
 546  * The str() macro/functions is how to access strings that might be
 547  * translated. Use it like str(MACRO) and expect a string to be
 548  * returned!
 549  */
 550 #define str(x) language_strings[x]
 551
 552 /* this is the array for holding the string pointers.
 553    It will be initialized at runtime. */
 554 extern unsigned char *language_strings[];
 555 /* this contains the concatenation of all strings, separated by \\0 chars */
 556 extern const unsigned char language_builtin[];
 557
 558 /* The enum below contains all available strings */
 559 enum \{
 560 MOO
 561     ;
 562
 563     print CFILE <<MOO
 564 /* This file was automaticly generated using genlang, the strings come
 565    from "$input" */
 566
 567 #include "$prefix.h"
 568
 569 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 570 const unsigned char language_builtin[] =
 571 MOO
 572 ;
 573
 574     # Output the ID names for the enum in the header file
 575     my $i;
 576     for $i (1 .. $idcount) {
 577         my $name=$idnum[$i - 1]; # get the ID name
 578
 579         $name =~ s/\"//g; # cut off the quotes
 580
 581         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
 582     }
 583
 584 # Output separation marker for last string ID and the upcoming voice IDs
 585
 586     print HFILE <<MOO
 587     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 588     /* --- below this follows voice-only strings --- */
 589     VOICEONLY_DELIMITER = 0x8000,
 590 MOO
 591     ;
 592
 593 # Output the ID names for the enum in the header file
 594     for $i (0x8000 .. ($voiceid-1)) {
 595         my $name=$idnum[$i]; # get the ID name
 596
 597         $name =~ s/\"//g; # cut off the quotes
 598
 599         printf HFILE ("    %s,\n", $name);
 600     }
 601
 602     # Output end of enum
 603     print HFILE "\n};\n/* end of generated enum list */\n";
 604
 605     # Output the target phrases for the source file
 606     for $i (1 .. $idcount) {
 607         my $name=$idnum[$i - 1]; # get the ID
 608         my $dest = $dest{$name}; # get the destination phrase
 609
 610         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 611
 612         if(!$dest) {
 613             # this is just to be on the safe side
 614             $dest = '"\0"';
 615         }
 616
 617         printf CFILE ("    %s\n", $dest);
 618     }
 619
 620 # Output end of string chunk
 621     print CFILE <<MOO
 622 ;
 623 /* end of generated string list */
 624 MOO
 625 ;
 626
 627     close(HFILE);
 628     close(CFILE);
 629 } # end of the c/h file generation
 630 elsif($binary) {
 631     # Creation of a binary lang file was requested
 632
 633     # We must first scan the english file to get the correct order of the id
 634     # numbers used there, as that is what sets the id order for all language
 635     # files. The english file is scanned before the translated file was
 636     # scanned.
 637
 638     open(OUTF, ">$binary") or die "Error: Can't create $binary";
 639     binmode OUTF;
 640     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 641
 642     # loop over the target phrases
 643     for $i (1 .. $idcount) {
 644         my $name=$idnum[$i - 1]; # get the ID
 645         my $dest = $dest{$name}; # get the destination phrase
 646
 647         if($dest) {
 648             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 649
 650             # Now, make sure we get the number from the english sort order:
 651             $idnum = $idmap{$name};
 652
 653             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 654         }
 655     }
 656 }
 657 elsif($voiceout) {
 658     # voice output requested, display id: and voice: strings in a v1-like
 659     # fashion
 660
 661     my @engl;
 662
 663     # This loops over the strings in the translated language file order
 664     my @ids = ((0 .. ($idcount-1)));
 665     push @ids, (0x8000 .. ($voiceid-1));
 666
 667     #for my $id (@ids) {
 668     #    print "$id\n";
 669     #}
 670
 671     for $i (@ids) {
 672         my $name=$idnum[$i]; # get the ID
 673         my $dest = $voice{$name}; # get the destination voice string
 674
 675         if($dest) {
 676             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 677
 678             # Now, make sure we get the number from the english sort order:
 679             $idnum = $idmap{$name};
 680
 681             if(length($idnum)) {
 682                 $engl[$idnum] = $i;
 683
 684                 #print "Input index $i output index $idnum\n";
 685             }
 686             else {
 687                 # not used, mark it so
 688                 $engl[$i] = -1
 689             }
 690
 691         }
 692     }
 693     for my $i (@ids) {
 694
 695         my $o = $engl[$i];
 696
 697         if(($o < 0) || !length($o)) {
 698             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
 699             next;
 700         }
 701
 702         my $name=$idnum[$o]; # get the ID
 703         my $dest = $voice{$name}; # get the destination voice string
 704
 705         print "#$i ($o)\nid: $name\nvoice: $dest\n";
 706     }
 707
 708 }
 709
 710
 711 if($verbose) {
 712     printf("%d ID strings scanned\n", $idcount);
 713
 714     print "* head *\n";
 715     for(keys %head) {
 716         printf "$_: %s\n", $head{$_};
 717     }
 718 }
 719