tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
  11 #
  12
  13 # See apps/language.c (TODO: Use common include for both)
  14 # Cookie and binary version for the binary lang file
   15 my $LANGUAGE_COOKIE   = 0x1a; # first byte written to a binary (-b) language file
   16 my $LANGUAGE_VERSION  = 0x05; # second byte of that file: the binary format version
   17 my $LANGUAGE_FLAG_RTL = 0x01; # bit OR'ed into the options byte when <options> has a true 'rtl'
  18
  19 # A note for future users and readers: The original v1 language system allowed
  20 # the build to create and use a different language than english built-in. We
  21 # removed that feature from our build-system, but the build scripts still had
  22 # the ability. But, starting now, this ability is no longer provided since I
  23 # figured it was boring and unnecessary to write support for now since we
  24 # don't use it anymore.
  25
   26 my %user2num = # value of a phrase's "user:" field => user number
   27   ('core' => 1); # only 'core' is listed; the main scan warns about any other value and uses 1
  28
  29
   30 if(!$ARGV[0]) { # no language file named on the command line: print the help text
   31     print <<MOO
   32 Usage: genlang [options] <langv2 file>
   33
   34  -p=<prefix>
   35     Make the tool create a [prefix].c and [prefix].h file.
   36
   37  -b=<outfile>
   38     Make the tool create a binary language (.lng) file named [outfile].
   39     The use of this option requires that you also use -e, -t and -i.
   40
   41  -u
   42     Update language file. Given the translated file and the most recent english
   43     file, you\'ll get an updated version sent to stdout. Suitable action to do
   44     when you intend to update a translation.
   45
   46  -e=<english lang file>
   47     Point out the english (original source) file, to use that as master
   48     language template. Used in combination with -b, -u or -s.
   49
   50  -s
   51     Sort the Update language file in the same order as the strings in the
   52     English file.
   53
   54  -t=<target>
   55     Specify which target you want the translations/phrases for. Required when
   56     -b or -p is used.
   57
   58     The target can in fact be specified as numerous different strings,
   59     separated with colons. This will make genlang to use all the specified
   60     strings when searching for a matching phrase.
   61
   62  -i=<target id>
   63     The target id number, needed for -b.
   64
   65  -o
   66     Voice mode output. Outputs all id: and voice: lines for the given target!
   67
   68  -v
   69     Enables verbose (debug) output.
   70 MOO
   71 ;
          # NOTE(review): the help text is not fully in line with the checks made
          # further down: -e and -t are demanded for -o as well, and -p writes
          # lang.h and lang_core.c into the directory [prefix] rather than
          # creating [prefix].c and [prefix].h.
   72     exit; # the help text goes to stdout and no exit value is given
   73 }
  74
  75 # How update works:
  76 #
  77 # 1) scan the english file, keep the whole <phrase> for each phrase.
  78 # 2) read the translated file, for each end of phrase, compare:
  79 #  A) all source strings, if there's any change there should be a comment about
  80 #     it output
  81 #  B) the desc fields
  82 #
  83 # 3) output the phrase with the comments from above
  84 # 4) check which phrases that the translated version didn't have, and spit out
  85 #    the english version of those
  86 #
  87
  88 my $prefix = $p;
  89 my $binary = $b;
  90 my $update = $u;
  91 my $sortfile = $s;
  92
  93 my $english = $e;
  94 my $voiceout = $o;
  95
  96 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);
  97
  98 if($check > 1) {
  99     print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
 100     exit;
 101 }
 102 if(!$check) {
 103     print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
 104     exit;
 105 }
 106
 107
 108 if(($binary || $update || $voiceout || $sortfile) && !$english) {
 109     print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
 110     exit;
 111 }
 112
 113 my $target_id = $i;
 114 if($binary && !$target_id) {
 115     print STDERR "Please specify a target id number (with -i)!\n";
 116     exit;
 117 }
 118
 119 my $target = $t;
 120 if(!$target && !$update && !$sortfile) {
 121     print STDERR "Please specify a target (with -t)!\n";
 122     exit;
 123 }
 124 my $verbose=$v;
 125
  126 my %id; # id string => id number, filled in while the language file is scanned
  127 my @idnum; # id number => id string (the reverse of %id)
  128
  129 my %allphrases;  # For sorting (-s): id string => the text gathered for that <phrase>
  130 my %source; # id string to source phrase hash
  131 my %dest; # id string to dest phrase hash
  132 my %voice; # id string to voice phrase hash
  133
  134 my $input = $ARGV[0]; # the language file to scan
  135
  136 my @m; # stack holding the handler names of the enclosing sections
  137 my $m="blank"; # name of the sub that receives the "name: value" lines of the current section
 138
 139 sub trim {
 140     my ($string) = @_;
 141     $string =~ s/^\s+//;
 142     $string =~ s/\s+$//;
 143     return $string;
 144 }
 145
 146 sub match {
 147     my ($string, $pattern)=@_;
 148
 149     $pattern =~ s/\*/.*/g;
 150     $pattern =~ s/\?/./g;
 151
 152     return ($string =~ /^$pattern\z/);
 153 }
 154
 155 sub blank {
 156     # nothing to do
 157 }
 158
 159 my %head;
 160 sub header {
 161     my ($full, $n, $v)=@_;
 162     $head{$n}=$v;
 163 }
 164
 165 my %phrase;
 166 sub phrase {
 167     my ($full, $n, $v)=@_;
 168     $phrase{$n}=$v;
 169 }
 170
 171 my %options;
 172 sub options {
 173     my ($full, $n, $v)=@_;
 174     $options{$n}=$v;
 175 }
 176
 177 sub parsetarget {
 178     my ($debug, $strref, $full, $n, $v)=@_;
 179     my $string;
 180     my @all= split(" *, *", $n);
 181     my $test;
 182     for $test (@all) {
 183 #        print "TEST ($debug) $target for $test\n";
 184         for my $part (split(":", $target)) {
 185             if(match($part, $test)) {
 186                 $string = $v;
 187 #                print "MATCH: $test => $v\n";
 188                 $$strref = $string;
 189                 return $string;
 190             }
 191         }
 192     }
 193 }
 194
 195 my $src;
 196 sub source {
 197     parsetarget("src", \$src, @_);
 198 }
 199
 200 my $dest;
 201 sub dest {
 202     parsetarget("dest", \$dest, @_);
 203 }
 204
 205 my $voice;
 206 sub voice {
 207     parsetarget("voice", \$voice, @_);
 208 }
 209
  210 my %idmap; # id string => id number, handed out in the order of the english file
  211 my %english; # id string => text of that phrase in the english file
  212 if($english) {
  213     # For the cases where the english file needs to be scanned/read, we do
  214     # it before we read the translated file. For -b it isn't necessary, but for
  215     # -u it is convenient.
  216
          # What the scan leaves behind:
          #   %english   - for each phrase that is kept: the lines between its
          #                <phrase> and </phrase> lines, joined, without the
          #                empty lines
          #   %idmap     - the id number of each kept phrase; ids starting with
          #                VOICE_ count from 0x8000, all others from 0
          #   %sortorder - id string => running number of its "id:" line
          # A phrase is kept when -u is used, or when its <dest> section gave
          # the current target a value other than "none".
          # NOTE(review): %sortorder and $deststr are not declared with "my" in
          # this section, so they appear to be package globals - confirm.
  217     my $idnum=0; # start with a true number
  218     my $vidnum=0x8000; # first voice id
  219     open(ENG, "<$english") || die "Error: can't open $english";
  220     my @phrase;
  221     my $id;
  222     my $maybeid;
  223     my $user;
  224     my $maybeuser;
  225     my $withindest;
  226     my $numphrases = 0;
  227     while(<ENG>) {
  228
  229         # get rid of DOS newlines
  230         $_ =~ s/\r//g;
  231
  232         if($_ =~ /^ *\<phrase\>/) {
  233             # this is the start of a phrase
  234         }
  235         elsif($_ =~ /^ *\<\/phrase\>/) {
  236
  237             # if id is something, then we count and store this phrase
  238             if($id) {
  239                 # voice-only entries get a different range
  240                 if($id =~ /^VOICE_/) {
  241                     # Assign an ID number to this entry
  242                     $idmap{$id}=$vidnum;
  243                     $vidnum++;
  244                 }
  245                 else {
  246                     # Assign an ID number to this entry
  247                     $idmap{$id}=$idnum;
  248                     $idnum++;
  249    #                 print STDERR "DEST: bumped idnum to $idnum\n";
  250                 }
  251
  252                 # this is the end of a phrase, add it to the english hash
  253                 $english{$id}=join("", @phrase);
  254             }
                  # start the next phrase with an empty buffer and no id
  255             undef @phrase;
  256             $id="";
  257         }
  258         elsif($_ ne "\n") {
  259             # gather everything related to this phrase
  260             push @phrase, $_;
  261             if($_ =~ /^ *\<dest\>/i) {
  262                 $withindest=1;
  263                 $deststr="";
  264             }
  265             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
  266                 $withindest=0;
  267
  268                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
  269                     # we unconditionally always use all IDs when the "update"
  270                     # feature is used
  271                     $id = $maybeid;
                          # NOTE(review): $user is assigned here but not read
                          # anywhere else in this block.
  272                     $user = $user2num{$maybeuser};
  273     #                print "DEST: use this id $id\n";
  274                 }
  275                 else {
  276     #                print "skip $maybeid for $name\n";
  277                 }
  278             }
  279             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                      # a "pattern: value" line inside <dest>: let dest() decide
                      # whether it applies to the current target
  280                 my ($name, $val)=($1, $2);
  281                 $dest=""; # in case it is left untouched for when the
  282                 # model name isn't "our"
  283                 dest($_, $name, $val);
  284
  285                 if($dest) {
  286                     # Store the current dest string. If this target matches
  287                     # multiple strings, it will get updated several times.
  288                     $deststr = $dest;
  289                 }
  290             }
  291         }
  292
              # the two checks below are made for every line, whatever branch
              # was taken above
  293         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
  294             $maybeid=$1;
  295             $sortorder{$maybeid}=$numphrases++;
  296         }
  297         if($_ =~ /^ *user: ([^ \t\n]+)/i) {
  298             $maybeuser=$1;
  299         }
  300     }
  301     close(ENG);
  302 }
 303
 304 # a function that compares the english phrase with the translated one.
 305 # compare source strings and desc
 306
 307 # Then output the updated version!
 308 sub compare {
 309     my ($idstr, $engref, $locref)=@_;
 310     my ($edesc, $ldesc);
 311     my ($esource, $lsource);
 312     my $mode=0;
 313
 314     for my $l (@$engref) {
 315         if($l =~ /^ *#/) {
 316             # comment
 317             next;
 318         }
 319         if($l =~ /^ *desc: (.*)/) {
 320             $edesc=$1;
 321         }
 322         elsif($l =~ / *\<source\>/i) {
 323             $mode=1;
 324         }
 325         elsif($mode) {
 326             if($l =~ / *\<\/source\>/i) {
 327                 last;
 328             }
 329             $esource .= "$l\n";
 330         }
 331     }
 332
 333     my @show;
 334     my @source;
 335
 336     $mode = 0;
 337     for my $l (@$locref) {
 338         if($l =~ /^ *desc: (.*)/) {
 339             $ldesc=$1;
 340             if(trim($edesc) ne trim($ldesc)) {
 341                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 342             }
 343             push @show, $l;
 344         }
 345         elsif($l =~ / *\<source\>/i) {
 346             $mode=1;
 347             push @show, $l;
 348         }
 349         elsif($mode) {
 350             if($l =~ / *\<\/source\>/i) {
 351                 $mode = 0;
 352                 print @show;
 353                 if(trim($esource) ne trim($lsource)) {
 354                     print "### The <source> section differs from the english!\n",
 355                     "### the previously used one is commented below:\n";
 356                     for(split("\n", $lsource)) {
 357                         print "### $_\n";
 358                     }
 359                     print $esource;
 360                 }
 361                 else {
 362                     print $lsource;
 363                 }
 364                 undef @show; # start over
 365
 366                 push @show, $l;
 367             }
 368             else {
 369                 $lsource .= "$l";
 370             }
 371         }
 372         else {
 373             push @show, $l;
 374         }
 375     }
 376
 377
 378     print @show;
 379 }
 380
 381 my $idcount;        # counter for lang ID numbers
 382 my $voiceid=0x8000; # counter for voice-only ID numbers
 383
 384 #
 385 # Now start the scanning of the selected language string
 386 #
 387
 388 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
 389 my @phrase;
 390 my $header = 1;
 391 my $langoptions = 0;
 392
 393 while(<LANG>) {
 394
 395     $line++;
 396
 397     # get rid of DOS newlines
 398     $_ =~ s/\r//g;
 399
 400     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 401         # comment or empty line - output it if it's part of the header
 402         if ($header and ($update || $sortfile)) {
 403             print($_);
 404         }
 405         next;
 406     }
 407     $header = 0;
 408
 409     my $ll = $_;
 410
 411     # print "M: $m\n";
 412
 413     push @phrase, $ll;
 414
 415     # this is an XML-lookalike tag
 416     if (/^(<|[^\"<]+<)([^>]*)>/) {
 417         my $part = $2;
 418         # print "P: $part\n";
 419
 420         if($part =~ /^\//) {
 421             # this was a closing tag
 422
 423             if($part eq "/phrase") {
 424                 # closing the phrase
 425
 426                 my $idstr = $phrase{'id'};
 427                 my $idnum;
 428
 429                 if($binary && !$english{$idstr}) {
 430                     # $idstr doesn't exist for english, skip it\n";
 431                 }
 432                 elsif($dest =~ /^none\z/i) {
 433                     # "none" as dest (without quotes) means that this entire
 434                     # phrase is to be ignored
 435                 }
 436                 elsif($sortfile) {
 437                     $allphrases{$idstr}=join('',@phrase);
 438                 }
 439                 elsif(!$update) {
 440                     # we don't do the fully detailed analysis when we "update"
 441                     # since we don't do it for a particular target etc
 442
 443                     # allow the keyword 'deprecated' to be used on dest and
 444                     # voice strings to mark that as deprecated. It will then
 445                     # be replaced with "".
 446
 447                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 448                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 449
 450                     # basic syntax error alerts, if there are no quotes we
 451                     # will assume an empty string was intended
 452                     if($dest !~ /^\"/) {
 453                         print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
 454                         $dest='""';
 455                     }
 456                     if($src !~ /^\"/) {
 457                         print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
 458                         $src='""';
 459                     }
 460                     if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
 461                         print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
 462                         $voice='""';
 463                     }
 464                     if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
 465                         print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
 466                     }
 467
 468                     my $user = $user2num{trim($phrase{'user'})};
 469                     if(!$user) {
 470                         print STDERR "$input:$line:1: warning: unknown user!\n";
 471                         $user = 1;
 472                     }
 473
 474                     # Use the ID name to figure out which id number range we
 475                     # should use for this phrase. Voice-only strings are
 476                     # separated.
 477
 478                     if($idstr =~ /^VOICE/) {
 479                         $idnum = $voiceid++;
 480                     }
 481                     else {
 482                         $idnum = $idcount++;
 483                     }
 484
 485                     $id{$idstr} = $idnum;
 486                     $idnum[$idnum]=$idstr;
 487
 488                     $source{$idstr}=$src;
 489                     $dest{$idstr}=$dest;
 490                     $voice{$idstr}=$voice;
 491
 492                     if($verbose) {
 493                         print "id: $phrase{id} ($idnum)\n";
 494                         print "source: $src\n";
 495                         print "dest: $dest\n";
 496                         print "voice: $voice\n";
 497                         print "user: $user\n";
 498                     }
 499
 500                     undef $src;
 501                     undef $dest;
 502                     undef $voice;
 503                     undef $user;
 504                     undef %phrase;
 505                 }
 506
 507                 if($update) {
 508                     my $e = $english{$idstr};
 509
 510                     if($e) {
 511                         # compare original english with this!
 512                         my @eng = split("\n", $english{$idstr});
 513
 514                         compare($idstr, \@eng, \@phrase);
 515
 516                         $english{$idstr}=""; # clear it
 517                     }
 518                     else {
 519                         print "### $idstr: The phrase is not used. Skipped\n";
 520                     }
 521                 }
 522             } # end of </phrase>
 523             elsif($part eq "/options") {
 524                 # closing the options
 525                 if ($options{'rtl'}) {
 526                     $langoptions |= $LANGUAGE_FLAG_RTL;
 527                 }
 528             } # end of </options>
 529
 530            undef @phrase;
 531
 532             # starts with a slash, this _ends_ this section
 533             $m = pop @m; # get back old value, the previous level's tag
 534             next;
 535         } # end of tag close
 536
 537         # This is an opening (sub) tag
 538
 539         push @m, $m; # store old value
 540         $m = $part;
 541         next;
 542     }
 543
 544     if(/^ *([^:]+): *(.*)/) {
 545         my ($name, $val)=($1, $2);
 546         &$m($_, $name, $val);
 547     }
 548 }
 549 close(LANG);
 550
 551 if($update) {
 552     my $any=0;
 553     for(keys %english) {
 554         if($english{$_}) {
 555             print "###\n",
 556             "### This phrase below was not present in the translated file\n",
 557             "<phrase>\n";
 558             print $english{$_};
 559             print "</phrase>\n";
 560         }
 561     }
 562 }
 563
 564 if ($sortfile) {
 565     for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
 566          print $allphrases{$_};
 567     }
 568 }
 569
  570 if($prefix) { # -p: write lang.h and lang_core.c into the directory $prefix
  571     # We create a .c and .h file
  572
  573     open(HFILE, ">$prefix/lang.h") ||
  574         die "Error: couldn't create file $prefix/lang.h\n";
  575     open(CFILE, ">$prefix/lang_core.c") ||
  576         die "Error: couldn't create file $prefix/lang_core.c\n";
  577
  578    # get header file name
         # (the substitution strips everything up to and including the last '/')
         # NOTE(review): $headername is not declared with "my" here - confirm
         # that a package global is intended.
  579    $headername = "$prefix/lang.h";
  580    $headername =~ s/(.*\/)*//;
  581
  582     print HFILE <<MOO
  583 /* This file was automatically generated using genlang */
  584 /*
  585  * The str() macro/functions is how to access strings that might be
  586  * translated. Use it like str(MACRO) and expect a string to be
  587  * returned!
  588  */
  589 #define str(x) language_strings[x]
  590
  591 /* this is the array for holding the string pointers.
  592    It will be initialized at runtime. */
  593 extern unsigned char *language_strings[];
  594 /* this contains the concatenation of all strings, separated by \\0 chars */
  595 extern const unsigned char language_builtin[];
  596
  597 /* The enum below contains all available strings */
  598 enum \{
  599 MOO
  600     ;
  601
  602     print CFILE <<MOO
  603 /* This file was automaticly generated using genlang, the strings come
  604    from "$input" */
  605
  606 #include "$headername"
  607
  608 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
  609 const unsigned char language_builtin[] =
  610 MOO
  611 ;
  612
  613     # Output the ID names for the enum in the header file
          # (one enum entry per regular id, in id number order, with the number
          # in a C comment)
  614     my $i;
  615     for $i (1 .. $idcount) {
  616         my $name=$idnum[$i - 1]; # get the ID name
  617
  618         $name =~ s/\"//g; # cut off the quotes
  619
  620         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
  621     }
  622
  623 # Output separation marker for last string ID and the upcoming voice IDs
  624
  625     print HFILE <<MOO
  626     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
  627     /* --- below this follows voice-only strings --- */
  628     VOICEONLY_DELIMITER = 0x8000,
  629 MOO
  630     ;
  631
  632 # Output the ID names for the enum in the header file
      # (this time the voice-only ids, which are numbered from 0x8000)
  633     for $i (0x8000 .. ($voiceid-1)) {
  634         my $name=$idnum[$i]; # get the ID name
  635
  636         $name =~ s/\"//g; # cut off the quotes
  637
  638         printf HFILE ("    %s,\n", $name);
  639     }
  640
  641     # Output end of enum
  642     print HFILE "\n};\n/* end of generated enum list */\n";
  643
  644     # Output the target phrases for the source file
          # (each one becomes a C string literal ending in \0; together they
          # form the initializer of language_builtin[])
  645     for $i (1 .. $idcount) {
  646         my $name=$idnum[$i - 1]; # get the ID
  647         my $dest = $dest{$name}; # get the destination phrase
  648
  649         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
  650
  651         if(!$dest) {
  652             # this is just to be on the safe side
  653             $dest = '"\0"';
  654         }
  655
  656         printf CFILE ("    %s\n", $dest);
  657     }
  658
  659 # Output end of string chunk
  660     print CFILE <<MOO
  661 ;
  662 /* end of generated string list */
  663 MOO
  664 ;
  665
  666     close(HFILE);
  667     close(CFILE);
  668 } # end of the c/h file generation
  669 elsif($binary) {
  670     # Creation of a binary lang file was requested
  671
  672     # We must first scan the english file to get the correct order of the id
  673     # numbers used there, as that is what sets the id order for all language
  674     # files. The english file is scanned before the translated file was
  675     # scanned.
  676
          # NOTE(review): OUTF is not closed explicitly in this branch.
  677     open(OUTF, ">$binary") or die "Error: Can't create $binary";
  678     binmode OUTF;
          # four header bytes: cookie, format version, target id, option flags
  679     printf OUTF ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION, $target_id,
  680       $langoptions); # magic lang file header
  681
  682     # loop over the target phrases
  683     for $i (1 .. $idcount) {
  684         my $name=$idnum[$i - 1]; # get the ID
  685         my $dest = $dest{$name}; # get the destination phrase
  686
  687         if($dest) {
  688             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
  689
  690             # Now, make sure we get the number from the english sort order:
                  # NOTE(review): this scalar $idnum is not declared with "my"
                  # here; it is a different variable from the array @idnum.
  691             $idnum = $idmap{$name};
  692
                  # one record: high byte and low byte of the id number, the
                  # string itself, then a terminating zero byte
  693             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
  694         }
  695     }
  696 }
  697 elsif($voiceout) {
  698     # voice output requested, display id: and voice: strings in a v1-like
  699     # fashion
  700
          # @engl: index is an id number from the english file, the value is
          # the id number the same phrase got in the translated file; -1 marks
          # a number as not used
  701     my @engl;
  702
  703     # This loops over the strings in the translated language file order
  704     my @ids = ((0 .. ($idcount-1)));
  705     push @ids, (0x8000 .. ($voiceid-1));
  706
  707     #for my $id (@ids) {
  708     #    print "$id\n";
  709     #}
  710
  711     for $i (@ids) {
  712         my $name=$idnum[$i]; # get the ID
  713         my $dest = $voice{$name}; # get the destination voice string
  714
  715         if($dest) {
  716             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
  717
  718             # Now, make sure we get the number from the english sort order:
  719             $idnum = $idmap{$name};
  720
  721             if(length($idnum)) {
  722                 $engl[$idnum] = $i;
  723
  724                 #print "Input index $i output index $idnum\n";
  725             }
  726             else {
  727                 # not used, mark it so
  728                 $engl[$i] = -1
  729             }
  730
  731         }
  732     }
          # second pass: print one entry per id number; a number without an
          # entry in @engl, or with -1, is printed as a NOT_USED_ placeholder
  733     for my $i (@ids) {
  734
  735         my $o = $engl[$i];
  736
  737         if(($o < 0) || !length($o)) {
  738             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
  739             next;
  740         }
  741
  742         my $name=$idnum[$o]; # get the ID
  743         my $dest = $voice{$name}; # get the destination voice string
  744
  745         print "#$i ($o)\nid: $name\nvoice: $dest\n";
  746     }
  747
  748 }
 749
 750
 751 if($verbose) {
 752     printf("%d ID strings scanned\n", $idcount);
 753
 754     print "* head *\n";
 755     for(keys %head) {
 756         printf "$_: %s\n", $head{$_};
 757     }
 758 }
 759