tools/genlang

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 - 2008 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 4; # 3 was the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 my %user2num =
  24   ('core' => 1);
  25
  26
  27 if(!$ARGV[0]) {
  28     print <<MOO
  29 Usage: genlang [options] <langv2 file>
  30
  31  -p=<prefix>
  32     Make the tool create a [prefix].c and [prefix].h file.
  33
  34  -b=<outfile>
  35     Make the tool create a binary language (.lng) file named [outfile].
  36     The use of this option requires that you also use -e, -t and -i.
  37
  38  -u
  39     Update language file. Given the translated file and the most recent english
  40     file, you\'ll get an updated version sent to stdout. Suitable action to do
  41     when you intend to update a translation.
  42
  43  -e=<english lang file>
  44     Point out the english (original source) file, to use that as master
  45     language template. Used in combination with -b, -u or -s.
  46
  47  -s
  48     Sort the Update language file in the same order as the strings in the
  49     English file.
  50
  51  -t=<target>
  52     Specify which target you want the translations/phrases for. Required when
  53     -b or -p is used.
  54
  55     The target can in fact be specified as numerous different strings,
  56     separated with colons. This will make genlang to use all the specified
  57     strings when searching for a matching phrase.
  58
  59  -i=<target id>
  60     The target id number, needed for -b.
  61
  62  -o
  63     Voice mode output. Outputs all id: and voice: lines for the given target!
  64
  65  -v
  66     Enables verbose (debug) output.
  67 MOO
  68 ;
  69     exit;
  70 }
  71
  72 # How update works:
  73 #
  74 # 1) scan the english file, keep the whole <phrase> for each phrase.
  75 # 2) read the translated file, for each end of phrase, compare:
  76 #  A) all source strings, if there's any change there should be a comment about
  77 #     it output
  78 #  B) the desc fields
  79 #
  80 # 3) output the phrase with the comments from above
  81 # 4) check which phrases that the translated version didn't have, and spit out
  82 #    the english version of those
  83 #
  84
  85 my $prefix = $p;
  86 my $binary = $b;
  87 my $update = $u;
  88 my $sortfile = $s;
  89
  90 my $english = $e;
  91 my $voiceout = $o;
  92
  93 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0) + ($sortfile?1:0);
  94
  95 if($check > 1) {
  96     print STDERR "Please use only one of -p, -u, -o, -b and -s\n";
  97     exit;
  98 }
  99 if(!$check) {
 100     print STDERR "Please use at least one of -p, -u, -o, -b and -s\n";
 101     exit;
 102 }
 103
 104
 105 if(($binary || $update || $voiceout || $sortfile) && !$english) {
 106     print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
 107     exit;
 108 }
 109
 110 my $target_id = $i;
 111 if($binary && !$target_id) {
 112     print STDERR "Please specify a target id number (with -i)!\n";
 113     exit;
 114 }
 115
 116 my $target = $t;
 117 if(!$target && !$update && !$sortfile) {
 118     print STDERR "Please specify a target (with -t)!\n";
 119     exit;
 120 }
 121 my $verbose=$v;
 122
 123 my %id; # string to num hash
 124 my @idnum; # num to string array
 125
 126 my %allphrases;  # For sorting - an array of the <phrase> elements
 127 my %source; # id string to source phrase hash
 128 my %dest; # id string to dest phrase hash
 129 my %voice; # id string to voice phrase hash
 130
 131 my $input = $ARGV[0];
 132
 133 my @m;
 134 my $m="blank";
 135
 136 sub trim {
 137     my ($string) = @_;
 138     $string =~ s/^\s+//;
 139     $string =~ s/\s+$//;
 140     return $string;
 141 }
 142
 143 sub match {
 144     my ($string, $pattern)=@_;
 145
 146     $pattern =~ s/\*/.*/g;
 147     $pattern =~ s/\?/./g;
 148
 149     return ($string =~ /^$pattern\z/);
 150 }
 151
 152 sub blank {
 153     # nothing to do
 154 }
 155
 156 my %head;
 157 sub header {
 158     my ($full, $n, $v)=@_;
 159     $head{$n}=$v;
 160 }
 161
 162 my %phrase;
 163 sub phrase {
 164     my ($full, $n, $v)=@_;
 165     $phrase{$n}=$v;
 166 }
 167
 168 sub parsetarget {
 169     my ($debug, $strref, $full, $n, $v)=@_;
 170     my $string;
 171     my @all= split(" *, *", $n);
 172     my $test;
 173     for $test (@all) {
 174 #        print "TEST ($debug) $target for $test\n";
 175         for my $part (split(":", $target)) {
 176             if(match($part, $test)) {
 177                 $string = $v;
 178 #                print "MATCH: $test => $v\n";
 179                 $$strref = $string;
 180                 return $string;
 181             }
 182         }
 183     }
 184 }
 185
 186 my $src;
 187 sub source {
 188     parsetarget("src", \$src, @_);
 189 }
 190
 191 my $dest;
 192 sub dest {
 193     parsetarget("dest", \$dest, @_);
 194 }
 195
 196 my $voice;
 197 sub voice {
 198     parsetarget("voice", \$voice, @_);
 199 }
 200
 201 my %idmap;
 202 my %english;
 203 if($english) {
 204     # For the cases where the english file needs to be scanned/read, we do
 205     # it before we read the translated file. For -b it isn't necessary, but for
 206     # -u it is convenient.
 207
 208     my $idnum=0; # start with a true number
 209     my $vidnum=0x8000; # first voice id
 210     open(ENG, "<$english") || die "Error: can't open $english";
 211     my @phrase;
 212     my $id;
 213     my $maybeid;
 214     my $user;
 215     my $maybeuser;
 216     my $withindest;
 217     my $numphrases = 0;
 218     while(<ENG>) {
 219
 220         # get rid of DOS newlines
 221         $_ =~ s/\r//g;
 222
 223         if($_ =~ /^ *\<phrase\>/) {
 224             # this is the start of a phrase
 225         }
 226         elsif($_ =~ /^ *\<\/phrase\>/) {
 227
 228             # if id is something, when we count and store this phrase
 229             if($id) {
 230                 # voice-only entries get a difference range
 231                 if($id =~ /^VOICE_/) {
 232                     # Assign an ID number to this entry
 233                     $idmap{$id}=$vidnum;
 234                     $vidnum++;
 235                 }
 236                 else {
 237                     # Assign an ID number to this entry
 238                     $idmap{$id}=$idnum;
 239                     $idnum++;
 240    #                 print STDERR "DEST: bumped idnum to $idnum\n";
 241                 }
 242
 243                 # this is the end of a phrase, add it to the english hash
 244                 $english{$id}=join("", @phrase);
 245             }
 246             undef @phrase;
 247             $id="";
 248         }
 249         elsif($_ ne "\n") {
 250             # gather everything related to this phrase
 251             push @phrase, $_;
 252             if($_ =~ /^ *\<dest\>/i) {
 253                 $withindest=1;
 254                 $deststr="";
 255             }
 256             elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
 257                 $withindest=0;
 258
 259                 if($update || ($deststr && ($deststr !~ /^none\z/i))) {
 260                     # we unconditionally always use all IDs when the "update"
 261                     # feature is used
 262                     $id = $maybeid;
 263                     $user = $user2num{$maybeuser};
 264     #                print "DEST: use this id $id\n";
 265                 }
 266                 else {
 267     #                print "skip $maybeid for $name\n";
 268                 }
 269             }
 270             elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
 271                 my ($name, $val)=($1, $2);
 272                 $dest=""; # in case it is left untouched for when the
 273                 # model name isn't "our"
 274                 dest($_, $name, $val);
 275
 276                 if($dest) {
 277                     # Store the current dest string. If this target matches
 278                     # multiple strings, it will get updated several times.
 279                     $deststr = $dest;
 280                 }
 281             }
 282         }
 283
 284         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 285             $maybeid=$1;
 286             $sortorder{$maybeid}=$numphrases++;
 287         }
 288         if($_ =~ /^ *user: ([^ \t\n]+)/i) {
 289             $maybeuser=$1;
 290         }
 291     }
 292     close(ENG);
 293 }
 294
 295 # a function that compares the english phrase with the translated one.
 296 # compare source strings and desc
 297
 298 # Then output the updated version!
 299 sub compare {
 300     my ($idstr, $engref, $locref)=@_;
 301     my ($edesc, $ldesc);
 302     my ($esource, $lsource);
 303     my $mode=0;
 304
 305     for my $l (@$engref) {
 306         if($l =~ /^ *#/) {
 307             # comment
 308             next;
 309         }
 310         if($l =~ /^ *desc: (.*)/) {
 311             $edesc=$1;
 312         }
 313         elsif($l =~ / *\<source\>/i) {
 314             $mode=1;
 315         }
 316         elsif($mode) {
 317             if($l =~ / *\<\/source\>/i) {
 318                 last;
 319             }
 320             $esource .= "$l\n";
 321         }
 322     }
 323
 324     my @show;
 325     my @source;
 326
 327     $mode = 0;
 328     for my $l (@$locref) {
 329         if($l =~ /^ *desc: (.*)/) {
 330             $ldesc=$1;
 331             if(trim($edesc) ne trim($ldesc)) {
 332                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 333             }
 334             push @show, $l;
 335         }
 336         elsif($l =~ / *\<source\>/i) {
 337             $mode=1;
 338             push @show, $l;
 339         }
 340         elsif($mode) {
 341             if($l =~ / *\<\/source\>/i) {
 342                 $mode = 0;
 343                 print @show;
 344                 if(trim($esource) ne trim($lsource)) {
 345                     print "### The <source> section differs from the english!\n",
 346                     "### the previously used one is commented below:\n";
 347                     for(split("\n", $lsource)) {
 348                         print "### $_\n";
 349                     }
 350                     print $esource;
 351                 }
 352                 else {
 353                     print $lsource;
 354                 }
 355                 undef @show; # start over
 356
 357                 push @show, $l;
 358             }
 359             else {
 360                 $lsource .= "$l";
 361             }
 362         }
 363         else {
 364             push @show, $l;
 365         }
 366     }
 367
 368
 369     print @show;
 370 }
 371
 372 my $idcount;        # counter for lang ID numbers
 373 my $voiceid=0x8000; # counter for voice-only ID numbers
 374
 375 #
 376 # Now start the scanning of the selected language string
 377 #
 378
 379 open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
 380 my @phrase;
 381 my $header = 1;
 382 while(<LANG>) {
 383
 384     $line++;
 385
 386     # get rid of DOS newlines
 387     $_ =~ s/\r//g;
 388
 389     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 390         # comment or empty line - output it if it's part of the header
 391         if ($header and ($update || $sortfile)) {
 392             print($_);
 393         }
 394         next;
 395     }
 396     $header = 0;
 397
 398     my $ll = $_;
 399
 400     # print "M: $m\n";
 401
 402     push @phrase, $ll;
 403
 404     # this is an XML-lookalike tag
 405     if (/^(<|[^\"<]+<)([^>]*)>/) {
 406         my $part = $2;
 407         # print "P: $part\n";
 408
 409         if($part =~ /^\//) {
 410             # this was a closing tag
 411
 412             if($part eq "/phrase") {
 413                 # closing the phrase
 414
 415                 my $idstr = $phrase{'id'};
 416                 my $idnum;
 417
 418                 if($binary && !$english{$idstr}) {
 419                     # $idstr doesn't exist for english, skip it\n";
 420                 }
 421                 elsif($dest =~ /^none\z/i) {
 422                     # "none" as dest (without quotes) means that this entire
 423                     # phrase is to be ignored
 424                 }
 425                 elsif($sortfile) {
 426                     $allphrases{$idstr}=join('',@phrase);
 427                 }
 428                 elsif(!$update) {
 429                     # we don't do the fully detailed analysis when we "update"
 430                     # since we don't do it for a particular target etc
 431
 432                     # allow the keyword 'deprecated' to be used on dest and
 433                     # voice strings to mark that as deprecated. It will then
 434                     # be replaced with "".
 435
 436                     $dest =~ s/^deprecate(|d)\z/\"\"/i;
 437                     $voice =~ s/^deprecate(|d)\z/\"\"/i;
 438
 439                     # basic syntax error alerts, if there are no quotes we
 440                     # will assume an empty string was intended
 441                     if($dest !~ /^\"/) {
 442                         print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
 443                         $dest='""';
 444                     }
 445                     if($src !~ /^\"/) {
 446                         print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
 447                         $src='""';
 448                     }
 449                     if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
 450                         print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
 451                         $voice='""';
 452                     }
 453                     if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
 454                         print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
 455                     }
 456
 457                     my $user = $user2num{trim($phrase{'user'})};
 458                     if(!$user) {
 459                         print STDERR "$input:$line:1: warning: unknown user!\n";
 460                         $user = 1;
 461                     }
 462
 463                     # Use the ID name to figure out which id number range we
 464                     # should use for this phrase. Voice-only strings are
 465                     # separated.
 466
 467                     if($idstr =~ /^VOICE/) {
 468                         $idnum = $voiceid++;
 469                     }
 470                     else {
 471                         $idnum = $idcount++;
 472                     }
 473
 474                     $id{$idstr} = $idnum;
 475                     $idnum[$idnum]=$idstr;
 476
 477                     $source{$idstr}=$src;
 478                     $dest{$idstr}=$dest;
 479                     $voice{$idstr}=$voice;
 480
 481                     if($verbose) {
 482                         print "id: $phrase{id} ($idnum)\n";
 483                         print "source: $src\n";
 484                         print "dest: $dest\n";
 485                         print "voice: $voice\n";
 486                         print "user: $user\n";
 487                     }
 488
 489                     undef $src;
 490                     undef $dest;
 491                     undef $voice;
 492                     undef $user;
 493                     undef %phrase;
 494                 }
 495
 496                 if($update) {
 497                     my $e = $english{$idstr};
 498
 499                     if($e) {
 500                         # compare original english with this!
 501                         my @eng = split("\n", $english{$idstr});
 502
 503                         compare($idstr, \@eng, \@phrase);
 504
 505                         $english{$idstr}=""; # clear it
 506                     }
 507                     else {
 508                         print "### $idstr: The phrase is not used. Skipped\n";
 509                     }
 510                 }
 511                 undef @phrase;
 512
 513             } # end of </phrase>
 514
 515             # starts with a slash, this _ends_ this section
 516             $m = pop @m; # get back old value, the previous level's tag
 517             next;
 518         } # end of tag close
 519
 520         # This is an opening (sub) tag
 521
 522         push @m, $m; # store old value
 523         $m = $part;
 524         next;
 525     }
 526
 527     if(/^ *([^:]+): *(.*)/) {
 528         my ($name, $val)=($1, $2);
 529         &$m($_, $name, $val);
 530     }
 531 }
 532 close(LANG);
 533
 534 if($update) {
 535     my $any=0;
 536     for(keys %english) {
 537         if($english{$_}) {
 538             print "###\n",
 539             "### This phrase below was not present in the translated file\n",
 540             "<phrase>\n";
 541             print $english{$_};
 542             print "</phrase>\n";
 543         }
 544     }
 545 }
 546
 547 if ($sortfile) {
 548     for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
 549          print $allphrases{$_};
 550     }
 551 }
 552
 553 if($prefix) {
 554     # We create a .c and .h file
 555
 556     open(HFILE, ">$prefix/lang.h") ||
 557         die "Error: couldn't create file $prefix/lang.h\n";
 558     open(CFILE, ">$prefix/lang_core.c") ||
 559         die "Error: couldn't create file $prefix/lang_core.c\n";
 560
 561    # get header file name
 562    $headername = "$prefix/lang.h";
 563    $headername =~ s/(.*\/)*//;
 564
 565     print HFILE <<MOO
 566 /* This file was automatically generated using genlang */
 567 /*
 568  * The str() macro/functions is how to access strings that might be
 569  * translated. Use it like str(MACRO) and expect a string to be
 570  * returned!
 571  */
 572 #define str(x) language_strings[x]
 573
 574 /* this is the array for holding the string pointers.
 575    It will be initialized at runtime. */
 576 extern unsigned char *language_strings[];
 577 /* this contains the concatenation of all strings, separated by \\0 chars */
 578 extern const unsigned char language_builtin[];
 579
 580 /* The enum below contains all available strings */
 581 enum \{
 582 MOO
 583     ;
 584
 585     print CFILE <<MOO
 586 /* This file was automaticly generated using genlang, the strings come
 587    from "$input" */
 588
 589 #include "$headername"
 590
 591 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 592 const unsigned char language_builtin[] =
 593 MOO
 594 ;
 595
 596     # Output the ID names for the enum in the header file
 597     my $i;
 598     for $i (1 .. $idcount) {
 599         my $name=$idnum[$i - 1]; # get the ID name
 600
 601         $name =~ s/\"//g; # cut off the quotes
 602
 603         printf HFILE ("    %s, /* %d */\n", $name, $i-1);
 604     }
 605
 606 # Output separation marker for last string ID and the upcoming voice IDs
 607
 608     print HFILE <<MOO
 609     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 610     /* --- below this follows voice-only strings --- */
 611     VOICEONLY_DELIMITER = 0x8000,
 612 MOO
 613     ;
 614
 615 # Output the ID names for the enum in the header file
 616     for $i (0x8000 .. ($voiceid-1)) {
 617         my $name=$idnum[$i]; # get the ID name
 618
 619         $name =~ s/\"//g; # cut off the quotes
 620
 621         printf HFILE ("    %s,\n", $name);
 622     }
 623
 624     # Output end of enum
 625     print HFILE "\n};\n/* end of generated enum list */\n";
 626
 627     # Output the target phrases for the source file
 628     for $i (1 .. $idcount) {
 629         my $name=$idnum[$i - 1]; # get the ID
 630         my $dest = $dest{$name}; # get the destination phrase
 631
 632         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 633
 634         if(!$dest) {
 635             # this is just to be on the safe side
 636             $dest = '"\0"';
 637         }
 638
 639         printf CFILE ("    %s\n", $dest);
 640     }
 641
 642 # Output end of string chunk
 643     print CFILE <<MOO
 644 ;
 645 /* end of generated string list */
 646 MOO
 647 ;
 648
 649     close(HFILE);
 650     close(CFILE);
 651 } # end of the c/h file generation
 652 elsif($binary) {
 653     # Creation of a binary lang file was requested
 654
 655     # We must first scan the english file to get the correct order of the id
 656     # numbers used there, as that is what sets the id order for all language
 657     # files. The english file is scanned before the translated file was
 658     # scanned.
 659
 660     open(OUTF, ">$binary") or die "Error: Can't create $binary";
 661     binmode OUTF;
 662     printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
 663
 664     # loop over the target phrases
 665     for $i (1 .. $idcount) {
 666         my $name=$idnum[$i - 1]; # get the ID
 667         my $dest = $dest{$name}; # get the destination phrase
 668
 669         if($dest) {
 670             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 671
 672             # Now, make sure we get the number from the english sort order:
 673             $idnum = $idmap{$name};
 674
 675             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 676         }
 677     }
 678 }
 679 elsif($voiceout) {
 680     # voice output requested, display id: and voice: strings in a v1-like
 681     # fashion
 682
 683     my @engl;
 684
 685     # This loops over the strings in the translated language file order
 686     my @ids = ((0 .. ($idcount-1)));
 687     push @ids, (0x8000 .. ($voiceid-1));
 688
 689     #for my $id (@ids) {
 690     #    print "$id\n";
 691     #}
 692
 693     for $i (@ids) {
 694         my $name=$idnum[$i]; # get the ID
 695         my $dest = $voice{$name}; # get the destination voice string
 696
 697         if($dest) {
 698             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 699
 700             # Now, make sure we get the number from the english sort order:
 701             $idnum = $idmap{$name};
 702
 703             if(length($idnum)) {
 704                 $engl[$idnum] = $i;
 705
 706                 #print "Input index $i output index $idnum\n";
 707             }
 708             else {
 709                 # not used, mark it so
 710                 $engl[$i] = -1
 711             }
 712
 713         }
 714     }
 715     for my $i (@ids) {
 716
 717         my $o = $engl[$i];
 718
 719         if(($o < 0) || !length($o)) {
 720             print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
 721             next;
 722         }
 723
 724         my $name=$idnum[$o]; # get the ID
 725         my $dest = $voice{$name}; # get the destination voice string
 726
 727         print "#$i ($o)\nid: $name\nvoice: $dest\n";
 728     }
 729
 730 }
 731
 732
 733 if($verbose) {
 734     printf("%d ID strings scanned\n", $idcount);
 735
 736     print "* head *\n";
 737     for(keys %head) {
 738         printf "$_: %s\n", $head{$_};
 739     }
 740 }
 741