tools/genlang2

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2006 by Daniel Stenberg
  11 #
  12
  13 # binary version for the binary lang file
  14 my $langversion = 2; # 2 is the latest one used in the v1 format
  15
  16 # A note for future users and readers: The original v1 language system allowed
  17 # the build to create and use a different language than english built-in. We
  18 # removed that feature from our build-system, but the build scripts still had
  19 # the ability. But, starting now, this ability is no longer provided since I
  20 # figured it was boring and unnecessary to write support for now since we
  21 # don't use it anymore.
  22
  23 if(!$ARGV[0]) {
  24     print <<MOO
  25 Usage: genlang2 [options] <langv2 file>
  26
  27  -p=<prefix>
  28     Make the tool create a [prefix].c and [prefix].h file.
  29
  30  -b=<outfile>
  31     Make the tool create a binary language (.lng) file namaed [outfile].
  32     The use of this option requires that you also use -e.
  33
  34  -u
  35     Update language file. Given the translated file and the most recent english
  36     file, you\'ll get an updated version sent to stdout. Suitable action to do
  37     when you intend to update a translation.
  38
  39  -e=<english lang file>
  40     Point out the english (original source) file, to use that as master
  41     language template. Used in combination with -b or -u.
  42
  43  -t=<target>
  44     Specify which target you want the translations/phrases for. Required when
  45     -b or -p is used.
  46
  47  -v
  48     Enables verbose (debug) output.
  49 MOO
  50 ;
  51     exit;
  52 }
  53
  54 # How update works:
  55 #
  56 # 1) scan the english file, keep the whole <phrase> for each phrase.
  57 # 2) read the translated file, for each end of phrase, compare:
  58 #  A) all source strings, if there's any change there should be a comment about
  59 #     it output
  60 #  B) the desc fields
  61 #
  62 # 3) output the phrase with the comments from above
  63 # 4) check which phrases that the translated version didn't have, and spit out
  64 #    the english version of those
  65 #
  66
  67 my $prefix = $p;
  68 my $binary = $b;
  69 my $update = $u;
  70
  71 my $english = $e;
  72
  73 my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0);
  74
  75 if($check > 1) {
  76     print "Please use only one of -p, -u and -b\n";
  77     exit;
  78 }
  79 if(!$check) {
  80     print "Please use at least one of -p, -u and -b\n";
  81     exit;
  82 }
  83 if(($binary || $update) && !$english) {
  84     print "Please use -e too when you use -b or -u\n";
  85     exit;
  86 }
  87
  88 my $target = $t;
  89 if(!$target && !$update) {
  90     print "Please specify a target (with -t)!\n";
  91     exit;
  92 }
  93 my $verbose=$v;
  94
  95 my %id; # string to num hash
  96 my @idnum; # num to string array
  97
  98 my %source; # id string to source phrase hash
  99 my %dest; # id string to dest phrase hash
 100 my %voice; # id string to voice phrase hash
 101
 102
 103 my $input = $ARGV[0];
 104
 105 my @m;
 106 my $m="blank";
 107
 108 sub match {
 109     my ($string, $pattern)=@_;
 110
 111     $pattern =~ s/\*/.?*/g;
 112     $pattern =~ s/\?/./g;
 113
 114     return ($string =~ $pattern);
 115 }
 116
 117 sub blank {
 118     # nothing to do
 119 }
 120
 121 my %head;
 122 sub header {
 123     my ($full, $n, $v)=@_;
 124     $head{$n}=$v;
 125 }
 126
 127 my %phrase;
 128 sub phrase {
 129     my ($full, $n, $v)=@_;
 130     $phrase{$n}=$v;
 131 }
 132
 133 sub parsetarget {
 134     my ($debug, $strref, $full, $n, $v)=@_;
 135     my $string;
 136     my @all= split(" *, *", $n);
 137     my $test;
 138     for $test (@all) {
 139 #        print "TEST ($debug) $target for $test\n";
 140         if(match($target, $test)) {
 141             $string = $v;
 142 #            print "MATCH: $test => $v\n";
 143         }
 144     }
 145     if($string) {
 146         $$strref = $string;
 147     }
 148     return $string;
 149 }
 150
 151 my $src;
 152 sub source {
 153     parsetarget("src", \$src, @_);
 154 }
 155
 156 my $dest;
 157 sub dest {
 158     parsetarget("dest", \$dest, @_);
 159 }
 160
 161 my $voice;
 162 sub voice {
 163     parsetarget("voice", \$voice, @_);
 164 }
 165
 166 my %english;
 167 if($english) {
 168     # For the cases where the english file needs to be scanned/read, we do
 169     # it before we read the translated file. For -b it isn't necessary, but for
 170     # -u it is convenient.
 171
 172     my $idnum=0; # start with a true number
 173     my %idmap;
 174     open(ENG, "<$english") || die "can't open $english";
 175     my @phrase;
 176     my $id;
 177     while(<ENG>) {
 178
 179         # get rid of DOS newlines
 180         $_ =~ s/\r//g;
 181
 182         if($_ =~ /^ *\<phrase\>/) {
 183             # this is the start of a phrase
 184         }
 185         elsif($_ =~ /^ *\<\/phrase\>/) {
 186             # this is the end of a phrase, add it to the english hash
 187             $english{$id}=join("", @phrase);
 188             undef @phrase;
 189         }
 190         elsif($_ ne "\n") {
 191             # gather everything related to this phrase
 192             push @phrase, $_;
 193         }
 194
 195         if($_ =~ /^ *id: ([^ \t\n]+)/i) {
 196             $id=$1;
 197             # Skip voice-only entries
 198             if($id =~ /^VOICE_/) {
 199                 next;
 200             }
 201
 202             # Assign an ID number to this entry
 203             $idmap{$id}=$idnum;
 204             $idnum++;
 205         }
 206     }
 207     close(ENG);
 208 }
 209
 210 # a function that compares the english phrase with the translated one.
 211 # compare source strings and desc
 212
 213 # Then output the updated version!
 214 sub compare {
 215     my ($idstr, $engref, $locref)=@_;
 216     my ($edesc, $ldesc);
 217     my ($esource, $lsource);
 218     my $mode=0;
 219
 220     for my $l (@$engref) {
 221         if($l =~ /^ *desc: (.*)/) {
 222             $edesc=$1;
 223         }
 224         elsif($l =~ / *\<source\>/i) {
 225             $mode=1;
 226         }
 227         elsif($mode) {
 228             if($l =~ / *\<\/source\>/i) {
 229                 last;
 230             }
 231             $esource .= "$l\n";
 232         }
 233     }
 234
 235     my @show;
 236     my @source;
 237
 238     $mode = 0;
 239     for my $l (@$locref) {
 240         if($l =~ /^ *desc: (.*)/) {
 241             $ldesc=$1;
 242             if($edesc ne $ldesc) {
 243                 $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
 244             }
 245             push @show, $l;
 246         }
 247         elsif($l =~ / *\<source\>/i) {
 248             $mode=1;
 249             push @show, $l;
 250         }
 251         elsif($mode) {
 252             if($l =~ / *\<\/source\>/i) {
 253                 $mode = 0;
 254                 print @show;
 255                 if($esource ne $lsource) {
 256                     print "### The <source> section differs from the english!\n",
 257                     "### the previously used one is commented below:\n";
 258                     for(split("\n", $lsource)) {
 259                         print "### $_\n";
 260                     }
 261                     print $esource;
 262                 }
 263                 else {
 264                     print $lsource;
 265                 }
 266                 undef @show; # start over
 267
 268                 push @show, $l;
 269             }
 270             else {
 271                 $lsource .= "$l";
 272             }
 273         }
 274         else {
 275             push @show, $l;
 276         }
 277     }
 278
 279
 280     print @show;
 281 }
 282
 283 my $idcount;        # counter for lang ID numbers
 284 my $voiceid=0x8000; # counter for voice-only ID numbers
 285
 286 #
 287 # Now start the scanning of the selected language string
 288 #
 289
 290 open(LANG, "<$input");
 291 my @phrase;
 292 while(<LANG>) {
 293
 294     $line++;
 295
 296     # get rid of DOS newlines
 297     $_ =~ s/\r//g;
 298
 299     if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
 300         # comment or empty line
 301         next;
 302     }
 303
 304     my $ll = $_;
 305
 306    # print "M: $m\n";
 307
 308     push @phrase, $ll;
 309
 310     # this is an XML-lookalike tag
 311     if(/ *<([^>]*)>/) {
 312         my $part = $1;
 313         #print "P: $part\n";
 314
 315         if($part =~ /^\//) {
 316             # this was a closing tag
 317
 318             if($part eq "/phrase") {
 319                 # closing the phrase
 320
 321                 my $idstr = $phrase{'id'};
 322                 my $idnum;
 323
 324                 if($dest =~ /^none\z/i) {
 325                     # "none" as dest means that this entire phrase is to be
 326                     # ignored
 327                     print "dest is NONE!\n";
 328                 }
 329                 else {
 330
 331                     # Use the ID name to figure out which id number range we
 332                     # should use for this phrase. Voice-only strings are
 333                     # separated.
 334
 335                     if($idstr =~ /^VOICE/) {
 336                         $idnum = $voiceid++;
 337                     }
 338                     else {
 339                         $idnum = $idcount++;
 340                     }
 341
 342                     $id{$idstr} = $idnum;
 343                     $idnum[$idnum]=$idstr;
 344
 345                     $source{$idstr}=$src;
 346                     $dest{$idstr}=$dest;
 347                     $voice{$idstr}=$voice;
 348
 349                     if($verbose) {
 350                         print "id: $phrase{id} ($idnum)\n";
 351                         print "source: $src\n";
 352                         print "dest: $dest\n";
 353                         print "voice: $voice\n";
 354                     }
 355
 356                     undef $src;
 357                     undef $dest;
 358                     undef $voice;
 359                     undef %phrase;
 360                 }
 361
 362                 if($update) {
 363                     my $e = $english{$idstr};
 364
 365                     if($e) {
 366                         # compare original english with this!
 367                         my @eng = split("\n", $english{$idstr});
 368
 369                         compare($idstr, \@eng, \@phrase);
 370
 371                         $english{$idstr}=""; # clear it
 372                     }
 373                     else {
 374                         print "### $idstr: The phrase is not used. Skipped\n";
 375                     }
 376                 }
 377                 undef @phrase;
 378
 379             } # end of </phrase>
 380
 381             # starts with a slash, this _ends_ this section
 382             $m = pop @m; # get back old value, the previous level's tag
 383             next;
 384         } # end of tag close
 385
 386         # This is an opening (sub) tag
 387
 388         push @m, $m; # store old value
 389         $m = $1;
 390         next;
 391     }
 392
 393     if(/^ *([^:]+): *(.*)/) {
 394         my ($name, $val)=($1, $2);
 395         &$m($_, $name, $val);
 396     }
 397 }
 398 close(LANG);
 399
 400 if($update) {
 401     my $any=0;
 402     for(keys %english) {
 403         if($english{$_}) {
 404             print "###\n",
 405             "### This phrase below was not present in the translated file\n",
 406             "<phrase>\n";
 407             print $english{$_};
 408             print "</phrase>\n";
 409         }
 410     }
 411 }
 412
 413 if($prefix) {
 414     # We create a .c and .h file
 415
 416     open(HFILE, ">$prefix.h");
 417     open(CFILE, ">$prefix.c");
 418
 419     print HFILE <<MOO
 420 /* This file was automatically generated using genlang2 */
 421 /*
 422  * The str() macro/functions is how to access strings that might be
 423  * translated. Use it like str(MACRO) and expect a string to be
 424  * returned!
 425  */
 426 #define str(x) language_strings[x]
 427
 428 /* this is the array for holding the string pointers.
 429    It will be initialized at runtime. */
 430 extern unsigned char *language_strings[];
 431 /* this contains the concatenation of all strings, separated by \\0 chars */
 432 extern const unsigned char language_builtin[];
 433
 434 /* The enum below contains all available strings */
 435 enum {
 436 MOO
 437     ;
 438
 439 print CFILE <<MOO
 440 /* This file was automaticly generated using genlang2, the strings come
 441    from "$input" */
 442
 443 #include "$prefix.h"
 444
 445 unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
 446 const unsigned char language_builtin[] =
 447 MOO
 448     ;
 449
 450  # Output the ID names for the enum in the header file
 451 my $i;
 452 for $i (1 .. $idcount) {
 453     my $name=$idnum[$i - 1]; # get the ID name
 454
 455     $name =~ s/\"//g; # cut off the quotes
 456
 457     printf HFILE ("    %s,\n", $name);
 458 }
 459
 460 # Output separation marker for last string ID and the upcoming voice IDs
 461
 462 print HFILE <<MOO
 463     LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
 464     /* --- below this follows voice-only strings --- */
 465     VOICEONLY_DELIMITER = 0x8000,
 466 MOO
 467     ;
 468
 469 # Output the ID names for the enum in the header file
 470 my $i;
 471 for $i (0x8000 .. ($voiceid-1)) {
 472     my $name=$idnum[$i]; # get the ID name
 473
 474     $name =~ s/\"//g; # cut off the quotes
 475
 476     printf HFILE ("    %s,\n", $name);
 477 }
 478
 479 # Output end of enum
 480     print HFILE "\n};\n/* end of generated enum list */\n";
 481
 482     # Output the target phrases for the source file
 483     for $i (1 .. $idcount) {
 484         my $name=$idnum[$i - 1]; # get the ID
 485         my $dest = $dest{$name}; # get the destination phrase
 486
 487         $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
 488
 489         if(!$dest) {
 490             # this is just to be on the safe side
 491             $dest = '"\0"';
 492         }
 493
 494         printf CFILE ("    %s\n", $dest);
 495     }
 496
 497 # Output end of string chunk
 498 print CFILE <<MOO
 499 ;
 500 /* end of generated string list */
 501 MOO
 502     ;
 503
 504     close(HFILE);
 505     close(CFILE);
 506 } # end of the c/h file generation
 507 elsif($binary) {
 508     # Creation of a binary lang file was requested
 509
 510     # We must first scan the english file to get the correct order of the id
 511     # numbers used there, as that is what sets the id order for all language
 512     # files. The english file is scanned before the translated file was
 513     # scanned.
 514
 515     open(OUTF, ">$binary") or die "Can't create $binary";
 516     binmode OUTF;
 517     printf OUTF ("\x1a%c", $langversion); # magic lang file header
 518
 519     # loop over the target phrases
 520     for $i (1 .. $idcount) {
 521         my $name=$idnum[$i - 1]; # get the ID
 522         my $dest = $dest{$name}; # get the destination phrase
 523
 524         if($dest) {
 525             $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
 526
 527             # Now, make sure we get the number from the english sort order:
 528             $idnum = $idmap{$name};
 529
 530             printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
 531             if($debug) {
 532                 printf("%02x => %s\n", $idnum, $value);
 533             }
 534         }
 535     }
 536 }
 537
 538
 539 if($verbose) {
 540     printf("%d ID strings scanned\n", $idcount);
 541
 542     print "* head *\n";
 543     for(keys %head) {
 544         printf "$_: %s\n", $head{$_};
 545     }
 546 }
 547