tools/voice.pl

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id$
   9 #
  10 # Copyright (C) 2007 Jonas Häggqvist
  11 #
  12 # All files in this archive are subject to the GNU General Public License.
  13 # See the file COPYING in the source tree root for full license agreement.
  14 #
  15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  16 # KIND, either express or implied.
  17
  18 use strict;
  19 use warnings;
  20 use File::Basename;
  21 use File::Copy;
  22 use vars qw($V $C $t $l $e $E $s $S $i $v);
  23 use IPC::Open2;
  24 use IPC::Open3;
  25 use Digest::MD5 qw(md5_hex);
  26 use DirHandle;
  27 use open ':encoding(utf8)';
  28 use open ':std';
  29
  30 sub printusage {
  31     print <<USAGE
  32
  33 Usage: voice.pl [options] [path to dir]
  34  -V
  35     Create voice file. You must also specify -t and -l.
  36
  37  -C
  38     Create .talk clips.
  39
  40  -t=<target>
  41     Specify which target you want to build voicefile for. Must include
  42     any features that target supports.
  43
  44  -i=<target_id>
  45     Numeric target id. Needed for voice building.
  46
  47  -l=<language>
  48     Specify which language you want to build. Without .lang extension.
  49
  50  -e=<encoder>
  51     Which encoder to use for voice strings
  52
  53  -E=<encoder options>
  54     Which encoder options to use when compressing voice strings. Enclose
  55     in double quotes if the options include spaces.
  56
  57  -s=<TTS engine>
  58     Which TTS engine to use.
  59
  60  -S=<TTS engine options>
  61     Options to pass to the TTS engine. Enclose in double quotes if the
  62     options include spaces.
  63
  64  -v
  65     Be verbose
  66 USAGE
  67 ;
  68 }
  69
  70 # Initialize TTS engine. May return an object or value which will be passed
  71 # to voicestring and shutdown_tts
  72 sub init_tts {
  73     our $verbose;
  74     my ($tts_engine, $tts_engine_opts, $language) = @_;
  75     my %ret = ("name" => $tts_engine);
  76     # Don't use given/when here - it's not compatible with old perl versions
  77     if ($tts_engine eq 'festival') {
  78         print("> festival $tts_engine_opts --server\n") if $verbose;
  79         my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
  80         my $dummy = *FESTIVAL_SERVER; #suppress warning
  81         $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
  82         $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
  83         $ret{"pid"} = $pid;
  84     }
  85     elsif ($tts_engine eq 'sapi') {
  86         my $toolsdir = dirname($0);
  87         my $path = `cygpath $toolsdir -a -w`;
  88         chomp($path);
  89         $path = $path . '\\';
  90         my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
  91         $cmd =~ s/\\/\\\\/g;
  92         print("> cscript //nologo $cmd\n") if $verbose;
  93         my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
  94         binmode(*CMD_IN, ':encoding(utf16le)');
  95         binmode(*CMD_OUT, ':encoding(utf16le)');
  96         $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
  97         $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
  98         print(CMD_IN "QUERY\tVENDOR\r\n");
  99         my $vendor = readline(*CMD_OUT);
 100         $vendor =~ s/\r\n//;
 101         %ret = (%ret,
 102                 "stdin" => *CMD_IN,
 103                 "stdout" => *CMD_OUT,
 104                 "vendor" => $vendor);
 105     }
 106     return \%ret;
 107 }
 108
 109 # Shutdown TTS engine if necessary.
 110 sub shutdown_tts {
 111     my ($tts_object) = @_;
 112     if ($$tts_object{'name'} eq 'festival') {
 113         # Send SIGTERM to festival server
 114         kill TERM => $$tts_object{"pid"};
 115     }
 116     elsif ($$tts_object{'name'} eq 'sapi') {
 117         print({$$tts_object{"stdin"}} "QUIT\r\n");
 118         close($$tts_object{"stdin"});
 119     }
 120 }
 121
 122 # Apply corrections to a voice-string to make it sound better
 123 sub correct_string {
 124     our $verbose;
 125     my ($string, $language, $tts_object) = @_;
 126     my $orig = $string;
 127     my $corrections = $tts_object->{"corrections"};
 128
 129     foreach (@$corrections) {
 130         my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
 131                 . $_->{replace} . $_->{separator} . $_->{modifier};
 132         eval ('$string =~' . "$r;");
 133     }
 134     if ($orig ne $string) {
 135         printf("%s -> %s\n", $orig, $string) if $verbose;
 136     }
 137     return $string;
 138 }
 139
 140 # Produce a wav file of the text given
 141 sub voicestring {
 142     our $verbose;
 143     my ($string, $output, $tts_engine_opts, $tts_object) = @_;
 144     my $cmd;
 145     my $name = $$tts_object{'name'};
 146     printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
 147     if ($name eq 'festival') {
 148         # festival_client lies to us, so we have to do awful soul-eating
 149         # work with IPC::open3()
 150         $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
 151         # Use festival-prolog.scm if it's there (created by user of tools/configure)
 152         if (-f "festival-prolog.scm") {
 153             $cmd .= " --prolog festival-prolog.scm";
 154         }
 155         print("> $cmd\n") if $verbose;
 156         # Open command, and filehandles for STDIN, STDOUT, STDERR
 157         my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
 158         # Put the string to speak into STDIN and close it
 159         print(CMD_IN $string);
 160         close(CMD_IN);
 161         # Read all output from festival_client (because it LIES TO US)
 162         while (<CMD_ERR>) {
 163         }
 164         close(CMD_OUT);
 165         close(CMD_ERR);
 166     }
 167     elsif ($name eq 'flite') {
 168         $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
 169         print("> $cmd\n") if $verbose;
 170         `$cmd`;
 171     }
 172     elsif ($name eq 'espeak') {
 173         $cmd = "espeak $tts_engine_opts -w \"$output\"";
 174         print("> $cmd\n") if $verbose;
 175         open(ESPEAK, "| $cmd");
 176         print ESPEAK $string . "\n";
 177         close(ESPEAK);
 178     }
 179     elsif ($name eq 'sapi') {
 180         print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
 181     }
 182     elsif ($name eq 'swift') {
 183         $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
 184         print("> $cmd\n") if $verbose;
 185         system($cmd);
 186     }
 187 }
 188
 189 # trim leading / trailing silence from the clip
 190 sub wavtrim {
 191     our $verbose;
 192     my ($file, $threshold, $tts_object) = @_;
 193     printf("Trim \"%s\"\n", $file) if $verbose;
 194     my $cmd = "wavtrim \"$file\" $threshold";
 195     if ($$tts_object{"name"} eq "sapi") {
 196         print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
 197     }
 198     else {
 199         print("> $cmd\n") if $verbose;
 200         `$cmd`;
 201     }
 202 }
 203
 204 # Encode a wav file into the given destination file
 205 sub encodewav {
 206     our $verbose;
 207     my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
 208     printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
 209     my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
 210     if ($$tts_object{"name"} eq "sapi") {
 211         print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
 212     }
 213     else {
 214         print("> $cmd\n") if $verbose;
 215         `$cmd`;
 216     }
 217 }
 218
 219 # synchronize the clip generation / processing if it's running in another process
 220 sub synchronize {
 221     my ($tts_object) = @_;
 222     if ($$tts_object{"name"} eq "sapi") {
 223         print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
 224         my $wait = readline($$tts_object{"stdout"});
 225         #ignore what's actually returned
 226     }
 227 }
 228
 229 # Run genlang and create voice clips for each string
 230 sub generateclips {
 231     our $verbose;
 232     my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
 233     my $english = dirname($0) . '/../apps/lang/english.lang';
 234     my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
 235     my $correctionsfile = dirname($0) . '/voice-corrections.txt';
 236     my $id = '';
 237     my $voice = '';
 238     my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
 239     my $pool_file;
 240     open(VOICEFONTIDS, "> voicefontids");
 241     my $i = 0;
 242     local $| = 1; # make progress indicator work reliably
 243
 244     my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
 245     # add string corrections to tts_object.
 246     my @corrects = ();
 247     open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
 248     while(<VOICEREGEXP>) {
 249         # get first character of line
 250         my $line = $_;
 251         my $separator = substr($_, 0, 1);
 252         if($separator =~ m/\s+/) {
 253             next;
 254         }
 255         chomp($line);
 256         $line =~ s/^.//g; # remove separator at beginning
 257         my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
 258
 259         # does language match?
 260         if($language !~ m/$lang/) {
 261             next;
 262         }
 263         if($$tts_object{"name"} !~ m/$engine/) {
 264             next;
 265         }
 266         my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
 267         if($v !~ m/$vendor/) {
 268             next;
 269         }
 270         push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
 271
 272     }
 273     close(VOICEREGEXP);
 274     $tts_object->{corrections} = [@corrects];
 275
 276     print("Generating voice clips");
 277     print("\n") if $verbose;
 278     for (`$cmd`) {
 279         my $line = $_;
 280         print(VOICEFONTIDS $line);
 281         if ($line =~ /^id: (.*)$/) {
 282             $id = $1;
 283         }
 284         elsif ($line =~ /^voice: "(.*)"$/) {
 285             $voice = $1;
 286             if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
 287                 my $wav = $id . '.wav';
 288                 my $mp3 = $id . '.mp3';
 289
 290                 # Print some progress information
 291                 if (++$i % 10 == 0 and !$verbose) {
 292                     print(".");
 293                 }
 294
 295                 # Apply corrections to the string
 296                 $voice = correct_string($voice, $language, $tts_object);
 297
 298                 # If we have a pool of snippets, see if the string exists there first
 299                 if (defined($ENV{'POOL'})) {
 300                     $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
 301                                          md5_hex("$voice $tts_engine $tts_engine_opts $encoder_opts"),
 302                                          $language);
 303                     if (-f $pool_file) {
 304                         printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
 305                         copy($pool_file, $mp3);
 306                     }
 307                 }
 308
 309                 # Don't generate MP3 if it already exists (probably from the POOL)
 310                 if (! -f $mp3) {
 311                     if ($id eq "VOICE_PAUSE") {
 312                         print("Use distributed $wav\n") if $verbose;
 313                         copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
 314                     }
 315                     else {
 316                         voicestring($voice, $wav, $tts_engine_opts, $tts_object);
 317                         wavtrim($wav, 500, $tts_object);
 318                         # 500 seems to be a reasonable default for now
 319                     }
 320
 321                     encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
 322                     synchronize($tts_object);
 323                     if (defined($ENV{'POOL'})) {
 324                         copy($mp3, $pool_file);
 325                     }
 326                     unlink($wav);
 327                 }
 328                 $voice = "";
 329                 $id = "";
 330             }
 331         }
 332     }
 333     print("\n");
 334     close(VOICEFONTIDS);
 335     shutdown_tts($tts_object);
 336 }
 337
 338 # Assemble the voicefile
 339 sub createvoice {
 340     our $verbose;
 341     my ($language, $target_id) = @_;
 342     my $outfile = "";
 343     $outfile = sprintf("%s.voice", $language);
 344     printf("Saving voice file to %s\n", $outfile) if $verbose;
 345     my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
 346     print("> $cmd\n") if $verbose;
 347     my $output = `$cmd`;
 348     print($output) if $verbose;
 349 }
 350
 351 sub deletemp3s() {
 352     for (glob('*.mp3')) {
 353         unlink($_);
 354     }
 355     for (glob('*.wav')) {
 356         unlink($_);
 357     }
 358 }
 359
 360 sub panic_cleanup {
 361     deletemp3s();
 362     die "moo";
 363 }
 364
 365 # Generate .talk clips
 366 sub gentalkclips {
 367     our $verbose;
 368     my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
 369     my $d = new DirHandle $dir;
 370     while (my $file = $d->read) {
 371         my ($voice, $wav, $mp3);
 372         # Print some progress information
 373         if (++$i % 10 == 0 and !$verbose) {
 374             print(".");
 375         }
 376
 377         # Convert to a complete path
 378         my $path = sprintf("%s/%s", $dir, $file);
 379
 380         $voice = $file;
 381         $wav = sprintf("%s.talk.wav", $path);
 382
 383         # Ignore dot-dirs and talk files
 384         if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
 385             next;
 386         }
 387         # Element is a dir
 388         if ( -d $path) {
 389             gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
 390             $mp3 = sprintf("%s/_dirname.talk", $path);
 391         }
 392         # Element is a file
 393         else {
 394             $mp3 = sprintf("%s.talk", $path);
 395             $voice =~ s/\.[^\.]*$//; # Trim extension
 396         }
 397
 398         printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
 399
 400         voicestring($voice, $wav, $tts_engine_opts, $tts_object);
 401         wavtrim($wav, 500, $tts_object);
 402         # 500 seems to be a reasonable default for now
 403         encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
 404         synchronize($tts_object);
 405         unlink($wav);
 406     }
 407 }
 408
 409
 410 # Check parameters
 411 my $printusage = 0;
 412 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
 413 if (defined($V)) {
 414     unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
 415     unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
 416     unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
 417 }
 418 elsif (defined($C)) {
 419     unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
 420 }
 421 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
 422 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
 423 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
 424 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
 425 if ($printusage == 1) { printusage(); exit 1; }
 426
 427 if (defined($v) or defined($ENV{'V'})) {
 428     our $verbose = 1;
 429 }
 430
 431 # add the tools dir to the path temporarily, for calling various tools
 432 $ENV{'PATH'} = dirname($0) . ':' . $ENV{'PATH'};
 433
 434
 435 # Do what we're told
 436 if ($V == 1) {
 437     # Only do the panic cleanup for voicefiles
 438     $SIG{INT} = \&panic_cleanup;
 439     $SIG{KILL} = \&panic_cleanup;
 440
 441     printf("Generating voice\n  Target: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n",
 442         $t, $l, $e, $E, $s, $S);
 443     generateclips($l, $t, $e, $E, $s, $S);
 444     createvoice($l, $i);
 445     deletemp3s();
 446 }
 447 elsif ($C) {
 448     printf("Generating .talk clips\n  Path: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
 449     my $tts_object = init_tts($s, $S, $l);
 450     gentalkclips($ARGV[0], $tts_object, $e, $E, $S, 0);
 451     shutdown_tts($tts_object);
 452 }
 453 else {
 454     printusage();
 455     exit 1;
 456 }