tools/voice.pl

   1 #!/usr/bin/perl -s
   2 #             __________               __   ___.
   3 #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 #                     \/            \/     \/    \/            \/
   8 # $Id:
   9 #
  10 # Copyright (C) 2007 Jonas Häggqvist
  11 #
  12 # All files in this archive are subject to the GNU General Public License.
  13 # See the file COPYING in the source tree root for full license agreement.
  14 #
  15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  16 # KIND, either express or implied.
  17
  18 use strict;
  19 use warnings;
  20 use File::Basename;
  21 use File::Copy;
  22 use Switch;
  23 use vars qw($V $C $t $l $e $E $s $S $i $v);
  24 use IPC::Open3;
  25 use Digest::MD5 qw(md5_hex);
  26
  27 sub printusage {
  28     print <<USAGE
  29
  30 Usage: voice.pl [options] [path to dir]
  31  -V
  32     Create voice file. You must also specify -t and -l.
  33
  34  -C
  35     Create .talk clips.
  36
  37  -t=<target>
  38     Specify which target you want to build voicefile for. Must include
  39     any features that target supports.
  40
  41  -i=<target_id>
  42     Numeric target id. Needed for voice building.
  43
  44  -l=<language>
  45     Specify which language you want to build. Without .lang extension.
  46
  47  -e=<encoder>
  48     Which encoder to use for voice strings
  49
  50  -E=<encoder options>
  51     Which encoder options to use when compressing voice strings. Enclose
  52     in double quotes if the options include spaces.
  53
  54  -s=<TTS engine>
  55     Which TTS engine to use.
  56
  57  -S=<TTS engine options>
  58     Options to pass to the TTS engine. Enclose in double quotes if the
  59     options include spaces.
  60
  61  -v
  62     Be verbose
  63 USAGE
  64 ;
  65 }
  66
  67 # Initialize TTS engine. May return an object or value which will be passed
  68 # to voicestring and shutdown_tts
  69 sub init_tts {
  70     our $verbose;
  71     my ($tts_engine, $tts_engine_opts, $language) = @_;
  72     my $ret = undef;
  73     switch($tts_engine) {
  74         case "festival" {
  75             print("> festival $tts_engine_opts --server\n") if $verbose;
  76             my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
  77             $ret = *FESTIVAL_SERVER;
  78             $ret = $pid;
  79             $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
  80             $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
  81         }
  82         case "sapi5" {
  83             my $toolsdir = dirname($0);
  84             my $path = `cygpath $toolsdir -a -w`;
  85             chomp($path);
  86             $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
  87             $path =~ s/\\/\\\\/g;
  88             print("> cscript /B $path\n") if $verbose;
  89             my $pid = open(F, "| cscript /B $path");
  90             $ret = *F;
  91             $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
  92             $SIG{KILL} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
  93         }
  94     }
  95     return $ret;
  96 }
  97
  98 # Shutdown TTS engine if necessary.
  99 sub shutdown_tts {
 100     my ($tts_engine, $tts_object) = @_;
 101     switch($tts_engine) {
 102         case "festival" {
 103             # Send SIGTERM to festival server
 104             kill TERM => $tts_object;
 105         }
 106         case "sapi5" {
 107             print($tts_object "\r\n\r\n");
 108             close($tts_object);
 109         }
 110     }
 111 }
 112
 113 # Apply corrections to a voice-string to make it sound better
 114 sub correct_string {
 115     our $verbose;
 116     my ($string, $language, $tts_engine) = @_;
 117     my $orig = $string;
 118     switch($language) {
 119         # General for all engines and languages (perhaps - just an example)
 120         $string =~ s/USB/U S B/;
 121
 122         case ("deutsch") {
 123             switch($tts_engine) {
 124                 $string =~ s/alphabet/alfabet/;
 125                 $string =~ s/alkaline/alkalein/;
 126                 $string =~ s/ampere/amper/;
 127                 $string =~ s/byte(s?)\b/beit$1/;
 128                 $string =~ s/\bdezibel\b/de-zibell/;
 129                 $string =~ s/energie\b/ener-gie/;
 130                 $string =~ s/\bflash\b/fläsh/g;
 131                 $string =~ s/\bfirmware(s?)\b/firmwer$1/;
 132                 $string =~ s/\bid3 tag\b/id3 täg/g; # can't just use "tag" here
 133                 $string =~ s/\bloudness\b/laudness/;
 134                 $string =~ s/\bnumerisch\b/numehrisch/;
 135                 $string =~ s/\brücklauf\b/rück-lauf/;
 136                 $string =~ s/\bsuchlauf\b/such-lauf/;
 137             }
 138         }
 139     }
 140     if ($orig ne $string) {
 141         printf("%s -> %s\n", $orig, $string) if $verbose;
 142     }
 143     return $string;
 144 }
 145
 146 # Produce a wav file of the text given
 147 sub voicestring {
 148     our $verbose;
 149     my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
 150     my $cmd;
 151     printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
 152     switch($tts_engine) {
 153         case "festival" {
 154             # festival_client lies to us, so we have to do awful soul-eating
 155             # work with IPC::open3()
 156             $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
 157             print("> $cmd\n") if $verbose;
 158             # Open command, and filehandles for STDIN, STDOUT, STDERR
 159             my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
 160             # Put the string to speak into STDIN and close it
 161             print(CMD_IN $string);
 162             close(CMD_IN);
 163             # Read all output from festival_client (because it LIES TO US)
 164             while (<CMD_ERR>) {
 165             }
 166             close(CMD_OUT);
 167             close(CMD_ERR);
 168         }
 169         case "flite" {
 170             $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
 171             print("> $cmd\n") if $verbose;
 172             `$cmd`;
 173         }
 174         case "espeak" {
 175             # xxx: $tts_engine_opts isn't used
 176             $cmd = "espeak $tts_engine_opts -w $output";
 177             print("> $cmd\n") if $verbose;
 178             open(ESPEAK, "| $cmd");
 179             print ESPEAK $string . "\n";
 180             close(ESPEAK);
 181         }
 182         case "sapi5" {
 183             print($tts_object sprintf("%s\r\n%s\r\n", $string, $output));
 184         }
 185     }
 186 }
 187
 188 # Encode a wav file into the given destination file
 189 sub encodewav {
 190     our $verbose;
 191     my ($input, $output, $encoder, $encoder_opts) = @_;
 192     printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
 193     switch ($encoder) {
 194         case 'lame' {
 195             my $cmd = "lame $encoder_opts \"$input\" \"$output\"";
 196             print("> $cmd\n") if $verbose;
 197             `lame $encoder_opts "$input" "$output"`;
 198             `$cmd`;
 199         }
 200         case 'vorbis' {
 201             `oggenc $encoder_opts "$input" -o "$output"`;
 202         }
 203         case 'speexenc' {
 204             `speexenc $encoder_opts "$input" "$output"`;
 205         }
 206     }
 207 }
 208
 209 sub wavtrim {
 210     our $verbose;
 211     my ($file) = @_;
 212     my $cmd = dirname($0) . "/wavtrim \"$file\"";
 213     print("> $cmd\n") if $verbose;
 214     `$cmd`;
 215 }
 216
 217 # Run genlang and create voice clips for each string
 218 sub generateclips {
 219     our $verbose;
 220     my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
 221     my $genlang = dirname($0) . '/genlang';
 222     my $english = dirname($0) . '/../apps/lang/english.lang';
 223     my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
 224     my $id = '';
 225     my $voice = '';
 226     my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null";
 227     my $pool_file;
 228     open(VOICEFONTIDS, "> voicefontids");
 229     my $i = 0;
 230
 231     my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
 232     print("Generating voice clips");
 233     print("\n") if $verbose;
 234     for (`$cmd`) {
 235         my $line = $_;
 236         print(VOICEFONTIDS $line);
 237         if ($line =~ /^id: (.*)$/) {
 238             $id = $1;
 239         }
 240         elsif ($line =~ /^voice: "(.*)"$/) {
 241             $voice = $1;
 242             if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
 243                 my $wav = $id . '.wav';
 244                 my $mp3 = $id . '.mp3';
 245
 246                 # Print some progress information
 247                 if (++$i % 10 == 0 and !$verbose) {
 248                     print(".");
 249                 }
 250
 251                 # Apply corrections to the string
 252                 $voice = correct_string($voice);
 253
 254                 # If we have a pool of snippes, see if the string exists there first
 255                 if (defined($ENV{'POOL'})) {
 256                     $pool_file = sprintf("%s/%s-%s-%s.mp3", $ENV{'POOL'}, md5_hex($voice), $language, $tts_engine);
 257                     if (-f $pool_file) {
 258                         printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
 259                         copy($pool_file, $mp3);
 260                     }
 261                 }
 262
 263                 # Don't generate MP3 if it already exists (probably from the POOL)
 264                 if (! -f $mp3) {
 265                     if ($id eq "VOICE_PAUSE") {
 266                         print("Use distributed $wav\n") if $verbose;
 267                         copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
 268                     }
 269                     else {
 270                         voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
 271                         wavtrim($wav, 500); # 500 seems to be a reasonable default for now
 272                     }
 273
 274                     encodewav($wav, $mp3, $encoder, $encoder_opts);
 275                     if (defined($ENV{'POOL'})) {
 276                         copy($mp3, $pool_file);
 277                     }
 278                     unlink($wav);
 279                 }
 280                 $voice = "";
 281                 $id = "";
 282             }
 283         }
 284     }
 285     print("\n");
 286     close(VOICEFONTIDS);
 287     shutdown_tts($tts_engine, $tts_object);
 288 }
 289
 290 # Assemble the voicefile
 291 sub createvoice {
 292     our $verbose;
 293     my ($language, $target_id) = @_;
 294     my $voicefont = dirname($0) . '/voicefont';
 295     my $outfile = "";
 296     my $i = 0;
 297     do {
 298         $outfile = sprintf("%s%s.voice", $language, ($i++ == 0 ? '' : '-'.$i));
 299     } while (-f $outfile);
 300     printf("Saving voice file to %s\n", $outfile) if $verbose;
 301     my $cmd = "$voicefont 'voicefontids' $target_id ./ $outfile";
 302     print("> $cmd\n") if $verbose;
 303     my $output = `$cmd`;
 304     print($output) if $verbose;
 305 }
 306
 307 sub deletemp3s() {
 308     for (glob('*.mp3')) {
 309         unlink($_);
 310     }
 311     for (glob('*.wav')) {
 312         unlink($_);
 313     }
 314 }
 315
 316 sub panic_cleanup {
 317     deletemp3s();
 318     die "moo";
 319 }
 320
 321 # Check parameters
 322 my $printusage = 0;
 323 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
 324 if (defined($V)) {
 325     unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
 326     unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
 327     unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
 328 }
 329 elsif (defined($C)) {
 330     unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
 331 }
 332 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
 333 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
 334 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
 335 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
 336 if ($printusage == 1) { printusage(); exit 1; }
 337
 338 $SIG{INT} = \&panic_cleanup;
 339 $SIG{KILL} = \&panic_cleanup;
 340
 341 if (defined($v) or defined($ENV{'V'})) {
 342     our $verbose = 1;
 343 }
 344
 345
 346 # Do what we're told
 347 if ($V == 1) {
 348     printf("Generating voice\n  Target: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n",
 349         $t, $l, $e, $E, $s, $S);
 350     generateclips($l, $t, $e, $E, $s, $S);
 351     createvoice($l, $i);
 352     deletemp3s();
 353 }
 354 elsif ($C) {
 355     # xxx: Implement .talk clip generation
 356 }
 357 else {
 358     printusage();
 359     exit 1;
 360 }