3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
32 Usage: voice.pl [options] [path to dir]
34 Create voice file. You must also specify -t and -l.
40 Specify which target you want to build voicefile for. Must include
41 any features that target supports.
44 Numeric target id. Needed for voice building.
47 Specify which language you want to build. Without .lang extension.
50 Which encoder to use for voice strings
53 Which encoder options to use when compressing voice strings. Enclose
54 in double quotes if the options include spaces.
57 Which TTS engine to use.
59 -S=<TTS engine options>
60 Options to pass to the TTS engine. Enclose in double quotes if the
61 options include spaces.
69 # Initialize TTS engine. May return an object or value which will be passed
70 # to voicestring and shutdown_tts
# NOTE(review): the enclosing sub header, the per-engine branch labels and the
# closing braces are not visible in this excerpt; the comments below describe
# only the statements that are shown.
# Arguments: engine name, engine option string, language name.
73 my ($tts_engine, $tts_engine_opts, $language) = @_;
# Engine state hash. Its "name" entry is what the other routines read through
# $$tts_object{"name"}; presumably a reference to it is returned - confirm.
74 my %ret = ("name" => $tts_engine);
# Festival: start "festival --server" behind a write pipe with its
# stdout/stderr discarded. For a piped open() the return value is the pid of
# the child process.
77 print("> festival $tts_engine_opts --server\n") if $verbose;
78 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
79 my $dummy = *FESTIVAL_SERVER
; #suppress warning
# On interrupt: send SIGTERM to the festival child, then call panic_cleanup.
# NOTE(review): print("foo") / print("boo") look like leftover debug output.
# NOTE(review): SIGKILL cannot be caught, so the $SIG{KILL} handler below is
# never invoked.
80 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
81 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
# SAPI: build the command line for sapi_voice.vbs. The script's location is
# derived from the directory of $0 and passed through cygpath (-a -w:
# presumably absolute Windows-style path - confirm against cygpath docs).
85 my $toolsdir = dirname
($0);
86 my $path = `cygpath $toolsdir -a -w`;
89 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
91 print("> cscript //nologo $cmd\n") if $verbose;
# Run the script under cscript with two pipes: CMD_OUT is read from (the
# script's output), CMD_IN is written to (the script's input).
92 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
# On interrupt: send the QUIT command to the script, then call panic_cleanup.
# NOTE(review): as above, the $SIG{KILL} handler can never run.
93 $SIG{INT
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
94 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
# Send a tab-separated QUERY VENDOR command and read one line of reply; the
# raw line (not chomped in the visible code) is stored under "vendor".
95 print(CMD_IN
"QUERY\tVENDOR\r\n");
96 my $vendor = readline(CMD_OUT
);
# Tail of a key/value list holding the script's output handle and the vendor
# line; the start of this statement is not visible in this excerpt.
100 "stdout" => *CMD_OUT
,
101 "vendor" => $vendor);
107 # Shutdown TTS engine if necessary.
# NOTE(review): sub header, case labels and closing braces are not visible in
# this excerpt.
# Argument: a hash reference describing the engine; the entries read here are
# "name", "pid" and "stdin".
109 my ($tts_object) = @_;
# Dispatch on the engine name.
110 switch
($$tts_object{"name"}) {
112 # Send SIGTERM to festival server
113 kill TERM
=> $$tts_object{"pid"};
# Send the QUIT command over the stored input handle and close that handle
# (presumably the sapi branch; its case label is not visible).
116 print({$$tts_object{"stdin"}} "QUIT\r\n");
117 close($$tts_object{"stdin"});
122 # Apply corrections to a voice-string to make it sound better
# NOTE(review): sub header, the per-language case labels and closing braces
# are not visible in this excerpt.
# Arguments: the text to speak, the language name, and the engine hash
# reference (its "name" and "vendor" entries are consulted below).
# Conventions used by the substitutions that follow:
#   - /i makes the match case-insensitive, /g replaces every occurrence;
#   - "(s?)" captures an optional plural "s" that is re-appended via $1;
#   - \b restricts a rule to whole words.
125 my ($string, $language, $tts_object) = @_;
128 # General for all engines and languages
# These two rules are case-sensitive (no /i) and spell the acronyms out.
129 $string =~ s/USB/U S B/g;
130 $string =~ s/ID3/I D 3/g;
# Engine-specific rules that apply regardless of language.
133 switch
($$tts_object{"name"}) {
134 case
["sapi","festival"] {
135 $string =~ s/plugin(s?)/plug-in$1/ig;
140 # for all german engines (e.g. for english words)
141 $string =~ s/alkaline/alkalein/ig;
142 $string =~ s/byte(s?)/beit$1/ig;
143 $string =~ s/clip(s?)/klipp$1/ig;
144 $string =~ s/cuesheet/kjuschiet/ig;
145 $string =~ s/dither/didder/ig;
146 $string =~ s/equalizer/iquileiser/ig;
147 $string =~ s/\bflash\b/fläsh/ig;
148 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
# This rule matches the spaced-out form produced by the ID3 rule above, so it
# relies on that rule having run first.
149 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
150 $string =~ s/\bloudness\b/laudness/ig;
151 $string =~ s/\bunicode\b/unikod/ig;
# Further rules that depend on the engine and, for SAPI, on the vendor string
# stored in the engine hash (the vendor case label is not visible here).
152 switch
($$tts_object{"name"}) {
153 case
"sapi" { # just for SAPI
154 switch
($$tts_object{"vendor"}) {
156 $string =~ s/alphabet/alfabet/ig;
157 $string =~ s/ampere/amper/ig;
158 $string =~ s/\bdezibel\b/de-zibell/ig;
159 $string =~ s/diddering/didde-ring/ig;
160 $string =~ s/energie\b/ener-gie/ig;
161 $string =~ s/\bnumerisch\b/numehrisch/ig;
162 $string =~ s/\brücklauf\b/rück-lauf/ig;
163 $string =~ s/\bsuchlauf\b/such-lauf/ig;
170 # for all swedish engines (e.g. for english words)
171 $string =~ s/kilobyte/kilobajt/ig;
172 $string =~ s/megabyte/megabajt/ig;
173 $string =~ s/gigabyte/gigabajt/ig;
174 $string =~ s/\bloudness\b/laudness/ig;
176 switch
($$tts_object{"name"}) {
177 case
"espeak" { # just for eSpeak
178 $string =~ s/ampere/ampär/ig;
179 $string =~ s/bokmärken/bok-märken/ig;
180 $string =~ s/generella/schenerella/ig;
181 $string =~ s/dithering/diddering/ig;
182 $string =~ s/\bunicode\b/jynikod/ig;
183 $string =~ s/uttoning/utoning/ig;
184 $string =~ s/procent/pro-cent/ig;
185 $string =~ s/spellistor/spelistor/ig;
186 $string =~ s/cuesheet/qjyschiit/ig;
191 # for all italian engines (e.g. for english words)
# Order matters in this group: "Crossfade" is rewritten before the generic
# "fade" rule, so the generic rule no longer sees it.
192 $string =~ s/Replaygain/Ripleyghein/ig;
193 $string =~ s/Crossfade/Crossfeid/ig;
194 $string =~ s/beep/Bip/ig;
195 $string =~ s/cuesheet/chiushit/ig;
196 $string =~ s/fade/feid/ig;
197 $string =~ s/Crossfeed/crossfid/ig;
198 $string =~ s/Cache/chash/ig;
199 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
200 $string =~ s/\bFile(s?)\b/fail$1/ig;
201 $string =~ s/\bloudness\b/laudness/ig;
202 $string =~ s/\bunicode\b/unikod/ig;
203 $string =~ s/Playlist/pleylist/ig;
204 $string =~ s/WavPack/wave pak/ig;
205 $string =~ s/BITRATE/bit reit/ig;
206 $string =~ s/Codepage/cod page/ig;
207 $string =~ s/PCM Wave/pcm ueiv/ig;
208 $string =~ s/è/è/ig;
209 $string =~ s/\b(s*)ù\b/$1ù/ig;
210 $string =~ s/\b(s*)Ã\b/$1à/ig;
211 switch
($$tts_object{"name"}) {
212 case
"sapi" { # just for SAPI
213 switch
($$tts_object{"vendor"}) {
215 $string =~ s/Inizializza/inizializa/ig;
217 case
"ScanSoft, Inc" {
220 $string =~ s/stop/stohp/ig;
227 if ($orig ne $string) {
228 printf("%s -> %s\n", $orig, $string) if $verbose;
233 # Produce a wav file of the text given
# NOTE(review): sub header, case labels, the lines that actually execute most
# commands, and closing braces are not visible in this excerpt.
# Arguments: text to speak, output file name, engine option string, engine
# hash reference (entries "name" and "stdin" are read here).
# NOTE(review): $string and $output are interpolated into shell command lines
# inside double quotes; text containing quotes or shell metacharacters would
# change the command that is run.
236 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
238 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
# Dispatch on the engine name.
239 switch
($$tts_object{"name"}) {
241 # festival_client lies to us, so we have to do awful soul-eating
242 # work with IPC::open3()
243 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
244 print("> $cmd\n") if $verbose;
245 # Open command, and filehandles for STDIN, STDOUT, STDERR
246 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
247 # Put the string to speak into STDIN and close it
248 print(CMD_IN
$string);
250 # Read all output from festival_client (because it LIES TO US)
# flite: text and output file are both given on the command line.
257 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
258 print("> $cmd\n") if $verbose;
262 # NOTE: $tts_engine_opts is interpolated into the espeak command line below.
263 $cmd = "espeak $tts_engine_opts -w \"$output\"";
264 print("> $cmd\n") if $verbose;
# espeak receives the text on its standard input, followed by a newline.
265 open(ESPEAK
, "| $cmd");
266 print ESPEAK
$string . "\n";
# Send a tab-separated SPEAK command (output file, then text) over the stored
# input handle of the engine process.
270 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
# swift: output file via -o, text as the final command-line argument.
273 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
274 print("> $cmd\n") if $verbose;
280 # trim leading / trailing silence from the clip
# NOTE(review): sub header, the else branch's execution line and closing
# braces are not visible in this excerpt.
# Arguments: wav file name, threshold value handed to the wavtrim tool, and
# the engine hash reference.
283 my ($file, $threshold, $tts_object) = @_;
284 printf("Trim \"%s\"\n", $file) if $verbose;
285 my $cmd = "wavtrim \"$file\" $threshold";
# For the sapi engine the command is not run here; it is sent as an EXEC
# command over the engine's stored input handle.
286 if ($$tts_object{"name"} eq "sapi") {
287 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
# Otherwise the command is echoed in verbose mode (and presumably executed on
# an elided line - confirm).
290 print("> $cmd\n") if $verbose;
295 # Encode a wav file into the given destination file
# NOTE(review): sub header, the else branch's execution line and closing
# braces are not visible in this excerpt.
# Arguments: input wav, output file, encoder program, encoder option string,
# and the engine hash reference.
298 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
299 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
# Command line layout: encoder, its options, quoted input, quoted output.
300 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
# For the sapi engine the command is sent as an EXEC command over the
# engine's stored input handle instead of being run here.
301 if ($$tts_object{"name"} eq "sapi") {
302 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
# Otherwise the command is echoed in verbose mode (and presumably executed on
# an elided line - confirm).
305 print("> $cmd\n") if $verbose;
310 # synchronize the clip generation / processing if it's running in another process
# NOTE(review): sub header and closing braces are not visible in this excerpt.
# Argument: the engine hash reference ("name", "stdin", "stdout" are read).
312 my ($tts_object) = @_;
# Only the sapi engine is handled: send a SYNC command over its input handle,
# then block until one line can be read back from its output handle.
313 if ($$tts_object{"name"} eq "sapi") {
314 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
315 my $wait = readline($$tts_object{"stdout"});
316 #ignore what's actually returned
320 # Run genlang and create voice clips for each string
# NOTE(review): sub header, the loop that reads genlang's output, several
# conditions and all closing braces are not visible in this excerpt.
# Arguments: language name, target, encoder program, encoder options, TTS
# engine name, TTS engine options.
323 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
# Language files are located relative to the directory of this script.
324 my $english = dirname
($0) . '/../apps/lang/english.lang';
325 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
# genlang's stderr is discarded, so its error messages are not shown.
328 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
# Every processed line is also written to the file "voicefontids" in the
# current directory (see the print to VOICEFONTIDS below).
# NOTE(review): the result of this open() is not checked.
330 open(VOICEFONTIDS
, "> voicefontids");
332 local $| = 1; # make progress indicator work reliably
334 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
335 print("Generating voice clips");
336 print("\n") if $verbose;
339 print(VOICEFONTIDS
$line);
# Lines of the form 'id: NAME' and 'voice: "TEXT"' are recognised; the
# captured values are presumably stored in $id and $voice on elided lines.
340 if ($line =~ /^id: (.*)$/) {
343 elsif ($line =~ /^voice: "(.*)"$/) {
# Skip ids starting with NOT_USED_ and entries with an empty voice text.
345 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
346 my $wav = $id . '.wav';
347 my $mp3 = $id . '.mp3';
349 # Print some progress information
# Condition is true on every 10th clip, and only when not verbose.
350 if (++$i % 10 == 0 and !$verbose) {
354 # Apply corrections to the string
355 $voice = correct_string
($voice, $language, $tts_object);
357 # If we have a pool of snippets, see if the string exists there first
# The pool directory comes from the POOL environment variable. The pool file
# name contains an MD5 hex digest of the corrected text, the engine name, the
# engine options and the encoder options.
358 if (defined($ENV{'POOL'})) {
359 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
360 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
363 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
364 copy
($pool_file, $mp3);
368 # Don't generate MP3 if it already exists (probably from the POOL)
# VOICE_PAUSE is not synthesised: a wav file shipped next to this script is
# copied instead.
370 if ($id eq "VOICE_PAUSE") {
371 print("Use distributed $wav\n") if $verbose;
372 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
# Normal path: synthesise, trim silence, encode, then wait for the engine
# process to catch up before touching the result.
375 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
376 wavtrim
($wav, 500, $tts_object);
377 # 500 seems to be a reasonable default for now
380 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
381 synchronize
($tts_object);
# Store the freshly encoded clip in the pool for later runs.
382 if (defined($ENV{'POOL'})) {
383 copy
($mp3, $pool_file);
394 shutdown_tts
($tts_object);
397 # Assemble the voicefile
# NOTE(review): sub header, the start of the do/while loop, the line that
# runs the command, the loop bodies and closing braces are not visible in
# this excerpt.
# Arguments: language name and numeric target id.
400 my ($language, $target_id) = @_;
# Pick an output name that does not exist yet: "<language>.voice" while the
# counter is 0, otherwise "<language>-<n>.voice". The counter is incremented
# before the suffix is built, so assuming $i starts at 0 (not visible here)
# the first suffixed name would be "-2" - TODO confirm.
404 $outfile = sprintf("%s%s.voice", $language, ($i++ == 0 ?
'' : '-'.$i));
405 } while (-f
$outfile);
406 printf("Saving voice file to %s\n", $outfile) if $verbose;
# voicefont arguments as built here: the id list file 'voicefontids', the
# target id, the current directory, and the output file name.
407 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
408 print("> $cmd\n") if $verbose;
410 print($output) if $verbose;
# Iterate over all .mp3 and .wav files in the current directory (presumably
# to delete the intermediate clips; the loop bodies are not visible).
414 for (glob('*.mp3')) {
417 for (glob('*.wav')) {
427 # Generate .talk clips
# NOTE(review): sub header, several conditions (e.g. the directory test) and
# all closing braces are not visible in this excerpt.
# Arguments: directory to process, engine hash reference, encoder program,
# encoder options, TTS engine options, and a progress counter.
# $i is copied from @_, so increments made inside a recursive call are not
# seen by the caller.
430 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
431 my $d = new DirHandle
$dir;
# NOTE(review): the loop tests the truth of the entry name, so an entry named
# "0" would end the loop early.
432 while (my $file = $d->read) {
433 my ($voice, $wav, $mp3);
434 # Print some progress information
435 if (++$i % 10 == 0 and !$verbose) {
439 # Convert to a complete path
440 my $path = sprintf("%s/%s", $dir, $file);
# Temporary wav file written next to the entry.
443 $wav = sprintf("%s.talk.wav", $path);
445 # Ignore "." and "..", and files whose name ends in .talk
446 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
# Recurse into $path; the clip for it is written to "_dirname.talk" inside it.
451 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
452 $mp3 = sprintf("%s/_dirname.talk", $path);
# Other entries: the clip is written next to the entry as "<name>.talk".
456 $mp3 = sprintf("%s.talk", $path);
457 $voice =~ s/\.[^\.]*$//; # Trim extension
460 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
# Synthesise, trim silence, then encode into the .talk file.
462 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
463 wavtrim
($wav, 500, $tts_object);
464 # 500 seems to be a reasonable default for now
465 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
# Top-level script: validate the command-line options, then dispatch.
# NOTE(review): option parsing, some if/else lines and closing braces are not
# visible in this excerpt.
# Option variables as used below: $V / $C select the mode, $t target,
# $l language, $i (required together with -t and -l), $e encoder,
# $E encoder options, $s TTS engine, $S TTS engine options, $v verbose.
473 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
# These three are only reported in one mode (presumably -V; the opening "if"
# is not visible).
475 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
476 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
477 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
# -C needs a path as the first remaining command-line argument.
479 elsif (defined($C)) {
480 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
482 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
483 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
484 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
485 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
# Any missing argument: show the usage text and exit with status 1.
486 if ($printusage == 1) { printusage
(); exit 1; }
# Condition holds when -v was given or the V environment variable is set
# (the body, presumably enabling verbose output, is not visible).
488 if (defined($v) or defined($ENV{'V'})) {
492 # add the tools dir to the path temporarily, for calling various tools
493 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
498 # Only do the panic cleanup for voicefiles
# NOTE(review): SIGKILL cannot be caught, so the $SIG{KILL} assignment has no
# effect.
499 $SIG{INT
} = \
&panic_cleanup
;
500 $SIG{KILL
} = \
&panic_cleanup
;
502 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
503 $t, $l, $e, $E, $s, $S);
# Note the argument order: language first, then target.
504 generateclips
($l, $t, $e, $E, $s, $S);
# Talk-clip mode: start the engine, walk the given path with the progress
# counter starting at 0, then shut the engine down.
509 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
510 my $tts_object = init_tts
($s, $S, $l);
511 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
512 shutdown_tts
($tts_object);