3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
28 use open ':encoding(utf8)';
34 Usage: voice.pl [options] [path to dir]
36 Create voice file. You must also specify -t and -l.
42 Specify which target you want to build voicefile for. Must include
43 any features that target supports.
46 Numeric target id. Needed for voice building.
49 Specify which language you want to build. Without .lang extension.
52 Which encoder to use for voice strings
55 Which encoder options to use when compressing voice strings. Enclose
56 in double quotes if the options include spaces.
59 Which TTS engine to use.
61 -S=<TTS engine options>
62 Options to pass to the TTS engine. Enclose in double quotes if the
63 options include spaces.
71 # Initialize TTS engine. May return an object or value which will be passed
72 # to voicestring and shutdown_tts
75 my ($tts_engine, $tts_engine_opts, $language) = @_;
76 my %ret = ("name" => $tts_engine);
79 print("> festival $tts_engine_opts --server\n") if $verbose;
80 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
81 my $dummy = *FESTIVAL_SERVER
; #suppress warning
82 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
83 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
87 my $toolsdir = dirname
($0);
88 my $path = `cygpath $toolsdir -a -w`;
91 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
93 print("> cscript //nologo $cmd\n") if $verbose;
94 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
95 binmode(*CMD_IN
, ':encoding(utf16le)');
96 binmode(*CMD_OUT
, ':encoding(utf16le)');
97 $SIG{INT
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
98 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
99 print(CMD_IN
"QUERY\tVENDOR\r\n");
100 my $vendor = readline(*CMD_OUT
);
104 "stdout" => *CMD_OUT
,
105 "vendor" => $vendor);
111 # Shutdown TTS engine if necessary.
113 my ($tts_object) = @_;
114 switch
($$tts_object{"name"}) {
116 # Send SIGTERM to festival server
117 kill TERM
=> $$tts_object{"pid"};
120 print({$$tts_object{"stdin"}} "QUIT\r\n");
121 close($$tts_object{"stdin"});
126 # Apply corrections to a voice-string to make it sound better
129 my ($string, $language, $tts_object) = @_;
132 # General for all engines and languages
133 $string =~ s/USB/U S B/g;
134 $string =~ s/ID3/I D 3/g;
137 switch
($$tts_object{"name"}) {
138 case
["sapi","festival"] {
139 $string =~ s/plugin(s?)/plug-in$1/ig; next
142 $string =~ s/\ba\b/ay/ig;
148 # for all German engines (e.g. for English words)
149 $string =~ s/alkaline/alkalein/ig;
150 $string =~ s/byte(s?)/beit$1/ig;
151 $string =~ s/clip(s?)/klipp$1/ig;
152 $string =~ s/\bcover/kawwer/ig;
153 $string =~ s/cuesheet/kjuschiet/ig;
154 $string =~ s/dither/didder/ig;
155 $string =~ s/equalizer/iquileiser/ig;
156 $string =~ s/\bflash\b/fläsh/ig;
157 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
158 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
159 $string =~ s/\bloudness\b/laudness/ig;
160 $string =~ s/\bunicode\b/unikod/ig;
161 switch
($$tts_object{"name"}) {
162 case
"sapi" { # just for SAPI
163 switch
($$tts_object{"vendor"}) {
165 $string =~ s/alphabet/alfabet/ig;
166 $string =~ s/ampere/amper/ig;
167 $string =~ s/\bdezibel\b/de-zibell/ig;
168 $string =~ s/diddering/didde-ring/ig;
169 $string =~ s/energie\b/ener-gie/ig;
170 $string =~ s/\Blauf\b/-lauf/ig;
171 $string =~ s/\bnumerisch\b/numehrisch/ig;
178 # for all Swedish engines (e.g. for English words)
179 $string =~ s/kilobyte/kilobajt/ig;
180 $string =~ s/megabyte/megabajt/ig;
181 $string =~ s/gigabyte/gigabajt/ig;
182 $string =~ s/\bloudness\b/laudness/ig;
184 switch
($$tts_object{"name"}) {
185 case
"espeak" { # just for eSpeak
186 $string =~ s/ampere/ampär/ig;
187 $string =~ s/bokmärken/bok-märken/ig;
188 $string =~ s/generella/schenerella/ig;
189 $string =~ s/dithering/diddering/ig;
190 $string =~ s/\bunicode\b/jynikod/ig;
191 $string =~ s/uttoning/utoning/ig;
192 $string =~ s/procent/pro-cent/ig;
193 $string =~ s/spellistor/spelistor/ig;
194 $string =~ s/cuesheet/qjyschiit/ig;
199 # for all Italian engines (e.g. for English words)
200 $string =~ s/Replaygain/Ripleyghein/ig;
201 $string =~ s/Crossfade/Crossfeid/ig;
202 $string =~ s/beep/Bip/ig;
203 $string =~ s/cuesheet/chiushit/ig;
204 $string =~ s/fade/feid/ig;
205 $string =~ s/Crossfeed/crossfid/ig;
206 $string =~ s/Cache/chash/ig;
207 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
208 $string =~ s/\bFile(s?)\b/fail$1/ig;
209 $string =~ s/\bloudness\b/laudness/ig;
210 $string =~ s/\bunicode\b/unikod/ig;
211 $string =~ s/Playlist/pleylist/ig;
212 $string =~ s/WavPack/wave pak/ig;
213 $string =~ s/BITRATE/bit reit/ig;
214 $string =~ s/Codepage/cod page/ig;
215 $string =~ s/PCM Wave/pcm Ue'iv/ig;
216 switch
($$tts_object{"name"}) {
217 case
"sapi" { # just for SAPI
218 switch
($$tts_object{"vendor"}) {
220 $string =~ s/Inizializza/inizializa/ig;
222 case
"ScanSoft, Inc" {
225 $string =~ s/stop/stohp/ig;
232 if ($orig ne $string) {
233 printf("%s -> %s\n", $orig, $string) if $verbose;
238 # Produce a wav file of the text given
241 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
243 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
244 switch
($$tts_object{"name"}) {
246 # festival_client lies to us, so we have to do awful soul-eating
247 # work with IPC::open3()
248 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
249 # Use festival-prolog.scm if it's there (created by user of tools/configure)
250 if (-f
"festival-prolog.scm") {
251 $cmd .= " --prolog festival-prolog.scm";
253 print("> $cmd\n") if $verbose;
254 # Open command, and filehandles for STDIN, STDOUT, STDERR
255 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
256 # Put the string to speak into STDIN and close it
257 print(CMD_IN
$string);
259 # Read all output from festival_client (because it LIES TO US)
266 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
267 print("> $cmd\n") if $verbose;
271 $cmd = "espeak $tts_engine_opts -w \"$output\"";
272 print("> $cmd\n") if $verbose;
273 open(ESPEAK
, "| $cmd");
274 print ESPEAK
$string . "\n";
278 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
281 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
282 print("> $cmd\n") if $verbose;
288 # trim leading / trailing silence from the clip
291 my ($file, $threshold, $tts_object) = @_;
292 printf("Trim \"%s\"\n", $file) if $verbose;
293 my $cmd = "wavtrim \"$file\" $threshold";
294 if ($$tts_object{"name"} eq "sapi") {
295 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
298 print("> $cmd\n") if $verbose;
303 # Encode a wav file into the given destination file
306 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
307 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
308 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
309 if ($$tts_object{"name"} eq "sapi") {
310 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
313 print("> $cmd\n") if $verbose;
318 # synchronize the clip generation / processing if it's running in another process
320 my ($tts_object) = @_;
321 if ($$tts_object{"name"} eq "sapi") {
322 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
323 my $wait = readline($$tts_object{"stdout"});
324 #ignore what's actually returned
328 # Run genlang and create voice clips for each string
331 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
332 my $english = dirname
($0) . '/../apps/lang/english.lang';
333 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
336 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
338 open(VOICEFONTIDS
, "> voicefontids");
340 local $| = 1; # make progress indicator work reliably
342 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
343 print("Generating voice clips");
344 print("\n") if $verbose;
347 print(VOICEFONTIDS
$line);
348 if ($line =~ /^id: (.*)$/) {
351 elsif ($line =~ /^voice: "(.*)"$/) {
353 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
354 my $wav = $id . '.wav';
355 my $mp3 = $id . '.mp3';
357 # Print some progress information
358 if (++$i % 10 == 0 and !$verbose) {
362 # Apply corrections to the string
363 $voice = correct_string
($voice, $language, $tts_object);
365 # If we have a pool of snippets, see if the string exists there first
366 if (defined($ENV{'POOL'})) {
367 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
368 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
371 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
372 copy
($pool_file, $mp3);
376 # Don't generate MP3 if it already exists (probably from the POOL)
378 if ($id eq "VOICE_PAUSE") {
379 print("Use distributed $wav\n") if $verbose;
380 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
383 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
384 wavtrim
($wav, 500, $tts_object);
385 # 500 seems to be a reasonable default for now
388 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
389 synchronize
($tts_object);
390 if (defined($ENV{'POOL'})) {
391 copy
($mp3, $pool_file);
402 shutdown_tts
($tts_object);
405 # Assemble the voicefile
408 my ($language, $target_id) = @_;
410 $outfile = sprintf("%s.voice", $language);
411 printf("Saving voice file to %s\n", $outfile) if $verbose;
412 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
413 print("> $cmd\n") if $verbose;
415 print($output) if $verbose;
419 for (glob('*.mp3')) {
422 for (glob('*.wav')) {
432 # Generate .talk clips
435 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
436 my $d = new DirHandle
$dir;
437 while (my $file = $d->read) {
438 my ($voice, $wav, $mp3);
439 # Print some progress information
440 if (++$i % 10 == 0 and !$verbose) {
444 # Convert to a complete path
445 my $path = sprintf("%s/%s", $dir, $file);
448 $wav = sprintf("%s.talk.wav", $path);
450 # Ignore dot-dirs and talk files
451 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
456 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
457 $mp3 = sprintf("%s/_dirname.talk", $path);
461 $mp3 = sprintf("%s.talk", $path);
462 $voice =~ s/\.[^\.]*$//; # Trim extension
465 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
467 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
468 wavtrim
($wav, 500, $tts_object);
469 # 500 seems to be a reasonable default for now
470 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
471 synchronize
($tts_object);
479 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
481 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
482 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
483 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
485 elsif (defined($C)) {
486 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
488 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
489 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
490 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
491 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
492 if ($printusage == 1) { printusage
(); exit 1; }
494 if (defined($v) or defined($ENV{'V'})) {
498 # add the tools dir to the path temporarily, for calling various tools
499 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
504 # Only do the panic cleanup for voicefiles
505 $SIG{INT
} = \
&panic_cleanup
;
506 $SIG{KILL
} = \
&panic_cleanup
;
508 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
509 $t, $l, $e, $E, $s, $S);
510 generateclips
($l, $t, $e, $E, $s, $S);
515 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
516 my $tts_object = init_tts
($s, $S, $l);
517 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
518 shutdown_tts
($tts_object);