3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
32 Usage: voice.pl [options] [path to dir]
34 Create voice file. You must also specify -t and -l.
40 Specify which target you want to build voicefile for. Must include
41 any features that target supports.
44 Numeric target id. Needed for voice building.
47 Specify which language you want to build. Without .lang extension.
50 Which encoder to use for voice strings
53 Which encoder options to use when compressing voice strings. Enclose
54 in double quotes if the options include spaces.
57 Which TTS engine to use.
59 -S=<TTS engine options>
60 Options to pass to the TTS engine. Enclose in double quotes if the
61 options include spaces.
69 # Initialize TTS engine. May return an object or value which will be passed
70 # to voicestring and shutdown_tts
73 my ($tts_engine, $tts_engine_opts, $language) = @_;
74 my %ret = ("name" => $tts_engine);
77 print("> festival $tts_engine_opts --server\n") if $verbose;
78 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
79 my $dummy = *FESTIVAL_SERVER
; #suppress warning
80 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
81 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
85 my $toolsdir = dirname
($0);
86 my $path = `cygpath $toolsdir -a -w`;
89 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
91 print("> cscript //nologo $cmd\n") if $verbose;
92 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
93 $SIG{INT
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
94 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
95 print(CMD_IN
"QUERY\tVENDOR\r\n");
96 my $vendor = readline(CMD_OUT
);
100 "stdout" => *CMD_OUT
,
101 "vendor" => $vendor);
107 # Shutdown TTS engine if necessary.
109 my ($tts_object) = @_;
110 switch
($$tts_object{"name"}) {
112 # Send SIGTERM to festival server
113 kill TERM
=> $$tts_object{"pid"};
116 print({$$tts_object{"stdin"}} "QUIT\r\n");
117 close($$tts_object{"stdin"});
122 # Apply corrections to a voice-string to make it sound better
125 my ($string, $language, $tts_object) = @_;
128 # General for all engines and languages
129 $string =~ s/USB/U S B/g;
130 $string =~ s/ID3/I D 3/g;
133 switch
($$tts_object{"name"}) {
134 case
["sapi","festival"] {
135 $string =~ s/plugin(s?)/plug-in$1/ig;
140 # for all German engines (e.g. for English words)
141 $string =~ s/alkaline/alkalein/ig;
142 $string =~ s/byte(s?)/beit$1/ig;
143 $string =~ s/clip(s?)/klipp$1/ig;
144 $string =~ s/cuesheet/kjuschiet/ig;
145 $string =~ s/dither/didder/ig;
146 $string =~ s/equalizer/iquileiser/ig;
147 $string =~ s/\bflash\b/fläsh/ig;
148 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
149 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
150 $string =~ s/\bloudness\b/laudness/ig;
151 $string =~ s/\bunicode\b/unikod/ig;
152 switch
($$tts_object{"name"}) {
153 case
"sapi" { # just for SAPI
154 switch
($$tts_object{"vendor"}) {
156 $string =~ s/alphabet/alfabet/ig;
157 $string =~ s/ampere/amper/ig;
158 $string =~ s/\bdezibel\b/de-zibell/ig;
159 $string =~ s/diddering/didde-ring/ig;
160 $string =~ s/energie\b/ener-gie/ig;
161 $string =~ s/\Blauf\b/-lauf/ig;
162 $string =~ s/\bnumerisch\b/numehrisch/ig;
169 # for all Swedish engines (e.g. for English words)
170 $string =~ s/kilobyte/kilobajt/ig;
171 $string =~ s/megabyte/megabajt/ig;
172 $string =~ s/gigabyte/gigabajt/ig;
173 $string =~ s/\bloudness\b/laudness/ig;
175 switch
($$tts_object{"name"}) {
176 case
"espeak" { # just for eSpeak
177 $string =~ s/ampere/ampär/ig;
178 $string =~ s/bokmärken/bok-märken/ig;
179 $string =~ s/generella/schenerella/ig;
180 $string =~ s/dithering/diddering/ig;
181 $string =~ s/\bunicode\b/jynikod/ig;
182 $string =~ s/uttoning/utoning/ig;
183 $string =~ s/procent/pro-cent/ig;
184 $string =~ s/spellistor/spelistor/ig;
185 $string =~ s/cuesheet/qjyschiit/ig;
190 # for all Italian engines (e.g. for English words)
191 $string =~ s/Replaygain/Ripleyghein/ig;
192 $string =~ s/Crossfade/Crossfeid/ig;
193 $string =~ s/beep/Bip/ig;
194 $string =~ s/cuesheet/chiushit/ig;
195 $string =~ s/fade/feid/ig;
196 $string =~ s/Crossfeed/crossfid/ig;
197 $string =~ s/Cache/chash/ig;
198 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
199 $string =~ s/\bFile(s?)\b/fail$1/ig;
200 $string =~ s/\bloudness\b/laudness/ig;
201 $string =~ s/\bunicode\b/unikod/ig;
202 $string =~ s/Playlist/pleylist/ig;
203 $string =~ s/WavPack/wave pak/ig;
204 $string =~ s/BITRATE/bit reit/ig;
205 $string =~ s/Codepage/cod page/ig;
206 $string =~ s/PCM Wave/pcm ueiv/ig;
207 $string =~ s/è/è/ig;
208 $string =~ s/\b(s*)ù\b/$1ù/ig;
209 $string =~ s/\b(s*)Ã\b/$1à/ig;
210 switch
($$tts_object{"name"}) {
211 case
"sapi" { # just for SAPI
212 switch
($$tts_object{"vendor"}) {
214 $string =~ s/Inizializza/inizializa/ig;
216 case
"ScanSoft, Inc" {
219 $string =~ s/stop/stohp/ig;
226 if ($orig ne $string) {
227 printf("%s -> %s\n", $orig, $string) if $verbose;
232 # Produce a wav file of the text given
235 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
237 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
238 switch
($$tts_object{"name"}) {
240 # festival_client lies to us, so we have to do awful soul-eating
241 # work with IPC::open3()
242 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
243 print("> $cmd\n") if $verbose;
244 # Open command, and filehandles for STDIN, STDOUT, STDERR
245 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
246 # Put the string to speak into STDIN and close it
247 print(CMD_IN
$string);
249 # Read all output from festival_client (because it LIES TO US)
256 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
257 print("> $cmd\n") if $verbose;
261 # $tts_engine_opts is passed on the espeak command line; the text to speak is piped to espeak's stdin
262 $cmd = "espeak $tts_engine_opts -w \"$output\"";
263 print("> $cmd\n") if $verbose;
264 open(ESPEAK
, "| $cmd");
265 print ESPEAK
$string . "\n";
269 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
272 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
273 print("> $cmd\n") if $verbose;
279 # trim leading / trailing silence from the clip
282 my ($file, $threshold, $tts_object) = @_;
283 printf("Trim \"%s\"\n", $file) if $verbose;
284 my $cmd = "wavtrim \"$file\" $threshold";
285 if ($$tts_object{"name"} eq "sapi") {
286 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
289 print("> $cmd\n") if $verbose;
294 # Encode a wav file into the given destination file
297 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
298 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
299 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
300 if ($$tts_object{"name"} eq "sapi") {
301 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
304 print("> $cmd\n") if $verbose;
309 # synchronize the clip generation / processing if it's running in another process
311 my ($tts_object) = @_;
312 if ($$tts_object{"name"} eq "sapi") {
313 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
314 my $wait = readline($$tts_object{"stdout"});
315 #ignore what's actually returned
319 # Run genlang and create voice clips for each string
322 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
323 my $english = dirname
($0) . '/../apps/lang/english.lang';
324 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
327 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
329 open(VOICEFONTIDS
, "> voicefontids");
331 local $| = 1; # make progress indicator work reliably
333 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
334 print("Generating voice clips");
335 print("\n") if $verbose;
338 print(VOICEFONTIDS
$line);
339 if ($line =~ /^id: (.*)$/) {
342 elsif ($line =~ /^voice: "(.*)"$/) {
344 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
345 my $wav = $id . '.wav';
346 my $mp3 = $id . '.mp3';
348 # Print some progress information
349 if (++$i % 10 == 0 and !$verbose) {
353 # Apply corrections to the string
354 $voice = correct_string
($voice, $language, $tts_object);
356 # If we have a pool of snippets, see if the string exists there first
357 if (defined($ENV{'POOL'})) {
358 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
359 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
362 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
363 copy
($pool_file, $mp3);
367 # Don't generate MP3 if it already exists (probably from the POOL)
369 if ($id eq "VOICE_PAUSE") {
370 print("Use distributed $wav\n") if $verbose;
371 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
374 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
375 wavtrim
($wav, 500, $tts_object);
376 # 500 seems to be a reasonable default for now
379 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
380 synchronize
($tts_object);
381 if (defined($ENV{'POOL'})) {
382 copy
($mp3, $pool_file);
393 shutdown_tts
($tts_object);
396 # Assemble the voicefile
399 my ($language, $target_id) = @_;
401 $outfile = sprintf("%s.voice", $language);
402 printf("Saving voice file to %s\n", $outfile) if $verbose;
403 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
404 print("> $cmd\n") if $verbose;
406 print($output) if $verbose;
410 for (glob('*.mp3')) {
413 for (glob('*.wav')) {
423 # Generate .talk clips
426 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
427 my $d = new DirHandle
$dir;
428 while (my $file = $d->read) {
429 my ($voice, $wav, $mp3);
430 # Print some progress information
431 if (++$i % 10 == 0 and !$verbose) {
435 # Convert to a complete path
436 my $path = sprintf("%s/%s", $dir, $file);
439 $wav = sprintf("%s.talk.wav", $path);
441 # Ignore dot-dirs and talk files
442 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
447 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
448 $mp3 = sprintf("%s/_dirname.talk", $path);
452 $mp3 = sprintf("%s.talk", $path);
453 $voice =~ s/\.[^\.]*$//; # Trim extension
456 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
458 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
459 wavtrim
($wav, 500, $tts_object);
460 # 500 seems to be a reasonable default for now
461 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
462 synchronize
($tts_object);
470 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
472 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
473 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
474 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
476 elsif (defined($C)) {
477 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
479 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
480 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
481 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
482 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
483 if ($printusage == 1) { printusage
(); exit 1; }
485 if (defined($v) or defined($ENV{'V'})) {
489 # add the tools dir to the path temporarily, for calling various tools
490 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
495 # Only do the panic cleanup for voicefiles
496 $SIG{INT
} = \
&panic_cleanup
;
497 $SIG{KILL
} = \
&panic_cleanup
;
499 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
500 $t, $l, $e, $E, $s, $S);
501 generateclips
($l, $t, $e, $E, $s, $S);
506 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
507 my $tts_object = init_tts
($s, $S, $l);
508 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
509 shutdown_tts
($tts_object);