3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
28 use open ':encoding(utf8)';
34 Usage: voice.pl [options] [path to dir]
36 Create voice file. You must also specify -t and -l.
42 Specify which target you want to build voicefile for. Must include
43 any features that target supports.
46 Numeric target id. Needed for voice building.
49 Specify which language you want to build. Without .lang extension.
52 Which encoder to use for voice strings
55 Which encoder options to use when compressing voice strings. Enclose
56 in double quotes if the options include spaces.
59 Which TTS engine to use.
61 -S=<TTS engine options>
62 Options to pass to the TTS engine. Enclose in double quotes if the
63 options include spaces.
71 # Initialize TTS engine. May return an object or value which will be passed
72 # to voicestring and shutdown_tts
# NOTE(review): the sub header, the engine dispatch and the closing lines are
# not visible in this excerpt; call sites refer to this routine as init_tts.
# Arguments: engine name, engine option string, language name.
75 my ($tts_engine, $tts_engine_opts, $language) = @_;
# The engine name is always recorded under "name"; the other routines
# dispatch on it.
76 my %ret = ("name" => $tts_engine);
# Start a festival server as a piped child process; its stdout and stderr
# are discarded. A pipe open returns the pid of the child.
79 print("> festival $tts_engine_opts --server\n") if $verbose;
80 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
81 my $dummy = *FESTIVAL_SERVER
; #suppress warning
# On interrupt, send SIGTERM to the server and run panic_cleanup.
# NOTE(review): print("foo") / print("boo") look like leftover debug output.
82 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
# NOTE(review): SIGKILL cannot be caught by a process, so this handler can
# never run; TERM was presumably intended -- confirm.
83 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
# Ask cygpath for the absolute Windows form of the directory holding this
# script, which is where sapi_voice.vbs is looked up.
87 my $toolsdir = dirname
($0);
88 my $path = `cygpath $toolsdir -a -w`;
91 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
93 print("> cscript //nologo $cmd\n") if $verbose;
# Run the script with two pipes: CMD_OUT reads what it prints, CMD_IN feeds
# its stdin (presumably IPC::Open2's open2 -- the import is not visible here).
94 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
# Both directions of the conversation use UTF-16LE.
95 binmode(*CMD_IN
, ':encoding(utf16le)');
96 binmode(*CMD_OUT
, ':encoding(utf16le)');
# On interrupt, tell the script to quit and run panic_cleanup.
97 $SIG{INT
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
# NOTE(review): as above, a SIGKILL handler is never invoked.
98 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
# Query the voice vendor; one reply line is read back and stored under
# "vendor", which the string-correction code switches on.
99 print(CMD_IN
"QUERY\tVENDOR\r\n");
100 my $vendor = readline(*CMD_OUT
);
# Tail of the entry list stored for this engine (the start of the statement
# is not visible in this excerpt).
104 "stdout" => *CMD_OUT
,
105 "vendor" => $vendor);
111 # Shut down the TTS engine if necessary.
# NOTE(review): the sub header and the case labels are not visible in this
# excerpt; call sites refer to this routine as shutdown_tts.
# Argument: reference to the hash describing the running engine.
113 my ($tts_object) = @_;
# The "name" entry selects the engine-specific teardown.
114 switch
($$tts_object{"name"}) {
116 # Send SIGTERM to festival server
117 kill TERM
=> $$tts_object{"pid"};
# Tell the helper process on the "stdin" handle to quit, then close the pipe.
120 print({$$tts_object{"stdin"}} "QUIT\r\n");
121 close($$tts_object{"stdin"});
126 # Apply corrections to a voice-string to make it sound better
# NOTE(review): the sub header, the language dispatch, most case labels and
# the closing lines are not visible in this excerpt; the call site refers to
# this routine as correct_string.
# Arguments: the text to speak, the language name, and a reference to the
# engine hash (its "name" and "vendor" entries are consulted below).
129 my ($string, $language, $tts_object) = @_;
132 # General for all engines and languages
# Spell the abbreviations out letter by letter (case-sensitive match).
133 $string =~ s/USB/U S B/g;
134 $string =~ s/ID3/I D 3/g;
137 switch
($$tts_object{"name"}) {
138 case
["sapi","festival"] {
139 $string =~ s/plugin(s?)/plug-in$1/ig; next
142 $string =~ s/\ba\b/ay/ig;
147 # for all German engines (e.g. for English words)
148 $string =~ s/alkaline/alkalein/ig;
149 $string =~ s/byte(s?)/beit$1/ig;
150 $string =~ s/clip(s?)/klipp$1/ig;
151 $string =~ s/\bcover/kawwer/ig;
152 $string =~ s/cuesheet/kjuschiet/ig;
153 $string =~ s/dither/didder/ig;
154 $string =~ s/equalizer/iquileiser/ig;
155 $string =~ s/\bflash\b/fläsh/ig;
156 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
# Matches the spaced-out form produced by the ID3 substitution above.
157 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
158 $string =~ s/\bloudness\b/laudness/ig;
159 $string =~ s/\bunicode\b/unikod/ig;
160 switch
($$tts_object{"name"}) {
161 case
"sapi" { # just for SAPI
# Further corrections depend on the vendor reported by the SAPI voice (the
# vendor case label for the lines below is not visible in this excerpt).
162 switch
($$tts_object{"vendor"}) {
164 $string =~ s/alphabet/alfabet/ig;
165 $string =~ s/ampere/amper/ig;
166 $string =~ s/\bdezibel\b/de-zibell/ig;
# "diddering" is what the general dither -> didder rule above produces.
167 $string =~ s/diddering/didde-ring/ig;
168 $string =~ s/energie\b/ener-gie/ig;
169 $string =~ s/\Blauf\b/-lauf/ig;
170 $string =~ s/\bnumerisch\b/numehrisch/ig;
177 # for all Swedish engines (e.g. for English words)
178 $string =~ s/kilobyte/kilobajt/ig;
179 $string =~ s/megabyte/megabajt/ig;
180 $string =~ s/gigabyte/gigabajt/ig;
181 $string =~ s/\bloudness\b/laudness/ig;
183 switch
($$tts_object{"name"}) {
184 case
"espeak" { # just for eSpeak
185 $string =~ s/ampere/ampär/ig;
186 $string =~ s/bokmärken/bok-märken/ig;
187 $string =~ s/generella/schenerella/ig;
188 $string =~ s/dithering/diddering/ig;
189 $string =~ s/\bunicode\b/jynikod/ig;
190 $string =~ s/uttoning/utoning/ig;
191 $string =~ s/procent/pro-cent/ig;
192 $string =~ s/spellistor/spelistor/ig;
193 $string =~ s/cuesheet/qjyschiit/ig;
198 # for all Italian engines (e.g. for English words)
199 $string =~ s/Replaygain/Ripleyghein/ig;
# Order matters: "Crossfade" is rewritten here, before the general "fade"
# rule below, which would otherwise match inside it.
200 $string =~ s/Crossfade/Crossfeid/ig;
201 $string =~ s/beep/Bip/ig;
202 $string =~ s/cuesheet/chiushit/ig;
203 $string =~ s/fade/feid/ig;
204 $string =~ s/Crossfeed/crossfid/ig;
205 $string =~ s/Cache/chash/ig;
206 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
207 $string =~ s/\bFile(s?)\b/fail$1/ig;
208 $string =~ s/\bloudness\b/laudness/ig;
209 $string =~ s/\bunicode\b/unikod/ig;
210 $string =~ s/Playlist/pleylist/ig;
211 $string =~ s/WavPack/wave pak/ig;
212 $string =~ s/BITRATE/bit reit/ig;
213 $string =~ s/Codepage/cod page/ig;
214 $string =~ s/PCM Wave/pcm Ue'iv/ig;
215 switch
($$tts_object{"name"}) {
216 case
"sapi" { # just for SAPI
217 switch
($$tts_object{"vendor"}) {
219 $string =~ s/Inizializza/inizializa/ig;
221 case
"ScanSoft, Inc" {
# NOTE(review): no word boundaries, so "stop" is also rewritten inside
# longer words -- confirm this is intended.
224 $string =~ s/stop/stohp/ig;
# In verbose mode, report any change. $orig is assigned on a line that is
# not visible in this excerpt (presumably a copy of the unmodified input).
231 if ($orig ne $string) {
232 printf("%s -> %s\n", $orig, $string) if $verbose;
237 # Produce a wav file of the text given
# NOTE(review): the sub header, the case labels and the lines that actually
# run $cmd are not visible in this excerpt; call sites refer to this routine
# as voicestring.
# Arguments: text to speak, output wav path, engine option string, and a
# reference to the engine hash.
240 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
242 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
243 switch
($$tts_object{"name"}) {
245 # festival_client lies to us, so we have to do awful soul-eating
246 # work with IPC::Open3's open3()
247 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
248 print("> $cmd\n") if $verbose;
249 # Open command, and filehandles for STDIN, STDOUT, STDERR
250 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
251 # Put the string to speak into STDIN and close it
252 print(CMD_IN
$string);
254 # Read all output from festival_client (because it LIES TO US)
# NOTE(review): here and for swift below, $string is placed inside double
# quotes on a command line. If $cmd is run through a shell, a string that
# contains ", $ or backticks would be interpreted by it -- confirm how $cmd
# is executed and whether the strings are trusted.
261 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
262 print("> $cmd\n") if $verbose;
# espeak receives the text on its stdin through a pipe rather than on the
# command line.
266 $cmd = "espeak $tts_engine_opts -w \"$output\"";
267 print("> $cmd\n") if $verbose;
268 open(ESPEAK
, "| $cmd");
269 print ESPEAK
$string . "\n";
# Hand the request to the helper process on the "stdin" handle as a
# tab-separated SPEAK command.
273 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
276 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
277 print("> $cmd\n") if $verbose;
283 # Trim leading / trailing silence from the clip
# NOTE(review): the sub header, the else branch and the closing lines are not
# visible in this excerpt; call sites refer to this routine as wavtrim.
# Arguments: wav file path, threshold passed through to the wavtrim tool,
# and a reference to the engine hash.
286 my ($file, $threshold, $tts_object) = @_;
287 printf("Trim \"%s\"\n", $file) if $verbose;
288 my $cmd = "wavtrim \"$file\" $threshold";
# With SAPI the command is handed to the helper process as an EXEC request
# instead of being handled in this process.
289 if ($$tts_object{"name"} eq "sapi") {
290 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
293 print("> $cmd\n") if $verbose;
298 # Encode a wav file into the given destination file
# NOTE(review): the sub header, the else branch and the closing lines are not
# visible in this excerpt; call sites refer to this routine as encodewav.
# Arguments: input wav path, output path, encoder program, encoder option
# string, and a reference to the engine hash.
301 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
302 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
# Command line shape: encoder, its options, then quoted input and output.
303 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
# With SAPI the command is handed to the helper process as an EXEC request
# instead of being handled in this process.
304 if ($$tts_object{"name"} eq "sapi") {
305 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
308 print("> $cmd\n") if $verbose;
313 # Synchronize the clip generation / processing if it's running in another process
# NOTE(review): the sub header and closing lines are not visible in this
# excerpt; call sites refer to this routine as synchronize.
# Argument: reference to the engine hash.
315 my ($tts_object) = @_;
# Only SAPI is handled here: send a SYNC request, then block until one line
# comes back on the "stdout" handle.
316 if ($$tts_object{"name"} eq "sapi") {
317 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
318 my $wait = readline($$tts_object{"stdout"});
319 #ignore what's actually returned
323 # Run genlang and create voice clips for each string
# NOTE(review): the sub header, the loop over genlang's output, several
# declarations and all closing braces are not visible in this excerpt; the
# call site refers to this routine as generateclips.
# Arguments: language name, target, encoder program, encoder options, TTS
# engine name, TTS engine options.
326 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
# Language files are located relative to the directory holding this script.
327 my $english = dirname
($0) . '/../apps/lang/english.lang';
328 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
# genlang's stderr is discarded.
331 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
# Every line read from genlang is copied into this file (see the print below).
# NOTE(review): the result of this open is not checked here.
333 open(VOICEFONTIDS
, "> voicefontids");
335 local $| = 1; # make progress indicator work reliably
337 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
338 print("Generating voice clips");
339 print("\n") if $verbose;
342 print(VOICEFONTIDS
$line);
# Input lines come in pairs: an "id: NAME" line, then a 'voice: "TEXT"' line.
343 if ($line =~ /^id: (.*)$/) {
346 elsif ($line =~ /^voice: "(.*)"$/) {
# Skip placeholder ids and entries without any text to speak.
348 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
349 my $wav = $id . '.wav';
350 my $mp3 = $id . '.mp3';
352 # Print some progress information
353 if (++$i % 10 == 0 and !$verbose) {
357 # Apply corrections to the string
358 $voice = correct_string
($voice, $language, $tts_object);
360 # If we have a pool of snippets, see if the string exists there first
361 if (defined($ENV{'POOL'})) {
# The pool file name is keyed on an MD5 of the corrected text together with
# the engine name, engine options and encoder options.
# NOTE(review): Digest::MD5 croaks on characters above U+00FF, and this file
# sets a UTF-8 default I/O layer, so $voice may contain such characters --
# consider encoding to bytes before hashing; confirm.
362 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
363 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
366 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
367 copy
($pool_file, $mp3);
371 # Don't generate MP3 if it already exists (probably from the POOL)
# VOICE_PAUSE is not synthesized: a wav shipped next to this script is used.
373 if ($id eq "VOICE_PAUSE") {
374 print("Use distributed $wav\n") if $verbose;
375 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
# Synthesize, trim and encode the clip, then wait for any helper process to
# finish before the result is used.
378 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
379 wavtrim
($wav, 500, $tts_object);
380 # 500 seems to be a reasonable default for now
383 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
384 synchronize
($tts_object);
# Store the freshly encoded clip in the pool for later runs.
385 if (defined($ENV{'POOL'})) {
386 copy
($mp3, $pool_file);
397 shutdown_tts
($tts_object);
400 # Assemble the voicefile
403 my ($language, $target_id) = @_;
405 $outfile = sprintf("%s.voice", $language);
406 printf("Saving voice file to %s\n", $outfile) if $verbose;
407 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
408 print("> $cmd\n") if $verbose;
410 print($output) if $verbose;
414 for (glob('*.mp3')) {
417 for (glob('*.wav')) {
427 # Generate .talk clips
# NOTE(review): the sub header, the file/directory tests, the assignment of
# $voice and all closing braces are not visible in this excerpt; the routine
# calls itself as gentalkclips.
# Arguments: directory to scan, reference to the engine hash, encoder
# program, encoder options, TTS engine options, and a progress counter.
430 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
# NOTE(review): the DirHandle is used without checking that the directory
# could be opened; the constructor returns undef on failure.
431 my $d = new DirHandle
$dir;
# NOTE(review): the loop tests the entry name for truth, not definedness, so
# an entry named "0" ends the scan early -- consider
# defined(my $file = $d->read).
432 while (my $file = $d->read) {
433 my ($voice, $wav, $mp3);
434 # Print some progress information
# NOTE(review): $i is a by-value copy of the argument, so increments made in
# a recursive call are not visible to the caller unless a line outside this
# excerpt passes them back.
435 if (++$i % 10 == 0 and !$verbose) {
439 # Convert to a complete path
440 my $path = sprintf("%s/%s", $dir, $file);
# Temporary wav sits next to the entry it describes.
443 $wav = sprintf("%s.talk.wav", $path);
445 # Ignore dot-dirs and talk files
446 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
# Recurse into the path; the clip for the path itself is written to
# _dirname.talk inside it.
451 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
452 $mp3 = sprintf("%s/_dirname.talk", $path);
# Otherwise the clip is named after the entry, and the spoken text loses
# its last dot-extension.
456 $mp3 = sprintf("%s.talk", $path);
457 $voice =~ s/\.[^\.]*$//; # Trim extension
460 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
# Synthesize, trim and encode the clip, then wait for any helper process.
462 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
463 wavtrim
($wav, 500, $tts_object);
464 # 500 seems to be a reasonable default for now
465 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
466 synchronize
($tts_object);
# Top-level option validation and dispatch.
# NOTE(review): option parsing, the if/else structure around these checks
# and the closing braces are not visible in this excerpt.
# Every missing option is reported before the usage text is printed.
474 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
476 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
477 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
478 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
# -C needs the directory to process as the first positional argument.
480 elsif (defined($C)) {
481 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
483 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
484 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
485 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
486 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
487 if ($printusage == 1) { printusage
(); exit 1; }
# Both the -v option and a V environment variable are accepted here.
489 if (defined($v) or defined($ENV{'V'})) {
493 # add the tools dir to the path temporarily, for calling various tools
494 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
499 # Only do the panic cleanup for voicefiles
500 $SIG{INT
} = \
&panic_cleanup
;
# NOTE(review): SIGKILL cannot be caught by a process, so this handler can
# never run; TERM was presumably intended -- confirm.
501 $SIG{KILL
} = \
&panic_cleanup
;
503 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
504 $t, $l, $e, $E, $s, $S);
# Note the argument order: language first, then target.
505 generateclips
($l, $t, $e, $E, $s, $S);
510 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
# For .talk clips the engine is started and stopped here, around the
# directory walk; the progress counter starts at 0.
511 my $tts_object = init_tts
($s, $S, $l);
512 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
513 shutdown_tts
($tts_object);