3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
28 use open ':encoding(utf8)';
34 Usage: voice.pl [options] [path to dir]
36 Create voice file. You must also specify -t and -l.
42 Specify which target you want to build voicefile for. Must include
43 any features that target supports.
46 Numeric target id. Needed for voice building.
49 Specify which language you want to build. Without .lang extension.
52 Which encoder to use for voice strings
55 Which encoder options to use when compressing voice strings. Enclose
56 in double quotes if the options include spaces.
59 Which TTS engine to use.
61 -S=<TTS engine options>
62 Options to pass to the TTS engine. Enclose in double quotes if the
63 options include spaces.
71 # Initialize TTS engine. May return an object or value which will be passed
72 # to voicestring and shutdown_tts
#
# Arguments (taken from @_): the TTS engine name, the option string for that
# engine, and the language.
# %ret starts out holding only the engine name under the key "name".
# NOTE(review): this listing has gaps - the line that opens the routine and
# the per-engine dispatch around the two sections below are not visible here.
75 my ($tts_engine, $tts_engine_opts, $language) = @_;
76 my %ret = ("name" => $tts_engine);
# Festival section: start "festival ... --server" on the write end of a pipe,
# with its stdout and stderr sent to /dev/null. $pid is what open() returned.
# NOTE(review): the result of open() is not checked in the visible lines.
79 print("> festival $tts_engine_opts --server\n") if $verbose;
80 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
81 my $dummy = *FESTIVAL_SERVER
; #suppress warning
# Signal handlers: send TERM to the festival server, then run panic_cleanup.
# NOTE(review): the "foo"/"boo" prints look like leftover debug output, and
# SIGKILL normally cannot be caught, so the KILL handler presumably never
# runs - confirm before relying on it.
82 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
83 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
# SAPI section: convert the directory of this script to an absolute Windows
# path with cygpath, then build the sapi_voice.vbs command line from it.
# NOTE(review): backticks keep the trailing newline of cygpath's output; any
# cleanup of $path would be on lines not visible in this excerpt.
87 my $toolsdir = dirname
($0);
88 my $path = `cygpath $toolsdir -a -w`;
91 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
93 print("> cscript //nologo $cmd\n") if $verbose;
# Run the script under cscript with a two-way pipe: CMD_OUT is read from,
# CMD_IN is written to. Both handles are switched to UTF-16LE.
94 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
95 binmode(*CMD_IN
, ':encoding(utf16le)');
96 binmode(*CMD_OUT
, ':encoding(utf16le)');
# Signal handlers: tell the script to QUIT, then run panic_cleanup.
# NOTE(review): same caveat as above regarding the KILL handler.
97 $SIG{INT
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
98 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\r\n"); panic_cleanup
(); };
# Commands sent to the script are tab-separated and end in CRLF. Ask for the
# voice vendor and read one line of reply.
99 print(CMD_IN
"QUERY\tVENDOR\r\n");
100 my $vendor = readline(*CMD_OUT
);
# Tail of a key/value list that stores the read handle and the vendor reply;
# presumably merged into %ret - the start of the statement is not visible.
104 "stdout" => *CMD_OUT
,
105 "vendor" => $vendor);
111 # Shutdown TTS engine if necessary.
#
# Takes one argument: a hash reference. The keys read here are "name", "pid"
# and "stdin".
# NOTE(review): the case labels that select each branch below are not visible
# in this excerpt.
113 my ($tts_object) = @_;
114 switch
($$tts_object{"name"}) {
116 # Send SIGTERM to festival server
117 kill TERM
=> $$tts_object{"pid"};
# Send QUIT (CRLF-terminated) to the handle stored under "stdin", then close
# that handle.
120 print({$$tts_object{"stdin"}} "QUIT\r\n");
121 close($$tts_object{"stdin"});
126 # Apply corrections to a voice-string to make it sound better
#
# Arguments (from @_): the string to speak, the language, and the TTS object
# (a hash reference whose "name" and "vendor" entries are consulted below).
# The corrections are in-place regex substitutions on $string, most of them
# case-insensitive and global (/ig).
# NOTE(review): this listing has gaps - the per-language and per-vendor case
# labels, and the assignment of $orig, are not visible in this excerpt.
129 my ($string, $language, $tts_object) = @_;
132 # General for all engines and languages
# Spell the abbreviations out letter by letter (case-sensitive matches).
133 $string =~ s/USB/U S B/g;
134 $string =~ s/ID3/I D 3/g;
137 switch
($$tts_object{"name"}) {
138 case
["sapi","festival"] {
# NOTE(review): "next" inside a Switch-style case presumably asks for
# fall-through to the following cases - confirm against the Switch docs.
139 $string =~ s/plugin(s?)/plug-in$1/ig; next
# Replace the standalone word "a" (word boundaries on both sides) with "ay".
142 $string =~ s/\ba\b/ay/ig;
147 # for all german engines (e.g. for english words)
148 $string =~ s/alkaline/alkalein/ig;
149 $string =~ s/byte(s?)/beit$1/ig;
150 $string =~ s/clip(s?)/klipp$1/ig;
151 $string =~ s/cuesheet/kjuschiet/ig;
152 $string =~ s/dither/didder/ig;
153 $string =~ s/equalizer/iquileiser/ig;
154 $string =~ s/\bflash\b/fläsh/ig;
155 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
156 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
157 $string =~ s/\bloudness\b/laudness/ig;
158 $string =~ s/\bunicode\b/unikod/ig;
159 switch
($$tts_object{"name"}) {
160 case
"sapi" { # just for SAPI
# Further corrections are selected by the vendor string of the SAPI voice.
161 switch
($$tts_object{"vendor"}) {
163 $string =~ s/alphabet/alfabet/ig;
164 $string =~ s/ampere/amper/ig;
165 $string =~ s/\bdezibel\b/de-zibell/ig;
# "diddering" is what the earlier dither -> didder rule yields for "dithering".
166 $string =~ s/diddering/didde-ring/ig;
167 $string =~ s/energie\b/ener-gie/ig;
# \B requires "lauf" to be the tail of a longer word; a hyphen is inserted
# in front of it.
168 $string =~ s/\Blauf\b/-lauf/ig;
169 $string =~ s/\bnumerisch\b/numehrisch/ig;
176 # for all swedish engines (e.g. for english words)
177 $string =~ s/kilobyte/kilobajt/ig;
178 $string =~ s/megabyte/megabajt/ig;
179 $string =~ s/gigabyte/gigabajt/ig;
180 $string =~ s/\bloudness\b/laudness/ig;
182 switch
($$tts_object{"name"}) {
183 case
"espeak" { # just for eSpeak
184 $string =~ s/ampere/ampär/ig;
185 $string =~ s/bokmärken/bok-märken/ig;
186 $string =~ s/generella/schenerella/ig;
187 $string =~ s/dithering/diddering/ig;
188 $string =~ s/\bunicode\b/jynikod/ig;
189 $string =~ s/uttoning/utoning/ig;
190 $string =~ s/procent/pro-cent/ig;
191 $string =~ s/spellistor/spelistor/ig;
192 $string =~ s/cuesheet/qjyschiit/ig;
197 # for all italian engines (e.g. for english words)
198 $string =~ s/Replaygain/Ripleyghein/ig;
199 $string =~ s/Crossfade/Crossfeid/ig;
200 $string =~ s/beep/Bip/ig;
201 $string =~ s/cuesheet/chiushit/ig;
202 $string =~ s/fade/feid/ig;
203 $string =~ s/Crossfeed/crossfid/ig;
204 $string =~ s/Cache/chash/ig;
205 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
206 $string =~ s/\bFile(s?)\b/fail$1/ig;
207 $string =~ s/\bloudness\b/laudness/ig;
208 $string =~ s/\bunicode\b/unikod/ig;
209 $string =~ s/Playlist/pleylist/ig;
210 $string =~ s/WavPack/wave pak/ig;
211 $string =~ s/BITRATE/bit reit/ig;
212 $string =~ s/Codepage/cod page/ig;
213 $string =~ s/PCM Wave/pcm Ue'iv/ig;
214 switch
($$tts_object{"name"}) {
215 case
"sapi" { # just for SAPI
216 switch
($$tts_object{"vendor"}) {
218 $string =~ s/Inizializza/inizializa/ig;
220 case
"ScanSoft, Inc" {
223 $string =~ s/stop/stohp/ig;
# In verbose mode, report the string before and after correction whenever
# the two differ. $orig is presumably a copy of the input taken before the
# substitutions - its assignment is not visible in this excerpt.
230 if ($orig ne $string) {
231 printf("%s -> %s\n", $orig, $string) if $verbose;
236 # Produce a wav file of the text given
#
# Arguments (from @_): the text to speak, the output file name, the TTS
# engine option string, and the TTS object (a hash reference; "name" picks
# the engine branch, "stdin" is the handle written to in the SAPI branch).
# NOTE(review): the case labels and the statements that actually run $cmd
# for some engines are not visible in this excerpt.
# NOTE(review): $string and $output are interpolated into shell command
# lines inside double quotes without escaping, so text containing a double
# quote or shell metacharacters would change the command.
239 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
241 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
242 switch
($$tts_object{"name"}) {
244 # festival_client lies to us, so we have to do awful soul-eating
245 # work with IPC::open3()
246 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
247 print("> $cmd\n") if $verbose;
248 # Open command, and filehandles for STDIN, STDOUT, STDERR
249 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
250 # Put the string to speak into STDIN and close it
251 print(CMD_IN
$string);
253 # Read all output from festival_client (because it LIES TO US)
# flite: the text is passed on the command line via -t.
260 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
261 print("> $cmd\n") if $verbose;
# espeak: the command writes to $output (-w); the text is fed through a pipe
# to its stdin, followed by a newline.
265 $cmd = "espeak $tts_engine_opts -w \"$output\"";
266 print("> $cmd\n") if $verbose;
267 open(ESPEAK
, "| $cmd");
268 print ESPEAK
$string . "\n";
# SAPI: send a tab-separated, CRLF-terminated SPEAK command (output file,
# then text) to the handle stored under "stdin".
272 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
# swift: output file via -o, text as the final command-line argument.
275 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
276 print("> $cmd\n") if $verbose;
282 # trim leading / trailing silence from the clip
#
# Arguments (from @_): the wav file name, the threshold value handed to the
# external "wavtrim" command, and the TTS object (hash reference).
285 my ($file, $threshold, $tts_object) = @_;
286 printf("Trim \"%s\"\n", $file) if $verbose;
287 my $cmd = "wavtrim \"$file\" $threshold";
# For the SAPI engine the command is not run here: it is sent as an EXEC
# request (tab-separated, CRLF-terminated) to the handle under "stdin".
288 if ($$tts_object{"name"} eq "sapi") {
289 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
# NOTE(review): this is presumably the non-SAPI branch; the statement that
# runs $cmd is not visible in this excerpt.
292 print("> $cmd\n") if $verbose;
297 # Encode a wav file into the given destination file
#
# Arguments (from @_): input file, output file, encoder command, encoder
# option string, and the TTS object (hash reference).
300 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
301 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
# The command line is: encoder, its options, then the quoted input and
# output file names.
302 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
# For the SAPI engine the command is sent as an EXEC request (tab-separated,
# CRLF-terminated) to the handle under "stdin" instead of being run here.
303 if ($$tts_object{"name"} eq "sapi") {
304 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
# NOTE(review): this is presumably the non-SAPI branch; the statement that
# runs $cmd is not visible in this excerpt.
307 print("> $cmd\n") if $verbose;
312 # synchronize the clip generation / processing if it's running in another process
#
# Takes the TTS object (hash reference). Only the SAPI engine does anything
# here: a SYNC request is written to the "stdin" handle and one line is then
# read back from the "stdout" handle, which blocks until a reply arrives.
314 my ($tts_object) = @_;
315 if ($$tts_object{"name"} eq "sapi") {
316 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
317 my $wait = readline($$tts_object{"stdout"});
318 #ignore what's actually returned
322 # Run genlang and create voice clips for each string
#
# Arguments (from @_): language, target, encoder command, encoder options,
# TTS engine name and TTS engine options.
# NOTE(review): this listing has gaps - the loop that reads genlang's output
# into $line, the declarations of $id, $voice, $i and $pool_file, and several
# conditions are not visible in this excerpt.
325 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
# Language files are located relative to the directory of this script.
326 my $english = dirname
($0) . '/../apps/lang/english.lang';
327 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
# genlang's stderr is discarded.
330 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
# The file "voicefontids" is created/truncated in the current directory.
# NOTE(review): the result of open() is not checked.
332 open(VOICEFONTIDS
, "> voicefontids");
334 local $| = 1; # make progress indicator work reliably
336 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
337 print("Generating voice clips");
338 print("\n") if $verbose;
# Every line handled is copied to VOICEFONTIDS; lines of the form "id: ..."
# and voice: "..." are recognised by the two patterns below.
341 print(VOICEFONTIDS
$line);
342 if ($line =~ /^id: (.*)$/) {
345 elsif ($line =~ /^voice: "(.*)"$/) {
# Skip ids starting with NOT_USED_ and entries with an empty voice string.
347 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
# Clip file names are derived from the id.
348 my $wav = $id . '.wav';
349 my $mp3 = $id . '.mp3';
351 # Print some progress information
# Applies to every 10th clip, and only when not verbose.
352 if (++$i % 10 == 0 and !$verbose) {
356 # Apply corrections to the string
357 $voice = correct_string
($voice, $language, $tts_object);
359 # If we have a pool of snippets, see if the string exists there first
# The pool directory comes from the POOL environment variable. The file name
# includes an MD5 hex digest of the corrected text, the engine name, the
# engine options and the encoder options; the remaining sprintf argument is
# not visible in this excerpt.
360 if (defined($ENV{'POOL'})) {
361 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
362 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
365 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
366 copy
($pool_file, $mp3);
370 # Don't generate MP3 if it already exists (probably from the POOL)
# VOICE_PAUSE is not synthesised: a wav shipped next to this script is
# copied instead.
372 if ($id eq "VOICE_PAUSE") {
373 print("Use distributed $wav\n") if $verbose;
374 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
# Otherwise: synthesise the wav, then trim it.
377 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
378 wavtrim
($wav, 500, $tts_object);
379 # 500 seems to be a reasonable default for now
# Encode the wav and wait for the engine to catch up; when a pool is
# configured, the encoded clip is also copied into it.
382 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
383 synchronize
($tts_object);
384 if (defined($ENV{'POOL'})) {
385 copy
($mp3, $pool_file);
396 shutdown_tts
($tts_object);
399 # Assemble the voicefile
#
# Arguments (from @_): the language and the target id.
# NOTE(review): the line that opens this routine, the statement that runs
# $cmd and fills $output, and the bodies of the two loops below are not
# visible in this excerpt.
402 my ($language, $target_id) = @_;
# The output file is named after the language with a ".voice" suffix.
404 $outfile = sprintf("%s.voice", $language);
405 printf("Saving voice file to %s\n", $outfile) if $verbose;
# The voicefont tool is given the "voicefontids" file, the target id, the
# current directory ("./") and the output file name.
406 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
407 print("> $cmd\n") if $verbose;
409 print($output) if $verbose;
# Iterate over all *.mp3, then all *.wav files in the current directory;
# presumably to clean them up - the loop bodies are not visible here.
413 for (glob('*.mp3')) {
416 for (glob('*.wav')) {
426 # Generate .talk clips
#
# Arguments (from @_): directory to process, TTS object, encoder command,
# encoder options, TTS engine options, and a running clip counter.
# The routine calls itself for sub-paths (see the recursive call below).
# NOTE(review): $i is copied from @_, so increments made inside a recursive
# call are not seen by the caller.
# NOTE(review): this listing has gaps - the assignment of $voice, the test
# that separates directories from files, and the cleanup after encoding are
# not visible in this excerpt.
429 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
# Indirect-object form of a DirHandle constructor call on $dir.
430 my $d = new DirHandle
$dir;
431 while (my $file = $d->read) {
432 my ($voice, $wav, $mp3);
433 # Print some progress information
# Applies to every 10th entry, and only when not verbose.
434 if (++$i % 10 == 0 and !$verbose) {
438 # Convert to a complete path
439 my $path = sprintf("%s/%s", $dir, $file);
# Temporary wav name used for this entry.
442 $wav = sprintf("%s.talk.wav", $path);
444 # Ignore dot-dirs and talk files
445 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
# Recurse into $path; the clip for a directory is stored inside it as
# "_dirname.talk".
450 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
451 $mp3 = sprintf("%s/_dirname.talk", $path);
# Otherwise the clip is stored next to the entry as "<path>.talk".
455 $mp3 = sprintf("%s.talk", $path);
456 $voice =~ s/\.[^\.]*$//; # Trim extension
# NOTE(review): this format string has no trailing newline.
459 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
# Same pipeline as for voice strings: synthesise, trim, encode, then wait
# for the engine to catch up.
461 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
462 wavtrim
($wav, 500, $tts_object);
463 # 500 seems to be a reasonable default for now
464 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
465 synchronize
($tts_object);
# Top-level script logic: validate the command-line options held in the
# single-letter package variables, then run either voice-file generation or
# .talk clip generation.
# NOTE(review): this listing has gaps - the option parsing itself and the
# "if" that the "elsif" below belongs to are not visible in this excerpt.
# Each failed check prints a message and sets $printusage.
473 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
475 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
476 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
477 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
# With -C the first remaining command-line argument is the path to process.
479 elsif (defined($C)) {
480 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
482 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
483 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
484 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
485 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
# Any failed check ends the run with the usage text and exit status 1.
486 if ($printusage == 1) { printusage
(); exit 1; }
# Either the -v option or the V environment variable triggers this branch;
# its body is not visible in this excerpt.
488 if (defined($v) or defined($ENV{'V'})) {
492 # add the tools dir to the path temporarily, for calling various tools
# The directory of this script is prepended to PATH, separated by ':'.
493 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
498 # Only do the panic cleanup for voicefiles
# NOTE(review): SIGKILL normally cannot be caught, so the KILL entry
# presumably has no effect - confirm.
499 $SIG{INT
} = \
&panic_cleanup
;
500 $SIG{KILL
} = \
&panic_cleanup
;
# Voice-file mode: print a summary of the settings, then generate the clips.
502 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
503 $t, $l, $e, $E, $s, $S);
504 generateclips
($l, $t, $e, $E, $s, $S);
# Talk-clip mode: print a summary, start the TTS engine, process the given
# path with the clip counter starting at 0, then shut the engine down.
509 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
510 my $tts_object = init_tts
($s, $S, $l);
511 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
512 shutdown_tts
($tts_object);