3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
26 use Digest::MD5 qw(md5_hex);
28 use open IN
=> ':utf8';
33 Usage: voice.pl [options] [path to dir]
35 Create voice file. You must also specify -t and -l.
41 Specify which target you want to build voicefile for. Must include
42 any features that target supports.
45 Numeric target id. Needed for voice building.
48 Specify which language you want to build. Without .lang extension.
51 Which encoder to use for voice strings
54 Which encoder options to use when compressing voice strings. Enclose
55 in double quotes if the options include spaces.
58 Which TTS engine to use.
60 -S=<TTS engine options>
61 Options to pass to the TTS engine. Enclose in double quotes if the
62 options include spaces.
70 # Initialize TTS engine. May return an object or value which will be passed
71 # to voicestring and shutdown_tts
74 my ($tts_engine, $tts_engine_opts, $language) = @_;
75 my %ret = ("name" => $tts_engine);
78 print("> festival $tts_engine_opts --server\n") if $verbose;
79 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
80 my $dummy = *FESTIVAL_SERVER
; #suppress warning
81 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
82 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
86 my $toolsdir = dirname
($0);
87 my $path = `cygpath $toolsdir -a -w`;
90 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
92 print("> cscript //nologo $cmd\n") if $verbose;
93 my $pid = open2
(*CMD_OUT
, *CMD_IN
, "cscript //nologo $cmd");
94 binmode(*CMD_IN
, ':encoding(utf16le):crlf');
95 binmode(*CMD_OUT
, ':encoding(utf16le):crlf');
96 $SIG{INT
} = sub { print(CMD_IN
"QUIT\n"); panic_cleanup
(); };
97 $SIG{KILL
} = sub { print(CMD_IN
"QUIT\n"); panic_cleanup
(); };
98 print(CMD_IN
"QUERY\tVENDOR\n");
99 my $vendor = readline(*CMD_OUT
);
103 "stdout" => *CMD_OUT
,
104 "vendor" => $vendor);
110 # Shutdown TTS engine if necessary.
112 my ($tts_object) = @_;
113 switch
($$tts_object{"name"}) {
115 # Send SIGTERM to festival server
116 kill TERM
=> $$tts_object{"pid"};
119 print({$$tts_object{"stdin"}} "QUIT\n");
120 close($$tts_object{"stdin"});
125 # Apply corrections to a voice-string to make it sound better
128 my ($string, $language, $tts_object) = @_;
131 # General for all engines and languages
132 $string =~ s/USB/U S B/g;
133 $string =~ s/ID3/I D 3/g;
136 switch
($$tts_object{"name"}) {
137 case
["sapi","festival"] {
138 $string =~ s/plugin(s?)/plug-in$1/ig; next
141 $string =~ s/\ba\b/ay/ig;
146 # for all German engines (e.g. for English words)
147 $string =~ s/alkaline/alkalein/ig;
148 $string =~ s/byte(s?)/beit$1/ig;
149 $string =~ s/clip(s?)/klipp$1/ig;
150 $string =~ s/cuesheet/kjuschiet/ig;
151 $string =~ s/dither/didder/ig;
152 $string =~ s/equalizer/iquileiser/ig;
153 $string =~ s/\bflash\b/fläsh/ig;
154 $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
155 $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
156 $string =~ s/\bloudness\b/laudness/ig;
157 $string =~ s/\bunicode\b/unikod/ig;
158 switch
($$tts_object{"name"}) {
159 case
"sapi" { # just for SAPI
160 switch
($$tts_object{"vendor"}) {
162 $string =~ s/alphabet/alfabet/ig;
163 $string =~ s/ampere/amper/ig;
164 $string =~ s/\bdezibel\b/de-zibell/ig;
165 $string =~ s/diddering/didde-ring/ig;
166 $string =~ s/energie\b/ener-gie/ig;
167 $string =~ s/\Blauf\b/-lauf/ig;
168 $string =~ s/\bnumerisch\b/numehrisch/ig;
175 # for all Swedish engines (e.g. for English words)
176 $string =~ s/kilobyte/kilobajt/ig;
177 $string =~ s/megabyte/megabajt/ig;
178 $string =~ s/gigabyte/gigabajt/ig;
179 $string =~ s/\bloudness\b/laudness/ig;
181 switch
($$tts_object{"name"}) {
182 case
"espeak" { # just for eSpeak
183 $string =~ s/ampere/ampär/ig;
184 $string =~ s/bokmärken/bok-märken/ig;
185 $string =~ s/generella/schenerella/ig;
186 $string =~ s/dithering/diddering/ig;
187 $string =~ s/\bunicode\b/jynikod/ig;
188 $string =~ s/uttoning/utoning/ig;
189 $string =~ s/procent/pro-cent/ig;
190 $string =~ s/spellistor/spelistor/ig;
191 $string =~ s/cuesheet/qjyschiit/ig;
196 # for all Italian engines (e.g. for English words)
197 $string =~ s/Replaygain/Ripleyghein/ig;
198 $string =~ s/Crossfade/Crossfeid/ig;
199 $string =~ s/beep/Bip/ig;
200 $string =~ s/cuesheet/chiushit/ig;
201 $string =~ s/fade/feid/ig;
202 $string =~ s/Crossfeed/crossfid/ig;
203 $string =~ s/Cache/chash/ig;
204 $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
205 $string =~ s/\bFile(s?)\b/fail$1/ig;
206 $string =~ s/\bloudness\b/laudness/ig;
207 $string =~ s/\bunicode\b/unikod/ig;
208 $string =~ s/Playlist/pleylist/ig;
209 $string =~ s/WavPack/wave pak/ig;
210 $string =~ s/BITRATE/bit reit/ig;
211 $string =~ s/Codepage/cod page/ig;
212 $string =~ s/PCM Wave/pcm Ue'iv/ig;
213 $string =~ s/è/è/ig;
214 $string =~ s/\b(s*)ì\b/$1ì/ig;
215 $string =~ s/\b(s*)ù\b/$1ù/ig;
216 $string =~ s/\b(s*)Ã \b/$1à/ig;
217 switch
($$tts_object{"name"}) {
218 case
"sapi" { # just for SAPI
219 switch
($$tts_object{"vendor"}) {
221 $string =~ s/Inizializza/inizializa/ig;
223 case
"ScanSoft, Inc" {
226 $string =~ s/stop/stohp/ig;
233 if ($orig ne $string) {
234 printf("%s -> %s\n", $orig, $string) if $verbose;
239 # Produce a wav file of the text given
242 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
244 binmode(STDOUT
, ':encoding(UTF-8)');
245 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
246 switch
($$tts_object{"name"}) {
248 # festival_client lies to us, so we have to do awful soul-eating
249 # work with open3() from IPC::Open3
250 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
251 print("> $cmd\n") if $verbose;
252 # Open command, and filehandles for STDIN, STDOUT, STDERR
253 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
254 # Put the string to speak into STDIN and close it
255 binmode(CMD_IN
, ':encoding(utf8)');
256 print(CMD_IN
$string);
258 # Read all output from festival_client (because it LIES TO US)
265 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
266 print("> $cmd\n") if $verbose;
270 $cmd = "espeak $tts_engine_opts -w \"$output\"";
271 print("> $cmd\n") if $verbose;
272 open(ESPEAK
, "|-:encoding(utf8)", $cmd);
273 print ESPEAK
$string . "\n";
277 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\n");
280 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
281 print("> $cmd\n") if $verbose;
287 # trim leading / trailing silence from the clip
290 my ($file, $threshold, $tts_object) = @_;
291 printf("Trim \"%s\"\n", $file) if $verbose;
292 my $cmd = "wavtrim \"$file\" $threshold";
293 if ($$tts_object{"name"} eq "sapi") {
294 print({$$tts_object{"stdin"}} "EXEC\t$cmd\n");
297 print("> $cmd\n") if $verbose;
302 # Encode a wav file into the given destination file
305 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
306 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
307 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
308 if ($$tts_object{"name"} eq "sapi") {
309 print({$$tts_object{"stdin"}} "EXEC\t$cmd\n");
312 print("> $cmd\n") if $verbose;
317 # synchronize the clip generation / processing if it's running in another process
319 my ($tts_object) = @_;
320 if ($$tts_object{"name"} eq "sapi") {
321 print({$$tts_object{"stdin"}} "SYNC\t42\n");
322 my $wait = readline($$tts_object{"stdout"});
323 #ignore what's actually returned
327 # Run genlang and create voice clips for each string
330 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
331 my $english = dirname
($0) . '/../apps/lang/english.lang';
332 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
335 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
337 open(VOICEFONTIDS
, ">:utf8", "voicefontids");
339 local $| = 1; # make progress indicator work reliably
341 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
342 print("Generating voice clips");
343 print("\n") if $verbose;
346 print(VOICEFONTIDS
$line);
347 if ($line =~ /^id: (.*)$/) {
350 elsif ($line =~ /^voice: "(.*)"$/) {
352 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
353 my $wav = $id . '.wav';
354 my $mp3 = $id . '.mp3';
356 # Print some progress information
357 if (++$i % 10 == 0 and !$verbose) {
361 # Apply corrections to the string
362 $voice = correct_string
($voice, $language, $tts_object);
364 # If we have a pool of snippets, see if the string exists there first
365 if (defined($ENV{'POOL'})) {
366 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
367 md5_hex
("$voice $tts_engine $tts_engine_opts $encoder_opts"),
370 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
371 copy
($pool_file, $mp3);
375 # Don't generate MP3 if it already exists (probably from the POOL)
377 if ($id eq "VOICE_PAUSE") {
378 print("Use distributed $wav\n") if $verbose;
379 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
382 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
383 wavtrim
($wav, 500, $tts_object);
384 # 500 seems to be a reasonable default for now
387 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
388 synchronize
($tts_object);
389 if (defined($ENV{'POOL'})) {
390 copy
($mp3, $pool_file);
401 shutdown_tts
($tts_object);
404 # Assemble the voicefile
407 my ($language, $target_id) = @_;
409 $outfile = sprintf("%s.voice", $language);
410 printf("Saving voice file to %s\n", $outfile) if $verbose;
411 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
412 print("> $cmd\n") if $verbose;
414 print($output) if $verbose;
418 for (glob('*.mp3')) {
421 for (glob('*.wav')) {
431 # Generate .talk clips
434 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
435 my $d = new DirHandle
$dir;
436 while (my $file = $d->read) {
437 my ($voice, $wav, $mp3);
438 # Print some progress information
439 if (++$i % 10 == 0 and !$verbose) {
443 # Convert to a complete path
444 my $path = sprintf("%s/%s", $dir, $file);
447 $wav = sprintf("%s.talk.wav", $path);
449 # Ignore dot-dirs and talk files
450 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
455 gentalkclips
($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
456 $mp3 = sprintf("%s/_dirname.talk", $path);
460 $mp3 = sprintf("%s.talk", $path);
461 $voice =~ s/\.[^\.]*$//; # Trim extension
464 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
466 voicestring
($voice, $wav, $tts_engine_opts, $tts_object);
467 wavtrim
($wav, 500, $tts_object);
468 # 500 seems to be a reasonable default for now
469 encodewav
($wav, $mp3, $encoder, $encoder_opts, $tts_object);
470 synchronize
($tts_object);
478 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
480 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
481 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
482 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
484 elsif (defined($C)) {
485 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
487 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
488 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
489 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
490 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
491 if ($printusage == 1) { printusage
(); exit 1; }
493 if (defined($v) or defined($ENV{'V'})) {
497 # add the tools dir to the path temporarily, for calling various tools
498 $ENV{'PATH'} = dirname
($0) . ':' . $ENV{'PATH'};
503 # Only do the panic cleanup for voicefiles
504 $SIG{INT
} = \
&panic_cleanup
;
505 $SIG{KILL
} = \
&panic_cleanup
;
507 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
508 $t, $l, $e, $E, $s, $S);
509 generateclips
($l, $t, $e, $E, $s, $S);
514 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
515 my $tts_object = init_tts
($s, $S, $l);
516 gentalkclips
($ARGV[0], $tts_object, $e, $E, $S, 0);
517 shutdown_tts
($tts_object);