Updated italian lang file.
[kugel-rb.git] / tools / voice.pl
blob1cf6076efb812c28d0ff4799a960009cdb49685e
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
18 use strict;
19 use warnings;
20 use File::Basename;
21 use File::Copy;
22 use Switch;
23 use vars qw($V $C $t $l $e $E $s $S $i $v);
24 use IPC::Open2;
25 use IPC::Open3;
26 use Digest::MD5 qw(md5_hex);
27 use DirHandle;
28 use open IN => ':utf8';
# Print the command line help text to STDOUT.
#
# Takes no arguments. Every switch the script looks at (-V, -C, -t, -i, -l,
# -e, -E, -s, -S and -v) is listed here; -V additionally needs -t, -l and -i,
# which is what the parameter check at the bottom of the script enforces.
sub printusage {
    # Non-interpolating heredoc: the text is literal and contains '<' '>'
    # placeholders only.
    print <<'USAGE';

Usage: voice.pl [options] [path to dir]
 -V
    Create voice file. You must also specify -t, -l and -i.

 -C
    Create .talk clips.

 -t=<target>
    Specify which target you want to build voicefile for. Must include
    any features that target supports.

 -i=<target_id>
    Numeric target id. Needed for voice building.

 -l=<language>
    Specify which language you want to build. Without .lang extension.

 -e=<encoder>
    Which encoder to use for voice strings

 -E=<encoder options>
    Which encoder options to use when compressing voice strings. Enclose
    in double quotes if the options include spaces.

 -s=<TTS engine>
    Which TTS engine to use.

 -S=<TTS engine options>
    Options to pass to the TTS engine. Enclose in double quotes if the
    options include spaces.

 -v
    Be verbose
USAGE
}
# Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts
#
# Arguments:
#   $tts_engine      - engine name; "festival" and "sapi" need start-up work,
#                      any other name only gets recorded
#   $tts_engine_opts - option string appended to the engine command line
#   $language        - language name, handed to the SAPI helper script
#
# Returns a hash reference that always holds "name". For festival it also
# holds "pid" (the server started here); for sapi it holds "stdin", "stdout"
# (pipes to the helper script) and "vendor" (the helper's answer to
# "QUERY VENDOR").
#
# Dies if the festival server process cannot be started.
sub init_tts {
    our $verbose;
    my ($tts_engine, $tts_engine_opts, $language) = @_;
    my %ret = ("name" => $tts_engine);

    if ($tts_engine eq "festival") {
        print("> festival $tts_engine_opts --server\n") if $verbose;
        # The handle is a package global on purpose: it has to stay open
        # for as long as the server is supposed to run.
        my $pid = open(FESTIVAL_SERVER, '|-', "festival $tts_engine_opts --server > /dev/null 2>&1")
            or die("Could not start festival server: $!\n");
        my $dummy = *FESTIVAL_SERVER; #suppress warning
        # SIGKILL cannot be caught, so the second handler is installed for
        # SIGTERM, which can.
        $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
        $SIG{TERM} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
        $ret{"pid"} = $pid;
    }
    elsif ($tts_engine eq "sapi") {
        my $toolsdir = dirname($0);
        my $path = `cygpath $toolsdir -a -w`;
        chomp($path);
        $path = $path . '\\';
        my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
        # Double every backslash so the path survives the shell.
        $cmd =~ s/\\/\\\\/g;
        print("> cscript //nologo $cmd\n") if $verbose;
        my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
        binmode(*CMD_IN, ':encoding(utf16le):crlf');
        binmode(*CMD_OUT, ':encoding(utf16le):crlf');
        $SIG{INT} = sub { print(CMD_IN "QUIT\n"); panic_cleanup(); };
        $SIG{TERM} = sub { print(CMD_IN "QUIT\n"); panic_cleanup(); };
        print(CMD_IN "QUERY\tVENDOR\n");
        my $vendor = readline(*CMD_OUT);
        # The helper may exit without answering; treat that as "unknown".
        $vendor = "" unless defined($vendor);
        chomp($vendor);
        %ret = (%ret,
                "stdin" => *CMD_IN,
                "stdout" => *CMD_OUT,
                "vendor" => $vendor);
    }
    return \%ret;
}
# Shutdown TTS engine if necessary.
#
# Argument: the hash reference returned by init_tts.
# festival: the server process stored under "pid" gets SIGTERM.
# sapi:     "QUIT" is written to the helper's stdin pipe, which is then closed.
# Any other engine needs no shutdown work.
sub shutdown_tts {
    my ($tts_object) = @_;
    my $engine = $$tts_object{"name"};
    return unless defined($engine);

    if ($engine eq "festival") {
        my $pid = $$tts_object{"pid"};
        # A missing pid would be taken as 0 by kill, which addresses the
        # whole process group of this script -- never do that.
        if (defined($pid) && $pid > 0) {
            # Send SIGTERM to festival server
            kill(TERM => $pid);
        }
    }
    elsif ($engine eq "sapi") {
        print({$$tts_object{"stdin"}} "QUIT\n");
        close($$tts_object{"stdin"});
    }
    return;
}
# Apply corrections to a voice-string to make it sound better
#
# Arguments:
#   $string     - the text that is about to be spoken
#   $language   - language name; "english", "deutsch", "svenska" and
#                 "italiano" have rules of their own
#   $tts_object - hash reference from init_tts; "name" selects engine
#                 specific rules and "vendor" selects SAPI voice vendor rules
#
# Returns the corrected string. In verbose mode a changed string is reported
# on STDOUT as "original -> corrected".
#
# Plain if/elsif chains are used instead of the Switch source filter; the
# substitutions and the order they run in are unchanged.
sub correct_string {
    our $verbose;
    my ($string, $language, $tts_object) = @_;
    my $orig = $string;
    my $engine = defined($$tts_object{"name"}) ? $$tts_object{"name"} : "";
    my $vendor = defined($$tts_object{"vendor"}) ? $$tts_object{"vendor"} : "";
    $language = "" unless defined($language);

    # General for all engines and languages
    $string =~ s/USB/U S B/g;
    $string =~ s/ID3/I D 3/g;

    if ($language eq "english") {
        if ($engine eq "sapi" || $engine eq "festival") {
            $string =~ s/plugin(s?)/plug-in$1/ig;
        }
        # festival gets this on top of the rule above
        if ($engine eq "festival") {
            $string =~ s/\ba\b/ay/ig;
        }
    }
    elsif ($language eq "deutsch") {
        # for all german engines (e.g. for english words)
        $string =~ s/alkaline/alkalein/ig;
        $string =~ s/byte(s?)/beit$1/ig;
        $string =~ s/clip(s?)/klipp$1/ig;
        $string =~ s/cuesheet/kjuschiet/ig;
        $string =~ s/dither/didder/ig;
        $string =~ s/equalizer/iquileiser/ig;
        $string =~ s/\bflash\b/fläsh/ig;
        $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
        $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
        $string =~ s/\bloudness\b/laudness/ig;
        $string =~ s/\bunicode\b/unikod/ig;
        # just for SAPI with the AT&T voices
        if ($engine eq "sapi" && $vendor eq "AT&T Labs") {
            $string =~ s/alphabet/alfabet/ig;
            $string =~ s/ampere/amper/ig;
            $string =~ s/\bdezibel\b/de-zibell/ig;
            $string =~ s/diddering/didde-ring/ig;
            $string =~ s/energie\b/ener-gie/ig;
            $string =~ s/\Blauf\b/-lauf/ig;
            $string =~ s/\bnumerisch\b/numehrisch/ig;
        }
    }
    elsif ($language eq "svenska") {
        # for all swedish engines (e.g. for english words)
        $string =~ s/kilobyte/kilobajt/ig;
        $string =~ s/megabyte/megabajt/ig;
        $string =~ s/gigabyte/gigabajt/ig;
        $string =~ s/\bloudness\b/laudness/ig;

        if ($engine eq "espeak") { # just for eSpeak
            $string =~ s/ampere/ampär/ig;
            $string =~ s/bokmärken/bok-märken/ig;
            $string =~ s/generella/schenerella/ig;
            $string =~ s/dithering/diddering/ig;
            $string =~ s/\bunicode\b/jynikod/ig;
            $string =~ s/uttoning/utoning/ig;
            $string =~ s/procent/pro-cent/ig;
            $string =~ s/spellistor/spelistor/ig;
            $string =~ s/cuesheet/qjyschiit/ig;
        }
    }
    elsif ($language eq "italiano") {
        # for all italian engines (e.g. for english words)
        $string =~ s/Replaygain/Ripleyghein/ig;
        $string =~ s/Crossfade/Crossfeid/ig;
        $string =~ s/beep/Bip/ig;
        $string =~ s/cuesheet/chiushit/ig;
        $string =~ s/fade/feid/ig;
        $string =~ s/Crossfeed/crossfid/ig;
        $string =~ s/Cache/chash/ig;
        $string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
        $string =~ s/\bFile(s?)\b/fail$1/ig;
        $string =~ s/\bloudness\b/laudness/ig;
        $string =~ s/\bunicode\b/unikod/ig;
        $string =~ s/Playlist/pleylist/ig;
        $string =~ s/WavPack/wave pak/ig;
        $string =~ s/BITRATE/bit reit/ig;
        $string =~ s/Codepage/cod page/ig;
        $string =~ s/PCM Wave/pcm Ue'iv/ig;
        # NOTE(review): the next four rules replace an accented letter with
        # what looks like the same letter; presumably the two sides differed
        # in encoding originally -- confirm against the upstream file.
        $string =~ s/è/è/ig;
        $string =~ s/\b(s*)ì\b/$1ì/ig;
        $string =~ s/\b(s*)ù\b/$1ù/ig;
        $string =~ s/\b(s*)à\b/$1à/ig;
        if ($engine eq "sapi") { # just for SAPI
            if ($vendor eq "Loquendo") {
                $string =~ s/Inizializza/inizializa/ig;
            }
            elsif ($vendor eq "ScanSoft, Inc") {
                $string =~ s/V/v/ig;
                $string =~ s/X/x/ig;
                $string =~ s/stop/stohp/ig;
            }
        }
    }

    if ($orig ne $string) {
        printf("%s -> %s\n", $orig, $string) if $verbose;
    }
    return $string;
}
# Escape the characters that keep a special meaning inside a double-quoted
# shell word (backslash, double quote, dollar, backtick), so that arbitrary
# text can be placed between \"...\" on a command line.
sub _voicestring_shell_escape {
    my ($text) = @_;
    $text =~ s/([\\"\$`])/\\$1/g;
    return $text;
}

# Produce a wav file of the text given
#
# Arguments:
#   $string          - text to speak
#   $output          - name of the wav file to create
#   $tts_engine_opts - option string for the engine command line
#   $tts_object      - hash reference from init_tts; "name" selects the
#                      engine ("festival", "flite", "espeak", "sapi",
#                      "swift"), "stdin" is used for sapi
#
# Unknown engine names are ignored. Nothing useful is returned.
sub voicestring {
    our $verbose;
    my ($string, $output, $tts_engine_opts, $tts_object) = @_;
    my $cmd;
    # Each binmode with :encoding pushes one more layer, so only add it when
    # STDOUT does not carry an encoding layer yet.
    unless (grep { /^encoding/ } PerlIO::get_layers(*STDOUT)) {
        binmode(STDOUT, ':encoding(UTF-8)');
    }
    printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;

    my $engine = defined($$tts_object{"name"}) ? $$tts_object{"name"} : "";
    # Shell-safe copies for the engines that are started through a shell.
    my $sh_string = _voicestring_shell_escape($string);
    my $sh_output = _voicestring_shell_escape($output);

    if ($engine eq "festival") {
        # festival_client lies to us, so we have to do awful soul-eating
        # work with IPC::open3()
        $cmd = "festival_client --server localhost --otype riff --ttw --output \"$sh_output\"";
        print("> $cmd\n") if $verbose;
        # Open command, and filehandles for STDIN, STDOUT, STDERR
        my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
        # Put the string to speak into STDIN and close it
        binmode(CMD_IN, ':encoding(utf8)');
        print(CMD_IN $string);
        close(CMD_IN);
        # Read all output from festival_client (because it LIES TO US)
        while (<CMD_ERR>) {
        }
        close(CMD_OUT);
        close(CMD_ERR);
        # open3 never reaps its child; without this every clip would leave
        # a zombie process behind.
        waitpid($pid, 0);
    }
    elsif ($engine eq "flite") {
        $cmd = "flite $tts_engine_opts -t \"$sh_string\" \"$sh_output\"";
        print("> $cmd\n") if $verbose;
        `$cmd`;
    }
    elsif ($engine eq "espeak") {
        $cmd = "espeak $tts_engine_opts -w \"$sh_output\"";
        print("> $cmd\n") if $verbose;
        if (open(my $espeak, "|-:encoding(utf8)", $cmd)) {
            print({$espeak} $string . "\n");
            close($espeak);
        }
        else {
            warn("Could not run espeak: $!\n");
        }
    }
    elsif ($engine eq "sapi") {
        # No shell involved here: the helper script reads tab separated fields.
        print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\n");
    }
    elsif ($engine eq "swift") {
        $cmd = "swift $tts_engine_opts -o \"$sh_output\" \"$sh_string\"";
        print("> $cmd\n") if $verbose;
        system($cmd);
    }
    return;
}
# Cut the silence off both ends of a wav clip.
#
# Arguments: clip file name, silence threshold handed to the wavtrim tool,
# and the hash reference from init_tts. With the sapi engine the command is
# sent to the helper process as an EXEC request; otherwise it is run here.
sub wavtrim {
    our $verbose;
    my ($file, $threshold, $tts_object) = @_;
    printf("Trim \"%s\"\n", $file) if $verbose;
    my $cmd = "wavtrim \"$file\" $threshold";
    if ($tts_object->{"name"} ne "sapi") {
        # run the tool ourselves and throw its output away
        print("> $cmd\n") if $verbose;
        qx{$cmd};
    }
    else {
        print({$tts_object->{"stdin"}} "EXEC\t$cmd\n");
    }
}
# Run the encoder on a wav file to produce the destination file.
#
# Arguments: input wav name, output file name, encoder command, encoder
# option string, and the hash reference from init_tts. With the sapi engine
# the command is sent to the helper process as an EXEC request; otherwise it
# is run here.
sub encodewav {
    our $verbose;
    my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
    printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
    my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
    if ($tts_object->{"name"} ne "sapi") {
        # run the encoder ourselves and throw its output away
        print("> $cmd\n") if $verbose;
        qx{$cmd};
    }
    else {
        print({$tts_object->{"stdin"}} "EXEC\t$cmd\n");
    }
}
# Wait until clip generation / processing running in another process has
# caught up. Only the sapi engine works that way: a SYNC request is written
# to its stdin and one line is read back from its stdout.
sub synchronize {
    my ($tts_object) = @_;
    return unless $tts_object->{"name"} eq "sapi";

    print({$tts_object->{"stdin"}} "SYNC\t42\n");
    # the content of the reply does not matter, only that it arrived
    my $reply = readline($tts_object->{"stdout"});
}
# Run genlang and create voice clips for each string
#
# Arguments: language name, target, encoder command, encoder options,
# TTS engine name, TTS engine options.
#
# Every line genlang prints is copied into the file "voicefontids" in the
# current directory. For each id/voice pair (ids starting with NOT_USED_ and
# empty voice strings are skipped) an <id>.mp3 is produced, either copied
# from the directory named by $ENV{POOL} or generated through the TTS engine
# and the encoder. Freshly generated clips are copied into the pool.
#
# Dies if "voicefontids" cannot be opened for writing.
sub generateclips {
    our $verbose;
    my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
    my $english = dirname($0) . '/../apps/lang/english.lang';
    my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
    my $id = '';
    my $voice = '';
    my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
    my $pool_file;
    open(my $voicefontids, ">:utf8", "voicefontids")
        or die("Could not open voicefontids for writing: $!\n");
    my $i = 0;
    local $| = 1; # make progress indicator work reliably

    my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
    print("Generating voice clips");
    print("\n") if $verbose;
    for my $line (`$cmd`) {
        print({$voicefontids} $line);
        if ($line =~ /^id: (.*)$/) {
            $id = $1;
        }
        elsif ($line =~ /^voice: "(.*)"$/) {
            $voice = $1;
            if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
                my $wav = $id . '.wav';
                my $mp3 = $id . '.mp3';

                # Print some progress information
                if (++$i % 10 == 0 and !$verbose) {
                    print(".");
                }

                # Apply corrections to the string
                $voice = correct_string($voice, $language, $tts_object);

                # If we have a pool of snippets, see if the string exists there first
                if (defined($ENV{'POOL'})) {
                    my $pool_key = "$voice $tts_engine $tts_engine_opts $encoder_opts";
                    # md5_hex croaks on characters above 0xFF. Encode only in
                    # that case, so keys that worked before keep their hash.
                    utf8::encode($pool_key) if $pool_key =~ /[^\x00-\xff]/;
                    $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
                        md5_hex($pool_key),
                        $language);
                    if (-f $pool_file) {
                        printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
                        copy($pool_file, $mp3);
                    }
                }

                # Don't generate MP3 if it already exists (probably from the POOL)
                if (! -f $mp3) {
                    if ($id eq "VOICE_PAUSE") {
                        print("Use distributed $wav\n") if $verbose;
                        copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
                    }
                    else {
                        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
                        wavtrim($wav, 500, $tts_object);
                        # 500 seems to be a reasonable default for now
                    }

                    encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
                    synchronize($tts_object);
                    if (defined($ENV{'POOL'})) {
                        copy($mp3, $pool_file);
                    }
                    unlink($wav);
                }
                $voice = "";
                $id = "";
            }
        }
    }
    print("\n");
    close($voicefontids);
    shutdown_tts($tts_object);
}
# Build the voice file for a language.
#
# Arguments: language name (the result is written to "<language>.voice")
# and the numeric target id. The work is done by the external "voicefont"
# tool, which is given the "voicefontids" list and the current directory;
# whatever the tool prints is shown in verbose mode only.
sub createvoice {
    our $verbose;
    my ($language, $target_id) = @_;
    my $outfile = sprintf("%s.voice", $language);
    printf("Saving voice file to %s\n", $outfile) if $verbose;

    my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
    print("> $cmd\n") if $verbose;
    my $output = qx{$cmd};
    print($output) if $verbose;
}
# Remove every *.mp3 and then every *.wav file from the current directory.
sub deletemp3s() {
    foreach my $pattern ('*.mp3', '*.wav') {
        foreach my $clip (glob($pattern)) {
            unlink($clip);
        }
    }
}
# Emergency exit, installed as a signal handler: throw away the clips
# produced so far, then abort the script.
sub panic_cleanup {
    deletemp3s();
    die("moo");
}
# Generate .talk clips
#
# Walks $dir recursively. Every file gets a "<file>.talk" clip speaking the
# file name without its extension; every sub directory gets a
# "<dir>/_dirname.talk" clip speaking the directory name. Entries that
# already end in ".talk" are skipped.
#
# Arguments:
#   $dir             - directory to process
#   $tts_object      - hash reference from init_tts
#   $encoder         - encoder command
#   $encoder_opts    - encoder option string
#   $tts_engine_opts - TTS engine option string
#   $i               - running entry count, used for the progress dots
#
# A directory that cannot be opened is reported with a warning and skipped.
sub gentalkclips {
    our $verbose;
    my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
    my $d = DirHandle->new($dir);
    unless (defined($d)) {
        warn("Could not open directory $dir: $!\n");
        return;
    }
    # Test for definedness: an entry named "0" is false but perfectly valid
    # and must not end the loop.
    while (defined(my $file = $d->read)) {
        my ($voice, $wav, $mp3);
        # Print some progress information
        if (++$i % 10 == 0 and !$verbose) {
            print(".");
        }

        # Ignore dot-dirs and talk files
        if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
            next;
        }

        # Convert to a complete path
        my $path = sprintf("%s/%s", $dir, $file);

        $voice = $file;
        $wav = sprintf("%s.talk.wav", $path);

        # Element is a dir
        if ( -d $path) {
            gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
            $mp3 = sprintf("%s/_dirname.talk", $path);
        }
        # Element is a file
        else {
            $mp3 = sprintf("%s.talk", $path);
            $voice =~ s/\.[^\.]*$//; # Trim extension
        }

        printf("Talkclip %s: %s\n", $mp3, $voice) if $verbose;

        voicestring($voice, $wav, $tts_engine_opts, $tts_object);
        wavtrim($wav, 500, $tts_object);
        # 500 seems to be a reasonable default for now
        encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
        synchronize($tts_object);
        unlink($wav);
    }
    return;
}
# Check parameters
# The switches arrive as package variables through "perl -s": either -V
# (voice file; needs -t, -l and -i) or -C (.talk clips; needs a path), plus
# -e, -E, -s and -S in both modes.
my $printusage = 0;
unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
if (defined($V)) {
    unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
    unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
    unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
}
elsif (defined($C)) {
    unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
}
unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
if ($printusage == 1) { printusage(); exit 1; }

# Verbose mode can be requested with -v or through the V environment variable
if (defined($v) or defined($ENV{'V'})) {
    our $verbose = 1;
}

# add the tools dir to the path temporarily, for calling various tools
$ENV{'PATH'} = dirname($0) . ':' . $ENV{'PATH'};

# Do what we're told
# $V is undefined when only -C was given, so test definedness before comparing.
if (defined($V) and $V == 1) {
    # Only do the panic cleanup for voicefiles
    # (SIGKILL cannot be caught, so SIGTERM is handled instead)
    $SIG{INT} = \&panic_cleanup;
    $SIG{TERM} = \&panic_cleanup;

    printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
        $t, $l, $e, $E, $s, $S);
    generateclips($l, $t, $e, $E, $s, $S);
    createvoice($l, $i);
    deletemp3s();
}
elsif ($C) {
    # -l is optional in this mode; use an empty language name rather than undef
    $l = "" unless defined($l);
    printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
    my $tts_object = init_tts($s, $S, $l);
    gentalkclips($ARGV[0], $tts_object, $e, $E, $S, 0);
    shutdown_tts($tts_object);
}
else {
    printusage();
    exit 1;
}