Prepare new maemo release
[maemo-rb.git] / tools / voice.pl
blobf74791babb76a5e68a4e492c9b63d595c96c27b8
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
18 use strict;
19 use warnings;
20 use File::Basename;
21 use File::Copy;
22 use vars qw($V $C $t $l $e $E $s $S $i $v);
23 use IPC::Open2;
24 use IPC::Open3;
25 use Digest::MD5 qw(md5_hex);
26 use DirHandle;
27 use open ':encoding(utf8)';
28 use open ':std';
30 sub printusage {
31 print <<USAGE
33 Usage: voice.pl [options] [path to dir]
35 Create voice file. You must also specify -t and -l.
38 Create .talk clips.
40 -t=<target>
41 Specify which target you want to build voicefile for. Must include
42 any features that target supports.
44 -i=<target_id>
45 Numeric target id. Needed for voice building.
47 -l=<language>
48 Specify which language you want to build. Without .lang extension.
50 -e=<encoder>
51 Which encoder to use for voice strings
53 -E=<encoder options>
54 Which encoder options to use when compressing voice strings. Enclose
55 in double quotes if the options include spaces.
57 -s=<TTS engine>
58 Which TTS engine to use.
60 -S=<TTS engine options>
61 Options to pass to the TTS engine. Enclose in double quotes if the
62 options include spaces.
65 Be verbose
66 USAGE
70 # Initialize TTS engine. May return an object or value which will be passed
71 # to voicestring and shutdown_tts
72 sub init_tts {
73 our $verbose;
74 my ($tts_engine, $tts_engine_opts, $language) = @_;
75 my %ret = ("name" => $tts_engine);
76 # Don't use given/when here - it's not compatible with old perl versions
77 if ($tts_engine eq 'festival') {
78 print("> festival $tts_engine_opts --server\n") if $verbose;
79 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
80 my $dummy = *FESTIVAL_SERVER; #suppress warning
81 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
82 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
83 $ret{"pid"} = $pid;
85 elsif ($tts_engine eq 'sapi') {
86 my $toolsdir = dirname($0);
87 my $path = `cygpath $toolsdir -a -w`;
88 chomp($path);
89 $path = $path . '\\';
90 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
91 $cmd =~ s/\\/\\\\/g;
92 print("> cscript //nologo $cmd\n") if $verbose;
93 my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
94 binmode(*CMD_IN, ':encoding(utf16le)');
95 binmode(*CMD_OUT, ':encoding(utf16le)');
96 $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
97 $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
98 print(CMD_IN "QUERY\tVENDOR\r\n");
99 my $vendor = readline(*CMD_OUT);
100 $vendor =~ s/\r\n//;
101 %ret = (%ret,
102 "stdin" => *CMD_IN,
103 "stdout" => *CMD_OUT,
104 "vendor" => $vendor);
106 return \%ret;
109 # Shutdown TTS engine if necessary.
110 sub shutdown_tts {
111 my ($tts_object) = @_;
112 if ($$tts_object{'name'} eq 'festival') {
113 # Send SIGTERM to festival server
114 kill TERM => $$tts_object{"pid"};
116 elsif ($$tts_object{'name'} eq 'sapi') {
117 print({$$tts_object{"stdin"}} "QUIT\r\n");
118 close($$tts_object{"stdin"});
122 # Apply corrections to a voice-string to make it sound better
123 sub correct_string {
124 our $verbose;
125 my ($string, $language, $tts_object) = @_;
126 my $orig = $string;
127 my $corrections = $tts_object->{"corrections"};
129 foreach (@$corrections) {
130 my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
131 . $_->{replace} . $_->{separator} . $_->{modifier};
132 eval ('$string =~' . "$r;");
134 if ($orig ne $string) {
135 printf("%s -> %s\n", $orig, $string) if $verbose;
137 return $string;
140 # Produce a wav file of the text given
141 sub voicestring {
142 our $verbose;
143 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
144 my $cmd;
145 my $name = $$tts_object{'name'};
146 printf("Generate \"%s\" with %s in file %s\n", $string, $name, $output) if $verbose;
147 if ($name eq 'festival') {
148 # festival_client lies to us, so we have to do awful soul-eating
149 # work with IPC::open3()
150 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
151 # Use festival-prolog.scm if it's there (created by user of tools/configure)
152 if (-f "festival-prolog.scm") {
153 $cmd .= " --prolog festival-prolog.scm";
155 print("> $cmd\n") if $verbose;
156 # Open command, and filehandles for STDIN, STDOUT, STDERR
157 my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
158 # Put the string to speak into STDIN and close it
159 print(CMD_IN $string);
160 close(CMD_IN);
161 # Read all output from festival_client (because it LIES TO US)
162 while (<CMD_ERR>) {
164 close(CMD_OUT);
165 close(CMD_ERR);
167 elsif ($name eq 'flite') {
168 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
169 print("> $cmd\n") if $verbose;
170 `$cmd`;
172 elsif ($name eq 'espeak') {
173 $cmd = "espeak $tts_engine_opts -w \"$output\"";
174 print("> $cmd\n") if $verbose;
175 open(ESPEAK, "| $cmd");
176 print ESPEAK $string . "\n";
177 close(ESPEAK);
179 elsif ($name eq 'sapi') {
180 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
182 elsif ($name eq 'swift') {
183 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
184 print("> $cmd\n") if $verbose;
185 system($cmd);
189 # trim leading / trailing silence from the clip
190 sub wavtrim {
191 our $verbose;
192 my ($file, $threshold, $tts_object) = @_;
193 printf("Trim \"%s\"\n", $file) if $verbose;
194 my $cmd = "wavtrim \"$file\" $threshold";
195 if ($$tts_object{"name"} eq "sapi") {
196 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
198 else {
199 print("> $cmd\n") if $verbose;
200 `$cmd`;
204 # Encode a wav file into the given destination file
205 sub encodewav {
206 our $verbose;
207 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
208 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
209 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
210 if ($$tts_object{"name"} eq "sapi") {
211 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
213 else {
214 print("> $cmd\n") if $verbose;
215 `$cmd`;
219 # synchronize the clip generation / processing if it's running in another process
220 sub synchronize {
221 my ($tts_object) = @_;
222 if ($$tts_object{"name"} eq "sapi") {
223 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
224 my $wait = readline($$tts_object{"stdout"});
225 #ignore what's actually returned
229 # Run genlang and create voice clips for each string
230 sub generateclips {
231 our $verbose;
232 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
233 my $english = dirname($0) . '/../apps/lang/english.lang';
234 my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
235 my $correctionsfile = dirname($0) . '/voice-corrections.txt';
236 my $id = '';
237 my $voice = '';
238 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
239 my $pool_file;
240 open(VOICEFONTIDS, "> voicefontids");
241 my $i = 0;
242 local $| = 1; # make progress indicator work reliably
244 my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
245 # add string corrections to tts_object.
246 my @corrects = ();
247 open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
248 while(<VOICEREGEXP>) {
249 # get first character of line
250 my $line = $_;
251 my $separator = substr($_, 0, 1);
252 if($separator =~ m/\s+/) {
253 next;
255 chomp($line);
256 $line =~ s/^.//g; # remove separator at beginning
257 my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
259 # does language match?
260 if($language !~ m/$lang/) {
261 next;
263 if($$tts_object{"name"} !~ m/$engine/) {
264 next;
266 my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
267 if($v !~ m/$vendor/) {
268 next;
270 push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
273 close(VOICEREGEXP);
274 $tts_object->{corrections} = [@corrects];
276 print("Generating voice clips");
277 print("\n") if $verbose;
278 for (`$cmd`) {
279 my $line = $_;
280 print(VOICEFONTIDS $line);
281 if ($line =~ /^id: (.*)$/) {
282 $id = $1;
284 elsif ($line =~ /^voice: "(.*)"$/) {
285 $voice = $1;
286 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
287 my $wav = $id . '.wav';
288 my $mp3 = $id . '.mp3';
290 # Print some progress information
291 if (++$i % 10 == 0 and !$verbose) {
292 print(".");
295 # Apply corrections to the string
296 $voice = correct_string($voice, $language, $tts_object);
298 # If we have a pool of snippets, see if the string exists there first
299 if (defined($ENV{'POOL'})) {
300 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
301 md5_hex("$voice $tts_engine $tts_engine_opts $encoder_opts"),
302 $language);
303 if (-f $pool_file) {
304 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
305 copy($pool_file, $mp3);
309 # Don't generate MP3 if it already exists (probably from the POOL)
310 if (! -f $mp3) {
311 if ($id eq "VOICE_PAUSE") {
312 print("Use distributed $wav\n") if $verbose;
313 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
315 else {
316 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
317 wavtrim($wav, 500, $tts_object);
318 # 500 seems to be a reasonable default for now
321 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
322 synchronize($tts_object);
323 if (defined($ENV{'POOL'})) {
324 copy($mp3, $pool_file);
326 unlink($wav);
328 $voice = "";
329 $id = "";
333 print("\n");
334 close(VOICEFONTIDS);
335 shutdown_tts($tts_object);
338 # Assemble the voicefile
339 sub createvoice {
340 our $verbose;
341 my ($language, $target_id) = @_;
342 my $outfile = "";
343 $outfile = sprintf("%s.voice", $language);
344 printf("Saving voice file to %s\n", $outfile) if $verbose;
345 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
346 print("> $cmd\n") if $verbose;
347 my $output = `$cmd`;
348 print($output) if $verbose;
351 sub deletemp3s() {
352 for (glob('*.mp3')) {
353 unlink($_);
355 for (glob('*.wav')) {
356 unlink($_);
360 sub panic_cleanup {
361 deletemp3s();
362 die "moo";
365 # Generate .talk clips
366 sub gentalkclips {
367 our $verbose;
368 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
369 my $d = new DirHandle $dir;
370 while (my $file = $d->read) {
371 my ($voice, $wav, $mp3);
372 # Print some progress information
373 if (++$i % 10 == 0 and !$verbose) {
374 print(".");
377 # Convert to a complete path
378 my $path = sprintf("%s/%s", $dir, $file);
380 $voice = $file;
381 $wav = sprintf("%s.talk.wav", $path);
383 # Ignore dot-dirs and talk files
384 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
385 next;
387 # Element is a dir
388 if ( -d $path) {
389 gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
390 $mp3 = sprintf("%s/_dirname.talk", $path);
392 # Element is a file
393 else {
394 $mp3 = sprintf("%s.talk", $path);
395 $voice =~ s/\.[^\.]*$//; # Trim extension
398 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
400 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
401 wavtrim($wav, 500, $tts_object);
402 # 500 seems to be a reasonable default for now
403 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
404 synchronize($tts_object);
405 unlink($wav);
410 # Check parameters
411 my $printusage = 0;
412 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
413 if (defined($V)) {
414 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
415 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
416 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
418 elsif (defined($C)) {
419 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
421 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
422 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
423 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
424 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
425 if ($printusage == 1) { printusage(); exit 1; }
427 if (defined($v) or defined($ENV{'V'})) {
428 our $verbose = 1;
431 # add the tools dir to the path temporarily, for calling various tools
432 $ENV{'PATH'} = dirname($0) . ':' . $ENV{'PATH'};
435 # Do what we're told
436 if ($V == 1) {
437 # Only do the panic cleanup for voicefiles
438 $SIG{INT} = \&panic_cleanup;
439 $SIG{KILL} = \&panic_cleanup;
441 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
442 $t, $l, $e, $E, $s, $S);
443 generateclips($l, $t, $e, $E, $s, $S);
444 createvoice($l, $i);
445 deletemp3s();
447 elsif ($C) {
448 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
449 my $tts_object = init_tts($s, $S, $l);
450 gentalkclips($ARGV[0], $tts_object, $e, $E, $S, 0);
451 shutdown_tts($tts_object);
453 else {
454 printusage();
455 exit 1;