Android: Change headphone detection to call into native.
[kugel-rb.git] / tools / voice.pl
blob9c528299b90e80350679924021db6db673fec0ad
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id$
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
18 use strict;
19 use warnings;
20 use feature 'switch';
21 use File::Basename;
22 use File::Copy;
23 use vars qw($V $C $t $l $e $E $s $S $i $v);
24 use IPC::Open2;
25 use IPC::Open3;
26 use Digest::MD5 qw(md5_hex);
27 use DirHandle;
28 use open ':encoding(utf8)';
29 use open ':std';
31 sub printusage {
32 print <<USAGE
34 Usage: voice.pl [options] [path to dir]
36 Create voice file. You must also specify -t and -l.
39 Create .talk clips.
41 -t=<target>
42 Specify which target you want to build voicefile for. Must include
43 any features that target supports.
45 -i=<target_id>
46 Numeric target id. Needed for voice building.
48 -l=<language>
49 Specify which language you want to build. Without .lang extension.
51 -e=<encoder>
52 Which encoder to use for voice strings
54 -E=<encoder options>
55 Which encoder options to use when compressing voice strings. Enclose
56 in double quotes if the options include spaces.
58 -s=<TTS engine>
59 Which TTS engine to use.
61 -S=<TTS engine options>
62 Options to pass to the TTS engine. Enclose in double quotes if the
63 options include spaces.
66 Be verbose
67 USAGE
71 # Initialize TTS engine. May return an object or value which will be passed
72 # to voicestring and shutdown_tts
73 sub init_tts {
74 our $verbose;
75 my ($tts_engine, $tts_engine_opts, $language) = @_;
76 my %ret = ("name" => $tts_engine);
77 given ($tts_engine) {
78 when ("festival") {
79 print("> festival $tts_engine_opts --server\n") if $verbose;
80 my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1");
81 my $dummy = *FESTIVAL_SERVER; #suppress warning
82 $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
83 $SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
84 $ret{"pid"} = $pid;
86 when ("sapi") {
87 my $toolsdir = dirname($0);
88 my $path = `cygpath $toolsdir -a -w`;
89 chomp($path);
90 $path = $path . '\\';
91 my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
92 $cmd =~ s/\\/\\\\/g;
93 print("> cscript //nologo $cmd\n") if $verbose;
94 my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
95 binmode(*CMD_IN, ':encoding(utf16le)');
96 binmode(*CMD_OUT, ':encoding(utf16le)');
97 $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
98 $SIG{KILL} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
99 print(CMD_IN "QUERY\tVENDOR\r\n");
100 my $vendor = readline(*CMD_OUT);
101 $vendor =~ s/\r\n//;
102 %ret = (%ret,
103 "stdin" => *CMD_IN,
104 "stdout" => *CMD_OUT,
105 "vendor" => $vendor);
108 return \%ret;
111 # Shutdown TTS engine if necessary.
112 sub shutdown_tts {
113 my ($tts_object) = @_;
114 given ($$tts_object{"name"}) {
115 when ("festival") {
116 # Send SIGTERM to festival server
117 kill TERM => $$tts_object{"pid"};
119 when ("sapi") {
120 print({$$tts_object{"stdin"}} "QUIT\r\n");
121 close($$tts_object{"stdin"});
126 # Apply corrections to a voice-string to make it sound better
127 sub correct_string {
128 our $verbose;
129 my ($string, $language, $tts_object) = @_;
130 my $orig = $string;
131 my $corrections = $tts_object->{"corrections"};
133 foreach (@$corrections) {
134 my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
135 . $_->{replace} . $_->{separator} . $_->{modifier};
136 eval ('$string =~' . "$r;");
138 if ($orig ne $string) {
139 printf("%s -> %s\n", $orig, $string) if $verbose;
141 return $string;
144 # Produce a wav file of the text given
145 sub voicestring {
146 our $verbose;
147 my ($string, $output, $tts_engine_opts, $tts_object) = @_;
148 my $cmd;
149 printf("Generate \"%s\" with %s in file %s\n", $string, $$tts_object{"name"}, $output) if $verbose;
150 given ($$tts_object{"name"}) {
151 when ("festival") {
152 # festival_client lies to us, so we have to do awful soul-eating
153 # work with IPC::open3()
154 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
155 # Use festival-prolog.scm if it's there (created by user of tools/configure)
156 if (-f "festival-prolog.scm") {
157 $cmd .= " --prolog festival-prolog.scm";
159 print("> $cmd\n") if $verbose;
160 # Open command, and filehandles for STDIN, STDOUT, STDERR
161 my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
162 # Put the string to speak into STDIN and close it
163 print(CMD_IN $string);
164 close(CMD_IN);
165 # Read all output from festival_client (because it LIES TO US)
166 while (<CMD_ERR>) {
168 close(CMD_OUT);
169 close(CMD_ERR);
171 when ("flite") {
172 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
173 print("> $cmd\n") if $verbose;
174 `$cmd`;
176 when ("espeak") {
177 $cmd = "espeak $tts_engine_opts -w \"$output\"";
178 print("> $cmd\n") if $verbose;
179 open(ESPEAK, "| $cmd");
180 print ESPEAK $string . "\n";
181 close(ESPEAK);
183 when ("sapi") {
184 print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
186 when ("swift") {
187 $cmd = "swift $tts_engine_opts -o \"$output\" \"$string\"";
188 print("> $cmd\n") if $verbose;
189 system($cmd);
194 # trim leading / trailing silence from the clip
195 sub wavtrim {
196 our $verbose;
197 my ($file, $threshold, $tts_object) = @_;
198 printf("Trim \"%s\"\n", $file) if $verbose;
199 my $cmd = "wavtrim \"$file\" $threshold";
200 if ($$tts_object{"name"} eq "sapi") {
201 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
203 else {
204 print("> $cmd\n") if $verbose;
205 `$cmd`;
209 # Encode a wav file into the given destination file
210 sub encodewav {
211 our $verbose;
212 my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
213 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
214 my $cmd = "$encoder $encoder_opts \"$input\" \"$output\"";
215 if ($$tts_object{"name"} eq "sapi") {
216 print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
218 else {
219 print("> $cmd\n") if $verbose;
220 `$cmd`;
224 # synchronize the clip generation / processing if it's running in another process
225 sub synchronize {
226 my ($tts_object) = @_;
227 if ($$tts_object{"name"} eq "sapi") {
228 print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
229 my $wait = readline($$tts_object{"stdout"});
230 #ignore what's actually returned
234 # Run genlang and create voice clips for each string
235 sub generateclips {
236 our $verbose;
237 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
238 my $english = dirname($0) . '/../apps/lang/english.lang';
239 my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
240 my $correctionsfile = dirname($0) . '/voice-corrections.txt';
241 my $id = '';
242 my $voice = '';
243 my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
244 my $pool_file;
245 open(VOICEFONTIDS, "> voicefontids");
246 my $i = 0;
247 local $| = 1; # make progress indicator work reliably
249 my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
250 # add string corrections to tts_object.
251 my @corrects = ();
252 open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
253 while(<VOICEREGEXP>) {
254 # get first character of line
255 my $line = $_;
256 my $separator = substr($_, 0, 1);
257 if($separator =~ m/\s+/) {
258 next;
260 chomp($line);
261 $line =~ s/^.//g; # remove separator at beginning
262 my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
264 # does language match?
265 if($language !~ m/$lang/) {
266 next;
268 if($$tts_object{"name"} !~ m/$engine/) {
269 next;
271 my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
272 if($v !~ m/$vendor/) {
273 next;
275 push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
278 close(VOICEREGEXP);
279 $tts_object->{corrections} = [@corrects];
281 print("Generating voice clips");
282 print("\n") if $verbose;
283 for (`$cmd`) {
284 my $line = $_;
285 print(VOICEFONTIDS $line);
286 if ($line =~ /^id: (.*)$/) {
287 $id = $1;
289 elsif ($line =~ /^voice: "(.*)"$/) {
290 $voice = $1;
291 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
292 my $wav = $id . '.wav';
293 my $mp3 = $id . '.mp3';
295 # Print some progress information
296 if (++$i % 10 == 0 and !$verbose) {
297 print(".");
300 # Apply corrections to the string
301 $voice = correct_string($voice, $language, $tts_object);
303 # If we have a pool of snippets, see if the string exists there first
304 if (defined($ENV{'POOL'})) {
305 $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
306 md5_hex("$voice $tts_engine $tts_engine_opts $encoder_opts"),
307 $language);
308 if (-f $pool_file) {
309 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
310 copy($pool_file, $mp3);
314 # Don't generate MP3 if it already exists (probably from the POOL)
315 if (! -f $mp3) {
316 if ($id eq "VOICE_PAUSE") {
317 print("Use distributed $wav\n") if $verbose;
318 copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
320 else {
321 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
322 wavtrim($wav, 500, $tts_object);
323 # 500 seems to be a reasonable default for now
326 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
327 synchronize($tts_object);
328 if (defined($ENV{'POOL'})) {
329 copy($mp3, $pool_file);
331 unlink($wav);
333 $voice = "";
334 $id = "";
338 print("\n");
339 close(VOICEFONTIDS);
340 shutdown_tts($tts_object);
343 # Assemble the voicefile
344 sub createvoice {
345 our $verbose;
346 my ($language, $target_id) = @_;
347 my $outfile = "";
348 $outfile = sprintf("%s.voice", $language);
349 printf("Saving voice file to %s\n", $outfile) if $verbose;
350 my $cmd = "voicefont 'voicefontids' $target_id ./ $outfile";
351 print("> $cmd\n") if $verbose;
352 my $output = `$cmd`;
353 print($output) if $verbose;
356 sub deletemp3s() {
357 for (glob('*.mp3')) {
358 unlink($_);
360 for (glob('*.wav')) {
361 unlink($_);
365 sub panic_cleanup {
366 deletemp3s();
367 die "moo";
370 # Generate .talk clips
371 sub gentalkclips {
372 our $verbose;
373 my ($dir, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i) = @_;
374 my $d = new DirHandle $dir;
375 while (my $file = $d->read) {
376 my ($voice, $wav, $mp3);
377 # Print some progress information
378 if (++$i % 10 == 0 and !$verbose) {
379 print(".");
382 # Convert to a complete path
383 my $path = sprintf("%s/%s", $dir, $file);
385 $voice = $file;
386 $wav = sprintf("%s.talk.wav", $path);
388 # Ignore dot-dirs and talk files
389 if ($file eq '.' || $file eq '..' || $file =~ /\.talk$/) {
390 next;
392 # Element is a dir
393 if ( -d $path) {
394 gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
395 $mp3 = sprintf("%s/_dirname.talk", $path);
397 # Element is a file
398 else {
399 $mp3 = sprintf("%s.talk", $path);
400 $voice =~ s/\.[^\.]*$//; # Trim extension
403 printf("Talkclip %s: %s", $mp3, $voice) if $verbose;
405 voicestring($voice, $wav, $tts_engine_opts, $tts_object);
406 wavtrim($wav, 500, $tts_object);
407 # 500 seems to be a reasonable default for now
408 encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
409 synchronize($tts_object);
410 unlink($wav);
415 # Check parameters
416 my $printusage = 0;
417 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
418 if (defined($V)) {
419 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
420 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
421 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
423 elsif (defined($C)) {
424 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
426 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
427 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
428 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
429 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
430 if ($printusage == 1) { printusage(); exit 1; }
432 if (defined($v) or defined($ENV{'V'})) {
433 our $verbose = 1;
436 # add the tools dir to the path temporarily, for calling various tools
437 $ENV{'PATH'} = dirname($0) . ':' . $ENV{'PATH'};
440 # Do what we're told
441 if ($V == 1) {
442 # Only do the panic cleanup for voicefiles
443 $SIG{INT} = \&panic_cleanup;
444 $SIG{KILL} = \&panic_cleanup;
446 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
447 $t, $l, $e, $E, $s, $S);
448 generateclips($l, $t, $e, $E, $s, $S);
449 createvoice($l, $i);
450 deletemp3s();
452 elsif ($C) {
453 printf("Generating .talk clips\n Path: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n", $ARGV[0], $l, $e, $E, $s, $S);
454 my $tts_object = init_tts($s, $S, $l);
455 gentalkclips($ARGV[0], $tts_object, $e, $E, $S, 0);
456 shutdown_tts($tts_object);
458 else {
459 printusage();
460 exit 1;