Improve metadata use
[Rockbox.git] / tools / voice.pl
blob8982138f998a39d6485be660561b19c38836b4a8
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id:
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
18 use strict;
19 use warnings;
20 use File::Basename;
21 use File::Copy;
22 use Switch;
23 use vars qw($V $C $t $l $e $E $s $S $i $v);
24 use IPC::Open2;
25 use IPC::Open3;
26 use Digest::MD5 qw(md5_hex);
# Print the command-line help text to STDOUT.
#
# Takes no arguments. Every option that the parameter check of this script
# looks at (-V, -C, -t, -i, -l, -e, -E, -s, -S) and the verbosity flag -v
# is listed together with its flag name.
sub printusage {
    print <<USAGE;

Usage: voice.pl [options] [path to dir]
 -V
    Create voice file. You must also specify -t, -l and -i.

 -C
    Create .talk clips.

 -t=<target>
    Specify which target you want to build voicefile for. Must include
    any features that target supports.

 -i=<target_id>
    Numeric target id. Needed for voice building.

 -l=<language>
    Specify which language you want to build. Without .lang extension.

 -e=<encoder>
    Which encoder to use for voice strings

 -E=<encoder options>
    Which encoder options to use when compressing voice strings. Enclose
    in double quotes if the options include spaces.

 -s=<TTS engine>
    Which TTS engine to use.

 -S=<TTS engine options>
    Options to pass to the TTS engine. Enclose in double quotes if the
    options include spaces.

 -v
    Be verbose
USAGE
}
# Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts
#
# Arguments:
#   $tts_engine      - engine name. "festival" and "sapi" get extra setup;
#                      for any other name only the name itself is recorded.
#   $tts_engine_opts - option string appended verbatim to the engine command
#   $language        - language name, handed to the SAPI helper script
#
# Returns a hash reference that always holds "name". For festival it also
# holds "pid"; for sapi it also holds "stdin", "stdout", "toolspath" and
# "vendor".
#
# Side effects: for festival and sapi the INT and TERM signal handlers are
# replaced so that the engine is told to stop before panic_cleanup() runs.
# Dies if the festival server pipe cannot be opened.
sub init_tts {
    our $verbose;
    my ($tts_engine, $tts_engine_opts, $language) = @_;
    my %ret = ("name" => $tts_engine);

    if ($tts_engine eq "festival") {
        print("> festival $tts_engine_opts --server\n") if $verbose;
        # Without a valid pid the handlers below (and shutdown_tts) would
        # call kill() with an undefined value, which addresses the whole
        # process group - so refuse to continue if the pipe did not open.
        my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1")
            or die "Could not start festival server: $!\n";
        my $dummy = *FESTIVAL_SERVER; #suppress warning
        $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
        # SIGKILL can never be trapped, so the second handler is bound to
        # SIGTERM, which is the catchable "please terminate" signal.
        $SIG{TERM} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
        $ret{"pid"} = $pid;
    }
    elsif ($tts_engine eq "sapi") {
        my $toolsdir = dirname($0);
        my $path = `cygpath $toolsdir -a -w`;
        chomp($path);
        $path = $path . '\\';
        my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
        # Double every backslash so the path survives the shell
        $cmd =~ s/\\/\\\\/g;
        print("> cscript //nologo $cmd\n") if $verbose;
        open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
        $SIG{INT} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
        $SIG{TERM} = sub { print(CMD_IN "QUIT\r\n"); panic_cleanup(); };
        # Ask the helper which vendor's voice is in use; correct_string
        # keys some pronunciation fixes on the answer.
        print(CMD_IN "QUERY\tVENDOR\r\n");
        my $vendor = readline(CMD_OUT);
        $vendor = "" unless defined($vendor);   # helper went away: no vendor
        $vendor =~ s/\r?\n//;
        %ret = (%ret,
                "stdin" => *CMD_IN,
                "stdout" => *CMD_OUT,
                "toolspath" => $path,
                "vendor" => $vendor);
    }
    return \%ret;
}
# Shutdown TTS engine if necessary.
#
# Arguments:
#   $tts_object - hash reference as returned by init_tts
#
# festival: the process stored under "pid" is sent SIGTERM.
# sapi:     "QUIT" is written to the handle stored under "stdin", which is
#           then closed.
# Any other engine name needs no shutdown and is ignored.
sub shutdown_tts {
    my ($tts_object) = @_;
    my $engine = $tts_object->{"name"};
    $engine = "" unless defined($engine);

    if ($engine eq "festival") {
        # Send SIGTERM to festival server. An undefined or zero pid is
        # skipped because kill() would treat it as "my own process group".
        kill(TERM => $tts_object->{"pid"}) if $tts_object->{"pid"};
    }
    elsif ($engine eq "sapi") {
        print({ $tts_object->{"stdin"} } "QUIT\r\n");
        close($tts_object->{"stdin"});
    }
    return;
}
# Apply corrections to a voice-string to make it sound better
#
# Arguments:
#   $string     - the text that is going to be spoken
#   $language   - language name; "english" and "deutsch" have extra rules
#   $tts_object - hash reference as returned by init_tts; "name" selects
#                 engine specific rules and, for sapi, "vendor" selects
#                 voice specific rules
#
# Returns the corrected text. When verbose output is enabled and the text
# was changed, "original -> corrected" is printed.
sub correct_string {
    our $verbose;
    my ($string, $language, $tts_object) = @_;
    my $orig = $string;
    my $engine = $$tts_object{"name"};
    $engine = "" unless defined($engine);

    # General for all engines and languages
    $string =~ s/USB/U S B/g;
    $string =~ s/ID3/I D 3/g;

    if ($language eq "english") {
        if ($engine eq "sapi" or $engine eq "festival") {
            $string =~ s/plugin(s?)/plug-in$1/ig;
        }
    }
    elsif ($language eq "deutsch") {
        # for all german engines (e.g. for english words)
        $string =~ s/alkaline/alkalein/ig;
        $string =~ s/byte(s?)/beit$1/ig;
        $string =~ s/clip(s?)/klipp$1/ig;
        $string =~ s/cuesheet/kjuschiet/ig;
        $string =~ s/dither/didder/ig;
        $string =~ s/equalizer/iquileiser/ig;
        $string =~ s/\bflash\b/fläsh/ig;
        $string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
        $string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
        $string =~ s/\bloudness\b/laudness/ig;
        $string =~ s/\bunicode\b/unikod/ig;

        if ($engine eq "sapi") { # just for SAPI
            my $vendor = $$tts_object{"vendor"};
            if (defined($vendor) and $vendor eq "AT&T Labs") {
                $string =~ s/alphabet/alfabet/ig;
                $string =~ s/ampere/amper/ig;
                $string =~ s/\bdezibel\b/de-zibell/ig;
                # runs after the general dither -> didder rule above
                $string =~ s/diddering/didde-ring/ig;
                $string =~ s/energie\b/ener-gie/ig;
                $string =~ s/\bnumerisch\b/numehrisch/ig;
                $string =~ s/\brücklauf\b/rück-lauf/ig;
                $string =~ s/\bsuchlauf\b/such-lauf/ig;
            }
        }
    }

    if ($orig ne $string) {
        printf("%s -> %s\n", $orig, $string) if $verbose;
    }
    return $string;
}
# Produce a wav file of the text given
#
# Arguments:
#   $string          - text to speak
#   $output          - name of the wav file to write
#   $tts_engine_opts - option string inserted verbatim into the flite,
#                      espeak and swift command lines
#   $tts_object      - hash reference as returned by init_tts; "name"
#                      selects the engine, sapi also uses "stdin"
#
# Engines handled: festival, flite, espeak, sapi, swift. Any other name
# does nothing. Dies if the espeak pipe cannot be opened.
sub voicestring {
    our $verbose;
    my ($string, $output, $tts_engine_opts, $tts_object) = @_;
    my $engine = $$tts_object{"name"};
    my $cmd;
    printf("Generate \"%s\" with %s in file %s\n", $string, $engine, $output) if $verbose;

    # Copy of the text that is safe inside shell double quotes: the four
    # characters the shell still interprets there (\ " $ and backtick)
    # are backslash-escaped.
    (my $shell_string = $string) =~ s/(["\\\$\x60])/\\$1/g;

    if ($engine eq "festival") {
        # festival_client lies to us, so we have to do awful soul-eating
        # work with IPC::open3()
        $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
        print("> $cmd\n") if $verbose;
        # Open command, and filehandles for STDIN, STDOUT, STDERR
        my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
        # Put the string to speak into STDIN and close it
        print(CMD_IN $string);
        close(CMD_IN);
        # Read all output from festival_client (because it LIES TO US).
        # A named lexical is used so the caller's $_ is left alone.
        while (defined(my $discarded = <CMD_ERR>)) {
        }
        close(CMD_OUT);
        close(CMD_ERR);
        # open3 never reaps its child; collect it so that one zombie per
        # clip does not pile up.
        waitpid($pid, 0);
    }
    elsif ($engine eq "flite") {
        $cmd = "flite $tts_engine_opts -t \"$shell_string\" \"$output\"";
        print("> $cmd\n") if $verbose;
        `$cmd`;
    }
    elsif ($engine eq "espeak") {
        $cmd = "espeak $tts_engine_opts -w \"$output\"";
        print("> $cmd\n") if $verbose;
        # The text goes in through the pipe, so it needs no shell quoting
        open(my $espeak, '|-', $cmd) or die "Could not run espeak: $!\n";
        print {$espeak} $string . "\n";
        close($espeak);
    }
    elsif ($engine eq "sapi") {
        print({ $$tts_object{"stdin"} } "SPEAK\t$output\t$string\r\n");
    }
    elsif ($engine eq "swift") {
        $cmd = "swift $tts_engine_opts -o \"$output\" \"$shell_string\"";
        print("> $cmd\n") if $verbose;
        system($cmd);
    }
    return;
}
# trim leading / trailing silence from the clip
#
# Arguments:
#   $file       - wav file handed to the wavtrim tool
#   $threshold  - threshold value handed to the wavtrim tool
#   $tts_object - hash reference as returned by init_tts
#
# For the sapi engine the wavtrim command (prefixed with "toolspath") is
# written as an EXEC request to the handle stored under "stdin". For every
# other engine the wavtrim binary next to this script is run directly.
sub wavtrim {
    our $verbose;
    my ($file, $threshold, $tts_object) = @_;
    my $tool_and_args = "wavtrim $file $threshold";

    print("Trim \"$file\"\n") if $verbose;
    if ($tts_object->{"name"} ne "sapi") {
        my $local_cmd = dirname($0) . "/" . $tool_and_args;
        print("> $local_cmd\n") if $verbose;
        `$local_cmd`;
    }
    else {
        my $remote_cmd = $tts_object->{"toolspath"} . $tool_and_args;
        print({ $tts_object->{"stdin"} } "EXEC\t$remote_cmd\r\n");
    }
}
# Encode a wav file into the given destination file
#
# Arguments:
#   $input        - wav file to read
#   $output       - encoded file to write
#   $encoder      - one of 'lame', 'vorbis' (runs oggenc) or 'speexenc'
#   $encoder_opts - option string inserted verbatim into the command line
#   $tts_object   - hash reference as returned by init_tts
#
# For the sapi engine the command is written as an EXEC request to the
# handle stored under "stdin"; otherwise it is run directly.
# An unknown encoder name produces a warning and nothing is executed.
sub encodewav {
    our $verbose;
    my ($input, $output, $encoder, $encoder_opts, $tts_object) = @_;
    printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;

    my $cmd;
    if ($encoder eq 'lame') {
        $cmd = "lame $encoder_opts \"$input\" \"$output\"";
    }
    elsif ($encoder eq 'vorbis') {
        $cmd = "oggenc $encoder_opts \"$input\" -o \"$output\"";
    }
    elsif ($encoder eq 'speexenc') {
        $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
    }
    else {
        # Nothing sensible can be run; say so instead of executing an
        # empty command line.
        warn("Unknown encoder \"$encoder\", not encoding $input\n");
        return;
    }

    if ($$tts_object{"name"} eq "sapi") {
        print({ $$tts_object{"stdin"} } "EXEC\t$cmd\r\n");
    }
    else {
        print("> $cmd\n") if $verbose;
        `$cmd`;
    }
    return;
}
# synchronize the clip generation / processing if it's running in another process
#
# Arguments:
#   $tts_object - hash reference as returned by init_tts
#
# Only the sapi engine is affected: a SYNC request is written to the handle
# stored under "stdin" and one line is then read from the handle stored
# under "stdout", which blocks until the other side has answered.
sub synchronize {
    my ($tts_object) = @_;
    if ($tts_object->{"name"} eq "sapi") {
        my ($to_engine, $from_engine) = @{$tts_object}{"stdin", "stdout"};
        print({$to_engine} "SYNC\t42\r\n");
        # Only the arrival of the answer matters, its content is ignored
        my $answer = readline($from_engine);
    }
}
# Run genlang and create voice clips for each string
#
# Arguments:
#   $language        - language name; <language>.lang below ../apps/lang
#                      (relative to this script) is handed to genlang
#   $target          - target string handed to genlang via -t=
#   $encoder         - encoder name handed to encodewav
#   $encoder_opts    - encoder option string handed to encodewav
#   $tts_engine      - TTS engine name handed to init_tts
#   $tts_engine_opts - TTS engine option string
#
# Every line printed by genlang is copied into the file "voicefontids" in
# the current directory. For each id / voice pair whose id does not start
# with NOT_USED_ and whose voice text is not empty, <id>.mp3 is produced in
# the current directory.
#
# If the environment variable POOL is set it names a directory used as a
# clip cache: a clip found there is copied instead of being generated, and
# every newly generated clip is copied into it.
#
# Dies if "voicefontids" cannot be created or written.
sub generateclips {
    our $verbose;
    my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
    my $genlang = dirname($0) . '/genlang';
    my $english = dirname($0) . '/../apps/lang/english.lang';
    my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
    my $id = '';
    my $voice = '';
    my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null";
    my $pool_file;
    open(my $ids_fh, '>', 'voicefontids')
        or die "Could not create voicefontids: $!\n";
    my $i = 0;
    local $| = 1; # make progress indicator work reliably

    my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
    print("Generating voice clips");
    print("\n") if $verbose;
    for my $line (`$cmd`) {
        print {$ids_fh} $line;
        if ($line =~ /^id: (.*)$/) {
            $id = $1;
            next;
        }
        next unless $line =~ /^voice: "(.*)"$/;
        $voice = $1;
        next if $id =~ /^NOT_USED_.*$/ or $voice eq "";

        my $wav = $id . '.wav';
        my $mp3 = $id . '.mp3';

        # Print some progress information
        if (++$i % 10 == 0 and !$verbose) {
            print(".");
        }

        # Apply corrections to the string
        $voice = correct_string($voice, $language, $tts_object);

        # If we have a pool of snippets, see if the string exists there first
        if (defined($ENV{'POOL'})) {
            # The file name encodes everything that influences the sound
            $pool_file = sprintf("%s/%s-%s.mp3", $ENV{'POOL'},
                md5_hex("$voice $tts_engine $tts_engine_opts $encoder_opts"),
                $language);
            if (-f $pool_file) {
                printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
                copy($pool_file, $mp3);
            }
        }

        # Don't generate MP3 if it already exists (probably from the POOL)
        if (! -f $mp3) {
            if ($id eq "VOICE_PAUSE") {
                print("Use distributed $wav\n") if $verbose;
                copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
            }
            else {
                voicestring($voice, $wav, $tts_engine_opts, $tts_object);
                # 500 seems to be a reasonable default for now
                wavtrim($wav, 500, $tts_object);
            }

            encodewav($wav, $mp3, $encoder, $encoder_opts, $tts_object);
            # Wait for an out-of-process engine before touching its output
            synchronize($tts_object);
            if (defined($ENV{'POOL'})) {
                copy($mp3, $pool_file);
            }
            unlink($wav);
        }
        $voice = "";
        $id = "";
    }
    print("\n");
    # Stop the engine before the close check so that it is not left
    # running if that check fails.
    shutdown_tts($tts_object);
    close($ids_fh) or die "Could not write voicefontids: $!\n";
    return;
}
# Assemble the voicefile
#
# Arguments:
#   $language  - language name, used as the base of the output file name
#   $target_id - target id handed to the voicefont tool
#
# The output goes to the first name that does not exist yet among
# <language>.voice, <language>-2.voice, <language>-3.voice, ...
# The voicefont tool next to this script is run with the file
# "voicefontids", the target id, "./" and the output name as arguments.
sub createvoice {
    our $verbose;
    my ($language, $target_id) = @_;
    my $voicefont = dirname($0) . '/voicefont';

    # Find a free output name; the numeric suffix starts at 2
    my $outfile = $language . ".voice";
    my $suffix = 1;
    while (-f $outfile) {
        $suffix++;
        $outfile = $language . "-" . $suffix . ".voice";
    }

    print("Saving voice file to $outfile\n") if $verbose;
    my $cmd = "$voicefont 'voicefontids' $target_id ./ $outfile";
    print("> $cmd\n") if $verbose;
    my $tool_output = `$cmd`;
    print($tool_output) if $verbose;
}
# Remove every *.mp3 and every *.wav file from the current directory.
sub deletemp3s() {
    for my $pattern ('*.mp3', '*.wav') {
        unlink($_) for glob($pattern);
    }
}
# Emergency exit used by the signal handlers: removes the generated
# *.mp3 / *.wav files via deletemp3s() and then aborts the script.
sub panic_cleanup {
    deletemp3s();    # drop the clips produced so far
    die("moo");
}
# Check parameters
#
# The single-letter variables are filled in by perl's -s option parsing
# (see the #! line). All complaints are printed before the usage text is
# shown and the script exits with status 1.
my @complaints;
unless (defined($V) or defined($C)) { push(@complaints, "Missing either -V or -C"); }
if (defined($V)) {
    push(@complaints, "Missing -t argument") unless defined($t);
    push(@complaints, "Missing -l argument") unless defined($l);
    push(@complaints, "Missing -i argument") unless defined($i);
}
elsif (defined($C)) {
    push(@complaints, "Missing path argument") unless defined($ARGV[0]);
}
push(@complaints, "Missing -e argument") unless defined($e);
push(@complaints, "Missing -E argument") unless defined($E);
push(@complaints, "Missing -s argument") unless defined($s);
push(@complaints, "Missing -S argument") unless defined($S);
if (@complaints) {
    print("$_\n") for @complaints;
    printusage();
    exit 1;
}

# Remove half-finished clips when interrupted or asked to terminate.
# SIGKILL cannot be trapped, so SIGTERM is the signal to hook here.
$SIG{INT} = \&panic_cleanup;
$SIG{TERM} = \&panic_cleanup;

# Verbose output can be requested with -v or through the environment
# variable V.
if (defined($v) or defined($ENV{'V'})) {
    our $verbose = 1;
}

# Do what we're told
# ($V is undefined when only -C was given, so test that first)
if (defined($V) and $V == 1) {
    printf("Generating voice\n  Target: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n",
        $t, $l, $e, $E, $s, $S);
    generateclips($l, $t, $e, $E, $s, $S);
    createvoice($l, $i);
    deletemp3s();
}
elsif ($C) {
    # xxx: Implement .talk clip generation
}
else {
    printusage();
    exit 1;
}