The install window doesn't need to be wider than the other ones.
[Rockbox.git] / tools / voice.pl
blob1635b701f154cb354de4efe4aacf337f4ed1aa69
1 #!/usr/bin/perl -s
2 # __________ __ ___.
3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 # \/ \/ \/ \/ \/
8 # $Id:
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
18 use strict;
19 use warnings;
20 use File::Basename;
21 use File::Copy;
22 use Switch;
23 use vars qw($V $C $t $l $e $E $s $S $i $v);
24 use IPC::Open3;
25 use Digest::MD5 qw(md5_hex);
# Print the command line help text to STDOUT.
#
# Takes no arguments. The caller decides whether to exit afterwards.
sub printusage {
    print <<USAGE;

Usage: voice.pl [options] [path to dir]
 -V
    Create voice file. You must also specify -t, -l and -i.

 -C
    Create .talk clips.

 -t=<target>
    Specify which target you want to build voicefile for. Must include
    any features that target supports.

 -i=<target_id>
    Numeric target id. Needed for voice building.

 -l=<language>
    Specify which language you want to build. Without .lang extension.

 -e=<encoder>
    Which encoder to use for voice strings

 -E=<encoder options>
    Which encoder options to use when compressing voice strings. Enclose
    in double quotes if the options include spaces.

 -s=<TTS engine>
    Which TTS engine to use.

 -S=<TTS engine options>
    Options to pass to the TTS engine. Enclose in double quotes if the
    options include spaces.

 -v
    Be verbose
USAGE
}
# Initialize TTS engine. May return an object or value which will be passed
# to voicestring and shutdown_tts
#
# Parameters:
#   $tts_engine      - engine name; only "festival" and "sapi5" need setup
#   $tts_engine_opts - option string appended to the engine command line
#   $language        - language name, handed to the sapi5 helper script
#
# Returns the pid of the festival server for "festival", the pipe to the
# cscript helper for "sapi5", and undef for every other engine.
# Dies if the engine process cannot be started.
sub init_tts {
    our $verbose;
    my ($tts_engine, $tts_engine_opts, $language) = @_;
    my $ret = undef;
    if ($tts_engine eq "festival") {
        print("> festival $tts_engine_opts --server\n") if $verbose;
        my $pid = open(FESTIVAL_SERVER, "| festival $tts_engine_opts --server > /dev/null 2>&1")
            or die "Unable to start festival server: $!\n";
        # shutdown_tts stops the server by pid, so the pid is the handle
        $ret = $pid;
        # SIGKILL cannot be trapped; TERM is the catchable "please stop"
        $SIG{INT} = sub { kill TERM => $pid; print("foo"); panic_cleanup(); };
        $SIG{TERM} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
    }
    elsif ($tts_engine eq "sapi5") {
        my $toolsdir = dirname($0);
        my $path = `cygpath $toolsdir -a -w`;
        chomp($path);
        $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
        # Double every backslash so the Windows path survives the shell
        $path =~ s/\\/\\\\/g;
        print("> cscript /B $path\n") if $verbose;
        open(F, "| cscript /B $path")
            or die "Unable to start cscript: $!\n";
        $ret = *F;
        # Two empty lines are what shutdown_tts sends to end the helper
        $SIG{INT} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
        $SIG{TERM} = sub { print($ret "\r\n\r\n"); panic_cleanup(); };
    }
    return $ret;
}
# Shutdown TTS engine if necessary.
#
# Parameters:
#   $tts_engine - engine name as given to init_tts
#   $tts_object - whatever init_tts returned (a pid for "festival", a
#                 filehandle for "sapi5", undef otherwise)
#
# Does nothing when there is no object to shut down.
sub shutdown_tts {
    my ($tts_engine, $tts_object) = @_;
    return unless defined($tts_engine) and defined($tts_object);
    if ($tts_engine eq "festival") {
        # Send SIGTERM to festival server. Only ever signal a real pid:
        # pid 0 would address our own process group instead.
        if ($tts_object =~ /^\d+$/ and $tts_object > 0) {
            kill TERM => $tts_object;
        }
    }
    elsif ($tts_engine eq "sapi5") {
        # Two empty lines tell the helper script to quit
        print {$tts_object} "\r\n\r\n";
        close($tts_object);
    }
    return;
}
# Apply corrections to a voice-string to make it sound better
#
# Parameters:
#   $string     - the text about to be spoken
#   $language   - language name (optional); selects language specific fixes
#   $tts_engine - engine name (optional); currently no fix depends on it
#
# Returns the corrected string. When verbose, every change is printed.
sub correct_string {
    our $verbose;
    my ($string, $language, $tts_engine) = @_;
    my $orig = $string;

    # General for all engines and languages (perhaps - just an example)
    $string =~ s/USB/U S B/g;

    if (defined($language) and $language eq "deutsch") {
        # These apply to every engine
        $string =~ s/alphabet/alfabet/g;
        $string =~ s/alkaline/alkalein/g;
        $string =~ s/ampere/amper/g;
        $string =~ s/byte(s?)\b/beit$1/g;
        $string =~ s/\bdezibel\b/de-zibell/g;
        $string =~ s/energie\b/ener-gie/g;
        $string =~ s/\bflash\b/fläsh/g;
        $string =~ s/\bfirmware(s?)\b/firmwer$1/g;
        $string =~ s/\bid3 tag\b/id3 täg/g; # can't just use "tag" here
        $string =~ s/\bloudness\b/laudness/g;
        $string =~ s/\bnumerisch\b/numehrisch/g;
        $string =~ s/\brücklauf\b/rück-lauf/g;
        $string =~ s/\bsuchlauf\b/such-lauf/g;
    }

    if ($orig ne $string) {
        printf("%s -> %s\n", $orig, $string) if $verbose;
    }
    return $string;
}
# Produce a wav file of the text given
#
# Parameters:
#   $string          - text to speak
#   $output          - name of the wav file to create
#   $tts_engine      - "festival", "flite", "espeak" or "sapi5"
#   $tts_engine_opts - option string for engines started per clip
#   $tts_object      - value returned by init_tts (used by "sapi5")
#
# Unknown engines are silently ignored.
sub voicestring {
    our $verbose;
    my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
    my $cmd;
    printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
    if ($tts_engine eq "festival") {
        # festival_client lies to us, so we have to do awful soul-eating
        # work with IPC::open3()
        $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
        print("> $cmd\n") if $verbose;
        # Open command, and filehandles for STDIN, STDOUT, STDERR
        my $pid = open3(*CMD_IN, *CMD_OUT, *CMD_ERR, $cmd);
        # Put the string to speak into STDIN and close it
        print(CMD_IN $string);
        close(CMD_IN);
        # Read all output from festival_client (because it LIES TO US)
        while (<CMD_ERR>) {
        }
        close(CMD_OUT);
        close(CMD_ERR);
        # open3 never reaps its child; do it here so that generating
        # many clips does not pile up zombie processes
        waitpid($pid, 0);
    }
    elsif ($tts_engine eq "flite") {
        # Escape what the shell still interprets inside double quotes
        (my $quoted = $string) =~ s/([\\"`\$])/\\$1/g;
        $cmd = "flite $tts_engine_opts -t \"$quoted\" \"$output\"";
        print("> $cmd\n") if $verbose;
        `$cmd`;
    }
    elsif ($tts_engine eq "espeak") {
        $cmd = "espeak $tts_engine_opts -w \"$output\"";
        print("> $cmd\n") if $verbose;
        open(ESPEAK, "| $cmd")
            or die "Unable to run espeak: $!\n";
        print ESPEAK $string . "\n";
        close(ESPEAK);
    }
    elsif ($tts_engine eq "sapi5") {
        # The helper script reads the text and the file name, one per line
        print {$tts_object} sprintf("%s\r\n%s\r\n", $string, $output);
    }
    return;
}
# Encode a wav file into the given destination file
#
# Parameters:
#   $input        - wav file to read
#   $output       - encoded file to write
#   $encoder      - 'lame', 'vorbis' or 'speexenc'; anything else is a no-op
#   $encoder_opts - option string inserted into the encoder command line
#
# The encoder is run exactly once. A non-zero exit status is reported with
# a warning but does not abort the run.
sub encodewav {
    our $verbose;
    my ($input, $output, $encoder, $encoder_opts) = @_;
    printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
    my $cmd;
    if ($encoder eq 'lame') {
        $cmd = "lame $encoder_opts \"$input\" \"$output\"";
    }
    elsif ($encoder eq 'vorbis') {
        $cmd = "oggenc $encoder_opts \"$input\" -o \"$output\"";
    }
    elsif ($encoder eq 'speexenc') {
        $cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
    }
    return unless defined($cmd);

    print("> $cmd\n") if $verbose;
    # Backticks keep the encoder's own stdout chatter off the terminal
    `$cmd`;
    warn "Encoder command failed (status $?): $cmd\n" if $? != 0;
    return;
}
# Run the wavtrim tool that lives next to this script on a wav file
#
# Parameters:
#   $file      - wav file to trim
#   $threshold - optional numeric value appended to the command line
#                (NOTE(review): assumes the wavtrim tool accepts it as a
#                second argument - confirm against the tool's usage)
#
# Without a numeric $threshold the command is the same as before.
sub wavtrim {
    our $verbose;
    my ($file, $threshold) = @_;
    my $cmd = dirname($0) . "/wavtrim \"$file\"";
    # Only ever add a plain number to the shell command
    if (defined($threshold) and $threshold =~ /^\d+$/) {
        $cmd .= " $threshold";
    }
    print("> $cmd\n") if $verbose;
    `$cmd`;
}
# Run genlang and create voice clips for each string
#
# Parameters:
#   $language        - language name, without the .lang extension
#   $target          - target string handed to genlang
#   $encoder         - encoder name for encodewav
#   $encoder_opts    - encoder option string
#   $tts_engine      - TTS engine name
#   $tts_engine_opts - TTS engine option string
#
# Writes genlang's output to the file "voicefontids" and leaves one
# <id>.mp3 per voiced string in the current directory. If the environment
# variable POOL names a directory, clips are re-used from and saved to it.
# Dies if "voicefontids" cannot be created.
sub generateclips {
    our $verbose;
    my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
    my $genlang = dirname($0) . '/genlang';
    my $english = dirname($0) . '/../apps/lang/english.lang';
    my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
    my $id = '';
    my $voice = '';
    my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null";
    my $pool_file;
    open(my $ids_fh, '>', 'voicefontids')
        or die "Unable to write voicefontids: $!\n";
    my $clip_count = 0;

    my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
    print("Generating voice clips");
    print("\n") if $verbose;
    for my $line (`$cmd`) {
        print {$ids_fh} $line;
        if ($line =~ /^id: (.*)$/) {
            $id = $1;
        }
        elsif ($line =~ /^voice: "(.*)"$/) {
            $voice = $1;
            if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
                my $wav = $id . '.wav';
                my $mp3 = $id . '.mp3';

                # Print some progress information
                if (++$clip_count % 10 == 0 and !$verbose) {
                    print(".");
                }

                # Apply corrections to the string. Language and engine
                # must be passed along or only the generic fixes apply.
                $voice = correct_string($voice, $language, $tts_engine);

                # If we have a pool of snippets, see if the string exists there first
                if (defined($ENV{'POOL'})) {
                    $pool_file = sprintf("%s/%s-%s-%s.mp3", $ENV{'POOL'}, md5_hex($voice), $language, $tts_engine);
                    if (-f $pool_file) {
                        printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
                        copy($pool_file, $mp3);
                    }
                }

                # Don't generate MP3 if it already exists (probably from the POOL)
                if (! -f $mp3) {
                    if ($id eq "VOICE_PAUSE") {
                        print("Use distributed $wav\n") if $verbose;
                        copy(dirname($0)."/VOICE_PAUSE.wav", $wav);
                    }
                    else {
                        voicestring($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
                        wavtrim($wav, 500); # 500 seems to be a reasonable default for now
                    }

                    encodewav($wav, $mp3, $encoder, $encoder_opts);
                    if (defined($ENV{'POOL'})) {
                        copy($mp3, $pool_file);
                    }
                    unlink($wav);
                }
                $voice = "";
                $id = "";
            }
        }
    }
    print("\n");
    close($ids_fh);
    shutdown_tts($tts_engine, $tts_object);
}
# Assemble the voicefile
#
# Parameters:
#   $language  - language name, used as the base of the output file name
#   $target_id - target id handed to the voicefont tool
#
# Picks the first free name among <language>.voice, <language>-2.voice,
# <language>-3.voice, ... and runs the voicefont tool found next to this
# script on the "voicefontids" file in the current directory.
sub createvoice {
    our $verbose;
    my ($language, $target_id) = @_;
    my $tool = dirname($0) . '/voicefont';

    # Attempt 0 has no suffix; attempt N (N >= 1) gets the suffix -(N+1)
    my $attempt = 0;
    my $outfile = "";
    while (1) {
        my $suffix = '';
        $suffix = '-' . ($attempt + 1) if $attempt != 0;
        $attempt++;
        $outfile = sprintf("%s%s.voice", $language, $suffix);
        last unless -f $outfile;
    }

    printf("Saving voice file to %s\n", $outfile) if $verbose;
    my $cmd = "$tool 'voicefontids' $target_id ./ $outfile";
    print("> $cmd\n") if $verbose;
    my $output = `$cmd`;
    print($output) if $verbose;
}
# Remove every .mp3 and .wav file from the current directory.
sub deletemp3s() {
    for my $pattern ('*.mp3', '*.wav') {
        for my $clip (glob($pattern)) {
            unlink($clip);
        }
    }
}
# Emergency exit: delete the generated clips from the current directory,
# then abort the script. Used as a signal handler.
sub panic_cleanup {
    deletemp3s();
    die("moo");
}
# Check parameters
my $printusage = 0;
unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
if (defined($V)) {
    unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
    unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
    unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
}
elsif (defined($C)) {
    unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
}
# Encoder and TTS settings are needed in both modes
unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
if ($printusage == 1) { printusage(); exit 1; }

# Clean up the half-finished clips when interrupted. SIGKILL can never be
# trapped, so hook SIGTERM instead.
$SIG{INT} = \&panic_cleanup;
$SIG{TERM} = \&panic_cleanup;

# Verbose mode can be requested with -v or through the V environment variable
if (defined($v) or defined($ENV{'V'})) {
    our $verbose = 1;
}

# Do what we're told
# ($V is undefined when only -C was given, so test it for truth rather
# than comparing it numerically)
if ($V) {
    printf("Generating voice\n  Target: %s\n  Language: %s\n  Encoder (options): %s (%s)\n  TTS Engine (options): %s (%s)\n",
        $t, $l, $e, $E, $s, $S);
    generateclips($l, $t, $e, $E, $s, $S);
    createvoice($l, $i);
    deletemp3s();
}
elsif ($C) {
    # xxx: Implement .talk clip generation
}
else {
    printusage();
    exit 1;
}