3 # Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 # Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 # Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 # Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 # Copyright (C) 2007 Jonas Häggqvist
12 # All files in this archive are subject to the GNU General Public License.
13 # See the file COPYING in the source tree root for full license agreement.
15 # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 # KIND, either express or implied.
23 use vars
qw($V $C $t $l $e $E $s $S $i $v);
25 use Digest::MD5 qw(md5_hex);
30 Usage: voice.pl [options] [path to dir]
32 Create voice file. You must also specify -t and -l.
38 Specify which target you want to build voicefile for. Must include
39 any features that target supports.
42 Numeric target id. Needed for voice building.
45 Specify which language you want to build. Without .lang extension.
48 Which encoder to use for voice strings
51 Which encoder options to use when compressing voice strings. Enclose
52 in double quotes if the options include spaces.
55 Which TTS engine to use.
57 -S=<TTS engine options>
58 Options to pass to the TTS engine. Enclose in double quotes if the
59 options include spaces.
67 # Initialize TTS engine. May return an object or value which will be passed
68 # to voicestring and shutdown_tts
71 my ($tts_engine, $tts_engine_opts, $language) = @_;
75 print("> festival $tts_engine_opts --server\n") if $verbose;
76 my $pid = open(FESTIVAL_SERVER
, "| festival $tts_engine_opts --server > /dev/null 2>&1");
77 $ret = *FESTIVAL_SERVER
;
79 $SIG{INT
} = sub { kill TERM
=> $pid; print("foo"); panic_cleanup
(); };
80 $SIG{KILL
} = sub { kill TERM
=> $pid; print("boo"); panic_cleanup
(); };
83 my $toolsdir = dirname
($0);
84 my $path = `cygpath $toolsdir -a -w`;
86 $path = $path . "\\sapi5_voice_new.vbs $language $tts_engine_opts";
88 print("> cscript /B $path\n") if $verbose;
89 my $pid = open(F
, "| cscript /B $path");
91 $SIG{INT
} = sub { print($ret "\r\n\r\n"); panic_cleanup
(); };
92 $SIG{KILL
} = sub { print($ret "\r\n\r\n"); panic_cleanup
(); };
98 # Shutdown TTS engine if necessary.
100 my ($tts_engine, $tts_object) = @_;
101 switch
($tts_engine) {
103 # Send SIGTERM to festival server
104 kill TERM
=> $tts_object;
107 print($tts_object "\r\n\r\n");
113 # Apply corrections to a voice-string to make it sound better
116 my ($string, $language, $tts_engine) = @_;
119 # Corrections applied for all engines and languages (perhaps - this one is just an example)
120 $string =~ s/USB/U S B/;
123 switch
($tts_engine) {
124 $string =~ s/alphabet/alfabet/;
125 $string =~ s/alkaline/alkalein/;
126 $string =~ s/ampere/amper/;
127 $string =~ s/byte(s?)\b/beit$1/;
128 $string =~ s/\bdezibel\b/de-zibell/;
129 $string =~ s/energie\b/ener-gie/;
130 $string =~ s/\bflash\b/fläsh/g;
131 $string =~ s/\bfirmware(s?)\b/firmwer$1/;
132 $string =~ s/\bid3 tag\b/id3 täg/g; # can't just use "tag" here
133 $string =~ s/\bloudness\b/laudness/;
134 $string =~ s/\bnumerisch\b/numehrisch/;
135 $string =~ s/\brücklauf\b/rück-lauf/;
136 $string =~ s/\bsuchlauf\b/such-lauf/;
140 if ($orig ne $string) {
141 printf("%s -> %s\n", $orig, $string) if $verbose;
146 # Produce a wav file of the text given
149 my ($string, $output, $tts_engine, $tts_engine_opts, $tts_object) = @_;
151 printf("Generate \"%s\" with %s in file %s\n", $string, $tts_engine, $output) if $verbose;
152 switch
($tts_engine) {
154 # festival_client lies to us, so we have to do awful soul-eating
155 # work with IPC::open3()
156 $cmd = "festival_client --server localhost --otype riff --ttw --output \"$output\"";
157 print("> $cmd\n") if $verbose;
158 # Open command, and filehandles for STDIN, STDOUT, STDERR
159 my $pid = open3
(*CMD_IN
, *CMD_OUT
, *CMD_ERR
, $cmd);
160 # Put the string to speak into STDIN and close it
161 print(CMD_IN
$string);
163 # Read all output from festival_client (because it LIES TO US)
170 $cmd = "flite $tts_engine_opts -t \"$string\" \"$output\"";
171 print("> $cmd\n") if $verbose;
175 # $tts_engine_opts is passed to espeak as-is; the text to speak is fed on stdin
176 $cmd = "espeak $tts_engine_opts -w $output";
177 print("> $cmd\n") if $verbose;
178 open(ESPEAK
, "| $cmd");
179 print ESPEAK
$string . "\n";
183 print($tts_object sprintf("%s\r\n%s\r\n", $string, $output));
188 # Encode a wav file into the given destination file
191 my ($input, $output, $encoder, $encoder_opts) = @_;
192 printf("Encode \"%s\" with %s in file %s\n", $input, $encoder, $output) if $verbose;
195 my $cmd = "lame $encoder_opts \"$input\" \"$output\"";
196 print("> $cmd\n") if $verbose;
197 `lame $encoder_opts "$input" "$output"`;
201 `oggenc $encoder_opts "$input" -o "$output"`;
204 `speexenc $encoder_opts "$input" "$output"`;
212 my $cmd = dirname
($0) . "/wavtrim \"$file\"";
213 print("> $cmd\n") if $verbose;
217 # Run genlang and create voice clips for each string
220 my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
221 my $genlang = dirname
($0) . '/genlang';
222 my $english = dirname
($0) . '/../apps/lang/english.lang';
223 my $langfile = dirname
($0) . '/../apps/lang/' . $language . '.lang';
226 my $cmd = "$genlang -o -t=$target -e=$english $langfile 2>/dev/null";
228 open(VOICEFONTIDS
, "> voicefontids");
231 my $tts_object = init_tts
($tts_engine, $tts_engine_opts, $language);
232 print("Generating voice clips");
233 print("\n") if $verbose;
236 print(VOICEFONTIDS
$line);
237 if ($line =~ /^id: (.*)$/) {
240 elsif ($line =~ /^voice: "(.*)"$/) {
242 if ($id !~ /^NOT_USED_.*$/ && $voice ne "") {
243 my $wav = $id . '.wav';
244 my $mp3 = $id . '.mp3';
246 # Print some progress information
247 if (++$i % 10 == 0 and !$verbose) {
251 # Apply corrections to the string
252 $voice = correct_string
($voice);
254 # If we have a pool of snippets, see if the string exists there first
255 if (defined($ENV{'POOL'})) {
256 $pool_file = sprintf("%s/%s-%s-%s.mp3", $ENV{'POOL'}, md5_hex
($voice), $language, $tts_engine);
258 printf("Re-using %s (%s) from pool\n", $id, $voice) if $verbose;
259 copy
($pool_file, $mp3);
263 # Don't generate MP3 if it already exists (probably from the POOL)
265 if ($id eq "VOICE_PAUSE") {
266 print("Use distributed $wav\n") if $verbose;
267 copy
(dirname
($0)."/VOICE_PAUSE.wav", $wav);
270 voicestring
($voice, $wav, $tts_engine, $tts_engine_opts, $tts_object);
271 wavtrim
($wav, 500); # 500 seems to be a reasonable default for now
274 encodewav
($wav, $mp3, $encoder, $encoder_opts);
275 if (defined($ENV{'POOL'})) {
276 copy
($mp3, $pool_file);
287 shutdown_tts
($tts_engine, $tts_object);
290 # Assemble the voicefile
293 my ($language, $target_id) = @_;
294 my $voicefont = dirname
($0) . '/voicefont';
298 $outfile = sprintf("%s%s.voice", $language, ($i++ == 0 ?
'' : '-'.$i));
299 } while (-f
$outfile);
300 printf("Saving voice file to %s\n", $outfile) if $verbose;
301 my $cmd = "$voicefont 'voicefontids' $target_id ./ $outfile";
302 print("> $cmd\n") if $verbose;
304 print($output) if $verbose;
308 for (glob('*.mp3')) {
311 for (glob('*.wav')) {
323 unless (defined($V) or defined($C)) { print("Missing either -V or -C\n"); $printusage = 1; }
325 unless (defined($t)) { print("Missing -t argument\n"); $printusage = 1; }
326 unless (defined($l)) { print("Missing -l argument\n"); $printusage = 1; }
327 unless (defined($i)) { print("Missing -i argument\n"); $printusage = 1; }
329 elsif (defined($C)) {
330 unless (defined($ARGV[0])) { print "Missing path argument\n"; $printusage = 1; }
332 unless (defined($e)) { print("Missing -e argument\n"); $printusage = 1; }
333 unless (defined($E)) { print("Missing -E argument\n"); $printusage = 1; }
334 unless (defined($s)) { print("Missing -s argument\n"); $printusage = 1; }
335 unless (defined($S)) { print("Missing -S argument\n"); $printusage = 1; }
336 if ($printusage == 1) { printusage
(); exit 1; }
338 $SIG{INT
} = \
&panic_cleanup
;
339 $SIG{KILL
} = \
&panic_cleanup
;
341 if (defined($v) or defined($ENV{'V'})) {
348 printf("Generating voice\n Target: %s\n Language: %s\n Encoder (options): %s (%s)\n TTS Engine (options): %s (%s)\n",
349 $t, $l, $e, $E, $s, $S);
350 generateclips
($l, $t, $e, $E, $s, $S);
355 # xxx: Implement .talk clip generation