1 /***************************************************** vim:set ts=4 sw=4 sts=4:
2 Main speaking functions for the Festival (Interactive) Plug in
5 (C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
7 Original author: Gary Cramblitt <garycramblitt@comcast.net>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 ******************************************************************************/
24 // FestivalInt includes.
25 #include "festivalintproc.h"
26 #include "festivalintproc.moc"
32 #include <QtCore/QThread>
33 #include <QtCore/QTextCodec>
34 #include <QtCore/QByteArray>
39 #include <kconfiggroup.h>
40 #include <kstandarddirs.h>
// Constructor. Passes `parent` and the object name "festivalintproc" to the
// PlugInProc base; the QStringList argument is unnamed and not used here.
// Initial state set by the visible statements: not writing to stdin, no voice
// query or stop pending, SSML support unknown, language "en", and the codec
// returned by QTextCodec::codecForName("ISO8859-1").
// NOTE(review): this copy shows no closing brace and its embedded line numbers
// skip values, so some initialisers may be missing - confirm against upstream.
46 FestivalIntProc::FestivalIntProc( QObject
* parent
, const QStringList
& ) :
47 PlugInProc( parent
, "festivalintproc" ){
48 // kDebug() << "FestivalIntProc::FestivalIntProc: Running";
50 m_writingStdin
= false;
51 m_waitingQueryVoices
= false;
52 m_waitingStop
= false;
55 m_supportsSSML
= ssUnknown
;
56 m_languageCode
= "en";
57 m_codec
= QTextCodec::codecForName("ISO8859-1");
// Destructor. The visible code checks whether the Festival child process is
// running and writes the "(quit)" command to its stdin.
// NOTE(review): the trailing debug comment mentions killing Festival, but no
// such statement (nor any braces or release of m_festProc) is visible in this
// copy - presumably lines are missing; confirm against upstream.
61 FestivalIntProc::~FestivalIntProc(){
62 // kDebug() << "FestivalIntProc::~FestivalIntProc: Running";
65 if (m_festProc
->isRunning())
70 // kDebug() << "FestivalIntProc::~FestivalIntProc: telling Festival to quit.";
73 m_festProc
->writeStdin("(quit)", true);
77 // kDebug() << "FestivalIntProc::~FestivalIntProc: killing Festival.";
86 /** Initialize the speech */
// Loads this plugin's settings from group `configGroup` of KConfig `c`:
// voice code, Festival executable path (default "festival"), time, pitch and
// volume (default 100 each), the Preload flag (default false), language code
// (default "en"), the stored SupportsSSML value (default ssUnknown) and the
// codec name (default "Latin1", converted with codecNameToCodec()).
// When Preload is set, startEngine() is called right away with these values.
// NOTE(review): declared bool, but no return statement or braces are visible
// in this copy - lines appear to be missing; confirm against upstream.
87 bool FestivalIntProc::init(KConfig
*c
, const QString
&configGroup
)
89 // kDebug() << "FestivalIntProc::init: Initializing plug in: Festival";
91 KConfigGroup
config(c
, configGroup
);
92 m_voiceCode
= config
.readEntry("Voice");
93 m_festivalExePath
= config
.readEntry("FestivalExecutablePath", "festival");
94 // kDebug() << "---- The code for the selected voice " << config.readEntry("Voice") << " is " << voiceCode;
95 m_time
= config
.readEntry("time", 100);
96 m_pitch
= config
.readEntry("pitch", 100);
97 m_volume
= config
.readEntry("volume", 100);
98 // If voice should be pre-loaded, start Festival and load the voice.
99 m_preload
= config
.readEntry("Preload", false);
100 m_languageCode
= config
.readEntry("LanguageCode", "en");
// SupportsSSML is stored as an int and cast back to the SupportsSSML enum.
101 m_supportsSSML
= static_cast<SupportsSSML
>(config
.readEntry("SupportsSSML", int(ssUnknown
)));
102 QString codecName
= config
.readEntry("Codec", "Latin1");
103 m_codec
= codecNameToCodec(codecName
);
104 if (m_preload
) startEngine(m_festivalExePath
, m_voiceCode
, m_languageCode
, m_codec
);
109 * Say a text. Synthesize and audibilize it.
110 * @param text The text to be spoken.
112 * If the plugin supports asynchronous operation, it should return immediately.
114 void FestivalIntProc::sayText(const QString
&text
)
116 synth(m_festivalExePath
, text
, QString(), m_voiceCode
, m_time
, m_pitch
, m_volume
,
117 m_languageCode
, m_codec
);
121 * Synthesize text into an audio file, but do not send to the audio device.
122 * @param text The text to be synthesized.
123 * @param suggestedFilename Full pathname of file to create. The plugin
124 * may ignore this parameter and choose its own
125 * filename. KTTSD will query the generated
126 * filename using getFilename().
128 * If the plugin supports asynchronous operation, it should return immediately.
130 void FestivalIntProc::synthText(const QString
& text
, const QString
& suggestedFilename
)
132 synth(m_festivalExePath
, text
, suggestedFilename
, m_voiceCode
, m_time
, m_pitch
, m_volume
,
133 m_languageCode
, m_codec
);
137 * Sends command to Festival to query for a list of supported voice codes.
138 * Fires queryVoicesFinished when completed.
139 * @return False if busy doing something else and therefore cannot
// Asks Festival for its list of voice codes.
// Starts the engine with `festivalExePath` (no voice code, current language
// and codec), marks a voice query as pending, resets the SSML-support flag to
// ssUnknown, and queues a Scheme expression that prints the first element of
// every entry in voice-locations.
// NOTE(review): the guard below returns false only when ALL three conditions
// hold (state not idle AND a query pending AND a stop pending). The header
// comment says "False if busy doing something else", so `||` may have been
// intended - confirm before changing.
// NOTE(review): declared bool, but no success return or braces are visible in
// this copy - lines appear to be missing; confirm against upstream.
142 bool FestivalIntProc::queryVoices(const QString
&festivalExePath
)
144 // kDebug() << "FestivalIntProc::queryVoices: Running";
145 if (m_state
!= psIdle
&& m_waitingQueryVoices
&& m_waitingStop
) return false;
146 // Start Festival if not already running.
147 startEngine(festivalExePath
, QString(), m_languageCode
, m_codec
);
148 // Set state, waiting for voice codes list from Festival.
149 m_waitingQueryVoices
= true;
150 // Voice rab_diphone is needed in order to support SSML.
151 m_supportsSSML
= ssUnknown
;
152 // Send command to query the voice codes.
153 sendToFestival("(print (mapcar (lambda (pair) (car pair)) voice-locations))");
158 * Start Festival engine.
159 * @param festivalExePath Path to the Festival executable, or just "festival".
160 * @param voiceCode Voice code in which to speak text.
161 * @param languageCode Language code, for example, "en".
// Makes sure a Festival process matching the requested executable, language
// code and codec is set up and running, then selects the requested voice.
// Visible steps: compare the request against the stored executable path,
// language code and codec name (and the process's running state); create a
// K3Process for "<festivalExePath> --interactive" with LANG and LC_CTYPE set;
// connect its signals to this object's slots; start it and, on success, load
// the sabletowave.scm helper; finally send "(voice_<voiceCode>)" when the
// voice differs from the one already running and is not empty.
// NOTE(review): braces, the actions taken when the comparison below is true,
// and any release of a previous m_festProc are not visible in this copy -
// lines appear to be missing; confirm against upstream before editing.
163 void FestivalIntProc::startEngine(const QString
&festivalExePath
, const QString
&voiceCode
,
164 const QString
&languageCode
, QTextCodec
* codec
)
166 // Initialize Festival only if it's not initialized.
169 // Stop Festival if a different EXE is requested or different language code.
170 // If festProc exists but is not running, it is because it was stopped.
171 if ((festivalExePath
!= m_festivalExePath
) || !m_festProc
->isRunning() ||
172 (m_languageCode
!= languageCode
) || (codec
->name() != m_codec
->name()))
180 // kDebug()<< "FestivalIntProc::startEngine: Creating Festival object";
// Create the process object and build its command line: <exe> --interactive.
181 m_festProc
= new K3Process
;
182 *m_festProc
<< festivalExePath
;
183 *m_festProc
<< "--interactive";
// LANG and LC_CTYPE are both set to "<languageCode>.<codec name>".
184 m_festProc
->setEnvironment("LANG", languageCode
+ '.' + codec
->name());
185 m_festProc
->setEnvironment("LC_CTYPE", languageCode
+ '.' + codec
->name());
186 // kDebug() << "FestivalIntProc::startEngine: setting LANG = LC_CTYPE = " << languageCode << "." << codec->mimeName();
// Forward exit, stdout, stderr and stdin-written notifications to our slots.
187 connect(m_festProc
, SIGNAL(processExited(K3Process
*)),
188 this, SLOT(slotProcessExited(K3Process
*)));
189 connect(m_festProc
, SIGNAL(receivedStdout(K3Process
*, char*, int)),
190 this, SLOT(slotReceivedStdout(K3Process
*, char*, int)));
191 connect(m_festProc
, SIGNAL(receivedStderr(K3Process
*, char*, int)),
192 this, SLOT(slotReceivedStderr(K3Process
*, char*, int)));
193 connect(m_festProc
, SIGNAL(wroteStdin(K3Process
*)),
194 this, SLOT(slotWroteStdin(K3Process
*)));
196 if (!m_festProc
->isRunning())
198 // kDebug() << "FestivalIntProc::startEngine: Starting Festival process";
// Reset the cached voice code and pitch (100) and empty the output queue.
199 m_runningVoiceCode
.clear();
201 m_runningPitch
= 100;
203 m_outputQueue
.clear();
204 if (m_festProc
->start(K3Process::NotifyOnExit
, K3Process::All
))
206 // kDebug()<< "FestivalIntProc:startEngine: Festival initialized";
// Remember what the running engine was started with.
207 m_festivalExePath
= festivalExePath
;
208 m_languageCode
= languageCode
;
210 // Load the SABLE to Wave module.
// The script path is built from the last entry of the "data" resource dirs.
211 sendToFestival("(load \"" +
212 KGlobal::dirs()->resourceDirs("data").last() + "kttsd/festivalint/sabletowave.scm\")");
216 kDebug() << "FestivalIntProc::startEngine: Error starting Festival process. Is festival in the PATH?";
222 // If we just started Festival, or voiceCode has changed, send code to Festival.
223 if (m_runningVoiceCode
!= voiceCode
&& !voiceCode
.isEmpty()) {
224 sendToFestival("(voice_" + voiceCode
+ ')');
225 m_runningVoiceCode
= voiceCode
;
230 * Say or Synthesize text.
231 * @param festivalExePath Path to the Festival executable, or just "festival".
232 * @param text The text to be synthesized.
233 * @param suggestedFilename If not Null, synthesize only to this filename, otherwise
234 * synthesize and audibilize the text.
235 * @param voiceCode Voice code in which to speak text.
236 * @param time Speed percentage. 50 to 200. 200% = 2x normal.
237 * @param pitch Pitch percentage. 50 to 200.
238 * @param volume Volume percentage. 50 to 200.
239 * @param languageCode Language code, for example, "en".
// Shared implementation behind sayText() and synthText().
// Visible steps: start or refresh the engine via startEngine(); send a
// duration command when `time` differs from m_runningTime; send an f0 (pitch)
// command when `pitch` differs from m_runningPitch; break over-long text at
// ", " into separate sentences; escape double quotes and strip "--"; then
// either queue a SayText command (null synthFilename) or queue a
// synthesize-to-file command that also rescales the volume.
// NOTE(review): several parameters (text, time, pitch, volume, codec), local
// declarations (timeMsg, pitchValue), braces and else-branches that the body
// relies on are not visible in this copy - lines appear to be missing; confirm
// against upstream before editing.
241 void FestivalIntProc::synth(
242 const QString
&festivalExePath
,
244 const QString
&synthFilename
,
245 const QString
&voiceCode
,
249 const QString
&languageCode
,
252 // kDebug() << "FestivalIntProc::synth: festivalExePath = " << festivalExePath
253 // << " voiceCode = " << voiceCode << endl;
255 // Initialize Festival only if it's not initialized
256 startEngine(festivalExePath
, voiceCode
, languageCode
, codec
);
257 // If we just started Festival, or rate changed, tell festival.
258 if (m_runningTime
!= time
) {
// Voices whose code contains "_hts" get the hts_duration_stretch command;
// the other visible command is Parameter.set 'Duration_Stretch.
260 if (voiceCode
.contains("_hts") > 0)
262 // Map 50% to 200% onto 0 to 1000.
263 // slider = alpha * (log(percent)-log(50))
264 // with alpha = 1000/(log(200)-log(50))
265 double alpha
= 1000 / (log(200.0) - log(50.0));
266 int slider
= (int)floor (0.5 + alpha
* (log((double)time
)-log(50.0)));
268 slider
= slider
- 500;
269 // Map -500 to 500 onto 0.15 to -0.15.
270 float stretchValue
= -float(slider
) * 0.15 / 500.0;
271 timeMsg
= QString("(set! hts_duration_stretch %1)").arg(
272 stretchValue
, 0, 'f', 3);
// Duration stretch is the reciprocal of the speed factor (time / 100).
275 timeMsg
= QString("(Parameter.set 'Duration_Stretch %1)").arg(
276 1.0/(float(time
)/100.0), 0, 'f', 2);
277 sendToFestival(timeMsg
);
278 m_runningTime
= time
;
280 // If we just started Festival, or pitch changed, tell festival.
281 if (m_runningPitch
!= pitch
) {
282 // Pitch values range from 50 to 200 %, with 100% as the midpoint,
283 // while frequency values range from 41 to 500 with 105 as the "midpoint".
// NOTE(review): two alternative pitchValue formulas follow; the condition that
// chooses between them is not visible in this copy.
287 pitchValue
= (((pitch
- 50) * 64) / 50) + 41;
291 pitchValue
= (((pitch
- 100) * 395) / 100) + 105;
293 QString pitchMsg
= QString(
294 "(set! int_lr_params '((target_f0_mean %1) (target_f0_std 14)"
295 "(model_f0_mean 170) (model_f0_std 34)))").arg(pitchValue
, 0, 10);
296 sendToFestival(pitchMsg
);
297 m_runningPitch
= pitch
;
300 QString saidText
= text
;
302 // Split really long sentences into shorter sentences, by looking for commas and converting
// Each pass searches backwards for ", " starting c_tooLong*2/3 characters
// before the current position; a failed search yields -1 and ends the loop.
304 int len
= saidText
.length();
305 while (len
> c_tooLong
)
307 len
= saidText
.lastIndexOf( ", ", len
- (c_tooLong
* 2 / 3), Qt::CaseSensitive
);
// Only split when the character after ", " is not already upper case; the
// comma becomes a full stop and that character is capitalised.
310 QString c
= saidText
.mid(len
+2, 1);
311 if (c
!= c
.toUpper())
313 saidText
.replace(len
, 2, ". ");
314 saidText
.replace(len
+2, 1, c
.toUpper());
315 kDebug() << "FestivalIntProc::synth: Splitting long sentence at " << len
;
316 // kDebug() << saidText;
321 // Encode quotation characters.
// Already-escaped quotes are parked as "#!#!" first, so the second replace
// does not escape them twice; the third replace restores them.
322 saidText
.replace("\\\"", "#!#!");
323 saidText
.replace('\"', "\\\"");
324 saidText
.replace("#!#!", "\\\"");
325 // Remove certain comment characters.
326 saidText
.remove("--");
// Null filename: speak aloud via SayText. Otherwise synthesize to the file.
329 if (synthFilename
.isNull())
332 m_synthFilename
.clear();
333 // kDebug() << "FestivalIntProc::synth: Saying text: '" << saidText << "' using Festival plug in with voice "
334 // << voiceCode << endl;
335 saidText
= "(SayText \"" + saidText
+ "\")";
336 sendToFestival(saidText
);
338 m_state
= psSynthing
;
339 m_synthFilename
= synthFilename
;
340 // Volume must be given for each utterance.
341 // Volume values range from 50 to 200%, with 100% = normal.
342 // Map onto rescale range of .5 to 2.
343 float volumeValue
= float(volume
) / 100;
344 // Expand to range .25 to 4.
345 // float volumeValue = exp(log(volumeValue) * 2);
346 // kDebug() << "FestivalIntProc::synth: Synthing text: '" << saidText << "' using Festival plug in with voice "
347 // << voiceCode << endl;
// SABLE markup goes through the ktts_sabletowave helper; the other visible
// command builds an Utterance, synthesizes it, rescales it by volumeValue and
// saves the wave to synthFilename.
348 if (isSable(saidText
))
350 // Synth the text and adjust volume.
352 "(ktts_sabletowave \"" + saidText
+ "\" \"" +
353 synthFilename
+ "\" " +
354 QString::number(volumeValue
) + ')';
359 // Suppress pause at the beginning of each utterance.
360 "(define (insert_initial_pause utt) "
361 "(item.set_feat (utt.relation.first utt 'Segment) 'end 0.0))"
362 // Synth the text and adjust volume.
363 "(set! utt1 (Utterance Text \"" + saidText
+
364 "\"))(utt.synth utt1)" +
365 "(utt.wave.rescale utt1 " + QString::number(volumeValue
) + " t)" +
366 "(utt.save.wave utt1 \"" + synthFilename
+ "\")";
368 sendToFestival(saidText
);
373 * If ready for more output, sends the given text to Festival process, otherwise,
374 * puts it in the queue.
375 * @param text Text to send or queue.
// Queues `text` for delivery to Festival: a null string is ignored, anything
// else is appended to m_outputQueue.
// NOTE(review): the header comment says the text is sent at once when Festival
// is ready, but no call that triggers sending is visible in this copy -
// presumably a line is missing after the append; confirm against upstream.
377 void FestivalIntProc::sendToFestival(const QString
& text
)
379 if (text
.isNull()) return;
380 m_outputQueue
.append(text
);
385 * If Festival is ready for more input and there is more output to send, send it.
386 * To be ready for more input, the Stdin buffer must be empty and the "festival>"
387 * prompt must have been received (m_ready = true).
388 * @return False when Festival is ready for more input
389 * but there is nothing to be sent, or if Festival
// Sends the next queued command to Festival when possible.
// Returns true (without sending) while the prompt has not been seen
// (m_ready false) or a stdin write is still in progress; returns false when
// the queue is empty or the process is not running. Otherwise it takes the
// first queued string, encodes it, removes it from the queue, marks a write as
// in progress and writes the bytes to the process's stdin.
// NOTE(review): the condition choosing between the codec and the toLatin1()
// fallback, the final return value and the braces are not visible in this
// copy - lines appear to be missing; confirm against upstream.
392 bool FestivalIntProc::sendIfReady()
394 if (!m_ready
) return true;
395 if (m_writingStdin
) return true;
396 if (m_outputQueue
.isEmpty()) return false;
397 if (!m_festProc
->isRunning()) return false;
398 QString text
= m_outputQueue
[0];
400 QByteArray encodedText
;
402 encodedText
= m_codec
->fromUnicode(text
);
404 encodedText
= text
.toLatin1(); // Should not happen, but just in case.
405 m_outputQueue
.pop_front();
407 // kDebug() << "FestivalIntProc::sendIfReady: sending to Festival: " << text;
// Cleared again in slotWroteStdin() once the write has completed.
408 m_writingStdin
= true;
409 m_festProc
->writeStdin(encodedText
, encodedText
.length());
414 * Determine if the text has SABLE tags. If so, we will have to use a different
417 bool FestivalIntProc::isSable(const QString
&text
)
419 return KttsUtils::hasRootElement( text
, "SABLE" );
423 * Get the generated audio filename from synthText.
424 * @return Name of the audio file the plugin generated.
425 * Null if no such file.
427 * The plugin must not re-use the filename.
429 QString
FestivalIntProc::getFilename() { return m_synthFilename
; }
// Requests that the current operation stop.
// When the Festival process is running, m_waitingStop is set; for a preloaded
// voice that is synthesizing to a file, the existing comment explains that
// Festival is left to finish instead of being killed. On the two visible
// else-branches the state is simply reset to psIdle.
// NOTE(review): the outer condition matching the first "} else", the statement
// that actually kills Festival and several braces are not visible in this
// copy - lines appear to be missing; confirm against upstream.
434 void FestivalIntProc::stopText(){
435 // kDebug() << "FestivalIntProc::stopText: Running";
438 if (m_festProc
->isRunning())
444 // If using a preloaded voice, killing Festival is a bad idea because of
445 // huge startup times. So if synthing (not saying), let Festival continue
446 // synthing. When it completes, we will emit the stopped signal.
447 if (m_preload
&& (m_state
== psSynthing
))
449 m_waitingStop
= true;
450 // kDebug() << "FestivalIntProc::stopText: Optimizing stopText() for preloaded voice.";
454 // kDebug() << "FestivalIntProc::stopText: killing Festival.";
455 m_waitingStop
= true;
459 } else m_state
= psIdle
;
460 } else m_state
= psIdle
;
// Slot connected to the process's processExited signal in startEngine().
// Remembers the state on entry, clears the pending-stop and pending-query
// flags when they are set, moves any non-idle state to psFinished, emits
// synthFinished() when the previous state was psSynthing, and finally empties
// the output queue.
// NOTE(review): the debug comments mention emitting "stopped" and
// "sayFinished" signals, but those emit statements (and the braces) are not
// visible in this copy - lines appear to be missing; confirm against upstream.
463 void FestivalIntProc::slotProcessExited(K3Process
*)
465 // kDebug() << "FestivalIntProc:slotProcessExited: Festival process has exited.";
467 pluginState prevState
= m_state
;
468 if (m_waitingStop
|| m_waitingQueryVoices
)
472 m_waitingStop
= false;
474 // kDebug() << "FestivalIntProc::slotProcessExited: emitting stopped signal";
477 if (m_waitingQueryVoices
)
479 // kDebug() << "FestivalIntProc::slotProcessExited: canceling queryVoices operation";
480 m_waitingQueryVoices
= false;
484 if (m_state
!= psIdle
) m_state
= psFinished
;
485 if (prevState
== psSaying
)
487 // kDebug() << "FestivalIntProc::slotProcessExited: emitting sayFinished signal";
490 if (prevState
== psSynthing
)
492 // kDebug() << "FestivalIntProc::slotProcessExited: emitting synthFinished signal";
493 emit
synthFinished();
498 m_outputQueue
.clear();
// Slot connected to the process's receivedStdout signal in startEngine().
// Decodes the received bytes as Latin-1 and notes whether they contain the
// "festival>" prompt. While a voice query is pending and nothing is left in
// the output queue, the text is parsed as the query reply: a leading "nil"
// ends the query with an empty list, a leading "(" yields the space-separated
// items up to the first ")". The later visible code moves a non-idle state to
// psFinished, emits synthFinished(), and - when a reply was parsed - sets
// m_supportsSSML according to whether "rab_diphone" is among the voices
// before emitting queryVoicesFinished().
// NOTE(review): the conditions that use promptSeen, the handling of
// m_waitingStop, other emit statements and the braces are not visible in this
// copy - lines appear to be missing; confirm against upstream.
501 void FestivalIntProc::slotReceivedStdout(K3Process
*, char* buffer
, int buflen
)
503 QString buf
= QString::fromLatin1(buffer
, buflen
);
504 // kDebug() << "FestivalIntProc::slotReceivedStdout: Received from Festival: " << buf;
505 bool promptSeen
= (buf
.contains("festival>") > 0);
506 bool emitQueryVoicesFinished
= false;
507 QStringList voiceCodesList
;
508 if (m_waitingQueryVoices
&& m_outputQueue
.isEmpty())
510 // Look for opening ( and closing ).
512 if (buf
.left(3) == "nil") {
513 emitQueryVoicesFinished
= true;
514 m_waitingQueryVoices
= false;
516 if (buf
.left(1) == "(")
518 int rightParen
= buf
.indexOf(')');
521 m_waitingQueryVoices
= false;
522 // Extract contents between parens.
523 buf
= buf
.mid(1, rightParen
- 1);
524 // Space separated list.
525 voiceCodesList
= buf
.split( ' ', QString::SkipEmptyParts
);
526 emitQueryVoicesFinished
= true;
533 // kDebug() << "FestivalIntProc::slotReceivedStdout: Prompt seen";
537 // kDebug() << "FestivalIntProc::slotReceivedStdout: All output sent. ";
538 pluginState prevState
= m_state
;
539 if (m_state
!= psIdle
) m_state
= psFinished
;
540 if (prevState
== psSaying
)
542 // kDebug() << "FestivalIntProc::slotReceivedStdout: emitting sayFinished signal";
545 if (prevState
== psSynthing
)
549 m_waitingStop
= false;
551 // kDebug() << "FestivalIntProc::slotReceivedStdout: emitting optimized stopped signal";
556 // kDebug() << "FestivalIntProc::slotReceivedStdout: emitting synthFinished signal";
557 emit
synthFinished();
562 if (emitQueryVoicesFinished
)
564 // kDebug() << "FestivalIntProc::slotReceivedStdout: emitting queryVoicesFinished";
// SSML support is tied to the rab_diphone voice being installed.
565 m_supportsSSML
= (voiceCodesList
.contains("rab_diphone")) ? ssYes
: ssNo
;
566 emit
queryVoicesFinished(voiceCodesList
);
570 void FestivalIntProc::slotReceivedStderr(K3Process
*, char* buffer
, int buflen
)
572 QString buf
= QString::fromLatin1(buffer
, buflen
);
573 kDebug() << "FestivalIntProc::slotReceivedStderr: Received error from Festival: " << buf
;
// Slot connected to the process's wroteStdin signal in startEngine().
// Clears the write-in-progress flag set by sendIfReady(). The later visible
// code moves a non-idle state to psFinished and emits synthFinished() when the
// previous state was psSynthing.
// NOTE(review): the condition guarding the "all output sent" section, the
// sayFinished emit mentioned in the debug comment and the braces are not
// visible in this copy - lines appear to be missing; confirm against upstream.
576 void FestivalIntProc::slotWroteStdin(K3Process
* /*proc*/)
578 // kDebug() << "FestivalIntProc::slotWroteStdin: Running";
579 m_writingStdin
= false;
582 // kDebug() << "FestivalIntProc::slotWroteStdin: all output sent";
583 pluginState prevState
= m_state
;
584 if (m_state
!= psIdle
) m_state
= psFinished
;
585 if (prevState
== psSaying
)
587 // kDebug() << "FestivalIntProc::slotWroteStdin: emitting sayFinished signal";
590 if (prevState
== psSynthing
)
592 // kDebug() << "FestivalIntProc::slotWroteStdin: emitting synthFinished signal";
593 emit
synthFinished();
599 bool FestivalIntProc::isReady() { return m_ready
; }
602 * Return the current state of the plugin.
603 * This function only makes sense in asynchronous mode.
604 * @return The pluginState of the plugin.
608 pluginState
FestivalIntProc::getState() { return m_state
; }
611 * Acknowledges a finished state and resets the plugin state to psIdle.
613 * If the plugin is not in state psFinished, nothing happens.
614 * The plugin may use this call to do any post-processing cleanup,
615 * for example, blanking the stored filename (but do not delete the file).
616 * Calling program should call getFilename prior to ackFinished.
// Acknowledges a finished operation: when the state is psFinished, the stored
// synthesis filename is cleared.
// NOTE(review): the header comment says the state is reset to psIdle, but no
// such assignment (nor any braces) is visible in this copy - presumably a line
// is missing inside the if; confirm against upstream.
618 void FestivalIntProc::ackFinished()
620 if (m_state
== psFinished
)
623 m_synthFilename
.clear();
628 * Returns True if the plugin supports asynchronous processing,
629 * i.e., returns immediately from sayText or synthText.
630 * @return True if this plugin supports asynchronous processing.
632 * If the plugin returns True, it must also implement @ref getState .
633 * It must also emit @ref sayFinished or @ref synthFinished signals when
634 * saying or synthesis is completed.
636 bool FestivalIntProc::supportsAsync() { return true; }
639 * Returns True if the plugin supports synthText method,
640 * i.e., is able to synthesize text to a sound file without
641 * audibilizing the text.
642 * @return True if this plugin supports synthText method.
644 bool FestivalIntProc::supportsSynth() { return true; }
647 * Returns the name of an XSLT stylesheet that will convert a valid SSML file
648 * into a format that can be processed by the synth. For example,
649 * The Festival plugin returns a stylesheet that will convert SSML into
650 * SABLE. Any tags the synth cannot handle should be stripped (leaving
651 * their text contents though). The default stylesheet strips all
652 * tags and converts the file to plain text.
653 * @return Name of the XSLT file.
655 QString
FestivalIntProc::getSsmlXsltFilename()
657 if (m_supportsSSML
== ssYes
)
658 return KGlobal::dirs()->resourceDirs("data").last() + "kttsd/festivalint/xslt/SSMLtoSable.xsl";
660 return PlugInProc::getSsmlXsltFilename();