2 * Asterisk -- An open source telephony toolkit.
4 * Copyright (C) 2006, Digium, Inc.
6 * Joshua Colp <jcolp@digium.com>
8 * See http://www.asterisk.org for more information about
9 * the Asterisk project. Please do not directly contact
10 * any of the maintainers of this project for assistance;
11 * the project provides a web site, mailing lists and IRC
12 * channels for your use.
14 * This program is free software, distributed under the terms of
15 * the GNU General Public License Version 2. See the LICENSE file
16 * at the top of the source tree.
21 * \brief Speech Recognition Utility Applications
23 * \author Joshua Colp <jcolp@digium.com>
25 * \ingroup applications
/* Invokes ASTERISK_FILE_VERSION with this file name (__FILE__) and the
 * Revision keyword string.
 * NOTE(review): presumably this records the file revision with the core --
 * confirm against the macro definition, which is not visible here. */
30 ASTERISK_FILE_VERSION(__FILE__
, "$Revision$");
37 #include "asterisk/file.h"
38 #include "asterisk/logger.h"
39 #include "asterisk/channel.h"
40 #include "asterisk/pbx.h"
41 #include "asterisk/module.h"
42 #include "asterisk/lock.h"
43 #include "asterisk/app.h"
44 #include "asterisk/speech.h"
46 /* Descriptions for each application */
/* Help text for SpeechCreate; load_module() hands it to
 * ast_register_application() as the description argument. */
47 static char *speechcreate_descrip
=
48 "SpeechCreate(engine name)\n"
49 "This application creates information to be used by all the other applications. It must be called before doing any speech recognition activities such as activating a grammar.\n"
50 "It takes the engine name to use as the argument, if not specified the default engine will be used.\n";
/* Help text for SpeechActivateGrammar; passed to ast_register_application()
 * in load_module(). */
52 static char *speechactivategrammar_descrip
=
53 "SpeechActivateGrammar(Grammar Name)\n"
54 "This activates the specified grammar to be recognized by the engine. A grammar tells the speech recognition engine what to recognize, \n"
55 "and how to portray it back to you in the dialplan. The grammar name is the only argument to this application.\n";
/* Help text for SpeechStart; passed to ast_register_application() in
 * load_module().
 * NOTE(review): the embedded listing numbers jump from 57 to 59, so the first
 * line of this string (the usage line) may be missing from this view. */
57 static char *speechstart_descrip
=
59 "Tell the speech recognition engine that it should start trying to get results from audio being fed to it. This has no arguments.\n";
/* Help text for SpeechBackground; passed to ast_register_application() in
 * load_module(). It also documents the SPEECH_TEXT and SPEECH_SCORE dialplan
 * functions, whose first result is index 0. */
61 static char *speechbackground_descrip
=
62 "SpeechBackground(Sound File|Timeout)\n"
63 "This application plays a sound file and waits for the person to speak. Once they start speaking playback of the file stops, and silence is heard.\n"
64 "Once they stop talking the processing sound is played to indicate the speech recognition engine is working.\n"
65 "Once results are available the application returns and results (score and text) are available using dialplan functions.\n"
66 "The first text and score are ${SPEECH_TEXT(0)} AND ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)} and ${SPEECH_SCORE(1)}.\n"
67 "The first argument is the sound file and the second is the timeout. Note the timeout will only start once the sound file has stopped playing.\n";
/* Help text for SpeechDeactivateGrammar; passed to ast_register_application()
 * in load_module(). */
69 static char *speechdeactivategrammar_descrip
=
70 "SpeechDeactivateGrammar(Grammar Name)\n"
71 "This deactivates the specified grammar so that it is no longer recognized. The only argument is the grammar name to deactivate.\n";
/* Help text for SpeechProcessingSound; passed to ast_register_application()
 * in load_module(). */
73 static char *speechprocessingsound_descrip
=
74 "SpeechProcessingSound(Sound File)\n"
75 "This changes the processing sound that SpeechBackground plays back when the speech recognition engine is processing and working to get results.\n"
76 "It takes the sound file as the only argument.\n";
/* Help text for SpeechDestroy; passed to ast_register_application() in
 * load_module().
 * NOTE(review): the embedded listing numbers jump from 78 to 80, so the first
 * line of this string (the usage line) may be missing from this view. */
78 static char *speechdestroy_descrip
=
80 "This destroys the information used by all the other speech recognition applications.\n"
81 "If you call this application but end up wanting to recognize more speech, you must call SpeechCreate\n"
82 "again before calling any other application. It takes no arguments.\n";
/* Help text for SpeechLoadGrammar; passed to ast_register_application() in
 * load_module(). */
84 static char *speechload_descrip
=
85 "SpeechLoadGrammar(Grammar Name|Path)\n"
86 "Load a grammar only on the channel, not globally.\n"
87 "It takes the grammar name as first argument and path as second.\n";
/* Help text for SpeechUnloadGrammar; passed to ast_register_application() in
 * load_module(). */
89 static char *speechunload_descrip
=
90 "SpeechUnloadGrammar(Grammar Name)\n"
91 "Unload a grammar. It takes the grammar name as the only argument.\n";
93 /*! \brief Helper function used by datastores to destroy the speech structure upon hangup */
/*
 * Installed as the destroy member of speech_datastore.
 * \param data opaque pointer handed to the callback; it is cast to
 *        struct ast_speech * and passed to ast_speech_destroy().
 * NOTE(review): any guard between the cast and the destroy call is not
 * visible in this listing.
 */
94 static void destroy_callback(void *data
)
96 struct ast_speech
*speech
= (struct ast_speech
*)data
;
/* Release the speech structure through the speech API. */
103 ast_speech_destroy(speech
);
108 /*! \brief Static structure for datastore information */
/* Its address is the key used with ast_channel_datastore_find() and
 * ast_channel_datastore_alloc() throughout this file; the destroy member
 * points at destroy_callback(). */
109 static const struct ast_datastore_info speech_datastore
= {
111 .destroy
= destroy_callback
114 /*! \brief Helper function used to find the speech structure attached to a channel */
/*
 * \param chan channel whose datastores are searched
 *
 * Looks up the datastore keyed by speech_datastore on the channel and reads
 * its data pointer into speech. Every caller in this file compares the
 * result with NULL before using it.
 * NOTE(review): the body of the datastore == NULL branch and the return
 * statement are not visible in this listing -- presumably NULL is returned
 * when no datastore exists and speech otherwise; confirm.
 */
115 static struct ast_speech
*find_speech(struct ast_channel
*chan
)
117 struct ast_speech
*speech
= NULL
;
118 struct ast_datastore
*datastore
= NULL
;
/* Search by datastore type only; the third argument (NULL) adds no further filter. */
120 datastore
= ast_channel_datastore_find(chan
, &speech_datastore
, NULL
);
121 if (datastore
== NULL
) {
/* speech_create() stores the struct ast_speech pointer in datastore->data. */
124 speech
= datastore
->data
;
129 /* Helper function to find a specific speech recognition result by number */
/*
 * \param results head of the result list; entries are linked through next
 * \param num     number of the wanted result
 * NOTE(review): presumably num is zero-based (the SpeechBackground help text
 * calls SPEECH_TEXT(0) the first result); the loop bounds and the return
 * statement are not visible in this listing -- confirm.
 */
130 static struct ast_speech_result
*find_result(struct ast_speech_result
*results
, int num
)
132 struct ast_speech_result
*result
= NULL
;
/* Step to the following list entry. */
140 result
= result
->next
;
146 /*! \brief SPEECH_SCORE() Dialplan Function */
/*
 * Read callback of speech_score_function.
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param cmd  not referenced in the visible code
 * \param data result number as text; converted with atoi()
 * \param buf  output buffer receiving the score formatted with %d
 * \param len  size of buf
 * NOTE(review): atoi() reports no conversion errors, so non-numeric text is
 * treated as result 0. The declaration of tmp and the return statements are
 * not visible in this listing.
 */
147 static int speech_score(struct ast_channel
*chan
, char *cmd
, char *data
,
148 char *buf
, size_t len
)
150 struct ast_speech_result
*result
= NULL
;
151 struct ast_speech
*speech
= find_speech(chan
);
/* Bail-out test: no argument, no speech structure, or no such result. */
154 if (data
== NULL
|| speech
== NULL
|| !(result
= find_result(speech
->results
, atoi(data
))))
/* Format the integer score, then copy it into the caller buffer. */
157 snprintf(tmp
, sizeof(tmp
), "%d", result
->score
);
159 ast_copy_string(buf
, tmp
, len
);
/* Custom function record for SPEECH_SCORE; registered in load_module() and
 * unregistered in unload_module(). Reads are served by speech_score(). */
164 static struct ast_custom_function speech_score_function
= {
165 .name
= "SPEECH_SCORE",
166 .synopsis
= "Gets the confidence score of a result.",
167 .syntax
= "SPEECH_SCORE(result number)",
169 "Gets the confidence score of a result.\n",
170 .read
= speech_score
,
174 /*! \brief SPEECH_TEXT() Dialplan Function */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param cmd  not referenced in the visible code
 * \param data result number as text; converted with atoi()
 * \param buf  output buffer receiving the recognized text
 * \param len  size of buf
 * NOTE(review): atoi() reports no conversion errors, so non-numeric text is
 * treated as result 0. Return statements are not visible in this listing.
 */
175 static int speech_text(struct ast_channel
*chan
, char *cmd
, char *data
,
176 char *buf
, size_t len
)
178 struct ast_speech_result
*result
= NULL
;
179 struct ast_speech
*speech
= find_speech(chan
);
/* Bail-out test: no argument, no speech structure, or no such result. */
181 if (data
== NULL
|| speech
== NULL
|| !(result
= find_result(speech
->results
, atoi(data
))))
/* The text is copied only when the result carries one. */
184 if (result
->text
!= NULL
)
185 ast_copy_string(buf
, result
->text
, len
);
/* Custom function record for SPEECH_TEXT; registered in load_module() and
 * unregistered in unload_module().
 * NOTE(review): the read member is not visible in this listing -- presumably
 * it is set to speech_text; confirm. */
190 static struct ast_custom_function speech_text_function
= {
191 .name
= "SPEECH_TEXT",
192 .synopsis
= "Gets the recognized text of a result.",
193 .syntax
= "SPEECH_TEXT(result number)",
195 "Gets the recognized text of a result.\n",
200 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
/*
 * Read callback of speech_grammar_function.
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param cmd  not referenced in the visible code
 * \param data result number as text; converted with atoi()
 * \param buf  output buffer receiving the matched grammar name
 * \param len  size of buf
 * NOTE(review): atoi() reports no conversion errors, so non-numeric text is
 * treated as result 0. Return statements are not visible in this listing.
 */
201 static int speech_grammar(struct ast_channel
*chan
, char *cmd
, char *data
,
202 char *buf
, size_t len
)
204 struct ast_speech_result
*result
= NULL
;
205 struct ast_speech
*speech
= find_speech(chan
);
/* Bail-out test: no argument, no speech structure, or no such result. */
207 if (data
== NULL
|| speech
== NULL
|| !(result
= find_result(speech
->results
, atoi(data
))))
/* The grammar name is copied only when the result carries one. */
210 if (result
->grammar
!= NULL
)
211 ast_copy_string(buf
, result
->grammar
, len
);
/* Custom function record for SPEECH_GRAMMAR; registered in load_module() and
 * unregistered in unload_module(). Reads are served by speech_grammar(). */
216 static struct ast_custom_function speech_grammar_function
= {
217 .name
= "SPEECH_GRAMMAR",
218 .synopsis
= "Gets the matched grammar of a result if available.",
219 .syntax
= "SPEECH_GRAMMAR(result number)",
221 "Gets the matched grammar of a result if available.\n",
222 .read
= speech_grammar
,
226 /*! \brief SPEECH_ENGINE() Dialplan Function */
/*
 * Write callback of speech_engine_function.
 * \param chan  channel whose speech structure is looked up with find_speech()
 * \param cmd   not referenced in the visible code
 * \param data  attribute name, forwarded to ast_speech_change()
 * \param value new attribute value, forwarded to ast_speech_change()
 * NOTE(review): value is forwarded without a NULL check in the visible code;
 * return statements are not visible in this listing.
 */
227 static int speech_engine_write(struct ast_channel
*chan
, char *cmd
, char *data
, const char *value
)
229 struct ast_speech
*speech
= find_speech(chan
);
/* Both an attribute name and an existing speech structure are required. */
231 if (data
== NULL
|| speech
== NULL
)
234 ast_speech_change(speech
, data
, value
);
/* Custom function record for SPEECH_ENGINE; registered in load_module() and
 * unregistered in unload_module(). Writes are served by
 * speech_engine_write(); no read member is visible in this listing. */
239 static struct ast_custom_function speech_engine_function
= {
240 .name
= "SPEECH_ENGINE",
241 .synopsis
= "Change a speech engine specific attribute.",
242 .syntax
= "SPEECH_ENGINE(name)=value",
244 "Changes a speech engine specific attribute.\n",
246 .write
= speech_engine_write
,
249 /*! \brief SPEECH() Dialplan Function */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param cmd  not referenced in the visible code
 * \param data option keyword, compared case-insensitively against
 *             status, spoke and results
 * \param buf  output buffer
 * \param len  size of buf
 * NOTE(review): data is handed to strcasecmp() with no visible NULL check,
 * unlike speech_score() and speech_text() which test data == NULL first --
 * confirm the caller never passes NULL. The declarations of tmp and results
 * and the return statements are not visible in this listing.
 */
250 static int speech_read(struct ast_channel
*chan
, char *cmd
, char *data
,
251 char *buf
, size_t len
)
254 struct ast_speech_result
*result
= NULL
;
255 struct ast_speech
*speech
= find_speech(chan
);
258 /* Now go for the various options */
/* status is answered before the speech NULL check below, so it can be used
 * without a speech structure; the condition choosing 1 or 0 is not visible. */
259 if (!strcasecmp(data
, "status")) {
261 ast_copy_string(buf
, "1", len
);
263 ast_copy_string(buf
, "0", len
);
267 /* Make sure we have a speech structure for everything else */
268 if (speech
== NULL
) {
272 /* Check to see if they are checking for silence */
273 if (!strcasecmp(data
, "spoke")) {
/* 1 when the AST_SPEECH_SPOKE flag is set on the speech structure, else 0. */
274 if (ast_test_flag(speech
, AST_SPEECH_SPOKE
))
275 ast_copy_string(buf
, "1", len
);
277 ast_copy_string(buf
, "0", len
);
278 } else if (!strcasecmp(data
, "results")) {
279 /* Count number of results */
280 result
= speech
->results
;
283 result
= result
->next
;
/* Report the count as decimal text. */
285 snprintf(tmp
, sizeof(tmp
), "%d", results
);
286 ast_copy_string(buf
, tmp
, len
);
/* Custom function record for SPEECH; registered in load_module() and
 * unregistered in unload_module().
 * NOTE(review): the name and read members are not visible in this listing --
 * presumably SPEECH and speech_read; confirm. The word spoker in the spoke
 * help line looks like a typo for speaker; it is runtime text, so it is left
 * untouched here. */
292 static struct ast_custom_function speech_function
= {
294 .synopsis
= "Gets information about speech recognition results.",
295 .syntax
= "SPEECH(argument)",
297 "Gets information about speech recognition results.\n"
298 "status: Returns 1 upon speech object existing, or 0 if not\n"
299 "spoke: Returns 1 if spoker spoke, or 0 if not\n"
300 "results: Returns number of results that were recognized\n",
307 /*! \brief SpeechCreate() Dialplan Application */
/*
 * Creates a speech structure and attaches it to the channel in a datastore
 * keyed by speech_datastore, where find_speech() later finds it.
 * \param chan channel the speech structure is attached to
 * \param data engine name, passed straight to ast_speech_new()
 * Both failure paths set the channel variable ERROR to 1.
 * NOTE(review): return statements are not visible in this listing.
 */
308 static int speech_create(struct ast_channel
*chan
, void *data
)
310 struct ast_module_user
*u
= NULL
;
311 struct ast_speech
*speech
= NULL
;
312 struct ast_datastore
*datastore
= NULL
;
/* Hold a module user reference while the application runs; every exit path
 * visible below drops it again. */
314 u
= ast_module_user_add(chan
);
316 /* Request a speech object */
317 speech
= ast_speech_new(data
, AST_FORMAT_SLINEAR
);
318 if (speech
== NULL
) {
320 pbx_builtin_setvar_helper(chan
, "ERROR", "1");
321 ast_module_user_remove(u
);
/* Allocate the datastore that will own the speech pointer. */
325 datastore
= ast_channel_datastore_alloc(&speech_datastore
, NULL
);
326 if (datastore
== NULL
) {
/* No datastore to own it, so the speech structure is destroyed right away. */
327 ast_speech_destroy(speech
);
328 pbx_builtin_setvar_helper(chan
, "ERROR", "1");
329 ast_module_user_remove(u
);
332 datastore
->data
= speech
;
333 ast_channel_datastore_add(chan
, datastore
);
335 ast_module_user_remove(u
);
340 /*! \brief SpeechLoadGrammar(Grammar Name|Path) Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data argument string; duplicated with ast_strdupa() and split on the
 *             pipe character into at most two fields
 * NOTE(review): ast_strdupa(data) runs before any visible check on data --
 * confirm data cannot be NULL. The assignments of name and path from argv,
 * the argument-count check and the return statements are not visible in this
 * listing.
 */
341 static int speech_load(struct ast_channel
*chan
, void *data
)
343 int res
= 0, argc
= 0;
344 struct ast_module_user
*u
= NULL
;
345 struct ast_speech
*speech
= find_speech(chan
);
346 char *argv
[2], *args
= NULL
, *name
= NULL
, *path
= NULL
;
/* Work on a private copy, since the separator call below receives args
 * as a non-const buffer. */
348 args
= ast_strdupa(data
);
350 u
= ast_module_user_add(chan
);
352 if (speech
== NULL
) {
353 ast_module_user_remove(u
);
357 /* Parse out arguments */
358 argc
= ast_app_separate_args(args
, '|', argv
, sizeof(argv
) / sizeof(argv
[0]));
360 ast_module_user_remove(u
);
366 /* Load the grammar locally on the object */
367 res
= ast_speech_grammar_load(speech
, name
, path
);
369 ast_module_user_remove(u
);
374 /*! \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data grammar name, forwarded unchanged to ast_speech_grammar_unload()
 * NOTE(review): the declaration of res and the return statements are not
 * visible in this listing.
 */
375 static int speech_unload(struct ast_channel
*chan
, void *data
)
378 struct ast_module_user
*u
= NULL
;
379 struct ast_speech
*speech
= find_speech(chan
);
381 u
= ast_module_user_add(chan
);
/* Without a speech structure, only the module user reference is dropped. */
383 if (speech
== NULL
) {
384 ast_module_user_remove(u
);
388 /* Unload the grammar */
389 res
= ast_speech_grammar_unload(speech
, data
);
391 ast_module_user_remove(u
);
396 /*! \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data grammar name, forwarded unchanged to
 *             ast_speech_grammar_deactivate()
 * NOTE(review): the declaration of res and the return statements are not
 * visible in this listing.
 */
397 static int speech_deactivate(struct ast_channel
*chan
, void *data
)
400 struct ast_module_user
*u
= NULL
;
401 struct ast_speech
*speech
= find_speech(chan
);
403 u
= ast_module_user_add(chan
);
/* Without a speech structure, only the module user reference is dropped. */
405 if (speech
== NULL
) {
406 ast_module_user_remove(u
);
410 /* Deactivate the grammar on the speech object */
411 res
= ast_speech_grammar_deactivate(speech
, data
);
413 ast_module_user_remove(u
);
418 /*! \brief SpeechActivateGrammar(Grammar Name) Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data grammar name, forwarded unchanged to
 *             ast_speech_grammar_activate()
 * NOTE(review): the declaration of res and the return statements are not
 * visible in this listing.
 */
419 static int speech_activate(struct ast_channel
*chan
, void *data
)
422 struct ast_module_user
*u
= NULL
;
423 struct ast_speech
*speech
= find_speech(chan
);
425 u
= ast_module_user_add(chan
);
/* Without a speech structure, only the module user reference is dropped. */
427 if (speech
== NULL
) {
428 ast_module_user_remove(u
);
432 /* Activate the grammar on the speech object */
433 res
= ast_speech_grammar_activate(speech
, data
);
435 ast_module_user_remove(u
);
440 /*! \brief SpeechStart() Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data not referenced in the visible code
 * NOTE(review): return statements are not visible in this listing.
 */
441 static int speech_start(struct ast_channel
*chan
, void *data
)
444 struct ast_module_user
*u
= NULL
;
445 struct ast_speech
*speech
= find_speech(chan
);
447 u
= ast_module_user_add(chan
);
/* Without a speech structure, only the module user reference is dropped. */
449 if (speech
== NULL
) {
450 ast_module_user_remove(u
);
/* Start the speech structure through the speech API. */
454 ast_speech_start(speech
);
456 ast_module_user_remove(u
);
461 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
/*
 * Replaces the processing_sound string held by the speech structure.
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data sound file name; duplicated with strdup()
 * NOTE(review): data is passed to strdup() with no visible NULL check, and
 * the strdup() result is stored unchecked -- confirm callers always supply an
 * argument. Return statements are not visible in this listing.
 */
462 static int speech_processing_sound(struct ast_channel
*chan
, void *data
)
465 struct ast_module_user
*u
= NULL
;
466 struct ast_speech
*speech
= find_speech(chan
);
468 u
= ast_module_user_add(chan
);
470 if (speech
== NULL
) {
471 ast_module_user_remove(u
);
/* Drop the previous heap copy before installing the new one. */
475 if (speech
->processing_sound
!= NULL
) {
476 free(speech
->processing_sound
);
477 speech
->processing_sound
= NULL
;
480 speech
->processing_sound
= strdup(data
);
482 ast_module_user_remove(u
);
487 /*! \brief Helper function used by speech_background to playback a soundfile */
/*
 * \param chan     channel to play on
 * \param filename sound file to open
 * \param preflang preferred language passed to the stream openers
 *
 * Opens the stream with ast_openstream() and a second one with
 * ast_openvstream(), applies each to the channel and starts playback. The
 * second stream is optional: it is only applied and played when non-NULL.
 * NOTE(review): any NULL check on fs and all return statements are not
 * visible in this listing.
 */
488 static int speech_streamfile(struct ast_channel
*chan
, const char *filename
, const char *preflang
)
490 struct ast_filestream
*fs
;
491 struct ast_filestream
*vfs
=NULL
;
493 fs
= ast_openstream(chan
, filename
, preflang
);
495 vfs
= ast_openvstream(chan
, filename
, preflang
);
/* Attach the streams to the channel, then start them. */
497 if (ast_applystream(chan
, fs
))
499 if (vfs
&& ast_applystream(chan
, vfs
))
501 if (ast_playstream(fs
))
503 if (vfs
&& ast_playstream(vfs
))
510 /*! \brief SpeechBackground(Sound File|Timeout) Dialplan Application */
/*
 * Streams an optional sound file, writes voice frames read from the channel
 * to the speech structure with ast_speech_write(), and accumulates DTMF
 * digits. When digits were collected and no engine results exist, a single
 * result entry is built from the digits (score 1000, grammar dtmf).
 *
 * \param chan channel to run on; its speech structure comes from find_speech()
 * \param data argument string; duplicated with ast_strdupa() and split on the
 *             pipe character into at most two fields (sound file, timeout)
 *
 * NOTE(review): ast_strdupa(data) runs before any visible check on data --
 * confirm data cannot be NULL. Many lines of this function (loop header,
 * several branches, returns) are not visible in this listing, so the comments
 * below describe only the statements that are shown.
 */
511 static int speech_background(struct ast_channel
*chan
, void *data
)
513 unsigned int timeout
= 0;
514 int res
= 0, done
= 0, argc
= 0, started
= 0;
515 struct ast_module_user
*u
= NULL
;
516 struct ast_speech
*speech
= find_speech(chan
);
517 struct ast_frame
*f
= NULL
;
518 int oldreadformat
= AST_FORMAT_SLINEAR
;
519 char dtmf
[AST_MAX_EXTENSION
] = "";
520 time_t start
, current
;
521 struct ast_datastore
*datastore
= NULL
;
/* tmp shares this declaration: room for one DTMF character plus the terminator. */
522 char *argv
[2], *args
= NULL
, *filename
= NULL
, tmp
[2] = "";
524 args
= ast_strdupa(data
);
526 u
= ast_module_user_add(chan
);
528 if (speech
== NULL
) {
529 ast_module_user_remove(u
);
533 /* If channel is not already answered, then answer it */
534 if (chan
->_state
!= AST_STATE_UP
&& ast_answer(chan
)) {
535 ast_module_user_remove(u
);
539 /* Record old read format */
540 oldreadformat
= chan
->readformat
;
542 /* Change read format to be signed linear */
543 if (ast_set_read_format(chan
, AST_FORMAT_SLINEAR
)) {
544 ast_module_user_remove(u
);
548 /* Parse out options */
549 argc
= ast_app_separate_args(args
, '|', argv
, sizeof(argv
) / sizeof(argv
[0]));
/* NOTE(review): atoi() reports no conversion errors and its int result is
 * stored in an unsigned int, so a negative value becomes a very large timeout. */
554 timeout
= atoi(argv
[1]);
557 /* Start streaming the file if possible and specified */
558 if (filename
!= NULL
&& ast_streamfile(chan
, filename
, chan
->language
)) {
559 /* An error occurred while streaming */
560 ast_set_read_format(chan
, oldreadformat
);
561 ast_module_user_remove(u
);
565 /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
566 if (speech
->state
== AST_SPEECH_STATE_NOT_READY
|| speech
->state
== AST_SPEECH_STATE_DONE
) {
567 ast_speech_change_state(speech
, AST_SPEECH_STATE_NOT_READY
);
568 ast_speech_start(speech
);
571 /* Okay it's streaming so go into a loop grabbing frames! */
573 /* Run scheduled stuff */
574 ast_sched_runq(chan
->sched
);
/* The scheduler wait value is reused as the ast_waitfor() argument below. */
577 res
= ast_sched_wait(chan
->sched
);
582 /* If there is a frame waiting, get it - if not - oh well */
583 if (ast_waitfor(chan
, res
) > 0) {
586 /* The channel has hung up most likely */
592 /* Do timeout check (shared between audio/dtmf) */
/* start and current are time_t values; their difference is compared with timeout. */
595 if ((current
-start
) >= timeout
) {
603 /* Do checks on speech structure to see if it's changed */
/* The speech lock is held from here until the matching unlock further down. */
604 ast_mutex_lock(&speech
->lock
);
/* The QUIET flag stops the current playback once and is then cleared. */
605 if (ast_test_flag(speech
, AST_SPEECH_QUIET
) && chan
->stream
!= NULL
) {
606 ast_stopstream(chan
);
607 ast_clear_flag(speech
, AST_SPEECH_QUIET
);
609 /* Check state so we can see what to do */
610 switch (speech
->state
) {
611 case AST_SPEECH_STATE_READY
:
612 /* If audio playback has stopped do a check for timeout purposes */
613 if (chan
->streamid
== -1 && chan
->timingfunc
== NULL
)
614 ast_stopstream(chan
);
615 if (chan
->stream
== NULL
&& timeout
> 0 && started
== 0) {
619 /* Deal with audio frames if present */
620 if (f
!= NULL
&& f
->frametype
== AST_FRAME_VOICE
) {
621 ast_speech_write(speech
, f
->data
, f
->datalen
);
624 case AST_SPEECH_STATE_WAIT
:
625 /* Cue up waiting sound if not already playing */
626 if (chan
->stream
== NULL
) {
627 if (speech
->processing_sound
!= NULL
) {
/* An empty name or the word none (any case) disables the processing sound. */
628 if (strlen(speech
->processing_sound
) > 0 && strcasecmp(speech
->processing_sound
,"none")) {
629 speech_streamfile(chan
, speech
->processing_sound
, chan
->language
);
632 } else if (chan
->streamid
== -1 && chan
->timingfunc
== NULL
) {
/* Stop the current stream, then cue the processing sound again. */
633 ast_stopstream(chan
);
634 if (speech
->processing_sound
!= NULL
) {
635 if (strlen(speech
->processing_sound
) > 0 && strcasecmp(speech
->processing_sound
,"none")) {
636 speech_streamfile(chan
, speech
->processing_sound
, chan
->language
);
641 case AST_SPEECH_STATE_DONE
:
642 /* Copy to speech structure the results, if available */
643 speech
->results
= ast_speech_results_get(speech
);
644 /* Now that we are done... let's switch back to not ready state */
645 ast_speech_change_state(speech
, AST_SPEECH_STATE_NOT_READY
);
646 /* Break out of our background too */
648 /* Stop audio playback */
649 if (chan
->stream
!= NULL
) {
650 ast_stopstream(chan
);
656 ast_mutex_unlock(&speech
->lock
);
658 /* Deal with other frame types */
660 /* Free the frame we received */
661 switch (f
->frametype
) {
663 if (f
->subclass
== '#') {
666 if (chan
->stream
!= NULL
) {
667 ast_stopstream(chan
);
668 /* Change timeout to be 5 seconds for DTMF input */
/* Turn the digit into a one-character string and append it to dtmf. */
673 snprintf(tmp
, sizeof(tmp
), "%c", f
->subclass
);
/* NOTE(review): the strncat() bound is the maximum number of characters to
 * append, not the size of the destination, so sizeof(dtmf) does not cap the
 * total length. Each call appends one character, and nothing visible here
 * stops the buffer from filling up; the bound should presumably be
 * sizeof(dtmf) - strlen(dtmf) - 1 -- confirm and fix. */
674 strncat(dtmf
, tmp
, sizeof(dtmf
));
677 case AST_FRAME_CONTROL
:
678 switch (f
->subclass
) {
679 case AST_CONTROL_HANGUP
:
680 /* Since they hung up we should destroy the speech structure */
693 if (strlen(dtmf
) > 0 && speech
->results
== NULL
) {
694 /* We sort of make a results entry */
695 speech
->results
= ast_calloc(1, sizeof(*speech
->results
));
696 if (speech
->results
!= NULL
) {
697 speech
->results
->score
= 1000;
/* NOTE(review): both strdup() results are stored unchecked. */
698 speech
->results
->text
= strdup(dtmf
);
699 speech
->results
->grammar
= strdup("dtmf");
703 /* See if it was because they hung up */
705 /* Destroy speech structure */
/* NOTE(review): the speech structure is destroyed directly and the datastore
 * is then only removed from the channel; no release of the datastore itself
 * is visible here, and its data pointer still refers to the destroyed
 * structure -- confirm ownership. */
706 ast_speech_destroy(speech
);
707 datastore
= ast_channel_datastore_find(chan
, &speech_datastore
, NULL
);
708 if (datastore
!= NULL
) {
709 ast_channel_datastore_remove(chan
, datastore
);
712 /* Channel is okay so restore read format */
713 ast_set_read_format(chan
, oldreadformat
);
716 ast_module_user_remove(u
);
722 /*! \brief SpeechDestroy() Dialplan Application */
/*
 * \param chan channel whose speech structure is looked up with find_speech()
 * \param data not referenced in the visible code
 * NOTE(review): return statements are not visible in this listing.
 */
723 static int speech_destroy(struct ast_channel
*chan
, void *data
)
726 struct ast_module_user
*u
= NULL
;
727 struct ast_speech
*speech
= find_speech(chan
);
728 struct ast_datastore
*datastore
= NULL
;
730 u
= ast_module_user_add(chan
);
/* Without a speech structure, only the module user reference is dropped. */
732 if (speech
== NULL
) {
733 ast_module_user_remove(u
);
737 /* Destroy speech structure */
738 ast_speech_destroy(speech
);
/* Detach the datastore so find_speech() no longer finds the destroyed structure.
 * NOTE(review): no release of the datastore itself is visible here -- confirm
 * who frees it after removal. */
740 datastore
= ast_channel_datastore_find(chan
, &speech_datastore
, NULL
);
741 if (datastore
!= NULL
) {
742 ast_channel_datastore_remove(chan
, datastore
);
745 ast_module_user_remove(u
);
/*
 * Module unload hook: unregisters the nine Speech* applications and the five
 * custom functions registered by load_module(), OR-ing every status into res,
 * then calls ast_module_user_hangup_all().
 * NOTE(review): the declaration of res and the return statement are not
 * visible in this listing -- presumably res is returned; confirm.
 */
750 static int unload_module(void)
754 res
= ast_unregister_application("SpeechCreate");
755 res
|= ast_unregister_application("SpeechLoadGrammar");
756 res
|= ast_unregister_application("SpeechUnloadGrammar");
757 res
|= ast_unregister_application("SpeechActivateGrammar");
758 res
|= ast_unregister_application("SpeechDeactivateGrammar");
759 res
|= ast_unregister_application("SpeechStart");
760 res
|= ast_unregister_application("SpeechBackground");
761 res
|= ast_unregister_application("SpeechDestroy");
762 res
|= ast_unregister_application("SpeechProcessingSound");
763 res
|= ast_custom_function_unregister(&speech_function
);
764 res
|= ast_custom_function_unregister(&speech_score_function
);
765 res
|= ast_custom_function_unregister(&speech_text_function
);
766 res
|= ast_custom_function_unregister(&speech_grammar_function
);
767 res
|= ast_custom_function_unregister(&speech_engine_function
);
769 ast_module_user_hangup_all();
/*
 * Module load hook: registers each Speech* application with its handler,
 * synopsis and description string, then the five custom functions. Every
 * status is OR-ed into res, so a single failure leaves res non-zero.
 * NOTE(review): the declaration of res and the return statement are not
 * visible in this listing -- presumably res is returned; confirm.
 */
774 static int load_module(void)
778 res
= ast_register_application("SpeechCreate", speech_create
, "Create a Speech Structure", speechcreate_descrip
);
779 res
|= ast_register_application("SpeechLoadGrammar", speech_load
, "Load a Grammar", speechload_descrip
);
780 res
|= ast_register_application("SpeechUnloadGrammar", speech_unload
, "Unload a Grammar", speechunload_descrip
);
781 res
|= ast_register_application("SpeechActivateGrammar", speech_activate
, "Activate a Grammar", speechactivategrammar_descrip
);
782 res
|= ast_register_application("SpeechDeactivateGrammar", speech_deactivate
, "Deactivate a Grammar", speechdeactivategrammar_descrip
);
783 res
|= ast_register_application("SpeechStart", speech_start
, "Start recognizing voice in the audio stream", speechstart_descrip
);
784 res
|= ast_register_application("SpeechBackground", speech_background
, "Play a sound file and wait for speech to be recognized", speechbackground_descrip
);
785 res
|= ast_register_application("SpeechDestroy", speech_destroy
, "End speech recognition", speechdestroy_descrip
);
786 res
|= ast_register_application("SpeechProcessingSound", speech_processing_sound
, "Change background processing sound", speechprocessingsound_descrip
);
787 res
|= ast_custom_function_register(&speech_function
);
788 res
|= ast_custom_function_register(&speech_score_function
);
789 res
|= ast_custom_function_register(&speech_text_function
);
790 res
|= ast_custom_function_register(&speech_grammar_function
);
791 res
|= ast_custom_function_register(&speech_engine_function
);
/* Declares the module to the loader with the GPL key and a display name.
 * NOTE(review): presumably the macro picks up load_module() and
 * unload_module() by name -- confirm against the macro definition. */
796 AST_MODULE_INFO_STANDARD(ASTERISK_GPL_KEY
, "Dialplan Speech Applications");