Merge another change from team/russell/events
[asterisk-bristuff.git] / codecs / codec_speex.c
blob b9f87953513f0121bb4e1afd7770f89c6a4e9fd3
1 /*
2 * Asterisk -- An open source telephony toolkit.
4 * Copyright (C) 1999 - 2005, Digium, Inc.
6 * Mark Spencer <markster@digium.com>
9 * See http://www.asterisk.org for more information about
10 * the Asterisk project. Please do not directly contact
11 * any of the maintainers of this project for assistance;
12 * the project provides a web site, mailing lists and IRC
13 * channels for your use.
15 * This program is free software, distributed under the terms of
16 * the GNU General Public License Version 2. See the LICENSE file
17 * at the top of the source tree.
20 /*! \file
22 * \brief Translate between signed linear and Speex (Open Codec)
24 * \note This work was motivated by Jeremy McNamara
25 * hacked to be configurable by anthm and bkw 9/28/2004
27 * \ingroup codecs
29 * \extref The Speex library - http://www.speex.org
33 /*** MODULEINFO
34 <depend>speex</depend>
35 <depend>speex_preprocess</depend>
36 <use>speexdsp</use>
37 ***/
39 #include "asterisk.h"
41 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
43 #include <speex/speex.h>
45 /* We require a post 1.1.8 version of Speex to enable preprocessing
46 and better type handling */
47 #ifdef _SPEEX_TYPES_H
48 #include <speex/speex_preprocess.h>
49 #endif
51 #include "asterisk/translate.h"
52 #include "asterisk/module.h"
53 #include "asterisk/config.h"
54 #include "asterisk/utils.h"
56 /* Sample frame data */
57 #include "slin_speex_ex.h"
58 #include "speex_slin_ex.h"
/*
 * Codec variables.
 *
 * Built-in defaults for the encoder, decoder and preprocessor.
 * parse_config() overwrites them from the "speex" section of codecs.conf.
 */

/* Encoder / decoder settings */
static int quality = 3;         /* applied with SPEEX_SET_QUALITY when neither ABR nor VBR is on */
static int complexity = 2;      /* applied with SPEEX_SET_COMPLEXITY */
static int enhancement = 0;     /* decoder: nonzero applies SPEEX_SET_ENH */
static int vad = 0;             /* encoder voice activity detection */
static int vbr = 0;             /* variable bit rate */
static float vbr_quality = 4;   /* applied with SPEEX_SET_VBR_QUALITY when vbr is on */
static int abr = 0;             /* average bit rate target; 0 leaves ABR off */
static int dtx = 0;             /* set to 1 to enable silence detection */

/* Preprocessor settings (only used when built with speex_preprocess.h) */
static int preproc = 0;                  /* master switch for the preprocessor */
static int pp_vad = 0;                   /* preprocessor VAD */
static int pp_agc = 0;                   /* preprocessor automatic gain control */
static float pp_agc_level = 8000;        /* passed to SPEEX_PREPROCESS_SET_AGC_LEVEL; XXX units not documented here */
static int pp_denoise = 0;               /* preprocessor denoise */
static int pp_dereverb = 0;              /* preprocessor dereverb */
static float pp_dereverb_decay = 0.4;    /* passed to SPEEX_PREPROCESS_SET_DEREVERB_DECAY */
static float pp_dereverb_level = 0.3;    /* passed to SPEEX_PREPROCESS_SET_DEREVERB_LEVEL */
/*
 * Frame type tags, listed by value.
 * NOTE(review): none of these four is referenced in the visible part of
 * this file - confirm whether they are still needed.
 */
#define TYPE_HIGH	 0x0
#define TYPE_LOW	 0x1
#define TYPE_SILENCE	 0x2
#define TYPE_MASK	 0x3

/* Capacity, in 16-bit samples, of the encoder work buffer and of the
 * translator output buffers declared further down. */
#define BUFFER_SAMPLES	 8000
/* Sample count reported for the canned Speex test frame. */
#define SPEEX_SAMPLES	 160
87 struct speex_coder_pvt {
88 void *speex;
89 SpeexBits bits;
90 int framesize;
91 int silent_state;
92 #ifdef _SPEEX_TYPES_H
93 SpeexPreprocessState *pp;
94 spx_int16_t buf[BUFFER_SAMPLES];
95 #else
96 int16_t buf[BUFFER_SAMPLES]; /* input, waiting to be compressed */
97 #endif
101 static int lintospeex_new(struct ast_trans_pvt *pvt)
103 struct speex_coder_pvt *tmp = pvt->pvt;
105 if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
106 return -1;
108 speex_bits_init(&tmp->bits);
109 speex_bits_reset(&tmp->bits);
110 speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
111 speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
112 #ifdef _SPEEX_TYPES_H
113 if (preproc) {
114 tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
115 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
116 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
117 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
118 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
119 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
120 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
121 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
123 #endif
124 if (!abr && !vbr) {
125 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
126 if (vad)
127 speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
129 if (vbr) {
130 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
131 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
133 if (abr)
134 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
135 if (dtx)
136 speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
137 tmp->silent_state = 0;
139 return 0;
142 static int speextolin_new(struct ast_trans_pvt *pvt)
144 struct speex_coder_pvt *tmp = pvt->pvt;
146 if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
147 return -1;
149 speex_bits_init(&tmp->bits);
150 speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
151 if (enhancement)
152 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
154 return 0;
157 static struct ast_frame *lintospeex_sample(void)
159 static struct ast_frame f;
160 f.frametype = AST_FRAME_VOICE;
161 f.subclass = AST_FORMAT_SLINEAR;
162 f.datalen = sizeof(slin_speex_ex);
163 /* Assume 8000 Hz */
164 f.samples = sizeof(slin_speex_ex)/2;
165 f.mallocd = 0;
166 f.offset = 0;
167 f.src = __PRETTY_FUNCTION__;
168 f.data.ptr = slin_speex_ex;
169 return &f;
172 static struct ast_frame *speextolin_sample(void)
174 static struct ast_frame f;
175 f.frametype = AST_FRAME_VOICE;
176 f.subclass = AST_FORMAT_SPEEX;
177 f.datalen = sizeof(speex_slin_ex);
178 /* All frames are 20 ms long */
179 f.samples = SPEEX_SAMPLES;
180 f.mallocd = 0;
181 f.offset = 0;
182 f.src = __PRETTY_FUNCTION__;
183 f.data.ptr = speex_slin_ex;
184 return &f;
187 /*! \brief convert and store into outbuf */
188 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
190 struct speex_coder_pvt *tmp = pvt->pvt;
192 /* Assuming there's space left, decode into the current buffer at
193 the tail location. Read in as many frames as there are */
194 int x;
195 int res;
196 int16_t *dst = (int16_t *)pvt->outbuf;
197 /* XXX fout is a temporary buffer, may have different types */
198 #ifdef _SPEEX_TYPES_H
199 spx_int16_t fout[1024];
200 #else
201 float fout[1024];
202 #endif
204 if (f->datalen == 0) { /* Native PLC interpolation */
205 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
206 ast_log(LOG_WARNING, "Out of buffer space\n");
207 return -1;
209 #ifdef _SPEEX_TYPES_H
210 speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
211 #else
212 speex_decode(tmp->speex, NULL, fout);
213 for (x=0;x<tmp->framesize;x++) {
214 dst[pvt->samples + x] = (int16_t)fout[x];
216 #endif
217 pvt->samples += tmp->framesize;
218 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
219 return 0;
222 /* Read in bits */
223 speex_bits_read_from(&tmp->bits, f->data.ptr, f->datalen);
224 for (;;) {
225 #ifdef _SPEEX_TYPES_H
226 res = speex_decode_int(tmp->speex, &tmp->bits, fout);
227 #else
228 res = speex_decode(tmp->speex, &tmp->bits, fout);
229 #endif
230 if (res < 0)
231 break;
232 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
233 ast_log(LOG_WARNING, "Out of buffer space\n");
234 return -1;
236 for (x = 0 ; x < tmp->framesize; x++)
237 dst[pvt->samples + x] = (int16_t)fout[x];
238 pvt->samples += tmp->framesize;
239 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
241 return 0;
244 /*! \brief store input frame in work buffer */
245 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
247 struct speex_coder_pvt *tmp = pvt->pvt;
249 /* XXX We should look at how old the rest of our stream is, and if it
250 is too old, then we should overwrite it entirely, otherwise we can
251 get artifacts of earlier talk that do not belong */
252 memcpy(tmp->buf + pvt->samples, f->data.ptr, f->datalen);
253 pvt->samples += f->samples;
254 return 0;
257 /*! \brief convert work buffer and produce output frame */
258 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
260 struct speex_coder_pvt *tmp = pvt->pvt;
261 int is_speech=1;
262 int datalen = 0; /* output bytes */
263 int samples = 0; /* output samples */
265 /* We can't work on anything less than a frame in size */
266 if (pvt->samples < tmp->framesize)
267 return NULL;
268 speex_bits_reset(&tmp->bits);
269 while (pvt->samples >= tmp->framesize) {
270 #ifdef _SPEEX_TYPES_H
271 /* Preprocess audio */
272 if (preproc)
273 is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
274 /* Encode a frame of data */
275 if (is_speech) {
276 /* If DTX enabled speex_encode returns 0 during silence */
277 is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
278 } else {
279 /* 5 zeros interpreted by Speex as silence (submode 0) */
280 speex_bits_pack(&tmp->bits, 0, 5);
282 #else
284 float fbuf[1024];
285 int x;
286 /* Convert to floating point */
287 for (x = 0; x < tmp->framesize; x++)
288 fbuf[x] = tmp->buf[samples + x];
289 /* Encode a frame of data */
290 is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
292 #endif
293 samples += tmp->framesize;
294 pvt->samples -= tmp->framesize;
297 /* Move the data at the end of the buffer to the front */
298 if (pvt->samples)
299 memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
301 /* Use AST_FRAME_CNG to signify the start of any silence period */
302 if (is_speech) {
303 tmp->silent_state = 0;
304 } else {
305 if (tmp->silent_state) {
306 return NULL;
307 } else {
308 tmp->silent_state = 1;
309 speex_bits_reset(&tmp->bits);
310 memset(&pvt->f, 0, sizeof(pvt->f));
311 pvt->f.frametype = AST_FRAME_CNG;
312 pvt->f.samples = samples;
313 /* XXX what now ? format etc... */
317 /* Terminate bit stream */
318 speex_bits_pack(&tmp->bits, 15, 5);
319 datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
320 return ast_trans_frameout(pvt, datalen, samples);
323 static void speextolin_destroy(struct ast_trans_pvt *arg)
325 struct speex_coder_pvt *pvt = arg->pvt;
327 speex_decoder_destroy(pvt->speex);
328 speex_bits_destroy(&pvt->bits);
331 static void lintospeex_destroy(struct ast_trans_pvt *arg)
333 struct speex_coder_pvt *pvt = arg->pvt;
334 #ifdef _SPEEX_TYPES_H
335 if (preproc)
336 speex_preprocess_state_destroy(pvt->pp);
337 #endif
338 speex_encoder_destroy(pvt->speex);
339 speex_bits_destroy(&pvt->bits);
342 static struct ast_translator speextolin = {
343 .name = "speextolin",
344 .srcfmt = AST_FORMAT_SPEEX,
345 .dstfmt = AST_FORMAT_SLINEAR,
346 .newpvt = speextolin_new,
347 .framein = speextolin_framein,
348 .destroy = speextolin_destroy,
349 .sample = speextolin_sample,
350 .desc_size = sizeof(struct speex_coder_pvt),
351 .buffer_samples = BUFFER_SAMPLES,
352 .buf_size = BUFFER_SAMPLES * 2,
353 .native_plc = 1,
356 static struct ast_translator lintospeex = {
357 .name = "lintospeex",
358 .srcfmt = AST_FORMAT_SLINEAR,
359 .dstfmt = AST_FORMAT_SPEEX,
360 .newpvt = lintospeex_new,
361 .framein = lintospeex_framein,
362 .frameout = lintospeex_frameout,
363 .destroy = lintospeex_destroy,
364 .sample = lintospeex_sample,
365 .desc_size = sizeof(struct speex_coder_pvt),
366 .buffer_samples = BUFFER_SAMPLES,
367 .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
370 static int parse_config(int reload)
372 struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
373 struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
374 struct ast_variable *var;
375 int res;
376 float res_f;
378 if (cfg == NULL)
379 return 0;
380 if (cfg == CONFIG_STATUS_FILEUNCHANGED)
381 return 0;
383 for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
384 if (!strcasecmp(var->name, "quality")) {
385 res = abs(atoi(var->value));
386 if (res > -1 && res < 11) {
387 ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
388 quality = res;
389 } else
390 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
391 } else if (!strcasecmp(var->name, "complexity")) {
392 res = abs(atoi(var->value));
393 if (res > -1 && res < 11) {
394 ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
395 complexity = res;
396 } else
397 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
398 } else if (!strcasecmp(var->name, "vbr_quality")) {
399 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
400 ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
401 vbr_quality = res_f;
402 } else
403 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
404 } else if (!strcasecmp(var->name, "abr_quality")) {
405 ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
406 } else if (!strcasecmp(var->name, "enhancement")) {
407 enhancement = ast_true(var->value) ? 1 : 0;
408 ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
409 } else if (!strcasecmp(var->name, "vbr")) {
410 vbr = ast_true(var->value) ? 1 : 0;
411 ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
412 } else if (!strcasecmp(var->name, "abr")) {
413 res = abs(atoi(var->value));
414 if (res >= 0) {
415 if (res > 0)
416 ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
417 else
418 ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
419 abr = res;
420 } else
421 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
422 } else if (!strcasecmp(var->name, "vad")) {
423 vad = ast_true(var->value) ? 1 : 0;
424 ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
425 } else if (!strcasecmp(var->name, "dtx")) {
426 dtx = ast_true(var->value) ? 1 : 0;
427 ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
428 } else if (!strcasecmp(var->name, "preprocess")) {
429 preproc = ast_true(var->value) ? 1 : 0;
430 ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
431 } else if (!strcasecmp(var->name, "pp_vad")) {
432 pp_vad = ast_true(var->value) ? 1 : 0;
433 ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
434 } else if (!strcasecmp(var->name, "pp_agc")) {
435 pp_agc = ast_true(var->value) ? 1 : 0;
436 ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
437 } else if (!strcasecmp(var->name, "pp_agc_level")) {
438 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
439 ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
440 pp_agc_level = res_f;
441 } else
442 ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
443 } else if (!strcasecmp(var->name, "pp_denoise")) {
444 pp_denoise = ast_true(var->value) ? 1 : 0;
445 ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
446 } else if (!strcasecmp(var->name, "pp_dereverb")) {
447 pp_dereverb = ast_true(var->value) ? 1 : 0;
448 ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
449 } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
450 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
451 ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
452 pp_dereverb_decay = res_f;
453 } else
454 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
455 } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
456 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
457 ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
458 pp_dereverb_level = res_f;
459 } else
460 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
463 ast_config_destroy(cfg);
464 return 0;
467 static int reload(void)
469 if (parse_config(1))
470 return AST_MODULE_LOAD_DECLINE;
471 return AST_MODULE_LOAD_SUCCESS;
474 static int unload_module(void)
476 int res;
478 res = ast_unregister_translator(&lintospeex);
479 res |= ast_unregister_translator(&speextolin);
481 return res;
484 static int load_module(void)
486 int res;
488 if (parse_config(0))
489 return AST_MODULE_LOAD_DECLINE;
490 res=ast_register_translator(&speextolin);
491 if (!res)
492 res=ast_register_translator(&lintospeex);
493 else
494 ast_unregister_translator(&speextolin);
495 if (res)
496 return AST_MODULE_LOAD_FAILURE;
497 return AST_MODULE_LOAD_SUCCESS;
500 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
501 .load = load_module,
502 .unload = unload_module,
503 .reload = reload,