if --with-foo=<path> is specified for a configure option, ensure that it is used for...
[asterisk-bristuff.git] / codecs / codec_speex.c
blob6978184ae0df557730a06530060ba565572c88e1
1 /*
2 * Asterisk -- An open source telephony toolkit.
4 * Copyright (C) 1999 - 2005, Digium, Inc.
6 * Mark Spencer <markster@digium.com>
9 * See http://www.asterisk.org for more information about
10 * the Asterisk project. Please do not directly contact
11 * any of the maintainers of this project for assistance;
12 * the project provides a web site, mailing lists and IRC
13 * channels for your use.
15 * This program is free software, distributed under the terms of
16 * the GNU General Public License Version 2. See the LICENSE file
17 * at the top of the source tree.
20 /*! \file
22 * \brief Translate between signed linear and Speex (Open Codec)
24 * http://www.speex.org
25 * \note This work was motivated by Jeremy McNamara
26 * hacked to be configurable by anthm and bkw 9/28/2004
27 * \ingroup codecs
30 /*** MODULEINFO
31 <depend>speex</depend>
32 ***/
34 #include "asterisk.h"
36 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
38 #include <fcntl.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <netinet/in.h>
42 #include <string.h>
43 #include <stdio.h>
44 #include <speex/speex.h>
46 /* We require a post 1.1.8 version of Speex to enable preprocessing
47 and better type handling */
48 #ifdef _SPEEX_TYPES_H
49 #include <speex/speex_preprocess.h>
50 #endif
52 #include "asterisk/lock.h"
53 #include "asterisk/translate.h"
54 #include "asterisk/module.h"
55 #include "asterisk/config.h"
56 #include "asterisk/options.h"
57 #include "asterisk/logger.h"
58 #include "asterisk/channel.h"
59 #include "asterisk/utils.h"
61 /* Sample frame data */
62 #include "slin_speex_ex.h"
63 #include "speex_slin_ex.h"
/*
 * Codec tunables.  These are the built-in defaults; parse_config() replaces
 * them with whatever the [speex] section of codecs.conf supplies.
 */

/* Encoder settings (applied in lintospeex_new) */
static int quality = 3;			/* SPEEX_SET_QUALITY, used when neither ABR nor VBR is on */
static int complexity = 2;		/* SPEEX_SET_COMPLEXITY */
/* Decoder setting (applied in speextolin_new) */
static int enhancement = 0;		/* SPEEX_SET_ENH when non-zero */
static int vad = 0;			/* SPEEX_SET_VAD when non-zero */
static int vbr = 0;			/* SPEEX_SET_VBR when non-zero */
static float vbr_quality = 4;		/* SPEEX_SET_VBR_QUALITY */
static int abr = 0;			/* ABR target bitrate, 0 leaves ABR off */
static int dtx = 0;			/* set to 1 to enable silence detection */

/* Preprocessor settings (only used when built with _SPEEX_TYPES_H) */
static int preproc = 0;			/* master switch for the preprocessor */
static int pp_vad = 0;
static int pp_agc = 0;
static float pp_agc_level = 8000;	/* XXX meaning of 8000 is not documented here */
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
/* Frame type tags */
#define TYPE_HIGH	 0x0
#define TYPE_LOW	 0x1
#define TYPE_SILENCE	 0x2
#define TYPE_MASK	 0x3

/* Capacity of the translator work buffers, in samples */
#define	BUFFER_SAMPLES	8000
/* Samples carried by the sample Speex frame (20 ms) */
#define	SPEEX_SAMPLES	160
92 struct speex_coder_pvt {
93 void *speex;
94 SpeexBits bits;
95 int framesize;
96 int silent_state;
97 #ifdef _SPEEX_TYPES_H
98 SpeexPreprocessState *pp;
99 spx_int16_t buf[BUFFER_SAMPLES];
100 #else
101 int16_t buf[BUFFER_SAMPLES]; /* input, waiting to be compressed */
102 #endif
106 static int lintospeex_new(struct ast_trans_pvt *pvt)
108 struct speex_coder_pvt *tmp = pvt->pvt;
110 if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
111 return -1;
113 speex_bits_init(&tmp->bits);
114 speex_bits_reset(&tmp->bits);
115 speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
116 speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
117 #ifdef _SPEEX_TYPES_H
118 if (preproc) {
119 tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
120 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
121 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
122 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
123 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
124 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
125 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
126 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
128 #endif
129 if (!abr && !vbr) {
130 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
131 if (vad)
132 speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
134 if (vbr) {
135 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
136 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
138 if (abr)
139 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
140 if (dtx)
141 speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
142 tmp->silent_state = 0;
144 return 0;
147 static int speextolin_new(struct ast_trans_pvt *pvt)
149 struct speex_coder_pvt *tmp = pvt->pvt;
151 if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
152 return -1;
154 speex_bits_init(&tmp->bits);
155 speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
156 if (enhancement)
157 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
159 return 0;
162 static struct ast_frame *lintospeex_sample(void)
164 static struct ast_frame f;
165 f.frametype = AST_FRAME_VOICE;
166 f.subclass = AST_FORMAT_SLINEAR;
167 f.datalen = sizeof(slin_speex_ex);
168 /* Assume 8000 Hz */
169 f.samples = sizeof(slin_speex_ex)/2;
170 f.mallocd = 0;
171 f.offset = 0;
172 f.src = __PRETTY_FUNCTION__;
173 f.data = slin_speex_ex;
174 return &f;
177 static struct ast_frame *speextolin_sample(void)
179 static struct ast_frame f;
180 f.frametype = AST_FRAME_VOICE;
181 f.subclass = AST_FORMAT_SPEEX;
182 f.datalen = sizeof(speex_slin_ex);
183 /* All frames are 20 ms long */
184 f.samples = SPEEX_SAMPLES;
185 f.mallocd = 0;
186 f.offset = 0;
187 f.src = __PRETTY_FUNCTION__;
188 f.data = speex_slin_ex;
189 return &f;
192 /*! \brief convert and store into outbuf */
193 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
195 struct speex_coder_pvt *tmp = pvt->pvt;
197 /* Assuming there's space left, decode into the current buffer at
198 the tail location. Read in as many frames as there are */
199 int x;
200 int res;
201 int16_t *dst = (int16_t *)pvt->outbuf;
202 /* XXX fout is a temporary buffer, may have different types */
203 #ifdef _SPEEX_TYPES_H
204 spx_int16_t fout[1024];
205 #else
206 float fout[1024];
207 #endif
209 if (f->datalen == 0) { /* Native PLC interpolation */
210 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
211 ast_log(LOG_WARNING, "Out of buffer space\n");
212 return -1;
214 #ifdef _SPEEX_TYPES_H
215 speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
216 #else
217 speex_decode(tmp->speex, NULL, fout);
218 for (x=0;x<tmp->framesize;x++) {
219 dst[pvt->samples + x] = (int16_t)fout[x];
221 #endif
222 pvt->samples += tmp->framesize;
223 return 0;
226 /* Read in bits */
227 speex_bits_read_from(&tmp->bits, f->data, f->datalen);
228 for (;;) {
229 #ifdef _SPEEX_TYPES_H
230 res = speex_decode_int(tmp->speex, &tmp->bits, fout);
231 #else
232 res = speex_decode(tmp->speex, &tmp->bits, fout);
233 #endif
234 if (res < 0)
235 break;
236 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
237 ast_log(LOG_WARNING, "Out of buffer space\n");
238 return -1;
240 for (x = 0 ; x < tmp->framesize; x++)
241 dst[pvt->samples + x] = (int16_t)fout[x];
242 pvt->samples += tmp->framesize;
243 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
245 return 0;
248 /*! \brief store input frame in work buffer */
249 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
251 struct speex_coder_pvt *tmp = pvt->pvt;
253 /* XXX We should look at how old the rest of our stream is, and if it
254 is too old, then we should overwrite it entirely, otherwise we can
255 get artifacts of earlier talk that do not belong */
256 memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
257 pvt->samples += f->samples;
258 return 0;
261 /*! \brief convert work buffer and produce output frame */
262 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
264 struct speex_coder_pvt *tmp = pvt->pvt;
265 int is_speech=1;
266 int datalen = 0; /* output bytes */
267 int samples = 0; /* output samples */
269 /* We can't work on anything less than a frame in size */
270 if (pvt->samples < tmp->framesize)
271 return NULL;
272 speex_bits_reset(&tmp->bits);
273 while (pvt->samples >= tmp->framesize) {
274 #ifdef _SPEEX_TYPES_H
275 /* Preprocess audio */
276 if (preproc)
277 is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
278 /* Encode a frame of data */
279 if (is_speech) {
280 /* If DTX enabled speex_encode returns 0 during silence */
281 is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
282 } else {
283 /* 5 zeros interpreted by Speex as silence (submode 0) */
284 speex_bits_pack(&tmp->bits, 0, 5);
286 #else
288 float fbuf[1024];
289 int x;
290 /* Convert to floating point */
291 for (x = 0; x < tmp->framesize; x++)
292 fbuf[x] = tmp->buf[samples + x];
293 /* Encode a frame of data */
294 is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
296 #endif
297 samples += tmp->framesize;
298 pvt->samples -= tmp->framesize;
301 /* Move the data at the end of the buffer to the front */
302 if (pvt->samples)
303 memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
305 /* Use AST_FRAME_CNG to signify the start of any silence period */
306 if (is_speech) {
307 tmp->silent_state = 0;
308 } else {
309 if (tmp->silent_state) {
310 return NULL;
311 } else {
312 tmp->silent_state = 1;
313 speex_bits_reset(&tmp->bits);
314 memset(&pvt->f, 0, sizeof(pvt->f));
315 pvt->f.frametype = AST_FRAME_CNG;
316 pvt->f.samples = samples;
317 /* XXX what now ? format etc... */
321 /* Terminate bit stream */
322 speex_bits_pack(&tmp->bits, 15, 5);
323 datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
324 return ast_trans_frameout(pvt, datalen, samples);
327 static void speextolin_destroy(struct ast_trans_pvt *arg)
329 struct speex_coder_pvt *pvt = arg->pvt;
331 speex_decoder_destroy(pvt->speex);
332 speex_bits_destroy(&pvt->bits);
335 static void lintospeex_destroy(struct ast_trans_pvt *arg)
337 struct speex_coder_pvt *pvt = arg->pvt;
338 #ifdef _SPEEX_TYPES_H
339 if (preproc)
340 speex_preprocess_state_destroy(pvt->pp);
341 #endif
342 speex_encoder_destroy(pvt->speex);
343 speex_bits_destroy(&pvt->bits);
346 static struct ast_translator speextolin = {
347 .name = "speextolin",
348 .srcfmt = AST_FORMAT_SPEEX,
349 .dstfmt = AST_FORMAT_SLINEAR,
350 .newpvt = speextolin_new,
351 .framein = speextolin_framein,
352 .destroy = speextolin_destroy,
353 .sample = speextolin_sample,
354 .desc_size = sizeof(struct speex_coder_pvt),
355 .buffer_samples = BUFFER_SAMPLES,
356 .buf_size = BUFFER_SAMPLES * 2,
359 static struct ast_translator lintospeex = {
360 .name = "lintospeex",
361 .srcfmt = AST_FORMAT_SLINEAR,
362 .dstfmt = AST_FORMAT_SPEEX,
363 .newpvt = lintospeex_new,
364 .framein = lintospeex_framein,
365 .frameout = lintospeex_frameout,
366 .destroy = lintospeex_destroy,
367 .sample = lintospeex_sample,
368 .desc_size = sizeof(struct speex_coder_pvt),
369 .buffer_samples = BUFFER_SAMPLES,
370 .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
373 static void parse_config(void)
375 struct ast_config *cfg = ast_config_load("codecs.conf");
376 struct ast_variable *var;
377 int res;
378 float res_f;
380 if (cfg == NULL)
381 return;
383 for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
384 if (!strcasecmp(var->name, "quality")) {
385 res = abs(atoi(var->value));
386 if (res > -1 && res < 11) {
387 if (option_verbose > 2)
388 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
389 quality = res;
390 } else
391 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
392 } else if (!strcasecmp(var->name, "complexity")) {
393 res = abs(atoi(var->value));
394 if (res > -1 && res < 11) {
395 if (option_verbose > 2)
396 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
397 complexity = res;
398 } else
399 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
400 } else if (!strcasecmp(var->name, "vbr_quality")) {
401 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
402 if (option_verbose > 2)
403 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
404 vbr_quality = res_f;
405 } else
406 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
407 } else if (!strcasecmp(var->name, "abr_quality")) {
408 ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
409 } else if (!strcasecmp(var->name, "enhancement")) {
410 enhancement = ast_true(var->value) ? 1 : 0;
411 if (option_verbose > 2)
412 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
413 } else if (!strcasecmp(var->name, "vbr")) {
414 vbr = ast_true(var->value) ? 1 : 0;
415 if (option_verbose > 2)
416 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
417 } else if (!strcasecmp(var->name, "abr")) {
418 res = abs(atoi(var->value));
419 if (res >= 0) {
420 if (option_verbose > 2) {
421 if (res > 0)
422 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
423 else
424 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
426 abr = res;
427 } else
428 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
429 } else if (!strcasecmp(var->name, "vad")) {
430 vad = ast_true(var->value) ? 1 : 0;
431 if (option_verbose > 2)
432 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
433 } else if (!strcasecmp(var->name, "dtx")) {
434 dtx = ast_true(var->value) ? 1 : 0;
435 if (option_verbose > 2)
436 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
437 } else if (!strcasecmp(var->name, "preprocess")) {
438 preproc = ast_true(var->value) ? 1 : 0;
439 if (option_verbose > 2)
440 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
441 } else if (!strcasecmp(var->name, "pp_vad")) {
442 pp_vad = ast_true(var->value) ? 1 : 0;
443 if (option_verbose > 2)
444 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
445 } else if (!strcasecmp(var->name, "pp_agc")) {
446 pp_agc = ast_true(var->value) ? 1 : 0;
447 if (option_verbose > 2)
448 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
449 } else if (!strcasecmp(var->name, "pp_agc_level")) {
450 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
451 if (option_verbose > 2)
452 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
453 pp_agc_level = res_f;
454 } else
455 ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
456 } else if (!strcasecmp(var->name, "pp_denoise")) {
457 pp_denoise = ast_true(var->value) ? 1 : 0;
458 if (option_verbose > 2)
459 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
460 } else if (!strcasecmp(var->name, "pp_dereverb")) {
461 pp_dereverb = ast_true(var->value) ? 1 : 0;
462 if (option_verbose > 2)
463 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
464 } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
465 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
466 if (option_verbose > 2)
467 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
468 pp_dereverb_decay = res_f;
469 } else
470 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
471 } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
472 if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
473 if (option_verbose > 2)
474 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
475 pp_dereverb_level = res_f;
476 } else
477 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
480 ast_config_destroy(cfg);
/*!
 * \brief Module reload hook: re-read the codec configuration
 * \retval 0 always
 */
static int reload(void)
{
	/* Only the tunables are refreshed here */
	parse_config();

	return 0;
}
490 static int unload_module(void)
492 int res;
494 res = ast_unregister_translator(&lintospeex);
495 res |= ast_unregister_translator(&speextolin);
497 return res;
500 static int load_module(void)
502 int res;
504 parse_config();
505 res=ast_register_translator(&speextolin);
506 if (!res)
507 res=ast_register_translator(&lintospeex);
508 else
509 ast_unregister_translator(&speextolin);
511 return res;
514 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
515 .load = load_module,
516 .unload = unload_module,
517 .reload = reload,