codecs/codec_speex.c

   1 /*
   2  * Asterisk -- An open source telephony toolkit.
   3  *
   4  * Copyright (C) 1999 - 2005, Digium, Inc.
   5  *
   6  * Mark Spencer <markster@digium.com>
   7  *
   8  *
   9  * See http://www.asterisk.org for more information about
  10  * the Asterisk project. Please do not directly contact
  11  * any of the maintainers of this project for assistance;
  12  * the project provides a web site, mailing lists and IRC
  13  * channels for your use.
  14  *
  15  * This program is free software, distributed under the terms of
  16  * the GNU General Public License Version 2. See the LICENSE file
  17  * at the top of the source tree.
  18  */
  19
  20 /*! \file
  21  *
  22  * \brief Translate between signed linear and Speex (Open Codec)
  23  *
  24  * http://www.speex.org
  25  * \note This work was motivated by Jeremy McNamara
  26  * hacked to be configurable by anthm and bkw 9/28/2004
  27  * \ingroup codecs
  28  */
  29
  30 /*** MODULEINFO
  31         <depend>speex</depend>
  32         <depend>speex_preprocess</depend>
  33         <use>speexdsp</use>
  34  ***/
  35
  36 #include "asterisk.h"
  37
  38 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
  39
  40 #include <fcntl.h>
  41 #include <stdlib.h>
  42 #include <unistd.h>
  43 #include <netinet/in.h>
  44 #include <string.h>
  45 #include <stdio.h>
  46 #include <speex/speex.h>
  47
  48 /* We require a post 1.1.8 version of Speex to enable preprocessing
  49    and better type handling */
  50 #ifdef _SPEEX_TYPES_H
  51 #include <speex/speex_preprocess.h>
  52 #endif
  53
  54 #include "asterisk/lock.h"
  55 #include "asterisk/translate.h"
  56 #include "asterisk/module.h"
  57 #include "asterisk/config.h"
  58 #include "asterisk/options.h"
  59 #include "asterisk/logger.h"
  60 #include "asterisk/channel.h"
  61 #include "asterisk/utils.h"
  62
  63 /* Sample frame data */
  64 #include "slin_speex_ex.h"
  65 #include "speex_slin_ex.h"
  66
  67 /* codec variables */
  68 static int quality = 3;
  69 static int complexity = 2;
  70 static int enhancement = 0;
  71 static int vad = 0;
  72 static int vbr = 0;
  73 static float vbr_quality = 4;
  74 static int abr = 0;
  75 static int dtx = 0;     /* set to 1 to enable silence detection */
  76
  77 static int preproc = 0;
  78 static int pp_vad = 0;
  79 static int pp_agc = 0;
  80 static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
  81 static int pp_denoise = 0;
  82 static int pp_dereverb = 0;
  83 static float pp_dereverb_decay = 0.4;
  84 static float pp_dereverb_level = 0.3;
  85
  86 #define TYPE_SILENCE     0x2
  87 #define TYPE_HIGH        0x0
  88 #define TYPE_LOW         0x1
  89 #define TYPE_MASK        0x3
  90
  91 #define BUFFER_SAMPLES  8000
  92 #define SPEEX_SAMPLES   160
  93
/*
 * Private per-translator state.  The same layout is used by both directions:
 * lintospeex and speextolin each set .desc_size to sizeof() this structure.
 */
struct speex_coder_pvt {
        void *speex;            /* handle from speex_encoder_init() or speex_decoder_init() */
        SpeexBits bits;         /* Speex bit-stream state used for packing/unpacking */
        int framesize;          /* samples per Speex frame, read with SPEEX_GET_FRAME_SIZE */
        int silent_state;       /* encoder: non-zero once the start of a silence period was signalled */
#ifdef _SPEEX_TYPES_H
        SpeexPreprocessState *pp;       /* preprocessor, only created when preproc is enabled */
        spx_int16_t buf[BUFFER_SAMPLES];        /* input, waiting to be compressed */
#else
        int16_t buf[BUFFER_SAMPLES];    /* input, waiting to be compressed */
#endif
};
 106
 107
 108 static int lintospeex_new(struct ast_trans_pvt *pvt)
 109 {
 110         struct speex_coder_pvt *tmp = pvt->pvt;
 111
 112         if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
 113                 return -1;
 114
 115         speex_bits_init(&tmp->bits);
 116         speex_bits_reset(&tmp->bits);
 117         speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 118         speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
 119 #ifdef _SPEEX_TYPES_H
 120         if (preproc) {
 121                 tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
 122                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
 123                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
 124                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
 125                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
 126                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
 127                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
 128                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
 129         }
 130 #endif
 131         if (!abr && !vbr) {
 132                 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
 133                 if (vad)
 134                         speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
 135         }
 136         if (vbr) {
 137                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
 138                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
 139         }
 140         if (abr)
 141                 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
 142         if (dtx)
 143                 speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
 144         tmp->silent_state = 0;
 145
 146         return 0;
 147 }
 148
 149 static int speextolin_new(struct ast_trans_pvt *pvt)
 150 {
 151         struct speex_coder_pvt *tmp = pvt->pvt;
 152
 153         if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
 154                 return -1;
 155
 156         speex_bits_init(&tmp->bits);
 157         speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 158         if (enhancement)
 159                 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
 160
 161         return 0;
 162 }
 163
 164 static struct ast_frame *lintospeex_sample(void)
 165 {
 166         static struct ast_frame f;
 167         f.frametype = AST_FRAME_VOICE;
 168         f.subclass = AST_FORMAT_SLINEAR;
 169         f.datalen = sizeof(slin_speex_ex);
 170         /* Assume 8000 Hz */
 171         f.samples = sizeof(slin_speex_ex)/2;
 172         f.mallocd = 0;
 173         f.offset = 0;
 174         f.src = __PRETTY_FUNCTION__;
 175         f.data = slin_speex_ex;
 176         return &f;
 177 }
 178
 179 static struct ast_frame *speextolin_sample(void)
 180 {
 181         static struct ast_frame f;
 182         f.frametype = AST_FRAME_VOICE;
 183         f.subclass = AST_FORMAT_SPEEX;
 184         f.datalen = sizeof(speex_slin_ex);
 185         /* All frames are 20 ms long */
 186         f.samples = SPEEX_SAMPLES;
 187         f.mallocd = 0;
 188         f.offset = 0;
 189         f.src = __PRETTY_FUNCTION__;
 190         f.data = speex_slin_ex;
 191         return &f;
 192 }
 193
 194 /*! \brief convert and store into outbuf */
 195 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 196 {
 197         struct speex_coder_pvt *tmp = pvt->pvt;
 198
 199         /* Assuming there's space left, decode into the current buffer at
 200            the tail location.  Read in as many frames as there are */
 201         int x;
 202         int res;
 203         int16_t *dst = (int16_t *)pvt->outbuf;
 204         /* XXX fout is a temporary buffer, may have different types */
 205 #ifdef _SPEEX_TYPES_H
 206         spx_int16_t fout[1024];
 207 #else
 208         float fout[1024];
 209 #endif
 210
 211         if (f->datalen == 0) {  /* Native PLC interpolation */
 212                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 213                         ast_log(LOG_WARNING, "Out of buffer space\n");
 214                         return -1;
 215                 }
 216 #ifdef _SPEEX_TYPES_H
 217                 speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
 218 #else
 219                 speex_decode(tmp->speex, NULL, fout);
 220                 for (x=0;x<tmp->framesize;x++) {
 221                         dst[pvt->samples + x] = (int16_t)fout[x];
 222                 }
 223 #endif
 224                 pvt->samples += tmp->framesize;
 225                 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
 226                 return 0;
 227         }
 228
 229         /* Read in bits */
 230         speex_bits_read_from(&tmp->bits, f->data, f->datalen);
 231         for (;;) {
 232 #ifdef _SPEEX_TYPES_H
 233                 res = speex_decode_int(tmp->speex, &tmp->bits, fout);
 234 #else
 235                 res = speex_decode(tmp->speex, &tmp->bits, fout);
 236 #endif
 237                 if (res < 0)
 238                         break;
 239                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 240                         ast_log(LOG_WARNING, "Out of buffer space\n");
 241                         return -1;
 242                 }
 243                 for (x = 0 ; x < tmp->framesize; x++)
 244                         dst[pvt->samples + x] = (int16_t)fout[x];
 245                 pvt->samples += tmp->framesize;
 246                 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
 247         }
 248         return 0;
 249 }
 250
 251 /*! \brief store input frame in work buffer */
 252 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 253 {
 254         struct speex_coder_pvt *tmp = pvt->pvt;
 255
 256         /* XXX We should look at how old the rest of our stream is, and if it
 257            is too old, then we should overwrite it entirely, otherwise we can
 258            get artifacts of earlier talk that do not belong */
 259         memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
 260         pvt->samples += f->samples;
 261         return 0;
 262 }
 263
 264 /*! \brief convert work buffer and produce output frame */
 265 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
 266 {
 267         struct speex_coder_pvt *tmp = pvt->pvt;
 268         int is_speech=1;
 269         int datalen = 0;        /* output bytes */
 270         int samples = 0;        /* output samples */
 271
 272         /* We can't work on anything less than a frame in size */
 273         if (pvt->samples < tmp->framesize)
 274                 return NULL;
 275         speex_bits_reset(&tmp->bits);
 276         while (pvt->samples >= tmp->framesize) {
 277 #ifdef _SPEEX_TYPES_H
 278                 /* Preprocess audio */
 279                 if (preproc)
 280                         is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
 281                 /* Encode a frame of data */
 282                 if (is_speech) {
 283                         /* If DTX enabled speex_encode returns 0 during silence */
 284                         is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
 285                 } else {
 286                         /* 5 zeros interpreted by Speex as silence (submode 0) */
 287                         speex_bits_pack(&tmp->bits, 0, 5);
 288                 }
 289 #else
 290                 {
 291                         float fbuf[1024];
 292                         int x;
 293                         /* Convert to floating point */
 294                         for (x = 0; x < tmp->framesize; x++)
 295                                 fbuf[x] = tmp->buf[samples + x];
 296                         /* Encode a frame of data */
 297                         is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
 298                 }
 299 #endif
 300                 samples += tmp->framesize;
 301                 pvt->samples -= tmp->framesize;
 302         }
 303
 304         /* Move the data at the end of the buffer to the front */
 305         if (pvt->samples)
 306                 memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
 307
 308         /* Use AST_FRAME_CNG to signify the start of any silence period */
 309         if (is_speech) {
 310                 tmp->silent_state = 0;
 311         } else {
 312                 if (tmp->silent_state) {
 313                         return NULL;
 314                 } else {
 315                         tmp->silent_state = 1;
 316                         speex_bits_reset(&tmp->bits);
 317                         memset(&pvt->f, 0, sizeof(pvt->f));
 318                         pvt->f.frametype = AST_FRAME_CNG;
 319                         pvt->f.samples = samples;
 320                         /* XXX what now ? format etc... */
 321                 }
 322         }
 323
 324         /* Terminate bit stream */
 325         speex_bits_pack(&tmp->bits, 15, 5);
 326         datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
 327         return ast_trans_frameout(pvt, datalen, samples);
 328 }
 329
 330 static void speextolin_destroy(struct ast_trans_pvt *arg)
 331 {
 332         struct speex_coder_pvt *pvt = arg->pvt;
 333
 334         speex_decoder_destroy(pvt->speex);
 335         speex_bits_destroy(&pvt->bits);
 336 }
 337
 338 static void lintospeex_destroy(struct ast_trans_pvt *arg)
 339 {
 340         struct speex_coder_pvt *pvt = arg->pvt;
 341 #ifdef _SPEEX_TYPES_H
 342         if (preproc)
 343                 speex_preprocess_state_destroy(pvt->pp);
 344 #endif
 345         speex_encoder_destroy(pvt->speex);
 346         speex_bits_destroy(&pvt->bits);
 347 }
 348
 349 static struct ast_translator speextolin = {
 350         .name = "speextolin",
 351         .srcfmt = AST_FORMAT_SPEEX,
 352         .dstfmt =  AST_FORMAT_SLINEAR,
 353         .newpvt = speextolin_new,
 354         .framein = speextolin_framein,
 355         .destroy = speextolin_destroy,
 356         .sample = speextolin_sample,
 357         .desc_size = sizeof(struct speex_coder_pvt),
 358         .buffer_samples = BUFFER_SAMPLES,
 359         .buf_size = BUFFER_SAMPLES * 2,
 360         .native_plc = 1,
 361 };
 362
 363 static struct ast_translator lintospeex = {
 364         .name = "lintospeex",
 365         .srcfmt = AST_FORMAT_SLINEAR,
 366         .dstfmt = AST_FORMAT_SPEEX,
 367         .newpvt = lintospeex_new,
 368         .framein = lintospeex_framein,
 369         .frameout = lintospeex_frameout,
 370         .destroy = lintospeex_destroy,
 371         .sample = lintospeex_sample,
 372         .desc_size = sizeof(struct speex_coder_pvt),
 373         .buffer_samples = BUFFER_SAMPLES,
 374         .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
 375 };
 376
 377 static void parse_config(void)
 378 {
 379         struct ast_config *cfg = ast_config_load("codecs.conf");
 380         struct ast_variable *var;
 381         int res;
 382         float res_f;
 383
 384         if (cfg == NULL)
 385                 return;
 386
 387         for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
 388                 if (!strcasecmp(var->name, "quality")) {
 389                         res = abs(atoi(var->value));
 390                         if (res > -1 && res < 11) {
 391                                 if (option_verbose > 2)
 392                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
 393                                 quality = res;
 394                         } else
 395                                 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
 396                 } else if (!strcasecmp(var->name, "complexity")) {
 397                         res = abs(atoi(var->value));
 398                         if (res > -1 && res < 11) {
 399                                 if (option_verbose > 2)
 400                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
 401                                 complexity = res;
 402                         } else
 403                                 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
 404                 } else if (!strcasecmp(var->name, "vbr_quality")) {
 405                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
 406                                 if (option_verbose > 2)
 407                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
 408                                 vbr_quality = res_f;
 409                         } else
 410                                 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
 411                 } else if (!strcasecmp(var->name, "abr_quality")) {
 412                         ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
 413                 } else if (!strcasecmp(var->name, "enhancement")) {
 414                         enhancement = ast_true(var->value) ? 1 : 0;
 415                         if (option_verbose > 2)
 416                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
 417                 } else if (!strcasecmp(var->name, "vbr")) {
 418                         vbr = ast_true(var->value) ? 1 : 0;
 419                         if (option_verbose > 2)
 420                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
 421                 } else if (!strcasecmp(var->name, "abr")) {
 422                         res = abs(atoi(var->value));
 423                         if (res >= 0) {
 424                                 if (option_verbose > 2) {
 425                                         if (res > 0)
 426                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
 427                                         else
 428                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
 429                                 }
 430                                 abr = res;
 431                         } else
 432                                 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
 433                 } else if (!strcasecmp(var->name, "vad")) {
 434                         vad = ast_true(var->value) ? 1 : 0;
 435                         if (option_verbose > 2)
 436                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
 437                 } else if (!strcasecmp(var->name, "dtx")) {
 438                         dtx = ast_true(var->value) ? 1 : 0;
 439                         if (option_verbose > 2)
 440                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
 441                 } else if (!strcasecmp(var->name, "preprocess")) {
 442                         preproc = ast_true(var->value) ? 1 : 0;
 443                         if (option_verbose > 2)
 444                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
 445                 } else if (!strcasecmp(var->name, "pp_vad")) {
 446                         pp_vad = ast_true(var->value) ? 1 : 0;
 447                         if (option_verbose > 2)
 448                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
 449                 } else if (!strcasecmp(var->name, "pp_agc")) {
 450                         pp_agc = ast_true(var->value) ? 1 : 0;
 451                         if (option_verbose > 2)
 452                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
 453                 } else if (!strcasecmp(var->name, "pp_agc_level")) {
 454                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 455                                 if (option_verbose > 2)
 456                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
 457                                 pp_agc_level = res_f;
 458                         } else
 459                                 ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
 460                 } else if (!strcasecmp(var->name, "pp_denoise")) {
 461                         pp_denoise = ast_true(var->value) ? 1 : 0;
 462                         if (option_verbose > 2)
 463                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
 464                 } else if (!strcasecmp(var->name, "pp_dereverb")) {
 465                         pp_dereverb = ast_true(var->value) ? 1 : 0;
 466                         if (option_verbose > 2)
 467                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
 468                 } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
 469                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 470                                 if (option_verbose > 2)
 471                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
 472                                 pp_dereverb_decay = res_f;
 473                         } else
 474                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
 475                 } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
 476                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 477                                 if (option_verbose > 2)
 478                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
 479                                 pp_dereverb_level = res_f;
 480                         } else
 481                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
 482                 }
 483         }
 484         ast_config_destroy(cfg);
 485 }
 486
 487 static int reload(void)
 488 {
 489         parse_config();
 490
 491         return 0;
 492 }
 493
 494 static int unload_module(void)
 495 {
 496         int res;
 497
 498         res = ast_unregister_translator(&lintospeex);
 499         res |= ast_unregister_translator(&speextolin);
 500
 501         return res;
 502 }
 503
 504 static int load_module(void)
 505 {
 506         int res;
 507
 508         parse_config();
 509         res=ast_register_translator(&speextolin);
 510         if (!res)
 511                 res=ast_register_translator(&lintospeex);
 512         else
 513                 ast_unregister_translator(&speextolin);
 514
 515         return res;
 516 }
 517
/*
 * Module descriptor for the "Speex Coder/Decoder" module: names
 * load_module(), unload_module() and reload() as its load, unload and
 * reload entry points.
 */
AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
                .load = load_module,
                .unload = unload_module,
                .reload = reload,
               );