codecs/codec_speex.c

   1 /*
   2  * Asterisk -- An open source telephony toolkit.
   3  *
   4  * Copyright (C) 1999 - 2005, Digium, Inc.
   5  *
   6  * Mark Spencer <markster@digium.com>
   7  *
   8  *
   9  * See http://www.asterisk.org for more information about
  10  * the Asterisk project. Please do not directly contact
  11  * any of the maintainers of this project for assistance;
  12  * the project provides a web site, mailing lists and IRC
  13  * channels for your use.
  14  *
  15  * This program is free software, distributed under the terms of
  16  * the GNU General Public License Version 2. See the LICENSE file
  17  * at the top of the source tree.
  18  */
  19
  20 /*! \file
  21  *
  22  * \brief Translate between signed linear and Speex (Open Codec)
  23  *
  24  * \note This work was motivated by Jeremy McNamara
  25  * hacked to be configurable by anthm and bkw 9/28/2004
  26  *
  27  * \ingroup codecs
  28  *
  29  * \extref The Speex library - http://www.speex.org
  30  *
  31  */
  32
  33 /*** MODULEINFO
  34         <depend>speex</depend>
  35         <depend>speex_preprocess</depend>
  36         <use>speexdsp</use>
  37  ***/
  38
  39 #include "asterisk.h"
  40
  41 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
  42
  43 #include <speex/speex.h>
  44
  45 /* We require a post 1.1.8 version of Speex to enable preprocessing
  46    and better type handling */
  47 #ifdef _SPEEX_TYPES_H
  48 #include <speex/speex_preprocess.h>
  49 #endif
  50
  51 #include "asterisk/translate.h"
  52 #include "asterisk/module.h"
  53 #include "asterisk/config.h"
  54 #include "asterisk/utils.h"
  55
  56 /* Sample frame data */
  57 #include "slin_speex_ex.h"
  58 #include "speex_slin_ex.h"
  59
/* codec variables */
/* Module-wide settings shared by every translator instance. The values below
   are the built-in defaults; parse_config() replaces them with whatever the
   [speex] section of codecs.conf provides. */
static int quality = 3;         /* SPEEX_SET_QUALITY, applied only when neither abr nor vbr is set */
static int complexity = 2;      /* SPEEX_SET_COMPLEXITY, always applied to new encoders */
static int enhancement = 0;     /* non-zero: SPEEX_SET_ENH is applied to new decoders */
static int vad = 0;             /* non-zero: SPEEX_SET_VAD, only when neither abr nor vbr is set */
static int vbr = 0;             /* non-zero: SPEEX_SET_VBR plus SPEEX_SET_VBR_QUALITY */
static float vbr_quality = 4;   /* value handed to SPEEX_SET_VBR_QUALITY (config accepts 0-10) */
static int abr = 0;             /* non-zero: handed to SPEEX_SET_ABR (config calls it the target bitrate) */
static int dtx = 0;     /* set to 1 to enable silence detection */

/* Preprocessor settings; only consulted when the Speex headers define
   _SPEEX_TYPES_H and preproc is non-zero. */
static int preproc = 0;         /* non-zero: new encoders also get a preprocessor state */
static int pp_vad = 0;          /* SPEEX_PREPROCESS_SET_VAD */
static int pp_agc = 0;          /* SPEEX_PREPROCESS_SET_AGC */
static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
static int pp_denoise = 0;      /* SPEEX_PREPROCESS_SET_DENOISE */
static int pp_dereverb = 0;     /* SPEEX_PREPROCESS_SET_DEREVERB */
static float pp_dereverb_decay = 0.4;   /* SPEEX_PREPROCESS_SET_DEREVERB_DECAY */
static float pp_dereverb_level = 0.3;   /* SPEEX_PREPROCESS_SET_DEREVERB_LEVEL */
  78
/* Frame type tags. NOTE(review): nothing in the code shown here references
   these four macros - confirm before relying on them. */
#define TYPE_SILENCE     0x2
#define TYPE_HIGH        0x0
#define TYPE_LOW         0x1
#define TYPE_MASK        0x3

/* Capacity, in 16-bit samples, of the per-instance work buffer and of the
   translator output buffers declared further down. */
#define BUFFER_SAMPLES  8000
/* Sample count reported for the canned Speex example frame (one 20 ms frame). */
#define SPEEX_SAMPLES   160
  86
/* Per-instance state, used by both translation directions. */
struct speex_coder_pvt {
        void *speex;            /* encoder or decoder state returned by speex_*_init() */
        SpeexBits bits;         /* bit-packing state used to read/write the Speex payload */
        int framesize;          /* samples per codec frame, queried via SPEEX_GET_FRAME_SIZE */
        int silent_state;       /* encoder only: non-zero once a silence period has been signalled */
#ifdef _SPEEX_TYPES_H
        SpeexPreprocessState *pp;       /* encoder only: set by lintospeex_new() when preproc is on */
        spx_int16_t buf[BUFFER_SAMPLES];        /* input, waiting to be compressed */
#else
        int16_t buf[BUFFER_SAMPLES];    /* input, waiting to be compressed */
#endif
};
  99
 100
 101 static int lintospeex_new(struct ast_trans_pvt *pvt)
 102 {
 103         struct speex_coder_pvt *tmp = pvt->pvt;
 104
 105         if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
 106                 return -1;
 107
 108         speex_bits_init(&tmp->bits);
 109         speex_bits_reset(&tmp->bits);
 110         speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 111         speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
 112 #ifdef _SPEEX_TYPES_H
 113         if (preproc) {
 114                 tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
 115                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
 116                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
 117                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
 118                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
 119                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
 120                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
 121                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
 122         }
 123 #endif
 124         if (!abr && !vbr) {
 125                 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
 126                 if (vad)
 127                         speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
 128         }
 129         if (vbr) {
 130                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
 131                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
 132         }
 133         if (abr)
 134                 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
 135         if (dtx)
 136                 speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
 137         tmp->silent_state = 0;
 138
 139         return 0;
 140 }
 141
 142 static int speextolin_new(struct ast_trans_pvt *pvt)
 143 {
 144         struct speex_coder_pvt *tmp = pvt->pvt;
 145
 146         if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
 147                 return -1;
 148
 149         speex_bits_init(&tmp->bits);
 150         speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 151         if (enhancement)
 152                 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
 153
 154         return 0;
 155 }
 156
 157 static struct ast_frame *lintospeex_sample(void)
 158 {
 159         static struct ast_frame f;
 160         f.frametype = AST_FRAME_VOICE;
 161         f.subclass = AST_FORMAT_SLINEAR;
 162         f.datalen = sizeof(slin_speex_ex);
 163         /* Assume 8000 Hz */
 164         f.samples = sizeof(slin_speex_ex)/2;
 165         f.mallocd = 0;
 166         f.offset = 0;
 167         f.src = __PRETTY_FUNCTION__;
 168         f.data.ptr = slin_speex_ex;
 169         return &f;
 170 }
 171
 172 static struct ast_frame *speextolin_sample(void)
 173 {
 174         static struct ast_frame f;
 175         f.frametype = AST_FRAME_VOICE;
 176         f.subclass = AST_FORMAT_SPEEX;
 177         f.datalen = sizeof(speex_slin_ex);
 178         /* All frames are 20 ms long */
 179         f.samples = SPEEX_SAMPLES;
 180         f.mallocd = 0;
 181         f.offset = 0;
 182         f.src = __PRETTY_FUNCTION__;
 183         f.data.ptr = speex_slin_ex;
 184         return &f;
 185 }
 186
 187 /*! \brief convert and store into outbuf */
 188 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 189 {
 190         struct speex_coder_pvt *tmp = pvt->pvt;
 191
 192         /* Assuming there's space left, decode into the current buffer at
 193            the tail location.  Read in as many frames as there are */
 194         int x;
 195         int res;
 196         int16_t *dst = (int16_t *)pvt->outbuf;
 197         /* XXX fout is a temporary buffer, may have different types */
 198 #ifdef _SPEEX_TYPES_H
 199         spx_int16_t fout[1024];
 200 #else
 201         float fout[1024];
 202 #endif
 203
 204         if (f->datalen == 0) {  /* Native PLC interpolation */
 205                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 206                         ast_log(LOG_WARNING, "Out of buffer space\n");
 207                         return -1;
 208                 }
 209 #ifdef _SPEEX_TYPES_H
 210                 speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
 211 #else
 212                 speex_decode(tmp->speex, NULL, fout);
 213                 for (x=0;x<tmp->framesize;x++) {
 214                         dst[pvt->samples + x] = (int16_t)fout[x];
 215                 }
 216 #endif
 217                 pvt->samples += tmp->framesize;
 218                 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
 219                 return 0;
 220         }
 221
 222         /* Read in bits */
 223         speex_bits_read_from(&tmp->bits, f->data.ptr, f->datalen);
 224         for (;;) {
 225 #ifdef _SPEEX_TYPES_H
 226                 res = speex_decode_int(tmp->speex, &tmp->bits, fout);
 227 #else
 228                 res = speex_decode(tmp->speex, &tmp->bits, fout);
 229 #endif
 230                 if (res < 0)
 231                         break;
 232                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 233                         ast_log(LOG_WARNING, "Out of buffer space\n");
 234                         return -1;
 235                 }
 236                 for (x = 0 ; x < tmp->framesize; x++)
 237                         dst[pvt->samples + x] = (int16_t)fout[x];
 238                 pvt->samples += tmp->framesize;
 239                 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
 240         }
 241         return 0;
 242 }
 243
 244 /*! \brief store input frame in work buffer */
 245 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 246 {
 247         struct speex_coder_pvt *tmp = pvt->pvt;
 248
 249         /* XXX We should look at how old the rest of our stream is, and if it
 250            is too old, then we should overwrite it entirely, otherwise we can
 251            get artifacts of earlier talk that do not belong */
 252         memcpy(tmp->buf + pvt->samples, f->data.ptr, f->datalen);
 253         pvt->samples += f->samples;
 254         return 0;
 255 }
 256
 257 /*! \brief convert work buffer and produce output frame */
 258 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
 259 {
 260         struct speex_coder_pvt *tmp = pvt->pvt;
 261         int is_speech=1;
 262         int datalen = 0;        /* output bytes */
 263         int samples = 0;        /* output samples */
 264
 265         /* We can't work on anything less than a frame in size */
 266         if (pvt->samples < tmp->framesize)
 267                 return NULL;
 268         speex_bits_reset(&tmp->bits);
 269         while (pvt->samples >= tmp->framesize) {
 270 #ifdef _SPEEX_TYPES_H
 271                 /* Preprocess audio */
 272                 if (preproc)
 273                         is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
 274                 /* Encode a frame of data */
 275                 if (is_speech) {
 276                         /* If DTX enabled speex_encode returns 0 during silence */
 277                         is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
 278                 } else {
 279                         /* 5 zeros interpreted by Speex as silence (submode 0) */
 280                         speex_bits_pack(&tmp->bits, 0, 5);
 281                 }
 282 #else
 283                 {
 284                         float fbuf[1024];
 285                         int x;
 286                         /* Convert to floating point */
 287                         for (x = 0; x < tmp->framesize; x++)
 288                                 fbuf[x] = tmp->buf[samples + x];
 289                         /* Encode a frame of data */
 290                         is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
 291                 }
 292 #endif
 293                 samples += tmp->framesize;
 294                 pvt->samples -= tmp->framesize;
 295         }
 296
 297         /* Move the data at the end of the buffer to the front */
 298         if (pvt->samples)
 299                 memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
 300
 301         /* Use AST_FRAME_CNG to signify the start of any silence period */
 302         if (is_speech) {
 303                 tmp->silent_state = 0;
 304         } else {
 305                 if (tmp->silent_state) {
 306                         return NULL;
 307                 } else {
 308                         tmp->silent_state = 1;
 309                         speex_bits_reset(&tmp->bits);
 310                         memset(&pvt->f, 0, sizeof(pvt->f));
 311                         pvt->f.frametype = AST_FRAME_CNG;
 312                         pvt->f.samples = samples;
 313                         /* XXX what now ? format etc... */
 314                 }
 315         }
 316
 317         /* Terminate bit stream */
 318         speex_bits_pack(&tmp->bits, 15, 5);
 319         datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
 320         return ast_trans_frameout(pvt, datalen, samples);
 321 }
 322
 323 static void speextolin_destroy(struct ast_trans_pvt *arg)
 324 {
 325         struct speex_coder_pvt *pvt = arg->pvt;
 326
 327         speex_decoder_destroy(pvt->speex);
 328         speex_bits_destroy(&pvt->bits);
 329 }
 330
 331 static void lintospeex_destroy(struct ast_trans_pvt *arg)
 332 {
 333         struct speex_coder_pvt *pvt = arg->pvt;
 334 #ifdef _SPEEX_TYPES_H
 335         if (preproc)
 336                 speex_preprocess_state_destroy(pvt->pp);
 337 #endif
 338         speex_encoder_destroy(pvt->speex);
 339         speex_bits_destroy(&pvt->bits);
 340 }
 341
/* Translator description for the Speex -> signed linear direction. No
   frameout callback is set here. */
static struct ast_translator speextolin = {
        .name = "speextolin",
        .srcfmt = AST_FORMAT_SPEEX,
        .dstfmt =  AST_FORMAT_SLINEAR,
        .newpvt = speextolin_new,
        .framein = speextolin_framein,
        .destroy = speextolin_destroy,
        .sample = speextolin_sample,
        .desc_size = sizeof(struct speex_coder_pvt),
        .buffer_samples = BUFFER_SAMPLES,
        .buf_size = BUFFER_SAMPLES * 2, /* 2 bytes per signed linear sample */
        /* speextolin_framein() handles zero-length frames itself by asking the
           decoder to interpolate; presumably this flag is what makes the core
           deliver such frames - TODO confirm */
        .native_plc = 1,
};
 355
/* Translator description for the signed linear -> Speex direction. Input is
   collected by framein and encoded in frameout. */
static struct ast_translator lintospeex = {
        .name = "lintospeex",
        .srcfmt = AST_FORMAT_SLINEAR,
        .dstfmt = AST_FORMAT_SPEEX,
        .newpvt = lintospeex_new,
        .framein = lintospeex_framein,
        .frameout = lintospeex_frameout,
        .destroy = lintospeex_destroy,
        .sample = lintospeex_sample,
        .desc_size = sizeof(struct speex_coder_pvt),
        .buffer_samples = BUFFER_SAMPLES,
        /* Passed to speex_bits_write() in lintospeex_frameout() as the output limit */
        .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
};
 369
 370 static int parse_config(int reload)
 371 {
 372         struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
 373         struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
 374         struct ast_variable *var;
 375         int res;
 376         float res_f;
 377
 378         if (cfg == NULL)
 379                 return 0;
 380         if (cfg == CONFIG_STATUS_FILEUNCHANGED)
 381                 return 0;
 382
 383         for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
 384                 if (!strcasecmp(var->name, "quality")) {
 385                         res = abs(atoi(var->value));
 386                         if (res > -1 && res < 11) {
 387                                 ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
 388                                 quality = res;
 389                         } else
 390                                 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
 391                 } else if (!strcasecmp(var->name, "complexity")) {
 392                         res = abs(atoi(var->value));
 393                         if (res > -1 && res < 11) {
 394                                 ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
 395                                 complexity = res;
 396                         } else
 397                                 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
 398                 } else if (!strcasecmp(var->name, "vbr_quality")) {
 399                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
 400                                 ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
 401                                 vbr_quality = res_f;
 402                         } else
 403                                 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
 404                 } else if (!strcasecmp(var->name, "abr_quality")) {
 405                         ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
 406                 } else if (!strcasecmp(var->name, "enhancement")) {
 407                         enhancement = ast_true(var->value) ? 1 : 0;
 408                         ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
 409                 } else if (!strcasecmp(var->name, "vbr")) {
 410                         vbr = ast_true(var->value) ? 1 : 0;
 411                         ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
 412                 } else if (!strcasecmp(var->name, "abr")) {
 413                         res = abs(atoi(var->value));
 414                         if (res >= 0) {
 415                                         if (res > 0)
 416                                         ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
 417                                         else
 418                                         ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
 419                                 abr = res;
 420                         } else
 421                                 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
 422                 } else if (!strcasecmp(var->name, "vad")) {
 423                         vad = ast_true(var->value) ? 1 : 0;
 424                         ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
 425                 } else if (!strcasecmp(var->name, "dtx")) {
 426                         dtx = ast_true(var->value) ? 1 : 0;
 427                         ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
 428                 } else if (!strcasecmp(var->name, "preprocess")) {
 429                         preproc = ast_true(var->value) ? 1 : 0;
 430                         ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
 431                 } else if (!strcasecmp(var->name, "pp_vad")) {
 432                         pp_vad = ast_true(var->value) ? 1 : 0;
 433                         ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
 434                 } else if (!strcasecmp(var->name, "pp_agc")) {
 435                         pp_agc = ast_true(var->value) ? 1 : 0;
 436                         ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
 437                 } else if (!strcasecmp(var->name, "pp_agc_level")) {
 438                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 439                                 ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
 440                                 pp_agc_level = res_f;
 441                         } else
 442                                 ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
 443                 } else if (!strcasecmp(var->name, "pp_denoise")) {
 444                         pp_denoise = ast_true(var->value) ? 1 : 0;
 445                         ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
 446                 } else if (!strcasecmp(var->name, "pp_dereverb")) {
 447                         pp_dereverb = ast_true(var->value) ? 1 : 0;
 448                         ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
 449                 } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
 450                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 451                                 ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
 452                                 pp_dereverb_decay = res_f;
 453                         } else
 454                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
 455                 } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
 456                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 457                                 ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
 458                                 pp_dereverb_level = res_f;
 459                         } else
 460                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
 461                 }
 462         }
 463         ast_config_destroy(cfg);
 464         return 0;
 465 }
 466
 467 static int reload(void)
 468 {
 469         if (parse_config(1))
 470                 return AST_MODULE_LOAD_DECLINE;
 471         return AST_MODULE_LOAD_SUCCESS;
 472 }
 473
 474 static int unload_module(void)
 475 {
 476         int res;
 477
 478         res = ast_unregister_translator(&lintospeex);
 479         res |= ast_unregister_translator(&speextolin);
 480
 481         return res;
 482 }
 483
 484 static int load_module(void)
 485 {
 486         int res;
 487
 488         if (parse_config(0))
 489                 return AST_MODULE_LOAD_DECLINE;
 490         res=ast_register_translator(&speextolin);
 491         if (!res)
 492                 res=ast_register_translator(&lintospeex);
 493         else
 494                 ast_unregister_translator(&speextolin);
 495         if (res)
 496                 return AST_MODULE_LOAD_FAILURE;
 497         return AST_MODULE_LOAD_SUCCESS;
 498 }
 499
/* Module descriptor: names the module and wires up the load, unload and
   reload hooks defined above. */
AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
                .load = load_module,
                .unload = unload_module,
                .reload = reload,
               );