codecs/codec_speex.c

   1 /*
   2  * Asterisk -- An open source telephony toolkit.
   3  *
   4  * Copyright (C) 1999 - 2005, Digium, Inc.
   5  *
   6  * Mark Spencer <markster@digium.com>
   7  *
   8  *
   9  * See http://www.asterisk.org for more information about
  10  * the Asterisk project. Please do not directly contact
  11  * any of the maintainers of this project for assistance;
  12  * the project provides a web site, mailing lists and IRC
  13  * channels for your use.
  14  *
  15  * This program is free software, distributed under the terms of
  16  * the GNU General Public License Version 2. See the LICENSE file
  17  * at the top of the source tree.
  18  */
  19
  20 /*! \file
  21  *
  22  * \brief Translate between signed linear and Speex (Open Codec)
  23  *
  24  * http://www.speex.org
  25  * \note This work was motivated by Jeremy McNamara
  26  * hacked to be configurable by anthm and bkw 9/28/2004
  27  * \ingroup codecs
  28  */
  29
  30 /*** MODULEINFO
  31         <depend>speex</depend>
  32  ***/
  33
  34 #include "asterisk.h"
  35
  36 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
  37
  38 #include <fcntl.h>
  39 #include <stdlib.h>
  40 #include <unistd.h>
  41 #include <netinet/in.h>
  42 #include <string.h>
  43 #include <stdio.h>
  44 #include <speex/speex.h>
  45
  46 /* We require a post 1.1.8 version of Speex to enable preprocessing
  47    and better type handling */
  48 #ifdef _SPEEX_TYPES_H
  49 #include <speex/speex_preprocess.h>
  50 #endif
  51
  52 #include "asterisk/lock.h"
  53 #include "asterisk/translate.h"
  54 #include "asterisk/module.h"
  55 #include "asterisk/config.h"
  56 #include "asterisk/options.h"
  57 #include "asterisk/logger.h"
  58 #include "asterisk/channel.h"
  59 #include "asterisk/utils.h"
  60
  61 /* Sample frame data */
  62 #include "slin_speex_ex.h"
  63 #include "speex_slin_ex.h"
  64
  65 /* codec variables */
  66 static int quality = 3;
  67 static int complexity = 2;
  68 static int enhancement = 0;
  69 static int vad = 0;
  70 static int vbr = 0;
  71 static float vbr_quality = 4;
  72 static int abr = 0;
  73 static int dtx = 0;     /* set to 1 to enable silence detection */
  74
  75 static int preproc = 0;
  76 static int pp_vad = 0;
  77 static int pp_agc = 0;
  78 static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
  79 static int pp_denoise = 0;
  80 static int pp_dereverb = 0;
  81 static float pp_dereverb_decay = 0.4;
  82 static float pp_dereverb_level = 0.3;
  83
  84 #define TYPE_SILENCE     0x2
  85 #define TYPE_HIGH        0x0
  86 #define TYPE_LOW         0x1
  87 #define TYPE_MASK        0x3
  88
  89 #define BUFFER_SAMPLES  8000
  90 #define SPEEX_SAMPLES   160
  91
  92 struct speex_coder_pvt {
  93         void *speex;
  94         SpeexBits bits;
  95         int framesize;
  96         int silent_state;
  97 #ifdef _SPEEX_TYPES_H
  98         SpeexPreprocessState *pp;
  99         spx_int16_t buf[BUFFER_SAMPLES];
 100 #else
 101         int16_t buf[BUFFER_SAMPLES];    /* input, waiting to be compressed */
 102 #endif
 103 };
 104
 105
 106 static int lintospeex_new(struct ast_trans_pvt *pvt)
 107 {
 108         struct speex_coder_pvt *tmp = pvt->pvt;
 109
 110         if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
 111                 return -1;
 112
 113         speex_bits_init(&tmp->bits);
 114         speex_bits_reset(&tmp->bits);
 115         speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 116         speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
 117 #ifdef _SPEEX_TYPES_H
 118         if (preproc) {
 119                 tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
 120                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
 121                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
 122                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
 123                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
 124                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
 125                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
 126                 speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
 127         }
 128 #endif
 129         if (!abr && !vbr) {
 130                 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
 131                 if (vad)
 132                         speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
 133         }
 134         if (vbr) {
 135                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
 136                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
 137         }
 138         if (abr)
 139                 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
 140         if (dtx)
 141                 speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
 142         tmp->silent_state = 0;
 143
 144         return 0;
 145 }
 146
 147 static int speextolin_new(struct ast_trans_pvt *pvt)
 148 {
 149         struct speex_coder_pvt *tmp = pvt->pvt;
 150
 151         if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
 152                 return -1;
 153
 154         speex_bits_init(&tmp->bits);
 155         speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
 156         if (enhancement)
 157                 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
 158
 159         return 0;
 160 }
 161
 162 static struct ast_frame *lintospeex_sample(void)
 163 {
 164         static struct ast_frame f;
 165         f.frametype = AST_FRAME_VOICE;
 166         f.subclass = AST_FORMAT_SLINEAR;
 167         f.datalen = sizeof(slin_speex_ex);
 168         /* Assume 8000 Hz */
 169         f.samples = sizeof(slin_speex_ex)/2;
 170         f.mallocd = 0;
 171         f.offset = 0;
 172         f.src = __PRETTY_FUNCTION__;
 173         f.data = slin_speex_ex;
 174         return &f;
 175 }
 176
 177 static struct ast_frame *speextolin_sample(void)
 178 {
 179         static struct ast_frame f;
 180         f.frametype = AST_FRAME_VOICE;
 181         f.subclass = AST_FORMAT_SPEEX;
 182         f.datalen = sizeof(speex_slin_ex);
 183         /* All frames are 20 ms long */
 184         f.samples = SPEEX_SAMPLES;
 185         f.mallocd = 0;
 186         f.offset = 0;
 187         f.src = __PRETTY_FUNCTION__;
 188         f.data = speex_slin_ex;
 189         return &f;
 190 }
 191
 192 /*! \brief convert and store into outbuf */
 193 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 194 {
 195         struct speex_coder_pvt *tmp = pvt->pvt;
 196
 197         /* Assuming there's space left, decode into the current buffer at
 198            the tail location.  Read in as many frames as there are */
 199         int x;
 200         int res;
 201         int16_t *dst = (int16_t *)pvt->outbuf;
 202         /* XXX fout is a temporary buffer, may have different types */
 203 #ifdef _SPEEX_TYPES_H
 204         spx_int16_t fout[1024];
 205 #else
 206         float fout[1024];
 207 #endif
 208
 209         if (f->datalen == 0) {  /* Native PLC interpolation */
 210                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 211                         ast_log(LOG_WARNING, "Out of buffer space\n");
 212                         return -1;
 213                 }
 214 #ifdef _SPEEX_TYPES_H
 215                 speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
 216 #else
 217                 speex_decode(tmp->speex, NULL, fout);
 218                 for (x=0;x<tmp->framesize;x++) {
 219                         dst[pvt->samples + x] = (int16_t)fout[x];
 220                 }
 221 #endif
 222                 pvt->samples += tmp->framesize;
 223                 return 0;
 224         }
 225
 226         /* Read in bits */
 227         speex_bits_read_from(&tmp->bits, f->data, f->datalen);
 228         for (;;) {
 229 #ifdef _SPEEX_TYPES_H
 230                 res = speex_decode_int(tmp->speex, &tmp->bits, fout);
 231 #else
 232                 res = speex_decode(tmp->speex, &tmp->bits, fout);
 233 #endif
 234                 if (res < 0)
 235                         break;
 236                 if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
 237                         ast_log(LOG_WARNING, "Out of buffer space\n");
 238                         return -1;
 239                 }
 240                 for (x = 0 ; x < tmp->framesize; x++)
 241                         dst[pvt->samples + x] = (int16_t)fout[x];
 242                 pvt->samples += tmp->framesize;
 243                 pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
 244         }
 245         return 0;
 246 }
 247
 248 /*! \brief store input frame in work buffer */
 249 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
 250 {
 251         struct speex_coder_pvt *tmp = pvt->pvt;
 252
 253         /* XXX We should look at how old the rest of our stream is, and if it
 254            is too old, then we should overwrite it entirely, otherwise we can
 255            get artifacts of earlier talk that do not belong */
 256         memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
 257         pvt->samples += f->samples;
 258         return 0;
 259 }
 260
 261 /*! \brief convert work buffer and produce output frame */
 262 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
 263 {
 264         struct speex_coder_pvt *tmp = pvt->pvt;
 265         int is_speech=1;
 266         int datalen = 0;        /* output bytes */
 267         int samples = 0;        /* output samples */
 268
 269         /* We can't work on anything less than a frame in size */
 270         if (pvt->samples < tmp->framesize)
 271                 return NULL;
 272         speex_bits_reset(&tmp->bits);
 273         while (pvt->samples >= tmp->framesize) {
 274 #ifdef _SPEEX_TYPES_H
 275                 /* Preprocess audio */
 276                 if (preproc)
 277                         is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
 278                 /* Encode a frame of data */
 279                 if (is_speech) {
 280                         /* If DTX enabled speex_encode returns 0 during silence */
 281                         is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
 282                 } else {
 283                         /* 5 zeros interpreted by Speex as silence (submode 0) */
 284                         speex_bits_pack(&tmp->bits, 0, 5);
 285                 }
 286 #else
 287                 {
 288                         float fbuf[1024];
 289                         int x;
 290                         /* Convert to floating point */
 291                         for (x = 0; x < tmp->framesize; x++)
 292                                 fbuf[x] = tmp->buf[samples + x];
 293                         /* Encode a frame of data */
 294                         is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
 295                 }
 296 #endif
 297                 samples += tmp->framesize;
 298                 pvt->samples -= tmp->framesize;
 299         }
 300
 301         /* Move the data at the end of the buffer to the front */
 302         if (pvt->samples)
 303                 memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
 304
 305         /* Use AST_FRAME_CNG to signify the start of any silence period */
 306         if (is_speech) {
 307                 tmp->silent_state = 0;
 308         } else {
 309                 if (tmp->silent_state) {
 310                         return NULL;
 311                 } else {
 312                         tmp->silent_state = 1;
 313                         speex_bits_reset(&tmp->bits);
 314                         memset(&pvt->f, 0, sizeof(pvt->f));
 315                         pvt->f.frametype = AST_FRAME_CNG;
 316                         pvt->f.samples = samples;
 317                         /* XXX what now ? format etc... */
 318                 }
 319         }
 320
 321         /* Terminate bit stream */
 322         speex_bits_pack(&tmp->bits, 15, 5);
 323         datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
 324         return ast_trans_frameout(pvt, datalen, samples);
 325 }
 326
 327 static void speextolin_destroy(struct ast_trans_pvt *arg)
 328 {
 329         struct speex_coder_pvt *pvt = arg->pvt;
 330
 331         speex_decoder_destroy(pvt->speex);
 332         speex_bits_destroy(&pvt->bits);
 333 }
 334
 335 static void lintospeex_destroy(struct ast_trans_pvt *arg)
 336 {
 337         struct speex_coder_pvt *pvt = arg->pvt;
 338 #ifdef _SPEEX_TYPES_H
 339         if (preproc)
 340                 speex_preprocess_state_destroy(pvt->pp);
 341 #endif
 342         speex_encoder_destroy(pvt->speex);
 343         speex_bits_destroy(&pvt->bits);
 344 }
 345
 346 static struct ast_translator speextolin = {
 347         .name = "speextolin",
 348         .srcfmt = AST_FORMAT_SPEEX,
 349         .dstfmt =  AST_FORMAT_SLINEAR,
 350         .newpvt = speextolin_new,
 351         .framein = speextolin_framein,
 352         .destroy = speextolin_destroy,
 353         .sample = speextolin_sample,
 354         .desc_size = sizeof(struct speex_coder_pvt),
 355         .buffer_samples = BUFFER_SAMPLES,
 356         .buf_size = BUFFER_SAMPLES * 2,
 357 };
 358
 359 static struct ast_translator lintospeex = {
 360         .name = "lintospeex",
 361         .srcfmt = AST_FORMAT_SLINEAR,
 362         .dstfmt = AST_FORMAT_SPEEX,
 363         .newpvt = lintospeex_new,
 364         .framein = lintospeex_framein,
 365         .frameout = lintospeex_frameout,
 366         .destroy = lintospeex_destroy,
 367         .sample = lintospeex_sample,
 368         .desc_size = sizeof(struct speex_coder_pvt),
 369         .buffer_samples = BUFFER_SAMPLES,
 370         .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
 371 };
 372
 373 static void parse_config(void)
 374 {
 375         struct ast_config *cfg = ast_config_load("codecs.conf");
 376         struct ast_variable *var;
 377         int res;
 378         float res_f;
 379
 380         if (cfg == NULL)
 381                 return;
 382
 383         for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
 384                 if (!strcasecmp(var->name, "quality")) {
 385                         res = abs(atoi(var->value));
 386                         if (res > -1 && res < 11) {
 387                                 if (option_verbose > 2)
 388                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
 389                                 quality = res;
 390                         } else
 391                                 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
 392                 } else if (!strcasecmp(var->name, "complexity")) {
 393                         res = abs(atoi(var->value));
 394                         if (res > -1 && res < 11) {
 395                                 if (option_verbose > 2)
 396                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
 397                                 complexity = res;
 398                         } else
 399                                 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
 400                 } else if (!strcasecmp(var->name, "vbr_quality")) {
 401                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
 402                                 if (option_verbose > 2)
 403                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
 404                                 vbr_quality = res_f;
 405                         } else
 406                                 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
 407                 } else if (!strcasecmp(var->name, "abr_quality")) {
 408                         ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
 409                 } else if (!strcasecmp(var->name, "enhancement")) {
 410                         enhancement = ast_true(var->value) ? 1 : 0;
 411                         if (option_verbose > 2)
 412                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
 413                 } else if (!strcasecmp(var->name, "vbr")) {
 414                         vbr = ast_true(var->value) ? 1 : 0;
 415                         if (option_verbose > 2)
 416                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
 417                 } else if (!strcasecmp(var->name, "abr")) {
 418                         res = abs(atoi(var->value));
 419                         if (res >= 0) {
 420                                 if (option_verbose > 2) {
 421                                         if (res > 0)
 422                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
 423                                         else
 424                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
 425                                 }
 426                                 abr = res;
 427                         } else
 428                                 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
 429                 } else if (!strcasecmp(var->name, "vad")) {
 430                         vad = ast_true(var->value) ? 1 : 0;
 431                         if (option_verbose > 2)
 432                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
 433                 } else if (!strcasecmp(var->name, "dtx")) {
 434                         dtx = ast_true(var->value) ? 1 : 0;
 435                         if (option_verbose > 2)
 436                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
 437                 } else if (!strcasecmp(var->name, "preprocess")) {
 438                         preproc = ast_true(var->value) ? 1 : 0;
 439                         if (option_verbose > 2)
 440                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
 441                 } else if (!strcasecmp(var->name, "pp_vad")) {
 442                         pp_vad = ast_true(var->value) ? 1 : 0;
 443                         if (option_verbose > 2)
 444                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
 445                 } else if (!strcasecmp(var->name, "pp_agc")) {
 446                         pp_agc = ast_true(var->value) ? 1 : 0;
 447                         if (option_verbose > 2)
 448                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
 449                 } else if (!strcasecmp(var->name, "pp_agc_level")) {
 450                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 451                                 if (option_verbose > 2)
 452                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
 453                                 pp_agc_level = res_f;
 454                         } else
 455                                 ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
 456                 } else if (!strcasecmp(var->name, "pp_denoise")) {
 457                         pp_denoise = ast_true(var->value) ? 1 : 0;
 458                         if (option_verbose > 2)
 459                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
 460                 } else if (!strcasecmp(var->name, "pp_dereverb")) {
 461                         pp_dereverb = ast_true(var->value) ? 1 : 0;
 462                         if (option_verbose > 2)
 463                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
 464                 } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
 465                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 466                                 if (option_verbose > 2)
 467                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
 468                                 pp_dereverb_decay = res_f;
 469                         } else
 470                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
 471                 } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
 472                         if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
 473                                 if (option_verbose > 2)
 474                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
 475                                 pp_dereverb_level = res_f;
 476                         } else
 477                                 ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
 478                 }
 479         }
 480         ast_config_destroy(cfg);
 481 }
 482
 483 static int reload(void)
 484 {
 485         parse_config();
 486
 487         return 0;
 488 }
 489
 490 static int unload_module(void)
 491 {
 492         int res;
 493
 494         res = ast_unregister_translator(&lintospeex);
 495         res |= ast_unregister_translator(&speextolin);
 496
 497         return res;
 498 }
 499
 500 static int load_module(void)
 501 {
 502         int res;
 503
 504         parse_config();
 505         res=ast_register_translator(&speextolin);
 506         if (!res)
 507                 res=ast_register_translator(&lintospeex);
 508         else
 509                 ast_unregister_translator(&speextolin);
 510
 511         return res;
 512 }
 513
 514 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
 515                 .load = load_module,
 516                 .unload = unload_module,
 517                 .reload = reload,
 518                );