apps/codecs/libspeex/preprocess.c

   1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
   2    Copyright (C) 2004-2006 Epic Games
   3
   4    File: preprocess.c
   5    Preprocessor with denoising based on the algorithm by Ephraim and Malah
   6
   7    Redistribution and use in source and binary forms, with or without
   8    modification, are permitted provided that the following conditions are
   9    met:
  10
  11    1. Redistributions of source code must retain the above copyright notice,
  12    this list of conditions and the following disclaimer.
  13
  14    2. Redistributions in binary form must reproduce the above copyright
  15    notice, this list of conditions and the following disclaimer in the
  16    documentation and/or other materials provided with the distribution.
  17
  18    3. The name of the author may not be used to endorse or promote products
  19    derived from this software without specific prior written permission.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31    POSSIBILITY OF SUCH DAMAGE.
  32 */
  33
  34
  35 /*
  36    Recommended papers:
  37
  38    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
  39    short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
  40    Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
  41
  42    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
  43    log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
  44    Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
  45
  46    I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
  47    Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
  48
  49    Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
  50    approach to combined acoustic echo cancellation and noise reduction". IEEE
  51    Transactions on Speech and Audio Processing, 2002.
  52
  53    J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
  54    of simultaneous non-stationary sources". In Proceedings IEEE International
  55    Conference on Acoustics, Speech, and Signal Processing, 2004.
  56 */
  57
  58 #ifdef HAVE_CONFIG_H
  59 #include "config-speex.h"
  60 #endif
  61
  62 #include <math.h>
  63 #include "speex/speex_preprocess.h"
  64 #include "speex/speex_echo.h"
  65 #include "arch.h"
  66 #include "fftwrap.h"
  67 #include "filterbank.h"
  68 #include "math_approx.h"
  69 #include "os_support.h"
  70
  71 #ifndef M_PI
  72 #define M_PI 3.14159263
  73 #endif
  74
  75 #define LOUDNESS_EXP 5.f
  76 #define AMP_SCALE .001f
  77 #define AMP_SCALE_1 1000.f
  78
  79 #define NB_BANDS 24
  80
  81 #define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
  82 #define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
  83 #define NOISE_SUPPRESS_DEFAULT       -15
  84 #define ECHO_SUPPRESS_DEFAULT        -40
  85 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
  86
  87 #ifndef NULL
  88 #define NULL 0
  89 #endif
  90
  91 #define SQR(x) ((x)*(x))
  92 #define SQR16(x) (MULT16_16((x),(x)))
  93 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
  94
  95 #ifdef FIXED_POINT
  96 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
  97 {
  98    if (SHR32(a,7) >= b)
  99    {
 100       return 32767;
 101    } else {
 102       if (b>=QCONST32(1,23))
 103       {
 104          a = SHR32(a,8);
 105          b = SHR32(b,8);
 106       }
 107       if (b>=QCONST32(1,19))
 108       {
 109          a = SHR32(a,4);
 110          b = SHR32(b,4);
 111       }
 112       if (b>=QCONST32(1,15))
 113       {
 114          a = SHR32(a,4);
 115          b = SHR32(b,4);
 116       }
 117       a = SHL32(a,8);
 118       return PDIV32_16(a,b);
 119    }
 120
 121 }
 122 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
 123 {
 124    if (SHR32(a,15) >= b)
 125    {
 126       return 32767;
 127    } else {
 128       if (b>=QCONST32(1,23))
 129       {
 130          a = SHR32(a,8);
 131          b = SHR32(b,8);
 132       }
 133       if (b>=QCONST32(1,19))
 134       {
 135          a = SHR32(a,4);
 136          b = SHR32(b,4);
 137       }
 138       if (b>=QCONST32(1,15))
 139       {
 140          a = SHR32(a,4);
 141          b = SHR32(b,4);
 142       }
 143       a = SHL32(a,15)-a;
 144       return DIV32_16(a,b);
 145    }
 146 }
 147 #define SNR_SCALING 256.f
 148 #define SNR_SCALING_1 0.0039062f
 149 #define SNR_SHIFT 8
 150
 151 #define FRAC_SCALING 32767.f
 152 #define FRAC_SCALING_1 3.0518e-05
 153 #define FRAC_SHIFT 1
 154
 155 #define EXPIN_SCALING 2048.f
 156 #define EXPIN_SCALING_1 0.00048828f
 157 #define EXPIN_SHIFT 11
 158 #define EXPOUT_SCALING_1 1.5259e-05
 159
 160 #define NOISE_SHIFT 7
 161
 162 #else
 163
 164 #define DIV32_16_Q8(a,b) ((a)/(b))
 165 #define DIV32_16_Q15(a,b) ((a)/(b))
 166 #define SNR_SCALING 1.f
 167 #define SNR_SCALING_1 1.f
 168 #define SNR_SHIFT 0
 169 #define FRAC_SCALING 1.f
 170 #define FRAC_SCALING_1 1.f
 171 #define FRAC_SHIFT 0
 172 #define NOISE_SHIFT 0
 173
 174 #define EXPIN_SCALING 1.f
 175 #define EXPIN_SCALING_1 1.f
 176 #define EXPOUT_SCALING_1 1.f
 177
 178 #endif
 179
 180 /** Speex pre-processor state. */
 181 struct SpeexPreprocessState_ {
 182    /* Basic info */
 183    int    frame_size;        /**< Number of samples processed each time */
 184    int    ps_size;           /**< Number of points in the power spectrum */
 185    int    sampling_rate;     /**< Sampling rate of the input/output */
 186    int    nbands;
 187    FilterBank *bank;
 188
 189    /* Parameters */
 190    int    denoise_enabled;
 191    int    vad_enabled;
 192    int    dereverb_enabled;
 193    spx_word16_t  reverb_decay;
 194    spx_word16_t  reverb_level;
 195    spx_word16_t speech_prob_start;
 196    spx_word16_t speech_prob_continue;
 197    int    noise_suppress;
 198    int    echo_suppress;
 199    int    echo_suppress_active;
 200    SpeexEchoState *echo_state;
 201
 202    /* DSP-related arrays */
 203    spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
 204    spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
 205    spx_word32_t *ps;         /**< Current power spectrum */
 206    spx_word16_t *gain2;      /**< Adjusted gains */
 207    spx_word16_t *gain_floor; /**< Minimum gain allowed */
 208    spx_word16_t *window;     /**< Analysis/Synthesis window */
 209    spx_word32_t *noise;      /**< Noise estimate */
 210    spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
 211    spx_word32_t *old_ps;     /**< Power spectrum for last frame */
 212    spx_word16_t *gain;       /**< Ephraim Malah gain */
 213    spx_word16_t *prior;      /**< A-priori SNR */
 214    spx_word16_t *post;       /**< A-posteriori SNR */
 215
 216    spx_word32_t *S;          /**< Smoothed power spectrum */
 217    spx_word32_t *Smin;       /**< See Cohen paper */
 218    spx_word32_t *Stmp;       /**< See Cohen paper */
 219    int *update_prob;         /**< Probability of speech presence for noise update */
 220
 221    spx_word16_t *zeta;       /**< Smoothed a priori SNR */
 222    spx_word32_t *echo_noise;
 223    spx_word32_t *residual_echo;
 224
 225    /* Misc */
 226    spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
 227    spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
 228
 229    /* AGC stuff, only for floating point for now */
 230 #ifndef FIXED_POINT
 231    int    agc_enabled;
 232    float  agc_level;
 233    float  loudness_accum;
 234    float *loudness_weight;   /**< Perceptual loudness curve */
 235    float  loudness;          /**< Loudness estimate */
 236    float  agc_gain;          /**< Current AGC gain */
 237    int    nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */
 238    float  max_gain;          /**< Maximum gain allowed */
 239    float  max_increase_step; /**< Maximum increase in gain from one frame to another */
 240    float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
 241    float  prev_loudness;     /**< Loudness of previous frame */
 242    float  init_max;          /**< Current gain limit during initialisation */
 243 #endif
 244    int    nb_adapt;          /**< Number of frames used for adaptation so far */
 245    int    was_speech;
 246    int    min_count;         /**< Number of frames processed so far */
 247    void  *fft_lookup;        /**< Lookup table for the FFT */
 248 #ifdef FIXED_POINT
 249    int    frame_shift;
 250 #endif
 251 };
 252
 253
 254 static void conj_window(spx_word16_t *w, int len)
 255 {
 256    int i;
 257    for (i=0;i<len;i++)
 258    {
 259       spx_word16_t tmp;
 260 #ifdef FIXED_POINT
 261       spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
 262 #else
 263       spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
 264 #endif
 265       int inv=0;
 266       if (x<QCONST16(1.f,13))
 267       {
 268       } else if (x<QCONST16(2.f,13))
 269       {
 270          x=QCONST16(2.f,13)-x;
 271          inv=1;
 272       } else if (x<QCONST16(3.f,13))
 273       {
 274          x=x-QCONST16(2.f,13);
 275          inv=1;
 276       } else {
 277          x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
 278       }
 279       x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
 280       tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
 281       if (inv)
 282          tmp=SUB16(Q15_ONE,tmp);
 283       w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
 284    }
 285 }
 286
 287
 288 #ifdef FIXED_POINT
 289 /* This function approximates the gain function
 290    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
 291    which multiplied by xi/(1+xi) is the optimal gain
 292    in the loudness domain ( sqrt[amplitude] )
 293    Input in Q11 format, output in Q15
 294 */
 295 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
 296 {
 297    int ind;
 298    spx_word16_t frac;
 299    /* Q13 table */
 300    static const spx_word16_t table[21] = {
 301        6730,  8357,  9868, 11267, 12563, 13770, 14898,
 302       15959, 16961, 17911, 18816, 19682, 20512, 21311,
 303       22082, 22827, 23549, 24250, 24931, 25594, 26241};
 304       ind = SHR32(xx,10);
 305       if (ind<0)
 306          return Q15_ONE;
 307       if (ind>19)
 308          return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
 309       frac = SHL32(xx-SHL32(ind,10),5);
 310       return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
 311 }
 312
 313 static inline spx_word16_t qcurve(spx_word16_t x)
 314 {
 315    x = MAX16(x, 1);
 316    return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
 317 }
 318
 319 /* Compute the gain floor based on different floors for the background noise and residual echo */
 320 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
 321 {
 322    int i;
 323
 324    if (noise_suppress > effective_echo_suppress)
 325    {
 326       spx_word16_t noise_gain, gain_ratio;
 327       noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
 328       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
 329
 330       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
 331       for (i=0;i<len;i++)
 332          gain_floor[i] = MULT16_16_Q15(noise_gain,
 333                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
 334                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
 335    } else {
 336       spx_word16_t echo_gain, gain_ratio;
 337       echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
 338       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
 339
 340       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
 341       for (i=0;i<len;i++)
 342          gain_floor[i] = MULT16_16_Q15(echo_gain,
 343                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
 344                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
 345    }
 346 }
 347
 348 #else
 349 /* This function approximates the gain function
 350    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
 351    which multiplied by xi/(1+xi) is the optimal gain
 352    in the loudness domain ( sqrt[amplitude] )
 353 */
 354 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
 355 {
 356    int ind;
 357    float integer, frac;
 358    float x;
 359    static const float table[21] = {
 360       0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
 361       1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
 362       2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
 363       x = EXPIN_SCALING_1*xx;
 364       integer = floor(2*x);
 365       ind = (int)integer;
 366       if (ind<0)
 367          return FRAC_SCALING;
 368       if (ind>19)
 369          return FRAC_SCALING*(1+.1296/x);
 370       frac = 2*x-integer;
 371       return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
 372 }
 373
 374 static inline spx_word16_t qcurve(spx_word16_t x)
 375 {
 376    return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
 377 }
 378
 379 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
 380 {
 381    int i;
 382    float echo_floor;
 383    float noise_floor;
 384
 385    noise_floor = exp(.2302585f*noise_suppress);
 386    echo_floor = exp(.2302585f*effective_echo_suppress);
 387
 388    /* Compute the gain floor based on different floors for the background noise and residual echo */
 389    for (i=0;i<len;i++)
 390       gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
 391 }
 392
 393 #endif
 394 SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
 395 {
 396    int i;
 397    int N, N3, N4, M;
 398
 399    SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
 400    st->frame_size = frame_size;
 401
 402    /* Round ps_size down to the nearest power of two */
 403 #if 0
 404    i=1;
 405    st->ps_size = st->frame_size;
 406    while(1)
 407    {
 408       if (st->ps_size & ~i)
 409       {
 410          st->ps_size &= ~i;
 411          i<<=1;
 412       } else {
 413          break;
 414       }
 415    }
 416
 417
 418    if (st->ps_size < 3*st->frame_size/4)
 419       st->ps_size = st->ps_size * 3 / 2;
 420 #else
 421    st->ps_size = st->frame_size;
 422 #endif
 423
 424    N = st->ps_size;
 425    N3 = 2*N - st->frame_size;
 426    N4 = st->frame_size - N3;
 427
 428    st->sampling_rate = sampling_rate;
 429    st->denoise_enabled = 1;
 430    st->vad_enabled = 0;
 431    st->dereverb_enabled = 0;
 432    st->reverb_decay = 0;
 433    st->reverb_level = 0;
 434    st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
 435    st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
 436    st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
 437
 438    st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
 439    st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
 440
 441    st->echo_state = NULL;
 442
 443    st->nbands = NB_BANDS;
 444    M = st->nbands;
 445    st->bank = filterbank_new(M, sampling_rate, N, 1);
 446
 447    st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
 448    st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
 449    st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
 450
 451    st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 452    st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 453    st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 454    st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 455    st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 456    st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
 457    st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 458    st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 459    st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 460    st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 461    st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 462    st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
 463
 464    st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
 465    st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
 466    st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
 467    st->update_prob = (int*)speex_alloc(N*sizeof(int));
 468
 469    st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
 470    st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
 471
 472    conj_window(st->window, 2*N3);
 473    for (i=2*N3;i<2*st->ps_size;i++)
 474       st->window[i]=Q15_ONE;
 475
 476    if (N4>0)
 477    {
 478       for (i=N3-1;i>=0;i--)
 479       {
 480          st->window[i+N3+N4]=st->window[i+N3];
 481          st->window[i+N3]=1;
 482       }
 483    }
 484    for (i=0;i<N+M;i++)
 485    {
 486       st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
 487       st->reverb_estimate[i]=0;
 488       st->old_ps[i]=1;
 489       st->gain[i]=Q15_ONE;
 490       st->post[i]=SHL16(1, SNR_SHIFT);
 491       st->prior[i]=SHL16(1, SNR_SHIFT);
 492    }
 493
 494    for (i=0;i<N;i++)
 495       st->update_prob[i] = 1;
 496    for (i=0;i<N3;i++)
 497    {
 498       st->inbuf[i]=0;
 499       st->outbuf[i]=0;
 500    }
 501 #ifndef FIXED_POINT
 502    st->agc_enabled = 0;
 503    st->agc_level = 8000;
 504    st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
 505    for (i=0;i<N;i++)
 506    {
 507       float ff=((float)i)*.5*sampling_rate/((float)N);
 508       /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
 509       st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
 510       if (st->loudness_weight[i]<.01f)
 511          st->loudness_weight[i]=.01f;
 512       st->loudness_weight[i] *= st->loudness_weight[i];
 513    }
 514    /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
 515    st->loudness = 1e-15;
 516    st->agc_gain = 1;
 517    st->nb_loudness_adapt = 0;
 518    st->max_gain = 30;
 519    st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
 520    st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
 521    st->prev_loudness = 1;
 522    st->init_max = 1;
 523 #endif
 524    st->was_speech = 0;
 525
 526    st->fft_lookup = spx_fft_init(2*N);
 527
 528    st->nb_adapt=0;
 529    st->min_count=0;
 530    return st;
 531 }
 532
 533 void speex_preprocess_state_destroy(SpeexPreprocessState *st)
 534 {
 535    speex_free(st->frame);
 536    speex_free(st->ft);
 537    speex_free(st->ps);
 538    speex_free(st->gain2);
 539    speex_free(st->gain_floor);
 540    speex_free(st->window);
 541    speex_free(st->noise);
 542    speex_free(st->reverb_estimate);
 543    speex_free(st->old_ps);
 544    speex_free(st->gain);
 545    speex_free(st->prior);
 546    speex_free(st->post);
 547 #ifndef FIXED_POINT
 548    speex_free(st->loudness_weight);
 549 #endif
 550    speex_free(st->echo_noise);
 551    speex_free(st->residual_echo);
 552
 553    speex_free(st->S);
 554    speex_free(st->Smin);
 555    speex_free(st->Stmp);
 556    speex_free(st->update_prob);
 557    speex_free(st->zeta);
 558
 559    speex_free(st->inbuf);
 560    speex_free(st->outbuf);
 561
 562    spx_fft_destroy(st->fft_lookup);
 563    filterbank_destroy(st->bank);
 564    speex_free(st);
 565 }
 566
 567 /* FIXME: The AGC doesn't work yet with fixed-point*/
 568 #ifndef FIXED_POINT
 569 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
 570 {
 571    int i;
 572    int N = st->ps_size;
 573    float target_gain;
 574    float loudness=1.f;
 575    float rate;
 576
 577    for (i=2;i<N;i++)
 578    {
 579       loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
 580    }
 581    loudness=sqrt(loudness);
 582       /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
 583    loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
 584    if (Pframe>.3f)
 585    {
 586       st->nb_loudness_adapt++;
 587       /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
 588       rate = .03*Pframe*Pframe;
 589       st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
 590       st->loudness_accum = (1-rate)*st->loudness_accum + rate;
 591       if (st->init_max < st->max_gain && st->nb_adapt > 20)
 592          st->init_max *= 1.f + .1f*Pframe*Pframe;
 593    }
 594    /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
 595
 596    target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
 597
 598    if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
 599    {
 600       if (target_gain > st->max_increase_step*st->agc_gain)
 601          target_gain = st->max_increase_step*st->agc_gain;
 602       if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
 603          target_gain = st->max_decrease_step*st->agc_gain;
 604       if (target_gain > st->max_gain)
 605          target_gain = st->max_gain;
 606       if (target_gain > st->init_max)
 607          target_gain = st->init_max;
 608
 609       st->agc_gain = target_gain;
 610    }
 611    /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
 612
 613    for (i=0;i<2*N;i++)
 614       ft[i] *= st->agc_gain;
 615    st->prev_loudness = loudness;
 616 }
 617 #endif
 618
 619 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
 620 {
 621    int i;
 622    int N = st->ps_size;
 623    int N3 = 2*N - st->frame_size;
 624    int N4 = st->frame_size - N3;
 625    spx_word32_t *ps=st->ps;
 626
 627    /* 'Build' input frame */
 628    for (i=0;i<N3;i++)
 629       st->frame[i]=st->inbuf[i];
 630    for (i=0;i<st->frame_size;i++)
 631       st->frame[N3+i]=x[i];
 632
 633    /* Update inbuf */
 634    for (i=0;i<N3;i++)
 635       st->inbuf[i]=x[N4+i];
 636
 637    /* Windowing */
 638    for (i=0;i<2*N;i++)
 639       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
 640
 641 #ifdef FIXED_POINT
 642    {
 643       spx_word16_t max_val=0;
 644       for (i=0;i<2*N;i++)
 645          max_val = MAX16(max_val, ABS16(st->frame[i]));
 646       st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
 647       for (i=0;i<2*N;i++)
 648          st->frame[i] = SHL16(st->frame[i], st->frame_shift);
 649    }
 650 #endif
 651
 652    /* Perform FFT */
 653    spx_fft(st->fft_lookup, st->frame, st->ft);
 654
 655    /* Power spectrum */
 656    ps[0]=MULT16_16(st->ft[0],st->ft[0]);
 657    for (i=1;i<N;i++)
 658       ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
 659    for (i=0;i<N;i++)
 660       st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
 661
 662    filterbank_compute_bank32(st->bank, ps, ps+N);
 663 }
 664
 665 static void update_noise_prob(SpeexPreprocessState *st)
 666 {
 667    int i;
 668    int min_range;
 669    int N = st->ps_size;
 670
 671    for (i=1;i<N-1;i++)
 672       st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
 673                       + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
 674    st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
 675    st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
 676
 677    if (st->nb_adapt==1)
 678    {
 679       for (i=0;i<N;i++)
 680          st->Smin[i] = st->Stmp[i] = 0;
 681    }
 682
 683    if (st->nb_adapt < 100)
 684       min_range = 15;
 685    else if (st->nb_adapt < 1000)
 686       min_range = 50;
 687    else if (st->nb_adapt < 10000)
 688       min_range = 150;
 689    else
 690       min_range = 300;
 691    if (st->min_count > min_range)
 692    {
 693       st->min_count = 0;
 694       for (i=0;i<N;i++)
 695       {
 696          st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
 697          st->Stmp[i] = st->S[i];
 698       }
 699    } else {
 700       for (i=0;i<N;i++)
 701       {
 702          st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
 703          st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
 704       }
 705    }
 706    for (i=0;i<N;i++)
 707    {
 708       if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20)))
 709          st->update_prob[i] = 1;
 710       else
 711          st->update_prob[i] = 0;
 712       /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
 713       /*fprintf (stderr, "%f ", st->update_prob[i]);*/
 714    }
 715
 716 }
 717
 718 #define NOISE_OVERCOMPENS 1.
 719
 720 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
 721
 722 int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
 723 {
 724    return speex_preprocess_run(st, x);
 725 }
 726
 727 int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
 728 {
 729    int i;
 730    int M;
 731    int N = st->ps_size;
 732    int N3 = 2*N - st->frame_size;
 733    int N4 = st->frame_size - N3;
 734    spx_word32_t *ps=st->ps;
 735    spx_word32_t Zframe;
 736    spx_word16_t Pframe;
 737    spx_word16_t beta, beta_1;
 738    spx_word16_t effective_echo_suppress;
 739
 740    st->nb_adapt++;
 741    if (st->nb_adapt>20000)
 742       st->nb_adapt = 20000;
 743    st->min_count++;
 744
 745    beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
 746    beta_1 = Q15_ONE-beta;
 747    M = st->nbands;
 748    /* Deal with residual echo if provided */
 749    if (st->echo_state)
 750    {
 751       speex_echo_get_residual(st->echo_state, st->residual_echo, N);
 752 #ifndef FIXED_POINT
 753       /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
 754       if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
 755       {
 756          for (i=0;i<N;i++)
 757             st->residual_echo[i] = 0;
 758       }
 759 #endif
 760       for (i=0;i<N;i++)
 761          st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
 762       filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
 763    } else {
 764       for (i=0;i<N+M;i++)
 765          st->echo_noise[i] = 0;
 766    }
 767    preprocess_analysis(st, x);
 768
 769    update_noise_prob(st);
 770
 771    /* Noise estimation always updated for the 10 first frames */
 772    /*if (st->nb_adapt<10)
 773    {
 774       for (i=1;i<N-1;i++)
 775          st->update_prob[i] = 0;
 776    }
 777    */
 778
 779    /* Update the noise estimate for the frequencies where it can be */
 780    for (i=0;i<N;i++)
 781    {
 782       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
 783          st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
 784    }
 785    filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
 786
 787    /* Special case for first frame */
 788    if (st->nb_adapt==1)
 789       for (i=0;i<N+M;i++)
 790          st->old_ps[i] = ps[i];
 791
 792    /* Compute a posteriori SNR */
 793    for (i=0;i<N+M;i++)
 794    {
 795       spx_word16_t gamma;
 796
 797       /* Total noise estimate including residual echo and reverberation */
 798       spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
 799
 800       /* A posteriori SNR = ps/noise - 1*/
 801       st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
 802       st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
 803
 804       /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
 805       gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
 806
 807       /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
 808       st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
 809       st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
 810    }
 811
 812    /*print_vec(st->post, N+M, "");*/
 813
 814    /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
 815    st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
 816    for (i=1;i<N-1;i++)
 817       st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
 818                            MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
 819    for (i=N-1;i<N+M;i++)
 820       st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
 821
 822    /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
 823    Zframe = 0;
 824    for (i=N;i<N+M;i++)
 825       Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
 826    Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
 827
 828    effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
 829
 830    compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
 831
 832    /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
 833       Technically this is actually wrong because the EM gaim assumes a slightly different probability
 834       distribution */
 835    for (i=N;i<N+M;i++)
 836    {
 837       /* See EM and Cohen papers*/
 838       spx_word32_t theta;
 839       /* Gain from hypergeometric function */
 840       spx_word32_t MM;
 841       /* Weiner filter gain */
 842       spx_word16_t prior_ratio;
 843       /* a priority probability of speech presence based on Bark sub-band alone */
 844       spx_word16_t P1;
 845       /* Speech absence a priori probability (considering sub-band and frame) */
 846       spx_word16_t q;
 847 #ifdef FIXED_POINT
 848       spx_word16_t tmp;
 849 #endif
 850
 851       prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
 852       theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
 853
 854       MM = hypergeom_gain(theta);
 855       /* Gain with bound */
 856       st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
 857       /* Save old Bark power spectrum */
 858       st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
 859
 860       P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
 861       q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
 862 #ifdef FIXED_POINT
 863       theta = MIN32(theta, EXTEND32(32767));
 864 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
 865       tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
 866 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
 867       st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
 868 #else
 869       st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
 870 #endif
 871    }
 872    /* Convert the EM gains and speech prob to linear frequency */
 873    filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
 874    filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
 875
 876    /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
 877    if (1)
 878    {
 879       filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
 880
 881       /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
 882       for (i=0;i<N;i++)
 883       {
 884          spx_word32_t MM;
 885          spx_word32_t theta;
 886          spx_word16_t prior_ratio;
 887          spx_word16_t tmp;
 888          spx_word16_t p;
 889          spx_word16_t g;
 890
 891          /* Wiener filter gain */
 892          prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
 893          theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
 894
 895          /* Optimal estimator for loudness domain */
 896          MM = hypergeom_gain(theta);
 897          /* EM gain with bound */
 898          g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
 899          /* Interpolated speech probability of presence */
 900          p = st->gain2[i];
 901
 902          /* Constrain the gain to be close to the Bark scale gain */
 903          if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
 904             g = MULT16_16(3,st->gain[i]);
 905          st->gain[i] = g;
 906
 907          /* Save old power spectrum */
 908          st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
 909
 910          /* Apply gain floor */
 911          if (st->gain[i] < st->gain_floor[i])
 912             st->gain[i] = st->gain_floor[i];
 913
 914          /* Exponential decay model for reverberation (unused) */
 915          /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
 916
 917          /* Take into account speech probability of presence (loudness domain MMSE estimator) */
 918          /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
 919          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
 920          st->gain2[i]=SQR16_Q15(tmp);
 921
 922          /* Use this if you want a log-domain MMSE estimator instead */
 923          /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
 924       }
 925    } else {
 926       for (i=N;i<N+M;i++)
 927       {
 928          spx_word16_t tmp;
 929          spx_word16_t p = st->gain2[i];
 930          st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
 931          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
 932          st->gain2[i]=SQR16_Q15(tmp);
 933       }
 934       filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
 935    }
 936
 937    /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
 938    if (!st->denoise_enabled)
 939    {
 940       for (i=0;i<N+M;i++)
 941          st->gain2[i]=Q15_ONE;
 942    }
 943
 944    /* Apply computed gain */
 945    for (i=1;i<N;i++)
 946    {
 947       st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
 948       st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
 949    }
 950    st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
 951    st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
 952
 953    /*FIXME: This *will* not work for fixed-point */
 954 #ifndef FIXED_POINT
 955    if (st->agc_enabled)
 956       speex_compute_agc(st, Pframe, st->ft);
 957 #endif
 958
 959    /* Inverse FFT with 1/N scaling */
 960    spx_ifft(st->fft_lookup, st->ft, st->frame);
 961    /* Scale back to original (lower) amplitude */
 962    for (i=0;i<2*N;i++)
 963       st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
 964
 965    /*FIXME: This *will* not work for fixed-point */
 966 #ifndef FIXED_POINT
 967    if (st->agc_enabled)
 968    {
 969       float max_sample=0;
 970       for (i=0;i<2*N;i++)
 971          if (fabs(st->frame[i])>max_sample)
 972             max_sample = fabs(st->frame[i]);
 973       if (max_sample>28000.f)
 974       {
 975          float damp = 28000.f/max_sample;
 976          for (i=0;i<2*N;i++)
 977             st->frame[i] *= damp;
 978       }
 979    }
 980 #endif
 981
 982    /* Synthesis window (for WOLA) */
 983    for (i=0;i<2*N;i++)
 984       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
 985
 986    /* Perform overlap and add */
 987    for (i=0;i<N3;i++)
 988       x[i] = st->outbuf[i] + st->frame[i];
 989    for (i=0;i<N4;i++)
 990       x[N3+i] = st->frame[N3+i];
 991
 992    /* Update outbuf */
 993    for (i=0;i<N3;i++)
 994       st->outbuf[i] = st->frame[st->frame_size+i];
 995
 996    /* FIXME: This VAD is a kludge */
 997    if (st->vad_enabled)
 998    {
 999       if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue))
1000       {
1001          st->was_speech=1;
1002          return 1;
1003       } else
1004       {
1005          st->was_speech=0;
1006          return 0;
1007       }
1008    } else {
1009       return 1;
1010    }
1011 }
1012
1013 void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1014 {
1015    int i;
1016    int N = st->ps_size;
1017    int N3 = 2*N - st->frame_size;
1018    int M;
1019    spx_word32_t *ps=st->ps;
1020
1021    M = st->nbands;
1022    st->min_count++;
1023
1024    preprocess_analysis(st, x);
1025
1026    update_noise_prob(st);
1027
1028    for (i=1;i<N-1;i++)
1029    {
1030       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1031       {
1032          st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1033       }
1034    }
1035
1036    for (i=0;i<N3;i++)
1037       st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1038
1039    /* Save old power spectrum */
1040    for (i=0;i<N+M;i++)
1041       st->old_ps[i] = ps[i];
1042
1043    for (i=0;i<N;i++)
1044       st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1045 }
1046
1047
1048 int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1049 {
1050    int i;
1051    SpeexPreprocessState *st;
1052    st=(SpeexPreprocessState*)state;
1053    switch(request)
1054    {
1055    case SPEEX_PREPROCESS_SET_DENOISE:
1056       st->denoise_enabled = (*(spx_int32_t*)ptr);
1057       break;
1058    case SPEEX_PREPROCESS_GET_DENOISE:
1059       (*(spx_int32_t*)ptr) = st->denoise_enabled;
1060       break;
1061 #ifndef FIXED_POINT
1062    case SPEEX_PREPROCESS_SET_AGC:
1063       st->agc_enabled = (*(spx_int32_t*)ptr);
1064       break;
1065    case SPEEX_PREPROCESS_GET_AGC:
1066       (*(spx_int32_t*)ptr) = st->agc_enabled;
1067       break;
1068 #ifndef DISABLE_FLOAT_API
1069    case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1070       st->agc_level = (*(float*)ptr);
1071       if (st->agc_level<1)
1072          st->agc_level=1;
1073       if (st->agc_level>32768)
1074          st->agc_level=32768;
1075       break;
1076    case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1077       (*(float*)ptr) = st->agc_level;
1078       break;
1079 #endif /* #ifndef DISABLE_FLOAT_API */
1080    case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1081       st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1082       break;
1083    case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1084       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1085       break;
1086    case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1087       st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1088       break;
1089    case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1090       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1091       break;
1092    case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1093       st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1094       break;
1095    case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1096       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1097       break;
1098 #endif
1099    case SPEEX_PREPROCESS_SET_VAD:
1100       speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1101       st->vad_enabled = (*(spx_int32_t*)ptr);
1102       break;
1103    case SPEEX_PREPROCESS_GET_VAD:
1104       (*(spx_int32_t*)ptr) = st->vad_enabled;
1105       break;
1106
1107    case SPEEX_PREPROCESS_SET_DEREVERB:
1108       st->dereverb_enabled = (*(spx_int32_t*)ptr);
1109       for (i=0;i<st->ps_size;i++)
1110          st->reverb_estimate[i]=0;
1111       break;
1112    case SPEEX_PREPROCESS_GET_DEREVERB:
1113       (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1114       break;
1115
1116    case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1117       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1118       /*st->reverb_level = (*(float*)ptr);*/
1119       break;
1120    case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1121       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1122       /*(*(float*)ptr) = st->reverb_level;*/
1123       break;
1124
1125    case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1126       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1127       /*st->reverb_decay = (*(float*)ptr);*/
1128       break;
1129    case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1130       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1131       /*(*(float*)ptr) = st->reverb_decay;*/
1132       break;
1133
1134    case SPEEX_PREPROCESS_SET_PROB_START:
1135       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1136       st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1137       break;
1138    case SPEEX_PREPROCESS_GET_PROB_START:
1139       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1140       break;
1141
1142    case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1143       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1144       st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1145       break;
1146    case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1147       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1148       break;
1149
1150    case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1151       st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1152       break;
1153    case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1154       (*(spx_int32_t*)ptr) = st->noise_suppress;
1155       break;
1156    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1157       st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1158       break;
1159    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1160       (*(spx_int32_t*)ptr) = st->echo_suppress;
1161       break;
1162    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1163       st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1164       break;
1165    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1166       (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1167       break;
1168    case SPEEX_PREPROCESS_SET_ECHO_STATE:
1169       st->echo_state = (SpeexEchoState*)ptr;
1170       break;
1171    case SPEEX_PREPROCESS_GET_ECHO_STATE:
1172       ptr = (void*)st->echo_state;
1173       break;
1174 #ifndef FIXED_POINT
1175    case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1176       (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1177       break;
1178 #endif
1179
1180    default:
1181       speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1182       return -1;
1183    }
1184    return 0;
1185 }