apps/codecs/libspeex/mdf.c

   1 /* Copyright (C) 2003-2006 Jean-Marc Valin
   2
   3    File: mdf.c
   4    Echo canceller based on the MDF algorithm (see below)
   5
   6    Redistribution and use in source and binary forms, with or without
   7    modification, are permitted provided that the following conditions are
   8    met:
   9
  10    1. Redistributions of source code must retain the above copyright notice,
  11    this list of conditions and the following disclaimer.
  12
  13    2. Redistributions in binary form must reproduce the above copyright
  14    notice, this list of conditions and the following disclaimer in the
  15    documentation and/or other materials provided with the distribution.
  16
  17    3. The name of the author may not be used to endorse or promote products
  18    derived from this software without specific prior written permission.
  19
  20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30    POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33 /*
  34    The echo canceller is based on the MDF algorithm described in:
  35
  36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
  37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
  38    February 1990.
  39
  40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
  41    double-talk is achieved using a variable learning rate as described in:
  42
  43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
  44    Cancellation With Double-Talk. IEEE Transactions on Audio,
  45    Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
  46    http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
  47
  48    There is no explicit double-talk detection, but a continuous variation
  49    in the learning rate based on residual echo, double-talk and background
  50    noise.
  51
  52    About the fixed-point version:
  53    All the signals are represented with 16-bit words. The filter weights
  54    are represented with 32-bit words, but only the top 16 bits are used
  55    in most cases. The lower 16 bits are completely unreliable (due to the
  56    fact that the update is done only on the top bits), but help in the
  57    adaptation -- probably by removing a "threshold effect" due to
  58    quantization (rounding going to zero) when the gradient is small.
  59
  60    Another kludge that seems to work good: when performing the weight
  61    update, we only move half the way toward the "goal" this seems to
  62    reduce the effect of quantization noise in the update phase. This
  63    can be seen as applying a gradient descent on a "soft constraint"
  64    instead of having a hard constraint.
  65
  66 */
  67
  68 #ifdef HAVE_CONFIG_H
  69 #include "config-speex.h"
  70 #endif
  71
  72 #include "arch.h"
  73 #include "speex/speex_echo.h"
  74 #include "fftwrap.h"
  75 #include "pseudofloat.h"
  76 #include "math_approx.h"
  77 #include "os_support.h"
  78
  79 #ifndef M_PI
  80 #define M_PI 3.14159265358979323846
  81 #endif
  82
  83 #ifdef FIXED_POINT
  84 #define WEIGHT_SHIFT 11
  85 #define NORMALIZE_SCALEDOWN 5
  86 #define NORMALIZE_SCALEUP 3
  87 #else
  88 #define WEIGHT_SHIFT 0
  89 #endif
  90
  91 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
  92    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
  93 #define TWO_PATH
  94
  95 #ifdef FIXED_POINT
  96 static const spx_float_t MIN_LEAK = {20972, -22};
  97
  98 /* Constants for the two-path filter */
  99 static const spx_float_t VAR1_SMOOTH = {23593, -16};
 100 static const spx_float_t VAR2_SMOOTH = {23675, -15};
 101 static const spx_float_t VAR1_UPDATE = {16384, -15};
 102 static const spx_float_t VAR2_UPDATE = {16384, -16};
 103 static const spx_float_t VAR_BACKTRACK = {16384, -12};
 104 #define TOP16(x) ((x)>>16)
 105
 106 #else
 107
 108 static const spx_float_t MIN_LEAK = .005f;
 109
 110 /* Constants for the two-path filter */
 111 static const spx_float_t VAR1_SMOOTH = .36f;
 112 static const spx_float_t VAR2_SMOOTH = .7225f;
 113 static const spx_float_t VAR1_UPDATE = .5f;
 114 static const spx_float_t VAR2_UPDATE = .25f;
 115 static const spx_float_t VAR_BACKTRACK = 4.f;
 116 #define TOP16(x) (x)
 117 #endif
 118
 119
 120 #define PLAYBACK_DELAY 2
 121
 122 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
 123
 124
 125 /** Speex echo cancellation state. */
 126 struct SpeexEchoState_ {
 127    int frame_size;           /**< Number of samples processed each time */
 128    int window_size;
 129    int M;
 130    int cancel_count;
 131    int adapted;
 132    int saturated;
 133    int screwed_up;
 134    spx_int32_t sampling_rate;
 135    spx_word16_t spec_average;
 136    spx_word16_t beta0;
 137    spx_word16_t beta_max;
 138    spx_word32_t sum_adapt;
 139    spx_word16_t leak_estimate;
 140
 141    spx_word16_t *e;      /* scratch */
 142    spx_word16_t *x;      /* Far-end input buffer (2N) */
 143    spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
 144    spx_word16_t *input;  /* scratch */
 145    spx_word16_t *y;      /* scratch */
 146    spx_word16_t *last_y;
 147    spx_word16_t *Y;      /* scratch */
 148    spx_word16_t *E;
 149    spx_word32_t *PHI;    /* scratch */
 150    spx_word32_t *W;      /* (Background) filter weights */
 151 #ifdef TWO_PATH
 152    spx_word16_t *foreground; /* Foreground filter weights */
 153    spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
 154    spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
 155    spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
 156    spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
 157 #endif
 158    spx_word32_t *power;  /* Power of the far-end signal */
 159    spx_float_t  *power_1;/* Inverse power of far-end */
 160    spx_word16_t *wtmp;   /* scratch */
 161 #ifdef FIXED_POINT
 162    spx_word16_t *wtmp2;  /* scratch */
 163 #endif
 164    spx_word32_t *Rf;     /* scratch */
 165    spx_word32_t *Yf;     /* scratch */
 166    spx_word32_t *Xf;     /* scratch */
 167    spx_word32_t *Eh;
 168    spx_word32_t *Yh;
 169    spx_float_t   Pey;
 170    spx_float_t   Pyy;
 171    spx_word16_t *window;
 172    spx_word16_t *prop;
 173    void *fft_table;
 174    spx_word16_t memX, memD, memE;
 175    spx_word16_t preemph;
 176    spx_word16_t notch_radius;
 177    spx_mem_t notch_mem[2];
 178
 179    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
 180    spx_int16_t *play_buf;
 181    int play_buf_pos;
 182    int play_buf_started;
 183 };
 184
 185 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
 186 {
 187    int i;
 188    spx_word16_t den2;
 189 #ifdef FIXED_POINT
 190    den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
 191 #else
 192    den2 = radius*radius + .7*(1-radius)*(1-radius);
 193 #endif
 194    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
 195    for (i=0;i<len;i++)
 196    {
 197       spx_word16_t vin = in[i];
 198       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
 199 #ifdef FIXED_POINT
 200       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
 201 #else
 202       mem[0] = mem[1] + 2*(-vin + radius*vout);
 203 #endif
 204       mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
 205       out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
 206    }
 207 }
 208
 209 /* This inner product is slightly different from the codec version because of fixed-point */
 210 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 211 {
 212    spx_word32_t sum=0;
 213    len >>= 1;
 214    while(len--)
 215    {
 216       spx_word32_t part=0;
 217       part = MAC16_16(part,*x++,*y++);
 218       part = MAC16_16(part,*x++,*y++);
 219       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
 220       sum = ADD32(sum,SHR32(part,6));
 221    }
 222    return sum;
 223 }
 224
 225 /** Compute power spectrum of a half-complex (packed) vector */
 226 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
 227 {
 228    int i, j;
 229    ps[0]=MULT16_16(X[0],X[0]);
 230    for (i=1,j=1;i<N-1;i+=2,j++)
 231    {
 232       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
 233    }
 234    ps[j]=MULT16_16(X[i],X[i]);
 235 }
 236
 237 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 238 #ifdef FIXED_POINT
 239 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 240 {
 241    int i,j;
 242    spx_word32_t tmp1=0,tmp2=0;
 243    for (j=0;j<M;j++)
 244    {
 245       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
 246    }
 247    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
 248    for (i=1;i<N-1;i+=2)
 249    {
 250       tmp1 = tmp2 = 0;
 251       for (j=0;j<M;j++)
 252       {
 253          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
 254          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
 255       }
 256       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
 257       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
 258    }
 259    tmp1 = tmp2 = 0;
 260    for (j=0;j<M;j++)
 261    {
 262       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
 263    }
 264    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
 265 }
 266 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
 267 {
 268    int i,j;
 269    spx_word32_t tmp1=0,tmp2=0;
 270    for (j=0;j<M;j++)
 271    {
 272       tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
 273    }
 274    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
 275    for (i=1;i<N-1;i+=2)
 276    {
 277       tmp1 = tmp2 = 0;
 278       for (j=0;j<M;j++)
 279       {
 280          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
 281          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
 282       }
 283       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
 284       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
 285    }
 286    tmp1 = tmp2 = 0;
 287    for (j=0;j<M;j++)
 288    {
 289       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
 290    }
 291    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
 292 }
 293
 294 #else
 295 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 296 {
 297    int i,j;
 298    for (i=0;i<N;i++)
 299       acc[i] = 0;
 300    for (j=0;j<M;j++)
 301    {
 302       acc[0] += X[0]*Y[0];
 303       for (i=1;i<N-1;i+=2)
 304       {
 305          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
 306          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
 307       }
 308       acc[i] += X[i]*Y[i];
 309       X += N;
 310       Y += N;
 311    }
 312 }
 313 #define spectral_mul_accum16 spectral_mul_accum
 314 #endif
 315
 316 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
 317 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
 318 {
 319    int i, j;
 320    spx_float_t W;
 321    W = FLOAT_AMULT(p, w[0]);
 322    prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
 323    for (i=1,j=1;i<N-1;i+=2,j++)
 324    {
 325       W = FLOAT_AMULT(p, w[j]);
 326       prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
 327       prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
 328    }
 329    W = FLOAT_AMULT(p, w[j]);
 330    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
 331 }
 332
 333 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)
 334 {
 335    int i, j;
 336    spx_word16_t max_sum = 1;
 337    spx_word32_t prop_sum = 1;
 338    for (i=0;i<M;i++)
 339    {
 340       spx_word32_t tmp = 1;
 341       for (j=0;j<N;j++)
 342          tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18)));
 343 #ifdef FIXED_POINT
 344       /* Just a security in case an overflow were to occur */
 345       tmp = MIN32(ABS32(tmp), 536870912);
 346 #endif
 347       prop[i] = spx_sqrt(tmp);
 348       if (prop[i] > max_sum)
 349          max_sum = prop[i];
 350    }
 351    for (i=0;i<M;i++)
 352    {
 353       prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
 354       prop_sum += EXTEND32(prop[i]);
 355    }
 356    for (i=0;i<M;i++)
 357    {
 358       prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
 359       /*printf ("%f ", prop[i]);*/
 360    }
 361    /*printf ("\n");*/
 362 }
 363
 364 #ifdef DUMP_ECHO_CANCEL_DATA
 365 #include <stdio.h>
 366 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
 367
 368 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
 369 {
 370    if (!(rFile && pFile && oFile))
 371    {
 372       speex_fatal("Dump files not open");
 373    }
 374    fwrite(rec, sizeof(spx_int16_t), len, rFile);
 375    fwrite(play, sizeof(spx_int16_t), len, pFile);
 376    fwrite(out, sizeof(spx_int16_t), len, oFile);
 377 }
 378 #endif
 379
 380 /** Creates a new echo canceller state */
 381 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 382 {
 383    int i,N,M;
 384    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
 385
 386 #ifdef DUMP_ECHO_CANCEL_DATA
 387    if (rFile || pFile || oFile)
 388       speex_fatal("Opening dump files twice");
 389    rFile = fopen("aec_rec.sw", "wb");
 390    pFile = fopen("aec_play.sw", "wb");
 391    oFile = fopen("aec_out.sw", "wb");
 392 #endif
 393
 394    st->frame_size = frame_size;
 395    st->window_size = 2*frame_size;
 396    N = st->window_size;
 397    M = st->M = (filter_length+st->frame_size-1)/frame_size;
 398    st->cancel_count=0;
 399    st->sum_adapt = 0;
 400    st->saturated = 0;
 401    st->screwed_up = 0;
 402    /* This is the default sampling rate */
 403    st->sampling_rate = 8000;
 404    st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
 405 #ifdef FIXED_POINT
 406    st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
 407    st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
 408 #else
 409    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
 410    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
 411 #endif
 412    st->leak_estimate = 0;
 413
 414    st->fft_table = spx_fft_init(N);
 415
 416    st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 417    st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 418    st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t));
 419    st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 420    st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 421    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 422    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 423    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 424    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 425    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 426
 427    st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t));
 428    st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 429    st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 430    st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
 431 #ifdef TWO_PATH
 432    st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
 433 #endif
 434    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
 435    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
 436    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
 437    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 438    st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
 439    st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 440 #ifdef FIXED_POINT
 441    st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
 442    for (i=0;i<N>>1;i++)
 443    {
 444       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
 445       st->window[N-i-1] = st->window[i];
 446    }
 447 #else
 448    for (i=0;i<N;i++)
 449       st->window[i] = .5-.5*cos(2*M_PI*i/N);
 450 #endif
 451    for (i=0;i<=st->frame_size;i++)
 452       st->power_1[i] = FLOAT_ONE;
 453    for (i=0;i<N*M;i++)
 454       st->W[i] = 0;
 455    {
 456       spx_word32_t sum = 0;
 457       /* Ratio of ~10 between adaptation rate of first and last block */
 458       spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
 459       st->prop[0] = QCONST16(.7, 15);
 460       sum = EXTEND32(st->prop[0]);
 461       for (i=1;i<M;i++)
 462       {
 463          st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
 464          sum = ADD32(sum, EXTEND32(st->prop[i]));
 465       }
 466       for (i=M-1;i>=0;i--)
 467       {
 468          st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum);
 469       }
 470    }
 471
 472    st->memX=st->memD=st->memE=0;
 473    st->preemph = QCONST16(.9,15);
 474    if (st->sampling_rate<12000)
 475       st->notch_radius = QCONST16(.9, 15);
 476    else if (st->sampling_rate<24000)
 477       st->notch_radius = QCONST16(.982, 15);
 478    else
 479       st->notch_radius = QCONST16(.992, 15);
 480
 481    st->notch_mem[0] = st->notch_mem[1] = 0;
 482    st->adapted = 0;
 483    st->Pey = st->Pyy = FLOAT_ONE;
 484
 485 #ifdef TWO_PATH
 486    st->Davg1 = st->Davg2 = 0;
 487    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 488 #endif
 489
 490    st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
 491    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
 492    st->play_buf_started = 0;
 493
 494    return st;
 495 }
 496
 497 /** Resets echo canceller state */
 498 void speex_echo_state_reset(SpeexEchoState *st)
 499 {
 500    int i, M, N;
 501    st->cancel_count=0;
 502    st->screwed_up = 0;
 503    N = st->window_size;
 504    M = st->M;
 505    for (i=0;i<N*M;i++)
 506       st->W[i] = 0;
 507 #ifdef TWO_PATH
 508    for (i=0;i<N*M;i++)
 509       st->foreground[i] = 0;
 510 #endif
 511    for (i=0;i<N*(M+1);i++)
 512       st->X[i] = 0;
 513    for (i=0;i<=st->frame_size;i++)
 514    {
 515       st->power[i] = 0;
 516       st->power_1[i] = FLOAT_ONE;
 517       st->Eh[i] = 0;
 518       st->Yh[i] = 0;
 519    }
 520    for (i=0;i<st->frame_size;i++)
 521    {
 522       st->last_y[i] = 0;
 523    }
 524    for (i=0;i<N;i++)
 525    {
 526       st->E[i] = 0;
 527       st->x[i] = 0;
 528    }
 529    st->notch_mem[0] = st->notch_mem[1] = 0;
 530    st->memX=st->memD=st->memE=0;
 531
 532    st->saturated = 0;
 533    st->adapted = 0;
 534    st->sum_adapt = 0;
 535    st->Pey = st->Pyy = FLOAT_ONE;
 536 #ifdef TWO_PATH
 537    st->Davg1 = st->Davg2 = 0;
 538    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 539 #endif
 540    for (i=0;i<3*st->frame_size;i++)
 541       st->play_buf[i] = 0;
 542    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
 543    st->play_buf_started = 0;
 544
 545 }
 546
 547 /** Destroys an echo canceller state */
 548 void speex_echo_state_destroy(SpeexEchoState *st)
 549 {
 550    spx_fft_destroy(st->fft_table);
 551
 552    speex_free(st->e);
 553    speex_free(st->x);
 554    speex_free(st->input);
 555    speex_free(st->y);
 556    speex_free(st->last_y);
 557    speex_free(st->Yf);
 558    speex_free(st->Rf);
 559    speex_free(st->Xf);
 560    speex_free(st->Yh);
 561    speex_free(st->Eh);
 562
 563    speex_free(st->X);
 564    speex_free(st->Y);
 565    speex_free(st->E);
 566    speex_free(st->W);
 567 #ifdef TWO_PATH
 568    speex_free(st->foreground);
 569 #endif
 570    speex_free(st->PHI);
 571    speex_free(st->power);
 572    speex_free(st->power_1);
 573    speex_free(st->window);
 574    speex_free(st->prop);
 575    speex_free(st->wtmp);
 576 #ifdef FIXED_POINT
 577    speex_free(st->wtmp2);
 578 #endif
 579    speex_free(st->play_buf);
 580    speex_free(st);
 581
 582 #ifdef DUMP_ECHO_CANCEL_DATA
 583    fclose(rFile);
 584    fclose(pFile);
 585    fclose(oFile);
 586    rFile = pFile = oFile = NULL;
 587 #endif
 588 }
 589
 590 void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
 591 {
 592    int i;
 593    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
 594    st->play_buf_started = 1;
 595    if (st->play_buf_pos>=st->frame_size)
 596    {
 597       speex_echo_cancellation(st, rec, st->play_buf, out);
 598       st->play_buf_pos -= st->frame_size;
 599       for (i=0;i<st->play_buf_pos;i++)
 600          st->play_buf[i] = st->play_buf[i+st->frame_size];
 601    } else {
 602       speex_warning("No playback frame available (your application is buggy and/or got xruns)");
 603       if (st->play_buf_pos!=0)
 604       {
 605          speex_warning("internal playback buffer corruption?");
 606          st->play_buf_pos = 0;
 607       }
 608       for (i=0;i<st->frame_size;i++)
 609          out[i] = rec[i];
 610    }
 611 }
 612
 613 void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
 614 {
 615    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
 616    if (!st->play_buf_started)
 617    {
 618       speex_warning("discarded first playback frame");
 619       return;
 620    }
 621    if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
 622    {
 623       int i;
 624       for (i=0;i<st->frame_size;i++)
 625          st->play_buf[st->play_buf_pos+i] = play[i];
 626       st->play_buf_pos += st->frame_size;
 627       if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
 628       {
 629          speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
 630          for (i=0;i<st->frame_size;i++)
 631             st->play_buf[st->play_buf_pos+i] = play[i];
 632          st->play_buf_pos += st->frame_size;
 633       }
 634    } else {
 635       speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
 636    }
 637 }
 638
 639 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
 640 void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
 641 {
 642    speex_echo_cancellation(st, in, far_end, out);
 643 }
 644
 645 /** Performs echo cancellation on a frame */
 646 void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
 647 {
 648    int i,j;
 649    int N,M;
 650    spx_word32_t Syy,See,Sxx,Sdd, Sff;
 651 #ifdef TWO_PATH
 652    spx_word32_t Dbf;
 653    int update_foreground;
 654 #endif
 655    spx_word32_t Sey;
 656    spx_word16_t ss, ss_1;
 657    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
 658    spx_float_t alpha, alpha_1;
 659    spx_word16_t RER;
 660    spx_word32_t tmp32;
 661
 662    N = st->window_size;
 663    M = st->M;
 664    st->cancel_count++;
 665 #ifdef FIXED_POINT
 666    ss=DIV32_16(11469,M);
 667    ss_1 = SUB16(32767,ss);
 668 #else
 669    ss=.35/M;
 670    ss_1 = 1-ss;
 671 #endif
 672
 673    /* Apply a notch filter to make sure DC doesn't end up causing problems */
 674    filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
 675    /* Copy input data to buffer and apply pre-emphasis */
 676    for (i=0;i<st->frame_size;i++)
 677    {
 678       spx_word32_t tmp32;
 679       tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
 680 #ifdef FIXED_POINT
 681       /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
 682       if (tmp32 > 32767)
 683       {
 684          tmp32 = 32767;
 685          st->saturated = M+1;
 686       }
 687       if (tmp32 < -32767)
 688       {
 689          tmp32 = -32767;
 690          st->saturated = M+1;
 691       }
 692 #endif
 693       st->x[i+st->frame_size] = EXTRACT16(tmp32);
 694       st->memX = far_end[i];
 695
 696       tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
 697 #ifdef FIXED_POINT
 698       if (tmp32 > 32767)
 699       {
 700          tmp32 = 32767;
 701          if (st->saturated == 0)
 702             st->saturated = 1;
 703       }
 704       if (tmp32 < -32767)
 705       {
 706          tmp32 = -32767;
 707          if (st->saturated == 0)
 708             st->saturated = 1;
 709       }
 710 #endif
 711       st->memD = st->input[i];
 712       st->input[i] = tmp32;
 713    }
 714
 715    /* Shift memory: this could be optimized eventually*/
 716    for (j=M-1;j>=0;j--)
 717    {
 718       for (i=0;i<N;i++)
 719          st->X[(j+1)*N+i] = st->X[j*N+i];
 720    }
 721
 722    /* Convert x (far end) to frequency domain */
 723    spx_fft(st->fft_table, st->x, &st->X[0]);
 724    for (i=0;i<N;i++)
 725       st->last_y[i] = st->x[i];
 726    Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
 727    for (i=0;i<st->frame_size;i++)
 728       st->x[i] = st->x[i+st->frame_size];
 729    /* From here on, the top part of x is used as scratch space */
 730
 731 #ifdef TWO_PATH
 732    /* Compute foreground filter */
 733    spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);
 734    spx_ifft(st->fft_table, st->Y, st->e);
 735    for (i=0;i<st->frame_size;i++)
 736       st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]);
 737    Sff = mdf_inner_prod(st->e, st->e, st->frame_size);
 738 #endif
 739
 740    /* Adjust proportional adaption rate */
 741    mdf_adjust_prop (st->W, N, M, st->prop);
 742    /* Compute weight gradient */
 743    if (st->saturated == 0)
 744    {
 745       for (j=M-1;j>=0;j--)
 746       {
 747          weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N);
 748          for (i=0;i<N;i++)
 749             st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]);
 750
 751       }
 752    } else {
 753       st->saturated--;
 754    }
 755
 756    /* Update weight to prevent circular convolution (MDF / AUMDF) */
 757    for (j=0;j<M;j++)
 758    {
 759       /* This is a variant of the Alternatively Updated MDF (AUMDF) */
 760       /* Remove the "if" to make this an MDF filter */
 761       if (j==0 || st->cancel_count%(M-1) == j-1)
 762       {
 763 #ifdef FIXED_POINT
 764          for (i=0;i<N;i++)
 765             st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
 766          spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
 767          for (i=0;i<st->frame_size;i++)
 768          {
 769             st->wtmp[i]=0;
 770          }
 771          for (i=st->frame_size;i<N;i++)
 772          {
 773             st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
 774          }
 775          spx_fft(st->fft_table, st->wtmp, st->wtmp2);
 776          /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
 777          for (i=0;i<N;i++)
 778             st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
 779 #else
 780          spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
 781          for (i=st->frame_size;i<N;i++)
 782          {
 783             st->wtmp[i]=0;
 784          }
 785          spx_fft(st->fft_table, st->wtmp, &st->W[j*N]);
 786 #endif
 787       }
 788    }
 789
 790    /* Compute filter response Y */
 791    spectral_mul_accum(st->X, st->W, st->Y, N, M);
 792    spx_ifft(st->fft_table, st->Y, st->y);
 793
 794 #ifdef TWO_PATH
 795    /* Difference in response, this is used to estimate the variance of our residual power estimate */
 796    for (i=0;i<st->frame_size;i++)
 797       st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]);
 798    Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size);
 799 #endif
 800
 801    for (i=0;i<st->frame_size;i++)
 802       st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
 803    See = mdf_inner_prod(st->e, st->e, st->frame_size);
 804 #ifndef TWO_PATH
 805    Sff = See;
 806 #endif
 807
 808 #ifdef TWO_PATH
 809    /* Logic for updating the foreground filter */
 810
 811    /* For two time windows, compute the mean of the energy difference, as well as the variance */
 812    st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
 813    st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
 814    st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
 815    st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
 816
 817    /* Equivalent float code:
 818    st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
 819    st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
 820    st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
 821    st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
 822    */
 823
 824    update_foreground = 0;
 825    /* Check if we have a statistically significant reduction in the residual echo */
 826    /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
 827    if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
 828       update_foreground = 1;
 829    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
 830       update_foreground = 1;
 831    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
 832       update_foreground = 1;
 833
 834    /* Do we update? */
 835    if (update_foreground)
 836    {
 837       st->Davg1 = st->Davg2 = 0;
 838       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 839       /* Copy background filter to foreground filter */
 840       for (i=0;i<N*M;i++)
 841          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
 842       /* Apply a smooth transition so as to not introduce blocking artifacts */
 843       for (i=0;i<st->frame_size;i++)
 844          st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
 845    } else {
 846       int reset_background=0;
 847       /* Otherwise, check if the background filter is significantly worse */
 848       if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
 849          reset_background = 1;
 850       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
 851          reset_background = 1;
 852       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
 853          reset_background = 1;
 854       if (reset_background)
 855       {
 856          /* Copy foreground filter to background filter */
 857          for (i=0;i<N*M;i++)
 858             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
 859          /* We also need to copy the output so as to get correct adaptation */
 860          for (i=0;i<st->frame_size;i++)
 861             st->y[i+st->frame_size] = st->e[i+st->frame_size];
 862          for (i=0;i<st->frame_size;i++)
 863             st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
 864          See = Sff;
 865          st->Davg1 = st->Davg2 = 0;
 866          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 867       }
 868    }
 869 #endif
 870
 871    /* Compute error signal (for the output with de-emphasis) */
 872    for (i=0;i<st->frame_size;i++)
 873    {
 874       spx_word32_t tmp_out;
 875 #ifdef TWO_PATH
 876       tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size]));
 877 #else
 878       tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size]));
 879 #endif
 880       /* Saturation */
 881       if (tmp_out>32767)
 882          tmp_out = 32767;
 883       else if (tmp_out<-32768)
 884          tmp_out = -32768;
 885       tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
 886       /* This is an arbitrary test for saturation in the microphone signal */
 887       if (in[i] <= -32000 || in[i] >= 32000)
 888       {
 889          tmp_out = 0;
 890          if (st->saturated == 0)
 891             st->saturated = 1;
 892       }
 893       out[i] = (spx_int16_t)tmp_out;
 894       st->memE = tmp_out;
 895    }
 896
 897 #ifdef DUMP_ECHO_CANCEL_DATA
 898    dump_audio(in, far_end, out, st->frame_size);
 899 #endif
 900
 901    /* Compute error signal (filter update version) */
 902    for (i=0;i<st->frame_size;i++)
 903    {
 904       st->e[i+st->frame_size] = st->e[i];
 905       st->e[i] = 0;
 906    }
 907
 908    /* Compute a bunch of correlations */
 909    Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
 910    Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
 911    Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
 912
 913    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
 914
 915    /* Do some sanity check */
 916    if (!(Syy>=0 && Sxx>=0 && See >= 0)
 917 #ifndef FIXED_POINT
 918        || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
 919 #endif
 920       )
 921    {
 922       /* Things have gone really bad */
 923       st->screwed_up += 50;
 924       for (i=0;i<st->frame_size;i++)
 925          out[i] = 0;
 926    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
 927    {
 928       /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
 929       st->screwed_up++;
 930    } else {
 931       /* Everything's fine */
 932       st->screwed_up=0;
 933    }
 934    if (st->screwed_up>=50)
 935    {
 936       speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
 937       speex_echo_state_reset(st);
 938       return;
 939    }
 940
 941    /* Add a small noise floor to make sure not to have problems when dividing */
 942    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
 943
 944    /* Convert error to frequency domain */
 945    spx_fft(st->fft_table, st->e, st->E);
 946    for (i=0;i<st->frame_size;i++)
 947       st->y[i] = 0;
 948    spx_fft(st->fft_table, st->y, st->Y);
 949
 950    /* Compute power spectrum of far end (X), error (E) and filter response (Y) */
 951    power_spectrum(st->E, st->Rf, N);
 952    power_spectrum(st->Y, st->Yf, N);
 953    power_spectrum(st->X, st->Xf, N);
 954
 955    /* Smooth far end energy estimate over time */
 956    for (j=0;j<=st->frame_size;j++)
 957       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
 958
 959    /* Enable this to compute the power based only on the tail (would need to compute more
 960       efficiently to make this really useful */
 961    if (0)
 962    {
 963       float scale2 = .5f/M;
 964       for (j=0;j<=st->frame_size;j++)
 965          st->power[j] = 100;
 966       for (i=0;i<M;i++)
 967       {
 968          power_spectrum(&st->X[i*N], st->Xf, N);
 969          for (j=0;j<=st->frame_size;j++)
 970             st->power[j] += scale2*st->Xf[j];
 971       }
 972    }
 973
 974    /* Compute filtered spectra and (cross-)correlations */
 975    for (j=st->frame_size;j>=0;j--)
 976    {
 977       spx_float_t Eh, Yh;
 978       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
 979       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
 980       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
 981       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
 982 #ifdef FIXED_POINT
 983       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
 984       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
 985 #else
 986       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
 987       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
 988 #endif
 989    }
 990
 991    Pyy = FLOAT_SQRT(Pyy);
 992    Pey = FLOAT_DIVU(Pey,Pyy);
 993
 994    /* Compute correlation updatete rate */
 995    tmp32 = MULT16_32_Q15(st->beta0,Syy);
 996    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
 997       tmp32 = MULT16_32_Q15(st->beta_max,See);
 998    alpha = FLOAT_DIV32(tmp32, See);
 999    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1000    /* Update correlations (recursive average) */
1001    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1002    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1003    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1004       st->Pyy = FLOAT_ONE;
1005    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1006    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1007       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1008    if (FLOAT_GT(st->Pey, st->Pyy))
1009       st->Pey = st->Pyy;
1010    /* leak_estimate is the linear regression result */
1011    st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1012    /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1013    if (st->leak_estimate > 16383)
1014       st->leak_estimate = 32767;
1015    else
1016       st->leak_estimate = SHL16(st->leak_estimate,1);
1017    /*printf ("%f\n", st->leak_estimate);*/
1018
1019    /* Compute Residual to Error Ratio */
1020 #ifdef FIXED_POINT
1021    tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1022    tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1023    /* Check for y in e (lower bound on RER) */
1024    {
1025       spx_float_t bound = PSEUDOFLOAT(Sey);
1026       bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1027       if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1028          tmp32 = See;
1029       else if (tmp32 < FLOAT_EXTRACT32(bound))
1030          tmp32 = FLOAT_EXTRACT32(bound);
1031    }
1032    if (tmp32 > SHR32(See,1))
1033       tmp32 = SHR32(See,1);
1034    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1035 #else
1036    RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1037    /* Check for y in e (lower bound on RER) */
1038    if (RER < Sey*Sey/(1+See*Syy))
1039       RER = Sey*Sey/(1+See*Syy);
1040    if (RER > .5)
1041       RER = .5;
1042 #endif
1043
1044    /* We consider that the filter has had minimal adaptation if the following is true*/
1045    if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1046    {
1047       st->adapted = 1;
1048    }
1049
1050    if (st->adapted)
1051    {
1052       /* Normal learning rate calculation once we're past the minimal adaptation phase */
1053       for (i=0;i<=st->frame_size;i++)
1054       {
1055          spx_word32_t r, e;
1056          /* Compute frequency-domain adaptation mask */
1057          r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1058          e = SHL32(st->Rf[i],3)+1;
1059 #ifdef FIXED_POINT
1060          if (r>SHR32(e,1))
1061             r = SHR32(e,1);
1062 #else
1063          if (r>.5*e)
1064             r = .5*e;
1065 #endif
1066          r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1067          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1068          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1069       }
1070    } else {
1071       /* Temporary adaption rate if filter is not yet adapted enough */
1072       spx_word16_t adapt_rate=0;
1073
1074       if (Sxx > SHR32(MULT16_16(N, 1000),6))
1075       {
1076          tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1077 #ifdef FIXED_POINT
1078          if (tmp32 > SHR32(See,2))
1079             tmp32 = SHR32(See,2);
1080 #else
1081          if (tmp32 > .25*See)
1082             tmp32 = .25*See;
1083 #endif
1084          adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1085       }
1086       for (i=0;i<=st->frame_size;i++)
1087          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1088
1089
1090       /* How much have we adapted so far? */
1091       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1092    }
1093
1094    /* Save residual echo so it can be used by the nonlinear processor */
1095    if (st->adapted)
1096    {
1097       /* If the filter is adapted, take the filtered echo */
1098       for (i=0;i<st->frame_size;i++)
1099          st->last_y[i] = st->last_y[st->frame_size+i];
1100       for (i=0;i<st->frame_size;i++)
1101          st->last_y[st->frame_size+i] = in[i]-out[i];
1102    } else {
1103       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1104       /* moved earlier: for (i=0;i<N;i++)
1105       st->last_y[i] = st->x[i];*/
1106    }
1107
1108 }
1109
1110 /* Compute spectrum of estimated echo for use in an echo post-filter */
1111 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1112 {
1113    int i;
1114    spx_word16_t leak2;
1115    int N;
1116
1117    N = st->window_size;
1118
1119    /* Apply hanning window (should pre-compute it)*/
1120    for (i=0;i<N;i++)
1121       st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1122
1123    /* Compute power spectrum of the echo */
1124    spx_fft(st->fft_table, st->y, st->Y);
1125    power_spectrum(st->Y, residual_echo, N);
1126
1127 #ifdef FIXED_POINT
1128    if (st->leak_estimate > 16383)
1129       leak2 = 32767;
1130    else
1131       leak2 = SHL16(st->leak_estimate, 1);
1132 #else
1133    if (st->leak_estimate>.5)
1134       leak2 = 1;
1135    else
1136       leak2 = 2*st->leak_estimate;
1137 #endif
1138    /* Estimate residual echo */
1139    for (i=0;i<=st->frame_size;i++)
1140       residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1141
1142 }
1143
1144 int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1145 {
1146    switch(request)
1147    {
1148
1149       case SPEEX_ECHO_GET_FRAME_SIZE:
1150          (*(int*)ptr) = st->frame_size;
1151          break;
1152       case SPEEX_ECHO_SET_SAMPLING_RATE:
1153          st->sampling_rate = (*(int*)ptr);
1154          st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1155 #ifdef FIXED_POINT
1156          st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1157          st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1158 #else
1159          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1160          st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1161 #endif
1162          if (st->sampling_rate<12000)
1163             st->notch_radius = QCONST16(.9, 15);
1164          else if (st->sampling_rate<24000)
1165             st->notch_radius = QCONST16(.982, 15);
1166          else
1167             st->notch_radius = QCONST16(.992, 15);
1168          break;
1169       case SPEEX_ECHO_GET_SAMPLING_RATE:
1170          (*(int*)ptr) = st->sampling_rate;
1171          break;
1172       default:
1173          speex_warning_int("Unknown speex_echo_ctl request: ", request);
1174          return -1;
1175    }
1176    return 0;
1177 }