1 /* Copyright (C) 2003-2006 Jean-Marc Valin
4 Echo canceller based on the MDF algorithm (see below)
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 3. The name of the author may not be used to endorse or promote products
18 derived from this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
34 The echo canceller is based on the MDF algorithm described in:
36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41 double-talk is achieved using a variable learning rate as described in:
43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44 Cancellation With Double-Talk. IEEE Transactions on Audio,
45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
48 There is no explicit double-talk detection, but a continuous variation
49 in the learning rate based on residual echo, double-talk and background
52 About the fixed-point version:
53 All the signals are represented with 16-bit words. The filter weights
54 are represented with 32-bit words, but only the top 16 bits are used
55 in most cases. The lower 16 bits are completely unreliable (due to the
56 fact that the update is done only on the top bits), but help in the
57 adaptation -- probably by removing a "threshold effect" due to
58 quantization (rounding going to zero) when the gradient is small.
60 Another kludge that seems to work good: when performing the weight
61 update, we only move half the way toward the "goal" this seems to
62 reduce the effect of quantization noise in the update phase. This
63 can be seen as applying a gradient descent on a "soft constraint"
64 instead of having a hard constraint.
69 #include "config-speex.h"
73 #include "speex/speex_echo.h"
75 #include "pseudofloat.h"
76 #include "math_approx.h"
77 #include "os_support.h"
80 #define M_PI 3.14159265358979323846
84 #define WEIGHT_SHIFT 11
85 #define NORMALIZE_SCALEDOWN 5
86 #define NORMALIZE_SCALEUP 3
88 #define WEIGHT_SHIFT 0
91 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
92 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
96 static const spx_float_t MIN_LEAK
= {20972, -22};
98 /* Constants for the two-path filter */
99 static const spx_float_t VAR1_SMOOTH
= {23593, -16};
100 static const spx_float_t VAR2_SMOOTH
= {23675, -15};
101 static const spx_float_t VAR1_UPDATE
= {16384, -15};
102 static const spx_float_t VAR2_UPDATE
= {16384, -16};
103 static const spx_float_t VAR_BACKTRACK
= {16384, -12};
104 #define TOP16(x) ((x)>>16)
108 static const spx_float_t MIN_LEAK
= .005f
;
110 /* Constants for the two-path filter */
111 static const spx_float_t VAR1_SMOOTH
= .36f
;
112 static const spx_float_t VAR2_SMOOTH
= .7225f
;
113 static const spx_float_t VAR1_UPDATE
= .5f
;
114 static const spx_float_t VAR2_UPDATE
= .25f
;
115 static const spx_float_t VAR_BACKTRACK
= 4.f
;
120 #define PLAYBACK_DELAY 2
122 void speex_echo_get_residual(SpeexEchoState
*st
, spx_word32_t
*Yout
, int len
);
125 /** Speex echo cancellation state. */
126 struct SpeexEchoState_
{
127 int frame_size
; /**< Number of samples processed each time */
134 spx_int32_t sampling_rate
;
135 spx_word16_t spec_average
;
137 spx_word16_t beta_max
;
138 spx_word32_t sum_adapt
;
139 spx_word16_t leak_estimate
;
141 spx_word16_t
*e
; /* scratch */
142 spx_word16_t
*x
; /* Far-end input buffer (2N) */
143 spx_word16_t
*X
; /* Far-end buffer (M+1 frames) in frequency domain */
144 spx_word16_t
*input
; /* scratch */
145 spx_word16_t
*y
; /* scratch */
146 spx_word16_t
*last_y
;
147 spx_word16_t
*Y
; /* scratch */
149 spx_word32_t
*PHI
; /* scratch */
150 spx_word32_t
*W
; /* (Background) filter weights */
152 spx_word16_t
*foreground
; /* Foreground filter weights */
153 spx_word32_t Davg1
; /* 1st recursive average of the residual power difference */
154 spx_word32_t Davg2
; /* 2nd recursive average of the residual power difference */
155 spx_float_t Dvar1
; /* Estimated variance of 1st estimator */
156 spx_float_t Dvar2
; /* Estimated variance of 2nd estimator */
158 spx_word32_t
*power
; /* Power of the far-end signal */
159 spx_float_t
*power_1
;/* Inverse power of far-end */
160 spx_word16_t
*wtmp
; /* scratch */
162 spx_word16_t
*wtmp2
; /* scratch */
164 spx_word32_t
*Rf
; /* scratch */
165 spx_word32_t
*Yf
; /* scratch */
166 spx_word32_t
*Xf
; /* scratch */
171 spx_word16_t
*window
;
174 spx_word16_t memX
, memD
, memE
;
175 spx_word16_t preemph
;
176 spx_word16_t notch_radius
;
177 spx_mem_t notch_mem
[2];
179 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
180 spx_int16_t
*play_buf
;
182 int play_buf_started
;
185 static inline void filter_dc_notch16(const spx_int16_t
*in
, spx_word16_t radius
, spx_word16_t
*out
, int len
, spx_mem_t
*mem
)
190 den2
= MULT16_16_Q15(radius
,radius
) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius
,32767-radius
));
192 den2
= radius
*radius
+ .7*(1-radius
)*(1-radius
);
194 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
197 spx_word16_t vin
= in
[i
];
198 spx_word32_t vout
= mem
[0] + SHL32(EXTEND32(vin
),15);
200 mem
[0] = mem
[1] + SHL32(SHL32(-EXTEND32(vin
),15) + MULT16_32_Q15(radius
,vout
),1);
202 mem
[0] = mem
[1] + 2*(-vin
+ radius
*vout
);
204 mem
[1] = SHL32(EXTEND32(vin
),15) - MULT16_32_Q15(den2
,vout
);
205 out
[i
] = SATURATE32(PSHR32(MULT16_32_Q15(radius
,vout
),15),32767);
209 /* This inner product is slightly different from the codec version because of fixed-point */
210 static inline spx_word32_t
mdf_inner_prod(const spx_word16_t
*x
, const spx_word16_t
*y
, int len
)
217 part
= MAC16_16(part
,*x
++,*y
++);
218 part
= MAC16_16(part
,*x
++,*y
++);
219 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
220 sum
= ADD32(sum
,SHR32(part
,6));
225 /** Compute power spectrum of a half-complex (packed) vector */
226 static inline void power_spectrum(const spx_word16_t
*X
, spx_word32_t
*ps
, int N
)
229 ps
[0]=MULT16_16(X
[0],X
[0]);
230 for (i
=1,j
=1;i
<N
-1;i
+=2,j
++)
232 ps
[j
] = MULT16_16(X
[i
],X
[i
]) + MULT16_16(X
[i
+1],X
[i
+1]);
234 ps
[j
]=MULT16_16(X
[i
],X
[i
]);
237 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
239 static inline void spectral_mul_accum(const spx_word16_t
*X
, const spx_word32_t
*Y
, spx_word16_t
*acc
, int N
, int M
)
242 spx_word32_t tmp1
=0,tmp2
=0;
245 tmp1
= MAC16_16(tmp1
, X
[j
*N
],TOP16(Y
[j
*N
]));
247 acc
[0] = PSHR32(tmp1
,WEIGHT_SHIFT
);
253 tmp1
= SUB32(MAC16_16(tmp1
, X
[j
*N
+i
],TOP16(Y
[j
*N
+i
])), MULT16_16(X
[j
*N
+i
+1],TOP16(Y
[j
*N
+i
+1])));
254 tmp2
= MAC16_16(MAC16_16(tmp2
, X
[j
*N
+i
+1],TOP16(Y
[j
*N
+i
])), X
[j
*N
+i
], TOP16(Y
[j
*N
+i
+1]));
256 acc
[i
] = PSHR32(tmp1
,WEIGHT_SHIFT
);
257 acc
[i
+1] = PSHR32(tmp2
,WEIGHT_SHIFT
);
262 tmp1
= MAC16_16(tmp1
, X
[(j
+1)*N
-1],TOP16(Y
[(j
+1)*N
-1]));
264 acc
[N
-1] = PSHR32(tmp1
,WEIGHT_SHIFT
);
266 static inline void spectral_mul_accum16(const spx_word16_t
*X
, const spx_word16_t
*Y
, spx_word16_t
*acc
, int N
, int M
)
269 spx_word32_t tmp1
=0,tmp2
=0;
272 tmp1
= MAC16_16(tmp1
, X
[j
*N
],Y
[j
*N
]);
274 acc
[0] = PSHR32(tmp1
,WEIGHT_SHIFT
);
280 tmp1
= SUB32(MAC16_16(tmp1
, X
[j
*N
+i
],Y
[j
*N
+i
]), MULT16_16(X
[j
*N
+i
+1],Y
[j
*N
+i
+1]));
281 tmp2
= MAC16_16(MAC16_16(tmp2
, X
[j
*N
+i
+1],Y
[j
*N
+i
]), X
[j
*N
+i
], Y
[j
*N
+i
+1]);
283 acc
[i
] = PSHR32(tmp1
,WEIGHT_SHIFT
);
284 acc
[i
+1] = PSHR32(tmp2
,WEIGHT_SHIFT
);
289 tmp1
= MAC16_16(tmp1
, X
[(j
+1)*N
-1],Y
[(j
+1)*N
-1]);
291 acc
[N
-1] = PSHR32(tmp1
,WEIGHT_SHIFT
);
295 static inline void spectral_mul_accum(const spx_word16_t
*X
, const spx_word32_t
*Y
, spx_word16_t
*acc
, int N
, int M
)
305 acc
[i
] += (X
[i
]*Y
[i
] - X
[i
+1]*Y
[i
+1]);
306 acc
[i
+1] += (X
[i
+1]*Y
[i
] + X
[i
]*Y
[i
+1]);
313 #define spectral_mul_accum16 spectral_mul_accum
316 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
317 static inline void weighted_spectral_mul_conj(const spx_float_t
*w
, const spx_float_t p
, const spx_word16_t
*X
, const spx_word16_t
*Y
, spx_word32_t
*prod
, int N
)
321 W
= FLOAT_AMULT(p
, w
[0]);
322 prod
[0] = FLOAT_MUL32(W
,MULT16_16(X
[0],Y
[0]));
323 for (i
=1,j
=1;i
<N
-1;i
+=2,j
++)
325 W
= FLOAT_AMULT(p
, w
[j
]);
326 prod
[i
] = FLOAT_MUL32(W
,MAC16_16(MULT16_16(X
[i
],Y
[i
]), X
[i
+1],Y
[i
+1]));
327 prod
[i
+1] = FLOAT_MUL32(W
,MAC16_16(MULT16_16(-X
[i
+1],Y
[i
]), X
[i
],Y
[i
+1]));
329 W
= FLOAT_AMULT(p
, w
[j
]);
330 prod
[i
] = FLOAT_MUL32(W
,MULT16_16(X
[i
],Y
[i
]));
333 static inline void mdf_adjust_prop(const spx_word32_t
*W
, int N
, int M
, spx_word16_t
*prop
)
336 spx_word16_t max_sum
= 1;
337 spx_word32_t prop_sum
= 1;
340 spx_word32_t tmp
= 1;
342 tmp
+= MULT16_16(EXTRACT16(SHR32(W
[i
*N
+j
],18)), EXTRACT16(SHR32(W
[i
*N
+j
],18)));
344 /* Just a security in case an overflow were to occur */
345 tmp
= MIN32(ABS32(tmp
), 536870912);
347 prop
[i
] = spx_sqrt(tmp
);
348 if (prop
[i
] > max_sum
)
353 prop
[i
] += MULT16_16_Q15(QCONST16(.1f
,15),max_sum
);
354 prop_sum
+= EXTEND32(prop
[i
]);
358 prop
[i
] = DIV32(MULT16_16(QCONST16(.99f
,15), prop
[i
]),prop_sum
);
359 /*printf ("%f ", prop[i]);*/
364 #ifdef DUMP_ECHO_CANCEL_DATA
366 static FILE *rFile
=NULL
, *pFile
=NULL
, *oFile
=NULL
;
368 static void dump_audio(const spx_int16_t
*rec
, const spx_int16_t
*play
, const spx_int16_t
*out
, int len
)
370 if (!(rFile
&& pFile
&& oFile
))
372 speex_fatal("Dump files not open");
374 fwrite(rec
, sizeof(spx_int16_t
), len
, rFile
);
375 fwrite(play
, sizeof(spx_int16_t
), len
, pFile
);
376 fwrite(out
, sizeof(spx_int16_t
), len
, oFile
);
380 /** Creates a new echo canceller state */
381 SpeexEchoState
*speex_echo_state_init(int frame_size
, int filter_length
)
384 SpeexEchoState
*st
= (SpeexEchoState
*)speex_alloc(sizeof(SpeexEchoState
));
386 #ifdef DUMP_ECHO_CANCEL_DATA
387 if (rFile
|| pFile
|| oFile
)
388 speex_fatal("Opening dump files twice");
389 rFile
= fopen("aec_rec.sw", "wb");
390 pFile
= fopen("aec_play.sw", "wb");
391 oFile
= fopen("aec_out.sw", "wb");
394 st
->frame_size
= frame_size
;
395 st
->window_size
= 2*frame_size
;
397 M
= st
->M
= (filter_length
+st
->frame_size
-1)/frame_size
;
402 /* This is the default sampling rate */
403 st
->sampling_rate
= 8000;
404 st
->spec_average
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 15), st
->sampling_rate
);
406 st
->beta0
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 16), st
->sampling_rate
);
407 st
->beta_max
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 14), st
->sampling_rate
);
409 st
->beta0
= (2.0f
*st
->frame_size
)/st
->sampling_rate
;
410 st
->beta_max
= (.5f
*st
->frame_size
)/st
->sampling_rate
;
412 st
->leak_estimate
= 0;
414 st
->fft_table
= spx_fft_init(N
);
416 st
->e
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
417 st
->x
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
418 st
->input
= (spx_word16_t
*)speex_alloc(st
->frame_size
*sizeof(spx_word16_t
));
419 st
->y
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
420 st
->last_y
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
421 st
->Yf
= (spx_word32_t
*)speex_alloc((st
->frame_size
+1)*sizeof(spx_word32_t
));
422 st
->Rf
= (spx_word32_t
*)speex_alloc((st
->frame_size
+1)*sizeof(spx_word32_t
));
423 st
->Xf
= (spx_word32_t
*)speex_alloc((st
->frame_size
+1)*sizeof(spx_word32_t
));
424 st
->Yh
= (spx_word32_t
*)speex_alloc((st
->frame_size
+1)*sizeof(spx_word32_t
));
425 st
->Eh
= (spx_word32_t
*)speex_alloc((st
->frame_size
+1)*sizeof(spx_word32_t
));
427 st
->X
= (spx_word16_t
*)speex_alloc((M
+1)*N
*sizeof(spx_word16_t
));
428 st
->Y
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
429 st
->E
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
430 st
->W
= (spx_word32_t
*)speex_alloc(M
*N
*sizeof(spx_word32_t
));
432 st
->foreground
= (spx_word16_t
*)speex_alloc(M
*N
*sizeof(spx_word16_t
));
434 st
->PHI
= (spx_word32_t
*)speex_alloc(N
*sizeof(spx_word32_t
));
435 st
->power
= (spx_word32_t
*)speex_alloc((frame_size
+1)*sizeof(spx_word32_t
));
436 st
->power_1
= (spx_float_t
*)speex_alloc((frame_size
+1)*sizeof(spx_float_t
));
437 st
->window
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
438 st
->prop
= (spx_word16_t
*)speex_alloc(M
*sizeof(spx_word16_t
));
439 st
->wtmp
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
441 st
->wtmp2
= (spx_word16_t
*)speex_alloc(N
*sizeof(spx_word16_t
));
444 st
->window
[i
] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i
<<1),N
)),1));
445 st
->window
[N
-i
-1] = st
->window
[i
];
449 st
->window
[i
] = .5-.5*cos(2*M_PI
*i
/N
);
451 for (i
=0;i
<=st
->frame_size
;i
++)
452 st
->power_1
[i
] = FLOAT_ONE
;
456 spx_word32_t sum
= 0;
457 /* Ratio of ~10 between adaptation rate of first and last block */
458 spx_word16_t decay
= SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M
))),1);
459 st
->prop
[0] = QCONST16(.7, 15);
460 sum
= EXTEND32(st
->prop
[0]);
463 st
->prop
[i
] = MULT16_16_Q15(st
->prop
[i
-1], decay
);
464 sum
= ADD32(sum
, EXTEND32(st
->prop
[i
]));
468 st
->prop
[i
] = DIV32(MULT16_16(QCONST16(.8,15), st
->prop
[i
]),sum
);
472 st
->memX
=st
->memD
=st
->memE
=0;
473 st
->preemph
= QCONST16(.9,15);
474 if (st
->sampling_rate
<12000)
475 st
->notch_radius
= QCONST16(.9, 15);
476 else if (st
->sampling_rate
<24000)
477 st
->notch_radius
= QCONST16(.982, 15);
479 st
->notch_radius
= QCONST16(.992, 15);
481 st
->notch_mem
[0] = st
->notch_mem
[1] = 0;
483 st
->Pey
= st
->Pyy
= FLOAT_ONE
;
486 st
->Davg1
= st
->Davg2
= 0;
487 st
->Dvar1
= st
->Dvar2
= FLOAT_ZERO
;
490 st
->play_buf
= (spx_int16_t
*)speex_alloc((PLAYBACK_DELAY
+1)*st
->frame_size
*sizeof(spx_int16_t
));
491 st
->play_buf_pos
= PLAYBACK_DELAY
*st
->frame_size
;
492 st
->play_buf_started
= 0;
497 /** Resets echo canceller state */
498 void speex_echo_state_reset(SpeexEchoState
*st
)
509 st
->foreground
[i
] = 0;
511 for (i
=0;i
<N
*(M
+1);i
++)
513 for (i
=0;i
<=st
->frame_size
;i
++)
516 st
->power_1
[i
] = FLOAT_ONE
;
520 for (i
=0;i
<st
->frame_size
;i
++)
529 st
->notch_mem
[0] = st
->notch_mem
[1] = 0;
530 st
->memX
=st
->memD
=st
->memE
=0;
535 st
->Pey
= st
->Pyy
= FLOAT_ONE
;
537 st
->Davg1
= st
->Davg2
= 0;
538 st
->Dvar1
= st
->Dvar2
= FLOAT_ZERO
;
540 for (i
=0;i
<3*st
->frame_size
;i
++)
542 st
->play_buf_pos
= PLAYBACK_DELAY
*st
->frame_size
;
543 st
->play_buf_started
= 0;
547 /** Destroys an echo canceller state */
548 void speex_echo_state_destroy(SpeexEchoState
*st
)
550 spx_fft_destroy(st
->fft_table
);
554 speex_free(st
->input
);
556 speex_free(st
->last_y
);
568 speex_free(st
->foreground
);
571 speex_free(st
->power
);
572 speex_free(st
->power_1
);
573 speex_free(st
->window
);
574 speex_free(st
->prop
);
575 speex_free(st
->wtmp
);
577 speex_free(st
->wtmp2
);
579 speex_free(st
->play_buf
);
582 #ifdef DUMP_ECHO_CANCEL_DATA
586 rFile
= pFile
= oFile
= NULL
;
590 void speex_echo_capture(SpeexEchoState
*st
, const spx_int16_t
*rec
, spx_int16_t
*out
)
593 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
594 st
->play_buf_started
= 1;
595 if (st
->play_buf_pos
>=st
->frame_size
)
597 speex_echo_cancellation(st
, rec
, st
->play_buf
, out
);
598 st
->play_buf_pos
-= st
->frame_size
;
599 for (i
=0;i
<st
->play_buf_pos
;i
++)
600 st
->play_buf
[i
] = st
->play_buf
[i
+st
->frame_size
];
602 speex_warning("No playback frame available (your application is buggy and/or got xruns)");
603 if (st
->play_buf_pos
!=0)
605 speex_warning("internal playback buffer corruption?");
606 st
->play_buf_pos
= 0;
608 for (i
=0;i
<st
->frame_size
;i
++)
613 void speex_echo_playback(SpeexEchoState
*st
, const spx_int16_t
*play
)
615 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
616 if (!st
->play_buf_started
)
618 speex_warning("discarded first playback frame");
621 if (st
->play_buf_pos
<=PLAYBACK_DELAY
*st
->frame_size
)
624 for (i
=0;i
<st
->frame_size
;i
++)
625 st
->play_buf
[st
->play_buf_pos
+i
] = play
[i
];
626 st
->play_buf_pos
+= st
->frame_size
;
627 if (st
->play_buf_pos
<= (PLAYBACK_DELAY
-1)*st
->frame_size
)
629 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
630 for (i
=0;i
<st
->frame_size
;i
++)
631 st
->play_buf
[st
->play_buf_pos
+i
] = play
[i
];
632 st
->play_buf_pos
+= st
->frame_size
;
635 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
639 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
640 void speex_echo_cancel(SpeexEchoState
*st
, const spx_int16_t
*in
, const spx_int16_t
*far_end
, spx_int16_t
*out
, spx_int32_t
*Yout
)
642 speex_echo_cancellation(st
, in
, far_end
, out
);
645 /** Performs echo cancellation on a frame */
646 void speex_echo_cancellation(SpeexEchoState
*st
, const spx_int16_t
*in
, const spx_int16_t
*far_end
, spx_int16_t
*out
)
650 spx_word32_t Syy
,See
,Sxx
,Sdd
, Sff
;
653 int update_foreground
;
656 spx_word16_t ss
, ss_1
;
657 spx_float_t Pey
= FLOAT_ONE
, Pyy
=FLOAT_ONE
;
658 spx_float_t alpha
, alpha_1
;
666 ss
=DIV32_16(11469,M
);
667 ss_1
= SUB16(32767,ss
);
673 /* Apply a notch filter to make sure DC doesn't end up causing problems */
674 filter_dc_notch16(in
, st
->notch_radius
, st
->input
, st
->frame_size
, st
->notch_mem
);
675 /* Copy input data to buffer and apply pre-emphasis */
676 for (i
=0;i
<st
->frame_size
;i
++)
679 tmp32
= SUB32(EXTEND32(far_end
[i
]), EXTEND32(MULT16_16_P15(st
->preemph
, st
->memX
)));
681 /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
693 st
->x
[i
+st
->frame_size
] = EXTRACT16(tmp32
);
694 st
->memX
= far_end
[i
];
696 tmp32
= SUB32(EXTEND32(st
->input
[i
]), EXTEND32(MULT16_16_P15(st
->preemph
, st
->memD
)));
701 if (st
->saturated
== 0)
707 if (st
->saturated
== 0)
711 st
->memD
= st
->input
[i
];
712 st
->input
[i
] = tmp32
;
715 /* Shift memory: this could be optimized eventually*/
719 st
->X
[(j
+1)*N
+i
] = st
->X
[j
*N
+i
];
722 /* Convert x (far end) to frequency domain */
723 spx_fft(st
->fft_table
, st
->x
, &st
->X
[0]);
725 st
->last_y
[i
] = st
->x
[i
];
726 Sxx
= mdf_inner_prod(st
->x
+st
->frame_size
, st
->x
+st
->frame_size
, st
->frame_size
);
727 for (i
=0;i
<st
->frame_size
;i
++)
728 st
->x
[i
] = st
->x
[i
+st
->frame_size
];
729 /* From here on, the top part of x is used as scratch space */
732 /* Compute foreground filter */
733 spectral_mul_accum16(st
->X
, st
->foreground
, st
->Y
, N
, M
);
734 spx_ifft(st
->fft_table
, st
->Y
, st
->e
);
735 for (i
=0;i
<st
->frame_size
;i
++)
736 st
->e
[i
] = SUB16(st
->input
[i
], st
->e
[i
+st
->frame_size
]);
737 Sff
= mdf_inner_prod(st
->e
, st
->e
, st
->frame_size
);
740 /* Adjust proportional adaption rate */
741 mdf_adjust_prop (st
->W
, N
, M
, st
->prop
);
742 /* Compute weight gradient */
743 if (st
->saturated
== 0)
747 weighted_spectral_mul_conj(st
->power_1
, FLOAT_SHL(PSEUDOFLOAT(st
->prop
[j
]),-15), &st
->X
[(j
+1)*N
], st
->E
, st
->PHI
, N
);
749 st
->W
[j
*N
+i
] = ADD32(st
->W
[j
*N
+i
], st
->PHI
[i
]);
756 /* Update weight to prevent circular convolution (MDF / AUMDF) */
759 /* This is a variant of the Alternatively Updated MDF (AUMDF) */
760 /* Remove the "if" to make this an MDF filter */
761 if (j
==0 || st
->cancel_count
%(M
-1) == j
-1)
765 st
->wtmp2
[i
] = EXTRACT16(PSHR32(st
->W
[j
*N
+i
],NORMALIZE_SCALEDOWN
+16));
766 spx_ifft(st
->fft_table
, st
->wtmp2
, st
->wtmp
);
767 for (i
=0;i
<st
->frame_size
;i
++)
771 for (i
=st
->frame_size
;i
<N
;i
++)
773 st
->wtmp
[i
]=SHL16(st
->wtmp
[i
],NORMALIZE_SCALEUP
);
775 spx_fft(st
->fft_table
, st
->wtmp
, st
->wtmp2
);
776 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
778 st
->W
[j
*N
+i
] -= SHL32(EXTEND32(st
->wtmp2
[i
]),16+NORMALIZE_SCALEDOWN
-NORMALIZE_SCALEUP
-1);
780 spx_ifft(st
->fft_table
, &st
->W
[j
*N
], st
->wtmp
);
781 for (i
=st
->frame_size
;i
<N
;i
++)
785 spx_fft(st
->fft_table
, st
->wtmp
, &st
->W
[j
*N
]);
790 /* Compute filter response Y */
791 spectral_mul_accum(st
->X
, st
->W
, st
->Y
, N
, M
);
792 spx_ifft(st
->fft_table
, st
->Y
, st
->y
);
795 /* Difference in response, this is used to estimate the variance of our residual power estimate */
796 for (i
=0;i
<st
->frame_size
;i
++)
797 st
->e
[i
] = SUB16(st
->e
[i
+st
->frame_size
], st
->y
[i
+st
->frame_size
]);
798 Dbf
= 10+mdf_inner_prod(st
->e
, st
->e
, st
->frame_size
);
801 for (i
=0;i
<st
->frame_size
;i
++)
802 st
->e
[i
] = SUB16(st
->input
[i
], st
->y
[i
+st
->frame_size
]);
803 See
= mdf_inner_prod(st
->e
, st
->e
, st
->frame_size
);
809 /* Logic for updating the foreground filter */
811 /* For two time windows, compute the mean of the energy difference, as well as the variance */
812 st
->Davg1
= ADD32(MULT16_32_Q15(QCONST16(.6f
,15),st
->Davg1
), MULT16_32_Q15(QCONST16(.4f
,15),SUB32(Sff
,See
)));
813 st
->Davg2
= ADD32(MULT16_32_Q15(QCONST16(.85f
,15),st
->Davg2
), MULT16_32_Q15(QCONST16(.15f
,15),SUB32(Sff
,See
)));
814 st
->Dvar1
= FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH
, st
->Dvar1
), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f
,15),Sff
), MULT16_32_Q15(QCONST16(.4f
,15),Dbf
)));
815 st
->Dvar2
= FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH
, st
->Dvar2
), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f
,15),Sff
), MULT16_32_Q15(QCONST16(.15f
,15),Dbf
)));
817 /* Equivalent float code:
818 st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
819 st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
820 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
821 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
824 update_foreground
= 0;
825 /* Check if we have a statistically significant reduction in the residual echo */
826 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
827 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff
,See
),ABS32(SUB32(Sff
,See
))), FLOAT_MUL32U(Sff
,Dbf
)))
828 update_foreground
= 1;
829 else if (FLOAT_GT(FLOAT_MUL32U(st
->Davg1
, ABS32(st
->Davg1
)), FLOAT_MULT(VAR1_UPDATE
,(st
->Dvar1
))))
830 update_foreground
= 1;
831 else if (FLOAT_GT(FLOAT_MUL32U(st
->Davg2
, ABS32(st
->Davg2
)), FLOAT_MULT(VAR2_UPDATE
,(st
->Dvar2
))))
832 update_foreground
= 1;
835 if (update_foreground
)
837 st
->Davg1
= st
->Davg2
= 0;
838 st
->Dvar1
= st
->Dvar2
= FLOAT_ZERO
;
839 /* Copy background filter to foreground filter */
841 st
->foreground
[i
] = EXTRACT16(PSHR32(st
->W
[i
],16));
842 /* Apply a smooth transition so as to not introduce blocking artifacts */
843 for (i
=0;i
<st
->frame_size
;i
++)
844 st
->e
[i
+st
->frame_size
] = MULT16_16_Q15(st
->window
[i
+st
->frame_size
],st
->e
[i
+st
->frame_size
]) + MULT16_16_Q15(st
->window
[i
],st
->y
[i
+st
->frame_size
]);
846 int reset_background
=0;
847 /* Otherwise, check if the background filter is significantly worse */
848 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff
,See
)),ABS32(SUB32(Sff
,See
))), FLOAT_MULT(VAR_BACKTRACK
,FLOAT_MUL32U(Sff
,Dbf
))))
849 reset_background
= 1;
850 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st
->Davg1
), ABS32(st
->Davg1
)), FLOAT_MULT(VAR_BACKTRACK
,st
->Dvar1
)))
851 reset_background
= 1;
852 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st
->Davg2
), ABS32(st
->Davg2
)), FLOAT_MULT(VAR_BACKTRACK
,st
->Dvar2
)))
853 reset_background
= 1;
854 if (reset_background
)
856 /* Copy foreground filter to background filter */
858 st
->W
[i
] = SHL32(EXTEND32(st
->foreground
[i
]),16);
859 /* We also need to copy the output so as to get correct adaptation */
860 for (i
=0;i
<st
->frame_size
;i
++)
861 st
->y
[i
+st
->frame_size
] = st
->e
[i
+st
->frame_size
];
862 for (i
=0;i
<st
->frame_size
;i
++)
863 st
->e
[i
] = SUB16(st
->input
[i
], st
->y
[i
+st
->frame_size
]);
865 st
->Davg1
= st
->Davg2
= 0;
866 st
->Dvar1
= st
->Dvar2
= FLOAT_ZERO
;
871 /* Compute error signal (for the output with de-emphasis) */
872 for (i
=0;i
<st
->frame_size
;i
++)
874 spx_word32_t tmp_out
;
876 tmp_out
= SUB32(EXTEND32(st
->input
[i
]), EXTEND32(st
->e
[i
+st
->frame_size
]));
878 tmp_out
= SUB32(EXTEND32(st
->input
[i
]), EXTEND32(st
->y
[i
+st
->frame_size
]));
883 else if (tmp_out
<-32768)
885 tmp_out
= ADD32(tmp_out
, EXTEND32(MULT16_16_P15(st
->preemph
, st
->memE
)));
886 /* This is an arbitrary test for saturation in the microphone signal */
887 if (in
[i
] <= -32000 || in
[i
] >= 32000)
890 if (st
->saturated
== 0)
893 out
[i
] = (spx_int16_t
)tmp_out
;
897 #ifdef DUMP_ECHO_CANCEL_DATA
898 dump_audio(in
, far_end
, out
, st
->frame_size
);
901 /* Compute error signal (filter update version) */
902 for (i
=0;i
<st
->frame_size
;i
++)
904 st
->e
[i
+st
->frame_size
] = st
->e
[i
];
908 /* Compute a bunch of correlations */
909 Sey
= mdf_inner_prod(st
->e
+st
->frame_size
, st
->y
+st
->frame_size
, st
->frame_size
);
910 Syy
= mdf_inner_prod(st
->y
+st
->frame_size
, st
->y
+st
->frame_size
, st
->frame_size
);
911 Sdd
= mdf_inner_prod(st
->input
, st
->input
, st
->frame_size
);
913 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
915 /* Do some sanity check */
916 if (!(Syy
>=0 && Sxx
>=0 && See
>= 0)
918 || !(Sff
< N
*1e9
&& Syy
< N
*1e9
&& Sxx
< N
*1e9
)
922 /* Things have gone really bad */
923 st
->screwed_up
+= 50;
924 for (i
=0;i
<st
->frame_size
;i
++)
926 } else if (SHR32(Sff
, 2) > ADD32(Sdd
, SHR32(MULT16_16(N
, 10000),6)))
928 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
931 /* Everything's fine */
934 if (st
->screwed_up
>=50)
936 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
937 speex_echo_state_reset(st
);
941 /* Add a small noise floor to make sure not to have problems when dividing */
942 See
= MAX32(See
, SHR32(MULT16_16(N
, 100),6));
944 /* Convert error to frequency domain */
945 spx_fft(st
->fft_table
, st
->e
, st
->E
);
946 for (i
=0;i
<st
->frame_size
;i
++)
948 spx_fft(st
->fft_table
, st
->y
, st
->Y
);
950 /* Compute power spectrum of far end (X), error (E) and filter response (Y) */
951 power_spectrum(st
->E
, st
->Rf
, N
);
952 power_spectrum(st
->Y
, st
->Yf
, N
);
953 power_spectrum(st
->X
, st
->Xf
, N
);
955 /* Smooth far end energy estimate over time */
956 for (j
=0;j
<=st
->frame_size
;j
++)
957 st
->power
[j
] = MULT16_32_Q15(ss_1
,st
->power
[j
]) + 1 + MULT16_32_Q15(ss
,st
->Xf
[j
]);
959 /* Enable this to compute the power based only on the tail (would need to compute more
960 efficiently to make this really useful */
963 float scale2
= .5f
/M
;
964 for (j
=0;j
<=st
->frame_size
;j
++)
968 power_spectrum(&st
->X
[i
*N
], st
->Xf
, N
);
969 for (j
=0;j
<=st
->frame_size
;j
++)
970 st
->power
[j
] += scale2
*st
->Xf
[j
];
974 /* Compute filtered spectra and (cross-)correlations */
975 for (j
=st
->frame_size
;j
>=0;j
--)
978 Eh
= PSEUDOFLOAT(st
->Rf
[j
] - st
->Eh
[j
]);
979 Yh
= PSEUDOFLOAT(st
->Yf
[j
] - st
->Yh
[j
]);
980 Pey
= FLOAT_ADD(Pey
,FLOAT_MULT(Eh
,Yh
));
981 Pyy
= FLOAT_ADD(Pyy
,FLOAT_MULT(Yh
,Yh
));
983 st
->Eh
[j
] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st
->spec_average
),st
->Eh
[j
]), st
->spec_average
, st
->Rf
[j
]);
984 st
->Yh
[j
] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st
->spec_average
),st
->Yh
[j
]), st
->spec_average
, st
->Yf
[j
]);
986 st
->Eh
[j
] = (1-st
->spec_average
)*st
->Eh
[j
] + st
->spec_average
*st
->Rf
[j
];
987 st
->Yh
[j
] = (1-st
->spec_average
)*st
->Yh
[j
] + st
->spec_average
*st
->Yf
[j
];
991 Pyy
= FLOAT_SQRT(Pyy
);
992 Pey
= FLOAT_DIVU(Pey
,Pyy
);
994 /* Compute correlation updatete rate */
995 tmp32
= MULT16_32_Q15(st
->beta0
,Syy
);
996 if (tmp32
> MULT16_32_Q15(st
->beta_max
,See
))
997 tmp32
= MULT16_32_Q15(st
->beta_max
,See
);
998 alpha
= FLOAT_DIV32(tmp32
, See
);
999 alpha_1
= FLOAT_SUB(FLOAT_ONE
, alpha
);
1000 /* Update correlations (recursive average) */
1001 st
->Pey
= FLOAT_ADD(FLOAT_MULT(alpha_1
,st
->Pey
) , FLOAT_MULT(alpha
,Pey
));
1002 st
->Pyy
= FLOAT_ADD(FLOAT_MULT(alpha_1
,st
->Pyy
) , FLOAT_MULT(alpha
,Pyy
));
1003 if (FLOAT_LT(st
->Pyy
, FLOAT_ONE
))
1004 st
->Pyy
= FLOAT_ONE
;
1005 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1006 if (FLOAT_LT(st
->Pey
, FLOAT_MULT(MIN_LEAK
,st
->Pyy
)))
1007 st
->Pey
= FLOAT_MULT(MIN_LEAK
,st
->Pyy
);
1008 if (FLOAT_GT(st
->Pey
, st
->Pyy
))
1010 /* leak_estimate is the linear regression result */
1011 st
->leak_estimate
= FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st
->Pey
, st
->Pyy
),14));
1012 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1013 if (st
->leak_estimate
> 16383)
1014 st
->leak_estimate
= 32767;
1016 st
->leak_estimate
= SHL16(st
->leak_estimate
,1);
1017 /*printf ("%f\n", st->leak_estimate);*/
1019 /* Compute Residual to Error Ratio */
1021 tmp32
= MULT16_32_Q15(st
->leak_estimate
,Syy
);
1022 tmp32
= ADD32(SHR32(Sxx
,13), ADD32(tmp32
, SHL32(tmp32
,1)));
1023 /* Check for y in e (lower bound on RER) */
1025 spx_float_t bound
= PSEUDOFLOAT(Sey
);
1026 bound
= FLOAT_DIVU(FLOAT_MULT(bound
, bound
), PSEUDOFLOAT(ADD32(1,Syy
)));
1027 if (FLOAT_GT(bound
, PSEUDOFLOAT(See
)))
1029 else if (tmp32
< FLOAT_EXTRACT32(bound
))
1030 tmp32
= FLOAT_EXTRACT32(bound
);
1032 if (tmp32
> SHR32(See
,1))
1033 tmp32
= SHR32(See
,1);
1034 RER
= FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32
,See
),15));
1036 RER
= (.0001*Sxx
+ 3.*MULT16_32_Q15(st
->leak_estimate
,Syy
)) / See
;
1037 /* Check for y in e (lower bound on RER) */
1038 if (RER
< Sey
*Sey
/(1+See
*Syy
))
1039 RER
= Sey
*Sey
/(1+See
*Syy
);
1044 /* We consider that the filter has had minimal adaptation if the following is true*/
1045 if (!st
->adapted
&& st
->sum_adapt
> SHL32(EXTEND32(M
),15) && MULT16_32_Q15(st
->leak_estimate
,Syy
) > MULT16_32_Q15(QCONST16(.03f
,15),Syy
))
1052 /* Normal learning rate calculation once we're past the minimal adaptation phase */
1053 for (i
=0;i
<=st
->frame_size
;i
++)
1056 /* Compute frequency-domain adaptation mask */
1057 r
= MULT16_32_Q15(st
->leak_estimate
,SHL32(st
->Yf
[i
],3));
1058 e
= SHL32(st
->Rf
[i
],3)+1;
1066 r
= MULT16_32_Q15(QCONST16(.7,15),r
) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t
)(MULT16_32_Q15(RER
,e
)));
1067 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1068 st
->power_1
[i
] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r
,FLOAT_MUL32U(e
,st
->power
[i
]+10)),WEIGHT_SHIFT
+16);
1071 /* Temporary adaption rate if filter is not yet adapted enough */
1072 spx_word16_t adapt_rate
=0;
1074 if (Sxx
> SHR32(MULT16_16(N
, 1000),6))
1076 tmp32
= MULT16_32_Q15(QCONST16(.25f
, 15), Sxx
);
1078 if (tmp32
> SHR32(See
,2))
1079 tmp32
= SHR32(See
,2);
1081 if (tmp32
> .25*See
)
1084 adapt_rate
= FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32
, See
),15));
1086 for (i
=0;i
<=st
->frame_size
;i
++)
1087 st
->power_1
[i
] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate
),ADD32(st
->power
[i
],10)),WEIGHT_SHIFT
+1);
1090 /* How much have we adapted so far? */
1091 st
->sum_adapt
= ADD32(st
->sum_adapt
,adapt_rate
);
1094 /* Save residual echo so it can be used by the nonlinear processor */
1097 /* If the filter is adapted, take the filtered echo */
1098 for (i
=0;i
<st
->frame_size
;i
++)
1099 st
->last_y
[i
] = st
->last_y
[st
->frame_size
+i
];
1100 for (i
=0;i
<st
->frame_size
;i
++)
1101 st
->last_y
[st
->frame_size
+i
] = in
[i
]-out
[i
];
1103 /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1104 /* moved earlier: for (i=0;i<N;i++)
1105 st->last_y[i] = st->x[i];*/
1110 /* Compute spectrum of estimated echo for use in an echo post-filter */
1111 void speex_echo_get_residual(SpeexEchoState
*st
, spx_word32_t
*residual_echo
, int len
)
1117 N
= st
->window_size
;
1119 /* Apply hanning window (should pre-compute it)*/
1121 st
->y
[i
] = MULT16_16_Q15(st
->window
[i
],st
->last_y
[i
]);
1123 /* Compute power spectrum of the echo */
1124 spx_fft(st
->fft_table
, st
->y
, st
->Y
);
1125 power_spectrum(st
->Y
, residual_echo
, N
);
1128 if (st
->leak_estimate
> 16383)
1131 leak2
= SHL16(st
->leak_estimate
, 1);
1133 if (st
->leak_estimate
>.5)
1136 leak2
= 2*st
->leak_estimate
;
1138 /* Estimate residual echo */
1139 for (i
=0;i
<=st
->frame_size
;i
++)
1140 residual_echo
[i
] = (spx_int32_t
)MULT16_32_Q15(leak2
,residual_echo
[i
]);
1144 int speex_echo_ctl(SpeexEchoState
*st
, int request
, void *ptr
)
1149 case SPEEX_ECHO_GET_FRAME_SIZE
:
1150 (*(int*)ptr
) = st
->frame_size
;
1152 case SPEEX_ECHO_SET_SAMPLING_RATE
:
1153 st
->sampling_rate
= (*(int*)ptr
);
1154 st
->spec_average
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 15), st
->sampling_rate
);
1156 st
->beta0
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 16), st
->sampling_rate
);
1157 st
->beta_max
= DIV32_16(SHL32(EXTEND32(st
->frame_size
), 14), st
->sampling_rate
);
1159 st
->beta0
= (2.0f
*st
->frame_size
)/st
->sampling_rate
;
1160 st
->beta_max
= (.5f
*st
->frame_size
)/st
->sampling_rate
;
1162 if (st
->sampling_rate
<12000)
1163 st
->notch_radius
= QCONST16(.9, 15);
1164 else if (st
->sampling_rate
<24000)
1165 st
->notch_radius
= QCONST16(.982, 15);
1167 st
->notch_radius
= QCONST16(.992, 15);
1169 case SPEEX_ECHO_GET_SAMPLING_RATE
:
1170 (*(int*)ptr
) = st
->sampling_rate
;
1173 speex_warning_int("Unknown speex_echo_ctl request: ", request
);