release/src-rt-6.x.4708/linux/linux-2.6.36/drivers/staging/echo/echo.c

   1 /*
   2  * SpanDSP - a series of DSP components for telephony
   3  *
   4  * echo.c - A line echo canceller.  This code is being developed
   5  *          against and partially complies with G168.
   6  *
   7  * Written by Steve Underwood <steveu@coppice.org>
   8  *         and David Rowe <david_at_rowetel_dot_com>
   9  *
  10  * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
  11  *
  12  * Based on a bit from here, a bit from there, eye of toad, ear of
  13  * bat, 15 years of failed attempts by David and a few fried brain
  14  * cells.
  15  *
  16  * All rights reserved.
  17  *
  18  * This program is free software; you can redistribute it and/or modify
  19  * it under the terms of the GNU General Public License version 2, as
  20  * published by the Free Software Foundation.
  21  *
  22  * This program is distributed in the hope that it will be useful,
  23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25  * GNU General Public License for more details.
  26  *
  27  * You should have received a copy of the GNU General Public License
  28  * along with this program; if not, write to the Free Software
  29  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  30  */
  31
  32 /*! \file */
  33
  34 /* Implementation Notes
  35    David Rowe
  36    April 2007
  37
  38    This code started life as Steve's NLMS algorithm with a tap
  39    rotation algorithm to handle divergence during double talk.  I
  40    added a Geigel Double Talk Detector (DTD) [2] and performed some
  41    G168 tests.  However I had trouble meeting the G168 requirements,
  42    especially for double talk - there were always cases where my DTD
  43    failed, for example where near end speech was under the 6dB
  44    threshold required for declaring double talk.
  45
  46    So I tried a two path algorithm [1], which has so far given better
  47    results.  The original tap rotation/Geigel algorithm is available
  48    in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
  49    It's probably possible to make it work if some one wants to put some
  50    serious work into it.
  51
  52    At present no special treatment is provided for tones, which
  53    generally cause NLMS algorithms to diverge.  Initial runs of a
  54    subset of the G168 tests for tones (e.g ./echo_test 6) show the
  55    current algorithm is passing OK, which is kind of surprising.  The
  56    full set of tests needs to be performed to confirm this result.
  57
  58    One other interesting change is that I have managed to get the NLMS
  59    code to work with 16 bit coefficients, rather than the original 32
  60    bit coefficents.  This reduces the MIPs and storage required.
  61    I evaulated the 16 bit port using g168_tests.sh and listening tests
  62    on 4 real-world samples.
  63
  64    I also attempted the implementation of a block based NLMS update
  65    [2] but although this passes g168_tests.sh it didn't converge well
  66    on the real-world samples.  I have no idea why, perhaps a scaling
  67    problem.  The block based code is also available in SVN
  68    http://svn.rowetel.com/software/oslec/tags/before_16bit.  If this
  69    code can be debugged, it will lead to further reduction in MIPS, as
  70    the block update code maps nicely onto DSP instruction sets (it's a
  71    dot product) compared to the current sample-by-sample update.
  72
  73    Steve also has some nice notes on echo cancellers in echo.h
  74
  75    References:
  76
  77    [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
  78        Path Models", IEEE Transactions on communications, COM-25,
  79        No. 6, June
  80        1977.
  81        http://www.rowetel.com/images/echo/dual_path_paper.pdf
  82
  83    [2] The classic, very useful paper that tells you how to
  84        actually build a real world echo canceller:
  85          Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
  86          Echo Canceller with a TMS320020,
  87          http://www.rowetel.com/images/echo/spra129.pdf
  88
  89    [3] I have written a series of blog posts on this work, here is
  90        Part 1: http://www.rowetel.com/blog/?p=18
  91
  92    [4] The source code http://svn.rowetel.com/software/oslec/
  93
  94    [5] A nice reference on LMS filters:
  95          http://en.wikipedia.org/wiki/Least_mean_squares_filter
  96
  97    Credits:
  98
  99    Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
 100    Muthukrishnan for their suggestions and email discussions.  Thanks
 101    also to those people who collected echo samples for me such as
 102    Mark, Pawel, and Pavel.
 103 */
 104
 105 #include <linux/kernel.h>
 106 #include <linux/module.h>
 107 #include <linux/slab.h>
 108
 109 #include "echo.h"
 110
 111 #define MIN_TX_POWER_FOR_ADAPTION       64
 112 #define MIN_RX_POWER_FOR_ADAPTION       64
 113 #define DTD_HANGOVER                    600     /* 600 samples, or 75ms     */
 114 #define DC_LOG2BETA                     3       /* log2() of DC filter Beta */
 115
 116
 117 /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
 118
 119 #ifdef __bfin__
 120 static inline void lms_adapt_bg(struct oslec_state *ec, int clean,
 121                                     int shift)
 122 {
 123         int i, j;
 124         int offset1;
 125         int offset2;
 126         int factor;
 127         int exp;
 128         int16_t *phist;
 129         int n;
 130
 131         if (shift > 0)
 132                 factor = clean << shift;
 133         else
 134                 factor = clean >> -shift;
 135
 136         /* Update the FIR taps */
 137
 138         offset2 = ec->curr_pos;
 139         offset1 = ec->taps - offset2;
 140         phist = &ec->fir_state_bg.history[offset2];
 141
 142         /* st: and en: help us locate the assembler in echo.s */
 143
 144         /* asm("st:"); */
 145         n = ec->taps;
 146         for (i = 0, j = offset2; i < n; i++, j++) {
 147                 exp = *phist++ * factor;
 148                 ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 149         }
 150         /* asm("en:"); */
 151
 152         /* Note the asm for the inner loop above generated by Blackfin gcc
 153            4.1.1 is pretty good (note even parallel instructions used):
 154
 155            R0 = W [P0++] (X);
 156            R0 *= R2;
 157            R0 = R0 + R3 (NS) ||
 158            R1 = W [P1] (X) ||
 159            nop;
 160            R0 >>>= 15;
 161            R0 = R0 + R1;
 162            W [P1++] = R0;
 163
 164            A block based update algorithm would be much faster but the
 165            above can't be improved on much.  Every instruction saved in
 166            the loop above is 2 MIPs/ch!  The for loop above is where the
 167            Blackfin spends most of it's time - about 17 MIPs/ch measured
 168            with speedtest.c with 256 taps (32ms).  Write-back and
 169            Write-through cache gave about the same performance.
 170          */
 171 }
 172
 173 /*
 174    IDEAS for further optimisation of lms_adapt_bg():
 175
 176    1/ The rounding is quite costly.  Could we keep as 32 bit coeffs
 177    then make filter pluck the MS 16-bits of the coeffs when filtering?
 178    However this would lower potential optimisation of filter, as I
 179    think the dual-MAC architecture requires packed 16 bit coeffs.
 180
 181    2/ Block based update would be more efficient, as per comments above,
 182    could use dual MAC architecture.
 183
 184    3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
 185    packing.
 186
 187    4/ Execute the whole e/c in a block of say 20ms rather than sample
 188    by sample.  Processing a few samples every ms is inefficient.
 189 */
 190
 191 #else
 192 static inline void lms_adapt_bg(struct oslec_state *ec, int clean,
 193                                     int shift)
 194 {
 195         int i;
 196
 197         int offset1;
 198         int offset2;
 199         int factor;
 200         int exp;
 201
 202         if (shift > 0)
 203                 factor = clean << shift;
 204         else
 205                 factor = clean >> -shift;
 206
 207         /* Update the FIR taps */
 208
 209         offset2 = ec->curr_pos;
 210         offset1 = ec->taps - offset2;
 211
 212         for (i = ec->taps - 1; i >= offset1; i--) {
 213                 exp = (ec->fir_state_bg.history[i - offset1] * factor);
 214                 ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 215         }
 216         for (; i >= 0; i--) {
 217                 exp = (ec->fir_state_bg.history[i + offset2] * factor);
 218                 ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
 219         }
 220 }
 221 #endif
 222
 223 static inline int top_bit(unsigned int bits)
 224 {
 225         if (bits == 0)
 226                 return -1;
 227         else
 228                 return (int)fls((int32_t)bits)-1;
 229 }
 230
 231 struct oslec_state *oslec_create(int len, int adaption_mode)
 232 {
 233         struct oslec_state *ec;
 234         int i;
 235
 236         ec = kzalloc(sizeof(*ec), GFP_KERNEL);
 237         if (!ec)
 238                 return NULL;
 239
 240         ec->taps = len;
 241         ec->log2taps = top_bit(len);
 242         ec->curr_pos = ec->taps - 1;
 243
 244         for (i = 0; i < 2; i++) {
 245                 ec->fir_taps16[i] =
 246                     kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
 247                 if (!ec->fir_taps16[i])
 248                         goto error_oom;
 249         }
 250
 251         fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
 252         fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
 253
 254         for (i = 0; i < 5; i++)
 255                 ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
 256
 257         ec->cng_level = 1000;
 258         oslec_adaption_mode(ec, adaption_mode);
 259
 260         ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
 261         if (!ec->snapshot)
 262                 goto error_oom;
 263
 264         ec->cond_met = 0;
 265         ec->Pstates = 0;
 266         ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
 267         ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
 268         ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
 269         ec->Lbgn = ec->Lbgn_acc = 0;
 270         ec->Lbgn_upper = 200;
 271         ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
 272
 273         return ec;
 274
 275 error_oom:
 276         for (i = 0; i < 2; i++)
 277                 kfree(ec->fir_taps16[i]);
 278
 279         kfree(ec);
 280         return NULL;
 281 }
 282 EXPORT_SYMBOL_GPL(oslec_create);
 283
 284 void oslec_free(struct oslec_state *ec)
 285 {
 286         int i;
 287
 288         fir16_free(&ec->fir_state);
 289         fir16_free(&ec->fir_state_bg);
 290         for (i = 0; i < 2; i++)
 291                 kfree(ec->fir_taps16[i]);
 292         kfree(ec->snapshot);
 293         kfree(ec);
 294 }
 295 EXPORT_SYMBOL_GPL(oslec_free);
 296
 297 void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
 298 {
 299         ec->adaption_mode = adaption_mode;
 300 }
 301 EXPORT_SYMBOL_GPL(oslec_adaption_mode);
 302
 303 void oslec_flush(struct oslec_state *ec)
 304 {
 305         int i;
 306
 307         ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
 308         ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
 309         ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
 310
 311         ec->Lbgn = ec->Lbgn_acc = 0;
 312         ec->Lbgn_upper = 200;
 313         ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
 314
 315         ec->nonupdate_dwell = 0;
 316
 317         fir16_flush(&ec->fir_state);
 318         fir16_flush(&ec->fir_state_bg);
 319         ec->fir_state.curr_pos = ec->taps - 1;
 320         ec->fir_state_bg.curr_pos = ec->taps - 1;
 321         for (i = 0; i < 2; i++)
 322                 memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
 323
 324         ec->curr_pos = ec->taps - 1;
 325         ec->Pstates = 0;
 326 }
 327 EXPORT_SYMBOL_GPL(oslec_flush);
 328
 329 void oslec_snapshot(struct oslec_state *ec)
 330 {
 331         memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
 332 }
 333 EXPORT_SYMBOL_GPL(oslec_snapshot);
 334
 335 /* Dual Path Echo Canceller */
 336
 337 int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
 338 {
 339         int32_t echo_value;
 340         int clean_bg;
 341         int tmp, tmp1;
 342
 343         /*
 344          * Input scaling was found be required to prevent problems when tx
 345          * starts clipping.  Another possible way to handle this would be the
 346          * filter coefficent scaling.
 347          */
 348
 349         ec->tx = tx;
 350         ec->rx = rx;
 351         tx >>= 1;
 352         rx >>= 1;
 353
 354         /*
 355          * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
 356          * required otherwise values do not track down to 0. Zero at DC, Pole
 357          * at (1-Beta) on real axis.  Some chip sets (like Si labs) don't
 358          * need this, but something like a $10 X100P card does.  Any DC really
 359          * slows down convergence.
 360          *
 361          * Note: removes some low frequency from the signal, this reduces the
 362          * speech quality when listening to samples through headphones but may
 363          * not be obvious through a telephone handset.
 364          *
 365          * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
 366          * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
 367          */
 368
 369         if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
 370                 tmp = rx << 15;
 371
 372                 /*
 373                  * Make sure the gain of the HPF is 1.0. This can still
 374                  * saturate a little under impulse conditions, and it might
 375                  * roll to 32768 and need clipping on sustained peak level
 376                  * signals. However, the scale of such clipping is small, and
 377                  * the error due to any saturation should not markedly affect
 378                  * the downstream processing.
 379                  */
 380                 tmp -= (tmp >> 4);
 381
 382                 ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
 383
 384                 /*
 385                  * hard limit filter to prevent clipping.  Note that at this
 386                  * stage rx should be limited to +/- 16383 due to right shift
 387                  * above
 388                  */
 389                 tmp1 = ec->rx_1 >> 15;
 390                 if (tmp1 > 16383)
 391                         tmp1 = 16383;
 392                 if (tmp1 < -16383)
 393                         tmp1 = -16383;
 394                 rx = tmp1;
 395                 ec->rx_2 = tmp;
 396         }
 397
 398         /* Block average of power in the filter states.  Used for
 399            adaption power calculation. */
 400
 401         {
 402                 int new, old;
 403
 404                 /* efficient "out with the old and in with the new" algorithm so
 405                    we don't have to recalculate over the whole block of
 406                    samples. */
 407                 new = (int)tx * (int)tx;
 408                 old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
 409                     (int)ec->fir_state.history[ec->fir_state.curr_pos];
 410                 ec->Pstates +=
 411                     ((new - old) + (1 << (ec->log2taps-1))) >> ec->log2taps;
 412                 if (ec->Pstates < 0)
 413                         ec->Pstates = 0;
 414         }
 415
 416         /* Calculate short term average levels using simple single pole IIRs */
 417
 418         ec->Ltxacc += abs(tx) - ec->Ltx;
 419         ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
 420         ec->Lrxacc += abs(rx) - ec->Lrx;
 421         ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
 422
 423         /* Foreground filter */
 424
 425         ec->fir_state.coeffs = ec->fir_taps16[0];
 426         echo_value = fir16(&ec->fir_state, tx);
 427         ec->clean = rx - echo_value;
 428         ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
 429         ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
 430
 431         /* Background filter */
 432
 433         echo_value = fir16(&ec->fir_state_bg, tx);
 434         clean_bg = rx - echo_value;
 435         ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
 436         ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
 437
 438         /* Background Filter adaption */
 439
 440         /* Almost always adap bg filter, just simple DT and energy
 441            detection to minimise adaption in cases of strong double talk.
 442            However this is not critical for the dual path algorithm.
 443          */
 444         ec->factor = 0;
 445         ec->shift = 0;
 446         if ((ec->nonupdate_dwell == 0)) {
 447                 int P, logP, shift;
 448
 449                 /* Determine:
 450
 451                    f = Beta * clean_bg_rx/P ------ (1)
 452
 453                    where P is the total power in the filter states.
 454
 455                    The Boffins have shown that if we obey (1) we converge
 456                    quickly and avoid instability.
 457
 458                    The correct factor f must be in Q30, as this is the fixed
 459                    point format required by the lms_adapt_bg() function,
 460                    therefore the scaled version of (1) is:
 461
 462                    (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
 463                    factor      = (2^30) * Beta * clean_bg_rx/P     ----- (2)
 464
 465                    We have chosen Beta = 0.25 by experiment, so:
 466
 467                    factor      = (2^30) * (2^-2) * clean_bg_rx/P
 468
 469                                                 (30 - 2 - log2(P))
 470                    factor      = clean_bg_rx 2                     ----- (3)
 471
 472                    To avoid a divide we approximate log2(P) as top_bit(P),
 473                    which returns the position of the highest non-zero bit in
 474                    P.  This approximation introduces an error as large as a
 475                    factor of 2, but the algorithm seems to handle it OK.
 476
 477                    Come to think of it a divide may not be a big deal on a
 478                    modern DSP, so its probably worth checking out the cycles
 479                    for a divide versus a top_bit() implementation.
 480                  */
 481
 482                 P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
 483                 logP = top_bit(P) + ec->log2taps;
 484                 shift = 30 - 2 - logP;
 485                 ec->shift = shift;
 486
 487                 lms_adapt_bg(ec, clean_bg, shift);
 488         }
 489
 490         /* very simple DTD to make sure we dont try and adapt with strong
 491            near end speech */
 492
 493         ec->adapt = 0;
 494         if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
 495                 ec->nonupdate_dwell = DTD_HANGOVER;
 496         if (ec->nonupdate_dwell)
 497                 ec->nonupdate_dwell--;
 498
 499         /* Transfer logic */
 500
 501         /* These conditions are from the dual path paper [1], I messed with
 502            them a bit to improve performance. */
 503
 504         if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
 505             (ec->nonupdate_dwell == 0) &&
 506             /* (ec->Lclean_bg < 0.875*ec->Lclean) */
 507             (8 * ec->Lclean_bg < 7 * ec->Lclean) &&
 508             /* (ec->Lclean_bg < 0.125*ec->Ltx) */
 509             (8 * ec->Lclean_bg < ec->Ltx)) {
 510                 if (ec->cond_met == 6) {
 511                         /*
 512                          * BG filter has had better results for 6 consecutive
 513                          * samples
 514                          */
 515                         ec->adapt = 1;
 516                         memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
 517                                 ec->taps * sizeof(int16_t));
 518                 } else
 519                         ec->cond_met++;
 520         } else
 521                 ec->cond_met = 0;
 522
 523         /* Non-Linear Processing */
 524
 525         ec->clean_nlp = ec->clean;
 526         if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
 527                 /*
 528                  * Non-linear processor - a fancy way to say "zap small
 529                  * signals, to avoid residual echo due to (uLaw/ALaw)
 530                  * non-linearity in the channel.".
 531                  */
 532
 533                 if ((16 * ec->Lclean < ec->Ltx)) {
 534                         /*
 535                          * Our e/c has improved echo by at least 24 dB (each
 536                          * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
 537                          * 6+6+6+6=24dB)
 538                          */
 539                         if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
 540                                 ec->cng_level = ec->Lbgn;
 541
 542                                 /*
 543                                  * Very elementary comfort noise generation.
 544                                  * Just random numbers rolled off very vaguely
 545                                  * Hoth-like.  DR: This noise doesn't sound
 546                                  * quite right to me - I suspect there are some
 547                                  * overlfow issues in the filtering as it's too
 548                                  * "crackly".
 549                                  * TODO: debug this, maybe just play noise at
 550                                  * high level or look at spectrum.
 551                                  */
 552
 553                                 ec->cng_rndnum =
 554                                     1664525U * ec->cng_rndnum + 1013904223U;
 555                                 ec->cng_filter =
 556                                     ((ec->cng_rndnum & 0xFFFF) - 32768 +
 557                                      5 * ec->cng_filter) >> 3;
 558                                 ec->clean_nlp =
 559                                     (ec->cng_filter * ec->cng_level * 8) >> 14;
 560
 561                         } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
 562                                 /* This sounds much better than CNG */
 563                                 if (ec->clean_nlp > ec->Lbgn)
 564                                         ec->clean_nlp = ec->Lbgn;
 565                                 if (ec->clean_nlp < -ec->Lbgn)
 566                                         ec->clean_nlp = -ec->Lbgn;
 567                         } else {
 568                                 /*
 569                                  * just mute the residual, doesn't sound very
 570                                  * good, used mainly in G168 tests
 571                                  */
 572                                 ec->clean_nlp = 0;
 573                         }
 574                 } else {
 575                         /*
 576                          * Background noise estimator.  I tried a few
 577                          * algorithms here without much luck.  This very simple
 578                          * one seems to work best, we just average the level
 579                          * using a slow (1 sec time const) filter if the
 580                          * current level is less than a (experimentally
 581                          * derived) constant.  This means we dont include high
 582                          * level signals like near end speech.  When combined
 583                          * with CNG or especially CLIP seems to work OK.
 584                          */
 585                         if (ec->Lclean < 40) {
 586                                 ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
 587                                 ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
 588                         }
 589                 }
 590         }
 591
 592         /* Roll around the taps buffer */
 593         if (ec->curr_pos <= 0)
 594                 ec->curr_pos = ec->taps;
 595         ec->curr_pos--;
 596
 597         if (ec->adaption_mode & ECHO_CAN_DISABLE)
 598                 ec->clean_nlp = rx;
 599
 600         /* Output scaled back up again to match input scaling */
 601
 602         return (int16_t) ec->clean_nlp << 1;
 603 }
 604 EXPORT_SYMBOL_GPL(oslec_update);
 605
 606 /* This function is separated from the echo canceller is it is usually called
 607    as part of the tx process.  See rx HP (DC blocking) filter above, it's
 608    the same design.
 609
 610    Some soft phones send speech signals with a lot of low frequency
 611    energy, e.g. down to 20Hz.  This can make the hybrid non-linear
 612    which causes the echo canceller to fall over.  This filter can help
 613    by removing any low frequency before it gets to the tx port of the
 614    hybrid.
 615
 616    It can also help by removing and DC in the tx signal.  DC is bad
 617    for LMS algorithms.
 618
 619    This is one of the classic DC removal filters, adjusted to provide
 620    sufficient bass rolloff to meet the above requirement to protect hybrids
 621    from things that upset them. The difference between successive samples
 622    produces a lousy HPF, and then a suitably placed pole flattens things out.
 623    The final result is a nicely rolled off bass end. The filtering is
 624    implemented with extended fractional precision, which noise shapes things,
 625    giving very clean DC removal.
 626 */
 627
 628 int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
 629 {
 630         int tmp, tmp1;
 631
 632         if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
 633                 tmp = tx << 15;
 634
 635                 /*
 636                  * Make sure the gain of the HPF is 1.0. The first can still
 637                  * saturate a little under impulse conditions, and it might
 638                  * roll to 32768 and need clipping on sustained peak level
 639                  * signals. However, the scale of such clipping is small, and
 640                  * the error due to any saturation should not markedly affect
 641                  * the downstream processing.
 642                  */
 643                 tmp -= (tmp >> 4);
 644
 645                 ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
 646                 tmp1 = ec->tx_1 >> 15;
 647                 if (tmp1 > 32767)
 648                         tmp1 = 32767;
 649                 if (tmp1 < -32767)
 650                         tmp1 = -32767;
 651                 tx = tmp1;
 652                 ec->tx_2 = tmp;
 653         }
 654
 655         return tx;
 656 }
 657 EXPORT_SYMBOL_GPL(oslec_hpf_tx);
 658
 659 MODULE_LICENSE("GPL");
 660 MODULE_AUTHOR("David Rowe");
 661 MODULE_DESCRIPTION("Open Source Line Echo Canceller");
 662 MODULE_VERSION("0.3.0");