apps/codecs/libspeex/filters.c

   1 /* Copyright (C) 2002-2006 Jean-Marc Valin
   2    File: filters.c
   3    Various analysis/synthesis filters
   4
   5    Redistribution and use in source and binary forms, with or without
   6    modification, are permitted provided that the following conditions
   7    are met:
   8
   9    - Redistributions of source code must retain the above copyright
  10    notice, this list of conditions and the following disclaimer.
  11
  12    - Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15
  16    - Neither the name of the Xiph.org Foundation nor the names of its
  17    contributors may be used to endorse or promote products derived from
  18    this software without specific prior written permission.
  19
  20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
  24    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33 #ifdef HAVE_CONFIG_H
  34 #include "config-speex.h"
  35 #endif
  36
  37 #include "filters.h"
  38 #include "stack_alloc.h"
  39 #include "arch.h"
  40 #include "math_approx.h"
  41 #include "ltp.h"
  42 #include <math.h>
  43
  44 #ifdef _USE_SSE
  45 #include "filters_sse.h"
  46 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
  47 #include "filters_arm4.h"
  48 #define OVERRIDE_IIR_MEM16
  49 #define OVERRIDE_QMF_SYNTH
  50 #define OVERRIDE_SIGNAL_MUL
  51 #elif defined (COLDFIRE_ASM)
  52 #define OVERRIDE_IIR_MEM16
  53 #define OVERRIDE_QMF_SYNTH
  54 #define OVERRIDE_SIGNAL_MUL
  55 #elif defined (BFIN_ASM)
  56 #include "filters_bfin.h"
  57 #endif
  58
  59
  60
  61 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
  62 {
  63    int i;
  64    spx_word16_t tmp=gamma;
  65    for (i=0;i<order;i++)
  66    {
  67       lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
  68       tmp = MULT16_16_P15(tmp, gamma);
  69    }
  70 }
  71
  72 void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
  73 {
  74    int i;
  75    for (i=0;i<len;i++)
  76    {
  77       /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
  78       if (!(vec[i]>=min_val && vec[i] <= max_val))
  79       {
  80          if (vec[i] < min_val)
  81             vec[i] = min_val;
  82          else if (vec[i] > max_val)
  83             vec[i] = max_val;
  84          else /* Has to be NaN */
  85             vec[i] = 0;
  86       }
  87    }
  88 }
  89
  90 void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
  91 {
  92    int i;
  93 #ifdef FIXED_POINT
  94    static const spx_word16_t Pcoef[5][3] ICONST_ATTR = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
  95    static const spx_word16_t Zcoef[5][3] ICONST_ATTR = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
  96 #else
  97    const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
  98    const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
  99 #endif
 100    const spx_word16_t *den, *num;
 101    if (filtID>4)
 102       filtID=4;
 103
 104    den = Pcoef[filtID]; num = Zcoef[filtID];
 105    /*return;*/
 106    for (i=0;i<len;i++)
 107    {
 108       spx_word16_t yi;
 109       spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
 110       yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
 111       mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
 112       mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
 113       y[i] = yi;
 114    }
 115 }
 116
 117 #ifdef FIXED_POINT
 118
 119 #ifndef OVERRIDE_SIGNAL_MUL
 120 /* FIXME: These functions are ugly and probably introduce too much error */
 121 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 122 {
 123    int i;
 124    for (i=0;i<len;i++)
 125    {
 126       y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
 127    }
 128 }
 129 #endif
 130
 131 #ifndef SPEEX_DISABLE_ENCODER
 132 void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
 133 {
 134    int i;
 135    if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
 136    {
 137       spx_word16_t scale_1;
 138       scale = PSHR32(scale, SIG_SHIFT);
 139       scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
 140       for (i=0;i<len;i++)
 141       {
 142          y[i] = MULT16_16_P15(scale_1, x[i]);
 143       }
 144    } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
 145       spx_word16_t scale_1;
 146       scale = PSHR32(scale, SIG_SHIFT-5);
 147       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
 148       for (i=0;i<len;i++)
 149       {
 150          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
 151       }
 152    } else {
 153       spx_word16_t scale_1;
 154       scale = PSHR32(scale, SIG_SHIFT-7);
 155       if (scale < 5)
 156          scale = 5;
 157       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
 158       for (i=0;i<len;i++)
 159       {
 160          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
 161       }
 162    }
 163 }
 164 #endif /* SPEEX_DISABLE_ENCODER */
 165
 166 #else
 167
 168 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 169 {
 170    int i;
 171    for (i=0;i<len;i++)
 172       y[i] = scale*x[i];
 173 }
 174
 175 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 176 {
 177    int i;
 178    float scale_1 = 1/scale;
 179    for (i=0;i<len;i++)
 180       y[i] = scale_1*x[i];
 181 }
 182 #endif
 183
 184
 185
 186 #ifdef FIXED_POINT
 187
 188
 189
 190 #ifndef SPEEX_DISABLE_ENCODER
 191 spx_word16_t compute_rms(const spx_sig_t *x, int len)
 192 {
 193    int i;
 194    spx_word32_t sum=0;
 195    spx_sig_t max_val=1;
 196    int sig_shift;
 197
 198    for (i=0;i<len;i++)
 199    {
 200       spx_sig_t tmp = x[i];
 201       if (tmp<0)
 202          tmp = -tmp;
 203       if (tmp > max_val)
 204          max_val = tmp;
 205    }
 206
 207    sig_shift=0;
 208    while (max_val>16383)
 209    {
 210       sig_shift++;
 211       max_val >>= 1;
 212    }
 213
 214    for (i=0;i<len;i+=4)
 215    {
 216       spx_word32_t sum2=0;
 217       spx_word16_t tmp;
 218       tmp = EXTRACT16(SHR32(x[i],sig_shift));
 219       sum2 = MAC16_16(sum2,tmp,tmp);
 220       tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
 221       sum2 = MAC16_16(sum2,tmp,tmp);
 222       tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
 223       sum2 = MAC16_16(sum2,tmp,tmp);
 224       tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
 225       sum2 = MAC16_16(sum2,tmp,tmp);
 226       sum = ADD32(sum,SHR32(sum2,6));
 227    }
 228
 229    return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
 230 }
 231 #endif /*SPEEX_DISABLE_ENCODER */
 232
 233 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
 234 {
 235    int i;
 236    spx_word16_t max_val=10;
 237
 238    for (i=0;i<len;i++)
 239    {
 240       spx_sig_t tmp = x[i];
 241       if (tmp<0)
 242          tmp = -tmp;
 243       if (tmp > max_val)
 244          max_val = tmp;
 245    }
 246    if (max_val>16383)
 247    {
 248       spx_word32_t sum=0;
 249       for (i=0;i<len;i+=4)
 250       {
 251          spx_word32_t sum2=0;
 252          sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
 253          sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
 254          sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
 255          sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
 256          sum = ADD32(sum,SHR32(sum2,6));
 257       }
 258       return SHL16(spx_sqrt(DIV32(sum,len)),4);
 259    } else {
 260       spx_word32_t sum=0;
 261       int sig_shift=0;
 262       if (max_val < 8192)
 263          sig_shift=1;
 264       if (max_val < 4096)
 265          sig_shift=2;
 266       if (max_val < 2048)
 267          sig_shift=3;
 268       for (i=0;i<len;i+=4)
 269       {
 270          spx_word32_t sum2=0;
 271          sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
 272          sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
 273          sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
 274          sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
 275          sum = ADD32(sum,SHR32(sum2,6));
 276       }
 277       return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
 278    }
 279 }
 280
 281 #ifndef OVERRIDE_NORMALIZE16
 282 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
 283 {
 284    int i;
 285    spx_sig_t max_val=1;
 286    int sig_shift;
 287
 288    for (i=0;i<len;i++)
 289    {
 290       spx_sig_t tmp = x[i];
 291       if (tmp<0)
 292          tmp = NEG32(tmp);
 293       if (tmp >= max_val)
 294          max_val = tmp;
 295    }
 296
 297    sig_shift=0;
 298    while (max_val>max_scale)
 299    {
 300       sig_shift++;
 301       max_val >>= 1;
 302    }
 303
 304    for (i=0;i<len;i++)
 305       y[i] = EXTRACT16(SHR32(x[i], sig_shift));
 306
 307    return sig_shift;
 308 }
 309 #endif
 310
 311 #else
 312
 313 spx_word16_t compute_rms(const spx_sig_t *x, int len)
 314 {
 315    int i;
 316    float sum=0;
 317    for (i=0;i<len;i++)
 318    {
 319       sum += x[i]*x[i];
 320    }
 321    return sqrt(.1+sum/len);
 322 }
 323 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
 324 {
 325    return compute_rms(x, len);
 326 }
 327 #endif
 328
 329
 330
 331 #ifndef SPEEX_DISABLE_ENCODER
 332 #ifndef OVERRIDE_FILTER_MEM16
 333 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 334 {
 335    int i,j;
 336    spx_word16_t xi,yi,nyi;
 337    for (i=0;i<N;i++)
 338    {
 339       xi= x[i];
 340       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 341       nyi = NEG16(yi);
 342       for (j=0;j<ord-1;j++)
 343       {
 344          mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
 345       }
 346       mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
 347       y[i] = yi;
 348    }
 349 }
 350 #endif
 351 #endif /* SPEEX_DISABLE_ENCODER */
 352
 353 #ifndef OVERRIDE_IIR_MEM16
 354 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 355 {
 356     (void)stack;
 357    int i,j;
 358    spx_word16_t yi,nyi;
 359
 360    for (i=0;i<N;i++)
 361    {
 362       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 363       nyi = NEG16(yi);
 364       for (j=0;j<ord-1;j++)
 365       {
 366          mem[j] = MAC16_16(mem[j+1],den[j],nyi);
 367       }
 368       mem[ord-1] = MULT16_16(den[ord-1],nyi);
 369       y[i] = yi;
 370    }
 371 }
 372 #endif
 373
 374 #ifndef SPEEX_DISABLE_ENCODER
 375 #ifndef OVERRIDE_FIR_MEM16
 376 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 377 {
 378    int i,j;
 379    spx_word16_t xi,yi;
 380
 381    for (i=0;i<N;i++)
 382    {
 383       xi=x[i];
 384       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 385       for (j=0;j<ord-1;j++)
 386       {
 387          mem[j] = MAC16_16(mem[j+1], num[j],xi);
 388       }
 389       mem[ord-1] = MULT16_16(num[ord-1],xi);
 390       y[i] = yi;
 391    }
 392 }
 393 #endif
 394
 395
 396 void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 397 {
 398    int i;
 399    VARDECL(spx_mem_t *mem);
 400    ALLOC(mem, ord, spx_mem_t);
 401    for (i=0;i<ord;i++)
 402       mem[i]=0;
 403    iir_mem16(xx, ak, y, N, ord, mem, stack);
 404    for (i=0;i<ord;i++)
 405       mem[i]=0;
 406    filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
 407 }
 408 void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 409 {
 410    int i;
 411    VARDECL(spx_mem_t *mem);
 412    ALLOC(mem, ord, spx_mem_t);
 413    for (i=0;i<ord;i++)
 414       mem[i]=0;
 415    filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
 416    for (i=0;i<ord;i++)
 417       mem[i]=0;
 418    fir_mem16(y, awk2, y, N, ord, mem, stack);
 419 }
 420
 421
 422 #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
 423 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 424 {
 425    int i,j;
 426    spx_word16_t y1, ny1i, ny2i;
 427    VARDECL(spx_mem_t *mem1);
 428    VARDECL(spx_mem_t *mem2);
 429    ALLOC(mem1, ord, spx_mem_t);
 430    ALLOC(mem2, ord, spx_mem_t);
 431
 432    y[0] = LPC_SCALING;
 433    for (i=0;i<ord;i++)
 434       y[i+1] = awk1[i];
 435    i++;
 436    for (;i<N;i++)
 437       y[i] = VERY_SMALL;
 438    for (i=0;i<ord;i++)
 439       mem1[i] = mem2[i] = 0;
 440    for (i=0;i<N;i++)
 441    {
 442       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
 443       ny1i = NEG16(y1);
 444       y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
 445       ny2i = NEG16(y[i]);
 446       for (j=0;j<ord-1;j++)
 447       {
 448          mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
 449          mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
 450       }
 451       mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
 452       mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
 453    }
 454 }
 455 #endif
 456
 457 /* Decomposes a signal into low-band and high-band using a QMF */
 458 void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
 459 {
 460    int i,j,k,M2;
 461    VARDECL(spx_word16_t *a);
 462    VARDECL(spx_word16_t *x);
 463    spx_word16_t *x2;
 464
 465    ALLOC(a, M, spx_word16_t);
 466    ALLOC(x, N+M-1, spx_word16_t);
 467    x2=x+M-1;
 468    M2=M>>1;
 469    for (i=0;i<M;i++)
 470       a[M-i-1]= aa[i];
 471    for (i=0;i<M-1;i++)
 472       x[i]=mem[M-i-2];
 473    for (i=0;i<N;i++)
 474       x[i+M-1]=SHR16(xx[i],1);
 475    for (i=0;i<M-1;i++)
 476       mem[i]=SHR16(xx[N-i-1],1);
 477    for (i=0,k=0;i<N;i+=2,k++)
 478    {
 479       spx_word32_t y1k=0, y2k=0;
 480       for (j=0;j<M2;j++)
 481       {
 482          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
 483          y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
 484          j++;
 485          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
 486          y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
 487       }
 488       y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
 489       y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
 490    }
 491 }
 492 #endif /* SPEEX_DISABLE_ENCODER */
 493
 494 #ifndef OVERRIDE_QMF_SYNTH
 495 /* Re-synthesised a signal from the QMF low-band and high-band signals */
 496 void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
 497    /* assumptions:
 498       all odd x[i] are zero -- well, actually they are left out of the array now
 499       N and M are multiples of 4 */
 500 {
 501     (void)stack;
 502    int i, j;
 503    int M2, N2;
 504    VARDECL(spx_word16_t *xx1);
 505    VARDECL(spx_word16_t *xx2);
 506
 507    M2 = M>>1;
 508    N2 = N>>1;
 509    ALLOC(xx1, M2+N2, spx_word16_t);
 510    ALLOC(xx2, M2+N2, spx_word16_t);
 511
 512    for (i = 0; i < N2; i++)
 513       xx1[i] = x1[N2-1-i];
 514    for (i = 0; i < M2; i++)
 515       xx1[N2+i] = mem1[2*i+1];
 516    for (i = 0; i < N2; i++)
 517       xx2[i] = x2[N2-1-i];
 518    for (i = 0; i < M2; i++)
 519       xx2[N2+i] = mem2[2*i+1];
 520
 521    for (i = 0; i < N2; i += 2) {
 522       spx_sig_t y0, y1, y2, y3;
 523       spx_word16_t x10, x20;
 524
 525       y0 = y1 = y2 = y3 = 0;
 526       x10 = xx1[N2-2-i];
 527       x20 = xx2[N2-2-i];
 528
 529       for (j = 0; j < M2; j += 2) {
 530          spx_word16_t x11, x21;
 531          spx_word16_t a0, a1;
 532
 533          a0 = a[2*j];
 534          a1 = a[2*j+1];
 535          x11 = xx1[N2-1+j-i];
 536          x21 = xx2[N2-1+j-i];
 537
 538 #ifdef FIXED_POINT
 539          /* We multiply twice by the same coef to avoid overflows */
 540          y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
 541          y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
 542          y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
 543          y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
 544 #else
 545          y0 = ADD32(y0,MULT16_16(a0, x11-x21));
 546          y1 = ADD32(y1,MULT16_16(a1, x11+x21));
 547          y2 = ADD32(y2,MULT16_16(a0, x10-x20));
 548          y3 = ADD32(y3,MULT16_16(a1, x10+x20));
 549 #endif
 550          a0 = a[2*j+2];
 551          a1 = a[2*j+3];
 552          x10 = xx1[N2+j-i];
 553          x20 = xx2[N2+j-i];
 554
 555 #ifdef FIXED_POINT
 556          /* We multiply twice by the same coef to avoid overflows */
 557          y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
 558          y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
 559          y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
 560          y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
 561 #else
 562          y0 = ADD32(y0,MULT16_16(a0, x10-x20));
 563          y1 = ADD32(y1,MULT16_16(a1, x10+x20));
 564          y2 = ADD32(y2,MULT16_16(a0, x11-x21));
 565          y3 = ADD32(y3,MULT16_16(a1, x11+x21));
 566 #endif
 567       }
 568 #ifdef FIXED_POINT
 569       y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
 570       y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
 571       y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
 572       y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
 573 #else
 574       /* Normalize up explicitly if we're in float */
 575       y[2*i] = 2.f*y0;
 576       y[2*i+1] = 2.f*y1;
 577       y[2*i+2] = 2.f*y2;
 578       y[2*i+3] = 2.f*y3;
 579 #endif
 580    }
 581
 582    for (i = 0; i < M2; i++)
 583       mem1[2*i+1] = xx1[i];
 584    for (i = 0; i < M2; i++)
 585       mem2[2*i+1] = xx2[i];
 586 }
 587 #endif
 588
 589 #ifdef FIXED_POINT
 590 #if 0
 591 const spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
 592                                  {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
 593                                  {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
 594 #else
 595 const spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
 596                                 {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
 597                                  {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
 598 #endif
 599 #else
 600 #if 0
 601 const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
 602                           {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
 603                           {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
 604 #else
 605 const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
 606                           {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
 607                           {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
 608 #endif
 609 #endif
 610
 611 static int interp_pitch(
 612 spx_word16_t *exc,          /*decoded excitation*/
 613 spx_word16_t *interp,          /*decoded excitation*/
 614 int pitch,               /*pitch period*/
 615 int len
 616 )
 617 {
 618    int i,j,k;
 619    spx_word32_t corr[4][7];
 620    spx_word32_t maxcorr;
 621    int maxi, maxj;
 622    for (i=0;i<7;i++)
 623    {
 624       corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
 625    }
 626    for (i=0;i<3;i++)
 627    {
 628       for (j=0;j<7;j++)
 629       {
 630          int i1, i2;
 631          spx_word32_t tmp=0;
 632          i1 = 3-j;
 633          if (i1<0)
 634             i1 = 0;
 635          i2 = 10-j;
 636          if (i2>7)
 637             i2 = 7;
 638          for (k=i1;k<i2;k++)
 639             tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
 640          corr[i+1][j] = tmp;
 641       }
 642    }
 643    maxi=maxj=0;
 644    maxcorr = corr[0][0];
 645    for (i=0;i<4;i++)
 646    {
 647       for (j=0;j<7;j++)
 648       {
 649          if (corr[i][j] > maxcorr)
 650          {
 651             maxcorr = corr[i][j];
 652             maxi=i;
 653             maxj=j;
 654          }
 655       }
 656    }
 657    for (i=0;i<len;i++)
 658    {
 659       spx_word32_t tmp = 0;
 660       if (maxi>0)
 661       {
 662          for (k=0;k<7;k++)
 663          {
 664             tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
 665          }
 666       } else {
 667          tmp = SHL32(exc[i-(pitch-maxj+3)],15);
 668       }
 669       interp[i] = PSHR32(tmp,15);
 670    }
 671    return pitch-maxj+3;
 672 }
 673
 674 void multicomb(
 675 spx_word16_t *exc,          /*decoded excitation*/
 676 spx_word16_t *new_exc,      /*enhanced excitation*/
 677 spx_coef_t *ak,           /*LPC filter coefs*/
 678 int p,               /*LPC order*/
 679 int nsf,             /*sub-frame size*/
 680 int pitch,           /*pitch period*/
 681 int max_pitch,
 682 spx_word16_t  comb_gain,    /*gain of comb filter*/
 683 char *stack
 684 )
 685 {
 686     (void)ak;
 687     (void)p;
 688     (void)stack;
 689    int i;
 690    VARDECL(spx_word16_t *iexc);
 691    spx_word16_t old_ener, new_ener;
 692    int corr_pitch;
 693
 694    spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
 695    spx_word32_t corr0, corr1;
 696    spx_word16_t gain0, gain1;
 697    spx_word16_t pgain1, pgain2;
 698    spx_word16_t c1, c2;
 699    spx_word16_t g1, g2;
 700    spx_word16_t ngain;
 701    spx_word16_t gg1, gg2;
 702 #ifdef FIXED_POINT
 703    int scaledown=0;
 704 #endif
 705 #if 0 /* Set to 1 to enable full pitch search */
 706    int nol_pitch[6];
 707    spx_word16_t nol_pitch_coef[6];
 708    spx_word16_t ol_pitch_coef;
 709    open_loop_nbest_pitch(exc, 20, 120, nsf,
 710                          nol_pitch, nol_pitch_coef, 6, stack);
 711    corr_pitch=nol_pitch[0];
 712    ol_pitch_coef = nol_pitch_coef[0];
 713    /*Try to remove pitch multiples*/
 714    for (i=1;i<6;i++)
 715    {
 716 #ifdef FIXED_POINT
 717       if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
 718 #else
 719       if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
 720 #endif
 721          (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
 722          ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
 723       {
 724          corr_pitch = nol_pitch[i];
 725       }
 726    }
 727 #else
 728    corr_pitch = pitch;
 729 #endif
 730
 731    ALLOC(iexc, 2*nsf, spx_word16_t);
 732
 733    interp_pitch(exc, iexc, corr_pitch, 80);
 734    if (corr_pitch>max_pitch)
 735       interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
 736    else
 737       interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
 738
 739 #ifdef FIXED_POINT
 740    for (i=0;i<nsf;i++)
 741    {
 742       if (ABS16(exc[i])>16383)
 743       {
 744          scaledown = 1;
 745          break;
 746       }
 747    }
 748    if (scaledown)
 749    {
 750       for (i=0;i<nsf;i++)
 751          exc[i] = SHR16(exc[i],1);
 752       for (i=0;i<2*nsf;i++)
 753          iexc[i] = SHR16(iexc[i],1);
 754    }
 755 #endif
 756    /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
 757
 758    /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
 759    iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
 760    iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
 761    exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
 762    corr0  = inner_prod(iexc,exc,nsf);
 763    if (corr0<0)
 764       corr0=0;
 765    corr1 = inner_prod(iexc+nsf,exc,nsf);
 766    if (corr1<0)
 767       corr1=0;
 768 #ifdef FIXED_POINT
 769    /* Doesn't cost much to limit the ratio and it makes the rest easier */
 770    if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
 771       iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
 772    if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
 773       iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
 774 #endif
 775    if (corr0 > MULT16_16(iexc0_mag,exc_mag))
 776       pgain1 = QCONST16(1., 14);
 777    else
 778       pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
 779    if (corr1 > MULT16_16(iexc1_mag,exc_mag))
 780       pgain2 = QCONST16(1., 14);
 781    else
 782       pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
 783    gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
 784    gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
 785    if (comb_gain>0)
 786    {
 787 #ifdef FIXED_POINT
 788       c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
 789       c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
 790 #else
 791       c1 = .4*comb_gain+.07;
 792       c2 = .5+1.72*(c1-.07);
 793 #endif
 794    } else
 795    {
 796       c1=c2=0;
 797    }
 798 #ifdef FIXED_POINT
 799    g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
 800    g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
 801 #else
 802    g1 = 1-c2*pgain1*pgain1;
 803    g2 = 1-c2*pgain2*pgain2;
 804 #endif
 805    if (g1<c1)
 806       g1 = c1;
 807    if (g2<c1)
 808       g2 = c1;
 809    g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
 810    g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
 811    if (corr_pitch>max_pitch)
 812    {
 813       gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
 814       gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
 815    } else {
 816       gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
 817       gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
 818    }
 819    for (i=0;i<nsf;i++)
 820       new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
 821    /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
 822    new_ener = compute_rms16(new_exc, nsf);
 823    old_ener = compute_rms16(exc, nsf);
 824
 825    if (old_ener < 1)
 826       old_ener = 1;
 827    if (new_ener < 1)
 828       new_ener = 1;
 829    if (old_ener > new_ener)
 830       old_ener = new_ener;
 831    ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
 832
 833    for (i=0;i<nsf;i++)
 834       new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
 835 #ifdef FIXED_POINT
 836    if (scaledown)
 837    {
 838       for (i=0;i<nsf;i++)
 839          exc[i] = SHL16(exc[i],1);
 840       for (i=0;i<nsf;i++)
 841          new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
 842    }
 843 #endif
 844 }
 845