media/libopus/celt/celt_lpc.c

   1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
   2    Written by Jean-Marc Valin */
   3 /*
   4    Redistribution and use in source and binary forms, with or without
   5    modification, are permitted provided that the following conditions
   6    are met:
   7
   8    - Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10
  11    - Redistributions in binary form must reproduce the above copyright
  12    notice, this list of conditions and the following disclaimer in the
  13    documentation and/or other materials provided with the distribution.
  14
  15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
  19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26 */
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include "celt_lpc.h"
  33 #include "stack_alloc.h"
  34 #include "mathops.h"
  35 #include "pitch.h"
  36
  37 void _celt_lpc(
  38       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
  39 const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
  40 int          p
  41 )
  42 {
  43    int i, j;
  44    opus_val32 r;
  45    opus_val32 error = ac[0];
  46 #ifdef FIXED_POINT
  47    opus_val32 lpc[LPC_ORDER];
  48 #else
  49    float *lpc = _lpc;
  50 #endif
  51
  52    OPUS_CLEAR(lpc, p);
  53 #ifdef FIXED_POINT
  54    if (ac[0] != 0)
  55 #else
  56    if (ac[0] > 1e-10f)
  57 #endif
  58    {
  59       for (i = 0; i < p; i++) {
  60          /* Sum up this iteration's reflection coefficient */
  61          opus_val32 rr = 0;
  62          for (j = 0; j < i; j++)
  63             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
  64          rr += SHR32(ac[i + 1],6);
  65          r = -frac_div32(SHL32(rr,6), error);
  66          /*  Update LPC coefficients and total error */
  67          lpc[i] = SHR32(r,6);
  68          for (j = 0; j < (i+1)>>1; j++)
  69          {
  70             opus_val32 tmp1, tmp2;
  71             tmp1 = lpc[j];
  72             tmp2 = lpc[i-1-j];
  73             lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
  74             lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
  75          }
  76
  77          error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
  78          /* Bail out once we get 30 dB gain */
  79 #ifdef FIXED_POINT
  80          if (error<=SHR32(ac[0],10))
  81             break;
  82 #else
  83          if (error<=.001f*ac[0])
  84             break;
  85 #endif
  86       }
  87    }
  88 #ifdef FIXED_POINT
  89    {
  90       /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
  91          This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
  92          fixes should also be applied there. */
  93       int iter, idx = 0;
  94       opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
  95
  96       for (iter = 0; iter < 10; iter++) {
  97          maxabs = 0;
  98          for (i = 0; i < p; i++) {
  99             absval = ABS32(lpc[i]);
 100             if (absval > maxabs) {
 101                maxabs = absval;
 102                idx = i;
 103             }
 104          }
 105          maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
 106
 107          if (maxabs > 32767) {
 108             maxabs = MIN32(maxabs, 163838);
 109             chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
 110                                                     SHR32(MULT32_32_32(maxabs, idx + 1), 2));
 111             chirp_minus_one_Q16 = chirp_Q16 - 65536;
 112
 113             /* Apply bandwidth expansion. */
 114             for (i = 0; i < p - 1; i++) {
 115                lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
 116                chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
 117             }
 118             lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
 119          } else {
 120             break;
 121          }
 122       }
 123
 124       if (iter == 10) {
 125          /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
 126             fall back to the A(z)=1 filter. */
 127          OPUS_CLEAR(lpc, p);
 128          _lpc[0] = 4096;  /* Q12 */
 129       } else {
 130          for (i = 0; i < p; i++) {
 131             _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
 132          }
 133       }
 134    }
 135 #endif
 136 }
 137
 138
 139 void celt_fir_c(
 140          const opus_val16 *x,
 141          const opus_val16 *num,
 142          opus_val16 *y,
 143          int N,
 144          int ord,
 145          int arch)
 146 {
 147    int i,j;
 148    VARDECL(opus_val16, rnum);
 149    SAVE_STACK;
 150    celt_assert(x != y);
 151    ALLOC(rnum, ord, opus_val16);
 152    for(i=0;i<ord;i++)
 153       rnum[i] = num[ord-i-1];
 154    for (i=0;i<N-3;i+=4)
 155    {
 156       opus_val32 sum[4];
 157       sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
 158       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
 159       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
 160       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
 161       xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
 162       y[i  ] = SROUND16(sum[0], SIG_SHIFT);
 163       y[i+1] = SROUND16(sum[1], SIG_SHIFT);
 164       y[i+2] = SROUND16(sum[2], SIG_SHIFT);
 165       y[i+3] = SROUND16(sum[3], SIG_SHIFT);
 166    }
 167    for (;i<N;i++)
 168    {
 169       opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
 170       for (j=0;j<ord;j++)
 171          sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
 172       y[i] = SROUND16(sum, SIG_SHIFT);
 173    }
 174    RESTORE_STACK;
 175 }
 176
 177 void celt_iir(const opus_val32 *_x,
 178          const opus_val16 *den,
 179          opus_val32 *_y,
 180          int N,
 181          int ord,
 182          opus_val16 *mem,
 183          int arch)
 184 {
 185 #ifdef SMALL_FOOTPRINT
 186    int i,j;
 187    (void)arch;
 188    for (i=0;i<N;i++)
 189    {
 190       opus_val32 sum = _x[i];
 191       for (j=0;j<ord;j++)
 192       {
 193          sum -= MULT16_16(den[j],mem[j]);
 194       }
 195       for (j=ord-1;j>=1;j--)
 196       {
 197          mem[j]=mem[j-1];
 198       }
 199       mem[0] = SROUND16(sum, SIG_SHIFT);
 200       _y[i] = sum;
 201    }
 202 #else
 203    int i,j;
 204    VARDECL(opus_val16, rden);
 205    VARDECL(opus_val16, y);
 206    SAVE_STACK;
 207
 208    celt_assert((ord&3)==0);
 209    ALLOC(rden, ord, opus_val16);
 210    ALLOC(y, N+ord, opus_val16);
 211    for(i=0;i<ord;i++)
 212       rden[i] = den[ord-i-1];
 213    for(i=0;i<ord;i++)
 214       y[i] = -mem[ord-i-1];
 215    for(;i<N+ord;i++)
 216       y[i]=0;
 217    for (i=0;i<N-3;i+=4)
 218    {
 219       /* Unroll by 4 as if it were an FIR filter */
 220       opus_val32 sum[4];
 221       sum[0]=_x[i];
 222       sum[1]=_x[i+1];
 223       sum[2]=_x[i+2];
 224       sum[3]=_x[i+3];
 225       xcorr_kernel(rden, y+i, sum, ord, arch);
 226
 227       /* Patch up the result to compensate for the fact that this is an IIR */
 228       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
 229       _y[i  ] = sum[0];
 230       sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
 231       y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
 232       _y[i+1] = sum[1];
 233       sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
 234       sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
 235       y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
 236       _y[i+2] = sum[2];
 237
 238       sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
 239       sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
 240       sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
 241       y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
 242       _y[i+3] = sum[3];
 243    }
 244    for (;i<N;i++)
 245    {
 246       opus_val32 sum = _x[i];
 247       for (j=0;j<ord;j++)
 248          sum -= MULT16_16(rden[j],y[i+j]);
 249       y[i+ord] = SROUND16(sum,SIG_SHIFT);
 250       _y[i] = sum;
 251    }
 252    for(i=0;i<ord;i++)
 253       mem[i] = _y[N-i-1];
 254    RESTORE_STACK;
 255 #endif
 256 }
 257
 258 int _celt_autocorr(
 259                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
 260                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
 261                    const opus_val16       *window,
 262                    int          overlap,
 263                    int          lag,
 264                    int          n,
 265                    int          arch
 266                   )
 267 {
 268    opus_val32 d;
 269    int i, k;
 270    int fastN=n-lag;
 271    int shift;
 272    const opus_val16 *xptr;
 273    VARDECL(opus_val16, xx);
 274    SAVE_STACK;
 275    ALLOC(xx, n, opus_val16);
 276    celt_assert(n>0);
 277    celt_assert(overlap>=0);
 278    if (overlap == 0)
 279    {
 280       xptr = x;
 281    } else {
 282       for (i=0;i<n;i++)
 283          xx[i] = x[i];
 284       for (i=0;i<overlap;i++)
 285       {
 286          xx[i] = MULT16_16_Q15(x[i],window[i]);
 287          xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
 288       }
 289       xptr = xx;
 290    }
 291    shift=0;
 292 #ifdef FIXED_POINT
 293    {
 294       opus_val32 ac0;
 295       ac0 = 1+(n<<7);
 296       if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
 297       for(i=(n&1);i<n;i+=2)
 298       {
 299          ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
 300          ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
 301       }
 302
 303       shift = celt_ilog2(ac0)-30+10;
 304       shift = (shift)/2;
 305       if (shift>0)
 306       {
 307          for(i=0;i<n;i++)
 308             xx[i] = PSHR32(xptr[i], shift);
 309          xptr = xx;
 310       } else
 311          shift = 0;
 312    }
 313 #endif
 314    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
 315    for (k=0;k<=lag;k++)
 316    {
 317       for (i = k+fastN, d = 0; i < n; i++)
 318          d = MAC16_16(d, xptr[i], xptr[i-k]);
 319       ac[k] += d;
 320    }
 321 #ifdef FIXED_POINT
 322    shift = 2*shift;
 323    if (shift<=0)
 324       ac[0] += SHL32((opus_int32)1, -shift);
 325    if (ac[0] < 268435456)
 326    {
 327       int shift2 = 29 - EC_ILOG(ac[0]);
 328       for (i=0;i<=lag;i++)
 329          ac[i] = SHL32(ac[i], shift2);
 330       shift -= shift2;
 331    } else if (ac[0] >= 536870912)
 332    {
 333       int shift2=1;
 334       if (ac[0] >= 1073741824)
 335          shift2++;
 336       for (i=0;i<=lag;i++)
 337          ac[i] = SHR32(ac[i], shift2);
 338       shift += shift2;
 339    }
 340 #endif
 341
 342    RESTORE_STACK;
 343    return shift;
 344 }