apps/codecs/libspeex/ltp.c

   1 /* Copyright (C) 2002-2006 Jean-Marc Valin
   2    File: ltp.c
   3    Long-Term Prediction functions
   4
   5    Redistribution and use in source and binary forms, with or without
   6    modification, are permitted provided that the following conditions
   7    are met:
   8
   9    - Redistributions of source code must retain the above copyright
  10    notice, this list of conditions and the following disclaimer.
  11
  12    - Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15
  16    - Neither the name of the Xiph.org Foundation nor the names of its
  17    contributors may be used to endorse or promote products derived from
  18    this software without specific prior written permission.
  19
  20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
  24    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33 #ifdef HAVE_CONFIG_H
  34 #include "config-speex.h"
  35 #endif
  36
  37 #include <math.h>
  38 #include "ltp.h"
  39 #include "stack_alloc.h"
  40 #include "filters.h"
  41 #include "speex/speex_bits.h"
  42 #include "math_approx.h"
  43 #include "os_support.h"
  44
  45 #ifndef NULL
  46 #define NULL 0
  47 #endif
  48
  49
  50 #ifdef _USE_SSE
  51 #include "ltp_sse.h"
  52 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
  53 #include "ltp_arm4.h"
  54 #elif defined (COLDFIRE_ASM)
  55 #define OVERRIDE_INNER_PROD
  56 #elif defined (BFIN_ASM)
  57 #include "ltp_bfin.h"
  58 #endif
  59
  60 #ifndef OVERRIDE_INNER_PROD
  61 spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
  62 {
  63    spx_word32_t sum=0;
  64    len >>= 2;
  65    while(len--)
  66    {
  67       spx_word32_t part=0;
  68       part = MAC16_16(part,*x++,*y++);
  69       part = MAC16_16(part,*x++,*y++);
  70       part = MAC16_16(part,*x++,*y++);
  71       part = MAC16_16(part,*x++,*y++);
  72       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
  73       sum = ADD32(sum,SHR32(part,6));
  74    }
  75    return sum;
  76 }
  77 #endif
  78
  79 #ifndef SPEEX_DISABLE_ENCODER
  80 #ifndef OVERRIDE_PITCH_XCORR
  81 #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
  82 void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
  83 {
  84    int i,j;
  85    for (i=0;i<nb_pitch;i+=4)
  86    {
  87       /* Compute correlation*/
  88       /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
  89       spx_word32_t sum1=0;
  90       spx_word32_t sum2=0;
  91       spx_word32_t sum3=0;
  92       spx_word32_t sum4=0;
  93       const spx_word16_t *y = _y+i;
  94       const spx_word16_t *x = _x;
  95       spx_word16_t y0, y1, y2, y3;
  96       /*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/
  97       y0=*y++;
  98       y1=*y++;
  99       y2=*y++;
 100       y3=*y++;
 101       for (j=0;j<len;j+=4)
 102       {
 103          spx_word32_t part1;
 104          spx_word32_t part2;
 105          spx_word32_t part3;
 106          spx_word32_t part4;
 107          part1 = MULT16_16(*x,y0);
 108          part2 = MULT16_16(*x,y1);
 109          part3 = MULT16_16(*x,y2);
 110          part4 = MULT16_16(*x,y3);
 111          x++;
 112          y0=*y++;
 113          part1 = MAC16_16(part1,*x,y1);
 114          part2 = MAC16_16(part2,*x,y2);
 115          part3 = MAC16_16(part3,*x,y3);
 116          part4 = MAC16_16(part4,*x,y0);
 117          x++;
 118          y1=*y++;
 119          part1 = MAC16_16(part1,*x,y2);
 120          part2 = MAC16_16(part2,*x,y3);
 121          part3 = MAC16_16(part3,*x,y0);
 122          part4 = MAC16_16(part4,*x,y1);
 123          x++;
 124          y2=*y++;
 125          part1 = MAC16_16(part1,*x,y3);
 126          part2 = MAC16_16(part2,*x,y0);
 127          part3 = MAC16_16(part3,*x,y1);
 128          part4 = MAC16_16(part4,*x,y2);
 129          x++;
 130          y3=*y++;
 131
 132          sum1 = ADD32(sum1,SHR32(part1,6));
 133          sum2 = ADD32(sum2,SHR32(part2,6));
 134          sum3 = ADD32(sum3,SHR32(part3,6));
 135          sum4 = ADD32(sum4,SHR32(part4,6));
 136       }
 137       corr[nb_pitch-1-i]=sum1;
 138       corr[nb_pitch-2-i]=sum2;
 139       corr[nb_pitch-3-i]=sum3;
 140       corr[nb_pitch-4-i]=sum4;
 141    }
 142
 143 }
 144 #else
 145 void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
 146 {
 147    int i;
 148    for (i=0;i<nb_pitch;i++)
 149    {
 150       /* Compute correlation*/
 151       corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len);
 152    }
 153
 154 }
 155 #endif
 156 #endif
 157
 158 #ifndef OVERRIDE_COMPUTE_PITCH_ERROR
 159 static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
 160 {
 161    spx_word32_t sum = 0;
 162    sum = ADD32(sum,MULT16_16(MULT16_16_16(g[0],pitch_control),C[0]));
 163    sum = ADD32(sum,MULT16_16(MULT16_16_16(g[1],pitch_control),C[1]));
 164    sum = ADD32(sum,MULT16_16(MULT16_16_16(g[2],pitch_control),C[2]));
 165    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[1]),C[3]));
 166    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[1]),C[4]));
 167    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[0]),C[5]));
 168    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[0]),C[6]));
 169    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[1],g[1]),C[7]));
 170    sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[2]),C[8]));
 171    return sum;
 172 }
 173 #endif
 174
 175 #ifndef OVERRIDE_OPEN_LOOP_NBEST_PITCH
 176 void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
 177 {
 178    int i,j,k;
 179    VARDECL(spx_word32_t *best_score);
 180    VARDECL(spx_word32_t *best_ener);
 181    spx_word32_t e0;
 182    VARDECL(spx_word32_t *corr);
 183 #ifdef FIXED_POINT
 184    /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16)
 185       arrays for (normalized) 16-bit values */
 186    VARDECL(spx_word16_t *corr16);
 187    VARDECL(spx_word16_t *ener16);
 188    spx_word32_t *energy;
 189    int cshift=0, eshift=0;
 190    int scaledown = 0;
 191    ALLOC(corr16, end-start+1, spx_word16_t);
 192    ALLOC(ener16, end-start+1, spx_word16_t);
 193    ALLOC(corr, end-start+1, spx_word32_t);
 194    energy = corr;
 195 #else
 196    /* In floating-point, we need to float arrays and no normalized copies */
 197    VARDECL(spx_word32_t *energy);
 198    spx_word16_t *corr16;
 199    spx_word16_t *ener16;
 200    ALLOC(energy, end-start+2, spx_word32_t);
 201    ALLOC(corr, end-start+1, spx_word32_t);
 202    corr16 = corr;
 203    ener16 = energy;
 204 #endif
 205
 206    ALLOC(best_score, N, spx_word32_t);
 207    ALLOC(best_ener, N, spx_word32_t);
 208    for (i=0;i<N;i++)
 209    {
 210         best_score[i]=-1;
 211         best_ener[i]=0;
 212         pitch[i]=start;
 213    }
 214
 215 #ifdef FIXED_POINT
 216    for (i=-end;i<len;i++)
 217    {
 218       if (ABS16(sw[i])>16383)
 219       {
 220          scaledown=1;
 221          break;
 222       }
 223    }
 224    /* If the weighted input is close to saturation, then we scale it down */
 225    if (scaledown)
 226    {
 227       for (i=-end;i<len;i++)
 228       {
 229          sw[i]=SHR16(sw[i],1);
 230       }
 231    }
 232 #endif
 233    energy[0]=inner_prod(sw-start, sw-start, len);
 234    e0=inner_prod(sw, sw, len);
 235    for (i=start;i<end;i++)
 236    {
 237       /* Update energy for next pitch*/
 238       energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
 239       if (energy[i-start+1] < 0)
 240          energy[i-start+1] = 0;
 241    }
 242
 243 #ifdef FIXED_POINT
 244    eshift = normalize16(energy, ener16, 32766, end-start+1);
 245 #endif
 246
 247    /* In fixed-point, this actually overrites the energy array (aliased to corr) */
 248    pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
 249
 250 #ifdef FIXED_POINT
 251    /* Normalize to 180 so we can square it and it still fits in 16 bits */
 252    cshift = normalize16(corr, corr16, 180, end-start+1);
 253    /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
 254    if (scaledown)
 255    {
 256       for (i=-end;i<len;i++)
 257       {
 258          sw[i]=SHL16(sw[i],1);
 259       }
 260    }
 261 #endif
 262
 263    /* Search for the best pitch prediction gain */
 264    for (i=start;i<=end;i++)
 265    {
 266       spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
 267       /* Instead of dividing the tmp by the energy, we multiply on the other side */
 268       if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
 269       {
 270          /* We can safely put it last and then check */
 271          best_score[N-1]=tmp;
 272          best_ener[N-1]=ener16[i-start]+1;
 273          pitch[N-1]=i;
 274          /* Check if it comes in front of others */
 275          for (j=0;j<N-1;j++)
 276          {
 277             if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
 278             {
 279                for (k=N-1;k>j;k--)
 280                {
 281                   best_score[k]=best_score[k-1];
 282                   best_ener[k]=best_ener[k-1];
 283                   pitch[k]=pitch[k-1];
 284                }
 285                best_score[j]=tmp;
 286                best_ener[j]=ener16[i-start]+1;
 287                pitch[j]=i;
 288                break;
 289             }
 290          }
 291       }
 292    }
 293
 294    /* Compute open-loop gain if necessary */
 295    if (gain)
 296    {
 297       for (j=0;j<N;j++)
 298       {
 299          spx_word16_t g;
 300          i=pitch[j];
 301          g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
 302          /* FIXME: g = max(g,corr/energy) */
 303          if (g<0)
 304             g = 0;
 305          gain[j]=g;
 306       }
 307    }
 308
 309
 310 }
 311 #endif
 312
 313 #ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
 314 static int pitch_gain_search_3tap_vq(
 315   const signed char *gain_cdbk,
 316   int                gain_cdbk_size,
 317   spx_word16_t      *C16,
 318   spx_word16_t       max_gain
 319 )
 320 {
 321   const signed char *ptr=gain_cdbk;
 322   int                best_cdbk=0;
 323   spx_word32_t       best_sum=-VERY_LARGE32;
 324   spx_word32_t       sum=0;
 325   spx_word16_t       g[3];
 326   spx_word16_t       pitch_control=64;
 327   spx_word16_t       gain_sum;
 328   int                i;
 329
 330   for (i=0;i<gain_cdbk_size;i++) {
 331
 332     ptr = gain_cdbk+4*i;
 333     g[0]=ADD16((spx_word16_t)ptr[0],32);
 334     g[1]=ADD16((spx_word16_t)ptr[1],32);
 335     g[2]=ADD16((spx_word16_t)ptr[2],32);
 336     gain_sum = (spx_word16_t)ptr[3];
 337
 338     sum = compute_pitch_error(C16, g, pitch_control);
 339
 340     if (sum>best_sum && gain_sum<=max_gain) {
 341       best_sum=sum;
 342       best_cdbk=i;
 343     }
 344   }
 345
 346   return best_cdbk;
 347 }
 348 #endif
 349
 350 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
 351 static spx_word32_t pitch_gain_search_3tap(
 352 const spx_word16_t target[],       /* Target vector */
 353 const spx_coef_t ak[],          /* LPCs for this subframe */
 354 const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
 355 const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
 356 spx_sig_t exc[],                /* Excitation */
 357 const signed char *gain_cdbk,
 358 int gain_cdbk_size,
 359 int   pitch,                    /* Pitch value */
 360 int   p,                        /* Number of LPC coeffs */
 361 int   nsf,                      /* Number of samples in subframe */
 362 SpeexBits *bits,
 363 char *stack,
 364 const spx_word16_t *exc2,
 365 const spx_word16_t *r,
 366 spx_word16_t *new_target,
 367 int  *cdbk_index,
 368 int plc_tuning,
 369 spx_word32_t cumul_gain,
 370 int scaledown
 371 )
 372 {
 373    int i,j;
 374    VARDECL(spx_word16_t *tmp1);
 375    VARDECL(spx_word16_t *e);
 376    spx_word16_t *x[3];
 377    spx_word32_t corr[3];
 378    spx_word32_t A[3][3];
 379    spx_word16_t gain[3];
 380    spx_word32_t err;
 381    spx_word16_t max_gain=128;
 382    int          best_cdbk=0;
 383
 384    ALLOC(tmp1, 3*nsf, spx_word16_t);
 385    ALLOC(e, nsf, spx_word16_t);
 386
 387    if (cumul_gain > 262144)
 388       max_gain = 31;
 389
 390    x[0]=tmp1;
 391    x[1]=tmp1+nsf;
 392    x[2]=tmp1+2*nsf;
 393
 394    for (j=0;j<nsf;j++)
 395       new_target[j] = target[j];
 396
 397    {
 398       VARDECL(spx_mem_t *mm);
 399       int pp=pitch-1;
 400       ALLOC(mm, p, spx_mem_t);
 401       for (j=0;j<nsf;j++)
 402       {
 403          if (j-pp<0)
 404             e[j]=exc2[j-pp];
 405          else if (j-pp-pitch<0)
 406             e[j]=exc2[j-pp-pitch];
 407          else
 408             e[j]=0;
 409       }
 410 #ifdef FIXED_POINT
 411       /* Scale target and excitation down if needed (avoiding overflow) */
 412       if (scaledown)
 413       {
 414          for (j=0;j<nsf;j++)
 415             e[j] = SHR16(e[j],1);
 416          for (j=0;j<nsf;j++)
 417             new_target[j] = SHR16(new_target[j],1);
 418       }
 419 #endif
 420       for (j=0;j<p;j++)
 421          mm[j] = 0;
 422       iir_mem16(e, ak, e, nsf, p, mm, stack);
 423       for (j=0;j<p;j++)
 424          mm[j] = 0;
 425       filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack);
 426       for (j=0;j<nsf;j++)
 427          x[2][j] = e[j];
 428    }
 429    for (i=1;i>=0;i--)
 430    {
 431       spx_word16_t e0=exc2[-pitch-1+i];
 432 #ifdef FIXED_POINT
 433       /* Scale excitation down if needed (avoiding overflow) */
 434       if (scaledown)
 435          e0 = SHR16(e0,1);
 436 #endif
 437       x[i][0]=MULT16_16_Q14(r[0], e0);
 438       for (j=0;j<nsf-1;j++)
 439          x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0));
 440    }
 441
 442    for (i=0;i<3;i++)
 443       corr[i]=inner_prod(x[i],new_target,nsf);
 444    for (i=0;i<3;i++)
 445       for (j=0;j<=i;j++)
 446          A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
 447
 448    {
 449       spx_word32_t C[9];
 450 #ifdef FIXED_POINT
 451       spx_word16_t C16[9];
 452 #else
 453       spx_word16_t *C16=C;
 454 #endif
 455       C[0]=corr[2];
 456       C[1]=corr[1];
 457       C[2]=corr[0];
 458       C[3]=A[1][2];
 459       C[4]=A[0][1];
 460       C[5]=A[0][2];
 461       C[6]=A[2][2];
 462       C[7]=A[1][1];
 463       C[8]=A[0][0];
 464
 465       /*plc_tuning *= 2;*/
 466       if (plc_tuning<2)
 467          plc_tuning=2;
 468       if (plc_tuning>30)
 469          plc_tuning=30;
 470 #ifdef FIXED_POINT
 471       C[0] = SHL32(C[0],1);
 472       C[1] = SHL32(C[1],1);
 473       C[2] = SHL32(C[2],1);
 474       C[3] = SHL32(C[3],1);
 475       C[4] = SHL32(C[4],1);
 476       C[5] = SHL32(C[5],1);
 477       C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]);
 478       C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]);
 479       C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]);
 480       normalize16(C, C16, 32767, 9);
 481 #else
 482       C[6]*=.5*(1+.02*plc_tuning);
 483       C[7]*=.5*(1+.02*plc_tuning);
 484       C[8]*=.5*(1+.02*plc_tuning);
 485 #endif
 486
 487       best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);
 488
 489 #ifdef FIXED_POINT
 490       gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]);
 491       gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]);
 492       gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]);
 493       /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
 494 #else
 495       gain[0] = 0.015625*gain_cdbk[best_cdbk*4]  + .5;
 496       gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5;
 497       gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5;
 498 #endif
 499       *cdbk_index=best_cdbk;
 500    }
 501
 502    SPEEX_MEMSET(exc, 0, nsf);
 503    for (i=0;i<3;i++)
 504    {
 505       int j;
 506       int tmp1, tmp3;
 507       int pp=pitch+1-i;
 508       tmp1=nsf;
 509       if (tmp1>pp)
 510          tmp1=pp;
 511       for (j=0;j<tmp1;j++)
 512          exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp]);
 513       tmp3=nsf;
 514       if (tmp3>pp+pitch)
 515          tmp3=pp+pitch;
 516       for (j=tmp1;j<tmp3;j++)
 517          exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp-pitch]);
 518    }
 519    for (i=0;i<nsf;i++)
 520    {
 521       spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])),
 522                             MULT16_16(gain[2],x[0][i]));
 523       new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp,6)));
 524    }
 525    err = inner_prod(new_target, new_target, nsf);
 526
 527    return err;
 528 }
 529
 530 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
 531 int pitch_search_3tap(
 532 spx_word16_t target[],                 /* Target vector */
 533 spx_word16_t *sw,
 534 spx_coef_t ak[],                     /* LPCs for this subframe */
 535 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 536 spx_coef_t awk2[],                   /* Weighted LPCs #2 for this subframe */
 537 spx_sig_t exc[],                    /* Excitation */
 538 const void *par,
 539 int   start,                    /* Smallest pitch value allowed */
 540 int   end,                      /* Largest pitch value allowed */
 541 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
 542 int   p,                        /* Number of LPC coeffs */
 543 int   nsf,                      /* Number of samples in subframe */
 544 SpeexBits *bits,
 545 char *stack,
 546 spx_word16_t *exc2,
 547 spx_word16_t *r,
 548 int complexity,
 549 int cdbk_offset,
 550 int plc_tuning,
 551 spx_word32_t *cumul_gain
 552 )
 553 {
 554    int i;
 555    int cdbk_index, pitch=0, best_gain_index=0;
 556    VARDECL(spx_sig_t *best_exc);
 557    VARDECL(spx_word16_t *new_target);
 558    VARDECL(spx_word16_t *best_target);
 559    int best_pitch=0;
 560    spx_word32_t err, best_err=-1;
 561    int N;
 562    const ltp_params *params;
 563    const signed char *gain_cdbk;
 564    int   gain_cdbk_size;
 565    int scaledown=0;
 566
 567    VARDECL(int *nbest);
 568
 569    params = (const ltp_params*) par;
 570    gain_cdbk_size = 1<<params->gain_bits;
 571    gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
 572
 573    N=complexity;
 574    if (N>10)
 575       N=10;
 576    if (N<1)
 577       N=1;
 578
 579    ALLOC(nbest, N, int);
 580    params = (const ltp_params*) par;
 581
 582    if (end<start)
 583    {
 584       speex_bits_pack(bits, 0, params->pitch_bits);
 585       speex_bits_pack(bits, 0, params->gain_bits);
 586       SPEEX_MEMSET(exc, 0, nsf);
 587       return start;
 588    }
 589
 590 #ifdef FIXED_POINT
 591    /* Check if we need to scale everything down in the pitch search to avoid overflows */
 592    for (i=0;i<nsf;i++)
 593    {
 594       if (ABS16(target[i])>16383)
 595       {
 596          scaledown=1;
 597          break;
 598       }
 599    }
 600    for (i=-end;i<nsf;i++)
 601    {
 602       if (ABS16(exc2[i])>16383)
 603       {
 604          scaledown=1;
 605          break;
 606       }
 607    }
 608 #endif
 609    if (N>end-start+1)
 610       N=end-start+1;
 611    if (end != start)
 612       open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack);
 613    else
 614       nbest[0] = start;
 615
 616    ALLOC(best_exc, nsf, spx_sig_t);
 617    ALLOC(new_target, nsf, spx_word16_t);
 618    ALLOC(best_target, nsf, spx_word16_t);
 619
 620    for (i=0;i<N;i++)
 621    {
 622       pitch=nbest[i];
 623       SPEEX_MEMSET(exc, 0, nsf);
 624       err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf,
 625                                  bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown);
 626       if (err<best_err || best_err<0)
 627       {
 628          SPEEX_COPY(best_exc, exc, nsf);
 629          SPEEX_COPY(best_target, new_target, nsf);
 630          best_err=err;
 631          best_pitch=pitch;
 632          best_gain_index=cdbk_index;
 633       }
 634    }
 635    /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/
 636    speex_bits_pack(bits, best_pitch-start, params->pitch_bits);
 637    speex_bits_pack(bits, best_gain_index, params->gain_bits);
 638 #ifdef FIXED_POINT
 639    *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain));
 640 #else
 641    *cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3];
 642 #endif
 643    /*printf ("%f\n", cumul_gain);*/
 644    /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/
 645    SPEEX_COPY(exc, best_exc, nsf);
 646    SPEEX_COPY(target, best_target, nsf);
 647 #ifdef FIXED_POINT
 648    /* Scale target back up if needed */
 649    if (scaledown)
 650    {
 651       for (i=0;i<nsf;i++)
 652          target[i]=SHL16(target[i],1);
 653    }
 654 #endif
 655    return pitch;
 656 }
 657 #endif /* SPEEX_DISABLE_ENCODER */
 658
 659 void pitch_unquant_3tap(
 660 spx_word16_t exc[],             /* Input excitation */
 661 spx_word32_t exc_out[],         /* Output excitation */
 662 int   start,                    /* Smallest pitch value allowed */
 663 int   end,                      /* Largest pitch value allowed */
 664 spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 665 const void *par,
 666 int   nsf,                      /* Number of samples in subframe */
 667 int *pitch_val,
 668 spx_word16_t *gain_val,
 669 SpeexBits *bits,
 670 char *stack,
 671 int count_lost,
 672 int subframe_offset,
 673 spx_word16_t last_pitch_gain,
 674 int cdbk_offset
 675 )
 676 {
 677     (void)end;
 678     (void)pitch_coef;
 679     (void)stack;
 680    int i;
 681    int pitch;
 682    int gain_index;
 683    spx_word16_t gain[3];
 684    const signed char *gain_cdbk;
 685    int gain_cdbk_size;
 686    const ltp_params *params;
 687
 688    params = (const ltp_params*) par;
 689    gain_cdbk_size = 1<<params->gain_bits;
 690    gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
 691
 692    pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
 693    pitch += start;
 694    gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);
 695    /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
 696 #ifdef FIXED_POINT
 697    gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]);
 698    gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]);
 699    gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]);
 700 #else
 701    gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5;
 702    gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5;
 703    gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5;
 704 #endif
 705
 706    if (count_lost && pitch > subframe_offset)
 707    {
 708       spx_word16_t gain_sum;
 709       if (1) {
 710 #ifdef FIXED_POINT
 711          spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain,1);
 712          if (tmp>62)
 713             tmp=62;
 714 #else
 715          spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
 716          if (tmp>.95)
 717             tmp=.95;
 718 #endif
 719          gain_sum = gain_3tap_to_1tap(gain);
 720
 721          if (gain_sum > tmp)
 722          {
 723             spx_word16_t fact = DIV32_16(SHL32(EXTEND32(tmp),14),gain_sum);
 724             for (i=0;i<3;i++)
 725                gain[i]=MULT16_16_Q14(fact,gain[i]);
 726          }
 727
 728       }
 729
 730    }
 731
 732    *pitch_val = pitch;
 733    gain_val[0]=gain[0];
 734    gain_val[1]=gain[1];
 735    gain_val[2]=gain[2];
 736    gain[0] = SHL16(gain[0],7);
 737    gain[1] = SHL16(gain[1],7);
 738    gain[2] = SHL16(gain[2],7);
 739    SPEEX_MEMSET(exc_out, 0, nsf);
 740    for (i=0;i<3;i++)
 741    {
 742       int j;
 743       int tmp1, tmp3;
 744       int pp=pitch+1-i;
 745       tmp1=nsf;
 746       if (tmp1>pp)
 747          tmp1=pp;
 748       for (j=0;j<tmp1;j++)
 749          exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp]);
 750       tmp3=nsf;
 751       if (tmp3>pp+pitch)
 752          tmp3=pp+pitch;
 753       for (j=tmp1;j<tmp3;j++)
 754          exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp-pitch]);
 755    }
 756    /*for (i=0;i<nsf;i++)
 757    exc[i]=PSHR32(exc32[i],13);*/
 758 }
 759
 760
 761 #ifndef SPEEX_DISABLE_ENCODER
 762 /** Forced pitch delay and gain */
 763 int forced_pitch_quant(
 764 spx_word16_t target[],                 /* Target vector */
 765 spx_word16_t *sw,
 766 spx_coef_t ak[],                     /* LPCs for this subframe */
 767 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 768 spx_coef_t awk2[],                   /* Weighted LPCs #2 for this subframe */
 769 spx_sig_t exc[],                    /* Excitation */
 770 const void *par,
 771 int   start,                    /* Smallest pitch value allowed */
 772 int   end,                      /* Largest pitch value allowed */
 773 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
 774 int   p,                        /* Number of LPC coeffs */
 775 int   nsf,                      /* Number of samples in subframe */
 776 SpeexBits *bits,
 777 char *stack,
 778 spx_word16_t *exc2,
 779 spx_word16_t *r,
 780 int complexity,
 781 int cdbk_offset,
 782 int plc_tuning,
 783 spx_word32_t *cumul_gain
 784 )
 785 {
 786    int i;
 787    VARDECL(spx_word16_t *res);
 788    ALLOC(res, nsf, spx_word16_t);
 789 #ifdef FIXED_POINT
 790    if (pitch_coef>63)
 791       pitch_coef=63;
 792 #else
 793    if (pitch_coef>.99)
 794       pitch_coef=.99;
 795 #endif
 796    for (i=0;i<nsf&&i<start;i++)
 797    {
 798       exc[i]=MULT16_16(SHL16(pitch_coef, 7),exc2[i-start]);
 799    }
 800    for (;i<nsf;i++)
 801    {
 802       exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]);
 803    }
 804    for (i=0;i<nsf;i++)
 805       res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT-1));
 806    syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack);
 807    for (i=0;i<nsf;i++)
 808       target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),EXTEND32(res[i])),32700));
 809    return start;
 810 }
 811 #endif /* SPEEX_DISABLE_ENCODER */
 812
 813 /** Unquantize forced pitch delay and gain */
 814 void forced_pitch_unquant(
 815 spx_word16_t exc[],             /* Input excitation */
 816 spx_word32_t exc_out[],         /* Output excitation */
 817 int   start,                    /* Smallest pitch value allowed */
 818 int   end,                      /* Largest pitch value allowed */
 819 spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 820 const void *par,
 821 int   nsf,                      /* Number of samples in subframe */
 822 int *pitch_val,
 823 spx_word16_t *gain_val,
 824 SpeexBits *bits,
 825 char *stack,
 826 int count_lost,
 827 int subframe_offset,
 828 spx_word16_t last_pitch_gain,
 829 int cdbk_offset
 830 )
 831 {
 832     (void)end;
 833     (void)par;
 834     (void)bits;
 835     (void)stack;
 836     (void)count_lost;
 837     (void)subframe_offset;
 838     (void)last_pitch_gain;
 839     (void)cdbk_offset;
 840    int i;
 841 #ifdef FIXED_POINT
 842    if (pitch_coef>63)
 843       pitch_coef=63;
 844 #else
 845    if (pitch_coef>.99)
 846       pitch_coef=.99;
 847 #endif
 848    for (i=0;i<nsf;i++)
 849    {
 850       exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7));
 851       exc[i] = EXTRACT16(PSHR32(exc_out[i],13));
 852    }
 853    *pitch_val = start;
 854    gain_val[0]=gain_val[2]=0;
 855    gain_val[1] = pitch_coef;
 856 }