toolame-02l/psycho_1.c

   1 #include <stdio.h>
   2 #include <math.h>
   3 #include "common.h"
   4 #include "encoder.h"
   5 #include "mem.h"
   6 #include "fft.h"
   7 #include "psycho_1.h"
   8 #include "psycho_1_priv.h"
   9
  10 #define DBTAB 1000
  11 double dbtable[DBTAB];
  12
  13 /**********************************************************************
  14
  15         This module implements the psychoacoustic model I for the
  16  MPEG encoder layer II. It uses simplified tonal and noise masking
  17  threshold analysis to generate SMR for the encoder bit allocation
  18  routine.
  19
  20 **********************************************************************/
  21
  22 void psycho_1 (short buffer[2][1152], double scale[2][SBLIMIT],
  23                double ltmin[2][SBLIMIT], frame_info * frame)
  24 {
  25   frame_header *header = frame->header;
  26   int nch = frame->nch;
  27   int sblimit = frame->sblimit;
  28   int k, i, tone = 0, noise = 0;
  29   static char init = 0;
  30   static int off[2] = { 256, 256 };
  31   double sample[FFT_SIZE];
  32   double spike[2][SBLIMIT];
  33   static D1408 *fft_buf;
  34   static mask_ptr power;
  35   static g_ptr ltg;
  36   FLOAT energy[FFT_SIZE];
  37
  38   /* call functions for critical boundaries, freq. */
  39   if (!init) {                  /* bands, bark values, and mapping */
  40     fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
  41     power = (mask_ptr) mem_alloc (sizeof (mask) * HAN_SIZE, "power");
  42     if (header->version == MPEG_AUDIO_ID) {
  43       psycho_1_read_cbound (header->lay, header->sampling_frequency);
  44       psycho_1_read_freq_band (&ltg, header->lay, header->sampling_frequency);
  45     } else {
  46       psycho_1_read_cbound (header->lay, header->sampling_frequency + 4);
  47       psycho_1_read_freq_band (&ltg, header->lay, header->sampling_frequency + 4);
  48     }
  49     psycho_1_make_map (power, ltg);
  50     for (i = 0; i < 1408; i++)
  51       fft_buf[0][i] = fft_buf[1][i] = 0;
  52
  53     psycho_1_init_add_db ();            /* create the add_db table */
  54
  55     init = 1;
  56   }
  57   for (k = 0; k < nch; k++) {
  58     /* check pcm input for 3 blocks of 384 samples */
  59     /* sami's speedup, added in 02j
  60        saves about 4% overall during an encode */
  61     int ok = off[k] % 1408;
  62     for (i = 0; i < 1152; i++) {
  63       fft_buf[k][ok++] = (double) buffer[k][i] / SCALE;
  64       if (ok >= 1408)
  65         ok = 0;
  66     }
  67     ok = (off[k] + 1216) % 1408;
  68     for (i = 0; i < FFT_SIZE; i++) {
  69       sample[i] = fft_buf[k][ok++];
  70       if (ok >= 1408)
  71         ok = 0;
  72     }
  73     off[k] += 1152;
  74     off[k] %= 1408;
  75
  76     psycho_1_hann_fft_pickmax (sample, power, &spike[k][0], energy);
  77     psycho_1_tonal_label (power, &tone);
  78     psycho_1_noise_label (power, &noise, ltg, energy);
  79     //psycho_1_dump(power, &tone, &noise) ;
  80     psycho_1_subsampling (power, ltg, &tone, &noise);
  81     psycho_1_threshold (power, ltg, &tone, &noise,
  82                bitrate[header->version][header->bitrate_index] / nch);
  83     psycho_1_minimum_mask (ltg, &ltmin[k][0], sblimit);
  84     psycho_1_smr (&ltmin[k][0], &spike[k][0], &scale[k][0], sblimit);
  85   }
  86
  87 }
  88
  89
  90 int crit_band;
  91 int *cbound;
  92 int sub_size;
  93
  94 void psycho_1_read_cbound (int lay, int freq)
  95 /* this function reads in critical  band boundaries */
  96 {
  97
  98 #include "critband.h"
  99   //static const int FirstCriticalBand[7][27] = {...
 100
 101   int i, k;
 102
 103   if ((lay < 1) || (lay > 2)) {
 104     printf ("Internal error (read_cbound())\n");
 105     return;
 106   }
 107   if ((freq < 0) || (freq > 6) || (freq == 3)) {
 108     printf ("Internal error (read_cbound())\n");
 109     return;
 110   }
 111
 112   crit_band = SecondCriticalBand[freq][0];
 113   cbound = (int *) mem_alloc (sizeof (int) * crit_band, "cbound");
 114   for (i = 0; i < crit_band; i++) {
 115     k = SecondCriticalBand[freq][i + 1];
 116     if (k != 0) {
 117       cbound[i] = k;
 118     } else {
 119       printf ("Internal error (read_cbound())\n");
 120       return;
 121     }
 122   }
 123 }
 124
 125 void psycho_1_read_freq_band (ltg, lay, freq)   /* this function reads in   */
 126      int lay, freq;             /* frequency bands and bark */
 127      g_ptr *ltg;                /* values                   */
 128 {
 129
 130 #include "freqtable.h"
 131
 132   int i, k;
 133
 134   if ((freq < 0) || (freq > 6) || (freq == 3)) {
 135     printf ("Internal error (read_freq_band())\n");
 136     return;
 137   }
 138
 139   /* read input for freq. subbands */
 140
 141   sub_size = SecondFreqEntries[freq] + 1;
 142   *ltg = (g_ptr) mem_alloc (sizeof (g_thres) * sub_size, "ltg");
 143   (*ltg)[0].line = 0;           /* initialize global masking threshold */
 144   (*ltg)[0].bark = 0.0;
 145   (*ltg)[0].hear = 0.0;
 146   for (i = 1; i < sub_size; i++) {
 147     k = SecondFreqSubband[freq][i - 1].line;
 148     if (k != 0) {
 149       (*ltg)[i].line = k;
 150       (*ltg)[i].bark = SecondFreqSubband[freq][i - 1].bark;
 151       (*ltg)[i].hear = SecondFreqSubband[freq][i - 1].hear;
 152     } else {
 153       printf ("Internal error (read_freq_band())\n");
 154         return;
 155     }
 156   }
 157 }
 158
 159
 160 void psycho_1_make_map (mask power[HAN_SIZE], g_thres * ltg)
 161 /* this function calculates the global masking threshold */
 162 {
 163   int i, j;
 164
 165   for (i = 1; i < sub_size; i++)
 166     for (j = ltg[i - 1].line; j <= ltg[i].line; j++)
 167       power[j].map = i;
 168 }
 169
 170 void psycho_1_init_add_db (void)
 171 {
 172   int i;
 173   double x;
 174   for (i = 0; i < DBTAB; i++) {
 175     x = (double) i / 10.0;
 176     dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
 177   }
 178 }
 179
 180 INLINE double add_db (double a, double b)
 181 {
 182   /* MFC - if the difference between a and b is large (>99), then just return the
 183      largest one. (about 10% of the time)
 184      - For differences between 0 and 99, return the largest value, but add
 185      in a pre-calculated difference value.
 186      - the value 99 was chosen arbitarily.
 187      - maximum (a-b) i've seen is 572 */
 188   FLOAT fdiff;
 189   int idiff;
 190   fdiff = (10.0 * (a - b));
 191
 192   if (fdiff > 990.0) {
 193     return a;
 194   }
 195   if (fdiff < -990.0) {
 196     return (b);
 197   }
 198
 199   idiff = (int) fdiff;
 200   if (idiff >= 0) {
 201     return (a + dbtable[idiff]);
 202   }
 203
 204   return (b + dbtable[-idiff]);
 205 }
 206
 207 /****************************************************************
 208 *       Window the samples then,
 209 *        Fast Fourier transform of the input samples.
 210 *
 211 *   (  call the FHT-based fft() in fft.c )
 212 *
 213 *
 214 ****************************************************************/
 215 void psycho_1_hann_fft_pickmax (double sample[FFT_SIZE], mask power[HAN_SIZE],
 216                        double spike[SBLIMIT], FLOAT energy[FFT_SIZE])
 217 {
 218   FLOAT x_real[FFT_SIZE];
 219   register int i, j;
 220   register double sqrt_8_over_3;
 221   static int init = 0;
 222   static double *window;
 223   double sum;
 224
 225   if (!init) {                  /* calculate window function for the Fourier transform */
 226     window = (double *) mem_alloc (sizeof (DFFT), "window");
 227     sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
 228     for (i = 0; i < FFT_SIZE; i++) {
 229       /* Hann window formula */
 230       window[i] =
 231         sqrt_8_over_3 * 0.5 * (1 -
 232                                cos (2.0 * PI * i / (FFT_SIZE))) / FFT_SIZE;
 233     }
 234     init = 1;
 235   }
 236   for (i = 0; i < FFT_SIZE; i++)
 237     x_real[i] = (FLOAT) (sample[i] * window[i]);
 238
 239   psycho_1_fft (x_real, energy, FFT_SIZE);
 240
 241   for (i = 0; i < HAN_SIZE; i++) {      /* calculate power density spectrum */
 242     if (energy[i] < 1E-20)
 243       power[i].x = -200.0 + POWERNORM;
 244     else
 245       power[i].x = 10 * log10 (energy[i]) + POWERNORM;
 246     power[i].next = STOP;
 247     power[i].type = FALSE;
 248   }
 249
 250   /* Calculate the sum of spectral component in each subband from bound 4-16 */
 251
 252 #define CF 1073741824           /* pow(10, 0.1*POWERNORM) */
 253 #define DBM  1E-20              /* pow(10.0, 0.1*DBMIN */
 254   for (i = 0; i < HAN_SIZE; spike[i >> 4] = 10.0 * log10 (sum), i += 16) {
 255     for (j = 0, sum = DBM; j < 16; j++)
 256       sum += CF * energy[i + j];
 257   }
 258 }
 259
 260 /****************************************************************
 261 *
 262 *        This function labels the tonal component in the power
 263 * spectrum.
 264 *
 265 ****************************************************************/
 266
 267 void psycho_1_tonal_label (mask power[HAN_SIZE], int *tone)
 268 /* this function extracts (tonal)  sinusoidals from the spectrum  */
 269 {
 270   int i, j, last = LAST, first, run, last_but_one = LAST;       /* dpwe */
 271   double max;
 272
 273   *tone = LAST;
 274   for (i = 2; i < HAN_SIZE - 12; i++) {
 275     if (power[i].x > power[i - 1].x && power[i].x >= power[i + 1].x) {
 276       power[i].type = TONE;
 277       power[i].next = LAST;
 278       if (last != LAST)
 279         power[last].next = i;
 280       else
 281         first = *tone = i;
 282       last = i;
 283     }
 284   }
 285   last = LAST;
 286   first = *tone;
 287   *tone = LAST;
 288   while ((first != LAST) && (first != STOP)) {  /* the conditions for the tonal          */
 289     if (first < 3 || first > 500)
 290       run = 0;                  /* otherwise k+/-j will be out of bounds */
 291     else if (first < 63)
 292       run = 2;                  /* components in layer II, which         */
 293     else if (first < 127)
 294       run = 3;                  /* are the boundaries for calc.          */
 295     else if (first < 255)
 296       run = 6;                  /* the tonal components                  */
 297     else
 298       run = 12;
 299     max = power[first].x - 7;   /* after calculation of tonal   */
 300     for (j = 2; j <= run; j++)  /* components, set to local max */
 301       if (max < power[first - j].x || max < power[first + j].x) {
 302         power[first].type = FALSE;
 303         break;
 304       }
 305     if (power[first].type == TONE) {    /* extract tonal components */
 306       int help = first;
 307       if (*tone == LAST)
 308         *tone = first;
 309       while ((power[help].next != LAST) && (power[help].next - first) <= run)
 310         help = power[help].next;
 311       help = power[help].next;
 312       power[first].next = help;
 313       if ((first - last) <= run) {
 314         if (last_but_one != LAST)
 315           power[last_but_one].next = first;
 316       }
 317       if (first > 1 && first < 500) {   /* calculate the sum of the */
 318         double tmp;             /* powers of the components */
 319         tmp = add_db (power[first - 1].x, power[first + 1].x);
 320         power[first].x = add_db (power[first].x, tmp);
 321       }
 322       for (j = 1; j <= run; j++) {
 323         power[first - j].x = power[first + j].x = DBMIN;
 324         power[first - j].next = power[first + j].next = STOP;
 325         power[first - j].type = power[first + j].type = FALSE;
 326       }
 327       last_but_one = last;
 328       last = first;
 329       first = power[first].next;
 330     } else {
 331       int ll;
 332       if (last == LAST);        /* *tone = power[first].next; dpwe */
 333       else
 334         power[last].next = power[first].next;
 335       ll = first;
 336       first = power[first].next;
 337       power[ll].next = STOP;
 338     }
 339   }
 340 }
 341
 342 /****************************************************************
 343 *
 344 *        This function groups all the remaining non-tonal
 345 * spectral lines into critical band where they are replaced by
 346 * one single line.
 347 *
 348 ****************************************************************/
 349
 350 void psycho_1_noise_label (mask * power, int *noise, g_thres * ltg,
 351                   FLOAT energy[FFT_SIZE])
 352 {
 353   int i, j, centre, last = LAST;
 354   double index, weight, sum;
 355   /* calculate the remaining spectral */
 356   for (i = 0; i < crit_band - 1; i++) { /* lines for non-tonal components   */
 357     for (j = cbound[i], weight = 0.0, sum = DBMIN; j < cbound[i + 1]; j++) {
 358       if (power[j].type != TONE) {
 359         if (power[j].x != DBMIN) {
 360           sum = add_db (power[j].x, sum);
 361           /* Weight is used in finding the geometric mean of the noise energy within a subband */
 362           weight += CF * energy[j] * (double) (j - cbound[i]) / (double) (cbound[i + 1] - cbound[i]);   /* correction */
 363           power[j].x = DBMIN;
 364         }
 365       }                         /*  check to see if the spectral line is low dB, and if  */
 366     }                           /* so replace the center of the critical band, which is */
 367     /* the center freq. of the noise component              */
 368
 369     if (sum <= DBMIN)
 370       centre = (cbound[i + 1] + cbound[i]) / 2;
 371     else {
 372       /* fprintf(stderr, "%i [%f %f] -", count++,weight/pow(10.0,0.1*sum), weight*pow(10.0,-0.1*sum)); */
 373       index = weight * pow (10.0, -0.1 * sum);
 374       centre =
 375         cbound[i] + (int) (index * (double) (cbound[i + 1] - cbound[i]));
 376     }
 377
 378
 379     /* locate next non-tonal component until finished; */
 380     /* add to list of non-tonal components             */
 381
 382     /* Masahiro Iwadare's fix for infinite looping problem? */
 383     if (power[centre].type == TONE) {
 384       if (power[centre + 1].type == TONE) {
 385         centre++;
 386       } else
 387         centre--;
 388     }
 389
 390     if (last == LAST)
 391       *noise = centre;
 392     else {
 393       power[centre].next = LAST;
 394       power[last].next = centre;
 395     }
 396     power[centre].x = sum;
 397     power[centre].type = NOISE;
 398     last = centre;
 399   }
 400 }
 401
 402 /****************************************************************
 403 *
 404 *        This function reduces the number of noise and tonal
 405 * component for further threshold analysis.
 406 *
 407 ****************************************************************/
 408
 409 void psycho_1_subsampling (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise)
 410 {
 411   int i, old;
 412
 413   i = *tone;
 414   old = STOP;                   /* calculate tonal components for */
 415
 416   while ((i != LAST) && (i != STOP))
 417   {                             /* reduction of spectral lines    */
 418     if (power[i].x < ltg[power[i].map].hear) {
 419       power[i].type = FALSE;
 420       power[i].x = DBMIN;
 421       if (old == STOP)
 422         *tone = power[i].next;
 423       else
 424         power[old].next = power[i].next;
 425     } else
 426       old = i;
 427     i = power[i].next;
 428   }
 429   i = *noise;
 430   old = STOP;                   /* calculate non-tonal components for */
 431   while ((i != LAST) && (i != STOP)) {  /* reduction of spectral lines        */
 432     if (power[i].x < ltg[power[i].map].hear) {
 433       power[i].type = FALSE;
 434       power[i].x = DBMIN;
 435       if (old == STOP)
 436         *noise = power[i].next;
 437       else
 438         power[old].next = power[i].next;
 439     } else
 440       old = i;
 441     i = power[i].next;
 442   }
 443   i = *tone;
 444   old = STOP;
 445   while ((i != LAST) && (i != STOP))
 446   {                             /* if more than one */
 447     if (power[i].next == LAST)
 448       break;                    /* tonal component  */
 449     if (ltg[power[power[i].next].map].bark -    /* is less than .5  */
 450         ltg[power[i].map].bark < 0.5) { /* bark, take the   */
 451       if (power[power[i].next].x > power[i].x) {        /* maximum          */
 452         if (old == STOP)
 453           *tone = power[i].next;
 454         else
 455           power[old].next = power[i].next;
 456         power[i].type = FALSE;
 457         power[i].x = DBMIN;
 458         i = power[i].next;
 459       } else {
 460         power[power[i].next].type = FALSE;
 461         power[power[i].next].x = DBMIN;
 462         power[i].next = power[power[i].next].next;
 463         old = i;
 464       }
 465     } else {
 466       old = i;
 467       i = power[i].next;
 468     }
 469   }
 470 }
 471
 472 /****************************************************************
 473 *
 474 *        This function calculates the individual threshold and
 475 * sum with the quiet threshold to find the global threshold.
 476 *
 477 ****************************************************************/
 478
 479 /* mainly just changed the way range checking was done MFC Nov 1999 */
 480 void psycho_1_threshold (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise,
 481                 int bit_rate)
 482 {
 483   int k, t;
 484   double dz, tmps, vf;
 485
 486   for (k = 1; k < sub_size; k++) {
 487     ltg[k].x = DBMIN;
 488     t = *tone;                  /* calculate individual masking threshold for */
 489     while ((t != LAST) && (t != STOP))
 490     {                           /* components in order to find the global     */
 491       dz = ltg[k].bark - ltg[power[t].map].bark;        /* distance of bark value */
 492       if (dz >= -3.0 && dz < 8.0) {
 493         tmps = -1.525 - 0.275 * ltg[power[t].map].bark - 4.5 + power[t].x;
 494         /* masking function for lower & upper slopes */
 495         if (dz < -1)
 496           vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
 497         else if (dz < 0)
 498           vf = (0.4 * power[t].x + 6) * dz;
 499         else if (dz < 1)
 500           vf = (-17 * dz);
 501         else
 502           vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
 503         ltg[k].x = add_db (ltg[k].x, tmps + vf);
 504       }
 505       t = power[t].next;
 506     }
 507
 508     t = *noise;                 /* calculate individual masking threshold  */
 509     while ((t != LAST) && (t != STOP)) {        /* for non-tonal components to find LTG    */
 510       dz = ltg[k].bark - ltg[power[t].map].bark;        /* distance of bark value */
 511       if (dz >= -3.0 && dz < 8.0) {
 512         tmps = -1.525 - 0.175 * ltg[power[t].map].bark - 0.5 + power[t].x;
 513         /* masking function for lower & upper slopes */
 514         if (dz < -1)
 515           vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
 516         else if (dz < 0)
 517           vf = (0.4 * power[t].x + 6) * dz;
 518         else if (dz < 1)
 519           vf = (-17 * dz);
 520         else
 521           vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
 522         ltg[k].x = add_db (ltg[k].x, tmps + vf);
 523       }
 524       t = power[t].next;
 525     }
 526     if (bit_rate < 96)
 527       ltg[k].x = add_db (ltg[k].hear, ltg[k].x);
 528     else
 529       ltg[k].x = add_db (ltg[k].hear - 12.0, ltg[k].x);
 530   }
 531
 532 }
 533
 534 /****************************************************************
 535 *
 536 *        This function finds the minimum masking threshold and
 537 * return the value to the encoder.
 538 *
 539 ****************************************************************/
 540
 541 void psycho_1_minimum_mask (g_thres * ltg, double ltmin[SBLIMIT], int sblimit)
 542 {
 543   double min;
 544   int i, j;
 545
 546   j = 1;
 547   for (i = 0; i < sblimit; i++)
 548     if (j >= sub_size - 1)      /* check subband limit, and       */
 549       ltmin[i] = ltg[sub_size - 1].hear;        /* calculate the minimum masking  */
 550     else {                      /* level of LTMIN for each subband */
 551       min = ltg[j].x;
 552       while (ltg[j].line >> 4 == i && j < sub_size) {
 553         if (min > ltg[j].x)
 554           min = ltg[j].x;
 555         j++;
 556       }
 557       ltmin[i] = min;
 558     }
 559 }
 560
 561 /*****************************************************************
 562 *
 563 *        This procedure is called in musicin to pick out the
 564 * smaller of the scalefactor or threshold.
 565 *
 566 *****************************************************************/
 567
 568 void psycho_1_smr (double ltmin[SBLIMIT], double spike[SBLIMIT], double scale[SBLIMIT],
 569           int sblimit)
 570 {
 571   int i;
 572   double max;
 573
 574   for (i = 0; i < sblimit; i++) {       /* determine the signal   */
 575     max = 20 * log10 (scale[i] * 32768) - 10;   /* level for each subband */
 576     if (spike[i] > max)
 577       max = spike[i];           /* for the maximum scale  */
 578     max -= ltmin[i];            /* factors                */
 579     ltmin[i] = max;
 580   }
 581 }
 582
 583 void psycho_1_dump(mask power[HAN_SIZE], int *tone, int *noise) {
 584   int t;
 585
 586   fprintf(stdout,"1 Ton: ");
 587   t=*tone;
 588   while (t!=LAST && t!=STOP) {
 589     fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
 590     t = power[t].next;
 591   }
 592   fprintf(stdout,"\n");
 593
 594   fprintf(stdout,"1 Nos: ");
 595   t=*noise;
 596   while (t!=LAST && t!=STOP) {
 597     fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
 598     t = power[t].next;
 599   }
 600   fprintf(stdout,"\n");
 601 }