src/ac3_decoder/ac3_imdct.c

   1 /*****************************************************************************
   2  * ac3_imdct.c: ac3 DCT
   3  *****************************************************************************
   4  * Copyright (C) 1999, 2000 VideoLAN
   5  * $Id: ac3_imdct.c,v 1.16 2001/04/26 11:23:16 sam Exp $
   6  *
   7  * Authors: Michel Kaempf <maxx@via.ecp.fr>
   8  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
   9  *          Renaud Dartus <reno@videolan.org>
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of the GNU General Public License as published by
  13  * the Free Software Foundation; either version 2 of the License, or
  14  * (at your option) any later version.
  15  *
  16  * This program is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License
  22  * along with this program; if not, write to the Free Software
  23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  24  *****************************************************************************/
  25
  26 #include "defs.h"
  27
  28 #include <math.h>
  29 #include <stdio.h>
  30
  31 #include "config.h"
  32 #include "common.h"
  33 #include "threads.h"
  34 #include "mtime.h"
  35
  36 #include "stream_control.h"
  37 #include "input_ext-dec.h"
  38
  39 #include "ac3_decoder.h"
  40 #include "ac3_internal.h"
  41
  42 #include "ac3_downmix.h"
  43
  44 #ifndef M_PI
  45 #   define M_PI 3.14159265358979323846
  46 #endif
  47
  48 void imdct_do_256(imdct_t * p_imdct, float x[],float y[], int id);
  49 void imdct_do_512(imdct_t * p_imdct, float x[],float y[], int id);
  50
  51 /* 128 point bit-reverse LUT */
  52 static const u8 bit_reverse_512[] = {
  53     0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
  54     0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
  55     0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
  56     0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
  57     0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
  58     0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
  59     0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
  60     0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
  61     0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
  62     0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
  63     0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
  64     0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
  65     0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
  66     0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
  67     0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
  68     0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
  69
  70 static const u8 bit_reverse_256[] = {
  71     0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
  72     0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
  73     0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
  74     0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
  75     0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
  76     0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
  77     0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
  78     0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
  79
  80 /* Windowing function for Modified DCT - Thank you acroread */
  81 static float window[] = {
  82     0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
  83     0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
  84     0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
  85     0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
  86     0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
  87     0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
  88     0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
  89     0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
  90     0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
  91     0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
  92     0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
  93     0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
  94     0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
  95     0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
  96     0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
  97     0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
  98     0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
  99     0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
 100     0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
 101     0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
 102     0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
 103     0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
 104     0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
 105     0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
 106     0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
 107     0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
 108     0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
 109     0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
 110     0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
 111     0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
 112     0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
 113     1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
 114
 115 static __inline__ void swap_cmplx(complex_t *a, complex_t *b)
 116 {
 117     complex_t tmp;
 118
 119     tmp = *a;
 120     *a = *b;
 121     *b = tmp;
 122 }
 123
 124 static __inline__ complex_t cmplx_mult(complex_t a, complex_t b)
 125 {
 126     complex_t ret;
 127
 128     ret.real = a.real * b.real - a.imag * b.imag;
 129     ret.imag = a.real * b.imag + a.imag * b.real;
 130
 131     return ret;
 132 }
 133
 134 void imdct_init(imdct_t * p_imdct)
 135 {
 136     int i,k;
 137     complex_t angle_step;
 138     complex_t current_angle;
 139
 140     /* Twiddle factors to turn IFFT into IMDCT */
 141     for (i=0; i < N/4; i++) {
 142         p_imdct->xcos1[i] = -cos(2 * M_PI * (8*i+1)/(8*N)) ;
 143         p_imdct->xsin1[i] = -sin(2 * M_PI * (8*i+1)/(8*N)) ;
 144     }
 145
 146     /* More twiddle factors to turn IFFT into IMDCT */
 147     for (i=0; i < N/8; i++) {
 148         p_imdct->xcos2[i] = -cos(2 * M_PI * (8*i+1)/(4*N)) ;
 149         p_imdct->xsin2[i] = -sin(2 * M_PI * (8*i+1)/(4*N)) ;
 150     }
 151
 152     /* Canonical twiddle factors for FFT */
 153     p_imdct->w[0] = p_imdct->w_1;
 154     p_imdct->w[1] = p_imdct->w_2;
 155     p_imdct->w[2] = p_imdct->w_4;
 156     p_imdct->w[3] = p_imdct->w_8;
 157     p_imdct->w[4] = p_imdct->w_16;
 158     p_imdct->w[5] = p_imdct->w_32;
 159     p_imdct->w[6] = p_imdct->w_64;
 160
 161     for (i = 0; i < 7; i++) {
 162         angle_step.real = cos(-2.0f * M_PI / (1 << (i+1)));
 163         angle_step.imag = sin(-2.0f * M_PI / (1 << (i+1)));
 164
 165         current_angle.real = 1.0f;
 166         current_angle.imag = 0.0f;
 167
 168         for (k = 0; k < 1 << i; k++) {
 169             p_imdct->w[i][k] = current_angle;
 170             current_angle = cmplx_mult(current_angle,angle_step);
 171         }
 172     }
 173 }
 174
 175 void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
 176 {
 177     int i, i_stream_done;
 178     int doable = 0;
 179     void (*do_imdct)(imdct_t * p_imdct, float x[],float y[], int id);
 180
 181     /* Test if dm in frequency is doable */
 182     if ( !(doable = p_ac3dec->audblk.blksw[0]) )
 183         do_imdct = imdct_do_512;
 184     else
 185         do_imdct = imdct_do_256;
 186
 187     /* Downmix in the frequency domain if all the channes use the same imdct */
 188     for (i=0; i < p_ac3dec->bsi.nfchans; i++)
 189     {
 190         if ( doable != p_ac3dec->audblk.blksw[i] )
 191         {
 192             do_imdct = NULL;
 193             break;
 194         }
 195     }
 196
 197     if (do_imdct)
 198     {
 199         i_stream_done = downmix(p_ac3dec, p_ac3dec->coeffs.fbw[0], buffer);
 200         do_imdct(&p_ac3dec->imdct,p_ac3dec->coeffs.fbw[0],p_ac3dec->samples.channel[0], 0);
 201         do_imdct(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[1],p_ac3dec->samples.channel[1], 1);
 202     } else {
 203         for (i=0; i<p_ac3dec->bsi.nfchans;i++) {
 204             if (p_ac3dec->audblk.blksw[i])
 205                 imdct_do_256(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i], i);
 206             else
 207                 imdct_do_512(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i], i);
 208         }
 209         i_stream_done = downmix(p_ac3dec, p_ac3dec->samples.channel[0], buffer);
 210     }
 211
 212     if ( !i_stream_done ) /* We have to stream sample */
 213     {
 214         stream_sample_2ch_to_s16_c(buffer, p_ac3dec->samples.channel[0],
 215                 p_ac3dec->samples.channel[1]);
 216     } else {
 217         stream_sample_1ch_to_s16_c(buffer, p_ac3dec->samples.channel[0]);
 218     }
 219
 220     /* XXX?? We don't bother with the IMDCT for the LFE as it's currently
 221      * unused. */
 222     //if (bsi->lfeon)
 223     //    imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]);
 224 }
 225
 226 void imdct_do_512(imdct_t * p_imdct, float x[], float y[], int id)
 227 {
 228     int i,k;
 229     int p,q;
 230     int m;
 231     int two_m;
 232     int two_m_plus_one;
 233
 234     float tmp_a_i;
 235     float tmp_a_r;
 236     float tmp_b_i;
 237     float tmp_b_r;
 238
 239
 240     float *y_ptr;
 241     float *delay_ptr;
 242     float *window_ptr;
 243
 244     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 245     for (i=0; i < N/4; i++) {
 246         /* z[i] = (X[N/2-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
 247         p_imdct->buf[i].real =  (x[N/2-2*i-1] * p_imdct->xcos1[i])  - (x[2*i]       * p_imdct->xsin1[i]);
 248         p_imdct->buf[i].imag = -((x[2*i]       * p_imdct->xcos1[i])  + (x[N/2-2*i-1] * p_imdct->xsin1[i]));
 249     }
 250
 251     /* Bit reversed shuffling */
 252     for (i=0; i<N/4; i++) {
 253         k = bit_reverse_512[i];
 254         if (k < i)
 255             swap_cmplx(&p_imdct->buf[i],&p_imdct->buf[k]);
 256     }
 257
 258     /* FFT Merge */
 259     for (m=0; m < 7; m++) {
 260         two_m = (1 << m);
 261         two_m_plus_one = (1 << (m+1));
 262
 263         for (k = 0; k < two_m; k++) {
 264             for (i = 0; i < 128; i += two_m_plus_one) {
 265                 p = k + i;
 266                 q = p + two_m;
 267                 tmp_a_r = p_imdct->buf[p].real;
 268                 tmp_a_i = p_imdct->buf[p].imag;
 269                 tmp_b_r = p_imdct->buf[q].real * p_imdct->w[m][k].real - p_imdct->buf[q].imag * p_imdct->w[m][k].imag;
 270                 tmp_b_i = p_imdct->buf[q].imag * p_imdct->w[m][k].real + p_imdct->buf[q].real * p_imdct->w[m][k].imag;
 271                 p_imdct->buf[p].real = tmp_a_r + tmp_b_r;
 272                 p_imdct->buf[p].imag =  tmp_a_i + tmp_b_i;
 273                 p_imdct->buf[q].real = tmp_a_r - tmp_b_r;
 274                 p_imdct->buf[q].imag =  tmp_a_i - tmp_b_i;
 275             }
 276         }
 277     }
 278
 279     /* Post IFFT complex multiply  plus IFFT complex conjugate*/
 280     for (i=0; i < N/4; i++) {
 281         /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
 282         tmp_a_r =   p_imdct->buf[i].real;
 283         tmp_a_i = - p_imdct->buf[i].imag;
 284         p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i])  - (tmp_a_i  * p_imdct->xsin1[i]);
 285         p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i])  + (tmp_a_i  * p_imdct->xcos1[i]);
 286     }
 287
 288     y_ptr = y;
 289     delay_ptr = p_imdct->delay[id];
 290     window_ptr = window;
 291     /* Window and convert to real valued signal */
 292     for (i=0; i<N/8; i++) {
 293         *y_ptr++   = 2.0f * (-p_imdct->buf[N/8+i].imag   * *window_ptr++ + *delay_ptr++);
 294         *y_ptr++   = 2.0f * (p_imdct->buf[N/8-i-1].real * *window_ptr++ + *delay_ptr++);
 295     }
 296
 297     for (i=0; i<N/8; i++) {
 298         *y_ptr++  = 2.0f * (-p_imdct->buf[i].real       * *window_ptr++ + *delay_ptr++);
 299         *y_ptr++  = 2.0f * (p_imdct->buf[N/4-i-1].imag * *window_ptr++ + *delay_ptr++);
 300     }
 301
 302     /* The trailing edge of the window goes into the delay line */
 303     delay_ptr = p_imdct->delay[id];
 304
 305     for (i=0; i<N/8; i++) {
 306         *delay_ptr++  = -p_imdct->buf[N/8+i].real   * *--window_ptr;
 307         *delay_ptr++  =  p_imdct->buf[N/8-i-1].imag * *--window_ptr;
 308     }
 309
 310     for (i=0; i<N/8; i++) {
 311         *delay_ptr++  =  p_imdct->buf[i].imag       * *--window_ptr;
 312         *delay_ptr++  = -p_imdct->buf[N/4-i-1].real * *--window_ptr;
 313     }
 314 }
 315
 316 void imdct_do_256(imdct_t * p_imdct,float x[],float y[], int id)
 317 {
 318     int i,k;
 319     int p,q;
 320     int m;
 321     int two_m;
 322     int two_m_plus_one;
 323
 324     float tmp_a_i;
 325     float tmp_a_r;
 326     float tmp_b_i;
 327     float tmp_b_r;
 328
 329     complex_t *buf_1, *buf_2;
 330
 331     buf_1 = &p_imdct->buf[0];
 332     buf_2 = &p_imdct->buf[64];
 333
 334     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 335     for (k=0; k<N/8; k++) {
 336         /* X1[k] = X[2*k]  */
 337         /* X2[k] = X[2*k+1]     */
 338
 339         p = 2 * (N/4-2*k-1);
 340         q = 2 * (2 * k);
 341
 342        /* Z1[k] = (X1[N/4-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 343         buf_1[k].real =    x[p] * p_imdct->xcos2[k] - x[q] * p_imdct->xsin2[k];
 344         buf_1[k].imag = - (x[q] * p_imdct->xcos2[k] + x[p] * p_imdct->xsin2[k]);
 345        /* Z2[k] = (X2[N/4-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 346         buf_2[k].real =    x[p + 1] * p_imdct->xcos2[k] - x[q + 1] * p_imdct->xsin2[k];
 347         buf_2[k].imag = - (x[q + 1] * p_imdct->xcos2[k] + x[p + 1] * p_imdct->xsin2[k]);
 348     }
 349
 350     /* IFFT Bit reversed shuffling */
 351     for (i=0; i<N/8; i++) {
 352         k = bit_reverse_256[i];
 353         if (k < i) {
 354             swap_cmplx(&buf_1[i],&buf_1[k]);
 355             swap_cmplx(&buf_2[i],&buf_2[k]);
 356         }
 357     }
 358
 359     /* FFT Merge */
 360     for (m=0; m < 6; m++) {
 361         two_m = (1 << m);
 362         two_m_plus_one = (1 << (m+1));
 363
 364         for (k = 0; k < two_m; k++) {
 365             for (i = 0; i < 64; i += two_m_plus_one) {
 366                 p = k + i;
 367                 q = p + two_m;
 368                 /* Do block 1 */
 369                 tmp_a_r = buf_1[p].real;
 370                 tmp_a_i = buf_1[p].imag;
 371                 tmp_b_r = buf_1[q].real * p_imdct->w[m][k].real - buf_1[q].imag * p_imdct->w[m][k].imag;
 372                 tmp_b_i = buf_1[q].imag * p_imdct->w[m][k].real + buf_1[q].real * p_imdct->w[m][k].imag;
 373                 buf_1[p].real = tmp_a_r + tmp_b_r;
 374                 buf_1[p].imag =  tmp_a_i + tmp_b_i;
 375                 buf_1[q].real = tmp_a_r - tmp_b_r;
 376                 buf_1[q].imag =  tmp_a_i - tmp_b_i;
 377
 378                 /* Do block 2 */
 379                 tmp_a_r = buf_2[p].real;
 380                 tmp_a_i = buf_2[p].imag;
 381                 tmp_b_r = buf_2[q].real * p_imdct->w[m][k].real - buf_2[q].imag * p_imdct->w[m][k].imag;
 382                 tmp_b_i = buf_2[q].imag * p_imdct->w[m][k].real + buf_2[q].real * p_imdct->w[m][k].imag;
 383                 buf_2[p].real = tmp_a_r + tmp_b_r;
 384                 buf_2[p].imag =  tmp_a_i + tmp_b_i;
 385                 buf_2[q].real = tmp_a_r - tmp_b_r;
 386                 buf_2[q].imag =  tmp_a_i - tmp_b_i;
 387             }
 388         }
 389     }
 390
 391     /* Post IFFT complex multiply */
 392     for (i=0; i < N/8; i++) {
 393         /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
 394         tmp_a_r =   buf_1[i].real;
 395         tmp_a_i = - buf_1[i].imag;
 396         buf_1[i].real =(tmp_a_r * p_imdct->xcos2[i])  - (tmp_a_i  * p_imdct->xsin2[i]);
 397         buf_1[i].imag =(tmp_a_r * p_imdct->xsin2[i])  + (tmp_a_i  * p_imdct->xcos2[i]);
 398         /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
 399         tmp_a_r =   buf_2[i].real;
 400         tmp_a_i = - buf_2[i].imag;
 401         buf_2[i].real =(tmp_a_r * p_imdct->xcos2[i])  - (tmp_a_i  * p_imdct->xsin2[i]);
 402         buf_2[i].imag =(tmp_a_r * p_imdct->xsin2[i])  + (tmp_a_i  * p_imdct->xcos2[i]);
 403     }
 404
 405     /* Window and convert to real valued signal */
 406     for (i=0; i<N/8; i++) {
 407         y[2*i]         = -buf_1[i].imag       * window[2*i];
 408         y[2*i+1]       =  buf_1[N/8-i-1].real * window[2*i+1];
 409         y[N/4+2*i]     = -buf_1[i].real       * window[N/4+2*i];
 410         y[N/4+2*i+1]   =  buf_1[N/8-i-1].imag * window[N/4+2*i+1];
 411         y[N/2+2*i]     = -buf_2[i].real       * window[N/2-2*i-1];
 412         y[N/2+2*i+1]   =  buf_2[N/8-i-1].imag * window[N/2-2*i-2];
 413         y[3*N/4+2*i]   =  buf_2[i].imag       * window[N/4-2*i-1];
 414         y[3*N/4+2*i+1] = -buf_2[N/8-i-1].real * window[N/4-2*i-2];
 415     }
 416
 417     /* Overlap and add */
 418     for (i=0; i<N/2; i++) {
 419         y[i] = 2 * (y[i] + p_imdct->delay[id][i]);
 420         p_imdct->delay[id][i] = y[N/2+i];
 421     }
 422 }