* Fixed the ac3dec compilation under BeOS.
[vlc.git] / src / ac3_decoder / ac3_imdct.c
blobae8bc17dedd50a972d2213d31b2fc0432679bdca
1 /*****************************************************************************
2 * ac3_imdct.c: ac3 DCT
3 *****************************************************************************
4 * Copyright (C) 1999, 2000 VideoLAN
5 * $Id: ac3_imdct.c,v 1.16 2001/04/26 11:23:16 sam Exp $
7 * Authors: Michel Kaempf <maxx@via.ecp.fr>
8 * Aaron Holtzman <aholtzma@engr.uvic.ca>
9 * Renaud Dartus <reno@videolan.org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
24 *****************************************************************************/
26 #include "defs.h"
28 #include <math.h>
29 #include <stdio.h>
31 #include "config.h"
32 #include "common.h"
33 #include "threads.h"
34 #include "mtime.h"
36 #include "stream_control.h"
37 #include "input_ext-dec.h"
39 #include "ac3_decoder.h"
40 #include "ac3_internal.h"
42 #include "ac3_downmix.h"
44 #ifndef M_PI
45 # define M_PI 3.14159265358979323846
46 #endif
48 void imdct_do_256(imdct_t * p_imdct, float x[],float y[], int id);
49 void imdct_do_512(imdct_t * p_imdct, float x[],float y[], int id);
51 /* 128 point bit-reverse LUT */
52 static const u8 bit_reverse_512[] = {
53 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
54 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
55 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
56 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
57 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
58 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
59 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
60 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
61 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
62 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
63 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
64 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
65 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
66 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
67 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
68 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
70 static const u8 bit_reverse_256[] = {
71 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
72 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
73 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
74 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
75 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
76 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
77 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
78 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
80 /* Windowing function for Modified DCT - Thank you acroread */
81 static float window[] = {
82 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
83 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
84 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
85 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
86 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
87 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
88 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
89 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
90 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
91 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
92 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
93 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
94 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
95 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
96 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
97 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
98 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
99 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
100 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
101 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
102 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
103 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
104 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
105 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
106 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
107 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
108 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
109 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
110 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
111 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
112 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
113 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
115 static __inline__ void swap_cmplx(complex_t *a, complex_t *b)
117 complex_t tmp;
119 tmp = *a;
120 *a = *b;
121 *b = tmp;
124 static __inline__ complex_t cmplx_mult(complex_t a, complex_t b)
126 complex_t ret;
128 ret.real = a.real * b.real - a.imag * b.imag;
129 ret.imag = a.real * b.imag + a.imag * b.real;
131 return ret;
134 void imdct_init(imdct_t * p_imdct)
136 int i,k;
137 complex_t angle_step;
138 complex_t current_angle;
140 /* Twiddle factors to turn IFFT into IMDCT */
141 for (i=0; i < N/4; i++) {
142 p_imdct->xcos1[i] = -cos(2 * M_PI * (8*i+1)/(8*N)) ;
143 p_imdct->xsin1[i] = -sin(2 * M_PI * (8*i+1)/(8*N)) ;
146 /* More twiddle factors to turn IFFT into IMDCT */
147 for (i=0; i < N/8; i++) {
148 p_imdct->xcos2[i] = -cos(2 * M_PI * (8*i+1)/(4*N)) ;
149 p_imdct->xsin2[i] = -sin(2 * M_PI * (8*i+1)/(4*N)) ;
152 /* Canonical twiddle factors for FFT */
153 p_imdct->w[0] = p_imdct->w_1;
154 p_imdct->w[1] = p_imdct->w_2;
155 p_imdct->w[2] = p_imdct->w_4;
156 p_imdct->w[3] = p_imdct->w_8;
157 p_imdct->w[4] = p_imdct->w_16;
158 p_imdct->w[5] = p_imdct->w_32;
159 p_imdct->w[6] = p_imdct->w_64;
161 for (i = 0; i < 7; i++) {
162 angle_step.real = cos(-2.0f * M_PI / (1 << (i+1)));
163 angle_step.imag = sin(-2.0f * M_PI / (1 << (i+1)));
165 current_angle.real = 1.0f;
166 current_angle.imag = 0.0f;
168 for (k = 0; k < 1 << i; k++) {
169 p_imdct->w[i][k] = current_angle;
170 current_angle = cmplx_mult(current_angle,angle_step);
175 void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
177 int i, i_stream_done;
178 int doable = 0;
179 void (*do_imdct)(imdct_t * p_imdct, float x[],float y[], int id);
181 /* Test if dm in frequency is doable */
182 if ( !(doable = p_ac3dec->audblk.blksw[0]) )
183 do_imdct = imdct_do_512;
184 else
185 do_imdct = imdct_do_256;
187 /* Downmix in the frequency domain if all the channes use the same imdct */
188 for (i=0; i < p_ac3dec->bsi.nfchans; i++)
190 if ( doable != p_ac3dec->audblk.blksw[i] )
192 do_imdct = NULL;
193 break;
197 if (do_imdct)
199 i_stream_done = downmix(p_ac3dec, p_ac3dec->coeffs.fbw[0], buffer);
200 do_imdct(&p_ac3dec->imdct,p_ac3dec->coeffs.fbw[0],p_ac3dec->samples.channel[0], 0);
201 do_imdct(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[1],p_ac3dec->samples.channel[1], 1);
202 } else {
203 for (i=0; i<p_ac3dec->bsi.nfchans;i++) {
204 if (p_ac3dec->audblk.blksw[i])
205 imdct_do_256(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i], i);
206 else
207 imdct_do_512(&p_ac3dec->imdct, p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i], i);
209 i_stream_done = downmix(p_ac3dec, p_ac3dec->samples.channel[0], buffer);
212 if ( !i_stream_done ) /* We have to stream sample */
214 stream_sample_2ch_to_s16_c(buffer, p_ac3dec->samples.channel[0],
215 p_ac3dec->samples.channel[1]);
216 } else {
217 stream_sample_1ch_to_s16_c(buffer, p_ac3dec->samples.channel[0]);
220 /* XXX?? We don't bother with the IMDCT for the LFE as it's currently
221 * unused. */
222 //if (bsi->lfeon)
223 // imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]);
226 void imdct_do_512(imdct_t * p_imdct, float x[], float y[], int id)
228 int i,k;
229 int p,q;
230 int m;
231 int two_m;
232 int two_m_plus_one;
234 float tmp_a_i;
235 float tmp_a_r;
236 float tmp_b_i;
237 float tmp_b_r;
240 float *y_ptr;
241 float *delay_ptr;
242 float *window_ptr;
244 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
245 for (i=0; i < N/4; i++) {
246 /* z[i] = (X[N/2-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
247 p_imdct->buf[i].real = (x[N/2-2*i-1] * p_imdct->xcos1[i]) - (x[2*i] * p_imdct->xsin1[i]);
248 p_imdct->buf[i].imag = -((x[2*i] * p_imdct->xcos1[i]) + (x[N/2-2*i-1] * p_imdct->xsin1[i]));
251 /* Bit reversed shuffling */
252 for (i=0; i<N/4; i++) {
253 k = bit_reverse_512[i];
254 if (k < i)
255 swap_cmplx(&p_imdct->buf[i],&p_imdct->buf[k]);
258 /* FFT Merge */
259 for (m=0; m < 7; m++) {
260 two_m = (1 << m);
261 two_m_plus_one = (1 << (m+1));
263 for (k = 0; k < two_m; k++) {
264 for (i = 0; i < 128; i += two_m_plus_one) {
265 p = k + i;
266 q = p + two_m;
267 tmp_a_r = p_imdct->buf[p].real;
268 tmp_a_i = p_imdct->buf[p].imag;
269 tmp_b_r = p_imdct->buf[q].real * p_imdct->w[m][k].real - p_imdct->buf[q].imag * p_imdct->w[m][k].imag;
270 tmp_b_i = p_imdct->buf[q].imag * p_imdct->w[m][k].real + p_imdct->buf[q].real * p_imdct->w[m][k].imag;
271 p_imdct->buf[p].real = tmp_a_r + tmp_b_r;
272 p_imdct->buf[p].imag = tmp_a_i + tmp_b_i;
273 p_imdct->buf[q].real = tmp_a_r - tmp_b_r;
274 p_imdct->buf[q].imag = tmp_a_i - tmp_b_i;
279 /* Post IFFT complex multiply plus IFFT complex conjugate*/
280 for (i=0; i < N/4; i++) {
281 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
282 tmp_a_r = p_imdct->buf[i].real;
283 tmp_a_i = - p_imdct->buf[i].imag;
284 p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) - (tmp_a_i * p_imdct->xsin1[i]);
285 p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) + (tmp_a_i * p_imdct->xcos1[i]);
288 y_ptr = y;
289 delay_ptr = p_imdct->delay[id];
290 window_ptr = window;
291 /* Window and convert to real valued signal */
292 for (i=0; i<N/8; i++) {
293 *y_ptr++ = 2.0f * (-p_imdct->buf[N/8+i].imag * *window_ptr++ + *delay_ptr++);
294 *y_ptr++ = 2.0f * (p_imdct->buf[N/8-i-1].real * *window_ptr++ + *delay_ptr++);
297 for (i=0; i<N/8; i++) {
298 *y_ptr++ = 2.0f * (-p_imdct->buf[i].real * *window_ptr++ + *delay_ptr++);
299 *y_ptr++ = 2.0f * (p_imdct->buf[N/4-i-1].imag * *window_ptr++ + *delay_ptr++);
302 /* The trailing edge of the window goes into the delay line */
303 delay_ptr = p_imdct->delay[id];
305 for (i=0; i<N/8; i++) {
306 *delay_ptr++ = -p_imdct->buf[N/8+i].real * *--window_ptr;
307 *delay_ptr++ = p_imdct->buf[N/8-i-1].imag * *--window_ptr;
310 for (i=0; i<N/8; i++) {
311 *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr;
312 *delay_ptr++ = -p_imdct->buf[N/4-i-1].real * *--window_ptr;
316 void imdct_do_256(imdct_t * p_imdct,float x[],float y[], int id)
318 int i,k;
319 int p,q;
320 int m;
321 int two_m;
322 int two_m_plus_one;
324 float tmp_a_i;
325 float tmp_a_r;
326 float tmp_b_i;
327 float tmp_b_r;
329 complex_t *buf_1, *buf_2;
331 buf_1 = &p_imdct->buf[0];
332 buf_2 = &p_imdct->buf[64];
334 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
335 for (k=0; k<N/8; k++) {
336 /* X1[k] = X[2*k] */
337 /* X2[k] = X[2*k+1] */
339 p = 2 * (N/4-2*k-1);
340 q = 2 * (2 * k);
342 /* Z1[k] = (X1[N/4-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
343 buf_1[k].real = x[p] * p_imdct->xcos2[k] - x[q] * p_imdct->xsin2[k];
344 buf_1[k].imag = - (x[q] * p_imdct->xcos2[k] + x[p] * p_imdct->xsin2[k]);
345 /* Z2[k] = (X2[N/4-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
346 buf_2[k].real = x[p + 1] * p_imdct->xcos2[k] - x[q + 1] * p_imdct->xsin2[k];
347 buf_2[k].imag = - (x[q + 1] * p_imdct->xcos2[k] + x[p + 1] * p_imdct->xsin2[k]);
350 /* IFFT Bit reversed shuffling */
351 for (i=0; i<N/8; i++) {
352 k = bit_reverse_256[i];
353 if (k < i) {
354 swap_cmplx(&buf_1[i],&buf_1[k]);
355 swap_cmplx(&buf_2[i],&buf_2[k]);
359 /* FFT Merge */
360 for (m=0; m < 6; m++) {
361 two_m = (1 << m);
362 two_m_plus_one = (1 << (m+1));
364 for (k = 0; k < two_m; k++) {
365 for (i = 0; i < 64; i += two_m_plus_one) {
366 p = k + i;
367 q = p + two_m;
368 /* Do block 1 */
369 tmp_a_r = buf_1[p].real;
370 tmp_a_i = buf_1[p].imag;
371 tmp_b_r = buf_1[q].real * p_imdct->w[m][k].real - buf_1[q].imag * p_imdct->w[m][k].imag;
372 tmp_b_i = buf_1[q].imag * p_imdct->w[m][k].real + buf_1[q].real * p_imdct->w[m][k].imag;
373 buf_1[p].real = tmp_a_r + tmp_b_r;
374 buf_1[p].imag = tmp_a_i + tmp_b_i;
375 buf_1[q].real = tmp_a_r - tmp_b_r;
376 buf_1[q].imag = tmp_a_i - tmp_b_i;
378 /* Do block 2 */
379 tmp_a_r = buf_2[p].real;
380 tmp_a_i = buf_2[p].imag;
381 tmp_b_r = buf_2[q].real * p_imdct->w[m][k].real - buf_2[q].imag * p_imdct->w[m][k].imag;
382 tmp_b_i = buf_2[q].imag * p_imdct->w[m][k].real + buf_2[q].real * p_imdct->w[m][k].imag;
383 buf_2[p].real = tmp_a_r + tmp_b_r;
384 buf_2[p].imag = tmp_a_i + tmp_b_i;
385 buf_2[q].real = tmp_a_r - tmp_b_r;
386 buf_2[q].imag = tmp_a_i - tmp_b_i;
391 /* Post IFFT complex multiply */
392 for (i=0; i < N/8; i++) {
393 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
394 tmp_a_r = buf_1[i].real;
395 tmp_a_i = - buf_1[i].imag;
396 buf_1[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
397 buf_1[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
398 /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
399 tmp_a_r = buf_2[i].real;
400 tmp_a_i = - buf_2[i].imag;
401 buf_2[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
402 buf_2[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
405 /* Window and convert to real valued signal */
406 for (i=0; i<N/8; i++) {
407 y[2*i] = -buf_1[i].imag * window[2*i];
408 y[2*i+1] = buf_1[N/8-i-1].real * window[2*i+1];
409 y[N/4+2*i] = -buf_1[i].real * window[N/4+2*i];
410 y[N/4+2*i+1] = buf_1[N/8-i-1].imag * window[N/4+2*i+1];
411 y[N/2+2*i] = -buf_2[i].real * window[N/2-2*i-1];
412 y[N/2+2*i+1] = buf_2[N/8-i-1].imag * window[N/2-2*i-2];
413 y[3*N/4+2*i] = buf_2[i].imag * window[N/4-2*i-1];
414 y[3*N/4+2*i+1] = -buf_2[N/8-i-1].real * window[N/4-2*i-2];
417 /* Overlap and add */
418 for (i=0; i<N/2; i++) {
419 y[i] = 2 * (y[i] + p_imdct->delay[id][i]);
420 p_imdct->delay[id][i] = y[N/2+i];