cache, stream: avoid extra memcpy when using cache
[mplayer/glamo.git] / libfaad2 / mdct.c
blob4c9144d54e00c0d41ed0623c9db568354c3a26aa
1 /*
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ** Any non-GPL usage of this software or parts of this software is strictly
20 ** forbidden.
22 ** Commercial non-GPL licensing of this software is possible.
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
25 ** $Id: mdct.c,v 1.43 2004/09/04 14:56:28 menno Exp $
26 **/
29 * Fast (I)MDCT Implementation using (I)FFT ((Inverse) Fast Fourier Transform)
30 * and consists of three steps: pre-(I)FFT complex multiplication, complex
31 * (I)FFT, post-(I)FFT complex multiplication,
33 * As described in:
34 * P. Duhamel, Y. Mahieux, and J.P. Petit, "A Fast Algorithm for the
35 * Implementation of Filter Banks Based on 'Time Domain Aliasing
36 * Cancellation’," IEEE Proc. on ICASSP‘91, 1991, pp. 2209-2212.
39 * As of April 6th 2002 completely rewritten.
40 * This (I)MDCT can now be used for any data size n, where n is divisible by 8.
44 #include "common.h"
45 #include "structs.h"
47 #include <stdlib.h>
48 #ifdef _WIN32_WCE
49 #define assert(x)
50 #else
51 #include <assert.h>
52 #endif
54 #include "cfft.h"
55 #include "mdct.h"
56 #include "mdct_tab.h"
59 mdct_info *faad_mdct_init(uint16_t N)
61 mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info));
63 assert(N % 8 == 0);
65 mdct->N = N;
67 /* NOTE: For "small framelengths" in FIXED_POINT the coefficients need to be
68 * scaled by sqrt("(nearest power of 2) > N" / N) */
70 /* RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N));
71 * IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); */
72 /* scale is 1 for fixed point, sqrt(N) for floating point */
73 switch (N)
75 case 2048: mdct->sincos = (complex_t*)mdct_tab_2048; break;
76 case 256: mdct->sincos = (complex_t*)mdct_tab_256; break;
77 #ifdef LD_DEC
78 case 1024: mdct->sincos = (complex_t*)mdct_tab_1024; break;
79 #endif
80 #ifdef ALLOW_SMALL_FRAMELENGTH
81 case 1920: mdct->sincos = (complex_t*)mdct_tab_1920; break;
82 case 240: mdct->sincos = (complex_t*)mdct_tab_240; break;
83 #ifdef LD_DEC
84 case 960: mdct->sincos = (complex_t*)mdct_tab_960; break;
85 #endif
86 #endif
87 #ifdef SSR_DEC
88 case 512: mdct->sincos = (complex_t*)mdct_tab_512; break;
89 case 64: mdct->sincos = (complex_t*)mdct_tab_64; break;
90 #endif
93 /* initialise fft */
94 mdct->cfft = cffti(N/4);
96 #ifdef PROFILE
97 mdct->cycles = 0;
98 mdct->fft_cycles = 0;
99 #endif
101 return mdct;
104 void faad_mdct_end(mdct_info *mdct)
106 if (mdct != NULL)
108 #ifdef PROFILE
109 printf("MDCT[%.4d]: %I64d cycles\n", mdct->N, mdct->cycles);
110 printf("CFFT[%.4d]: %I64d cycles\n", mdct->N/4, mdct->fft_cycles);
111 #endif
113 cfftu(mdct->cfft);
115 faad_free(mdct);
119 void faad_imdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
121 uint16_t k;
123 complex_t x;
124 #ifdef ALLOW_SMALL_FRAMELENGTH
125 #ifdef FIXED_POINT
126 real_t scale, b_scale = 0;
127 #endif
128 #endif
129 ALIGN complex_t Z1[512];
130 complex_t *sincos = mdct->sincos;
132 uint16_t N = mdct->N;
133 uint16_t N2 = N >> 1;
134 uint16_t N4 = N >> 2;
135 uint16_t N8 = N >> 3;
137 #ifdef PROFILE
138 int64_t count1, count2 = faad_get_ts();
139 #endif
141 #ifdef ALLOW_SMALL_FRAMELENGTH
142 #ifdef FIXED_POINT
143 /* detect non-power of 2 */
144 if (N & (N-1))
146 /* adjust scale for non-power of 2 MDCT */
147 /* 2048/1920 */
148 b_scale = 1;
149 scale = COEF_CONST(1.0666666666666667);
151 #endif
152 #endif
154 /* pre-IFFT complex multiplication */
155 for (k = 0; k < N4; k++)
157 ComplexMult(&IM(Z1[k]), &RE(Z1[k]),
158 X_in[2*k], X_in[N2 - 1 - 2*k], RE(sincos[k]), IM(sincos[k]));
161 #ifdef PROFILE
162 count1 = faad_get_ts();
163 #endif
165 /* complex IFFT, any non-scaling FFT can be used here */
166 cfftb(mdct->cfft, Z1);
168 #ifdef PROFILE
169 count1 = faad_get_ts() - count1;
170 #endif
172 /* post-IFFT complex multiplication */
173 for (k = 0; k < N4; k++)
175 RE(x) = RE(Z1[k]);
176 IM(x) = IM(Z1[k]);
177 ComplexMult(&IM(Z1[k]), &RE(Z1[k]),
178 IM(x), RE(x), RE(sincos[k]), IM(sincos[k]));
180 #ifdef ALLOW_SMALL_FRAMELENGTH
181 #ifdef FIXED_POINT
182 /* non-power of 2 MDCT scaling */
183 if (b_scale)
185 RE(Z1[k]) = MUL_C(RE(Z1[k]), scale);
186 IM(Z1[k]) = MUL_C(IM(Z1[k]), scale);
188 #endif
189 #endif
192 /* reordering */
193 for (k = 0; k < N8; k+=2)
195 X_out[ 2*k] = IM(Z1[N8 + k]);
196 X_out[ 2 + 2*k] = IM(Z1[N8 + 1 + k]);
198 X_out[ 1 + 2*k] = -RE(Z1[N8 - 1 - k]);
199 X_out[ 3 + 2*k] = -RE(Z1[N8 - 2 - k]);
201 X_out[N4 + 2*k] = RE(Z1[ k]);
202 X_out[N4 + + 2 + 2*k] = RE(Z1[ 1 + k]);
204 X_out[N4 + 1 + 2*k] = -IM(Z1[N4 - 1 - k]);
205 X_out[N4 + 3 + 2*k] = -IM(Z1[N4 - 2 - k]);
207 X_out[N2 + 2*k] = RE(Z1[N8 + k]);
208 X_out[N2 + + 2 + 2*k] = RE(Z1[N8 + 1 + k]);
210 X_out[N2 + 1 + 2*k] = -IM(Z1[N8 - 1 - k]);
211 X_out[N2 + 3 + 2*k] = -IM(Z1[N8 - 2 - k]);
213 X_out[N2 + N4 + 2*k] = -IM(Z1[ k]);
214 X_out[N2 + N4 + 2 + 2*k] = -IM(Z1[ 1 + k]);
216 X_out[N2 + N4 + 1 + 2*k] = RE(Z1[N4 - 1 - k]);
217 X_out[N2 + N4 + 3 + 2*k] = RE(Z1[N4 - 2 - k]);
220 #ifdef PROFILE
221 count2 = faad_get_ts() - count2;
222 mdct->fft_cycles += count1;
223 mdct->cycles += (count2 - count1);
224 #endif
227 #ifdef LTP_DEC
228 void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
230 uint16_t k;
232 complex_t x;
233 ALIGN complex_t Z1[512];
234 complex_t *sincos = mdct->sincos;
236 uint16_t N = mdct->N;
237 uint16_t N2 = N >> 1;
238 uint16_t N4 = N >> 2;
239 uint16_t N8 = N >> 3;
241 #ifndef FIXED_POINT
242 real_t scale = REAL_CONST(N);
243 #else
244 real_t scale = REAL_CONST(4.0/N);
245 #endif
247 #ifdef ALLOW_SMALL_FRAMELENGTH
248 #ifdef FIXED_POINT
249 /* detect non-power of 2 */
250 if (N & (N-1))
252 /* adjust scale for non-power of 2 MDCT */
253 /* *= sqrt(2048/1920) */
254 scale = MUL_C(scale, COEF_CONST(1.0327955589886444));
256 #endif
257 #endif
259 /* pre-FFT complex multiplication */
260 for (k = 0; k < N8; k++)
262 uint16_t n = k << 1;
263 RE(x) = X_in[N - N4 - 1 - n] + X_in[N - N4 + n];
264 IM(x) = X_in[ N4 + n] - X_in[ N4 - 1 - n];
266 ComplexMult(&RE(Z1[k]), &IM(Z1[k]),
267 RE(x), IM(x), RE(sincos[k]), IM(sincos[k]));
269 RE(Z1[k]) = MUL_R(RE(Z1[k]), scale);
270 IM(Z1[k]) = MUL_R(IM(Z1[k]), scale);
272 RE(x) = X_in[N2 - 1 - n] - X_in[ n];
273 IM(x) = X_in[N2 + n] + X_in[N - 1 - n];
275 ComplexMult(&RE(Z1[k + N8]), &IM(Z1[k + N8]),
276 RE(x), IM(x), RE(sincos[k + N8]), IM(sincos[k + N8]));
278 RE(Z1[k + N8]) = MUL_R(RE(Z1[k + N8]), scale);
279 IM(Z1[k + N8]) = MUL_R(IM(Z1[k + N8]), scale);
282 /* complex FFT, any non-scaling FFT can be used here */
283 cfftf(mdct->cfft, Z1);
285 /* post-FFT complex multiplication */
286 for (k = 0; k < N4; k++)
288 uint16_t n = k << 1;
289 ComplexMult(&RE(x), &IM(x),
290 RE(Z1[k]), IM(Z1[k]), RE(sincos[k]), IM(sincos[k]));
292 X_out[ n] = -RE(x);
293 X_out[N2 - 1 - n] = IM(x);
294 X_out[N2 + n] = -IM(x);
295 X_out[N - 1 - n] = RE(x);
298 #endif