fix crashes reported by Debian Cylab Mayhem Team
[swftools.git] / lib / lame / fft.c
blob4b6d7272d57f8d6942e308db7e286173f081f77e
1 /*
2 ** FFT and FHT routines
3 ** Copyright 1988, 1993; Ron Mayer
4 **
5 ** fht(fz,n);
6 ** Does a hartley transform of "n" points in the array "fz".
7 **
8 ** NOTE: This routine uses at least 2 patented algorithms, and may be
9 ** under the restrictions of a bunch of different organizations.
10 ** Although I wrote it completely myself; it is kind of a derivative
11 ** of a routine I once authored and released under the GPL, so it
12 ** may fall under the free software foundation's restrictions;
13 ** it was worked on as a Stanford Univ project, so they claim
14 ** some rights to it; it was further optimized at work here, so
15 ** I think this company claims parts of it. The patents are
16 ** held by R. Bracewell (the FHT algorithm) and O. Buneman (the
17 ** trig generator), both at Stanford Univ.
18 ** If it were up to me, I'd say go do whatever you want with it;
19 ** but it would be polite to give credit to the following people
20 ** if you use this anywhere:
21 ** Euler - probable inventor of the fourier transform.
22 ** Gauss - probable inventor of the FFT.
23 ** Hartley - probable inventor of the hartley transform.
24 ** Buneman - for a really cool trig generator
25 ** Mayer(me) - for authoring this particular version and
26 ** including all the optimizations in one package.
27 ** Thanks,
28 ** Ron Mayer; mayer@acuson.com
29 ** and added some optimization by
30 ** Mather - idea of using lookup table
31 ** Takehiro - some dirty hack for speed up
34 /* $Id: fft.c,v 1.2 2006/02/09 16:56:23 kramm Exp $ */
36 #include <stdlib.h>
37 #include "config_static.h"
39 #include <math.h>
40 #include "util.h"
41 #include "fft.h"
46 #ifdef WITH_DMALLOC
47 #include <dmalloc.h>
48 #endif
50 #define TRI_SIZE (5-1) /* 1024 = 4**5 */
52 static const FLOAT costab[TRI_SIZE*2] = {
53 9.238795325112867e-01, 3.826834323650898e-01,
54 9.951847266721969e-01, 9.801714032956060e-02,
55 9.996988186962042e-01, 2.454122852291229e-02,
56 9.999811752826011e-01, 6.135884649154475e-03
59 static void fht(FLOAT *fz, int n)
61 const FLOAT *tri = costab;
62 int k4;
63 FLOAT *fi, *fn, *gi;
65 n <<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */
66 fn = fz + n;
67 k4 = 4;
68 do {
69 FLOAT s1, c1;
70 int i, k1, k2, k3, kx;
71 kx = k4 >> 1;
72 k1 = k4;
73 k2 = k4 << 1;
74 k3 = k2 + k1;
75 k4 = k2 << 1;
76 fi = fz;
77 gi = fi + kx;
78 do {
79 FLOAT f0,f1,f2,f3;
80 f1 = fi[0] - fi[k1];
81 f0 = fi[0] + fi[k1];
82 f3 = fi[k2] - fi[k3];
83 f2 = fi[k2] + fi[k3];
84 fi[k2] = f0 - f2;
85 fi[0 ] = f0 + f2;
86 fi[k3] = f1 - f3;
87 fi[k1] = f1 + f3;
88 f1 = gi[0] - gi[k1];
89 f0 = gi[0] + gi[k1];
90 f3 = SQRT2 * gi[k3];
91 f2 = SQRT2 * gi[k2];
92 gi[k2] = f0 - f2;
93 gi[0 ] = f0 + f2;
94 gi[k3] = f1 - f3;
95 gi[k1] = f1 + f3;
96 gi += k4;
97 fi += k4;
98 } while (fi<fn);
99 c1 = tri[0];
100 s1 = tri[1];
101 for (i = 1; i < kx; i++) {
102 FLOAT c2,s2;
103 c2 = 1 - (2*s1)*s1;
104 s2 = (2*s1)*c1;
105 fi = fz + i;
106 gi = fz + k1 - i;
107 do {
108 FLOAT a,b,g0,f0,f1,g1,f2,g2,f3,g3;
109 b = s2*fi[k1] - c2*gi[k1];
110 a = c2*fi[k1] + s2*gi[k1];
111 f1 = fi[0 ] - a;
112 f0 = fi[0 ] + a;
113 g1 = gi[0 ] - b;
114 g0 = gi[0 ] + b;
115 b = s2*fi[k3] - c2*gi[k3];
116 a = c2*fi[k3] + s2*gi[k3];
117 f3 = fi[k2] - a;
118 f2 = fi[k2] + a;
119 g3 = gi[k2] - b;
120 g2 = gi[k2] + b;
121 b = s1*f2 - c1*g3;
122 a = c1*f2 + s1*g3;
123 fi[k2] = f0 - a;
124 fi[0 ] = f0 + a;
125 gi[k3] = g1 - b;
126 gi[k1] = g1 + b;
127 b = c1*g2 - s1*f3;
128 a = s1*g2 + c1*f3;
129 gi[k2] = g0 - a;
130 gi[0 ] = g0 + a;
131 fi[k3] = f1 - b;
132 fi[k1] = f1 + b;
133 gi += k4;
134 fi += k4;
135 } while (fi<fn);
136 c2 = c1;
137 c1 = c2 * tri[0] - s1 * tri[1];
138 s1 = c2 * tri[1] + s1 * tri[0];
140 tri += 2;
141 } while (k4<n);
144 static const unsigned char rv_tbl[] = {
145 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
146 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
147 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
148 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
149 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
150 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
151 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
152 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
153 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
154 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
155 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
156 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
157 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
158 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
159 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
160 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
163 #define ch01(index) (buffer[chn][index])
165 #define ml00(f) (window[i ] * f(i))
166 #define ml10(f) (window[i + 0x200] * f(i + 0x200))
167 #define ml20(f) (window[i + 0x100] * f(i + 0x100))
168 #define ml30(f) (window[i + 0x300] * f(i + 0x300))
170 #define ml01(f) (window[i + 0x001] * f(i + 0x001))
171 #define ml11(f) (window[i + 0x201] * f(i + 0x201))
172 #define ml21(f) (window[i + 0x101] * f(i + 0x101))
173 #define ml31(f) (window[i + 0x301] * f(i + 0x301))
175 #define ms00(f) (window_s[i ] * f(i + k))
176 #define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
177 #define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
178 #define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
180 #define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
181 #define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
182 #define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
183 #define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
186 void fft_short(lame_internal_flags * const gfc,
187 FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t *buffer[2])
189 const FLOAT* window_s = (const FLOAT *)&gfc->window_s[0];
190 int i;
191 int j;
192 int b;
194 for (b = 0; b < 3; b++) {
195 FLOAT *x = &x_real[b][BLKSIZE_s / 2];
196 short k = (576 / 3) * (b + 1);
197 j = BLKSIZE_s / 8 - 1;
198 do {
199 FLOAT f0,f1,f2,f3, w;
201 i = rv_tbl[j << 2];
203 f0 = ms00(ch01); w = ms10(ch01); f1 = f0 - w; f0 = f0 + w;
204 f2 = ms20(ch01); w = ms30(ch01); f3 = f2 - w; f2 = f2 + w;
206 x -= 4;
207 x[0] = f0 + f2;
208 x[2] = f0 - f2;
209 x[1] = f1 + f3;
210 x[3] = f1 - f3;
212 f0 = ms01(ch01); w = ms11(ch01); f1 = f0 - w; f0 = f0 + w;
213 f2 = ms21(ch01); w = ms31(ch01); f3 = f2 - w; f2 = f2 + w;
215 x[BLKSIZE_s / 2 + 0] = f0 + f2;
216 x[BLKSIZE_s / 2 + 2] = f0 - f2;
217 x[BLKSIZE_s / 2 + 1] = f1 + f3;
218 x[BLKSIZE_s / 2 + 3] = f1 - f3;
219 } while (--j >= 0);
221 gfc->fft_fht(x, BLKSIZE_s/2);
222 /* BLKSIZE_s/2 because of 3DNow! ASM routine */
226 void fft_long(lame_internal_flags * const gfc,
227 FLOAT x[BLKSIZE], int chn, const sample_t *buffer[2] )
229 const FLOAT* window = (const FLOAT *)&gfc->window[0];
230 int i;
231 int jj = BLKSIZE / 8 - 1;
232 x += BLKSIZE / 2;
234 do {
235 FLOAT f0,f1,f2,f3, w;
237 i = rv_tbl[jj];
238 f0 = ml00(ch01); w = ml10(ch01); f1 = f0 - w; f0 = f0 + w;
239 f2 = ml20(ch01); w = ml30(ch01); f3 = f2 - w; f2 = f2 + w;
241 x -= 4;
242 x[0] = f0 + f2;
243 x[2] = f0 - f2;
244 x[1] = f1 + f3;
245 x[3] = f1 - f3;
247 f0 = ml01(ch01); w = ml11(ch01); f1 = f0 - w; f0 = f0 + w;
248 f2 = ml21(ch01); w = ml31(ch01); f3 = f2 - w; f2 = f2 + w;
250 x[BLKSIZE / 2 + 0] = f0 + f2;
251 x[BLKSIZE / 2 + 2] = f0 - f2;
252 x[BLKSIZE / 2 + 1] = f1 + f3;
253 x[BLKSIZE / 2 + 3] = f1 - f3;
254 } while (--jj >= 0);
256 gfc->fft_fht(x, BLKSIZE/2);
257 /* BLKSIZE/2 because of 3DNow! ASM routine */
261 void init_fft(lame_internal_flags * const gfc)
263 FLOAT *window = &gfc->window[0];
264 FLOAT *window_s = &gfc->window_s[0];
265 int i;
267 #if 0
268 if (gfc->nsPsy.use) {
269 for (i = 0; i < BLKSIZE ; i++)
270 /* blackman window */
271 window[i] = 0.42-0.5*cos(2*PI*i/(BLKSIZE-1))+0.08*cos(4*PI*i/(BLKSIZE-1));
272 } else {
274 * calculate HANN window coefficients
276 for (i = 0; i < BLKSIZE ; i++)
277 window[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE));
279 #endif
281 // The type of window used here will make no real difference, but
282 // in the interest of merging nspsytune stuff - switch to blackman window
283 for (i = 0; i < BLKSIZE ; i++)
284 /* blackman window */
285 window[i] = 0.42-0.5*cos(2*PI*(i+.5)/BLKSIZE)+
286 0.08*cos(4*PI*(i+.5)/BLKSIZE);
288 for (i = 0; i < BLKSIZE_s/2 ; i++)
289 window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
291 #ifdef HAVE_NASM
292 if (gfc->CPU_features.AMD_3DNow) {
293 extern void fht_3DN(FLOAT *fz, int n);
294 gfc->fft_fht = fht_3DN;
295 } else
296 #endif
297 #ifdef USE_FFTSSE
298 if (gfc->CPU_features.SIMD) {
299 extern void fht_SSE(FLOAT *fz, int n);
300 gfc->fft_fht = fht_SSE;
301 } else
302 #endif
303 #ifdef USE_FFTFPU
304 if (gfc->CPU_features.i387) {
305 extern void fht_FPU(FLOAT *fz, int n);
306 gfc->fft_fht = fht_FPU;
307 } else
308 #endif
309 gfc->fft_fht = fht;