3 * Copyright (c) 2002 Fabrice Bellard.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * FFT/IFFT transforms.
30 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
33 int ff_fft_init(FFTContext
*s
, int nbits
, int inverse
)
36 float alpha
, c1
, s1
, s2
;
41 s
->exptab
= av_malloc((n
/ 2) * sizeof(FFTComplex
));
44 s
->revtab
= av_malloc(n
* sizeof(uint16_t));
49 s2
= inverse
? 1.0 : -1.0;
51 for(i
=0;i
<(n
/2);i
++) {
52 alpha
= 2 * M_PI
* (float)i
/ (float)n
;
58 s
->fft_calc
= ff_fft_calc_c
;
59 s
->imdct_calc
= ff_imdct_calc
;
62 /* compute constant table for HAVE_SSE version */
63 #if defined(HAVE_MMX) \
64 || (defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE))
66 int has_vectors
= mm_support();
70 if (has_vectors
& MM_3DNOWEXT
) {
71 /* 3DNowEx for K7/K8 */
72 s
->imdct_calc
= ff_imdct_calc_3dn2
;
73 s
->fft_calc
= ff_fft_calc_3dn2
;
74 } else if (has_vectors
& MM_3DNOW
) {
75 /* 3DNow! for K6-2/3 */
76 s
->fft_calc
= ff_fft_calc_3dn
;
77 } else if (has_vectors
& MM_SSE
) {
79 s
->imdct_calc
= ff_imdct_calc_sse
;
80 s
->fft_calc
= ff_fft_calc_sse
;
83 if (has_vectors
& MM_ALTIVEC
)
84 s
->fft_calc
= ff_fft_calc_altivec
;
87 if (s
->fft_calc
!= ff_fft_calc_c
) {
88 int np
, nblocks
, np2
, l
;
94 s
->exptab1
= av_malloc(np
* 2 * sizeof(FFTComplex
));
99 for(l
= 0; l
< np2
; l
+= 2 * nblocks
) {
101 *q
++ = s
->exptab
[l
+ nblocks
];
103 q
->re
= -s
->exptab
[l
].im
;
104 q
->im
= s
->exptab
[l
].re
;
106 q
->re
= -s
->exptab
[l
+ nblocks
].im
;
107 q
->im
= s
->exptab
[l
+ nblocks
].re
;
110 nblocks
= nblocks
>> 1;
111 } while (nblocks
!= 0);
112 av_freep(&s
->exptab
);
117 /* compute bit reverse table */
121 for(j
=0;j
<nbits
;j
++) {
122 m
|= ((i
>> j
) & 1) << (nbits
-j
-1);
128 av_freep(&s
->revtab
);
129 av_freep(&s
->exptab
);
130 av_freep(&s
->exptab1
);
135 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
137 FFTSample ax, ay, bx, by;\
148 #define MUL16(a,b) ((a) * (b))
150 #define CMUL(pre, pim, are, aim, bre, bim) \
152 pre = (MUL16(are, bre) - MUL16(aim, bim));\
153 pim = (MUL16(are, bim) + MUL16(bre, aim));\
157 * Do a complex FFT with the parameters defined in ff_fft_init(). The
158 * input data must be permuted before with s->revtab table. No
159 * 1.0/sqrt(n) normalization is done.
161 void ff_fft_calc_c(FFTContext
*s
, FFTComplex
*z
)
166 register FFTComplex
*p
, *q
;
167 FFTComplex
*exptab
= s
->exptab
;
169 FFTSample tmp_re
, tmp_im
;
178 BF(p
[0].re
, p
[0].im
, p
[1].re
, p
[1].im
,
179 p
[0].re
, p
[0].im
, p
[1].re
, p
[1].im
);
190 BF(p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
,
191 p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
);
192 BF(p
[1].re
, p
[1].im
, p
[3].re
, p
[3].im
,
193 p
[1].re
, p
[1].im
, -p
[3].im
, p
[3].re
);
198 BF(p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
,
199 p
[0].re
, p
[0].im
, p
[2].re
, p
[2].im
);
200 BF(p
[1].re
, p
[1].im
, p
[3].re
, p
[3].im
,
201 p
[1].re
, p
[1].im
, p
[3].im
, -p
[3].re
);
213 for (j
= 0; j
< nblocks
; ++j
) {
214 BF(p
->re
, p
->im
, q
->re
, q
->im
,
215 p
->re
, p
->im
, q
->re
, q
->im
);
219 for(l
= nblocks
; l
< np2
; l
+= nblocks
) {
220 CMUL(tmp_re
, tmp_im
, exptab
[l
].re
, exptab
[l
].im
, q
->re
, q
->im
);
221 BF(p
->re
, p
->im
, q
->re
, q
->im
,
222 p
->re
, p
->im
, tmp_re
, tmp_im
);
230 nblocks
= nblocks
>> 1;
231 nloops
= nloops
<< 1;
232 } while (nblocks
!= 0);
236 * Do the permutation needed BEFORE calling ff_fft_calc()
238 void ff_fft_permute(FFTContext
*s
, FFTComplex
*z
)
242 const uint16_t *revtab
= s
->revtab
;
256 void ff_fft_end(FFTContext
*s
)
258 av_freep(&s
->revtab
);
259 av_freep(&s
->exptab
);
260 av_freep(&s
->exptab1
);