2 Copyright (c) 2003-2010, Mark Borgerding
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 #include "_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
24 const kiss_fft_cfg st
,
29 kiss_fft_cpx
* tw1
= st
->twiddles
;
33 C_FIXDIV(*Fout
,2); C_FIXDIV(*Fout2
,2);
35 C_MUL (t
, *Fout2
, *tw1
);
37 C_SUB( *Fout2
, *Fout
, t
);
47 const kiss_fft_cfg st
,
51 kiss_fft_cpx
*tw1
,*tw2
,*tw3
;
52 kiss_fft_cpx scratch
[6];
58 tw3
= tw2
= tw1
= st
->twiddles
;
61 C_FIXDIV(*Fout
,4); C_FIXDIV(Fout
[m
],4); C_FIXDIV(Fout
[m2
],4); C_FIXDIV(Fout
[m3
],4);
63 C_MUL(scratch
[0],Fout
[m
] , *tw1
);
64 C_MUL(scratch
[1],Fout
[m2
] , *tw2
);
65 C_MUL(scratch
[2],Fout
[m3
] , *tw3
);
67 C_SUB( scratch
[5] , *Fout
, scratch
[1] );
68 C_ADDTO(*Fout
, scratch
[1]);
69 C_ADD( scratch
[3] , scratch
[0] , scratch
[2] );
70 C_SUB( scratch
[4] , scratch
[0] , scratch
[2] );
71 C_SUB( Fout
[m2
], *Fout
, scratch
[3] );
75 C_ADDTO( *Fout
, scratch
[3] );
78 Fout
[m
].r
= scratch
[5].r
- scratch
[4].i
;
79 Fout
[m
].i
= scratch
[5].i
+ scratch
[4].r
;
80 Fout
[m3
].r
= scratch
[5].r
+ scratch
[4].i
;
81 Fout
[m3
].i
= scratch
[5].i
- scratch
[4].r
;
83 Fout
[m
].r
= scratch
[5].r
+ scratch
[4].i
;
84 Fout
[m
].i
= scratch
[5].i
- scratch
[4].r
;
85 Fout
[m3
].r
= scratch
[5].r
- scratch
[4].i
;
86 Fout
[m3
].i
= scratch
[5].i
+ scratch
[4].r
;
95 const kiss_fft_cfg st
,
100 const size_t m2
= 2*m
;
101 kiss_fft_cpx
*tw1
,*tw2
;
102 kiss_fft_cpx scratch
[5];
104 epi3
= st
->twiddles
[fstride
*m
];
106 tw1
=tw2
=st
->twiddles
;
109 C_FIXDIV(*Fout
,3); C_FIXDIV(Fout
[m
],3); C_FIXDIV(Fout
[m2
],3);
111 C_MUL(scratch
[1],Fout
[m
] , *tw1
);
112 C_MUL(scratch
[2],Fout
[m2
] , *tw2
);
114 C_ADD(scratch
[3],scratch
[1],scratch
[2]);
115 C_SUB(scratch
[0],scratch
[1],scratch
[2]);
119 Fout
[m
].r
= Fout
->r
- HALF_OF(scratch
[3].r
);
120 Fout
[m
].i
= Fout
->i
- HALF_OF(scratch
[3].i
);
122 C_MULBYSCALAR( scratch
[0] , epi3
.i
);
124 C_ADDTO(*Fout
,scratch
[3]);
126 Fout
[m2
].r
= Fout
[m
].r
+ scratch
[0].i
;
127 Fout
[m2
].i
= Fout
[m
].i
- scratch
[0].r
;
129 Fout
[m
].r
-= scratch
[0].i
;
130 Fout
[m
].i
+= scratch
[0].r
;
136 static void kf_bfly5(
138 const size_t fstride
,
139 const kiss_fft_cfg st
,
143 kiss_fft_cpx
*Fout0
,*Fout1
,*Fout2
,*Fout3
,*Fout4
;
145 kiss_fft_cpx scratch
[13];
146 kiss_fft_cpx
* twiddles
= st
->twiddles
;
149 ya
= twiddles
[fstride
*m
];
150 yb
= twiddles
[fstride
*2*m
];
159 for ( u
=0; u
<m
; ++u
) {
160 C_FIXDIV( *Fout0
,5); C_FIXDIV( *Fout1
,5); C_FIXDIV( *Fout2
,5); C_FIXDIV( *Fout3
,5); C_FIXDIV( *Fout4
,5);
163 C_MUL(scratch
[1] ,*Fout1
, tw
[u
*fstride
]);
164 C_MUL(scratch
[2] ,*Fout2
, tw
[2*u
*fstride
]);
165 C_MUL(scratch
[3] ,*Fout3
, tw
[3*u
*fstride
]);
166 C_MUL(scratch
[4] ,*Fout4
, tw
[4*u
*fstride
]);
168 C_ADD( scratch
[7],scratch
[1],scratch
[4]);
169 C_SUB( scratch
[10],scratch
[1],scratch
[4]);
170 C_ADD( scratch
[8],scratch
[2],scratch
[3]);
171 C_SUB( scratch
[9],scratch
[2],scratch
[3]);
173 Fout0
->r
+= scratch
[7].r
+ scratch
[8].r
;
174 Fout0
->i
+= scratch
[7].i
+ scratch
[8].i
;
176 scratch
[5].r
= scratch
[0].r
+ S_MUL(scratch
[7].r
,ya
.r
) + S_MUL(scratch
[8].r
,yb
.r
);
177 scratch
[5].i
= scratch
[0].i
+ S_MUL(scratch
[7].i
,ya
.r
) + S_MUL(scratch
[8].i
,yb
.r
);
179 scratch
[6].r
= S_MUL(scratch
[10].i
,ya
.i
) + S_MUL(scratch
[9].i
,yb
.i
);
180 scratch
[6].i
= -S_MUL(scratch
[10].r
,ya
.i
) - S_MUL(scratch
[9].r
,yb
.i
);
182 C_SUB(*Fout1
,scratch
[5],scratch
[6]);
183 C_ADD(*Fout4
,scratch
[5],scratch
[6]);
185 scratch
[11].r
= scratch
[0].r
+ S_MUL(scratch
[7].r
,yb
.r
) + S_MUL(scratch
[8].r
,ya
.r
);
186 scratch
[11].i
= scratch
[0].i
+ S_MUL(scratch
[7].i
,yb
.r
) + S_MUL(scratch
[8].i
,ya
.r
);
187 scratch
[12].r
= - S_MUL(scratch
[10].i
,yb
.i
) + S_MUL(scratch
[9].i
,ya
.i
);
188 scratch
[12].i
= S_MUL(scratch
[10].r
,yb
.i
) - S_MUL(scratch
[9].r
,ya
.i
);
190 C_ADD(*Fout2
,scratch
[11],scratch
[12]);
191 C_SUB(*Fout3
,scratch
[11],scratch
[12]);
193 ++Fout0
;++Fout1
;++Fout2
;++Fout3
;++Fout4
;
197 /* perform the butterfly for one stage of a mixed radix FFT */
198 static void kf_bfly_generic(
200 const size_t fstride
,
201 const kiss_fft_cfg st
,
207 kiss_fft_cpx
* twiddles
= st
->twiddles
;
209 int Norig
= st
->nfft
;
211 kiss_fft_cpx
* scratch
= (kiss_fft_cpx
*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx
)*p
);
213 for ( u
=0; u
<m
; ++u
) {
215 for ( q1
=0 ; q1
<p
; ++q1
) {
216 scratch
[q1
] = Fout
[ k
];
217 C_FIXDIV(scratch
[q1
],p
);
222 for ( q1
=0 ; q1
<p
; ++q1
) {
224 Fout
[ k
] = scratch
[0];
226 twidx
+= fstride
* k
;
227 if (twidx
>=Norig
) twidx
-=Norig
;
228 C_MUL(t
,scratch
[q
] , twiddles
[twidx
] );
229 C_ADDTO( Fout
[ k
] ,t
);
234 KISS_FFT_TMP_FREE(scratch
);
240 const kiss_fft_cpx
* f
,
241 const size_t fstride
,
244 const kiss_fft_cfg st
247 kiss_fft_cpx
* Fout_beg
=Fout
;
248 const int p
=*factors
++; /* the radix */
249 const int m
=*factors
++; /* stage's fft length/p */
250 const kiss_fft_cpx
* Fout_end
= Fout
+ p
*m
;
253 // use openmp extensions at the
254 // top-level (not recursive)
255 if (fstride
==1 && p
<=5)
259 // execute the p different work units in different threads
260 # pragma omp parallel for
262 kf_work( Fout
+k
*m
, f
+ fstride
*in_stride
*k
,fstride
*p
,in_stride
,factors
,st
);
263 // all threads have joined by this point
266 case 2: kf_bfly2(Fout
,fstride
,st
,m
); break;
267 case 3: kf_bfly3(Fout
,fstride
,st
,m
); break;
268 case 4: kf_bfly4(Fout
,fstride
,st
,m
); break;
269 case 5: kf_bfly5(Fout
,fstride
,st
,m
); break;
270 default: kf_bfly_generic(Fout
,fstride
,st
,m
,p
); break;
279 f
+= fstride
*in_stride
;
280 }while(++Fout
!= Fout_end
);
284 // DFT of size m*p performed by doing
285 // p instances of smaller DFTs of size m,
286 // each one takes a decimated version of the input
287 kf_work( Fout
, f
, fstride
*p
, in_stride
, factors
,st
);
288 f
+= fstride
*in_stride
;
289 }while( (Fout
+= m
) != Fout_end
);
294 // recombine the p smaller DFTs
296 case 2: kf_bfly2(Fout
,fstride
,st
,m
); break;
297 case 3: kf_bfly3(Fout
,fstride
,st
,m
); break;
298 case 4: kf_bfly4(Fout
,fstride
,st
,m
); break;
299 case 5: kf_bfly5(Fout
,fstride
,st
,m
); break;
300 default: kf_bfly_generic(Fout
,fstride
,st
,m
,p
); break;
304 /* facbuf is populated by p1,m1,p2,m2, ...
309 void kf_factor(int n
,int * facbuf
)
313 floor_sqrt
= floor( sqrt((double)n
) );
315 /*factor out powers of 4, powers of 2, then any remaining primes */
319 case 4: p
= 2; break;
320 case 2: p
= 3; break;
321 default: p
+= 2; break;
324 p
= n
; /* no more factors, skip to end */
334 * User-callable function to allocate all necessary storage space for the fft.
336 * The return value is a contiguous block of memory, allocated with malloc. As such,
337 * It can be freed with free(), rather than a kiss_fft-specific function.
339 kiss_fft_cfg
kiss_fft_alloc(int nfft
,int inverse_fft
,void * mem
,size_t * lenmem
)
341 kiss_fft_cfg st
=NULL
;
342 size_t memneeded
= sizeof(struct kiss_fft_state
)
343 + sizeof(kiss_fft_cpx
)*(nfft
-1); /* twiddle factors*/
345 if ( lenmem
==NULL
) {
346 st
= ( kiss_fft_cfg
)KISS_FFT_MALLOC( memneeded
);
348 if (mem
!= NULL
&& *lenmem
>= memneeded
)
349 st
= (kiss_fft_cfg
)mem
;
355 st
->inverse
= inverse_fft
;
357 for (i
=0;i
<nfft
;++i
) {
358 const double pi
=3.141592653589793238462643383279502884197169399375105820974944;
359 double phase
= -2*pi
*i
/ nfft
;
362 kf_cexp(st
->twiddles
+i
, phase
);
365 kf_factor(nfft
,st
->factors
);
371 void kiss_fft_stride(kiss_fft_cfg st
,const kiss_fft_cpx
*fin
,kiss_fft_cpx
*fout
,int in_stride
)
374 //NOTE: this is not really an in-place FFT algorithm.
375 //It just performs an out-of-place FFT into a temp buffer
376 kiss_fft_cpx
* tmpbuf
= (kiss_fft_cpx
*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx
)*st
->nfft
);
377 kf_work(tmpbuf
,fin
,1,in_stride
, st
->factors
,st
);
378 memcpy(fout
,tmpbuf
,sizeof(kiss_fft_cpx
)*st
->nfft
);
379 KISS_FFT_TMP_FREE(tmpbuf
);
381 kf_work( fout
, fin
, 1,in_stride
, st
->factors
,st
);
385 void kiss_fft(kiss_fft_cfg cfg
,const kiss_fft_cpx
*fin
,kiss_fft_cpx
*fout
)
387 kiss_fft_stride(cfg
,fin
,fout
,1);
391 void kiss_fft_cleanup(void)
393 // nothing needed any more
396 int kiss_fft_next_fast_size(int n
)
400 while ( (m
%2) == 0 ) m
/=2;
401 while ( (m
%3) == 0 ) m
/=3;
402 while ( (m
%5) == 0 ) m
/=5;
404 break; /* n is completely factorable by twos, threes, and fives */