2 * Copyright (c) 2003-2010, Mark Borgerding
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the
18 * * Neither the author nor the names of any contributors may be used to
19 * endorse or promote products derived from this software without
20 * specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 /* Ported by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
35 * Understanding is not required. Only obedience.
37 * This program is free software: you can redistribute it and/or modify
38 * it under the terms of the GNU General Public License as published by
39 * the Free Software Foundation, version 3 of the License ONLY.
41 * This program is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 * GNU General Public License for more details.
46 * You should have received a copy of the GNU General Public License
47 * along with this program. If not, see <http://www.gnu.org/licenses/>.
50 private nothrow @trusted @nogc:
52 version(aliced
) {} else alias usize
= size_t
;
54 //version = kiss_fft_use_parallel; // absolutely no reason to
57 // ////////////////////////////////////////////////////////////////////////// //
59 public enum KissFFT
{ Forward
, Inverse
}
62 //public alias kiss_fft_scalar = float;
63 //public alias kiss_fft_scalar = double;
66 public template kiss_fft_scalar(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
67 alias kiss_fft_scalar
= T
;
72 public template isGoodKissFFTScalar(T
) {
73 static if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
74 enum isGoodKissFFTScalar
= true;
76 enum isGoodKissFFTScalar
= false;
82 public template isKissFFTComplex(T
) {
83 static if (is(T
: kiss_fft_cpx
!S
, S
)) {
84 enum isKissFFTComplex
= true;
86 enum isKissFFTComplex
= false;
92 public template KissFFTScalar(T
) {
93 static if (is(T
: kiss_fft_cpx
!S
, S
)) {
94 alias KissFFTScalar
= S
;
96 static assert(0, "not a KissFFT complex type");
101 public align(1) struct kiss_fft_cpx(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
107 T
opIndex (uint n
) const pure nothrow @trusted @nogc { pragma(inline
, true); return (n ? i
: r
); }
108 void opIndexAssign (in T v
, uint n
) nothrow @trusted @nogc { pragma(inline
, true); if (n
) i
= v
; else r
= v
; }
112 public alias kiss_fft_cpx_f
= kiss_fft_cpx
!float; ///
113 public alias kiss_fft_cpx_d
= kiss_fft_cpx
!double; ///
116 public alias kiss_fft_cfg_f
= kiss_fft_state
!float*; ///
117 public alias kiss_fft_cfg_d
= kiss_fft_state
!double*; ///
119 public template kiss_fft_cfg(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
120 alias kiss_fft_cfg
= kiss_fft_state
!T
*;
124 // ////////////////////////////////////////////////////////////////////////// //
125 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
127 * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, KissFFT.Forward);`
129 * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
131 * If `lenmem` is `null`, then kiss_fft_alloc will allocate a cfg buffer using `malloc`.
132 * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
134 * The state can be placed in a user supplied buffer `mem`:
135 * If `lenmem` is not `null` and `mem` is not `null` and `*lenmem` is large enough,
136 * then the function places the cfg in `mem` and the size used in `*lenmem`,
139 * If `lenmem` is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
140 * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
142 public kiss_fft_cfg
!S
kiss_fft_alloc(S
) (uint nfft
, KissFFT dir
, void* mem
=null, usize
* lenmem
=null)
143 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
145 kiss_fft_cfg
!S st
= null;
146 usize memneeded
= (kiss_fft_state
!S
).sizeof
+(kiss_fft_cpx
!S
).sizeof
*(nfft
-1); // twiddle factors
148 if (lenmem
is null) {
149 import core
.stdc
.stdlib
: malloc
;
150 st
= cast(kiss_fft_cfg
!S
)malloc(memneeded
);
152 if (mem
!is null && *lenmem
>= memneeded
) st
= cast(kiss_fft_cfg
!S
)mem
;
158 st
.inverse
= (dir
== KissFFT
.Inverse
);
159 foreach (immutable uint i
; 0..nfft
) {
160 enum pi
= 3.141592653589793238462643383279502884197169399375105820974944;
161 immutable double phase
= -2*pi
*i
/nfft
*(st
.inverse ?
-1 : 1);
162 //if (st.inverse) phase = -phase;
163 mixin(kf_cexp
!("st.twiddles.ptr+i", "phase"));
165 kf_factor
!S(nfft
, st
.factors
.ptr
);
172 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
173 * buffer and can be simply free()d when no longer needed
175 public void kiss_fft_free(S
) (ref kiss_fft_cfg
!S p
)
176 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
179 import core
.stdc
.stdlib
: free
;
186 /** Perform an FFT on a complex input buffer.
189 * fin should be f[0] , f[1] , ... ,f[nfft-1]
190 * fout will be F[0] , F[1] , ... ,F[nfft-1]
191 * Note that each element is complex and can be accessed like f[k].r and f[k].i
193 public void kiss_fft(S
) (kiss_fft_cfg
!S cfg
, const(kiss_fft_cpx
!S
)* fin
, kiss_fft_cpx
!S
* fout
)
194 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
196 assert(cfg
!is null);
197 kiss_fft_stride
!S(cfg
, fin
, fout
, 1);
201 /** A more generic version of the above function. It reads its input from every Nth sample. */
202 public void kiss_fft_stride(S
) (kiss_fft_cfg
!S st
, const(kiss_fft_cpx
!S
)* fin
, kiss_fft_cpx
!S
* fout
, uint in_stride
)
203 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
205 import core
.stdc
.stdlib
: alloca
;
208 import core
.stdc
.string
: memcpy
;
209 //NOTE: this is not really an in-place FFT algorithm.
210 //It just performs an out-of-place FFT into a temp buffer
211 //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
212 kiss_fft_cpx
!S
* tmpbuf
= cast(kiss_fft_cpx
!S
*)alloca((kiss_fft_cpx
!S
).sizeof
*st
.nfft
);
213 kf_work
!S(tmpbuf
, fin
, 1, in_stride
, st
.factors
.ptr
, st
);
214 memcpy(fout
, tmpbuf
, (kiss_fft_cpx
!S
).sizeof
*st
.nfft
);
215 //KISS_FFT_TMP_FREE(tmpbuf);
217 kf_work
!S(fout
, fin
, 1, in_stride
, st
.factors
.ptr
, st
);
222 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
223 public uint kiss_fft_next_fast_size (uint n
) {
226 while ((m
%2) == 0) m
/= 2;
227 while ((m
%3) == 0) m
/= 3;
228 while ((m
%5) == 0) m
/= 5;
229 if (m
<= 1) break; // n is completely factorable by twos, threes, and fives
236 // ////////////////////////////////////////////////////////////////////////// //
238 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
240 public alias kiss_fftr_cfg_f
= kiss_fftr_state
!float*; ///
241 public alias kiss_fftr_cfg_d
= kiss_fftr_state
!double*; ///
244 public template kiss_fftr_cfg(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
245 alias kiss_fftr_cfg
= kiss_fftr_state
!T
*;
248 struct kiss_fftr_state(S
) if (is(S
== float) ||
is(S
== double) ||
is(S
== real)) {
249 kiss_fft_cfg
!S substate
;
250 kiss_fft_cpx
!S
* tmpbuf
;
251 kiss_fft_cpx
!S
* super_twiddles
;
258 * If you don't care to allocate space, use mem = lenmem = null
260 public kiss_fftr_cfg
!S
kiss_fftr_alloc(S
) (uint nfft
, KissFFT dir
, void* mem
, usize
* lenmem
)
261 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
263 kiss_fftr_cfg
!S st
= null;
264 usize subsize
, memneeded
;
266 if (nfft
&1) return null; //assert(0, "real FFT optimization must be even");
269 kiss_fft_alloc
!S(nfft
, dir
, null, &subsize
);
270 memneeded
= (kiss_fftr_state
!S
).sizeof
+subsize
+(kiss_fft_cpx
!S
).sizeof
*(nfft
*3/2);
272 if (lenmem
is null) {
273 import core
.stdc
.stdlib
: malloc
;
274 st
= cast(kiss_fftr_cfg
!S
)malloc(memneeded
);
276 if (*lenmem
>= memneeded
) st
= cast(kiss_fftr_cfg
!S
)mem
;
279 if (st
is null) return null;
281 st
.substate
= cast(kiss_fft_cfg
!S
)(st
+1); // just beyond kiss_fftr_state struct
282 st
.tmpbuf
= cast(kiss_fft_cpx
!S
*)((cast(ubyte*)st
.substate
)+subsize
);
283 st
.super_twiddles
= st
.tmpbuf
+nfft
;
284 kiss_fft_alloc
!S(nfft
, dir
, st
.substate
, &subsize
);
286 foreach (immutable i
; 0..nfft
/2) {
287 enum pi
= 3.141592653589793238462643383279502884197169399375105820974944;
288 immutable double phase
= -pi
*(cast(double)(i
+1)/nfft
+0.5)*(dir
== KissFFT
.Inverse ?
-1 : 1);
289 //if (dir == KissFFT.Inverse) phase = -phase;
290 mixin(kf_cexp
!("st.super_twiddles+i", "phase"));
297 /** Use this to free `fftr` buffer. */
298 public void kiss_fft_free(S
) (ref kiss_fftr_cfg
!S p
)
299 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
302 import core
.stdc
.stdlib
: free
;
309 /** input timedata has nfft scalar points
310 * output freqdata has nfft/2+1 complex points
312 public void kiss_fftr(S
) (kiss_fftr_cfg
!S st
, const(kiss_fft_scalar
!S
)* timedata
, kiss_fft_cpx
!S
* freqdata
)
313 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
315 // input buffer timedata is stored row-wise
317 kiss_fft_cpx
!S fpnk
, fpk
, f1k
, f2k
, tw
, tdc
;
319 if (st
.substate
.inverse
) assert(0, "kiss fft usage error: improper alloc");
321 ncfft
= st
.substate
.nfft
;
323 // perform the parallel fft of two real signals packed in real,imag
324 kiss_fft
!S(st
.substate
, cast(const(kiss_fft_cpx
!S
)*)timedata
, st
.tmpbuf
);
325 /* The real part of the DC element of the frequency spectrum in st.tmpbuf
326 * contains the sum of the even-numbered elements of the input time sequence
327 * The imag part is the sum of the odd-numbered elements
329 * The sum of tdc.r and tdc.i is the sum of the input time sequence.
330 * yielding DC of input time sequence
331 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
332 * yielding Nyquist bin of input time sequence
335 tdc
.r
= st
.tmpbuf
[0].r
;
336 tdc
.i
= st
.tmpbuf
[0].i
;
337 freqdata
[0].r
= tdc
.r
+tdc
.i
;
338 freqdata
[ncfft
].r
= tdc
.r
-tdc
.i
;
339 freqdata
[ncfft
].i
= freqdata
[0].i
= 0;
341 for (k
= 1; k
<= ncfft
/2; ++k
) {
343 fpnk
.r
= st
.tmpbuf
[ncfft
-k
].r
;
344 fpnk
.i
= -st
.tmpbuf
[ncfft
-k
].i
;
346 mixin(C_ADD
!("f1k", "fpk", "fpnk"));
347 mixin(C_SUB
!("f2k", "fpk", "fpnk"));
348 mixin(C_MUL
!("tw", "f2k", "st.super_twiddles[k-1]"));
350 freqdata
[k
].r
= mixin(HALF_OF
!"f1k.r+tw.r");
351 freqdata
[k
].i
= mixin(HALF_OF
!"f1k.i+tw.i");
352 freqdata
[ncfft
-k
].r
= mixin(HALF_OF
!"f1k.r-tw.r");
353 freqdata
[ncfft
-k
].i
= mixin(HALF_OF
!"tw.i-f1k.i");
358 /** input freqdata has nfft/2+1 complex points
359 * output timedata has nfft scalar points
361 public void kiss_fftri(S
) (kiss_fftr_cfg
!S st
, const(kiss_fft_cpx
!S
)* freqdata
, kiss_fft_scalar
!S
* timedata
)
362 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
364 // input buffer timedata is stored row-wise
365 if (st
.substate
.inverse
== 0) assert(0, "kiss fft usage error: improper alloc");
367 uint ncfft
= st
.substate
.nfft
;
369 st
.tmpbuf
[0].r
= freqdata
[0].r
+freqdata
[ncfft
].r
;
370 st
.tmpbuf
[0].i
= freqdata
[0].r
-freqdata
[ncfft
].r
;
372 foreach (immutable uint k
; 1..ncfft
/2+1) {
373 kiss_fft_cpx
!S fnkc
= void, fek
= void, fok
= void, tmp
= void;
374 kiss_fft_cpx
!S fk
= freqdata
[k
];
375 fnkc
.r
= freqdata
[ncfft
-k
].r
;
376 fnkc
.i
= -freqdata
[ncfft
-k
].i
;
377 mixin(C_ADD
!("fek", "fk", "fnkc"));
378 mixin(C_SUB
!("tmp", "fk", "fnkc"));
379 mixin(C_MUL
!("fok", "tmp", "st.super_twiddles[k-1]"));
380 mixin(C_ADD
!("st.tmpbuf[k]", "fek", "fok"));
381 mixin(C_SUB
!("st.tmpbuf[ncfft-k]", "fek", "fok"));
382 st
.tmpbuf
[ncfft
-k
].i
*= -1;
384 kiss_fft
!S(st
.substate
, st
.tmpbuf
, cast(kiss_fft_cpx
!S
*)timedata
);
388 /** for real ffts, we need an even size */
389 public uint kiss_fftr_next_fast_size_real (uint n
) {
390 pragma(inline
, true);
391 return kiss_fft_next_fast_size((n
+1)>>1)<<1;
395 // ////////////////////////////////////////////////////////////////////////// //
396 enum MAXFACTORS
= 32;
397 /* e.g. an fft of length 128 has 4 factors
398 as far as kissfft is concerned
402 struct kiss_fft_state(S
) if (is(S
== float) ||
is(S
== double) ||
is(S
== real)) {
405 uint[2*MAXFACTORS
] factors
;
406 kiss_fft_cpx
!S
[1] twiddles
;
410 // ////////////////////////////////////////////////////////////////////////// //
411 private void kf_bfly2(S
) (kiss_fft_cpx
!S
* Fout
, in usize fstride
, const(kiss_fft_cfg
!S
) st
, int m
) {
412 kiss_fft_cpx
!S
* Fout2
;
413 const(kiss_fft_cpx
!S
)* tw1
= st
.twiddles
.ptr
;
417 mixin(C_MUL
!("t", "*Fout2", "*tw1"));
419 mixin(C_SUB
!("*Fout2", "*Fout", "t"));
420 mixin(C_ADDTO
!("*Fout", "t"));
427 private void kf_bfly4(S
) (kiss_fft_cpx
!S
* Fout
, in usize fstride
, const(kiss_fft_cfg
!S
) st
, in usize m
) {
428 const(kiss_fft_cpx
!S
)* tw1
, tw2
, tw3
;
429 kiss_fft_cpx
!S
[6] scratch
= void;
431 immutable usize m2
= 2*m
;
432 immutable usize m3
= 3*m
;
433 tw3
= tw2
= tw1
= st
.twiddles
.ptr
;
435 mixin(C_MUL
!("scratch[0]", "Fout[m]", "*tw1"));
436 mixin(C_MUL
!("scratch[1]", "Fout[m2]", "*tw2"));
437 mixin(C_MUL
!("scratch[2]", "Fout[m3]", "*tw3"));
439 mixin(C_SUB
!("scratch[5]", "*Fout", "scratch[1]"));
440 mixin(C_ADDTO
!("*Fout", "scratch[1]"));
441 mixin(C_ADD
!("scratch[3]", "scratch[0]", "scratch[2]"));
442 mixin(C_SUB
!("scratch[4]", "scratch[0]", "scratch[2]"));
443 mixin(C_SUB
!("Fout[m2]", "*Fout", "scratch[3]"));
447 mixin(C_ADDTO
!("*Fout", "scratch[3]"));
450 Fout
[m
].r
= scratch
[5].r
-scratch
[4].i
;
451 Fout
[m
].i
= scratch
[5].i
+scratch
[4].r
;
452 Fout
[m3
].r
= scratch
[5].r
+scratch
[4].i
;
453 Fout
[m3
].i
= scratch
[5].i
-scratch
[4].r
;
455 Fout
[m
].r
= scratch
[5].r
+scratch
[4].i
;
456 Fout
[m
].i
= scratch
[5].i
-scratch
[4].r
;
457 Fout
[m3
].r
= scratch
[5].r
-scratch
[4].i
;
458 Fout
[m3
].i
= scratch
[5].i
+scratch
[4].r
;
465 private void kf_bfly3(S
) (kiss_fft_cpx
!S
* Fout
, in usize fstride
, const(kiss_fft_cfg
!S
) st
, usize m
) {
467 immutable usize m2
= 2*m
;
468 const(kiss_fft_cpx
!S
)* tw1
, tw2
;
469 kiss_fft_cpx
!S
[5] scratch
= void;
471 epi3
= st
.twiddles
[fstride
*m
];
472 tw1
= tw2
= st
.twiddles
.ptr
;
474 mixin(C_MUL
!("scratch[1]", "Fout[m]", "*tw1"));
475 mixin(C_MUL
!("scratch[2]", "Fout[m2]", "*tw2"));
477 mixin(C_ADD
!("scratch[3]", "scratch[1]", "scratch[2]"));
478 mixin(C_SUB
!("scratch[0]", "scratch[1]", "scratch[2]"));
482 Fout
[m
].r
= Fout
.r
-mixin(HALF_OF
!"scratch[3].r");
483 Fout
[m
].i
= Fout
.i
-mixin(HALF_OF
!"scratch[3].i");
485 mixin(C_MULBYSCALAR
!("scratch[0]", "epi3.i"));
487 mixin(C_ADDTO
!("*Fout", "scratch[3]"));
489 Fout
[m2
].r
= Fout
[m
].r
+scratch
[0].i
;
490 Fout
[m2
].i
= Fout
[m
].i
-scratch
[0].r
;
492 Fout
[m
].r
-= scratch
[0].i
;
493 Fout
[m
].i
+= scratch
[0].r
;
500 private void kf_bfly5(S
) (kiss_fft_cpx
!S
* Fout
, in usize fstride
, const(kiss_fft_cfg
!S
) st
, uint m
) {
501 kiss_fft_cpx
!S
* Fout0
, Fout1
, Fout2
, Fout3
, Fout4
;
502 kiss_fft_cpx
!S
[13] scratch
= void;
503 const(kiss_fft_cpx
!S
)* twiddles
= st
.twiddles
.ptr
;
504 const(kiss_fft_cpx
!S
)* tw
;
505 kiss_fft_cpx
!S ya
= twiddles
[fstride
*m
];
506 kiss_fft_cpx
!S yb
= twiddles
[fstride
*2*m
];
514 tw
= st
.twiddles
.ptr
;
515 foreach (immutable uint u
; 0..m
) {
518 mixin(C_MUL
!("scratch[1]", "*Fout1", "tw[u*fstride]"));
519 mixin(C_MUL
!("scratch[2]", "*Fout2", "tw[2*u*fstride]"));
520 mixin(C_MUL
!("scratch[3]", "*Fout3", "tw[3*u*fstride]"));
521 mixin(C_MUL
!("scratch[4]", "*Fout4", "tw[4*u*fstride]"));
523 mixin(C_ADD
!("scratch[7]", "scratch[1]", "scratch[4]"));
524 mixin(C_SUB
!("scratch[10]", "scratch[1]", "scratch[4]"));
525 mixin(C_ADD
!("scratch[8]", "scratch[2]", "scratch[3]"));
526 mixin(C_SUB
!("scratch[9]", "scratch[2]", "scratch[3]"));
528 Fout0
.r
+= scratch
[7].r
+scratch
[8].r
;
529 Fout0
.i
+= scratch
[7].i
+scratch
[8].i
;
531 scratch
[5].r
= scratch
[0].r
+scratch
[7].r
*ya
.r
+scratch
[8].r
*yb
.r
;
532 scratch
[5].i
= scratch
[0].i
+scratch
[7].i
*ya
.r
+scratch
[8].i
*yb
.r
;
534 scratch
[6].r
= scratch
[10].i
*ya
.i
+scratch
[9].i
*yb
.i
;
535 scratch
[6].i
= -scratch
[10].r
*ya
.i
-scratch
[9].r
*yb
.i
;
537 mixin(C_SUB
!("*Fout1", "scratch[5]", "scratch[6]"));
538 mixin(C_ADD
!("*Fout4", "scratch[5]", "scratch[6]"));
540 scratch
[11].r
= scratch
[0].r
+scratch
[7].r
*yb
.r
+scratch
[8].r
*ya
.r
;
541 scratch
[11].i
= scratch
[0].i
+scratch
[7].i
*yb
.r
+scratch
[8].i
*ya
.r
;
542 scratch
[12].r
= -scratch
[10].i
*yb
.i
+scratch
[9].i
*ya
.i
;
543 scratch
[12].i
= scratch
[10].r
*yb
.i
-scratch
[9].r
*ya
.i
;
545 mixin(C_ADD
!("*Fout2", "scratch[11]", "scratch[12]"));
546 mixin(C_SUB
!("*Fout3", "scratch[11]", "scratch[12]"));
556 // perform the butterfly for one stage of a mixed radix FFT
557 private void kf_bfly_generic(S
) (kiss_fft_cpx
!S
* Fout
, in usize fstride
, const(kiss_fft_cfg
!S
) st
, uint m
, uint p
) {
558 import core
.stdc
.stdlib
: alloca
;
561 const(kiss_fft_cpx
!S
)* twiddles
= st
.twiddles
.ptr
;
563 uint Norig
= st
.nfft
;
565 //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
566 kiss_fft_cpx
!S
* scratch
= cast(kiss_fft_cpx
!S
*)alloca((kiss_fft_cpx
!S
).sizeof
*p
);
568 foreach (immutable uint u
; 0..m
) {
570 foreach (immutable uint q1
; 0..p
) {
571 scratch
[q1
] = Fout
[k
];
576 foreach (immutable uint q1
; 0..p
) {
578 Fout
[k
] = scratch
[0];
579 foreach (immutable uint q
; 1..p
) {
581 if (twidx
>= Norig
) twidx
-= Norig
;
582 mixin(C_MUL
!("t", "scratch[q]", "twiddles[twidx]"));
583 mixin(C_ADDTO
!("Fout[k]", "t"));
588 //KISS_FFT_TMP_FREE(scratch);
592 private void kf_work(S
) (kiss_fft_cpx
!S
* Fout
, const(kiss_fft_cpx
!S
)* f
, in usize fstride
, uint in_stride
, uint* factors
, const(kiss_fft_cfg
!S
) st
) {
593 kiss_fft_cpx
!S
* Fout_beg
= Fout
;
594 immutable uint p
= *factors
++; // the radix
595 immutable uint m
= *factors
++; // stage's fft length/p
596 const(kiss_fft_cpx
!S
)* Fout_end
= Fout
+p
*m
;
598 version(kiss_fft_use_parallel
) {
599 // use threads at the top-level (not recursive)
600 import std
.parallelism
;
601 import std
.range
: iota
;
602 if (fstride
== 1 && p
<= 5) {
603 // execute the p different work units in different threads
604 foreach (uint k
; parallel(iota(p
))) {
605 kf_work
!S(Fout
+k
*m
, f
+fstride
*in_stride
*k
, fstride
*p
, in_stride
, factors
, st
);
607 // all threads have joined by this point
609 case 2: kf_bfly2
!S(Fout
, fstride
, st
, m
); break;
610 case 3: kf_bfly3
!S(Fout
, fstride
, st
, m
); break;
611 case 4: kf_bfly4
!S(Fout
, fstride
, st
, m
); break;
612 case 5: kf_bfly5
!S(Fout
, fstride
, st
, m
); break;
613 default: kf_bfly_generic
!S(Fout
, fstride
, st
, m
, p
); break;
622 f
+= fstride
*in_stride
;
623 } while (++Fout
!is Fout_end
);
627 // DFT of size m*p performed by doing
628 // p instances of smaller DFTs of size m,
629 // each one takes a decimated version of the input
630 kf_work
!S(Fout
, f
, fstride
*p
, in_stride
, factors
, st
);
631 f
+= fstride
*in_stride
;
632 } while ((Fout
+= m
) !is Fout_end
);
637 // recombine the p smaller DFTs
639 case 2: kf_bfly2
!S(Fout
, fstride
, st
, m
); break;
640 case 3: kf_bfly3
!S(Fout
, fstride
, st
, m
); break;
641 case 4: kf_bfly4
!S(Fout
, fstride
, st
, m
); break;
642 case 5: kf_bfly5
!S(Fout
, fstride
, st
, m
); break;
643 default: kf_bfly_generic
!S(Fout
, fstride
, st
, m
, p
); break;
648 /* facbuf is populated by p1, m1, p2, m2, ...
653 private void kf_factor(S
) (uint n
, uint* facbuf
) {
654 import std
.math
: floor
, sqrt
;
655 immutable double floor_sqrt
= floor(sqrt(cast(double)n
));
657 // factor out powers of 4, powers of 2, then any remaining primes
661 case 4: p
= 2; break;
662 case 2: p
= 3; break;
663 default: p
+= 2; break;
665 if (p
> floor_sqrt
) p
= n
; // no more factors, skip to end
674 // ////////////////////////////////////////////////////////////////////////// //
677 Explanation of macros dealing with complex math:
679 C_MUL(m,a,b) : m = a*b
680 C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
681 C_SUB( res, a,b) : res = a-b
682 C_SUBFROM( res , a) : res -= a
683 C_ADDTO( res , a) : res += a
685 //enum S_MUL(string a, string b) = "(("~a~")*("~b~"))";
686 enum C_MUL(string m
, string a
, string b
) =
687 "{ ("~m
~").r = ("~a
~").r*("~b
~").r-("~a
~").i*("~b
~").i;
688 ("~m
~").i = ("~a
~").r*("~b
~").i+("~a
~").i*("~b
~").r; }";
689 enum C_MULBYSCALAR(string c
, string s
) = " { ("~c
~").r *= ("~s
~"); ("~c
~").i *= ("~s
~"); }";
691 enum C_ADD(string res
, string a
, string b
) = "{ ("~res
~").r=("~a
~").r+("~b
~").r; ("~res
~").i=("~a
~").i+("~b
~").i; }";
692 enum C_SUB(string res
, string a
, string b
) = "{ ("~res
~").r=("~a
~").r-("~b
~").r; ("~res
~").i=("~a
~").i-("~b
~").i; }";
693 enum C_ADDTO(string res
, string a
) = "{ ("~res
~").r += ("~a
~").r; ("~res
~").i += ("~a
~").i; }";
694 enum C_SUBFROM(string res
, string a
) = "{ ("~res
~").r -= ("~a
~").r; ("~res
~").i -= ("~a
~").i; }";
697 //kiss_fft_scalar KISS_FFT_COS(T) (T phase) { import std.math : cos; return cos(phase); }
698 //kiss_fft_scalar KISS_FFT_SIN(T) (T phase) { import std.math : sin; return sin(phase); }
699 //kiss_fft_scalar HALF_OF (kiss_fft_scalar x) { return x*cast(kiss_fft_scalar)0.5; }
700 enum HALF_OF(string x
) = "(cast(kiss_fft_scalar!S)(("~x
~")*cast(kiss_fft_scalar!S)0.5))";
702 //enum kf_cexp(string x, string phase) = "{ ("~x~").r = KISS_FFT_COS("~phase~"); ("~x~").i = KISS_FFT_SIN("~phase~"); }";
703 enum kf_cexp(string x
, string phase
) = "{ import std.math : cos, sin; ("~x
~").r = cos("~phase
~"); ("~x
~").i = sin("~phase
~"); }";
707 #ifdef KISS_FFT_USE_ALLOCA
708 // define this to allow use of alloca instead of malloc for temporary buffers
709 // Temporary buffers are used in two case:
710 // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
711 // 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
713 #define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
714 #define KISS_FFT_TMP_FREE(ptr)
716 #define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
717 #define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
722 // ////////////////////////////////////////////////////////////////////////// //
724 public alias kiss_fftnd_cfg_f
= kiss_fftnd_state
!float*; ///
725 public alias kiss_fftnd_cfg_d
= kiss_fftnd_state
!double*; ///
727 public template kiss_fftnd_cfg(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
728 alias kiss_fftnd_cfg
= kiss_fftnd_state
!T
*;
731 struct kiss_fftnd_state(S
)
732 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
734 uint dimprod
; /* dimsum would be mighty tasty right now */
737 kiss_fft_cfg
!S
* states
; /* cfg states for each dimension */
738 kiss_fft_cpx
!S
* tmpbuf
; /* buffer capable of hold the entire input */
743 public kiss_fftnd_cfg
!S
kiss_fftnd_alloc(S
) (const(uint)[] dims
, KissFFT dir
, void* mem
=null, usize
* lenmem
=null)
744 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
746 import core
.stdc
.stdlib
: malloc
;
748 if (dims
.length
< 1 || dims
.length
> ushort.max
) return null;
749 immutable uint ndims
= cast(uint)dims
.length
;
750 kiss_fftnd_cfg
!S st
= null;
752 usize memneeded
= (kiss_fftnd_state
!S
).sizeof
;
755 foreach (immutable uint i
; 0..ndims
) {
757 kiss_fft_alloc
!S(dims
[i
], dir
, null, &sublen
);
758 memneeded
+= sublen
; /* st.states[i] */
761 memneeded
+= int.sizeof
*ndims
;/* st.dims */
762 memneeded
+= (void*).sizeof
*ndims
;/* st.states */
763 memneeded
+= (kiss_fft_cpx
!S
).sizeof
*dimprod
; /* st.tmpbuf */
765 if (lenmem
is null) {
766 /* allocate for the caller */
767 st
= cast(kiss_fftnd_cfg
!S
)malloc(memneeded
);
769 /* initialize supplied buffer if big enough */
770 if (*lenmem
>= memneeded
) st
= cast(kiss_fftnd_cfg
!S
)mem
;
771 *lenmem
= memneeded
; /* tell caller how big struct is (or would be) */
773 if (st
is null) return null; /* malloc failed or buffer too small */
775 st
.dimprod
= dimprod
;
777 ptr
= cast(ubyte*)(st
+1);
779 st
.states
= cast(kiss_fft_cfg
!S
*)ptr
;
780 ptr
+= (void*).sizeof
*ndims
;
782 st
.dims
= cast(uint*)ptr
;
783 ptr
+= int.sizeof
*ndims
;
785 st
.tmpbuf
= cast(kiss_fft_cpx
!S
*)ptr
;
786 ptr
+= (kiss_fft_cpx
!S
).sizeof
*dimprod
;
788 foreach (immutable uint i
; 0..ndims
) {
790 st
.dims
[i
] = dims
[i
];
791 kiss_fft_alloc
!S(st
.dims
[i
], dir
, null, &len
);
792 st
.states
[i
] = kiss_fft_alloc
!S(st
.dims
[i
], dir
, ptr
, &len
);
801 public void kiss_fftnd_free(S
) (ref kiss_fftnd_cfg
!S p
)
802 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
805 import core
.stdc
.stdlib
: free
;
813 This works by tackling one dimension at a time.
816 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
817 A Di-sized fft is taken of each column, transposing the matrix as it goes.
819 Here's a 3-d example:
820 Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
821 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
822 [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
824 Stage 0 ( D=2): treat the buffer as a 2x12 matrix
828 FFT each column with size 2.
829 Transpose the matrix at the same time using kiss_fft_stride.
837 Note fft([x y]) == [x+y x-y]
839 Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
840 [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
841 [ e+q e-q f+r f-r g+s g-s h+t h-t ]
842 [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
844 And perform FFTs (size=3) on each of the columns as above, transposing
845 the matrix as it goes. The output of stage 1 is
846 (Legend: ap = [ a+m e+q i+u ]
847 am = [ a-m e-q i-u ] )
849 [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
850 [ sum(am) fft(am)[0] fft(am)[1] ]
851 [ sum(bp) fft(bp)[0] fft(bp)[1] ]
852 [ sum(bm) fft(bm)[0] fft(bm)[1] ]
853 [ sum(cp) fft(cp)[0] fft(cp)[1] ]
854 [ sum(cm) fft(cm)[0] fft(cm)[1] ]
855 [ sum(dp) fft(dp)[0] fft(dp)[1] ]
856 [ sum(dm) fft(dm)[0] fft(dm)[1] ] ]
858 Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
859 [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
860 [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
861 [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
862 [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ]
864 Then FFTs each column, transposing as it goes.
866 The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
868 Note as a sanity check that the first element of the final
869 stage's output (DC term) is
870 sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
871 , i.e. the summation of all 24 input elements.
874 public void kiss_fftnd(S
) (kiss_fftnd_cfg
!S st
, const(kiss_fft_cpx
!S
)* fin
, kiss_fft_cpx
!S
* fout
)
875 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
877 import core
.stdc
.string
: memcpy
;
879 const(kiss_fft_cpx
!S
)* bufin
= fin
;
880 kiss_fft_cpx
!S
* bufout
;
882 /* arrange it so the last bufout == fout */
886 memcpy(st
.tmpbuf
, fin
, (kiss_fft_cpx
!S
).sizeof
*st
.dimprod
);
893 foreach (immutable uint k
; 0..st
.ndims
) {
894 uint curdim
= st
.dims
[k
];
895 uint stride
= st
.dimprod
/curdim
;
897 foreach (immutable uint i
; 0..stride
) {
898 kiss_fft_stride
!S(st
.states
[k
], bufin
+i
, bufout
+i
*curdim
, stride
);
901 /* toggle back and forth between the two buffers */
902 if (bufout
is st
.tmpbuf
) {
913 // ////////////////////////////////////////////////////////////////////////// //
915 public alias kiss_fftndr_cfg_f
= kiss_fftndr_state
!float*; ///
916 public alias kiss_fftndr_cfg_d
= kiss_fftndr_state
!double*; ///
918 public template kiss_fftndr_cfg(T
) if (is(T
== float) ||
is(T
== double) ||
is(T
== real)) {
919 alias kiss_fftndr_cfg
= kiss_fftndr_state
!T
*;
922 struct kiss_fftndr_state(S
)
923 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
927 kiss_fftr_cfg
!S cfg_r
;
928 kiss_fftnd_cfg
!S cfg_nd
;
933 int prod (const(uint)[] dims
) pure nothrow @trusted @nogc {
935 uint ndims
= cast(uint)dims
.length
;
936 const(uint)* dp
= dims
.ptr
;
937 while (ndims
--) x
*= *dp
++;
941 T
MAX(T
) (in T a
, in T b
) pure nothrow @safe @nogc { pragma(inline
, true); return (a
> b ? a
: b
); }
947 If you don't care to allocate space, use mem = lenmem = null
949 public kiss_fftndr_cfg
!S
kiss_fftndr_alloc(S
) (const(uint)[] dims
, KissFFT dir
, void* mem
=null, usize
* lenmem
=null)
950 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
952 import core
.stdc
.stdlib
: malloc
, free
;
953 import core
.stdc
.string
: memset
;
955 if (dims
.length
< 1 || dims
.length
> ushort.max
) return null;
956 immutable uint ndims
= cast(uint)dims
.length
;
958 kiss_fftndr_cfg
!S st
= null;
959 usize nr
= 0, nd
= 0, ntmp
= 0;
960 uint dimReal
= dims
[ndims
-1];
961 uint dimOther
= prod(dims
[0..$-1]);
964 kiss_fftr_alloc
!S(dimReal
, dir
, null, &nr
);
965 kiss_fftnd_alloc
!S(dims
[0..$-1], dir
, null, &nd
);
967 MAX(2*dimOther
, dimReal
+2)*(kiss_fft_scalar
!S
).sizeof
+ // freq buffer for one pass
968 dimOther
*(dimReal
+2)*(kiss_fft_scalar
!S
).sizeof
; // large enough to hold entire input in case of in-place
970 memneeded
= (kiss_fftndr_state
!S
).sizeof
+nr
+nd
+ntmp
;
972 bool malloced
= false;
973 if (lenmem
is null) {
974 st
= cast(kiss_fftndr_cfg
!S
)malloc(memneeded
);
977 if (*lenmem
>= memneeded
) st
= cast(kiss_fftndr_cfg
!S
)mem
;
980 if (st
is null) return null;
982 memset(st
, 0, memneeded
);
984 st
.dimReal
= dimReal
;
985 st
.dimOther
= dimOther
;
986 st
.cfg_r
= kiss_fftr_alloc
!S(dimReal
, dir
, st
+1, &nr
);
987 if (st
.cfg_r
is null) { if (malloced
) free(st
); return null; }
988 st
.cfg_nd
= kiss_fftnd_alloc
!S(dims
[0..$-1], dir
, (cast(ubyte*)st
.cfg_r
)+nr
, &nd
);
989 if (st
.cfg_nd
is null) { if (malloced
) free(st
); return null; }
990 st
.tmpbuf
= cast(ubyte*)st
.cfg_nd
+nd
;
991 assert(st
.tmpbuf
!is null);
998 public void kiss_fftndr_free(S
) (ref kiss_fftndr_cfg
!S p
)
999 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
1002 import core
.stdc
.stdlib
: free
;
1010 input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points
1011 output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points
1013 public void kiss_fftndr(S
) (kiss_fftndr_cfg
!S st
, const(kiss_fft_scalar
!S
)* timedata
, kiss_fft_cpx
!S
* freqdata
)
1014 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
1016 uint dimReal
= st
.dimReal
;
1017 uint dimOther
= st
.dimOther
;
1018 uint nrbins
= dimReal
/2+1;
1020 kiss_fft_cpx
!S
* tmp1
= cast(kiss_fft_cpx
!S
*)st
.tmpbuf
;
1021 kiss_fft_cpx
!S
* tmp2
= tmp1
+MAX(nrbins
, dimOther
);
1022 //assert(tmp1 !is null);
1024 // timedata is N0 x N1 x ... x Nk real
1026 // take a real chunk of data, fft it and place the output at correct intervals
1027 foreach (immutable uint k1
; 0..dimOther
) {
1028 kiss_fftr
!S(st
.cfg_r
, timedata
+k1
*dimReal
, tmp1
); // tmp1 now holds nrbins complex points
1029 foreach (immutable uint k2
; 0..nrbins
) tmp2
[k2
*dimOther
+k1
] = tmp1
[k2
];
1032 foreach (immutable uint k2
; 0..nrbins
) {
1033 kiss_fftnd
!S(st
.cfg_nd
, tmp2
+k2
*dimOther
, tmp1
); // tmp1 now holds dimOther complex points
1034 foreach (immutable uint k1
; 0..dimOther
) freqdata
[k1
*nrbins
+k2
] = tmp1
[k1
];
1040 input and output dimensions are the exact opposite of kiss_fftndr
1042 public void kiss_fftndri(S
) (kiss_fftndr_cfg
!S st
, const(kiss_fft_cpx
!S
)* freqdata
, kiss_fft_scalar
!S
* timedata
)
1043 if (is(S
== float) ||
is(S
== double) ||
is(S
== real))
1045 int dimReal
= st
.dimReal
;
1046 int dimOther
= st
.dimOther
;
1047 int nrbins
= dimReal
/2+1;
1048 kiss_fft_cpx
!S
* tmp1
= cast(kiss_fft_cpx
!S
*)st
.tmpbuf
;
1049 kiss_fft_cpx
!S
* tmp2
= tmp1
+MAX(nrbins
, dimOther
);
1051 foreach (immutable uint k2
; 0..nrbins
) {
1052 foreach (immutable uint k1
; 0..dimOther
) tmp1
[k1
] = freqdata
[k1
*(nrbins
)+k2
];
1053 kiss_fftnd
!S(st
.cfg_nd
, tmp1
, tmp2
+k2
*dimOther
);
1056 foreach (immutable uint k1
; 0..dimOther
) {
1057 foreach (immutable uint k2
; 0..nrbins
) tmp1
[k2
] = tmp2
[k2
*dimOther
+k1
];
1058 kiss_fftri
!S(st
.cfg_r
, tmp1
, timedata
+k1
*dimReal
);