iv.nanovega.textlayouter: more API!
[iv.d.git] / kissfft.d
blob27544f22c2166406c6fee73ed52f5c690189f03c
1 /*
2 * Copyright (c) 2003-2010, Mark Borgerding
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the
16 * distribution.
18 * * Neither the author nor the names of any contributors may be used to
19 * endorse or promote products derived from this software without
20 * specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 /* Ported by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
35 * Understanding is not required. Only obedience.
37 * This program is free software: you can redistribute it and/or modify
38 * it under the terms of the GNU General Public License as published by
39 * the Free Software Foundation, either version 3 of the License, or
40 * (at your option) any later version.
42 * This program is distributed in the hope that it will be useful,
43 * but WITHOUT ANY WARRANTY; without even the implied warranty of
44 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 * GNU General Public License for more details.
47 * You should have received a copy of the GNU General Public License
48 * along with this program. If not, see <http://www.gnu.org/licenses/>.
50 module iv.kissfft;
51 private nothrow @trusted @nogc:
53 version(aliced) {} else alias usize = size_t;
55 //version = kiss_fft_use_parallel; // absolutely no reason to
58 // ////////////////////////////////////////////////////////////////////////// //
59 ///
60 public enum KissFFT { Forward, Inverse }
62 ///
63 //public alias kiss_fft_scalar = float;
64 //public alias kiss_fft_scalar = double;
66 ///
67 public template kiss_fft_scalar(T) if (is(T == float) || is(T == double) || is(T == real)) {
68 alias kiss_fft_scalar = T;
72 ///
73 public template isGoodKissFFTScalar(T) {
74 static if (is(T == float) || is(T == double) || is(T == real)) {
75 enum isGoodKissFFTScalar = true;
76 } else {
77 enum isGoodKissFFTScalar = false;
82 ///
83 public template isKissFFTComplex(T) {
84 static if (is(T : kiss_fft_cpx!S, S)) {
85 enum isKissFFTComplex = true;
86 } else {
87 enum isKissFFTComplex = false;
92 ///
93 public template KissFFTScalar(T) {
94 static if (is(T : kiss_fft_cpx!S, S)) {
95 alias KissFFTScalar = S;
96 } else {
97 static assert(0, "not a KissFFT complex type");
102 public align(1) struct kiss_fft_cpx(T) if (is(T == float) || is(T == double) || is(T == real)) {
103 align(1):
104 alias Scalar = T;
105 T r;
106 T i;
108 T opIndex (uint n) const pure nothrow @trusted @nogc { pragma(inline, true); return (n ? i : r); }
109 void opIndexAssign (in T v, uint n) nothrow @trusted @nogc { pragma(inline, true); if (n) i = v; else r = v; }
113 public alias kiss_fft_cpx_f = kiss_fft_cpx!float; ///
114 public alias kiss_fft_cpx_d = kiss_fft_cpx!double; ///
117 public alias kiss_fft_cfg_f = kiss_fft_state!float*; ///
118 public alias kiss_fft_cfg_d = kiss_fft_state!double*; ///
120 public template kiss_fft_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
121 alias kiss_fft_cfg = kiss_fft_state!T*;
125 // ////////////////////////////////////////////////////////////////////////// //
126 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
128 * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, KissFFT.Forward);`
130 * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
132 * If `lenmem` is `null`, then kiss_fft_alloc will allocate a cfg buffer using `malloc`.
133 * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
135 * The state can be placed in a user supplied buffer `mem`:
136 * If `lenmem` is not `null` and `mem` is not `null` and `*lenmem` is large enough,
137 * then the function places the cfg in `mem` and the size used in `*lenmem`,
138 * and returns `mem`.
140 * If `lenmem` is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
141 * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
143 public kiss_fft_cfg!S kiss_fft_alloc(S) (uint nfft, KissFFT dir, void* mem=null, usize* lenmem=null)
144 if (is(S == float) || is(S == double) || is(S == real))
146 kiss_fft_cfg!S st = null;
147 usize memneeded = (kiss_fft_state!S).sizeof+(kiss_fft_cpx!S).sizeof*(nfft-1); // twiddle factors
149 if (lenmem is null) {
150 import core.stdc.stdlib : malloc;
151 st = cast(kiss_fft_cfg!S)malloc(memneeded);
152 } else {
153 if (mem !is null && *lenmem >= memneeded) st = cast(kiss_fft_cfg!S)mem;
154 *lenmem = memneeded;
157 if (st !is null) {
158 st.nfft = nfft;
159 st.inverse = (dir == KissFFT.Inverse);
160 foreach (immutable uint i; 0..nfft) {
161 enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
162 immutable double phase = -2*pi*i/nfft*(st.inverse ? -1 : 1);
163 //if (st.inverse) phase = -phase;
164 mixin(kf_cexp!("st.twiddles.ptr+i", "phase"));
166 kf_factor!S(nfft, st.factors.ptr);
169 return st;
173 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
174 * buffer and can be simply free()d when no longer needed
176 public void kiss_fft_free(S) (ref kiss_fft_cfg!S p)
177 if (is(S == float) || is(S == double) || is(S == real))
179 if (p !is null) {
180 import core.stdc.stdlib : free;
181 free(p);
182 p = null;
187 /** Perform an FFT on a complex input buffer.
189 * for a forward FFT,
190 * fin should be f[0] , f[1] , ... ,f[nfft-1]
191 * fout will be F[0] , F[1] , ... ,F[nfft-1]
192 * Note that each element is complex and can be accessed like f[k].r and f[k].i
194 public void kiss_fft(S) (kiss_fft_cfg!S cfg, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
195 if (is(S == float) || is(S == double) || is(S == real))
197 assert(cfg !is null);
198 kiss_fft_stride!S(cfg, fin, fout, 1);
202 /** A more generic version of the above function. It reads its input from every Nth sample. */
203 public void kiss_fft_stride(S) (kiss_fft_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout, uint in_stride)
204 if (is(S == float) || is(S == double) || is(S == real))
206 import core.stdc.stdlib : alloca;
207 assert(st !is null);
208 if (fin is fout) {
209 import core.stdc.string : memcpy;
210 //NOTE: this is not really an in-place FFT algorithm.
211 //It just performs an out-of-place FFT into a temp buffer
212 //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
213 kiss_fft_cpx!S* tmpbuf = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*st.nfft);
214 kf_work!S(tmpbuf, fin, 1, in_stride, st.factors.ptr, st);
215 memcpy(fout, tmpbuf, (kiss_fft_cpx!S).sizeof*st.nfft);
216 //KISS_FFT_TMP_FREE(tmpbuf);
217 } else {
218 kf_work!S(fout, fin, 1, in_stride, st.factors.ptr, st);
223 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
224 public uint kiss_fft_next_fast_size (uint n) {
225 for (;;) {
226 uint m = n;
227 while ((m%2) == 0) m /= 2;
228 while ((m%3) == 0) m /= 3;
229 while ((m%5) == 0) m /= 5;
230 if (m <= 1) break; // n is completely factorable by twos, threes, and fives
231 ++n;
233 return n;
237 // ////////////////////////////////////////////////////////////////////////// //
238 // kissfftr
239 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
241 public alias kiss_fftr_cfg_f = kiss_fftr_state!float*; ///
242 public alias kiss_fftr_cfg_d = kiss_fftr_state!double*; ///
245 public template kiss_fftr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
246 alias kiss_fftr_cfg = kiss_fftr_state!T*;
249 struct kiss_fftr_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
250 kiss_fft_cfg!S substate;
251 kiss_fft_cpx!S* tmpbuf;
252 kiss_fft_cpx!S* super_twiddles;
257 ** nfft must be even
259 * If you don't care to allocate space, use mem = lenmem = null
261 public kiss_fftr_cfg!S kiss_fftr_alloc(S) (uint nfft, KissFFT dir, void* mem, usize* lenmem)
262 if (is(S == float) || is(S == double) || is(S == real))
264 kiss_fftr_cfg!S st = null;
265 usize subsize, memneeded;
267 if (nfft&1) return null; //assert(0, "real FFT optimization must be even");
268 nfft >>= 1;
270 kiss_fft_alloc!S(nfft, dir, null, &subsize);
271 memneeded = (kiss_fftr_state!S).sizeof+subsize+(kiss_fft_cpx!S).sizeof*(nfft*3/2);
273 if (lenmem is null) {
274 import core.stdc.stdlib : malloc;
275 st = cast(kiss_fftr_cfg!S)malloc(memneeded);
276 } else {
277 if (*lenmem >= memneeded) st = cast(kiss_fftr_cfg!S)mem;
278 *lenmem = memneeded;
280 if (st is null) return null;
282 st.substate = cast(kiss_fft_cfg!S)(st+1); // just beyond kiss_fftr_state struct
283 st.tmpbuf = cast(kiss_fft_cpx!S*)((cast(ubyte*)st.substate)+subsize);
284 st.super_twiddles = st.tmpbuf+nfft;
285 kiss_fft_alloc!S(nfft, dir, st.substate, &subsize);
287 foreach (immutable i; 0..nfft/2) {
288 enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
289 immutable double phase = -pi*(cast(double)(i+1)/nfft+0.5)*(dir == KissFFT.Inverse ? -1 : 1);
290 //if (dir == KissFFT.Inverse) phase = -phase;
291 mixin(kf_cexp!("st.super_twiddles+i", "phase"));
294 return st;
298 /** Use this to free `fftr` buffer. */
299 public void kiss_fft_free(S) (ref kiss_fftr_cfg!S p)
300 if (is(S == float) || is(S == double) || is(S == real))
302 if (p !is null) {
303 import core.stdc.stdlib : free;
304 free(p);
305 p = null;
310 /** input timedata has nfft scalar points
311 * output freqdata has nfft/2+1 complex points
313 public void kiss_fftr(S) (kiss_fftr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
314 if (is(S == float) || is(S == double) || is(S == real))
316 // input buffer timedata is stored row-wise
317 uint k, ncfft;
318 kiss_fft_cpx!S fpnk, fpk, f1k, f2k, tw, tdc;
320 if (st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
322 ncfft = st.substate.nfft;
324 // perform the parallel fft of two real signals packed in real,imag
325 kiss_fft!S(st.substate, cast(const(kiss_fft_cpx!S)*)timedata, st.tmpbuf);
326 /* The real part of the DC element of the frequency spectrum in st.tmpbuf
327 * contains the sum of the even-numbered elements of the input time sequence
328 * The imag part is the sum of the odd-numbered elements
330 * The sum of tdc.r and tdc.i is the sum of the input time sequence.
331 * yielding DC of input time sequence
332 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
333 * yielding Nyquist bin of input time sequence
336 tdc.r = st.tmpbuf[0].r;
337 tdc.i = st.tmpbuf[0].i;
338 freqdata[0].r = tdc.r+tdc.i;
339 freqdata[ncfft].r = tdc.r-tdc.i;
340 freqdata[ncfft].i = freqdata[0].i = 0;
342 for (k = 1; k <= ncfft/2; ++k) {
343 fpk = st.tmpbuf[k];
344 fpnk.r = st.tmpbuf[ncfft-k].r;
345 fpnk.i = -st.tmpbuf[ncfft-k].i;
347 mixin(C_ADD!("f1k", "fpk", "fpnk"));
348 mixin(C_SUB!("f2k", "fpk", "fpnk"));
349 mixin(C_MUL!("tw", "f2k", "st.super_twiddles[k-1]"));
351 freqdata[k].r = mixin(HALF_OF!"f1k.r+tw.r");
352 freqdata[k].i = mixin(HALF_OF!"f1k.i+tw.i");
353 freqdata[ncfft-k].r = mixin(HALF_OF!"f1k.r-tw.r");
354 freqdata[ncfft-k].i = mixin(HALF_OF!"tw.i-f1k.i");
359 /** input freqdata has nfft/2+1 complex points
360 * output timedata has nfft scalar points
362 public void kiss_fftri(S) (kiss_fftr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
363 if (is(S == float) || is(S == double) || is(S == real))
365 // input buffer timedata is stored row-wise
366 if (st.substate.inverse == 0) assert(0, "kiss fft usage error: improper alloc");
368 uint ncfft = st.substate.nfft;
370 st.tmpbuf[0].r = freqdata[0].r+freqdata[ncfft].r;
371 st.tmpbuf[0].i = freqdata[0].r-freqdata[ncfft].r;
373 foreach (immutable uint k; 1..ncfft/2+1) {
374 kiss_fft_cpx!S fnkc = void, fek = void, fok = void, tmp = void;
375 kiss_fft_cpx!S fk = freqdata[k];
376 fnkc.r = freqdata[ncfft-k].r;
377 fnkc.i = -freqdata[ncfft-k].i;
378 mixin(C_ADD!("fek", "fk", "fnkc"));
379 mixin(C_SUB!("tmp", "fk", "fnkc"));
380 mixin(C_MUL!("fok", "tmp", "st.super_twiddles[k-1]"));
381 mixin(C_ADD!("st.tmpbuf[k]", "fek", "fok"));
382 mixin(C_SUB!("st.tmpbuf[ncfft-k]", "fek", "fok"));
383 st.tmpbuf[ncfft-k].i *= -1;
385 kiss_fft!S(st.substate, st.tmpbuf, cast(kiss_fft_cpx!S*)timedata);
389 /** for real ffts, we need an even size */
390 public uint kiss_fftr_next_fast_size_real (uint n) {
391 pragma(inline, true);
392 return kiss_fft_next_fast_size((n+1)>>1)<<1;
396 // ////////////////////////////////////////////////////////////////////////// //
397 enum MAXFACTORS = 32;
398 /* e.g. an fft of length 128 has 4 factors
399 as far as kissfft is concerned
400 4*4*4*2
403 struct kiss_fft_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
404 uint nfft;
405 uint inverse;
406 uint[2*MAXFACTORS] factors;
407 kiss_fft_cpx!S[1] twiddles;
411 // ////////////////////////////////////////////////////////////////////////// //
412 private void kf_bfly2(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, int m) {
413 kiss_fft_cpx!S* Fout2;
414 const(kiss_fft_cpx!S)* tw1 = st.twiddles.ptr;
415 kiss_fft_cpx!S t;
416 Fout2 = Fout+m;
417 do {
418 mixin(C_MUL!("t", "*Fout2", "*tw1"));
419 tw1 += fstride;
420 mixin(C_SUB!("*Fout2", "*Fout", "t"));
421 mixin(C_ADDTO!("*Fout", "t"));
422 ++Fout2;
423 ++Fout;
424 } while (--m);
428 private void kf_bfly4(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, in usize m) {
429 const(kiss_fft_cpx!S)* tw1, tw2, tw3;
430 kiss_fft_cpx!S[6] scratch = void;
431 usize k = m;
432 immutable usize m2 = 2*m;
433 immutable usize m3 = 3*m;
434 tw3 = tw2 = tw1 = st.twiddles.ptr;
435 do {
436 mixin(C_MUL!("scratch[0]", "Fout[m]", "*tw1"));
437 mixin(C_MUL!("scratch[1]", "Fout[m2]", "*tw2"));
438 mixin(C_MUL!("scratch[2]", "Fout[m3]", "*tw3"));
440 mixin(C_SUB!("scratch[5]", "*Fout", "scratch[1]"));
441 mixin(C_ADDTO!("*Fout", "scratch[1]"));
442 mixin(C_ADD!("scratch[3]", "scratch[0]", "scratch[2]"));
443 mixin(C_SUB!("scratch[4]", "scratch[0]", "scratch[2]"));
444 mixin(C_SUB!("Fout[m2]", "*Fout", "scratch[3]"));
445 tw1 += fstride;
446 tw2 += fstride*2;
447 tw3 += fstride*3;
448 mixin(C_ADDTO!("*Fout", "scratch[3]"));
450 if (st.inverse) {
451 Fout[m].r = scratch[5].r-scratch[4].i;
452 Fout[m].i = scratch[5].i+scratch[4].r;
453 Fout[m3].r = scratch[5].r+scratch[4].i;
454 Fout[m3].i = scratch[5].i-scratch[4].r;
455 } else {
456 Fout[m].r = scratch[5].r+scratch[4].i;
457 Fout[m].i = scratch[5].i-scratch[4].r;
458 Fout[m3].r = scratch[5].r-scratch[4].i;
459 Fout[m3].i = scratch[5].i+scratch[4].r;
461 ++Fout;
462 } while (--k);
466 private void kf_bfly3(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, usize m) {
467 usize k = m;
468 immutable usize m2 = 2*m;
469 const(kiss_fft_cpx!S)* tw1, tw2;
470 kiss_fft_cpx!S[5] scratch = void;
471 kiss_fft_cpx!S epi3;
472 epi3 = st.twiddles[fstride*m];
473 tw1 = tw2 = st.twiddles.ptr;
474 do {
475 mixin(C_MUL!("scratch[1]", "Fout[m]", "*tw1"));
476 mixin(C_MUL!("scratch[2]", "Fout[m2]", "*tw2"));
478 mixin(C_ADD!("scratch[3]", "scratch[1]", "scratch[2]"));
479 mixin(C_SUB!("scratch[0]", "scratch[1]", "scratch[2]"));
480 tw1 += fstride;
481 tw2 += fstride*2;
483 Fout[m].r = Fout.r-mixin(HALF_OF!"scratch[3].r");
484 Fout[m].i = Fout.i-mixin(HALF_OF!"scratch[3].i");
486 mixin(C_MULBYSCALAR!("scratch[0]", "epi3.i"));
488 mixin(C_ADDTO!("*Fout", "scratch[3]"));
490 Fout[m2].r = Fout[m].r+scratch[0].i;
491 Fout[m2].i = Fout[m].i-scratch[0].r;
493 Fout[m].r -= scratch[0].i;
494 Fout[m].i += scratch[0].r;
496 ++Fout;
497 } while (--k);
501 private void kf_bfly5(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m) {
502 kiss_fft_cpx!S* Fout0, Fout1, Fout2, Fout3, Fout4;
503 kiss_fft_cpx!S[13] scratch = void;
504 const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
505 const(kiss_fft_cpx!S)* tw;
506 kiss_fft_cpx!S ya = twiddles[fstride*m];
507 kiss_fft_cpx!S yb = twiddles[fstride*2*m];
509 Fout0 = Fout;
510 Fout1 = Fout0+m;
511 Fout2 = Fout0+2*m;
512 Fout3 = Fout0+3*m;
513 Fout4 = Fout0+4*m;
515 tw = st.twiddles.ptr;
516 foreach (immutable uint u; 0..m) {
517 scratch[0] = *Fout0;
519 mixin(C_MUL!("scratch[1]", "*Fout1", "tw[u*fstride]"));
520 mixin(C_MUL!("scratch[2]", "*Fout2", "tw[2*u*fstride]"));
521 mixin(C_MUL!("scratch[3]", "*Fout3", "tw[3*u*fstride]"));
522 mixin(C_MUL!("scratch[4]", "*Fout4", "tw[4*u*fstride]"));
524 mixin(C_ADD!("scratch[7]", "scratch[1]", "scratch[4]"));
525 mixin(C_SUB!("scratch[10]", "scratch[1]", "scratch[4]"));
526 mixin(C_ADD!("scratch[8]", "scratch[2]", "scratch[3]"));
527 mixin(C_SUB!("scratch[9]", "scratch[2]", "scratch[3]"));
529 Fout0.r += scratch[7].r+scratch[8].r;
530 Fout0.i += scratch[7].i+scratch[8].i;
532 scratch[5].r = scratch[0].r+scratch[7].r*ya.r+scratch[8].r*yb.r;
533 scratch[5].i = scratch[0].i+scratch[7].i*ya.r+scratch[8].i*yb.r;
535 scratch[6].r = scratch[10].i*ya.i+scratch[9].i*yb.i;
536 scratch[6].i = -scratch[10].r*ya.i-scratch[9].r*yb.i;
538 mixin(C_SUB!("*Fout1", "scratch[5]", "scratch[6]"));
539 mixin(C_ADD!("*Fout4", "scratch[5]", "scratch[6]"));
541 scratch[11].r = scratch[0].r+scratch[7].r*yb.r+scratch[8].r*ya.r;
542 scratch[11].i = scratch[0].i+scratch[7].i*yb.r+scratch[8].i*ya.r;
543 scratch[12].r = -scratch[10].i*yb.i+scratch[9].i*ya.i;
544 scratch[12].i = scratch[10].r*yb.i-scratch[9].r*ya.i;
546 mixin(C_ADD!("*Fout2", "scratch[11]", "scratch[12]"));
547 mixin(C_SUB!("*Fout3", "scratch[11]", "scratch[12]"));
549 ++Fout0;
550 ++Fout1;
551 ++Fout2;
552 ++Fout3;
553 ++Fout4;
557 // perform the butterfly for one stage of a mixed radix FFT
558 private void kf_bfly_generic(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m, uint p) {
559 import core.stdc.stdlib : alloca;
561 //uint q1, q;
562 const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
563 kiss_fft_cpx!S t;
564 uint Norig = st.nfft;
566 //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
567 kiss_fft_cpx!S* scratch = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*p);
569 foreach (immutable uint u; 0..m) {
570 uint k = u;
571 foreach (immutable uint q1; 0..p) {
572 scratch[q1] = Fout[k];
573 k += m;
576 k = u;
577 foreach (immutable uint q1; 0..p) {
578 uint twidx = 0;
579 Fout[k] = scratch[0];
580 foreach (immutable uint q; 1..p) {
581 twidx += fstride*k;
582 if (twidx >= Norig) twidx -= Norig;
583 mixin(C_MUL!("t", "scratch[q]", "twiddles[twidx]"));
584 mixin(C_ADDTO!("Fout[k]", "t"));
586 k += m;
589 //KISS_FFT_TMP_FREE(scratch);
593 private void kf_work(S) (kiss_fft_cpx!S* Fout, const(kiss_fft_cpx!S)* f, in usize fstride, uint in_stride, uint* factors, const(kiss_fft_cfg!S) st) {
594 kiss_fft_cpx!S* Fout_beg = Fout;
595 immutable uint p = *factors++; // the radix
596 immutable uint m = *factors++; // stage's fft length/p
597 const(kiss_fft_cpx!S)* Fout_end = Fout+p*m;
599 version(kiss_fft_use_parallel) {
600 // use threads at the top-level (not recursive)
601 import std.parallelism;
602 import std.range : iota;
603 if (fstride == 1 && p <= 5) {
604 // execute the p different work units in different threads
605 foreach (uint k; parallel(iota(p))) {
606 kf_work!S(Fout+k*m, f+fstride*in_stride*k, fstride*p, in_stride, factors, st);
608 // all threads have joined by this point
609 switch (p) {
610 case 2: kf_bfly2!S(Fout, fstride, st, m); break;
611 case 3: kf_bfly3!S(Fout, fstride, st, m); break;
612 case 4: kf_bfly4!S(Fout, fstride, st, m); break;
613 case 5: kf_bfly5!S(Fout, fstride, st, m); break;
614 default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
616 return;
620 if (m == 1) {
621 do {
622 *Fout = *f;
623 f += fstride*in_stride;
624 } while (++Fout !is Fout_end);
625 } else {
626 do {
627 // recursive call:
628 // DFT of size m*p performed by doing
629 // p instances of smaller DFTs of size m,
630 // each one takes a decimated version of the input
631 kf_work!S(Fout, f, fstride*p, in_stride, factors, st);
632 f += fstride*in_stride;
633 } while ((Fout += m) !is Fout_end);
636 Fout = Fout_beg;
638 // recombine the p smaller DFTs
639 switch (p) {
640 case 2: kf_bfly2!S(Fout, fstride, st, m); break;
641 case 3: kf_bfly3!S(Fout, fstride, st, m); break;
642 case 4: kf_bfly4!S(Fout, fstride, st, m); break;
643 case 5: kf_bfly5!S(Fout, fstride, st, m); break;
644 default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
649 /* facbuf is populated by p1, m1, p2, m2, ...
650 * where
651 * p[i]*m[i] = m[i-1]
652 * m0 = n
654 private void kf_factor(S) (uint n, uint* facbuf) {
655 import std.math : floor, sqrt;
656 immutable double floor_sqrt = floor(sqrt(cast(double)n));
657 uint p = 4;
658 // factor out powers of 4, powers of 2, then any remaining primes
659 do {
660 while (n%p) {
661 switch (p) {
662 case 4: p = 2; break;
663 case 2: p = 3; break;
664 default: p += 2; break;
666 if (p > floor_sqrt) p = n; // no more factors, skip to end
668 n /= p;
669 *facbuf++ = p;
670 *facbuf++ = n;
671 } while (n > 1);
675 // ////////////////////////////////////////////////////////////////////////// //
676 // kissfft_guts
678 Explanation of macros dealing with complex math:
680 C_MUL(m,a,b) : m = a*b
681 C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
682 C_SUB( res, a,b) : res = a-b
683 C_SUBFROM( res , a) : res -= a
684 C_ADDTO( res , a) : res += a
686 //enum S_MUL(string a, string b) = "(("~a~")*("~b~"))";
687 enum C_MUL(string m, string a, string b) =
688 "{ ("~m~").r = ("~a~").r*("~b~").r-("~a~").i*("~b~").i;
689 ("~m~").i = ("~a~").r*("~b~").i+("~a~").i*("~b~").r; }";
690 enum C_MULBYSCALAR(string c, string s) = " { ("~c~").r *= ("~s~"); ("~c~").i *= ("~s~"); }";
692 enum C_ADD(string res, string a, string b) = "{ ("~res~").r=("~a~").r+("~b~").r; ("~res~").i=("~a~").i+("~b~").i; }";
693 enum C_SUB(string res, string a, string b) = "{ ("~res~").r=("~a~").r-("~b~").r; ("~res~").i=("~a~").i-("~b~").i; }";
694 enum C_ADDTO(string res, string a) = "{ ("~res~").r += ("~a~").r; ("~res~").i += ("~a~").i; }";
695 enum C_SUBFROM(string res, string a) = "{ ("~res~").r -= ("~a~").r; ("~res~").i -= ("~a~").i; }";
698 //kiss_fft_scalar KISS_FFT_COS(T) (T phase) { import std.math : cos; return cos(phase); }
699 //kiss_fft_scalar KISS_FFT_SIN(T) (T phase) { import std.math : sin; return sin(phase); }
700 //kiss_fft_scalar HALF_OF (kiss_fft_scalar x) { return x*cast(kiss_fft_scalar)0.5; }
701 enum HALF_OF(string x) = "(cast(kiss_fft_scalar!S)(("~x~")*cast(kiss_fft_scalar!S)0.5))";
703 //enum kf_cexp(string x, string phase) = "{ ("~x~").r = KISS_FFT_COS("~phase~"); ("~x~").i = KISS_FFT_SIN("~phase~"); }";
704 enum kf_cexp(string x, string phase) = "{ import std.math : cos, sin; ("~x~").r = cos("~phase~"); ("~x~").i = sin("~phase~"); }";
708 #ifdef KISS_FFT_USE_ALLOCA
709 // define this to allow use of alloca instead of malloc for temporary buffers
710 // Temporary buffers are used in two case:
711 // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
712 // 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
713 #include <alloca.h>
714 #define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
715 #define KISS_FFT_TMP_FREE(ptr)
716 #else
717 #define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
718 #define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
719 #endif
723 // ////////////////////////////////////////////////////////////////////////// //
725 public alias kiss_fftnd_cfg_f = kiss_fftnd_state!float*; ///
726 public alias kiss_fftnd_cfg_d = kiss_fftnd_state!double*; ///
728 public template kiss_fftnd_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
729 alias kiss_fftnd_cfg = kiss_fftnd_state!T*;
732 struct kiss_fftnd_state(S)
733 if (is(S == float) || is(S == double) || is(S == real))
735 uint dimprod; /* dimsum would be mighty tasty right now */
736 uint ndims;
737 uint* dims;
738 kiss_fft_cfg!S* states; /* cfg states for each dimension */
739 kiss_fft_cpx!S* tmpbuf; /* buffer capable of hold the entire input */
744 public kiss_fftnd_cfg!S kiss_fftnd_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
745 if (is(S == float) || is(S == double) || is(S == real))
747 import core.stdc.stdlib : malloc;
749 if (dims.length < 1 || dims.length > ushort.max) return null;
750 immutable uint ndims = cast(uint)dims.length;
751 kiss_fftnd_cfg!S st = null;
752 int dimprod = 1;
753 usize memneeded = (kiss_fftnd_state!S).sizeof;
754 ubyte* ptr;
756 foreach (immutable uint i; 0..ndims) {
757 usize sublen = 0;
758 kiss_fft_alloc!S(dims[i], dir, null, &sublen);
759 memneeded += sublen; /* st.states[i] */
760 dimprod *= dims[i];
762 memneeded += int.sizeof*ndims;/* st.dims */
763 memneeded += (void*).sizeof*ndims;/* st.states */
764 memneeded += (kiss_fft_cpx!S).sizeof*dimprod; /* st.tmpbuf */
766 if (lenmem is null) {
767 /* allocate for the caller */
768 st = cast(kiss_fftnd_cfg!S)malloc(memneeded);
769 } else {
770 /* initialize supplied buffer if big enough */
771 if (*lenmem >= memneeded) st = cast(kiss_fftnd_cfg!S)mem;
772 *lenmem = memneeded; /* tell caller how big struct is (or would be) */
774 if (st is null) return null; /* malloc failed or buffer too small */
776 st.dimprod = dimprod;
777 st.ndims = ndims;
778 ptr = cast(ubyte*)(st+1);
780 st.states = cast(kiss_fft_cfg!S*)ptr;
781 ptr += (void*).sizeof*ndims;
783 st.dims = cast(uint*)ptr;
784 ptr += int.sizeof*ndims;
786 st.tmpbuf = cast(kiss_fft_cpx!S*)ptr;
787 ptr += (kiss_fft_cpx!S).sizeof*dimprod;
789 foreach (immutable uint i; 0..ndims) {
790 usize len;
791 st.dims[i] = dims[i];
792 kiss_fft_alloc!S(st.dims[i], dir, null, &len);
793 st.states[i] = kiss_fft_alloc!S(st.dims[i], dir, ptr, &len);
794 ptr += len;
797 return st;
802 public void kiss_fftnd_free(S) (ref kiss_fftnd_cfg!S p)
803 if (is(S == float) || is(S == double) || is(S == real))
805 if (p !is null) {
806 import core.stdc.stdlib : free;
807 free(p);
808 p = null;
814 This works by tackling one dimension at a time.
816 In effect,
817 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
818 A Di-sized fft is taken of each column, transposing the matrix as it goes.
820 Here's a 3-d example:
821 Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
822 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
823 [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
825 Stage 0 ( D=2): treat the buffer as a 2x12 matrix
826 [ [a b ... k l]
827 [m n ... w x] ]
829 FFT each column with size 2.
830 Transpose the matrix at the same time using kiss_fft_stride.
832 [ [ a+m a-m ]
833 [ b+n b-n]
835 [ k+w k-w ]
836 [ l+x l-x ] ]
838 Note fft([x y]) == [x+y x-y]
840 Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
841 [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
842 [ e+q e-q f+r f-r g+s g-s h+t h-t ]
843 [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
845 And perform FFTs (size=3) on each of the columns as above, transposing
846 the matrix as it goes. The output of stage 1 is
847 (Legend: ap = [ a+m e+q i+u ]
848 am = [ a-m e-q i-u ] )
850 [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
851 [ sum(am) fft(am)[0] fft(am)[1] ]
852 [ sum(bp) fft(bp)[0] fft(bp)[1] ]
853 [ sum(bm) fft(bm)[0] fft(bm)[1] ]
854 [ sum(cp) fft(cp)[0] fft(cp)[1] ]
855 [ sum(cm) fft(cm)[0] fft(cm)[1] ]
856 [ sum(dp) fft(dp)[0] fft(dp)[1] ]
857 [ sum(dm) fft(dm)[0] fft(dm)[1] ] ]
859 Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
860 [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
861 [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
862 [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
863 [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ]
865 Then FFTs each column, transposing as it goes.
867 The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
869 Note as a sanity check that the first element of the final
870 stage's output (DC term) is
871 sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
872 , i.e. the summation of all 24 input elements.
875 public void kiss_fftnd(S) (kiss_fftnd_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
876 if (is(S == float) || is(S == double) || is(S == real))
878 import core.stdc.string : memcpy;
880 const(kiss_fft_cpx!S)* bufin = fin;
881 kiss_fft_cpx!S* bufout;
883 /* arrange it so the last bufout == fout */
884 if (st.ndims&1) {
885 bufout = fout;
886 if (fin is fout) {
887 memcpy(st.tmpbuf, fin, (kiss_fft_cpx!S).sizeof*st.dimprod);
888 bufin = st.tmpbuf;
890 } else {
891 bufout = st.tmpbuf;
894 foreach (immutable uint k; 0..st.ndims) {
895 uint curdim = st.dims[k];
896 uint stride = st.dimprod/curdim;
898 foreach (immutable uint i; 0..stride) {
899 kiss_fft_stride!S(st.states[k], bufin+i, bufout+i*curdim, stride);
902 /* toggle back and forth between the two buffers */
903 if (bufout is st.tmpbuf) {
904 bufout = fout;
905 bufin = st.tmpbuf;
906 } else {
907 bufout = st.tmpbuf;
908 bufin = fout;
914 // ////////////////////////////////////////////////////////////////////////// //
916 public alias kiss_fftndr_cfg_f = kiss_fftndr_state!float*; ///
917 public alias kiss_fftndr_cfg_d = kiss_fftndr_state!double*; ///
919 public template kiss_fftndr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
920 alias kiss_fftndr_cfg = kiss_fftndr_state!T*;
923 struct kiss_fftndr_state(S)
924 if (is(S == float) || is(S == double) || is(S == real))
926 uint dimReal;
927 uint dimOther;
928 kiss_fftr_cfg!S cfg_r;
929 kiss_fftnd_cfg!S cfg_nd;
930 ubyte* tmpbuf;
934 int prod (const(uint)[] dims) pure nothrow @trusted @nogc {
935 uint x = 1;
936 uint ndims = cast(uint)dims.length;
937 const(uint)* dp = dims.ptr;
938 while (ndims--) x *= *dp++;
939 return x;
942 T MAX(T) (in T a, in T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
946 dims[0] must be even
948 If you don't care to allocate space, use mem = lenmem = null
950 public kiss_fftndr_cfg!S kiss_fftndr_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
951 if (is(S == float) || is(S == double) || is(S == real))
953 import core.stdc.stdlib : malloc, free;
954 import core.stdc.string : memset;
956 if (dims.length < 1 || dims.length > ushort.max) return null;
957 immutable uint ndims = cast(uint)dims.length;
959 kiss_fftndr_cfg!S st = null;
960 usize nr = 0, nd = 0, ntmp = 0;
961 uint dimReal = dims[ndims-1];
962 uint dimOther = prod(dims[0..$-1]);
963 usize memneeded;
965 kiss_fftr_alloc!S(dimReal, dir, null, &nr);
966 kiss_fftnd_alloc!S(dims[0..$-1], dir, null, &nd);
967 ntmp =
968 MAX(2*dimOther, dimReal+2)*(kiss_fft_scalar!S).sizeof+ // freq buffer for one pass
969 dimOther*(dimReal+2)*(kiss_fft_scalar!S).sizeof; // large enough to hold entire input in case of in-place
971 memneeded = (kiss_fftndr_state!S).sizeof+nr+nd+ntmp;
973 bool malloced = false;
974 if (lenmem is null) {
975 st = cast(kiss_fftndr_cfg!S)malloc(memneeded);
976 malloced = true;
977 } else {
978 if (*lenmem >= memneeded) st = cast(kiss_fftndr_cfg!S)mem;
979 *lenmem = memneeded;
981 if (st is null) return null;
983 memset(st, 0, memneeded);
985 st.dimReal = dimReal;
986 st.dimOther = dimOther;
987 st.cfg_r = kiss_fftr_alloc!S(dimReal, dir, st+1, &nr);
988 if (st.cfg_r is null) { if (malloced) free(st); return null; }
989 st.cfg_nd = kiss_fftnd_alloc!S(dims[0..$-1], dir, (cast(ubyte*)st.cfg_r)+nr, &nd);
990 if (st.cfg_nd is null) { if (malloced) free(st); return null; }
991 st.tmpbuf = cast(ubyte*)st.cfg_nd+nd;
992 assert(st.tmpbuf !is null);
994 return st;
999 public void kiss_fftndr_free(S) (ref kiss_fftndr_cfg!S p)
1000 if (is(S == float) || is(S == double) || is(S == real))
1002 if (p !is null) {
1003 import core.stdc.stdlib : free;
1004 free(p);
1005 p = null;
1011 input timedata has dims[0] X dims[1] X ... X dims[ndims-1] scalar points
1012 output freqdata has dims[0] X dims[1] X ... X dims[ndims-1]/2+1 complex points
1014 public void kiss_fftndr(S) (kiss_fftndr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
1015 if (is(S == float) || is(S == double) || is(S == real))
1017 uint dimReal = st.dimReal;
1018 uint dimOther = st.dimOther;
1019 uint nrbins = dimReal/2+1;
1021 kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1022 kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1023 //assert(tmp1 !is null);
1025 // timedata is N0 x N1 x ... x Nk real
1027 // take a real chunk of data, fft it and place the output at correct intervals
1028 foreach (immutable uint k1; 0..dimOther) {
1029 kiss_fftr!S(st.cfg_r, timedata+k1*dimReal, tmp1); // tmp1 now holds nrbins complex points
1030 foreach (immutable uint k2; 0..nrbins) tmp2[k2*dimOther+k1] = tmp1[k2];
1033 foreach (immutable uint k2; 0..nrbins) {
1034 kiss_fftnd!S(st.cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
1035 foreach (immutable uint k1; 0..dimOther) freqdata[k1*nrbins+k2] = tmp1[k1];
1041 input and output dimensions are the exact opposite of kiss_fftndr
1043 public void kiss_fftndri(S) (kiss_fftndr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
1044 if (is(S == float) || is(S == double) || is(S == real))
1046 int dimReal = st.dimReal;
1047 int dimOther = st.dimOther;
1048 int nrbins = dimReal/2+1;
1049 kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1050 kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1052 foreach (immutable uint k2; 0..nrbins) {
1053 foreach (immutable uint k1; 0..dimOther) tmp1[k1] = freqdata[k1*(nrbins)+k2];
1054 kiss_fftnd!S(st.cfg_nd, tmp1, tmp2+k2*dimOther);
1057 foreach (immutable uint k1; 0..dimOther) {
1058 foreach (immutable uint k2; 0..nrbins) tmp1[k2] = tmp2[k2*dimOther+k1];
1059 kiss_fftri!S(st.cfg_r, tmp1, timedata+k1*dimReal);