kissfft.d

   1 /*
   2  * Copyright (c) 2003-2010, Mark Borgerding
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are
   8  * met:
   9  *
  10  *  * Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  *
  13  *  * Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the
  16  *    distribution.
  17  *
  18  *  * Neither the author nor the names of any contributors may be used to
  19  *    endorse or promote products derived from this software without
  20  *    specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  25  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  26  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33  */
  34 /* Ported by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
  35  * Understanding is not required. Only obedience.
  36  *
  37  * This program is free software: you can redistribute it and/or modify
  38  * it under the terms of the GNU General Public License as published by
  39  * the Free Software Foundation, version 3 of the License ONLY.
  40  *
  41  * This program is distributed in the hope that it will be useful,
  42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44  * GNU General Public License for more details.
  45  *
  46  * You should have received a copy of the GNU General Public License
  47  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  48  */
  49 module iv.kissfft;
  50 private nothrow @trusted @nogc:
  51
  52 version(aliced) {} else alias usize = size_t;
  53
  54 //version = kiss_fft_use_parallel; // absolutely no reason to
  55
  56
  57 // ////////////////////////////////////////////////////////////////////////// //
  58 ///
  59 public enum KissFFT { Forward, Inverse }
  60
  61 ///
  62 //public alias kiss_fft_scalar = float;
  63 //public alias kiss_fft_scalar = double;
  64
  65 ///
  66 public template kiss_fft_scalar(T) if (is(T == float) || is(T == double) || is(T == real)) {
  67   alias kiss_fft_scalar = T;
  68 }
  69
  70
  71 ///
  72 public template isGoodKissFFTScalar(T) {
  73   static if (is(T == float) || is(T == double) || is(T == real)) {
  74     enum isGoodKissFFTScalar = true;
  75   } else {
  76     enum isGoodKissFFTScalar = false;
  77   }
  78 }
  79
  80
  81 ///
  82 public template isKissFFTComplex(T) {
  83   static if (is(T : kiss_fft_cpx!S, S)) {
  84     enum isKissFFTComplex = true;
  85   } else {
  86     enum isKissFFTComplex = false;
  87   }
  88 }
  89
  90
  91 ///
  92 public template KissFFTScalar(T) {
  93   static if (is(T : kiss_fft_cpx!S, S)) {
  94     alias KissFFTScalar = S;
  95   } else {
  96     static assert(0, "not a KissFFT complex type");
  97   }
  98 }
  99
 100 ///
 101 public align(1) struct kiss_fft_cpx(T) if (is(T == float) || is(T == double) || is(T == real)) {
 102 align(1):
 103   alias Scalar = T;
 104   T r;
 105   T i;
 106
 107   T opIndex (uint n) const pure nothrow @trusted @nogc { pragma(inline, true); return (n ? i : r); }
 108   void opIndexAssign (in T v, uint n) nothrow @trusted @nogc { pragma(inline, true); if (n) i = v; else r = v; }
 109 }
 110
 111
 112 public alias kiss_fft_cpx_f = kiss_fft_cpx!float; ///
 113 public alias kiss_fft_cpx_d = kiss_fft_cpx!double; ///
 114
 115
 116 public alias kiss_fft_cfg_f = kiss_fft_state!float*; ///
 117 public alias kiss_fft_cfg_d = kiss_fft_state!double*; ///
 118
 119 public template kiss_fft_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 120   alias kiss_fft_cfg = kiss_fft_state!T*;
 121 }
 122
 123
 124 // ////////////////////////////////////////////////////////////////////////// //
 125 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
 126  *
 127  * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, KissFFT.Forward);`
 128  *
 129  * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
 130  *
 131  * If `lenmem` is `null`, then kiss_fft_alloc will allocate a cfg buffer using `malloc`.
 132  * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
 133  *
 134  * The state can be placed in a user supplied buffer `mem`:
 135  * If `lenmem` is not `null` and `mem` is not `null` and `*lenmem` is large enough,
 136  * then the function places the cfg in `mem` and the size used in `*lenmem`,
 137  * and returns `mem`.
 138  *
 139  * If `lenmem` is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
 140  * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
 141  */
 142 public kiss_fft_cfg!S kiss_fft_alloc(S) (uint nfft, KissFFT dir, void* mem=null, usize* lenmem=null)
 143 if (is(S == float) || is(S == double) || is(S == real))
 144 {
 145   kiss_fft_cfg!S st = null;
 146   usize memneeded = (kiss_fft_state!S).sizeof+(kiss_fft_cpx!S).sizeof*(nfft-1); // twiddle factors
 147
 148   if (lenmem is null) {
 149     import core.stdc.stdlib : malloc;
 150     st = cast(kiss_fft_cfg!S)malloc(memneeded);
 151   } else {
 152     if (mem !is null && *lenmem >= memneeded) st = cast(kiss_fft_cfg!S)mem;
 153     *lenmem = memneeded;
 154   }
 155
 156   if (st !is null) {
 157     st.nfft = nfft;
 158     st.inverse = (dir == KissFFT.Inverse);
 159     foreach (immutable uint i; 0..nfft) {
 160       enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
 161       immutable double phase = -2*pi*i/nfft*(st.inverse ? -1 : 1);
 162       //if (st.inverse) phase = -phase;
 163       mixin(kf_cexp!("st.twiddles.ptr+i", "phase"));
 164     }
 165     kf_factor!S(nfft, st.factors.ptr);
 166   }
 167
 168   return st;
 169 }
 170
 171
 172 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
 173  * buffer and can be simply free()d when no longer needed
 174  */
 175 public void kiss_fft_free(S) (ref kiss_fft_cfg!S p)
 176 if (is(S == float) || is(S == double) || is(S == real))
 177 {
 178   if (p !is null) {
 179     import core.stdc.stdlib : free;
 180     free(p);
 181     p = null;
 182   }
 183 }
 184
 185
 186 /** Perform an FFT on a complex input buffer.
 187  *
 188  * for a forward FFT,
 189  * fin should be  f[0] , f[1] , ... ,f[nfft-1]
 190  * fout will be   F[0] , F[1] , ... ,F[nfft-1]
 191  * Note that each element is complex and can be accessed like f[k].r and f[k].i
 192  */
 193 public void kiss_fft(S) (kiss_fft_cfg!S cfg, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
 194 if (is(S == float) || is(S == double) || is(S == real))
 195 {
 196   assert(cfg !is null);
 197   kiss_fft_stride!S(cfg, fin, fout, 1);
 198 }
 199
 200
 201 /** A more generic version of the above function. It reads its input from every Nth sample. */
 202 public void kiss_fft_stride(S) (kiss_fft_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout, uint in_stride)
 203 if (is(S == float) || is(S == double) || is(S == real))
 204 {
 205   import core.stdc.stdlib : alloca;
 206   assert(st !is null);
 207   if (fin is fout) {
 208     import core.stdc.string : memcpy;
 209     //NOTE: this is not really an in-place FFT algorithm.
 210     //It just performs an out-of-place FFT into a temp buffer
 211     //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
 212     kiss_fft_cpx!S* tmpbuf = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*st.nfft);
 213     kf_work!S(tmpbuf, fin, 1, in_stride, st.factors.ptr, st);
 214     memcpy(fout, tmpbuf, (kiss_fft_cpx!S).sizeof*st.nfft);
 215     //KISS_FFT_TMP_FREE(tmpbuf);
 216   } else {
 217     kf_work!S(fout, fin, 1, in_stride, st.factors.ptr, st);
 218   }
 219 }
 220
 221
 222 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
 223 public uint kiss_fft_next_fast_size (uint n) {
 224   for (;;) {
 225     uint m = n;
 226     while ((m%2) == 0) m /= 2;
 227     while ((m%3) == 0) m /= 3;
 228     while ((m%5) == 0) m /= 5;
 229     if (m <= 1) break; // n is completely factorable by twos, threes, and fives
 230     ++n;
 231   }
 232   return n;
 233 }
 234
 235
 236 // ////////////////////////////////////////////////////////////////////////// //
 237 // kissfftr
 238 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
 239
 240 public alias kiss_fftr_cfg_f = kiss_fftr_state!float*; ///
 241 public alias kiss_fftr_cfg_d = kiss_fftr_state!double*; ///
 242
 243 ///
 244 public template kiss_fftr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 245   alias kiss_fftr_cfg = kiss_fftr_state!T*;
 246 }
 247
 248 struct kiss_fftr_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
 249   kiss_fft_cfg!S substate;
 250   kiss_fft_cpx!S* tmpbuf;
 251   kiss_fft_cpx!S* super_twiddles;
 252 }
 253
 254
 255 /*
 256  ** nfft must be even
 257  *
 258  * If you don't care to allocate space, use mem = lenmem = null
 259  */
 260 public kiss_fftr_cfg!S kiss_fftr_alloc(S) (uint nfft, KissFFT dir, void* mem, usize* lenmem)
 261 if (is(S == float) || is(S == double) || is(S == real))
 262 {
 263   kiss_fftr_cfg!S st = null;
 264   usize subsize, memneeded;
 265
 266   if (nfft&1) return null; //assert(0, "real FFT optimization must be even");
 267   nfft >>= 1;
 268
 269   kiss_fft_alloc!S(nfft, dir, null, &subsize);
 270   memneeded = (kiss_fftr_state!S).sizeof+subsize+(kiss_fft_cpx!S).sizeof*(nfft*3/2);
 271
 272   if (lenmem is null) {
 273     import core.stdc.stdlib : malloc;
 274     st = cast(kiss_fftr_cfg!S)malloc(memneeded);
 275   } else {
 276     if (*lenmem >= memneeded) st = cast(kiss_fftr_cfg!S)mem;
 277     *lenmem = memneeded;
 278   }
 279   if (st is null) return null;
 280
 281   st.substate = cast(kiss_fft_cfg!S)(st+1); // just beyond kiss_fftr_state struct
 282   st.tmpbuf = cast(kiss_fft_cpx!S*)((cast(ubyte*)st.substate)+subsize);
 283   st.super_twiddles = st.tmpbuf+nfft;
 284   kiss_fft_alloc!S(nfft, dir, st.substate, &subsize);
 285
 286   foreach (immutable i; 0..nfft/2) {
 287     enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
 288     immutable double phase = -pi*(cast(double)(i+1)/nfft+0.5)*(dir == KissFFT.Inverse ? -1 : 1);
 289     //if (dir == KissFFT.Inverse) phase = -phase;
 290     mixin(kf_cexp!("st.super_twiddles+i", "phase"));
 291   }
 292
 293   return st;
 294 }
 295
 296
 297 /** Use this to free `fftr` buffer. */
 298 public void kiss_fft_free(S) (ref kiss_fftr_cfg!S p)
 299 if (is(S == float) || is(S == double) || is(S == real))
 300 {
 301   if (p !is null) {
 302     import core.stdc.stdlib : free;
 303     free(p);
 304     p = null;
 305   }
 306 }
 307
 308
 309 /** input timedata has nfft scalar points
 310  * output freqdata has nfft/2+1 complex points
 311  */
 312 public void kiss_fftr(S) (kiss_fftr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
 313 if (is(S == float) || is(S == double) || is(S == real))
 314 {
 315   // input buffer timedata is stored row-wise
 316   uint k, ncfft;
 317   kiss_fft_cpx!S fpnk, fpk, f1k, f2k, tw, tdc;
 318
 319   if (st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
 320
 321   ncfft = st.substate.nfft;
 322
 323   // perform the parallel fft of two real signals packed in real,imag
 324   kiss_fft!S(st.substate, cast(const(kiss_fft_cpx!S)*)timedata, st.tmpbuf);
 325   /* The real part of the DC element of the frequency spectrum in st.tmpbuf
 326    * contains the sum of the even-numbered elements of the input time sequence
 327    * The imag part is the sum of the odd-numbered elements
 328    *
 329    * The sum of tdc.r and tdc.i is the sum of the input time sequence.
 330    *      yielding DC of input time sequence
 331    * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
 332    *      yielding Nyquist bin of input time sequence
 333    */
 334
 335   tdc.r = st.tmpbuf[0].r;
 336   tdc.i = st.tmpbuf[0].i;
 337   freqdata[0].r = tdc.r+tdc.i;
 338   freqdata[ncfft].r = tdc.r-tdc.i;
 339   freqdata[ncfft].i = freqdata[0].i = 0;
 340
 341   for (k = 1; k <= ncfft/2; ++k) {
 342     fpk = st.tmpbuf[k];
 343     fpnk.r = st.tmpbuf[ncfft-k].r;
 344     fpnk.i = -st.tmpbuf[ncfft-k].i;
 345
 346     mixin(C_ADD!("f1k", "fpk", "fpnk"));
 347     mixin(C_SUB!("f2k", "fpk", "fpnk"));
 348     mixin(C_MUL!("tw", "f2k", "st.super_twiddles[k-1]"));
 349
 350     freqdata[k].r = mixin(HALF_OF!"f1k.r+tw.r");
 351     freqdata[k].i = mixin(HALF_OF!"f1k.i+tw.i");
 352     freqdata[ncfft-k].r = mixin(HALF_OF!"f1k.r-tw.r");
 353     freqdata[ncfft-k].i = mixin(HALF_OF!"tw.i-f1k.i");
 354   }
 355 }
 356
 357
 358 /** input freqdata has  nfft/2+1 complex points
 359  * output timedata has nfft scalar points
 360  */
 361 public void kiss_fftri(S) (kiss_fftr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
 362 if (is(S == float) || is(S == double) || is(S == real))
 363 {
 364   // input buffer timedata is stored row-wise
 365   if (st.substate.inverse == 0) assert(0, "kiss fft usage error: improper alloc");
 366
 367   uint ncfft = st.substate.nfft;
 368
 369   st.tmpbuf[0].r = freqdata[0].r+freqdata[ncfft].r;
 370   st.tmpbuf[0].i = freqdata[0].r-freqdata[ncfft].r;
 371
 372   foreach (immutable uint k; 1..ncfft/2+1) {
 373     kiss_fft_cpx!S fnkc = void, fek = void, fok = void, tmp = void;
 374     kiss_fft_cpx!S fk = freqdata[k];
 375     fnkc.r = freqdata[ncfft-k].r;
 376     fnkc.i = -freqdata[ncfft-k].i;
 377     mixin(C_ADD!("fek", "fk", "fnkc"));
 378     mixin(C_SUB!("tmp", "fk", "fnkc"));
 379     mixin(C_MUL!("fok", "tmp", "st.super_twiddles[k-1]"));
 380     mixin(C_ADD!("st.tmpbuf[k]", "fek", "fok"));
 381     mixin(C_SUB!("st.tmpbuf[ncfft-k]", "fek", "fok"));
 382     st.tmpbuf[ncfft-k].i *= -1;
 383   }
 384   kiss_fft!S(st.substate, st.tmpbuf, cast(kiss_fft_cpx!S*)timedata);
 385 }
 386
 387
 388 /** for real ffts, we need an even size */
 389 public uint kiss_fftr_next_fast_size_real (uint n) {
 390   pragma(inline, true);
 391   return kiss_fft_next_fast_size((n+1)>>1)<<1;
 392 }
 393
 394
 395 // ////////////////////////////////////////////////////////////////////////// //
 396 enum MAXFACTORS = 32;
 397 /* e.g. an fft of length 128 has 4 factors
 398  as far as kissfft is concerned
 399  4*4*4*2
 400  */
 401
 402 struct kiss_fft_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
 403   uint nfft;
 404   uint inverse;
 405   uint[2*MAXFACTORS] factors;
 406   kiss_fft_cpx!S[1] twiddles;
 407 }
 408
 409
 410 // ////////////////////////////////////////////////////////////////////////// //
 411 private void kf_bfly2(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, int m) {
 412   kiss_fft_cpx!S* Fout2;
 413   const(kiss_fft_cpx!S)* tw1 = st.twiddles.ptr;
 414   kiss_fft_cpx!S t;
 415   Fout2 = Fout+m;
 416   do {
 417     mixin(C_MUL!("t", "*Fout2", "*tw1"));
 418     tw1 += fstride;
 419     mixin(C_SUB!("*Fout2", "*Fout", "t"));
 420     mixin(C_ADDTO!("*Fout", "t"));
 421     ++Fout2;
 422     ++Fout;
 423   } while (--m);
 424 }
 425
 426
 427 private void kf_bfly4(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, in usize m) {
 428   const(kiss_fft_cpx!S)* tw1, tw2, tw3;
 429   kiss_fft_cpx!S[6] scratch = void;
 430   usize k = m;
 431   immutable usize m2 = 2*m;
 432   immutable usize m3 = 3*m;
 433   tw3 = tw2 = tw1 = st.twiddles.ptr;
 434   do {
 435     mixin(C_MUL!("scratch[0]", "Fout[m]", "*tw1"));
 436     mixin(C_MUL!("scratch[1]", "Fout[m2]", "*tw2"));
 437     mixin(C_MUL!("scratch[2]", "Fout[m3]", "*tw3"));
 438
 439     mixin(C_SUB!("scratch[5]", "*Fout", "scratch[1]"));
 440     mixin(C_ADDTO!("*Fout", "scratch[1]"));
 441     mixin(C_ADD!("scratch[3]", "scratch[0]", "scratch[2]"));
 442     mixin(C_SUB!("scratch[4]", "scratch[0]", "scratch[2]"));
 443     mixin(C_SUB!("Fout[m2]", "*Fout", "scratch[3]"));
 444     tw1 += fstride;
 445     tw2 += fstride*2;
 446     tw3 += fstride*3;
 447     mixin(C_ADDTO!("*Fout", "scratch[3]"));
 448
 449     if (st.inverse) {
 450       Fout[m].r = scratch[5].r-scratch[4].i;
 451       Fout[m].i = scratch[5].i+scratch[4].r;
 452       Fout[m3].r = scratch[5].r+scratch[4].i;
 453       Fout[m3].i = scratch[5].i-scratch[4].r;
 454     } else {
 455       Fout[m].r = scratch[5].r+scratch[4].i;
 456       Fout[m].i = scratch[5].i-scratch[4].r;
 457       Fout[m3].r = scratch[5].r-scratch[4].i;
 458       Fout[m3].i = scratch[5].i+scratch[4].r;
 459     }
 460     ++Fout;
 461   } while (--k);
 462 }
 463
 464
 465 private void kf_bfly3(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, usize m) {
 466   usize k = m;
 467   immutable usize m2 = 2*m;
 468   const(kiss_fft_cpx!S)* tw1, tw2;
 469   kiss_fft_cpx!S[5] scratch = void;
 470   kiss_fft_cpx!S epi3;
 471   epi3 = st.twiddles[fstride*m];
 472   tw1 = tw2 = st.twiddles.ptr;
 473   do {
 474     mixin(C_MUL!("scratch[1]", "Fout[m]", "*tw1"));
 475     mixin(C_MUL!("scratch[2]", "Fout[m2]", "*tw2"));
 476
 477     mixin(C_ADD!("scratch[3]", "scratch[1]", "scratch[2]"));
 478     mixin(C_SUB!("scratch[0]", "scratch[1]", "scratch[2]"));
 479     tw1 += fstride;
 480     tw2 += fstride*2;
 481
 482     Fout[m].r = Fout.r-mixin(HALF_OF!"scratch[3].r");
 483     Fout[m].i = Fout.i-mixin(HALF_OF!"scratch[3].i");
 484
 485     mixin(C_MULBYSCALAR!("scratch[0]", "epi3.i"));
 486
 487     mixin(C_ADDTO!("*Fout", "scratch[3]"));
 488
 489     Fout[m2].r = Fout[m].r+scratch[0].i;
 490     Fout[m2].i = Fout[m].i-scratch[0].r;
 491
 492     Fout[m].r -= scratch[0].i;
 493     Fout[m].i += scratch[0].r;
 494
 495     ++Fout;
 496   } while (--k);
 497 }
 498
 499
 500 private void kf_bfly5(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m) {
 501   kiss_fft_cpx!S* Fout0, Fout1, Fout2, Fout3, Fout4;
 502   kiss_fft_cpx!S[13] scratch = void;
 503   const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
 504   const(kiss_fft_cpx!S)* tw;
 505   kiss_fft_cpx!S ya = twiddles[fstride*m];
 506   kiss_fft_cpx!S yb = twiddles[fstride*2*m];
 507
 508   Fout0 = Fout;
 509   Fout1 = Fout0+m;
 510   Fout2 = Fout0+2*m;
 511   Fout3 = Fout0+3*m;
 512   Fout4 = Fout0+4*m;
 513
 514   tw = st.twiddles.ptr;
 515   foreach (immutable uint u; 0..m) {
 516     scratch[0] = *Fout0;
 517
 518     mixin(C_MUL!("scratch[1]", "*Fout1", "tw[u*fstride]"));
 519     mixin(C_MUL!("scratch[2]", "*Fout2", "tw[2*u*fstride]"));
 520     mixin(C_MUL!("scratch[3]", "*Fout3", "tw[3*u*fstride]"));
 521     mixin(C_MUL!("scratch[4]", "*Fout4", "tw[4*u*fstride]"));
 522
 523     mixin(C_ADD!("scratch[7]", "scratch[1]", "scratch[4]"));
 524     mixin(C_SUB!("scratch[10]", "scratch[1]", "scratch[4]"));
 525     mixin(C_ADD!("scratch[8]", "scratch[2]", "scratch[3]"));
 526     mixin(C_SUB!("scratch[9]", "scratch[2]", "scratch[3]"));
 527
 528     Fout0.r += scratch[7].r+scratch[8].r;
 529     Fout0.i += scratch[7].i+scratch[8].i;
 530
 531     scratch[5].r = scratch[0].r+scratch[7].r*ya.r+scratch[8].r*yb.r;
 532     scratch[5].i = scratch[0].i+scratch[7].i*ya.r+scratch[8].i*yb.r;
 533
 534     scratch[6].r =  scratch[10].i*ya.i+scratch[9].i*yb.i;
 535     scratch[6].i = -scratch[10].r*ya.i-scratch[9].r*yb.i;
 536
 537     mixin(C_SUB!("*Fout1", "scratch[5]", "scratch[6]"));
 538     mixin(C_ADD!("*Fout4", "scratch[5]", "scratch[6]"));
 539
 540     scratch[11].r = scratch[0].r+scratch[7].r*yb.r+scratch[8].r*ya.r;
 541     scratch[11].i = scratch[0].i+scratch[7].i*yb.r+scratch[8].i*ya.r;
 542     scratch[12].r = -scratch[10].i*yb.i+scratch[9].i*ya.i;
 543     scratch[12].i =  scratch[10].r*yb.i-scratch[9].r*ya.i;
 544
 545     mixin(C_ADD!("*Fout2", "scratch[11]", "scratch[12]"));
 546     mixin(C_SUB!("*Fout3", "scratch[11]", "scratch[12]"));
 547
 548     ++Fout0;
 549     ++Fout1;
 550     ++Fout2;
 551     ++Fout3;
 552     ++Fout4;
 553   }
 554 }
 555
 556 // perform the butterfly for one stage of a mixed radix FFT
 557 private void kf_bfly_generic(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m, uint p) {
 558   import core.stdc.stdlib : alloca;
 559
 560   //uint q1, q;
 561   const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
 562   kiss_fft_cpx!S t;
 563   uint Norig = st.nfft;
 564
 565   //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
 566   kiss_fft_cpx!S* scratch = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*p);
 567
 568   foreach (immutable uint u; 0..m) {
 569     uint k = u;
 570     foreach (immutable uint q1; 0..p) {
 571       scratch[q1] = Fout[k];
 572       k += m;
 573     }
 574
 575     k = u;
 576     foreach (immutable uint q1; 0..p) {
 577       uint twidx = 0;
 578       Fout[k] = scratch[0];
 579       foreach (immutable uint q; 1..p) {
 580         twidx += fstride*k;
 581         if (twidx >= Norig) twidx -= Norig;
 582         mixin(C_MUL!("t", "scratch[q]", "twiddles[twidx]"));
 583         mixin(C_ADDTO!("Fout[k]", "t"));
 584       }
 585       k += m;
 586     }
 587   }
 588   //KISS_FFT_TMP_FREE(scratch);
 589 }
 590
 591
 592 private void kf_work(S) (kiss_fft_cpx!S* Fout, const(kiss_fft_cpx!S)* f, in usize fstride, uint in_stride, uint* factors, const(kiss_fft_cfg!S) st) {
 593   kiss_fft_cpx!S* Fout_beg = Fout;
 594   immutable uint p = *factors++; // the radix
 595   immutable uint m = *factors++; // stage's fft length/p
 596   const(kiss_fft_cpx!S)* Fout_end = Fout+p*m;
 597
 598   version(kiss_fft_use_parallel) {
 599     // use threads at the top-level (not recursive)
 600     import std.parallelism;
 601     import std.range : iota;
 602     if (fstride == 1 && p <= 5) {
 603       // execute the p different work units in different threads
 604       foreach (uint k; parallel(iota(p))) {
 605         kf_work!S(Fout+k*m, f+fstride*in_stride*k, fstride*p, in_stride, factors, st);
 606       }
 607       // all threads have joined by this point
 608       switch (p) {
 609         case 2: kf_bfly2!S(Fout, fstride, st, m); break;
 610         case 3: kf_bfly3!S(Fout, fstride, st, m); break;
 611         case 4: kf_bfly4!S(Fout, fstride, st, m); break;
 612         case 5: kf_bfly5!S(Fout, fstride, st, m); break;
 613         default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
 614       }
 615       return;
 616     }
 617   }
 618
 619   if (m == 1) {
 620     do {
 621       *Fout = *f;
 622       f += fstride*in_stride;
 623     } while (++Fout !is Fout_end);
 624   } else {
 625     do {
 626       // recursive call:
 627       // DFT of size m*p performed by doing
 628       // p instances of smaller DFTs of size m,
 629       // each one takes a decimated version of the input
 630       kf_work!S(Fout, f, fstride*p, in_stride, factors, st);
 631       f += fstride*in_stride;
 632     } while ((Fout += m) !is Fout_end);
 633   }
 634
 635   Fout = Fout_beg;
 636
 637   // recombine the p smaller DFTs
 638   switch (p) {
 639     case 2: kf_bfly2!S(Fout, fstride, st, m); break;
 640     case 3: kf_bfly3!S(Fout, fstride, st, m); break;
 641     case 4: kf_bfly4!S(Fout, fstride, st, m); break;
 642     case 5: kf_bfly5!S(Fout, fstride, st, m); break;
 643     default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
 644   }
 645 }
 646
 647
 648 /* facbuf is populated by p1, m1, p2, m2, ...
 649  * where
 650  *   p[i]*m[i] = m[i-1]
 651  *   m0 = n
 652  */
 653 private void kf_factor(S) (uint n, uint* facbuf) {
 654   import std.math : floor, sqrt;
 655   immutable double floor_sqrt = floor(sqrt(cast(double)n));
 656   uint p = 4;
 657   // factor out powers of 4, powers of 2, then any remaining primes
 658   do {
 659     while (n%p) {
 660       switch (p) {
 661         case 4: p = 2; break;
 662         case 2: p = 3; break;
 663         default: p += 2; break;
 664       }
 665       if (p > floor_sqrt) p = n; // no more factors, skip to end
 666     }
 667     n /= p;
 668     *facbuf++ = p;
 669     *facbuf++ = n;
 670   } while (n > 1);
 671 }
 672
 673
 674 // ////////////////////////////////////////////////////////////////////////// //
 675 // kissfft_guts
 676 /*
 677   Explanation of macros dealing with complex math:
 678
 679    C_MUL(m,a,b)         : m = a*b
 680    C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 681    C_SUB( res, a,b)     : res = a-b
 682    C_SUBFROM( res , a)  : res -= a
 683    C_ADDTO( res , a)    : res += a
 684  */
 685 //enum S_MUL(string a, string b) = "(("~a~")*("~b~"))";
 686 enum C_MUL(string m, string a, string b) =
 687   "{ ("~m~").r = ("~a~").r*("~b~").r-("~a~").i*("~b~").i;
 688      ("~m~").i = ("~a~").r*("~b~").i+("~a~").i*("~b~").r; }";
 689 enum C_MULBYSCALAR(string c, string s) = " { ("~c~").r *= ("~s~"); ("~c~").i *= ("~s~"); }";
 690
 691 enum C_ADD(string res, string a, string b) = "{ ("~res~").r=("~a~").r+("~b~").r; ("~res~").i=("~a~").i+("~b~").i; }";
 692 enum C_SUB(string res, string a, string b) = "{ ("~res~").r=("~a~").r-("~b~").r; ("~res~").i=("~a~").i-("~b~").i; }";
 693 enum C_ADDTO(string res, string a) = "{ ("~res~").r += ("~a~").r; ("~res~").i += ("~a~").i; }";
 694 enum C_SUBFROM(string res, string a) = "{ ("~res~").r -= ("~a~").r; ("~res~").i -= ("~a~").i; }";
 695
 696
 697 //kiss_fft_scalar KISS_FFT_COS(T) (T phase) { import std.math : cos; return cos(phase); }
 698 //kiss_fft_scalar KISS_FFT_SIN(T) (T phase) { import std.math : sin; return sin(phase); }
 699 //kiss_fft_scalar HALF_OF (kiss_fft_scalar x) { return x*cast(kiss_fft_scalar)0.5; }
 700 enum HALF_OF(string x) = "(cast(kiss_fft_scalar!S)(("~x~")*cast(kiss_fft_scalar!S)0.5))";
 701
 702 //enum kf_cexp(string x, string phase) = "{ ("~x~").r = KISS_FFT_COS("~phase~"); ("~x~").i = KISS_FFT_SIN("~phase~"); }";
 703 enum kf_cexp(string x, string phase) = "{ import std.math : cos, sin; ("~x~").r = cos("~phase~"); ("~x~").i = sin("~phase~"); }";
 704
 705
 706 /+
 707 #ifdef KISS_FFT_USE_ALLOCA
 708 // define this to allow use of alloca instead of malloc for temporary buffers
 709 // Temporary buffers are used in two case:
 710 // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
 711 // 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
 712 #include <alloca.h>
 713 #define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
 714 #define  KISS_FFT_TMP_FREE(ptr)
 715 #else
 716 #define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
 717 #define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
 718 #endif
 719 +/
 720
 721
 722 // ////////////////////////////////////////////////////////////////////////// //
 723
 724 public alias kiss_fftnd_cfg_f = kiss_fftnd_state!float*; ///
 725 public alias kiss_fftnd_cfg_d = kiss_fftnd_state!double*; ///
 726
 727 public template kiss_fftnd_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 728   alias kiss_fftnd_cfg = kiss_fftnd_state!T*;
 729 }
 730
 731 struct kiss_fftnd_state(S)
 732 if (is(S == float) || is(S == double) || is(S == real))
 733 {
 734   uint dimprod; /* dimsum would be mighty tasty right now */
 735   uint ndims;
 736   uint* dims;
 737   kiss_fft_cfg!S* states; /* cfg states for each dimension */
 738   kiss_fft_cpx!S* tmpbuf; /* buffer capable of hold the entire input */
 739 }
 740
 741
 742 ///
 743 public kiss_fftnd_cfg!S kiss_fftnd_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
 744 if (is(S == float) || is(S == double) || is(S == real))
 745 {
 746   import core.stdc.stdlib : malloc;
 747
 748   if (dims.length < 1 || dims.length > ushort.max) return null;
 749   immutable uint ndims = cast(uint)dims.length;
 750   kiss_fftnd_cfg!S st = null;
 751   int dimprod = 1;
 752   usize memneeded = (kiss_fftnd_state!S).sizeof;
 753   ubyte* ptr;
 754
 755   foreach (immutable uint i; 0..ndims) {
 756     usize sublen = 0;
 757     kiss_fft_alloc!S(dims[i], dir, null, &sublen);
 758     memneeded += sublen; /* st.states[i] */
 759     dimprod *= dims[i];
 760   }
 761   memneeded += int.sizeof*ndims;/*  st.dims */
 762   memneeded += (void*).sizeof*ndims;/* st.states  */
 763   memneeded += (kiss_fft_cpx!S).sizeof*dimprod; /* st.tmpbuf */
 764
 765   if (lenmem is null) {
 766     /* allocate for the caller */
 767     st = cast(kiss_fftnd_cfg!S)malloc(memneeded);
 768   } else {
 769     /* initialize supplied buffer if big enough */
 770     if (*lenmem >= memneeded) st = cast(kiss_fftnd_cfg!S)mem;
 771     *lenmem = memneeded; /* tell caller how big struct is (or would be) */
 772   }
 773   if (st is null) return null; /* malloc failed or buffer too small */
 774
 775   st.dimprod = dimprod;
 776   st.ndims = ndims;
 777   ptr = cast(ubyte*)(st+1);
 778
 779   st.states = cast(kiss_fft_cfg!S*)ptr;
 780   ptr += (void*).sizeof*ndims;
 781
 782   st.dims = cast(uint*)ptr;
 783   ptr += int.sizeof*ndims;
 784
 785   st.tmpbuf = cast(kiss_fft_cpx!S*)ptr;
 786   ptr += (kiss_fft_cpx!S).sizeof*dimprod;
 787
 788   foreach (immutable uint i; 0..ndims) {
 789     usize len;
 790     st.dims[i] = dims[i];
 791     kiss_fft_alloc!S(st.dims[i], dir, null, &len);
 792     st.states[i] = kiss_fft_alloc!S(st.dims[i], dir, ptr, &len);
 793     ptr += len;
 794   }
 795
 796   return st;
 797 }
 798
 799
 800 ///
 801 public void kiss_fftnd_free(S) (ref kiss_fftnd_cfg!S p)
 802 if (is(S == float) || is(S == double) || is(S == real))
 803 {
 804   if (p !is null) {
 805     import core.stdc.stdlib : free;
 806     free(p);
 807     p = null;
 808   }
 809 }
 810
 811
 812 /**
 813 This works by tackling one dimension at a time.
 814
 815 In effect,
 816 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
 817 A Di-sized fft is taken of each column, transposing the matrix as it goes.
 818
 819 Here's a 3-d example:
 820 Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
 821 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
 822  [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
 823
 824 Stage 0 ( D=2): treat the buffer as a 2x12 matrix
 825  [ [a b ... k l]
 826    [m n ... w x] ]
 827
 828  FFT each column with size 2.
 829  Transpose the matrix at the same time using kiss_fft_stride.
 830
 831  [ [ a+m a-m ]
 832    [ b+n b-n]
 833    ...
 834    [ k+w k-w ]
 835    [ l+x l-x ] ]
 836
 837  Note fft([x y]) == [x+y x-y]
 838
 839 Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
 840  [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
 841    [ e+q e-q f+r f-r g+s g-s h+t h-t ]
 842    [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
 843
 844  And perform FFTs (size=3) on each of the columns as above, transposing
 845  the matrix as it goes.  The output of stage 1 is
 846      (Legend: ap = [ a+m e+q i+u ]
 847               am = [ a-m e-q i-u ] )
 848
 849  [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
 850    [ sum(am) fft(am)[0] fft(am)[1] ]
 851    [ sum(bp) fft(bp)[0] fft(bp)[1] ]
 852    [ sum(bm) fft(bm)[0] fft(bm)[1] ]
 853    [ sum(cp) fft(cp)[0] fft(cp)[1] ]
 854    [ sum(cm) fft(cm)[0] fft(cm)[1] ]
 855    [ sum(dp) fft(dp)[0] fft(dp)[1] ]
 856    [ sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
 857
 858 Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
 859  [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
 860    [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
 861    [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
 862    [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
 863
 864  Then FFTs each column, transposing as it goes.
 865
 866  The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
 867
 868  Note as a sanity check that the first element of the final
 869  stage's output (DC term) is
 870  sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
 871  , i.e. the summation of all 24 input elements.
 872
 873 */
 874 public void kiss_fftnd(S) (kiss_fftnd_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
 875 if (is(S == float) || is(S == double) || is(S == real))
 876 {
 877   import core.stdc.string : memcpy;
 878
 879   const(kiss_fft_cpx!S)* bufin = fin;
 880   kiss_fft_cpx!S* bufout;
 881
 882   /* arrange it so the last bufout == fout */
 883   if (st.ndims&1) {
 884     bufout = fout;
 885     if (fin is fout) {
 886       memcpy(st.tmpbuf, fin, (kiss_fft_cpx!S).sizeof*st.dimprod);
 887       bufin = st.tmpbuf;
 888     }
 889   } else {
 890     bufout = st.tmpbuf;
 891   }
 892
 893   foreach (immutable uint k; 0..st.ndims) {
 894     uint curdim = st.dims[k];
 895     uint stride = st.dimprod/curdim;
 896
 897     foreach (immutable uint i; 0..stride) {
 898       kiss_fft_stride!S(st.states[k], bufin+i, bufout+i*curdim, stride);
 899     }
 900
 901     /* toggle back and forth between the two buffers */
 902     if (bufout is st.tmpbuf) {
 903       bufout = fout;
 904       bufin = st.tmpbuf;
 905     } else {
 906       bufout = st.tmpbuf;
 907       bufin = fout;
 908     }
 909   }
 910 }
 911
 912
 913 // ////////////////////////////////////////////////////////////////////////// //
 914
 915 public alias kiss_fftndr_cfg_f = kiss_fftndr_state!float*; ///
 916 public alias kiss_fftndr_cfg_d = kiss_fftndr_state!double*; ///
 917
 918 public template kiss_fftndr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 919   alias kiss_fftndr_cfg = kiss_fftndr_state!T*;
 920 }
 921
 922 struct kiss_fftndr_state(S)
 923 if (is(S == float) || is(S == double) || is(S == real))
 924 {
 925   uint dimReal;
 926   uint dimOther;
 927   kiss_fftr_cfg!S cfg_r;
 928   kiss_fftnd_cfg!S cfg_nd;
 929   ubyte* tmpbuf;
 930 }
 931
 932
 933 int prod (const(uint)[] dims) pure nothrow @trusted @nogc {
 934   uint x = 1;
 935   uint ndims = cast(uint)dims.length;
 936   const(uint)* dp = dims.ptr;
 937   while (ndims--) x *= *dp++;
 938   return x;
 939 }
 940
 941 T MAX(T) (in T a, in T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
 942
 943
 944 /*
 945 dims[0] must be even
 946
 947 If you don't care to allocate space, use mem = lenmem = null
 948 */
 949 public kiss_fftndr_cfg!S kiss_fftndr_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
 950 if (is(S == float) || is(S == double) || is(S == real))
 951 {
 952   import core.stdc.stdlib : malloc, free;
 953   import core.stdc.string : memset;
 954
 955   if (dims.length < 1 || dims.length > ushort.max) return null;
 956   immutable uint ndims = cast(uint)dims.length;
 957
 958   kiss_fftndr_cfg!S st = null;
 959   usize nr = 0, nd = 0, ntmp = 0;
 960   uint dimReal = dims[ndims-1];
 961   uint dimOther = prod(dims[0..$-1]);
 962   usize memneeded;
 963
 964   kiss_fftr_alloc!S(dimReal, dir, null, &nr);
 965   kiss_fftnd_alloc!S(dims[0..$-1], dir, null, &nd);
 966   ntmp =
 967       MAX(2*dimOther, dimReal+2)*(kiss_fft_scalar!S).sizeof+ // freq buffer for one pass
 968       dimOther*(dimReal+2)*(kiss_fft_scalar!S).sizeof; // large enough to hold entire input in case of in-place
 969
 970   memneeded = (kiss_fftndr_state!S).sizeof+nr+nd+ntmp;
 971
 972   bool malloced = false;
 973   if (lenmem is null) {
 974     st = cast(kiss_fftndr_cfg!S)malloc(memneeded);
 975     malloced = true;
 976   } else {
 977     if (*lenmem >= memneeded) st = cast(kiss_fftndr_cfg!S)mem;
 978     *lenmem = memneeded;
 979   }
 980   if (st is null) return null;
 981
 982   memset(st, 0, memneeded);
 983
 984   st.dimReal = dimReal;
 985   st.dimOther = dimOther;
 986   st.cfg_r = kiss_fftr_alloc!S(dimReal, dir, st+1, &nr);
 987   if (st.cfg_r is null) { if (malloced) free(st); return null; }
 988   st.cfg_nd = kiss_fftnd_alloc!S(dims[0..$-1], dir, (cast(ubyte*)st.cfg_r)+nr, &nd);
 989   if (st.cfg_nd is null) { if (malloced) free(st); return null; }
 990   st.tmpbuf = cast(ubyte*)st.cfg_nd+nd;
 991   assert(st.tmpbuf !is null);
 992
 993   return st;
 994 }
 995
 996
 997 ///
 998 public void kiss_fftndr_free(S) (ref kiss_fftndr_cfg!S p)
 999 if (is(S == float) || is(S == double) || is(S == real))
1000 {
1001   if (p !is null) {
1002     import core.stdc.stdlib : free;
1003     free(p);
1004     p = null;
1005   }
1006 }
1007
1008
1009 /**
1010 input timedata has dims[0] X dims[1] X ... X  dims[ndims-1] scalar points
1011 output freqdata has dims[0] X dims[1] X ... X  dims[ndims-1]/2+1 complex points
1012 */
1013 public void kiss_fftndr(S) (kiss_fftndr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
1014 if (is(S == float) || is(S == double) || is(S == real))
1015 {
1016   uint dimReal = st.dimReal;
1017   uint dimOther = st.dimOther;
1018   uint nrbins = dimReal/2+1;
1019
1020   kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1021   kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1022   //assert(tmp1 !is null);
1023
1024   // timedata is N0 x N1 x ... x Nk real
1025
1026   // take a real chunk of data, fft it and place the output at correct intervals
1027   foreach (immutable uint k1; 0..dimOther) {
1028     kiss_fftr!S(st.cfg_r, timedata+k1*dimReal, tmp1); // tmp1 now holds nrbins complex points
1029     foreach (immutable uint k2; 0..nrbins) tmp2[k2*dimOther+k1] = tmp1[k2];
1030   }
1031
1032   foreach (immutable uint k2; 0..nrbins) {
1033     kiss_fftnd!S(st.cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
1034     foreach (immutable uint k1; 0..dimOther) freqdata[k1*nrbins+k2] = tmp1[k1];
1035   }
1036 }
1037
1038
1039 /**
1040 input and output dimensions are the exact opposite of kiss_fftndr
1041 */
1042 public void kiss_fftndri(S) (kiss_fftndr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
1043 if (is(S == float) || is(S == double) || is(S == real))
1044 {
1045   int dimReal = st.dimReal;
1046   int dimOther = st.dimOther;
1047   int nrbins = dimReal/2+1;
1048   kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1049   kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1050
1051   foreach (immutable uint k2; 0..nrbins) {
1052     foreach (immutable uint k1; 0..dimOther) tmp1[k1] = freqdata[k1*(nrbins)+k2];
1053     kiss_fftnd!S(st.cfg_nd, tmp1, tmp2+k2*dimOther);
1054   }
1055
1056   foreach (immutable uint k1; 0..dimOther) {
1057     foreach (immutable uint k2; 0..nrbins) tmp1[k2] = tmp2[k2*dimOther+k1];
1058     kiss_fftri!S(st.cfg_r, tmp1, timedata+k1*dimReal);
1059   }
1060 }