kissfft.d

   1 /*
   2  * Copyright (c) 2003-2010, Mark Borgerding
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are
   8  * met:
   9  *
  10  *  * Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  *
  13  *  * Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the
  16  *    distribution.
  17  *
  18  *  * Neither the author nor the names of any contributors may be used to
  19  *    endorse or promote products derived from this software without
  20  *    specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  25  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  26  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33  */
  34 /* Ported by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
  35  * Understanding is not required. Only obedience.
  36  *
  37  * This program is free software: you can redistribute it and/or modify
  38  * it under the terms of the GNU General Public License as published by
  39  * the Free Software Foundation, either version 3 of the License, or
  40  * (at your option) any later version.
  41  *
  42  * This program is distributed in the hope that it will be useful,
  43  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  44  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45  * GNU General Public License for more details.
  46  *
  47  * You should have received a copy of the GNU General Public License
  48  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  49  */
  50 module iv.kissfft;
  51 private nothrow @trusted @nogc:
  52
  53 version(aliced) {} else alias usize = size_t;
  54
  55 //version = kiss_fft_use_parallel; // absolutely no reason to
  56
  57
  58 // ////////////////////////////////////////////////////////////////////////// //
  59 ///
  60 public enum KissFFT { Forward, Inverse }
  61
  62 ///
  63 //public alias kiss_fft_scalar = float;
  64 //public alias kiss_fft_scalar = double;
  65
  66 ///
  67 public template kiss_fft_scalar(T) if (is(T == float) || is(T == double) || is(T == real)) {
  68   alias kiss_fft_scalar = T;
  69 }
  70
  71
  72 ///
  73 public template isGoodKissFFTScalar(T) {
  74   static if (is(T == float) || is(T == double) || is(T == real)) {
  75     enum isGoodKissFFTScalar = true;
  76   } else {
  77     enum isGoodKissFFTScalar = false;
  78   }
  79 }
  80
  81
  82 ///
  83 public template isKissFFTComplex(T) {
  84   static if (is(T : kiss_fft_cpx!S, S)) {
  85     enum isKissFFTComplex = true;
  86   } else {
  87     enum isKissFFTComplex = false;
  88   }
  89 }
  90
  91
  92 ///
  93 public template KissFFTScalar(T) {
  94   static if (is(T : kiss_fft_cpx!S, S)) {
  95     alias KissFFTScalar = S;
  96   } else {
  97     static assert(0, "not a KissFFT complex type");
  98   }
  99 }
 100
 101 ///
 102 public align(1) struct kiss_fft_cpx(T) if (is(T == float) || is(T == double) || is(T == real)) {
 103 align(1):
 104   alias Scalar = T;
 105   T r;
 106   T i;
 107
 108   T opIndex (uint n) const pure nothrow @trusted @nogc { pragma(inline, true); return (n ? i : r); }
 109   void opIndexAssign (in T v, uint n) nothrow @trusted @nogc { pragma(inline, true); if (n) i = v; else r = v; }
 110 }
 111
 112
 113 public alias kiss_fft_cpx_f = kiss_fft_cpx!float; ///
 114 public alias kiss_fft_cpx_d = kiss_fft_cpx!double; ///
 115
 116
 117 public alias kiss_fft_cfg_f = kiss_fft_state!float*; ///
 118 public alias kiss_fft_cfg_d = kiss_fft_state!double*; ///
 119
 120 public template kiss_fft_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 121   alias kiss_fft_cfg = kiss_fft_state!T*;
 122 }
 123
 124
 125 // ////////////////////////////////////////////////////////////////////////// //
 126 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
 127  *
 128  * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, KissFFT.Forward);`
 129  *
 130  * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
 131  *
 132  * If `lenmem` is `null`, then kiss_fft_alloc will allocate a cfg buffer using `malloc`.
 133  * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
 134  *
 135  * The state can be placed in a user supplied buffer `mem`:
 136  * If `lenmem` is not `null` and `mem` is not `null` and `*lenmem` is large enough,
 137  * then the function places the cfg in `mem` and the size used in `*lenmem`,
 138  * and returns `mem`.
 139  *
 140  * If `lenmem` is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
 141  * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
 142  */
 143 public kiss_fft_cfg!S kiss_fft_alloc(S) (uint nfft, KissFFT dir, void* mem=null, usize* lenmem=null)
 144 if (is(S == float) || is(S == double) || is(S == real))
 145 {
 146   kiss_fft_cfg!S st = null;
 147   usize memneeded = (kiss_fft_state!S).sizeof+(kiss_fft_cpx!S).sizeof*(nfft-1); // twiddle factors
 148
 149   if (lenmem is null) {
 150     import core.stdc.stdlib : malloc;
 151     st = cast(kiss_fft_cfg!S)malloc(memneeded);
 152   } else {
 153     if (mem !is null && *lenmem >= memneeded) st = cast(kiss_fft_cfg!S)mem;
 154     *lenmem = memneeded;
 155   }
 156
 157   if (st !is null) {
 158     st.nfft = nfft;
 159     st.inverse = (dir == KissFFT.Inverse);
 160     foreach (immutable uint i; 0..nfft) {
 161       enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
 162       immutable double phase = -2*pi*i/nfft*(st.inverse ? -1 : 1);
 163       //if (st.inverse) phase = -phase;
 164       mixin(kf_cexp!("st.twiddles.ptr+i", "phase"));
 165     }
 166     kf_factor!S(nfft, st.factors.ptr);
 167   }
 168
 169   return st;
 170 }
 171
 172
 173 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
 174  * buffer and can be simply free()d when no longer needed
 175  */
 176 public void kiss_fft_free(S) (ref kiss_fft_cfg!S p)
 177 if (is(S == float) || is(S == double) || is(S == real))
 178 {
 179   if (p !is null) {
 180     import core.stdc.stdlib : free;
 181     free(p);
 182     p = null;
 183   }
 184 }
 185
 186
 187 /** Perform an FFT on a complex input buffer.
 188  *
 189  * for a forward FFT,
 190  * fin should be  f[0] , f[1] , ... ,f[nfft-1]
 191  * fout will be   F[0] , F[1] , ... ,F[nfft-1]
 192  * Note that each element is complex and can be accessed like f[k].r and f[k].i
 193  */
 194 public void kiss_fft(S) (kiss_fft_cfg!S cfg, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
 195 if (is(S == float) || is(S == double) || is(S == real))
 196 {
 197   assert(cfg !is null);
 198   kiss_fft_stride!S(cfg, fin, fout, 1);
 199 }
 200
 201
 202 /** A more generic version of the above function. It reads its input from every Nth sample. */
 203 public void kiss_fft_stride(S) (kiss_fft_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout, uint in_stride)
 204 if (is(S == float) || is(S == double) || is(S == real))
 205 {
 206   import core.stdc.stdlib : alloca;
 207   assert(st !is null);
 208   if (fin is fout) {
 209     import core.stdc.string : memcpy;
 210     //NOTE: this is not really an in-place FFT algorithm.
 211     //It just performs an out-of-place FFT into a temp buffer
 212     //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
 213     kiss_fft_cpx!S* tmpbuf = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*st.nfft);
 214     kf_work!S(tmpbuf, fin, 1, in_stride, st.factors.ptr, st);
 215     memcpy(fout, tmpbuf, (kiss_fft_cpx!S).sizeof*st.nfft);
 216     //KISS_FFT_TMP_FREE(tmpbuf);
 217   } else {
 218     kf_work!S(fout, fin, 1, in_stride, st.factors.ptr, st);
 219   }
 220 }
 221
 222
 223 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
 224 public uint kiss_fft_next_fast_size (uint n) {
 225   for (;;) {
 226     uint m = n;
 227     while ((m%2) == 0) m /= 2;
 228     while ((m%3) == 0) m /= 3;
 229     while ((m%5) == 0) m /= 5;
 230     if (m <= 1) break; // n is completely factorable by twos, threes, and fives
 231     ++n;
 232   }
 233   return n;
 234 }
 235
 236
 237 // ////////////////////////////////////////////////////////////////////////// //
 238 // kissfftr
 239 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
 240
 241 public alias kiss_fftr_cfg_f = kiss_fftr_state!float*; ///
 242 public alias kiss_fftr_cfg_d = kiss_fftr_state!double*; ///
 243
 244 ///
 245 public template kiss_fftr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 246   alias kiss_fftr_cfg = kiss_fftr_state!T*;
 247 }
 248
 249 struct kiss_fftr_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
 250   kiss_fft_cfg!S substate;
 251   kiss_fft_cpx!S* tmpbuf;
 252   kiss_fft_cpx!S* super_twiddles;
 253 }
 254
 255
 256 /*
 257  ** nfft must be even
 258  *
 259  * If you don't care to allocate space, use mem = lenmem = null
 260  */
 261 public kiss_fftr_cfg!S kiss_fftr_alloc(S) (uint nfft, KissFFT dir, void* mem, usize* lenmem)
 262 if (is(S == float) || is(S == double) || is(S == real))
 263 {
 264   kiss_fftr_cfg!S st = null;
 265   usize subsize, memneeded;
 266
 267   if (nfft&1) return null; //assert(0, "real FFT optimization must be even");
 268   nfft >>= 1;
 269
 270   kiss_fft_alloc!S(nfft, dir, null, &subsize);
 271   memneeded = (kiss_fftr_state!S).sizeof+subsize+(kiss_fft_cpx!S).sizeof*(nfft*3/2);
 272
 273   if (lenmem is null) {
 274     import core.stdc.stdlib : malloc;
 275     st = cast(kiss_fftr_cfg!S)malloc(memneeded);
 276   } else {
 277     if (*lenmem >= memneeded) st = cast(kiss_fftr_cfg!S)mem;
 278     *lenmem = memneeded;
 279   }
 280   if (st is null) return null;
 281
 282   st.substate = cast(kiss_fft_cfg!S)(st+1); // just beyond kiss_fftr_state struct
 283   st.tmpbuf = cast(kiss_fft_cpx!S*)((cast(ubyte*)st.substate)+subsize);
 284   st.super_twiddles = st.tmpbuf+nfft;
 285   kiss_fft_alloc!S(nfft, dir, st.substate, &subsize);
 286
 287   foreach (immutable i; 0..nfft/2) {
 288     enum pi = 3.141592653589793238462643383279502884197169399375105820974944;
 289     immutable double phase = -pi*(cast(double)(i+1)/nfft+0.5)*(dir == KissFFT.Inverse ? -1 : 1);
 290     //if (dir == KissFFT.Inverse) phase = -phase;
 291     mixin(kf_cexp!("st.super_twiddles+i", "phase"));
 292   }
 293
 294   return st;
 295 }
 296
 297
 298 /** Use this to free `fftr` buffer. */
 299 public void kiss_fft_free(S) (ref kiss_fftr_cfg!S p)
 300 if (is(S == float) || is(S == double) || is(S == real))
 301 {
 302   if (p !is null) {
 303     import core.stdc.stdlib : free;
 304     free(p);
 305     p = null;
 306   }
 307 }
 308
 309
 310 /** input timedata has nfft scalar points
 311  * output freqdata has nfft/2+1 complex points
 312  */
 313 public void kiss_fftr(S) (kiss_fftr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
 314 if (is(S == float) || is(S == double) || is(S == real))
 315 {
 316   // input buffer timedata is stored row-wise
 317   uint k, ncfft;
 318   kiss_fft_cpx!S fpnk, fpk, f1k, f2k, tw, tdc;
 319
 320   if (st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
 321
 322   ncfft = st.substate.nfft;
 323
 324   // perform the parallel fft of two real signals packed in real,imag
 325   kiss_fft!S(st.substate, cast(const(kiss_fft_cpx!S)*)timedata, st.tmpbuf);
 326   /* The real part of the DC element of the frequency spectrum in st.tmpbuf
 327    * contains the sum of the even-numbered elements of the input time sequence
 328    * The imag part is the sum of the odd-numbered elements
 329    *
 330    * The sum of tdc.r and tdc.i is the sum of the input time sequence.
 331    *      yielding DC of input time sequence
 332    * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
 333    *      yielding Nyquist bin of input time sequence
 334    */
 335
 336   tdc.r = st.tmpbuf[0].r;
 337   tdc.i = st.tmpbuf[0].i;
 338   freqdata[0].r = tdc.r+tdc.i;
 339   freqdata[ncfft].r = tdc.r-tdc.i;
 340   freqdata[ncfft].i = freqdata[0].i = 0;
 341
 342   for (k = 1; k <= ncfft/2; ++k) {
 343     fpk = st.tmpbuf[k];
 344     fpnk.r = st.tmpbuf[ncfft-k].r;
 345     fpnk.i = -st.tmpbuf[ncfft-k].i;
 346
 347     mixin(C_ADD!("f1k", "fpk", "fpnk"));
 348     mixin(C_SUB!("f2k", "fpk", "fpnk"));
 349     mixin(C_MUL!("tw", "f2k", "st.super_twiddles[k-1]"));
 350
 351     freqdata[k].r = mixin(HALF_OF!"f1k.r+tw.r");
 352     freqdata[k].i = mixin(HALF_OF!"f1k.i+tw.i");
 353     freqdata[ncfft-k].r = mixin(HALF_OF!"f1k.r-tw.r");
 354     freqdata[ncfft-k].i = mixin(HALF_OF!"tw.i-f1k.i");
 355   }
 356 }
 357
 358
 359 /** input freqdata has  nfft/2+1 complex points
 360  * output timedata has nfft scalar points
 361  */
 362 public void kiss_fftri(S) (kiss_fftr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
 363 if (is(S == float) || is(S == double) || is(S == real))
 364 {
 365   // input buffer timedata is stored row-wise
 366   if (st.substate.inverse == 0) assert(0, "kiss fft usage error: improper alloc");
 367
 368   uint ncfft = st.substate.nfft;
 369
 370   st.tmpbuf[0].r = freqdata[0].r+freqdata[ncfft].r;
 371   st.tmpbuf[0].i = freqdata[0].r-freqdata[ncfft].r;
 372
 373   foreach (immutable uint k; 1..ncfft/2+1) {
 374     kiss_fft_cpx!S fnkc = void, fek = void, fok = void, tmp = void;
 375     kiss_fft_cpx!S fk = freqdata[k];
 376     fnkc.r = freqdata[ncfft-k].r;
 377     fnkc.i = -freqdata[ncfft-k].i;
 378     mixin(C_ADD!("fek", "fk", "fnkc"));
 379     mixin(C_SUB!("tmp", "fk", "fnkc"));
 380     mixin(C_MUL!("fok", "tmp", "st.super_twiddles[k-1]"));
 381     mixin(C_ADD!("st.tmpbuf[k]", "fek", "fok"));
 382     mixin(C_SUB!("st.tmpbuf[ncfft-k]", "fek", "fok"));
 383     st.tmpbuf[ncfft-k].i *= -1;
 384   }
 385   kiss_fft!S(st.substate, st.tmpbuf, cast(kiss_fft_cpx!S*)timedata);
 386 }
 387
 388
 389 /** for real ffts, we need an even size */
 390 public uint kiss_fftr_next_fast_size_real (uint n) {
 391   pragma(inline, true);
 392   return kiss_fft_next_fast_size((n+1)>>1)<<1;
 393 }
 394
 395
 396 // ////////////////////////////////////////////////////////////////////////// //
 397 enum MAXFACTORS = 32;
 398 /* e.g. an fft of length 128 has 4 factors
 399  as far as kissfft is concerned
 400  4*4*4*2
 401  */
 402
 403 struct kiss_fft_state(S) if (is(S == float) || is(S == double) || is(S == real)) {
 404   uint nfft;
 405   uint inverse;
 406   uint[2*MAXFACTORS] factors;
 407   kiss_fft_cpx!S[1] twiddles;
 408 }
 409
 410
 411 // ////////////////////////////////////////////////////////////////////////// //
 412 private void kf_bfly2(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, int m) {
 413   kiss_fft_cpx!S* Fout2;
 414   const(kiss_fft_cpx!S)* tw1 = st.twiddles.ptr;
 415   kiss_fft_cpx!S t;
 416   Fout2 = Fout+m;
 417   do {
 418     mixin(C_MUL!("t", "*Fout2", "*tw1"));
 419     tw1 += fstride;
 420     mixin(C_SUB!("*Fout2", "*Fout", "t"));
 421     mixin(C_ADDTO!("*Fout", "t"));
 422     ++Fout2;
 423     ++Fout;
 424   } while (--m);
 425 }
 426
 427
 428 private void kf_bfly4(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, in usize m) {
 429   const(kiss_fft_cpx!S)* tw1, tw2, tw3;
 430   kiss_fft_cpx!S[6] scratch = void;
 431   usize k = m;
 432   immutable usize m2 = 2*m;
 433   immutable usize m3 = 3*m;
 434   tw3 = tw2 = tw1 = st.twiddles.ptr;
 435   do {
 436     mixin(C_MUL!("scratch[0]", "Fout[m]", "*tw1"));
 437     mixin(C_MUL!("scratch[1]", "Fout[m2]", "*tw2"));
 438     mixin(C_MUL!("scratch[2]", "Fout[m3]", "*tw3"));
 439
 440     mixin(C_SUB!("scratch[5]", "*Fout", "scratch[1]"));
 441     mixin(C_ADDTO!("*Fout", "scratch[1]"));
 442     mixin(C_ADD!("scratch[3]", "scratch[0]", "scratch[2]"));
 443     mixin(C_SUB!("scratch[4]", "scratch[0]", "scratch[2]"));
 444     mixin(C_SUB!("Fout[m2]", "*Fout", "scratch[3]"));
 445     tw1 += fstride;
 446     tw2 += fstride*2;
 447     tw3 += fstride*3;
 448     mixin(C_ADDTO!("*Fout", "scratch[3]"));
 449
 450     if (st.inverse) {
 451       Fout[m].r = scratch[5].r-scratch[4].i;
 452       Fout[m].i = scratch[5].i+scratch[4].r;
 453       Fout[m3].r = scratch[5].r+scratch[4].i;
 454       Fout[m3].i = scratch[5].i-scratch[4].r;
 455     } else {
 456       Fout[m].r = scratch[5].r+scratch[4].i;
 457       Fout[m].i = scratch[5].i-scratch[4].r;
 458       Fout[m3].r = scratch[5].r-scratch[4].i;
 459       Fout[m3].i = scratch[5].i+scratch[4].r;
 460     }
 461     ++Fout;
 462   } while (--k);
 463 }
 464
 465
 466 private void kf_bfly3(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, usize m) {
 467   usize k = m;
 468   immutable usize m2 = 2*m;
 469   const(kiss_fft_cpx!S)* tw1, tw2;
 470   kiss_fft_cpx!S[5] scratch = void;
 471   kiss_fft_cpx!S epi3;
 472   epi3 = st.twiddles[fstride*m];
 473   tw1 = tw2 = st.twiddles.ptr;
 474   do {
 475     mixin(C_MUL!("scratch[1]", "Fout[m]", "*tw1"));
 476     mixin(C_MUL!("scratch[2]", "Fout[m2]", "*tw2"));
 477
 478     mixin(C_ADD!("scratch[3]", "scratch[1]", "scratch[2]"));
 479     mixin(C_SUB!("scratch[0]", "scratch[1]", "scratch[2]"));
 480     tw1 += fstride;
 481     tw2 += fstride*2;
 482
 483     Fout[m].r = Fout.r-mixin(HALF_OF!"scratch[3].r");
 484     Fout[m].i = Fout.i-mixin(HALF_OF!"scratch[3].i");
 485
 486     mixin(C_MULBYSCALAR!("scratch[0]", "epi3.i"));
 487
 488     mixin(C_ADDTO!("*Fout", "scratch[3]"));
 489
 490     Fout[m2].r = Fout[m].r+scratch[0].i;
 491     Fout[m2].i = Fout[m].i-scratch[0].r;
 492
 493     Fout[m].r -= scratch[0].i;
 494     Fout[m].i += scratch[0].r;
 495
 496     ++Fout;
 497   } while (--k);
 498 }
 499
 500
 501 private void kf_bfly5(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m) {
 502   kiss_fft_cpx!S* Fout0, Fout1, Fout2, Fout3, Fout4;
 503   kiss_fft_cpx!S[13] scratch = void;
 504   const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
 505   const(kiss_fft_cpx!S)* tw;
 506   kiss_fft_cpx!S ya = twiddles[fstride*m];
 507   kiss_fft_cpx!S yb = twiddles[fstride*2*m];
 508
 509   Fout0 = Fout;
 510   Fout1 = Fout0+m;
 511   Fout2 = Fout0+2*m;
 512   Fout3 = Fout0+3*m;
 513   Fout4 = Fout0+4*m;
 514
 515   tw = st.twiddles.ptr;
 516   foreach (immutable uint u; 0..m) {
 517     scratch[0] = *Fout0;
 518
 519     mixin(C_MUL!("scratch[1]", "*Fout1", "tw[u*fstride]"));
 520     mixin(C_MUL!("scratch[2]", "*Fout2", "tw[2*u*fstride]"));
 521     mixin(C_MUL!("scratch[3]", "*Fout3", "tw[3*u*fstride]"));
 522     mixin(C_MUL!("scratch[4]", "*Fout4", "tw[4*u*fstride]"));
 523
 524     mixin(C_ADD!("scratch[7]", "scratch[1]", "scratch[4]"));
 525     mixin(C_SUB!("scratch[10]", "scratch[1]", "scratch[4]"));
 526     mixin(C_ADD!("scratch[8]", "scratch[2]", "scratch[3]"));
 527     mixin(C_SUB!("scratch[9]", "scratch[2]", "scratch[3]"));
 528
 529     Fout0.r += scratch[7].r+scratch[8].r;
 530     Fout0.i += scratch[7].i+scratch[8].i;
 531
 532     scratch[5].r = scratch[0].r+scratch[7].r*ya.r+scratch[8].r*yb.r;
 533     scratch[5].i = scratch[0].i+scratch[7].i*ya.r+scratch[8].i*yb.r;
 534
 535     scratch[6].r =  scratch[10].i*ya.i+scratch[9].i*yb.i;
 536     scratch[6].i = -scratch[10].r*ya.i-scratch[9].r*yb.i;
 537
 538     mixin(C_SUB!("*Fout1", "scratch[5]", "scratch[6]"));
 539     mixin(C_ADD!("*Fout4", "scratch[5]", "scratch[6]"));
 540
 541     scratch[11].r = scratch[0].r+scratch[7].r*yb.r+scratch[8].r*ya.r;
 542     scratch[11].i = scratch[0].i+scratch[7].i*yb.r+scratch[8].i*ya.r;
 543     scratch[12].r = -scratch[10].i*yb.i+scratch[9].i*ya.i;
 544     scratch[12].i =  scratch[10].r*yb.i-scratch[9].r*ya.i;
 545
 546     mixin(C_ADD!("*Fout2", "scratch[11]", "scratch[12]"));
 547     mixin(C_SUB!("*Fout3", "scratch[11]", "scratch[12]"));
 548
 549     ++Fout0;
 550     ++Fout1;
 551     ++Fout2;
 552     ++Fout3;
 553     ++Fout4;
 554   }
 555 }
 556
 557 // perform the butterfly for one stage of a mixed radix FFT
 558 private void kf_bfly_generic(S) (kiss_fft_cpx!S* Fout, in usize fstride, const(kiss_fft_cfg!S) st, uint m, uint p) {
 559   import core.stdc.stdlib : alloca;
 560
 561   //uint q1, q;
 562   const(kiss_fft_cpx!S)* twiddles = st.twiddles.ptr;
 563   kiss_fft_cpx!S t;
 564   uint Norig = st.nfft;
 565
 566   //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
 567   kiss_fft_cpx!S* scratch = cast(kiss_fft_cpx!S*)alloca((kiss_fft_cpx!S).sizeof*p);
 568
 569   foreach (immutable uint u; 0..m) {
 570     uint k = u;
 571     foreach (immutable uint q1; 0..p) {
 572       scratch[q1] = Fout[k];
 573       k += m;
 574     }
 575
 576     k = u;
 577     foreach (immutable uint q1; 0..p) {
 578       uint twidx = 0;
 579       Fout[k] = scratch[0];
 580       foreach (immutable uint q; 1..p) {
 581         twidx += fstride*k;
 582         if (twidx >= Norig) twidx -= Norig;
 583         mixin(C_MUL!("t", "scratch[q]", "twiddles[twidx]"));
 584         mixin(C_ADDTO!("Fout[k]", "t"));
 585       }
 586       k += m;
 587     }
 588   }
 589   //KISS_FFT_TMP_FREE(scratch);
 590 }
 591
 592
 593 private void kf_work(S) (kiss_fft_cpx!S* Fout, const(kiss_fft_cpx!S)* f, in usize fstride, uint in_stride, uint* factors, const(kiss_fft_cfg!S) st) {
 594   kiss_fft_cpx!S* Fout_beg = Fout;
 595   immutable uint p = *factors++; // the radix
 596   immutable uint m = *factors++; // stage's fft length/p
 597   const(kiss_fft_cpx!S)* Fout_end = Fout+p*m;
 598
 599   version(kiss_fft_use_parallel) {
 600     // use threads at the top-level (not recursive)
 601     import std.parallelism;
 602     import std.range : iota;
 603     if (fstride == 1 && p <= 5) {
 604       // execute the p different work units in different threads
 605       foreach (uint k; parallel(iota(p))) {
 606         kf_work!S(Fout+k*m, f+fstride*in_stride*k, fstride*p, in_stride, factors, st);
 607       }
 608       // all threads have joined by this point
 609       switch (p) {
 610         case 2: kf_bfly2!S(Fout, fstride, st, m); break;
 611         case 3: kf_bfly3!S(Fout, fstride, st, m); break;
 612         case 4: kf_bfly4!S(Fout, fstride, st, m); break;
 613         case 5: kf_bfly5!S(Fout, fstride, st, m); break;
 614         default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
 615       }
 616       return;
 617     }
 618   }
 619
 620   if (m == 1) {
 621     do {
 622       *Fout = *f;
 623       f += fstride*in_stride;
 624     } while (++Fout !is Fout_end);
 625   } else {
 626     do {
 627       // recursive call:
 628       // DFT of size m*p performed by doing
 629       // p instances of smaller DFTs of size m,
 630       // each one takes a decimated version of the input
 631       kf_work!S(Fout, f, fstride*p, in_stride, factors, st);
 632       f += fstride*in_stride;
 633     } while ((Fout += m) !is Fout_end);
 634   }
 635
 636   Fout = Fout_beg;
 637
 638   // recombine the p smaller DFTs
 639   switch (p) {
 640     case 2: kf_bfly2!S(Fout, fstride, st, m); break;
 641     case 3: kf_bfly3!S(Fout, fstride, st, m); break;
 642     case 4: kf_bfly4!S(Fout, fstride, st, m); break;
 643     case 5: kf_bfly5!S(Fout, fstride, st, m); break;
 644     default: kf_bfly_generic!S(Fout, fstride, st, m, p); break;
 645   }
 646 }
 647
 648
 649 /* facbuf is populated by p1, m1, p2, m2, ...
 650  * where
 651  *   p[i]*m[i] = m[i-1]
 652  *   m0 = n
 653  */
 654 private void kf_factor(S) (uint n, uint* facbuf) {
 655   import std.math : floor, sqrt;
 656   immutable double floor_sqrt = floor(sqrt(cast(double)n));
 657   uint p = 4;
 658   // factor out powers of 4, powers of 2, then any remaining primes
 659   do {
 660     while (n%p) {
 661       switch (p) {
 662         case 4: p = 2; break;
 663         case 2: p = 3; break;
 664         default: p += 2; break;
 665       }
 666       if (p > floor_sqrt) p = n; // no more factors, skip to end
 667     }
 668     n /= p;
 669     *facbuf++ = p;
 670     *facbuf++ = n;
 671   } while (n > 1);
 672 }
 673
 674
 675 // ////////////////////////////////////////////////////////////////////////// //
 676 // kissfft_guts
 677 /*
 678   Explanation of macros dealing with complex math:
 679
 680    C_MUL(m,a,b)         : m = a*b
 681    C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
 682    C_SUB( res, a,b)     : res = a-b
 683    C_SUBFROM( res , a)  : res -= a
 684    C_ADDTO( res , a)    : res += a
 685  */
 686 //enum S_MUL(string a, string b) = "(("~a~")*("~b~"))";
 687 enum C_MUL(string m, string a, string b) =
 688   "{ ("~m~").r = ("~a~").r*("~b~").r-("~a~").i*("~b~").i;
 689      ("~m~").i = ("~a~").r*("~b~").i+("~a~").i*("~b~").r; }";
 690 enum C_MULBYSCALAR(string c, string s) = " { ("~c~").r *= ("~s~"); ("~c~").i *= ("~s~"); }";
 691
 692 enum C_ADD(string res, string a, string b) = "{ ("~res~").r=("~a~").r+("~b~").r; ("~res~").i=("~a~").i+("~b~").i; }";
 693 enum C_SUB(string res, string a, string b) = "{ ("~res~").r=("~a~").r-("~b~").r; ("~res~").i=("~a~").i-("~b~").i; }";
 694 enum C_ADDTO(string res, string a) = "{ ("~res~").r += ("~a~").r; ("~res~").i += ("~a~").i; }";
 695 enum C_SUBFROM(string res, string a) = "{ ("~res~").r -= ("~a~").r; ("~res~").i -= ("~a~").i; }";
 696
 697
 698 //kiss_fft_scalar KISS_FFT_COS(T) (T phase) { import std.math : cos; return cos(phase); }
 699 //kiss_fft_scalar KISS_FFT_SIN(T) (T phase) { import std.math : sin; return sin(phase); }
 700 //kiss_fft_scalar HALF_OF (kiss_fft_scalar x) { return x*cast(kiss_fft_scalar)0.5; }
 701 enum HALF_OF(string x) = "(cast(kiss_fft_scalar!S)(("~x~")*cast(kiss_fft_scalar!S)0.5))";
 702
 703 //enum kf_cexp(string x, string phase) = "{ ("~x~").r = KISS_FFT_COS("~phase~"); ("~x~").i = KISS_FFT_SIN("~phase~"); }";
 704 enum kf_cexp(string x, string phase) = "{ import std.math : cos, sin; ("~x~").r = cos("~phase~"); ("~x~").i = sin("~phase~"); }";
 705
 706
 707 /+
 708 #ifdef KISS_FFT_USE_ALLOCA
 709 // define this to allow use of alloca instead of malloc for temporary buffers
 710 // Temporary buffers are used in two case:
 711 // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
 712 // 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
 713 #include <alloca.h>
 714 #define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
 715 #define  KISS_FFT_TMP_FREE(ptr)
 716 #else
 717 #define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
 718 #define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
 719 #endif
 720 +/
 721
 722
 723 // ////////////////////////////////////////////////////////////////////////// //
 724
 725 public alias kiss_fftnd_cfg_f = kiss_fftnd_state!float*; ///
 726 public alias kiss_fftnd_cfg_d = kiss_fftnd_state!double*; ///
 727
 728 public template kiss_fftnd_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 729   alias kiss_fftnd_cfg = kiss_fftnd_state!T*;
 730 }
 731
 732 struct kiss_fftnd_state(S)
 733 if (is(S == float) || is(S == double) || is(S == real))
 734 {
 735   uint dimprod; /* dimsum would be mighty tasty right now */
 736   uint ndims;
 737   uint* dims;
 738   kiss_fft_cfg!S* states; /* cfg states for each dimension */
 739   kiss_fft_cpx!S* tmpbuf; /* buffer capable of hold the entire input */
 740 }
 741
 742
 743 ///
 744 public kiss_fftnd_cfg!S kiss_fftnd_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
 745 if (is(S == float) || is(S == double) || is(S == real))
 746 {
 747   import core.stdc.stdlib : malloc;
 748
 749   if (dims.length < 1 || dims.length > ushort.max) return null;
 750   immutable uint ndims = cast(uint)dims.length;
 751   kiss_fftnd_cfg!S st = null;
 752   int dimprod = 1;
 753   usize memneeded = (kiss_fftnd_state!S).sizeof;
 754   ubyte* ptr;
 755
 756   foreach (immutable uint i; 0..ndims) {
 757     usize sublen = 0;
 758     kiss_fft_alloc!S(dims[i], dir, null, &sublen);
 759     memneeded += sublen; /* st.states[i] */
 760     dimprod *= dims[i];
 761   }
 762   memneeded += int.sizeof*ndims;/*  st.dims */
 763   memneeded += (void*).sizeof*ndims;/* st.states  */
 764   memneeded += (kiss_fft_cpx!S).sizeof*dimprod; /* st.tmpbuf */
 765
 766   if (lenmem is null) {
 767     /* allocate for the caller */
 768     st = cast(kiss_fftnd_cfg!S)malloc(memneeded);
 769   } else {
 770     /* initialize supplied buffer if big enough */
 771     if (*lenmem >= memneeded) st = cast(kiss_fftnd_cfg!S)mem;
 772     *lenmem = memneeded; /* tell caller how big struct is (or would be) */
 773   }
 774   if (st is null) return null; /* malloc failed or buffer too small */
 775
 776   st.dimprod = dimprod;
 777   st.ndims = ndims;
 778   ptr = cast(ubyte*)(st+1);
 779
 780   st.states = cast(kiss_fft_cfg!S*)ptr;
 781   ptr += (void*).sizeof*ndims;
 782
 783   st.dims = cast(uint*)ptr;
 784   ptr += int.sizeof*ndims;
 785
 786   st.tmpbuf = cast(kiss_fft_cpx!S*)ptr;
 787   ptr += (kiss_fft_cpx!S).sizeof*dimprod;
 788
 789   foreach (immutable uint i; 0..ndims) {
 790     usize len;
 791     st.dims[i] = dims[i];
 792     kiss_fft_alloc!S(st.dims[i], dir, null, &len);
 793     st.states[i] = kiss_fft_alloc!S(st.dims[i], dir, ptr, &len);
 794     ptr += len;
 795   }
 796
 797   return st;
 798 }
 799
 800
 801 ///
 802 public void kiss_fftnd_free(S) (ref kiss_fftnd_cfg!S p)
 803 if (is(S == float) || is(S == double) || is(S == real))
 804 {
 805   if (p !is null) {
 806     import core.stdc.stdlib : free;
 807     free(p);
 808     p = null;
 809   }
 810 }
 811
 812
 813 /**
 814 This works by tackling one dimension at a time.
 815
 816 In effect,
 817 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
 818 A Di-sized fft is taken of each column, transposing the matrix as it goes.
 819
 820 Here's a 3-d example:
 821 Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
 822 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
 823  [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
 824
 825 Stage 0 ( D=2): treat the buffer as a 2x12 matrix
 826  [ [a b ... k l]
 827    [m n ... w x] ]
 828
 829  FFT each column with size 2.
 830  Transpose the matrix at the same time using kiss_fft_stride.
 831
 832  [ [ a+m a-m ]
 833    [ b+n b-n]
 834    ...
 835    [ k+w k-w ]
 836    [ l+x l-x ] ]
 837
 838  Note fft([x y]) == [x+y x-y]
 839
 840 Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
 841  [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
 842    [ e+q e-q f+r f-r g+s g-s h+t h-t ]
 843    [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
 844
 845  And perform FFTs (size=3) on each of the columns as above, transposing
 846  the matrix as it goes.  The output of stage 1 is
 847      (Legend: ap = [ a+m e+q i+u ]
 848               am = [ a-m e-q i-u ] )
 849
 850  [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
 851    [ sum(am) fft(am)[0] fft(am)[1] ]
 852    [ sum(bp) fft(bp)[0] fft(bp)[1] ]
 853    [ sum(bm) fft(bm)[0] fft(bm)[1] ]
 854    [ sum(cp) fft(cp)[0] fft(cp)[1] ]
 855    [ sum(cm) fft(cm)[0] fft(cm)[1] ]
 856    [ sum(dp) fft(dp)[0] fft(dp)[1] ]
 857    [ sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
 858
 859 Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
 860  [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
 861    [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
 862    [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
 863    [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ]  ]
 864
 865  Then FFTs each column, transposing as it goes.
 866
 867  The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
 868
 869  Note as a sanity check that the first element of the final
 870  stage's output (DC term) is
 871  sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
 872  , i.e. the summation of all 24 input elements.
 873
 874 */
 875 public void kiss_fftnd(S) (kiss_fftnd_cfg!S st, const(kiss_fft_cpx!S)* fin, kiss_fft_cpx!S* fout)
 876 if (is(S == float) || is(S == double) || is(S == real))
 877 {
 878   import core.stdc.string : memcpy;
 879
 880   const(kiss_fft_cpx!S)* bufin = fin;
 881   kiss_fft_cpx!S* bufout;
 882
 883   /* arrange it so the last bufout == fout */
 884   if (st.ndims&1) {
 885     bufout = fout;
 886     if (fin is fout) {
 887       memcpy(st.tmpbuf, fin, (kiss_fft_cpx!S).sizeof*st.dimprod);
 888       bufin = st.tmpbuf;
 889     }
 890   } else {
 891     bufout = st.tmpbuf;
 892   }
 893
 894   foreach (immutable uint k; 0..st.ndims) {
 895     uint curdim = st.dims[k];
 896     uint stride = st.dimprod/curdim;
 897
 898     foreach (immutable uint i; 0..stride) {
 899       kiss_fft_stride!S(st.states[k], bufin+i, bufout+i*curdim, stride);
 900     }
 901
 902     /* toggle back and forth between the two buffers */
 903     if (bufout is st.tmpbuf) {
 904       bufout = fout;
 905       bufin = st.tmpbuf;
 906     } else {
 907       bufout = st.tmpbuf;
 908       bufin = fout;
 909     }
 910   }
 911 }
 912
 913
 914 // ////////////////////////////////////////////////////////////////////////// //
 915
 916 public alias kiss_fftndr_cfg_f = kiss_fftndr_state!float*; ///
 917 public alias kiss_fftndr_cfg_d = kiss_fftndr_state!double*; ///
 918
 919 public template kiss_fftndr_cfg(T) if (is(T == float) || is(T == double) || is(T == real)) {
 920   alias kiss_fftndr_cfg = kiss_fftndr_state!T*;
 921 }
 922
 923 struct kiss_fftndr_state(S)
 924 if (is(S == float) || is(S == double) || is(S == real))
 925 {
 926   uint dimReal;
 927   uint dimOther;
 928   kiss_fftr_cfg!S cfg_r;
 929   kiss_fftnd_cfg!S cfg_nd;
 930   ubyte* tmpbuf;
 931 }
 932
 933
 934 int prod (const(uint)[] dims) pure nothrow @trusted @nogc {
 935   uint x = 1;
 936   uint ndims = cast(uint)dims.length;
 937   const(uint)* dp = dims.ptr;
 938   while (ndims--) x *= *dp++;
 939   return x;
 940 }
 941
 942 T MAX(T) (in T a, in T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
 943
 944
 945 /*
 946 dims[0] must be even
 947
 948 If you don't care to allocate space, use mem = lenmem = null
 949 */
 950 public kiss_fftndr_cfg!S kiss_fftndr_alloc(S) (const(uint)[] dims, KissFFT dir, void* mem=null, usize* lenmem=null)
 951 if (is(S == float) || is(S == double) || is(S == real))
 952 {
 953   import core.stdc.stdlib : malloc, free;
 954   import core.stdc.string : memset;
 955
 956   if (dims.length < 1 || dims.length > ushort.max) return null;
 957   immutable uint ndims = cast(uint)dims.length;
 958
 959   kiss_fftndr_cfg!S st = null;
 960   usize nr = 0, nd = 0, ntmp = 0;
 961   uint dimReal = dims[ndims-1];
 962   uint dimOther = prod(dims[0..$-1]);
 963   usize memneeded;
 964
 965   kiss_fftr_alloc!S(dimReal, dir, null, &nr);
 966   kiss_fftnd_alloc!S(dims[0..$-1], dir, null, &nd);
 967   ntmp =
 968       MAX(2*dimOther, dimReal+2)*(kiss_fft_scalar!S).sizeof+ // freq buffer for one pass
 969       dimOther*(dimReal+2)*(kiss_fft_scalar!S).sizeof; // large enough to hold entire input in case of in-place
 970
 971   memneeded = (kiss_fftndr_state!S).sizeof+nr+nd+ntmp;
 972
 973   bool malloced = false;
 974   if (lenmem is null) {
 975     st = cast(kiss_fftndr_cfg!S)malloc(memneeded);
 976     malloced = true;
 977   } else {
 978     if (*lenmem >= memneeded) st = cast(kiss_fftndr_cfg!S)mem;
 979     *lenmem = memneeded;
 980   }
 981   if (st is null) return null;
 982
 983   memset(st, 0, memneeded);
 984
 985   st.dimReal = dimReal;
 986   st.dimOther = dimOther;
 987   st.cfg_r = kiss_fftr_alloc!S(dimReal, dir, st+1, &nr);
 988   if (st.cfg_r is null) { if (malloced) free(st); return null; }
 989   st.cfg_nd = kiss_fftnd_alloc!S(dims[0..$-1], dir, (cast(ubyte*)st.cfg_r)+nr, &nd);
 990   if (st.cfg_nd is null) { if (malloced) free(st); return null; }
 991   st.tmpbuf = cast(ubyte*)st.cfg_nd+nd;
 992   assert(st.tmpbuf !is null);
 993
 994   return st;
 995 }
 996
 997
 998 ///
 999 public void kiss_fftndr_free(S) (ref kiss_fftndr_cfg!S p)
1000 if (is(S == float) || is(S == double) || is(S == real))
1001 {
1002   if (p !is null) {
1003     import core.stdc.stdlib : free;
1004     free(p);
1005     p = null;
1006   }
1007 }
1008
1009
1010 /**
1011 input timedata has dims[0] X dims[1] X ... X  dims[ndims-1] scalar points
1012 output freqdata has dims[0] X dims[1] X ... X  dims[ndims-1]/2+1 complex points
1013 */
1014 public void kiss_fftndr(S) (kiss_fftndr_cfg!S st, const(kiss_fft_scalar!S)* timedata, kiss_fft_cpx!S* freqdata)
1015 if (is(S == float) || is(S == double) || is(S == real))
1016 {
1017   uint dimReal = st.dimReal;
1018   uint dimOther = st.dimOther;
1019   uint nrbins = dimReal/2+1;
1020
1021   kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1022   kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1023   //assert(tmp1 !is null);
1024
1025   // timedata is N0 x N1 x ... x Nk real
1026
1027   // take a real chunk of data, fft it and place the output at correct intervals
1028   foreach (immutable uint k1; 0..dimOther) {
1029     kiss_fftr!S(st.cfg_r, timedata+k1*dimReal, tmp1); // tmp1 now holds nrbins complex points
1030     foreach (immutable uint k2; 0..nrbins) tmp2[k2*dimOther+k1] = tmp1[k2];
1031   }
1032
1033   foreach (immutable uint k2; 0..nrbins) {
1034     kiss_fftnd!S(st.cfg_nd, tmp2+k2*dimOther, tmp1); // tmp1 now holds dimOther complex points
1035     foreach (immutable uint k1; 0..dimOther) freqdata[k1*nrbins+k2] = tmp1[k1];
1036   }
1037 }
1038
1039
1040 /**
1041 input and output dimensions are the exact opposite of kiss_fftndr
1042 */
1043 public void kiss_fftndri(S) (kiss_fftndr_cfg!S st, const(kiss_fft_cpx!S)* freqdata, kiss_fft_scalar!S* timedata)
1044 if (is(S == float) || is(S == double) || is(S == real))
1045 {
1046   int dimReal = st.dimReal;
1047   int dimOther = st.dimOther;
1048   int nrbins = dimReal/2+1;
1049   kiss_fft_cpx!S* tmp1 = cast(kiss_fft_cpx!S*)st.tmpbuf;
1050   kiss_fft_cpx!S* tmp2 = tmp1+MAX(nrbins, dimOther);
1051
1052   foreach (immutable uint k2; 0..nrbins) {
1053     foreach (immutable uint k1; 0..dimOther) tmp1[k1] = freqdata[k1*(nrbins)+k2];
1054     kiss_fftnd!S(st.cfg_nd, tmp1, tmp2+k2*dimOther);
1055   }
1056
1057   foreach (immutable uint k1; 0..dimOther) {
1058     foreach (immutable uint k2; 0..nrbins) tmp1[k2] = tmp2[k2*dimOther+k1];
1059     kiss_fftri!S(st.cfg_r, tmp1, timedata+k1*dimReal);
1060   }
1061 }