mbandeq.d

   1 // code from LADSPA plugins project: http://plugin.org.uk/
   2 // GNU GPLv3
   3 module iv.mbandeq /*is aliced*/;
   4 private:
   5 import iv.alice;
   6 nothrow @trusted @nogc:
   7
   8 // ////////////////////////////////////////////////////////////////////////// //
   9 //version = FFTW3;
  10
  11 version = mbandeq_extended;
  12 //version = mbandeq_normal;
  13 //version = mbandeq_winamp;
  14
  15
  16 version(FFTW3) {
  17   pragma(lib, "fftw3f");
  18   alias fft_plan = void*;
  19   alias fftw_real = float;
  20   extern(C) nothrow @trusted @nogc {
  21     enum FFTW_MEASURE = 0;
  22     enum { FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2 }
  23     fft_plan fftwf_plan_r2r_1d (int n, fftw_real* inp, fftw_real* outp, usize kind, uint flags);
  24     void fftwf_execute (fft_plan plan);
  25   }
  26 } else {
  27   //import kissfft;
  28   alias fft_plan = kiss_fftr_cfg;
  29   alias fftw_real = kiss_fft_scalar;
  30 }
  31
  32
  33 // ////////////////////////////////////////////////////////////////////////// //
  34 public struct MBandEq {
  35 private:
  36   enum FFT_LENGTH = 1024;
  37   enum OVER_SAMP = 4;
  38   enum STEP_SIZE = FFT_LENGTH/OVER_SAMP;
  39
  40 public:
  41   version(mbandeq_extended) {
  42     enum Bands = 30;
  43   } else version(mbandeq_normal) {
  44     enum Bands = 15;
  45   } else {
  46     enum Bands = 10;
  47   }
  48   enum Latency = FFT_LENGTH-STEP_SIZE;
  49
  50 public:
  51   int[Bands] bands = 0; // [-70..30)
  52   int* binBase;
  53   float* binDelta;
  54   fftw_real* comp;
  55   float* dbtable;
  56   uint fifoPos;
  57   float* inFifo;
  58   float* outAccum;
  59   float* outFifo;
  60   fft_plan planRC;
  61   fft_plan planCR;
  62   fftw_real* realx;
  63   float* window;
  64   int smpRate;
  65
  66   this (int asrate) {
  67     setup(asrate);
  68   }
  69
  70   ~this () {
  71     cleanup();
  72   }
  73
  74   void setup (int asrate) {
  75     import std.math : cos, pow, PI;
  76
  77     cleanup();
  78     //scope(failure) cleanup();
  79
  80     if (asrate < 1024 || asrate > 48000) assert(0, "invalid sampling rate");
  81     smpRate = asrate;
  82     float hzPerBin = cast(float)asrate/cast(float)FFT_LENGTH;
  83
  84     zalloc(inFifo, FFT_LENGTH);
  85     zalloc(outFifo, FFT_LENGTH);
  86     zalloc(outAccum, FFT_LENGTH*2);
  87     zalloc(realx, FFT_LENGTH+16);
  88     zalloc(comp, FFT_LENGTH+16);
  89     zalloc(window, FFT_LENGTH);
  90     zalloc(binBase, FFT_LENGTH/2);
  91     zalloc(binDelta, FFT_LENGTH/2);
  92     fifoPos = Latency;
  93     inFifo[0..FFT_LENGTH] = 0;
  94     outFifo[0..FFT_LENGTH] = 0;
  95
  96     version(FFTW3) {
  97       planRC = fftwf_plan_r2r_1d(FFT_LENGTH, realx, comp, FFTW_R2HC, FFTW_MEASURE);
  98       planCR = fftwf_plan_r2r_1d(FFT_LENGTH, comp, realx, FFTW_HC2R, FFTW_MEASURE);
  99     } else {
 100       planRC = kiss_fftr_alloc(FFT_LENGTH, false); // normal
 101       planCR = kiss_fftr_alloc(FFT_LENGTH, true); // inverse
 102     }
 103
 104     // create raised cosine window table
 105     foreach (immutable i; 0..FFT_LENGTH) {
 106       window[i] = -0.5f*cos(2.0f*PI*cast(double)i/cast(double)FFT_LENGTH)+0.5f;
 107       window[i] *= 2.0f;
 108     }
 109
 110     // create db->coeffiecnt lookup table
 111     zalloc(dbtable, 1000);
 112     foreach (immutable i; 0..1000) {
 113       float db = (cast(float)i/10)-70;
 114       dbtable[i] = pow(10.0f, db/20.0f);
 115     }
 116
 117     // create FFT bin -> band+delta tables
 118     int bin = 0;
 119     while (bin <= bandfrqs[0]/hzPerBin) {
 120       binBase[bin] = 0;
 121       binDelta[bin++] = 0.0f;
 122     }
 123     for (int i = 1; i < Bands-1 && bin < (FFT_LENGTH/2)-1 && bandfrqs[i+1] < asrate/2; ++i) {
 124       float lastBin = bin;
 125       float nextBin = (bandfrqs[i+1])/hzPerBin;
 126       while (bin <= nextBin) {
 127         binBase[bin] = i;
 128         binDelta[bin] = cast(float)(bin-lastBin)/cast(float)(nextBin-lastBin);
 129         ++bin;
 130       }
 131     }
 132     //{ import core.stdc.stdio; printf("bin=%d (%d)\n", bin, FFT_LENGTH/2); }
 133     for (; bin < FFT_LENGTH/2; ++bin) {
 134       binBase[bin] = Bands-1;
 135       binDelta[bin] = 0.0f;
 136     }
 137   }
 138
 139   void cleanup () {
 140     xfree(inFifo);
 141     xfree(outFifo);
 142     xfree(outAccum);
 143     xfree(realx);
 144     xfree(comp);
 145     xfree(window);
 146     xfree(binBase);
 147     xfree(binDelta);
 148     xfree(dbtable);
 149     version(FFTW3) {
 150       //??? no need to do FFTW cleanup?
 151     } else {
 152       kiss_fft_free(planRC);
 153       kiss_fft_free(planCR);
 154       //planRC = null;
 155       //planCR = null;
 156     }
 157   }
 158
 159   // input: input (array of floats of length sample_count)
 160   // output: output (array of floats of length sample_count)
 161   void run (fftw_real[] output, const(fftw_real)[] input, uint stride=1, uint ofs=0) {
 162     import core.stdc.string : memmove;
 163
 164     if (output.length < input.length) assert(0, "wtf?!");
 165     if (stride == 0) assert(0, "wtf?!");
 166     if (ofs >= input.length || input.length < stride) return;
 167
 168     float[Bands+1] gains = void;
 169     foreach (immutable idx, int v; bands[]) {
 170       if (v < -70) v = -70; else if (v > 30) v = 30;
 171       gains.ptr[idx] = cast(float)v;
 172     }
 173     gains[$-1] = 0.0f;
 174
 175     float[FFT_LENGTH/2] coefs = void;
 176
 177     // convert gains from dB to co-efficents
 178     foreach (immutable i; 0..Bands) {
 179       int gain_idx = cast(int)((gains.ptr[i]*10)+700);
 180       if (gain_idx < 0) gain_idx = 0; else if (gain_idx > 999) gain_idx = 999;
 181       gains.ptr[i] = dbtable[gain_idx];
 182     }
 183
 184     // calculate coefficients for each bin of FFT
 185     coefs.ptr[0] = 0.0f;
 186     for (int bin = 1; bin < FFT_LENGTH/2-1; ++bin) {
 187       coefs.ptr[bin] = ((1.0f-binDelta[bin])*gains.ptr[binBase[bin]])+(binDelta[bin]*gains.ptr[binBase[bin]+1]);
 188     }
 189
 190     //if (fifoPos == 0) fifoPos = Latency;
 191
 192     foreach (immutable pos; 0..input.length/stride) {
 193       inFifo[fifoPos] = input.ptr[pos*stride+ofs];
 194       output.ptr[pos*stride+ofs] = outFifo[fifoPos-Latency];
 195       ++fifoPos;
 196       // if the FIFO is full
 197       if (fifoPos >= FFT_LENGTH) {
 198         fifoPos = Latency;
 199         // window input FIFO
 200         foreach (immutable i; 0..FFT_LENGTH) realx[i] = inFifo[i]*window[i];
 201         version(FFTW3) {
 202           // run the real->complex transform
 203           fftwf_execute(planRC);
 204           // multiply the bins magnitudes by the coeficients
 205           comp[0] *= coefs.ptr[0];
 206           foreach (immutable i; 1..FFT_LENGTH/2) {
 207             comp[i] *= coefs.ptr[i];
 208             comp[FFT_LENGTH-i] *= coefs.ptr[i];
 209           }
 210           // run the complex->real transform
 211           fftwf_execute(planCR);
 212         } else {
 213           // run the real->complex transform
 214           realx[FFT_LENGTH..FFT_LENGTH+16] = 0; // just in case
 215           comp[FFT_LENGTH-16..FFT_LENGTH+16] = 0; // just in case
 216           kiss_fftr(planRC, realx, cast(kiss_fft_cpx*)comp);
 217           // multiply the bins magnitudes by the coeficients
 218           comp[0*2+0] *= coefs.ptr[0];
 219           kiss_fft_cpx* cc = cast(kiss_fft_cpx*)comp;
 220           foreach (immutable i; 1..FFT_LENGTH/2) {
 221             //comp[i*2+0] *= coefs.ptr[i];
 222             //comp[i*2+1] *= coefs.ptr[i];
 223             cc[i].r *= coefs.ptr[i];
 224             cc[i].i *= coefs.ptr[i];
 225           }
 226           // run the complex->real transform
 227           kiss_fftri(planCR, cast(const(kiss_fft_cpx)*)comp, realx);
 228         }
 229         // window into the output accumulator
 230         foreach (immutable i; 0..FFT_LENGTH) outAccum[i] += 0.9186162f*window[i]*realx[i]/(FFT_LENGTH*OVER_SAMP);
 231         //foreach (immutable i; 0..STEP_SIZE) outFifo[i] = outAccum[i];
 232         outFifo[0..STEP_SIZE] = outAccum[0..STEP_SIZE];
 233         // shift output accumulator
 234         memmove(outAccum, outAccum+STEP_SIZE, FFT_LENGTH*outAccum[0].sizeof);
 235         // shift input fifo
 236         //foreach (immutable i; 0..Latency) inFifo[i] = inFifo[i+STEP_SIZE];
 237         //memmove(inFifo, inFifo+Latency, (FFT_LENGTH-Latency)*float.sizeof);
 238         memmove(inFifo, inFifo+STEP_SIZE, Latency*inFifo[0].sizeof);
 239       }
 240     }
 241   }
 242
 243 public:
 244   version(mbandeq_extended) {
 245     /*
 246     static immutable float[Bands] bandfrqs = [
 247       12.5, 25, 37.5, 50, 62.5, 75, 87.5, 100, 125, 150, 175, 200, 250, 300, 350, 400,
 248       500, 600, 700, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, 3200, 4000, 4800,
 249       5600, 6400, 8000, 9600, 11200, 12800, 16000, 19200, 22400
 250     ];
 251     */
 252     static immutable float[Bands] bandfrqs = [
 253       50, 75, 125, 150, 200, 250, 300, 350, 400,
 254       500, 600, 700, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, 3200, 4000, 4800,
 255       5600, 6400, 8000, 9600, 11200, 12800, 16000
 256     ];
 257   } else version(mbandeq_normal) {
 258     static immutable float[Bands] bandfrqs = [
 259        50.00f,  100.00f,  155.56f,  220.00f,  311.13f, 440.00f, 622.25f,
 260       880.00f, 1244.51f, 1760.00f, 2489.02f, 3519.95, 4978.04f, 9956.08f,
 261       19912.16f
 262     ];
 263   } else {
 264     static immutable float[Bands] bandfrqs = [ 60.00f,  170.00f,  310.00f,  600.00f, 1000.00f, 3000.00f, 6000.00f, 12000.00f, 14000.00f, 16000.00f ];
 265   }
 266
 267 private:
 268   static void zalloc(T) (ref T* res, uint count) nothrow @trusted @nogc {
 269     import core.stdc.stdlib : malloc;
 270     import core.stdc.string : memcpy, memset;
 271     if (count == 0 || count > 1024*1024*8) assert(0, "wtf?!");
 272     res = cast(T*)malloc(T.sizeof*count);
 273     if (res is null) assert(0, "out of memory");
 274     memset(res, 0, T.sizeof*count);
 275   }
 276
 277   static void xfree(T) (ref T* ptr) nothrow @trusted @nogc {
 278     import core.stdc.stdlib : free;
 279     if (ptr !is null) { free(ptr); ptr = null; }
 280   }
 281 }
 282
 283
 284 // ////////////////////////////////////////////////////////////////////////// //
 285 // kissfft
 286 version(FFTW3) {} else {
 287 /*
 288  * Copyright (c) 2003-2010, Mark Borgerding
 289  *
 290  * All rights reserved.
 291  *
 292  * Redistribution and use in source and binary forms, with or without
 293  * modification, are permitted provided that the following conditions are
 294  * met:
 295  *
 296  *  * Redistributions of source code must retain the above copyright
 297  *    notice, this list of conditions and the following disclaimer.
 298  *
 299  *  * Redistributions in binary form must reproduce the above copyright
 300  *    notice, this list of conditions and the following disclaimer in the
 301  *    documentation and/or other materials provided with the
 302  *    distribution.
 303  *
 304  *  * Neither the author nor the names of any contributors may be used to
 305  *    endorse or promote products derived from this software without
 306  *    specific prior written permission.
 307  *
 308  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 309  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 310  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 311  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 312  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 313  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 314  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 315  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 316  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 317  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 318  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 319  */
 320 //module kissfft /*is aliced*/;
 321 private:
 322 nothrow @trusted @nogc:
 323
 324 // ////////////////////////////////////////////////////////////////////////// //
 325 ///
 326 public alias kiss_fft_scalar = float;
 327
 328
 329 ///
 330 public align(1) struct kiss_fft_cpx {
 331 align(1):
 332   kiss_fft_scalar r;
 333   kiss_fft_scalar i;
 334
 335 pure nothrow @safe @nogc:
 336   // C_MULBYSCALAR
 337   void opOpAssign(string op:"*") (in kiss_fft_scalar s) {
 338     pragma(inline, true);
 339     r *= s;
 340     i *= s;
 341   }
 342
 343   // C_ADDTO, C_SUBFROM
 344   void opOpAssign(string op) (in kiss_fft_cpx b) if (op == "+" || op == "-") {
 345     pragma(inline, true);
 346     mixin("r"~op~"=b.r;");
 347     mixin("i"~op~"=b.i;");
 348   }
 349
 350   // C_MUL
 351   kiss_fft_cpx opBinary(string op:"*") (in auto ref kiss_fft_cpx b) const {
 352     pragma(inline, true);
 353     return kiss_fft_cpx(r*b.r-i*b.i, r*b.i+i*b.r);
 354   }
 355
 356   // C_ADD, C_SUB
 357   kiss_fft_cpx opBinary(string op) (in auto ref kiss_fft_cpx b) const if (op == "+" || op == "-") {
 358     pragma(inline, true);
 359     mixin("return kiss_fft_cpx(r"~op~"b.r, i"~op~"b.i);");
 360   }
 361 }
 362
 363
 364 ///
 365 public alias kiss_fft_cfg = kiss_fft_state*;
 366
 367
 368 // ////////////////////////////////////////////////////////////////////////// //
 369 enum MAXFACTORS = 32;
 370 /* e.g. an fft of length 128 has 4 factors
 371  as far as kissfft is concerned
 372  4*4*4*2
 373  */
 374
 375 struct kiss_fft_state {
 376   int nfft;
 377   bool inverse;
 378   int[2*MAXFACTORS] factors;
 379   kiss_fft_cpx[1] twiddles;
 380 }
 381
 382
 383 // ////////////////////////////////////////////////////////////////////////// //
 384 private void kf_bfly2 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m) {
 385   kiss_fft_cpx* Fout2;
 386   const(kiss_fft_cpx)* tw1 = st.twiddles.ptr;
 387   kiss_fft_cpx t;
 388   Fout2 = Fout+m;
 389   do {
 390     t = (*Fout2)*(*tw1);
 391     tw1 += fstride;
 392     (*Fout2) = (*Fout)-t;
 393     (*Fout) += t;
 394     ++Fout2;
 395     ++Fout;
 396   } while (--m);
 397 }
 398
 399
 400 private void kf_bfly4 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, in usize m) {
 401   const(kiss_fft_cpx)* tw1, tw2, tw3;
 402   kiss_fft_cpx[6] scratch = void;
 403   usize k = m;
 404   immutable usize m2 = 2*m;
 405   immutable usize m3 = 3*m;
 406   tw3 = tw2 = tw1 = st.twiddles.ptr;
 407   do {
 408     scratch.ptr[0] = Fout[m]*(*tw1);
 409     scratch.ptr[1] = Fout[m2]*(*tw2);
 410     scratch.ptr[2] = Fout[m3]*(*tw3);
 411
 412     scratch.ptr[5] = (*Fout)-scratch.ptr[1];
 413     (*Fout) += scratch.ptr[1];
 414     scratch.ptr[3] = scratch.ptr[0]+scratch.ptr[2];
 415     scratch.ptr[4] = scratch.ptr[0]-scratch.ptr[2];
 416     Fout[m2] = (*Fout)-scratch.ptr[3];
 417     tw1 += fstride;
 418     tw2 += fstride*2;
 419     tw3 += fstride*3;
 420     (*Fout) += scratch.ptr[3];
 421
 422     if (st.inverse) {
 423       Fout[m].r = scratch.ptr[5].r-scratch.ptr[4].i;
 424       Fout[m].i = scratch.ptr[5].i+scratch.ptr[4].r;
 425       Fout[m3].r = scratch.ptr[5].r+scratch.ptr[4].i;
 426       Fout[m3].i = scratch.ptr[5].i-scratch.ptr[4].r;
 427     } else {
 428       Fout[m].r = scratch.ptr[5].r+scratch.ptr[4].i;
 429       Fout[m].i = scratch.ptr[5].i-scratch.ptr[4].r;
 430       Fout[m3].r = scratch.ptr[5].r-scratch.ptr[4].i;
 431       Fout[m3].i = scratch.ptr[5].i+scratch.ptr[4].r;
 432     }
 433     ++Fout;
 434   } while (--k);
 435 }
 436
 437
 438 private void kf_bfly3 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, usize m) {
 439   usize k = m;
 440   immutable usize m2 = 2*m;
 441   const(kiss_fft_cpx)* tw1, tw2;
 442   kiss_fft_cpx[5] scratch = void;
 443   kiss_fft_cpx epi3;
 444   epi3 = st.twiddles.ptr[fstride*m];
 445   tw1 = tw2 = st.twiddles.ptr;
 446   do {
 447     scratch.ptr[1] = Fout[m]*(*tw1);
 448     scratch.ptr[2] = Fout[m2]*(*tw2);
 449
 450     scratch.ptr[3] = scratch.ptr[1]+scratch.ptr[2];
 451     scratch.ptr[0] = scratch.ptr[1]-scratch.ptr[2];
 452     tw1 += fstride;
 453     tw2 += fstride*2;
 454
 455     Fout[m].r = Fout.r-(scratch.ptr[3].r*cast(kiss_fft_scalar)0.5);
 456     Fout[m].i = Fout.i-(scratch.ptr[3].i*cast(kiss_fft_scalar)0.5);
 457
 458     scratch.ptr[0] *= epi3.i;
 459
 460     (*Fout) += scratch.ptr[3];
 461
 462     Fout[m2].r = Fout[m].r+scratch.ptr[0].i;
 463     Fout[m2].i = Fout[m].i-scratch.ptr[0].r;
 464
 465     Fout[m].r -= scratch.ptr[0].i;
 466     Fout[m].i += scratch.ptr[0].r;
 467
 468     ++Fout;
 469   } while (--k);
 470 }
 471
 472
 473 private void kf_bfly5 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m) {
 474   kiss_fft_cpx* Fout0, Fout1, Fout2, Fout3, Fout4;
 475   int u;
 476   kiss_fft_cpx[13] scratch = void;
 477   const(kiss_fft_cpx)* twiddles = st.twiddles.ptr;
 478   const(kiss_fft_cpx)* tw;
 479   kiss_fft_cpx ya, yb;
 480   ya = twiddles[fstride*m];
 481   yb = twiddles[fstride*2*m];
 482
 483   Fout0 = Fout;
 484   Fout1 = Fout0+m;
 485   Fout2 = Fout0+2*m;
 486   Fout3 = Fout0+3*m;
 487   Fout4 = Fout0+4*m;
 488
 489   tw = st.twiddles.ptr;
 490   for (u = 0; u < m; ++u) {
 491     scratch.ptr[0] = *Fout0;
 492
 493     scratch.ptr[1] = (*Fout1)*tw[u*fstride];
 494     scratch.ptr[2] = (*Fout2)*tw[2*u*fstride];
 495     scratch.ptr[3] = (*Fout3)*tw[3*u*fstride];
 496     scratch.ptr[4] = (*Fout4)*tw[4*u*fstride];
 497
 498     scratch.ptr[7] = scratch.ptr[1]+scratch.ptr[4];
 499     scratch.ptr[10] = scratch.ptr[1]-scratch.ptr[4];
 500     scratch.ptr[8] = scratch.ptr[2]+scratch.ptr[3];
 501     scratch.ptr[9] = scratch.ptr[2]-scratch.ptr[3];
 502
 503     Fout0.r += scratch.ptr[7].r+scratch.ptr[8].r;
 504     Fout0.i += scratch.ptr[7].i+scratch.ptr[8].i;
 505
 506     scratch.ptr[5].r = scratch.ptr[0].r+scratch.ptr[7].r*ya.r+scratch.ptr[8].r*yb.r;
 507     scratch.ptr[5].i = scratch.ptr[0].i+scratch.ptr[7].i*ya.r+scratch.ptr[8].i*yb.r;
 508
 509     scratch.ptr[6].r =  scratch.ptr[10].i*ya.i+scratch.ptr[9].i*yb.i;
 510     scratch.ptr[6].i = -scratch.ptr[10].r*ya.i-scratch.ptr[9].r*yb.i;
 511
 512     (*Fout1) = scratch.ptr[5]-scratch.ptr[6];
 513     (*Fout4) = scratch.ptr[5]+scratch.ptr[6];
 514
 515     scratch.ptr[11].r = scratch.ptr[0].r+scratch.ptr[7].r*yb.r+scratch.ptr[8].r*ya.r;
 516     scratch.ptr[11].i = scratch.ptr[0].i+scratch.ptr[7].i*yb.r+scratch.ptr[8].i*ya.r;
 517     scratch.ptr[12].r = -scratch.ptr[10].i*yb.i+scratch.ptr[9].i*ya.i;
 518     scratch.ptr[12].i =  scratch.ptr[10].r*yb.i-scratch.ptr[9].r*ya.i;
 519
 520     (*Fout2) = scratch.ptr[11]+scratch.ptr[12];
 521     (*Fout3) = scratch.ptr[11]-scratch.ptr[12];
 522
 523     ++Fout0;
 524     ++Fout1;
 525     ++Fout2;
 526     ++Fout3;
 527     ++Fout4;
 528   }
 529 }
 530
 531
 532 // perform the butterfly for one stage of a mixed radix FFT
 533 private void kf_bfly_generic (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m, int p) {
 534   import core.stdc.stdlib : alloca;
 535   int u, k, q1, q;
 536   const(kiss_fft_cpx)* twiddles = st.twiddles.ptr;
 537   kiss_fft_cpx t;
 538   int Norig = st.nfft;
 539
 540   //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
 541   kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)alloca(kiss_fft_cpx.sizeof*p);
 542
 543   for (u = 0; u < m; ++u) {
 544     k = u;
 545     for (q1 = 0; q1 < p; ++q1) {
 546       scratch[q1] = Fout[k];
 547       k += m;
 548     }
 549
 550     k = u;
 551     for (q1 = 0; q1 < p; ++q1) {
 552       int twidx = 0;
 553       Fout[k] = scratch[0];
 554       for (q = 1; q < p; ++q) {
 555         twidx += fstride*k;
 556         if (twidx >= Norig) twidx -= Norig;
 557         t = scratch[q]*twiddles[twidx];
 558         Fout[k] += t;
 559       }
 560       k += m;
 561     }
 562   }
 563   //KISS_FFT_TMP_FREE(scratch);
 564 }
 565
 566
 567 private void kf_work (kiss_fft_cpx* Fout, const(kiss_fft_cpx)* f, in usize fstride, int in_stride, int* factors, const(kiss_fft_cfg) st) {
 568   kiss_fft_cpx* Fout_beg = Fout;
 569   immutable int p = *factors++; // the radix
 570   immutable int m = *factors++; // stage's fft length/p
 571   const(kiss_fft_cpx)* Fout_end = Fout+p*m;
 572
 573   if (m == 1) {
 574     do {
 575       *Fout = *f;
 576       f += fstride*in_stride;
 577     } while (++Fout != Fout_end);
 578   } else {
 579     do {
 580       // recursive call:
 581       // DFT of size m*p performed by doing
 582       // p instances of smaller DFTs of size m,
 583       // each one takes a decimated version of the input
 584       kf_work(Fout, f, fstride*p, in_stride, factors, st);
 585       f += fstride*in_stride;
 586     } while ((Fout += m) != Fout_end);
 587   }
 588
 589   Fout = Fout_beg;
 590
 591   // recombine the p smaller DFTs
 592   switch (p) {
 593     case 2: kf_bfly2(Fout, fstride, st, m); break;
 594     case 3: kf_bfly3(Fout, fstride, st, m); break;
 595     case 4: kf_bfly4(Fout, fstride, st, m); break;
 596     case 5: kf_bfly5(Fout, fstride, st, m); break;
 597     default: kf_bfly_generic(Fout, fstride, st, m, p); break;
 598   }
 599 }
 600
 601
 602 /* facbuf is populated by p1, m1, p2, m2, ...
 603  * where
 604  *   p[i]*m[i] = m[i-1]
 605  *   m0 = n
 606  */
 607 private void kf_factor (int n, int* facbuf) {
 608   import std.math : floor, sqrt;
 609   immutable double floor_sqrt = floor(sqrt(cast(double)n));
 610   int p = 4;
 611   // factor out powers of 4, powers of 2, then any remaining primes
 612   do {
 613     while (n%p) {
 614       switch (p) {
 615         case 4: p = 2; break;
 616         case 2: p = 3; break;
 617         default: p += 2; break;
 618       }
 619       if (p > floor_sqrt) p = n; // no more factors, skip to end
 620     }
 621     n /= p;
 622     *facbuf++ = p;
 623     *facbuf++ = n;
 624   } while (n > 1);
 625 }
 626
 627
 628 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
 629  *
 630  * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, 0, null, null);`
 631  *
 632  * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
 633  *
 634  * If lenmem is `null`, then kiss_fft_alloc will allocate a cfg buffer using malloc.
 635  * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
 636  *
 637  * The state can be placed in a user supplied buffer `mem`:
 638  * If lenmem is not `null` and `mem` is not `null` and `*lenmem` is large enough,
 639  * then the function places the cfg in `mem` and the size used in `*lenmem`,
 640  * and returns mem.
 641  *
 642  * If lenmem is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
 643  * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
 644  */
 645 public kiss_fft_cfg kiss_fft_alloc (int nfft, bool inverse_fft, void* mem=null, usize* lenmem=null) {
 646   kiss_fft_cfg st = null;
 647   usize memneeded = kiss_fft_state.sizeof+kiss_fft_cpx.sizeof*(nfft-1); // twiddle factors
 648   if (lenmem is null) {
 649     import core.stdc.stdlib : malloc;
 650     st = cast(kiss_fft_cfg)malloc(memneeded);
 651   } else {
 652     if (mem !is null && *lenmem >= memneeded) st = cast(kiss_fft_cfg)mem;
 653     *lenmem = memneeded;
 654   }
 655   if (st !is null) {
 656     st.nfft = nfft;
 657     st.inverse = inverse_fft;
 658     for (int i = 0; i < nfft; ++i) {
 659       import std.math : cos, sin, PI;
 660       double phase = -2*PI*i/nfft;
 661       if (st.inverse) phase *= -1;
 662       st.twiddles.ptr[i].r = cos(phase);
 663       st.twiddles.ptr[i].i = sin(phase);
 664
 665     }
 666     kf_factor(nfft, st.factors.ptr);
 667   }
 668   return st;
 669 }
 670
 671
 672 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
 673  * buffer and can be simply free()d when no longer needed
 674  */
 675 public void kiss_fft_free(T) (ref T* p) {
 676   if (p !is null) {
 677     import core.stdc.stdlib : free;
 678     free(p);
 679     p = null;
 680   }
 681 }
 682
 683
 684 /** Perform an FFT on a complex input buffer.
 685  *
 686  * for a forward FFT,
 687  * fin should be  f[0] , f[1] , ... ,f[nfft-1]
 688  * fout will be   F[0] , F[1] , ... ,F[nfft-1]
 689  * Note that each element is complex and can be accessed like f[k].r and f[k].i
 690  */
 691 public void kiss_fft (kiss_fft_cfg cfg, const(kiss_fft_cpx)* fin, kiss_fft_cpx* fout) {
 692   kiss_fft_stride(cfg, fin, fout, 1);
 693 }
 694
 695
 696 /** A more generic version of the above function. It reads its input from every Nth sample. */
 697 public void kiss_fft_stride (kiss_fft_cfg st, const(kiss_fft_cpx)* fin, kiss_fft_cpx* fout, int in_stride) {
 698   import core.stdc.stdlib : alloca;
 699   if (fin is fout) {
 700     import core.stdc.string : memcpy;
 701     //NOTE: this is not really an in-place FFT algorithm.
 702     //It just performs an out-of-place FFT into a temp buffer
 703     //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
 704     kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)alloca(kiss_fft_cpx.sizeof*st.nfft);
 705     kf_work(tmpbuf, fin, 1, in_stride, st.factors.ptr, st);
 706     memcpy(fout, tmpbuf, kiss_fft_cpx.sizeof*st.nfft);
 707     //KISS_FFT_TMP_FREE(tmpbuf);
 708   } else {
 709     kf_work(fout, fin, 1, in_stride, st.factors.ptr, st);
 710   }
 711 }
 712
 713
 714 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
 715 public int kiss_fft_next_fast_size (int n) {
 716   for (;;) {
 717     int m = n;
 718     while ((m%2) == 0) m /= 2;
 719     while ((m%3) == 0) m /= 3;
 720     while ((m%5) == 0) m /= 5;
 721     if (m <= 1) break; // n is completely factorable by twos, threes, and fives
 722     ++n;
 723   }
 724   return n;
 725 }
 726
 727
 728 /** for real ffts, we need an even size */
 729 public int kiss_fftr_next_fast_size_real (int n) {
 730   return kiss_fft_next_fast_size((n+1)>>1)<<1;
 731 }
 732
 733
 734 // ////////////////////////////////////////////////////////////////////////// //
 735 // kissfftr
 736 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
 737 public alias kiss_fftr_cfg = kiss_fftr_state*;
 738
 739 struct kiss_fftr_state {
 740   kiss_fft_cfg substate;
 741   kiss_fft_cpx* tmpbuf;
 742   kiss_fft_cpx* super_twiddles;
 743 }
 744
 745
 746 /*
 747  ** nfft must be even
 748  *
 749  * If you don't care to allocate space, use mem = lenmem = null
 750  */
 751 public kiss_fftr_cfg kiss_fftr_alloc (int nfft, bool inverse_fft, void* mem=null, usize* lenmem=null) {
 752   kiss_fftr_cfg st = null;
 753   usize subsize, memneeded;
 754
 755   if (nfft&1) assert(0, "real FFT optimization must be even");
 756   nfft >>= 1;
 757
 758   kiss_fft_alloc(nfft, inverse_fft, null, &subsize);
 759   memneeded = kiss_fftr_state.sizeof+subsize+kiss_fft_cpx.sizeof*(nfft*3/2);
 760
 761   if (lenmem is null) {
 762     import core.stdc.stdlib : malloc;
 763     st = cast(kiss_fftr_cfg)malloc(memneeded);
 764   } else {
 765     if (*lenmem >= memneeded) st = cast(kiss_fftr_cfg)mem;
 766     *lenmem = memneeded;
 767   }
 768   if (st is null) return null;
 769
 770   st.substate = cast(kiss_fft_cfg)(st+1); // just beyond kiss_fftr_state struct
 771   st.tmpbuf = cast(kiss_fft_cpx*)((cast(ubyte*)st.substate)+subsize);
 772   st.super_twiddles = st.tmpbuf+nfft;
 773   kiss_fft_alloc(nfft, inverse_fft, st.substate, &subsize);
 774
 775   foreach (immutable i; 0..nfft/2) {
 776     import std.math : cos, sin, PI;
 777     double phase = -PI*(cast(double)(i+1)/nfft+0.5);
 778     if (inverse_fft) phase *= -1;
 779     st.super_twiddles[i].r = cos(phase);
 780     st.super_twiddles[i].i = sin(phase);
 781   }
 782
 783   return st;
 784 }
 785
 786
 787 /** input timedata has nfft scalar points
 788  * output freqdata has nfft/2+1 complex points
 789  */
 790 public void kiss_fftr (kiss_fftr_cfg st, const(kiss_fft_scalar)* timedata, kiss_fft_cpx* freqdata) {
 791   // input buffer timedata is stored row-wise
 792   int k, ncfft;
 793   kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
 794
 795   if (st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
 796
 797   ncfft = st.substate.nfft;
 798
 799   // perform the parallel fft of two real signals packed in real,imag
 800   kiss_fft(st.substate, cast(const(kiss_fft_cpx)*)timedata, st.tmpbuf);
 801   /* The real part of the DC element of the frequency spectrum in st->tmpbuf
 802    * contains the sum of the even-numbered elements of the input time sequence
 803    * The imag part is the sum of the odd-numbered elements
 804    *
 805    * The sum of tdc.r and tdc.i is the sum of the input time sequence.
 806    *      yielding DC of input time sequence
 807    * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
 808    *      yielding Nyquist bin of input time sequence
 809    */
 810
 811   tdc.r = st.tmpbuf[0].r;
 812   tdc.i = st.tmpbuf[0].i;
 813   freqdata[0].r = tdc.r+tdc.i;
 814   freqdata[ncfft].r = tdc.r-tdc.i;
 815   freqdata[ncfft].i = freqdata[0].i = 0;
 816
 817   for (k = 1; k <= ncfft/2; ++k) {
 818     fpk = st.tmpbuf[k];
 819     fpnk.r = st.tmpbuf[ncfft-k].r;
 820     fpnk.i = -st.tmpbuf[ncfft-k].i;
 821
 822     f1k = fpk+fpnk;
 823     f2k = fpk-fpnk;
 824     tw = f2k*st.super_twiddles[k-1];
 825
 826     freqdata[k].r = (f1k.r+tw.r)*cast(kiss_fft_scalar)0.5;
 827     freqdata[k].i = (f1k.i+tw.i)*cast(kiss_fft_scalar)0.5;
 828     freqdata[ncfft-k].r = (f1k.r-tw.r)*cast(kiss_fft_scalar)0.5;
 829     freqdata[ncfft-k].i = (tw.i-f1k.i)*cast(kiss_fft_scalar)0.5;
 830   }
 831 }
 832
 833
 834 /** input freqdata has  nfft/2+1 complex points
 835  * output timedata has nfft scalar points
 836  */
 837 public void kiss_fftri (kiss_fftr_cfg st, const(kiss_fft_cpx)* freqdata, kiss_fft_scalar* timedata) {
 838   // input buffer timedata is stored row-wise
 839   int k, ncfft;
 840
 841   if (!st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
 842
 843   ncfft = st.substate.nfft;
 844
 845   st.tmpbuf[0].r = freqdata[0].r+freqdata[ncfft].r;
 846   st.tmpbuf[0].i = freqdata[0].r-freqdata[ncfft].r;
 847
 848   for (k = 1; k <= ncfft/2; ++k) {
 849     kiss_fft_cpx fk, fnkc, fek, fok, tmp;
 850     fk = freqdata[k];
 851     fnkc.r = freqdata[ncfft-k].r;
 852     fnkc.i = -freqdata[ncfft-k].i;
 853
 854     fek = fk+fnkc;
 855     tmp = fk-fnkc;
 856     fok = tmp*st.super_twiddles[k-1];
 857     st.tmpbuf[k] = fek+fok;
 858     st.tmpbuf[ncfft-k] = fek-fok;
 859     st.tmpbuf[ncfft-k].i *= -1;
 860   }
 861   kiss_fft(st.substate, st.tmpbuf, cast(kiss_fft_cpx*)timedata);
 862 }
 863 } // version, kissfft