some updates
[iv.d.git] / mbandeq.d
blob2f6dce6feb59b4f9666e9bfa21991e2e4d49f9e8
1 // code from LADSPA plugins project: http://plugin.org.uk/
2 // GNU GPLv3
3 module iv.mbandeq /*is aliced*/;
4 private:
5 import iv.alice;
6 nothrow @trusted @nogc:
8 // ////////////////////////////////////////////////////////////////////////// //
9 //version = FFTW3;
11 version = mbandeq_extended;
12 //version = mbandeq_normal;
13 //version = mbandeq_winamp;
16 version(FFTW3) {
17 pragma(lib, "fftw3f");
18 alias fft_plan = void*;
19 alias fftw_real = float;
20 extern(C) nothrow @trusted @nogc {
21 enum FFTW_MEASURE = 0;
22 enum { FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2 }
23 fft_plan fftwf_plan_r2r_1d (int n, fftw_real* inp, fftw_real* outp, usize kind, uint flags);
24 void fftwf_execute (fft_plan plan);
26 } else {
27 //import kissfft;
28 alias fft_plan = kiss_fftr_cfg;
29 alias fftw_real = kiss_fft_scalar;
33 // ////////////////////////////////////////////////////////////////////////// //
34 public struct MBandEq {
35 private:
36 enum FFT_LENGTH = 1024;
37 enum OVER_SAMP = 4;
38 enum STEP_SIZE = FFT_LENGTH/OVER_SAMP;
40 public:
41 version(mbandeq_extended) {
42 enum Bands = 30;
43 } else version(mbandeq_normal) {
44 enum Bands = 15;
45 } else {
46 enum Bands = 10;
48 enum Latency = FFT_LENGTH-STEP_SIZE;
50 public:
51 int[Bands] bands = 0; // [-70..30)
52 int* binBase;
53 float* binDelta;
54 fftw_real* comp;
55 float* dbtable;
56 uint fifoPos;
57 float* inFifo;
58 float* outAccum;
59 float* outFifo;
60 fft_plan planRC;
61 fft_plan planCR;
62 fftw_real* realx;
63 float* window;
64 int smpRate;
66 this (int asrate) {
67 setup(asrate);
70 ~this () {
71 cleanup();
74 void setup (int asrate) {
75 import std.math : cos, pow, PI;
77 cleanup();
78 //scope(failure) cleanup();
80 if (asrate < 1024 || asrate > 48000) assert(0, "invalid sampling rate");
81 smpRate = asrate;
82 float hzPerBin = cast(float)asrate/cast(float)FFT_LENGTH;
84 zalloc(inFifo, FFT_LENGTH);
85 zalloc(outFifo, FFT_LENGTH);
86 zalloc(outAccum, FFT_LENGTH*2);
87 zalloc(realx, FFT_LENGTH+16);
88 zalloc(comp, FFT_LENGTH+16);
89 zalloc(window, FFT_LENGTH);
90 zalloc(binBase, FFT_LENGTH/2);
91 zalloc(binDelta, FFT_LENGTH/2);
92 fifoPos = Latency;
93 inFifo[0..FFT_LENGTH] = 0;
94 outFifo[0..FFT_LENGTH] = 0;
96 version(FFTW3) {
97 planRC = fftwf_plan_r2r_1d(FFT_LENGTH, realx, comp, FFTW_R2HC, FFTW_MEASURE);
98 planCR = fftwf_plan_r2r_1d(FFT_LENGTH, comp, realx, FFTW_HC2R, FFTW_MEASURE);
99 } else {
100 planRC = kiss_fftr_alloc(FFT_LENGTH, false); // normal
101 planCR = kiss_fftr_alloc(FFT_LENGTH, true); // inverse
104 // create raised cosine window table
105 foreach (immutable i; 0..FFT_LENGTH) {
106 window[i] = -0.5f*cos(2.0f*PI*cast(double)i/cast(double)FFT_LENGTH)+0.5f;
107 window[i] *= 2.0f;
110 // create db->coeffiecnt lookup table
111 zalloc(dbtable, 1000);
112 foreach (immutable i; 0..1000) {
113 float db = (cast(float)i/10)-70;
114 dbtable[i] = pow(10.0f, db/20.0f);
117 // create FFT bin -> band+delta tables
118 int bin = 0;
119 while (bin <= bandfrqs[0]/hzPerBin) {
120 binBase[bin] = 0;
121 binDelta[bin++] = 0.0f;
123 for (int i = 1; i < Bands-1 && bin < (FFT_LENGTH/2)-1 && bandfrqs[i+1] < asrate/2; ++i) {
124 float lastBin = bin;
125 float nextBin = (bandfrqs[i+1])/hzPerBin;
126 while (bin <= nextBin) {
127 binBase[bin] = i;
128 binDelta[bin] = cast(float)(bin-lastBin)/cast(float)(nextBin-lastBin);
129 ++bin;
132 //{ import core.stdc.stdio; printf("bin=%d (%d)\n", bin, FFT_LENGTH/2); }
133 for (; bin < FFT_LENGTH/2; ++bin) {
134 binBase[bin] = Bands-1;
135 binDelta[bin] = 0.0f;
139 void cleanup () {
140 xfree(inFifo);
141 xfree(outFifo);
142 xfree(outAccum);
143 xfree(realx);
144 xfree(comp);
145 xfree(window);
146 xfree(binBase);
147 xfree(binDelta);
148 xfree(dbtable);
149 version(FFTW3) {
150 //??? no need to do FFTW cleanup?
151 } else {
152 kiss_fft_free(planRC);
153 kiss_fft_free(planCR);
154 //planRC = null;
155 //planCR = null;
159 // input: input (array of floats of length sample_count)
160 // output: output (array of floats of length sample_count)
161 void run (fftw_real[] output, const(fftw_real)[] input, uint stride=1, uint ofs=0) {
162 import core.stdc.string : memmove;
164 if (output.length < input.length) assert(0, "wtf?!");
165 if (stride == 0) assert(0, "wtf?!");
166 if (ofs >= input.length || input.length < stride) return;
168 float[Bands+1] gains = void;
169 foreach (immutable idx, int v; bands[]) {
170 if (v < -70) v = -70; else if (v > 30) v = 30;
171 gains.ptr[idx] = cast(float)v;
173 gains[$-1] = 0.0f;
175 float[FFT_LENGTH/2] coefs = void;
177 // convert gains from dB to co-efficents
178 foreach (immutable i; 0..Bands) {
179 int gain_idx = cast(int)((gains.ptr[i]*10)+700);
180 if (gain_idx < 0) gain_idx = 0; else if (gain_idx > 999) gain_idx = 999;
181 gains.ptr[i] = dbtable[gain_idx];
184 // calculate coefficients for each bin of FFT
185 coefs.ptr[0] = 0.0f;
186 for (int bin = 1; bin < FFT_LENGTH/2-1; ++bin) {
187 coefs.ptr[bin] = ((1.0f-binDelta[bin])*gains.ptr[binBase[bin]])+(binDelta[bin]*gains.ptr[binBase[bin]+1]);
190 //if (fifoPos == 0) fifoPos = Latency;
192 foreach (immutable pos; 0..input.length/stride) {
193 inFifo[fifoPos] = input.ptr[pos*stride+ofs];
194 output.ptr[pos*stride+ofs] = outFifo[fifoPos-Latency];
195 ++fifoPos;
196 // if the FIFO is full
197 if (fifoPos >= FFT_LENGTH) {
198 fifoPos = Latency;
199 // window input FIFO
200 foreach (immutable i; 0..FFT_LENGTH) realx[i] = inFifo[i]*window[i];
201 version(FFTW3) {
202 // run the real->complex transform
203 fftwf_execute(planRC);
204 // multiply the bins magnitudes by the coeficients
205 comp[0] *= coefs.ptr[0];
206 foreach (immutable i; 1..FFT_LENGTH/2) {
207 comp[i] *= coefs.ptr[i];
208 comp[FFT_LENGTH-i] *= coefs.ptr[i];
210 // run the complex->real transform
211 fftwf_execute(planCR);
212 } else {
213 // run the real->complex transform
214 realx[FFT_LENGTH..FFT_LENGTH+16] = 0; // just in case
215 comp[FFT_LENGTH-16..FFT_LENGTH+16] = 0; // just in case
216 kiss_fftr(planRC, realx, cast(kiss_fft_cpx*)comp);
217 // multiply the bins magnitudes by the coeficients
218 comp[0*2+0] *= coefs.ptr[0];
219 kiss_fft_cpx* cc = cast(kiss_fft_cpx*)comp;
220 foreach (immutable i; 1..FFT_LENGTH/2) {
221 //comp[i*2+0] *= coefs.ptr[i];
222 //comp[i*2+1] *= coefs.ptr[i];
223 cc[i].r *= coefs.ptr[i];
224 cc[i].i *= coefs.ptr[i];
226 // run the complex->real transform
227 kiss_fftri(planCR, cast(const(kiss_fft_cpx)*)comp, realx);
229 // window into the output accumulator
230 foreach (immutable i; 0..FFT_LENGTH) outAccum[i] += 0.9186162f*window[i]*realx[i]/(FFT_LENGTH*OVER_SAMP);
231 //foreach (immutable i; 0..STEP_SIZE) outFifo[i] = outAccum[i];
232 outFifo[0..STEP_SIZE] = outAccum[0..STEP_SIZE];
233 // shift output accumulator
234 memmove(outAccum, outAccum+STEP_SIZE, FFT_LENGTH*outAccum[0].sizeof);
235 // shift input fifo
236 //foreach (immutable i; 0..Latency) inFifo[i] = inFifo[i+STEP_SIZE];
237 //memmove(inFifo, inFifo+Latency, (FFT_LENGTH-Latency)*float.sizeof);
238 memmove(inFifo, inFifo+STEP_SIZE, Latency*inFifo[0].sizeof);
243 public:
244 version(mbandeq_extended) {
246 static immutable float[Bands] bandfrqs = [
247 12.5, 25, 37.5, 50, 62.5, 75, 87.5, 100, 125, 150, 175, 200, 250, 300, 350, 400,
248 500, 600, 700, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, 3200, 4000, 4800,
249 5600, 6400, 8000, 9600, 11200, 12800, 16000, 19200, 22400
252 static immutable float[Bands] bandfrqs = [
253 50, 75, 125, 150, 200, 250, 300, 350, 400,
254 500, 600, 700, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, 3200, 4000, 4800,
255 5600, 6400, 8000, 9600, 11200, 12800, 16000
257 } else version(mbandeq_normal) {
258 static immutable float[Bands] bandfrqs = [
259 50.00f, 100.00f, 155.56f, 220.00f, 311.13f, 440.00f, 622.25f,
260 880.00f, 1244.51f, 1760.00f, 2489.02f, 3519.95, 4978.04f, 9956.08f,
261 19912.16f
263 } else {
264 static immutable float[Bands] bandfrqs = [ 60.00f, 170.00f, 310.00f, 600.00f, 1000.00f, 3000.00f, 6000.00f, 12000.00f, 14000.00f, 16000.00f ];
267 private:
268 static void zalloc(T) (ref T* res, uint count) nothrow @trusted @nogc {
269 import core.stdc.stdlib : malloc;
270 import core.stdc.string : memcpy, memset;
271 if (count == 0 || count > 1024*1024*8) assert(0, "wtf?!");
272 res = cast(T*)malloc(T.sizeof*count);
273 if (res is null) assert(0, "out of memory");
274 memset(res, 0, T.sizeof*count);
277 static void xfree(T) (ref T* ptr) nothrow @trusted @nogc {
278 import core.stdc.stdlib : free;
279 if (ptr !is null) { free(ptr); ptr = null; }
284 // ////////////////////////////////////////////////////////////////////////// //
285 // kissfft
286 version(FFTW3) {} else {
288 * Copyright (c) 2003-2010, Mark Borgerding
290 * All rights reserved.
292 * Redistribution and use in source and binary forms, with or without
293 * modification, are permitted provided that the following conditions are
294 * met:
296 * * Redistributions of source code must retain the above copyright
297 * notice, this list of conditions and the following disclaimer.
299 * * Redistributions in binary form must reproduce the above copyright
300 * notice, this list of conditions and the following disclaimer in the
301 * documentation and/or other materials provided with the
302 * distribution.
304 * * Neither the author nor the names of any contributors may be used to
305 * endorse or promote products derived from this software without
306 * specific prior written permission.
308 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
309 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
310 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
311 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
312 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
313 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
314 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
315 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
316 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
317 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
318 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
320 //module kissfft /*is aliced*/;
321 private:
322 nothrow @trusted @nogc:
324 // ////////////////////////////////////////////////////////////////////////// //
326 public alias kiss_fft_scalar = float;
330 public align(1) struct kiss_fft_cpx {
331 align(1):
332 kiss_fft_scalar r;
333 kiss_fft_scalar i;
335 pure nothrow @safe @nogc:
336 // C_MULBYSCALAR
337 void opOpAssign(string op:"*") (in kiss_fft_scalar s) {
338 pragma(inline, true);
339 r *= s;
340 i *= s;
343 // C_ADDTO, C_SUBFROM
344 void opOpAssign(string op) (in kiss_fft_cpx b) if (op == "+" || op == "-") {
345 pragma(inline, true);
346 mixin("r"~op~"=b.r;");
347 mixin("i"~op~"=b.i;");
350 // C_MUL
351 kiss_fft_cpx opBinary(string op:"*") (in auto ref kiss_fft_cpx b) const {
352 pragma(inline, true);
353 return kiss_fft_cpx(r*b.r-i*b.i, r*b.i+i*b.r);
356 // C_ADD, C_SUB
357 kiss_fft_cpx opBinary(string op) (in auto ref kiss_fft_cpx b) const if (op == "+" || op == "-") {
358 pragma(inline, true);
359 mixin("return kiss_fft_cpx(r"~op~"b.r, i"~op~"b.i);");
365 public alias kiss_fft_cfg = kiss_fft_state*;
368 // ////////////////////////////////////////////////////////////////////////// //
369 enum MAXFACTORS = 32;
370 /* e.g. an fft of length 128 has 4 factors
371 as far as kissfft is concerned
372 4*4*4*2
375 struct kiss_fft_state {
376 int nfft;
377 bool inverse;
378 int[2*MAXFACTORS] factors;
379 kiss_fft_cpx[1] twiddles;
383 // ////////////////////////////////////////////////////////////////////////// //
384 private void kf_bfly2 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m) {
385 kiss_fft_cpx* Fout2;
386 const(kiss_fft_cpx)* tw1 = st.twiddles.ptr;
387 kiss_fft_cpx t;
388 Fout2 = Fout+m;
389 do {
390 t = (*Fout2)*(*tw1);
391 tw1 += fstride;
392 (*Fout2) = (*Fout)-t;
393 (*Fout) += t;
394 ++Fout2;
395 ++Fout;
396 } while (--m);
400 private void kf_bfly4 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, in usize m) {
401 const(kiss_fft_cpx)* tw1, tw2, tw3;
402 kiss_fft_cpx[6] scratch = void;
403 usize k = m;
404 immutable usize m2 = 2*m;
405 immutable usize m3 = 3*m;
406 tw3 = tw2 = tw1 = st.twiddles.ptr;
407 do {
408 scratch.ptr[0] = Fout[m]*(*tw1);
409 scratch.ptr[1] = Fout[m2]*(*tw2);
410 scratch.ptr[2] = Fout[m3]*(*tw3);
412 scratch.ptr[5] = (*Fout)-scratch.ptr[1];
413 (*Fout) += scratch.ptr[1];
414 scratch.ptr[3] = scratch.ptr[0]+scratch.ptr[2];
415 scratch.ptr[4] = scratch.ptr[0]-scratch.ptr[2];
416 Fout[m2] = (*Fout)-scratch.ptr[3];
417 tw1 += fstride;
418 tw2 += fstride*2;
419 tw3 += fstride*3;
420 (*Fout) += scratch.ptr[3];
422 if (st.inverse) {
423 Fout[m].r = scratch.ptr[5].r-scratch.ptr[4].i;
424 Fout[m].i = scratch.ptr[5].i+scratch.ptr[4].r;
425 Fout[m3].r = scratch.ptr[5].r+scratch.ptr[4].i;
426 Fout[m3].i = scratch.ptr[5].i-scratch.ptr[4].r;
427 } else {
428 Fout[m].r = scratch.ptr[5].r+scratch.ptr[4].i;
429 Fout[m].i = scratch.ptr[5].i-scratch.ptr[4].r;
430 Fout[m3].r = scratch.ptr[5].r-scratch.ptr[4].i;
431 Fout[m3].i = scratch.ptr[5].i+scratch.ptr[4].r;
433 ++Fout;
434 } while (--k);
438 private void kf_bfly3 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, usize m) {
439 usize k = m;
440 immutable usize m2 = 2*m;
441 const(kiss_fft_cpx)* tw1, tw2;
442 kiss_fft_cpx[5] scratch = void;
443 kiss_fft_cpx epi3;
444 epi3 = st.twiddles.ptr[fstride*m];
445 tw1 = tw2 = st.twiddles.ptr;
446 do {
447 scratch.ptr[1] = Fout[m]*(*tw1);
448 scratch.ptr[2] = Fout[m2]*(*tw2);
450 scratch.ptr[3] = scratch.ptr[1]+scratch.ptr[2];
451 scratch.ptr[0] = scratch.ptr[1]-scratch.ptr[2];
452 tw1 += fstride;
453 tw2 += fstride*2;
455 Fout[m].r = Fout.r-(scratch.ptr[3].r*cast(kiss_fft_scalar)0.5);
456 Fout[m].i = Fout.i-(scratch.ptr[3].i*cast(kiss_fft_scalar)0.5);
458 scratch.ptr[0] *= epi3.i;
460 (*Fout) += scratch.ptr[3];
462 Fout[m2].r = Fout[m].r+scratch.ptr[0].i;
463 Fout[m2].i = Fout[m].i-scratch.ptr[0].r;
465 Fout[m].r -= scratch.ptr[0].i;
466 Fout[m].i += scratch.ptr[0].r;
468 ++Fout;
469 } while (--k);
473 private void kf_bfly5 (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m) {
474 kiss_fft_cpx* Fout0, Fout1, Fout2, Fout3, Fout4;
475 int u;
476 kiss_fft_cpx[13] scratch = void;
477 const(kiss_fft_cpx)* twiddles = st.twiddles.ptr;
478 const(kiss_fft_cpx)* tw;
479 kiss_fft_cpx ya, yb;
480 ya = twiddles[fstride*m];
481 yb = twiddles[fstride*2*m];
483 Fout0 = Fout;
484 Fout1 = Fout0+m;
485 Fout2 = Fout0+2*m;
486 Fout3 = Fout0+3*m;
487 Fout4 = Fout0+4*m;
489 tw = st.twiddles.ptr;
490 for (u = 0; u < m; ++u) {
491 scratch.ptr[0] = *Fout0;
493 scratch.ptr[1] = (*Fout1)*tw[u*fstride];
494 scratch.ptr[2] = (*Fout2)*tw[2*u*fstride];
495 scratch.ptr[3] = (*Fout3)*tw[3*u*fstride];
496 scratch.ptr[4] = (*Fout4)*tw[4*u*fstride];
498 scratch.ptr[7] = scratch.ptr[1]+scratch.ptr[4];
499 scratch.ptr[10] = scratch.ptr[1]-scratch.ptr[4];
500 scratch.ptr[8] = scratch.ptr[2]+scratch.ptr[3];
501 scratch.ptr[9] = scratch.ptr[2]-scratch.ptr[3];
503 Fout0.r += scratch.ptr[7].r+scratch.ptr[8].r;
504 Fout0.i += scratch.ptr[7].i+scratch.ptr[8].i;
506 scratch.ptr[5].r = scratch.ptr[0].r+scratch.ptr[7].r*ya.r+scratch.ptr[8].r*yb.r;
507 scratch.ptr[5].i = scratch.ptr[0].i+scratch.ptr[7].i*ya.r+scratch.ptr[8].i*yb.r;
509 scratch.ptr[6].r = scratch.ptr[10].i*ya.i+scratch.ptr[9].i*yb.i;
510 scratch.ptr[6].i = -scratch.ptr[10].r*ya.i-scratch.ptr[9].r*yb.i;
512 (*Fout1) = scratch.ptr[5]-scratch.ptr[6];
513 (*Fout4) = scratch.ptr[5]+scratch.ptr[6];
515 scratch.ptr[11].r = scratch.ptr[0].r+scratch.ptr[7].r*yb.r+scratch.ptr[8].r*ya.r;
516 scratch.ptr[11].i = scratch.ptr[0].i+scratch.ptr[7].i*yb.r+scratch.ptr[8].i*ya.r;
517 scratch.ptr[12].r = -scratch.ptr[10].i*yb.i+scratch.ptr[9].i*ya.i;
518 scratch.ptr[12].i = scratch.ptr[10].r*yb.i-scratch.ptr[9].r*ya.i;
520 (*Fout2) = scratch.ptr[11]+scratch.ptr[12];
521 (*Fout3) = scratch.ptr[11]-scratch.ptr[12];
523 ++Fout0;
524 ++Fout1;
525 ++Fout2;
526 ++Fout3;
527 ++Fout4;
532 // perform the butterfly for one stage of a mixed radix FFT
533 private void kf_bfly_generic (kiss_fft_cpx* Fout, in usize fstride, const(kiss_fft_cfg) st, int m, int p) {
534 import core.stdc.stdlib : alloca;
535 int u, k, q1, q;
536 const(kiss_fft_cpx)* twiddles = st.twiddles.ptr;
537 kiss_fft_cpx t;
538 int Norig = st.nfft;
540 //kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*p);
541 kiss_fft_cpx* scratch = cast(kiss_fft_cpx*)alloca(kiss_fft_cpx.sizeof*p);
543 for (u = 0; u < m; ++u) {
544 k = u;
545 for (q1 = 0; q1 < p; ++q1) {
546 scratch[q1] = Fout[k];
547 k += m;
550 k = u;
551 for (q1 = 0; q1 < p; ++q1) {
552 int twidx = 0;
553 Fout[k] = scratch[0];
554 for (q = 1; q < p; ++q) {
555 twidx += fstride*k;
556 if (twidx >= Norig) twidx -= Norig;
557 t = scratch[q]*twiddles[twidx];
558 Fout[k] += t;
560 k += m;
563 //KISS_FFT_TMP_FREE(scratch);
567 private void kf_work (kiss_fft_cpx* Fout, const(kiss_fft_cpx)* f, in usize fstride, int in_stride, int* factors, const(kiss_fft_cfg) st) {
568 kiss_fft_cpx* Fout_beg = Fout;
569 immutable int p = *factors++; // the radix
570 immutable int m = *factors++; // stage's fft length/p
571 const(kiss_fft_cpx)* Fout_end = Fout+p*m;
573 if (m == 1) {
574 do {
575 *Fout = *f;
576 f += fstride*in_stride;
577 } while (++Fout != Fout_end);
578 } else {
579 do {
580 // recursive call:
581 // DFT of size m*p performed by doing
582 // p instances of smaller DFTs of size m,
583 // each one takes a decimated version of the input
584 kf_work(Fout, f, fstride*p, in_stride, factors, st);
585 f += fstride*in_stride;
586 } while ((Fout += m) != Fout_end);
589 Fout = Fout_beg;
591 // recombine the p smaller DFTs
592 switch (p) {
593 case 2: kf_bfly2(Fout, fstride, st, m); break;
594 case 3: kf_bfly3(Fout, fstride, st, m); break;
595 case 4: kf_bfly4(Fout, fstride, st, m); break;
596 case 5: kf_bfly5(Fout, fstride, st, m); break;
597 default: kf_bfly_generic(Fout, fstride, st, m, p); break;
602 /* facbuf is populated by p1, m1, p2, m2, ...
603 * where
604 * p[i]*m[i] = m[i-1]
605 * m0 = n
607 private void kf_factor (int n, int* facbuf) {
608 import std.math : floor, sqrt;
609 immutable double floor_sqrt = floor(sqrt(cast(double)n));
610 int p = 4;
611 // factor out powers of 4, powers of 2, then any remaining primes
612 do {
613 while (n%p) {
614 switch (p) {
615 case 4: p = 2; break;
616 case 2: p = 3; break;
617 default: p += 2; break;
619 if (p > floor_sqrt) p = n; // no more factors, skip to end
621 n /= p;
622 *facbuf++ = p;
623 *facbuf++ = n;
624 } while (n > 1);
628 /** Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
630 * typical usage: `kiss_fft_cfg mycfg = kiss_fft_alloc(1024, 0, null, null);`
632 * The return value from fft_alloc is a cfg buffer used internally by the fft routine or `null`.
634 * If lenmem is `null`, then kiss_fft_alloc will allocate a cfg buffer using malloc.
635 * The returned value should be `kiss_fft_free()`d when done to avoid memory leaks.
637 * The state can be placed in a user supplied buffer `mem`:
638 * If lenmem is not `null` and `mem` is not `null` and `*lenmem` is large enough,
639 * then the function places the cfg in `mem` and the size used in `*lenmem`,
640 * and returns mem.
642 * If lenmem is not `null` and (`mem` is `null` or `*lenmem` is not large enough),
643 * then the function returns `null` and places the minimum cfg buffer size in `*lenmem`.
645 public kiss_fft_cfg kiss_fft_alloc (int nfft, bool inverse_fft, void* mem=null, usize* lenmem=null) {
646 kiss_fft_cfg st = null;
647 usize memneeded = kiss_fft_state.sizeof+kiss_fft_cpx.sizeof*(nfft-1); // twiddle factors
648 if (lenmem is null) {
649 import core.stdc.stdlib : malloc;
650 st = cast(kiss_fft_cfg)malloc(memneeded);
651 } else {
652 if (mem !is null && *lenmem >= memneeded) st = cast(kiss_fft_cfg)mem;
653 *lenmem = memneeded;
655 if (st !is null) {
656 st.nfft = nfft;
657 st.inverse = inverse_fft;
658 for (int i = 0; i < nfft; ++i) {
659 import std.math : cos, sin, PI;
660 double phase = -2*PI*i/nfft;
661 if (st.inverse) phase *= -1;
662 st.twiddles.ptr[i].r = cos(phase);
663 st.twiddles.ptr[i].i = sin(phase);
666 kf_factor(nfft, st.factors.ptr);
668 return st;
672 /** If kiss_fft_alloc allocated a buffer, it is one contiguous
673 * buffer and can be simply free()d when no longer needed
675 public void kiss_fft_free(T) (ref T* p) {
676 if (p !is null) {
677 import core.stdc.stdlib : free;
678 free(p);
679 p = null;
684 /** Perform an FFT on a complex input buffer.
686 * for a forward FFT,
687 * fin should be f[0] , f[1] , ... ,f[nfft-1]
688 * fout will be F[0] , F[1] , ... ,F[nfft-1]
689 * Note that each element is complex and can be accessed like f[k].r and f[k].i
691 public void kiss_fft (kiss_fft_cfg cfg, const(kiss_fft_cpx)* fin, kiss_fft_cpx* fout) {
692 kiss_fft_stride(cfg, fin, fout, 1);
696 /** A more generic version of the above function. It reads its input from every Nth sample. */
697 public void kiss_fft_stride (kiss_fft_cfg st, const(kiss_fft_cpx)* fin, kiss_fft_cpx* fout, int in_stride) {
698 import core.stdc.stdlib : alloca;
699 if (fin is fout) {
700 import core.stdc.string : memcpy;
701 //NOTE: this is not really an in-place FFT algorithm.
702 //It just performs an out-of-place FFT into a temp buffer
703 //kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(kiss_fft_cpx.sizeof*st.nfft);
704 kiss_fft_cpx* tmpbuf = cast(kiss_fft_cpx*)alloca(kiss_fft_cpx.sizeof*st.nfft);
705 kf_work(tmpbuf, fin, 1, in_stride, st.factors.ptr, st);
706 memcpy(fout, tmpbuf, kiss_fft_cpx.sizeof*st.nfft);
707 //KISS_FFT_TMP_FREE(tmpbuf);
708 } else {
709 kf_work(fout, fin, 1, in_stride, st.factors.ptr, st);
714 /** Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) */
715 public int kiss_fft_next_fast_size (int n) {
716 for (;;) {
717 int m = n;
718 while ((m%2) == 0) m /= 2;
719 while ((m%3) == 0) m /= 3;
720 while ((m%5) == 0) m /= 5;
721 if (m <= 1) break; // n is completely factorable by twos, threes, and fives
722 ++n;
724 return n;
728 /** for real ffts, we need an even size */
729 public int kiss_fftr_next_fast_size_real (int n) {
730 return kiss_fft_next_fast_size((n+1)>>1)<<1;
734 // ////////////////////////////////////////////////////////////////////////// //
735 // kissfftr
736 // Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
737 public alias kiss_fftr_cfg = kiss_fftr_state*;
739 struct kiss_fftr_state {
740 kiss_fft_cfg substate;
741 kiss_fft_cpx* tmpbuf;
742 kiss_fft_cpx* super_twiddles;
747 ** nfft must be even
749 * If you don't care to allocate space, use mem = lenmem = null
751 public kiss_fftr_cfg kiss_fftr_alloc (int nfft, bool inverse_fft, void* mem=null, usize* lenmem=null) {
752 kiss_fftr_cfg st = null;
753 usize subsize, memneeded;
755 if (nfft&1) assert(0, "real FFT optimization must be even");
756 nfft >>= 1;
758 kiss_fft_alloc(nfft, inverse_fft, null, &subsize);
759 memneeded = kiss_fftr_state.sizeof+subsize+kiss_fft_cpx.sizeof*(nfft*3/2);
761 if (lenmem is null) {
762 import core.stdc.stdlib : malloc;
763 st = cast(kiss_fftr_cfg)malloc(memneeded);
764 } else {
765 if (*lenmem >= memneeded) st = cast(kiss_fftr_cfg)mem;
766 *lenmem = memneeded;
768 if (st is null) return null;
770 st.substate = cast(kiss_fft_cfg)(st+1); // just beyond kiss_fftr_state struct
771 st.tmpbuf = cast(kiss_fft_cpx*)((cast(ubyte*)st.substate)+subsize);
772 st.super_twiddles = st.tmpbuf+nfft;
773 kiss_fft_alloc(nfft, inverse_fft, st.substate, &subsize);
775 foreach (immutable i; 0..nfft/2) {
776 import std.math : cos, sin, PI;
777 double phase = -PI*(cast(double)(i+1)/nfft+0.5);
778 if (inverse_fft) phase *= -1;
779 st.super_twiddles[i].r = cos(phase);
780 st.super_twiddles[i].i = sin(phase);
783 return st;
787 /** input timedata has nfft scalar points
788 * output freqdata has nfft/2+1 complex points
790 public void kiss_fftr (kiss_fftr_cfg st, const(kiss_fft_scalar)* timedata, kiss_fft_cpx* freqdata) {
791 // input buffer timedata is stored row-wise
792 int k, ncfft;
793 kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
795 if (st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
797 ncfft = st.substate.nfft;
799 // perform the parallel fft of two real signals packed in real,imag
800 kiss_fft(st.substate, cast(const(kiss_fft_cpx)*)timedata, st.tmpbuf);
801 /* The real part of the DC element of the frequency spectrum in st->tmpbuf
802 * contains the sum of the even-numbered elements of the input time sequence
803 * The imag part is the sum of the odd-numbered elements
805 * The sum of tdc.r and tdc.i is the sum of the input time sequence.
806 * yielding DC of input time sequence
807 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
808 * yielding Nyquist bin of input time sequence
811 tdc.r = st.tmpbuf[0].r;
812 tdc.i = st.tmpbuf[0].i;
813 freqdata[0].r = tdc.r+tdc.i;
814 freqdata[ncfft].r = tdc.r-tdc.i;
815 freqdata[ncfft].i = freqdata[0].i = 0;
817 for (k = 1; k <= ncfft/2; ++k) {
818 fpk = st.tmpbuf[k];
819 fpnk.r = st.tmpbuf[ncfft-k].r;
820 fpnk.i = -st.tmpbuf[ncfft-k].i;
822 f1k = fpk+fpnk;
823 f2k = fpk-fpnk;
824 tw = f2k*st.super_twiddles[k-1];
826 freqdata[k].r = (f1k.r+tw.r)*cast(kiss_fft_scalar)0.5;
827 freqdata[k].i = (f1k.i+tw.i)*cast(kiss_fft_scalar)0.5;
828 freqdata[ncfft-k].r = (f1k.r-tw.r)*cast(kiss_fft_scalar)0.5;
829 freqdata[ncfft-k].i = (tw.i-f1k.i)*cast(kiss_fft_scalar)0.5;
834 /** input freqdata has nfft/2+1 complex points
835 * output timedata has nfft scalar points
837 public void kiss_fftri (kiss_fftr_cfg st, const(kiss_fft_cpx)* freqdata, kiss_fft_scalar* timedata) {
838 // input buffer timedata is stored row-wise
839 int k, ncfft;
841 if (!st.substate.inverse) assert(0, "kiss fft usage error: improper alloc");
843 ncfft = st.substate.nfft;
845 st.tmpbuf[0].r = freqdata[0].r+freqdata[ncfft].r;
846 st.tmpbuf[0].i = freqdata[0].r-freqdata[ncfft].r;
848 for (k = 1; k <= ncfft/2; ++k) {
849 kiss_fft_cpx fk, fnkc, fek, fok, tmp;
850 fk = freqdata[k];
851 fnkc.r = freqdata[ncfft-k].r;
852 fnkc.i = -freqdata[ncfft-k].i;
854 fek = fk+fnkc;
855 tmp = fk-fnkc;
856 fok = tmp*st.super_twiddles[k-1];
857 st.tmpbuf[k] = fek+fok;
858 st.tmpbuf[ncfft-k] = fek-fok;
859 st.tmpbuf[ncfft-k].i *= -1;
861 kiss_fft(st.substate, st.tmpbuf, cast(kiss_fft_cpx*)timedata);
863 } // version, kissfft