Merge remote-tracking branch 'cockos/main' into next
[wdl/wdl-tale.git] / WDL / convoengine.cpp
blob793f9067e4c872e4692213c1ff3f508d249793ae
1 /*
2 WDL - convoengine.cpp
3 Copyright (C) 2006 and later Cockos Incorporated
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
24 #include <math.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <memory.h>
28 #include "convoengine.h"
30 #include "denormal.h"
32 //#define TIMING
33 #ifdef TIMING
34 #include "timing.c"
35 #endif
37 #define CONVOENGINE_SILENCE_THRESH 1.0e-12 // -240dB
38 #define CONVOENGINE_IMPULSE_SILENCE_THRESH 1.0e-15 // -300dB
40 #if !defined(WDL_CONVO_SSE) && !defined(WDL_CONVO_SSE3)
41 #if defined(__SSE3__)
42 #define WDL_CONVO_SSE3
43 #elif defined(__SSE__) || _M_IX86_FP >= 1 || defined(_M_X64) || defined(_M_AMD64)
44 #define WDL_CONVO_SSE
45 #endif
46 #endif
48 #if WDL_FFT_REALSIZE != 4 || defined(WDL_CONVO_WANT_FULLPRECISION_IMPULSE_STORAGE)
49 #undef WDL_CONVO_SSE
50 #undef WDL_CONVO_SSE3
51 #endif
53 #ifdef WDL_CONVO_SSE // define for SSE optimised CplxMul
54 #include <xmmintrin.h>
55 #endif
57 #ifdef WDL_CONVO_SSE3 // define for SSE3 optimised CplxMul
58 #include <pmmintrin.h>
59 #endif
61 #if defined(WDL_CONVO_SSE) || defined(WDL_CONVO_SSE3)
62 #define WDL_CONVO_ALIGN 16
63 #define WDL_CONVO_ALIGN_IMPULSEBUFf (WDL_CONVO_ALIGN / sizeof(WDL_CONVO_IMPULSEBUFf))
64 #define WDL_CONVO_ALIGN_FFT_REAL (WDL_CONVO_ALIGN / sizeof(WDL_FFT_REAL))
65 #define WDL_CONVO_GETALIGNED() GetAligned(WDL_CONVO_ALIGN)
66 #else
67 #define WDL_CONVO_ALIGN 1
68 #define WDL_CONVO_ALIGN_IMPULSEBUFf 0
69 #define WDL_CONVO_ALIGN_FFT_REAL 0
70 #define WDL_CONVO_GETALIGNED() Get()
71 #endif
73 #if !defined(WDL_CONVO_SSE) && !defined(WDL_CONVO_SSE3)
74 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
76 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
77 if (n<2 || (n&1)) return;
79 do {
80 t1 = a[0].re * b[0].re;
81 t2 = a[0].im * b[0].im;
82 t3 = a[0].im * b[0].re;
83 t4 = a[0].re * b[0].im;
84 t5 = a[1].re * b[1].re;
85 t6 = a[1].im * b[1].im;
86 t7 = a[1].im * b[1].re;
87 t8 = a[1].re * b[1].im;
88 t1 -= t2;
89 t3 += t4;
90 t5 -= t6;
91 t7 += t8;
92 c[0].re = t1;
93 c[1].re = t5;
94 c[0].im = t3;
95 c[1].im = t7;
96 a += 2;
97 b += 2;
98 c += 2;
99 } while (n -= 2);
101 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
103 WDL_FFT_REAL t1, t2, t3, t4, t5, t6, t7, t8;
104 if (n<2 || (n&1)) return;
106 do {
107 t1 = a[0].re * b[0].re;
108 t2 = a[0].im * b[0].im;
109 t3 = a[0].im * b[0].re;
110 t4 = a[0].re * b[0].im;
111 t5 = a[1].re * b[1].re;
112 t6 = a[1].im * b[1].im;
113 t7 = a[1].im * b[1].re;
114 t8 = a[1].re * b[1].im;
115 t1 -= t2;
116 t3 += t4;
117 t5 -= t6;
118 t7 += t8;
119 c[0].re += t1;
120 c[1].re += t5;
121 c[0].im += t3;
122 c[1].im += t7;
123 a += 2;
124 b += 2;
125 c += 2;
126 } while (n -= 2);
129 #elif defined(WDL_CONVO_SSE3)
130 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
132 __m128 xmm0, xmm1, xmm2;
133 if (n<2 || (n&1)) return;
135 do {
136 xmm2 = _mm_load_ps((const float*)b);
137 xmm1 = _mm_moveldup_ps(_mm_load_ps((const float*)a));
138 xmm0 = _mm_movehdup_ps(_mm_load_ps((const float*)a));
139 xmm1 = _mm_mul_ps(xmm1, xmm2);
140 xmm2 = _mm_shuffle_ps(xmm2, xmm2, 0xB1);
141 xmm2 = _mm_mul_ps(xmm2, xmm0);
142 xmm1 = _mm_addsub_ps(xmm1, xmm2);
143 _mm_store_ps((float*)c, xmm1);
144 a += 2;
145 b += 2;
146 c += 2;
147 } while (n -= 2);
149 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
151 __m128 xmm0, xmm1, xmm2;
152 if (n<2 || (n&1)) return;
154 do {
155 xmm2 = _mm_load_ps((const float*)b);
156 xmm1 = _mm_moveldup_ps(_mm_load_ps((const float*)a));
157 xmm0 = _mm_movehdup_ps(_mm_load_ps((const float*)a));
158 xmm1 = _mm_mul_ps(xmm1, xmm2);
159 xmm2 = _mm_shuffle_ps(xmm2, xmm2, 0xB1);
160 xmm2 = _mm_mul_ps(xmm2, xmm0);
161 xmm1 = _mm_addsub_ps(xmm1, xmm2);
162 xmm1 = _mm_add_ps(xmm1, _mm_load_ps((const float*)c));
163 _mm_store_ps((float*)c, xmm1);
164 a += 2;
165 b += 2;
166 c += 2;
167 } while (n -= 2);
170 #elif defined(WDL_CONVO_SSE)
171 static void WDL_CONVO_CplxMul2(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
173 __m128 xmm0, xmm1, xmm2, xmm3, xmm4;
174 if (n<2 || (n&1)) return;
176 xmm4 = _mm_set_ps(1, -1, 1, -1);
177 do {
178 xmm3 = _mm_load_ps((const float*)a);
179 xmm2 = _mm_load_ps((const float*)b);
180 xmm0 = xmm3;
181 xmm1 = xmm2;
182 xmm0 = _mm_shuffle_ps(xmm0, xmm3, 0xB1);
183 xmm1 = _mm_shuffle_ps(xmm1, xmm2, 0xF5);
184 xmm1 = _mm_mul_ps(xmm1, xmm0);
185 xmm2 = _mm_shuffle_ps(xmm2, xmm2, 0xA0);
186 xmm2 = _mm_mul_ps(xmm2, xmm3);
187 xmm1 = _mm_mul_ps(xmm1, xmm4);
188 xmm1 = _mm_add_ps(xmm1, xmm2);
189 _mm_store_ps((float*)c, xmm1);
190 a += 2;
191 b += 2;
192 c += 2;
193 } while (n -= 2);
195 static void WDL_CONVO_CplxMul3(WDL_FFT_COMPLEX *c, const WDL_FFT_COMPLEX *a, const WDL_CONVO_IMPULSEBUFCPLXf *b, int n)
197 __m128 xmm0, xmm1, xmm2, xmm3, xmm4;
198 if (n<2 || (n&1)) return;
200 xmm4 = _mm_set_ps(1, -1, 1, -1);
201 do {
202 xmm3 = _mm_load_ps((const float*)a);
203 xmm2 = _mm_load_ps((const float*)b);
204 xmm0 = xmm3;
205 xmm1 = xmm2;
206 xmm0 = _mm_shuffle_ps(xmm0, xmm3, 0xB1);
207 xmm1 = _mm_shuffle_ps(xmm1, xmm2, 0xF5);
208 xmm1 = _mm_mul_ps(xmm1, xmm0);
209 xmm2 = _mm_shuffle_ps(xmm2, xmm2, 0xA0);
210 xmm2 = _mm_mul_ps(xmm2, xmm3);
211 xmm1 = _mm_mul_ps(xmm1, xmm4);
212 xmm1 = _mm_add_ps(xmm1, xmm2);
213 xmm1 = _mm_add_ps(xmm1, _mm_load_ps((const float*)c));
214 _mm_store_ps((float*)c, xmm1);
215 a += 2;
216 b += 2;
217 c += 2;
218 } while (n -= 2);
220 #endif // WDL_CONVO_SSE
222 static bool CompareQueueToBuf(WDL_FastQueue *q, const void *data, int len)
224 int offs=0;
225 while (len>0)
227 void *td=NULL;
228 int sz=q->GetPtr(offs,&td);
229 if (sz<1) return true; // not enough data = not equal!
230 if (sz>len) sz=len;
232 int i=sz/sizeof(WDL_FFT_REAL);
233 WDL_FFT_REAL *a1=(WDL_FFT_REAL*)td;
234 WDL_FFT_REAL *b1=(WDL_FFT_REAL*)data;
235 while (i--)
237 if (fabs(*a1-*b1)>CONVOENGINE_SILENCE_THRESH) return true;
238 a1++;
239 b1++;
242 data = ((char *)data)+sz;
243 offs+=sz;
244 len-=sz;
246 return false;
250 WDL_ConvolutionEngine::WDL_ConvolutionEngine()
252 WDL_fft_init();
253 m_fft_size=0;
254 m_impdata.Add(new ImpChannelInfo);
255 m_impulse_len=0;
256 m_proc_nch=0;
259 WDL_ConvolutionEngine::~WDL_ConvolutionEngine()
261 m_impdata.Empty(true);
262 m_proc.Empty(true);
// Loads an impulse response into the engine.
//   impulse               - source impulse; one buffer per channel in impulse->impulses[]
//   fft_size              - FFT size to use; <=0 selects one automatically (see below)
//   impulse_sample_offset - number of leading impulse samples to skip
//   max_imp_size          - if nonzero, upper limit on the number of samples used
//   forceBrute            - store the raw impulse for direct (non-FFT) convolution
// Returns 0 in brute mode, otherwise m_fft_size/2.
// m_proc_nch is set to -1 so that the next Add() sees a channel-count change.
265 int WDL_ConvolutionEngine::SetImpulse(WDL_ImpulseBuffer *impulse, int fft_size, int impulse_sample_offset, int max_imp_size, bool forceBrute)
267 int impulse_len=0;
268 int x;
269 int nch=impulse->GetNumChannels();
// impulse_len = longest usable channel length after offset and max_imp_size
270 for (x = 0; x < nch; x ++)
272 int l=impulse->impulses[x].GetSize()-impulse_sample_offset;
273 if (max_imp_size && l>max_imp_size) l=max_imp_size;
274 if (impulse_len < l) impulse_len=l;
277 if (nch>1) // detect mono signals pretending to be multichannel
279 for (x = 1; x < nch; x ++)
// stop at the first channel whose size or contents differ from channel 0
281 if (impulse->impulses[x].GetSize()!=impulse->impulses[0].GetSize()||
282 memcmp(impulse->impulses[x].Get(),impulse->impulses[0].Get(),
283 impulse->impulses[0].GetSize()*sizeof(WDL_FFT_REAL)))
284 break;
// loop ran to completion: every channel equals channel 0, so keep just one
286 if (x >= nch) nch=1;
289 m_impulse_len=impulse_len;
290 m_proc_nch=-1;
// grow/shrink the per-channel impulse storage list to exactly nch entries
292 while (m_impdata.GetSize() > nch)
293 m_impdata.Delete(m_impdata.GetSize()-1,true);
294 while (m_impdata.GetSize() < nch)
295 m_impdata.Add(new ImpChannelInfo);
297 if (forceBrute)
// m_fft_size==0 is what Add()/Avail() test (m_fft_size<1) to select the brute path
299 m_fft_size=0;
301 // save impulse
302 for (x = 0; x < m_impdata.GetSize(); x ++)
304 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
305 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
306 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
308 ImpChannelInfo *impdata = m_impdata.Get(x);
309 impdata->imp.Resize(lenout>0 ? lenout+WDL_CONVO_ALIGN_IMPULSEBUFf : 0);
// impout starts one past the end and is pre-decremented, so the impulse is stored time-reversed
310 WDL_CONVO_IMPULSEBUFf *impout=impdata->imp.WDL_CONVO_GETALIGNED()+lenout;
311 while (lenout-->0) *--impout = (WDL_CONVO_IMPULSEBUFf) *imp++;
// drop any queued samples in the existing processing channels
314 for (x = 0; x < m_proc.GetSize(); x ++)
316 ProcChannelInfo *inf = m_proc.Get(x);
317 inf->samplesin.Clear();
318 inf->samplesin2.Clear();
319 inf->samplesout.Clear();
322 return 0;
// automatic FFT size: start at 32 and double until it reaches impulse_len*2
// or the cap msz (-fft_size*2 when fft_size<=-16, otherwise 32768)
326 if (fft_size<=0)
328 int msz=fft_size<=-16? -fft_size*2 : 32768;
330 fft_size=32;
331 while (fft_size < impulse_len*2 && fft_size < msz) fft_size*=2;
334 m_fft_size=fft_size;
// the impulse is cut into blocks of fft_size/2 samples; nblocks rounds up
336 int impchunksize=fft_size/2;
337 int nblocks=(impulse_len+impchunksize-1)/impchunksize;
338 //wdl_log("il=%d, ffts=%d, cs=%d, nb=%d\n",impulse_len,fft_size,impchunksize,nblocks);
// true when impulse data is stored in a different-sized type than the FFT works in
340 const bool smallerSizeMode=sizeof(WDL_CONVO_IMPULSEBUFf)!=sizeof(WDL_FFT_REAL);
// every impulse sample is pre-multiplied by 1/fft_size
342 WDL_FFT_REAL scale=(WDL_FFT_REAL) (1.0/fft_size);
343 for (x = 0; x < m_impdata.GetSize(); x ++)
345 WDL_FFT_REAL *imp=impulse->impulses[x].Get()+impulse_sample_offset;
// imp2 = next channel's impulse (NULL for the last channel); it is packed into the odd slots below
347 WDL_FFT_REAL *imp2=x < m_impdata.GetSize()-1 ? impulse->impulses[x+1].Get()+impulse_sample_offset : NULL;
// smallerSizeMode allocates one extra block: each block is first written and
// transformed as WDL_FFT_REAL at impout, then converted down in place
349 int nb=nblocks+!!smallerSizeMode;
350 ImpChannelInfo *impdata = m_impdata.Get(x);
351 impdata->imp.Resize(nb>0 ? nb*fft_size*2+WDL_CONVO_ALIGN_IMPULSEBUFf : 0);
352 WDL_CONVO_IMPULSEBUFf *impout=impdata->imp.WDL_CONVO_GETALIGNED();
// zbuf receives one flag byte per block (0, 1 or 2, see below)
353 char *zbuf=m_impdata.Get(x)->zflag.Resize(nblocks);
354 int lenout=impulse->impulses[x].GetSize()-impulse_sample_offset;
355 if (max_imp_size && lenout>max_imp_size) lenout=max_imp_size;
357 int bl;
358 for (bl = 0; bl < nblocks; bl ++)
// thissz = number of impulse samples that go into this block
361 int thissz=lenout;
362 if (thissz > impchunksize) thissz=impchunksize;
364 lenout -= thissz;
365 int i=0;
// mv / mv2 track the peak absolute value of this block for channel x / channel x+1
366 WDL_FFT_REAL mv=0.0;
367 WDL_FFT_REAL mv2=0.0;
368 WDL_FFT_REAL *imptmp = (WDL_FFT_REAL *)impout; //-V615
370 for (; i < thissz; i ++)
372 WDL_FFT_REAL v=*imp++;
373 WDL_FFT_REAL v2=(WDL_FFT_REAL)fabs(v);
374 if (v2 > mv) mv=v2;
// even slot: channel x sample (scaled)
376 imptmp[i*2]=denormal_filter_aggressive(v * scale);
// odd slot: channel x+1 sample (scaled), or zero when there is no next channel
// NOTE(review): imp2 is advanced by channel x's length; this assumes channel x+1 is at least as long - confirm
378 if (imp2)
380 v=*imp2++;
381 v2=(WDL_FFT_REAL)fabs(v);
382 if (v2>mv2) mv2=v2;
383 imptmp[i*2+1]=denormal_filter_aggressive(v*scale);
385 else imptmp[i*2+1]=0.0;
// zero-pad the remainder of the fft_size slot pairs
387 for (; i < fft_size; i ++)
389 imptmp[i*2]=0.0;
390 imptmp[i*2+1]=0.0;
// only blocks with content above the silence threshold are transformed
392 if (mv>CONVOENGINE_IMPULSE_SILENCE_THRESH||mv2>CONVOENGINE_IMPULSE_SILENCE_THRESH)
394 *zbuf++=mv>CONVOENGINE_IMPULSE_SILENCE_THRESH ? 2 : 1; // 1 means only second channel has content
395 WDL_fft((WDL_FFT_COMPLEX*)impout,fft_size,0);
397 if (smallerSizeMode)
// convert the transformed block from WDL_FFT_REAL to the storage type, in place
399 int x,n=fft_size*2;
400 for(x=0;x<n;x++) impout[x]=(WDL_CONVO_IMPULSEBUFf)imptmp[x];
// silent block: flag 0, no FFT performed
403 else *zbuf++=0;
405 impout+=fft_size*2;
408 return m_fft_size/2;
412 void WDL_ConvolutionEngine::Reset() // clears out any latent samples
414 for (int x = 0; x < m_proc.GetSize(); x ++)
416 ProcChannelInfo *inf = m_proc.Get(x);
417 inf->samplesin.Clear();
418 inf->samplesin2.Clear();
419 inf->samplesout.Clear();
420 inf->hist_pos = 0;
421 memset(inf->samplehist_zflag.Get(),0,inf->samplehist_zflag.GetSize());
422 memset(inf->samplehist.Get(),0,inf->samplehist.GetSize()*sizeof(WDL_FFT_REAL));
423 memset(inf->overlaphist.Get(),0,inf->overlaphist.GetSize()*sizeof(WDL_FFT_REAL));
// Feeds len input samples per channel into the engine.
//   bufs - array of nch channel pointers; bufs itself or an entry may be NULL
//   len  - number of samples per channel
//   nch  - number of channels; m_proc is resized to match
// In brute mode (m_fft_size<1) the convolution is computed right here and the
// result appended to samplesout. Otherwise the input is only queued in
// samplesin (or passed straight to samplesout when there is no impulse).
427 void WDL_ConvolutionEngine::Add(WDL_FFT_REAL **bufs, int len, int nch)
429 while (m_proc.GetSize() < nch) m_proc.Add(new ProcChannelInfo);
430 while (m_proc.GetSize() > nch) m_proc.Delete(m_proc.GetSize()-1,true);
// ---- brute-force (direct) convolution path ----
432 if (m_fft_size<1)
434 m_proc_nch=nch;
436 for (int ch = 0; ch < nch; ch ++)
// input channels beyond the impulse channel count wrap around
438 int wch = ch % m_impdata.GetSize();
439 ImpChannelInfo *impdata = m_impdata.Get(wch);
440 WDL_CONVO_IMPULSEBUFf *imp=impdata->imp.WDL_CONVO_GETALIGNED();
441 int imp_len = impdata->imp.GetSize()-WDL_CONVO_ALIGN_IMPULSEBUFf;
442 ProcChannelInfo *pinf = m_proc.Get(ch);
444 if (imp_len>0)
// make sure samplesin2 holds at least imp_len samples before the new input, padding with zeros
446 if (pinf->samplesin2.Available()<imp_len*(int)sizeof(WDL_FFT_REAL))
448 int sza=imp_len*sizeof(WDL_FFT_REAL)-pinf->samplesin2.Available();
449 memset(pinf->samplesin2.Add(NULL,sza),0,sza);
// psrc = start of the newly appended input inside samplesin2 (zeros when no buffer was given)
451 WDL_FFT_REAL *psrc;
453 if (bufs && bufs[ch])
454 psrc=(WDL_FFT_REAL*)pinf->samplesin2.Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
455 else
457 psrc=(WDL_FFT_REAL*)pinf->samplesin2.Add(NULL,len*sizeof(WDL_FFT_REAL));
458 memset(psrc,0,len*sizeof(WDL_FFT_REAL));
461 WDL_FFT_REAL *pout=(WDL_FFT_REAL*)pinf->samplesout.Add(NULL,len*sizeof(WDL_FFT_REAL));
462 int x;
// len1 = len rounded down to even; the main loop produces two outputs per pass
463 int len1 = len&~1;
464 for (x=0; x < len1 ; x += 2)
466 int i=imp_len;
467 double sum=0.0,sum2=0.0;
// sp = first of the imp_len input samples that end at psrc[x]
468 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
469 WDL_CONVO_IMPULSEBUFf *ip=imp;
// j = number of groups of 4 impulse samples, i = leftover (0..3)
470 int j=i/4; i&=3;
471 while (j--) // produce 2 samples, 4 impulse samples at a time
473 double a = ip[0],b=ip[1],aa=ip[2],bb=ip[3];
474 double c = sp[1],d=sp[2],cc=sp[3];
// sum accumulates output x, sum2 output x+1 (same impulse, input shifted by one)
475 sum+=a * sp[0] + b * c + aa * d + bb * cc;
476 sum2+=a * c + b * d + aa * cc + bb * sp[4];
477 ip+=4;
478 sp+=4;
481 while (i--)
483 double a = *ip++;
484 sum+=a * sp[0];
485 sum2+=a * sp[1];
486 sp++;
488 pout[x]=(WDL_FFT_REAL) sum;
489 pout[x+1]=(WDL_FFT_REAL) sum2;
491 for(;x<len;x++) // any odd samples left
493 int i=imp_len;
494 double sum=0.0;
495 WDL_FFT_REAL *sp=psrc+x-imp_len + 1;
496 WDL_CONVO_IMPULSEBUFf *ip=imp;
497 int j=i/4; i&=3;
498 while (j--)
500 sum+=ip[0] * sp[0] + ip[1] * sp[1] + ip[2] * sp[2] + ip[3] * sp[3];
501 ip+=4;
502 sp+=4;
505 while (i--) sum+=*ip++ * *sp++;
506 pout[x]=(WDL_FFT_REAL) sum;
// consume len samples; what remains in samplesin2 serves as history for the next call
508 pinf->samplesin2.Advance(len*sizeof(WDL_FFT_REAL));
509 pinf->samplesin2.Compact();
// no impulse for this channel: copy the input (or zeros) straight to the output
511 else
513 if (bufs && bufs[ch]) pinf->samplesout.Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
514 else
516 memset(pinf->samplesout.Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
521 return;
// ---- FFT path ----
525 int impchunksize=m_fft_size/2;
526 int nblocks=(m_impulse_len+impchunksize-1)/impchunksize;
// channel count changed (or SetImpulse set m_proc_nch to -1): rebuild per-channel state
528 if (m_proc_nch != nch)
530 m_proc_nch=nch;
// mso = largest queued byte count (input+output) seen so far across the channels
532 int mso=0;
533 for (int ch = 0; ch < nch; ch ++)
535 ProcChannelInfo *pinf = m_proc.Get(ch);
536 pinf->hist_pos = 0;
537 int so=pinf->samplesin.Available() + pinf->samplesout.Available();
538 if (so>mso) mso=so;
// no impulse: move anything still waiting in samplesin directly to samplesout
540 if (m_impulse_len<1||!nblocks)
542 if (pinf->samplesin.Available())
544 int s=pinf->samplesin.Available();
545 void *buf=pinf->samplesout.Add(NULL,s);
546 pinf->samplesin.GetToBuf(0,buf,s);
547 pinf->samplesin.Clear();
// pad this channel's output with zeros up to the largest amount seen so far
551 if (so < mso)
553 memset(pinf->samplesout.Add(NULL,mso-so),0,mso-so);
// (re)allocate and zero the block flags, FFT history and overlap buffers
556 const int sz=nblocks*m_fft_size;
558 memset(pinf->samplehist_zflag.Resize(nblocks),0,nblocks);
559 pinf->samplehist.Resize(sz>0 ? sz*2+WDL_CONVO_ALIGN_FFT_REAL : 0);
560 pinf->overlaphist.Resize(m_fft_size/2);
561 memset(pinf->samplehist.Get(),0,pinf->samplehist.GetSize()*sizeof(WDL_FFT_REAL));
562 memset(pinf->overlaphist.Get(),0,pinf->overlaphist.GetSize()*sizeof(WDL_FFT_REAL));
566 if (m_impulse_len<1||!nblocks)
568 for (int ch = 0; ch < nch; ch ++)
570 ProcChannelInfo *pinf = m_proc.Get(ch);
571 if (bufs && bufs[ch])
572 pinf->samplesout.Add(bufs[ch],len*sizeof(WDL_FFT_REAL));
573 else
574 memset(pinf->samplesout.Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
576 // pass through
577 return;
// normal case: queue the input; the actual processing happens in Avail()
580 for (int ch = 0; ch < nch; ch ++)
582 ProcChannelInfo *pinf = m_proc.Get(ch);
// skip channels whose history/overlap buffers are not allocated
583 if (pinf->samplehist.GetSize()<WDL_CONVO_ALIGN_FFT_REAL || !pinf->overlaphist.GetSize()) continue;
584 pinf->samplesin.Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
588 void WDL_ConvolutionEngine::AddSilenceToOutput(int len)
590 for (int ch = 0; ch < m_proc_nch; ch++)
592 ProcChannelInfo *pinf = m_proc.Get(ch);
593 memset(pinf->samplesout.Add(NULL,len*sizeof(WDL_FFT_REAL)),0,len*sizeof(WDL_FFT_REAL));
// Processes queued input until each channel has at least 'want' output
// samples (or input runs out) and returns how many output samples are
// available: in brute mode the count for channel 0, otherwise the minimum
// over all processing channels ('want' itself if there are no channels).
597 int WDL_ConvolutionEngine::Avail(int want)
// brute mode: Add() already produced the output, just report it
599 if (m_fft_size<1)
601 ProcChannelInfo *pinf = m_proc.Get(0);
602 return pinf ? pinf->samplesout.Available()/sizeof(WDL_FFT_REAL) : 0;
// sz = samples consumed and produced per processing step (half the FFT size)
605 const int sz=m_fft_size/2;
606 const int chunksize=m_fft_size/2;
607 const int nblocks=(m_impulse_len+chunksize-1)/chunksize;
608 // clear combining buffer
// workbuf2: first m_fft_size*2 values hold the frequency-domain accumulator,
// the area from workbuf2+m_fft_size*2 is used for the second channel's output below
609 m_combinebuf.Resize(m_fft_size*4+WDL_CONVO_ALIGN_FFT_REAL); // temp space
610 WDL_FFT_REAL *workbuf2 = m_combinebuf.WDL_CONVO_GETALIGNED();
612 int ch;
614 for (ch = 0; ch < m_proc_nch; ch ++)
616 ProcChannelInfo *pinf = m_proc.Get(ch);
// pinf2 = following channel, a candidate for being processed together with this one
617 ProcChannelInfo *pinf2 = ch+1 < m_proc_nch ? m_proc.Get(ch+1) : NULL;
619 if (pinf->samplehist.GetSize()<WDL_CONVO_ALIGN_FFT_REAL || !pinf->overlaphist.GetSize()) continue;
// impulse channel used for this input channel (wraps around)
620 int srcc=ch % m_impdata.GetSize();
622 bool allow_mono_input_mode=true;
623 bool mono_impulse_mode=false;
// single impulse channel and a next channel with identical queue fill levels:
// process channels ch and ch+1 in one pass
625 if (m_impdata.GetSize()==1 && pinf2 &&
626 pinf2->samplehist.GetSize()>=WDL_CONVO_ALIGN_FFT_REAL &&
627 pinf2->overlaphist.GetSize() &&
628 pinf->samplesin.Available()==pinf2->samplesin.Available() &&
629 pinf->samplesout.Available()==pinf2->samplesout.Available()
631 { // 2x processing mode
632 mono_impulse_mode=true;
633 allow_mono_input_mode=false;
637 const int in_needed=sz;
639 // useSilentList[x] = 1 for mono signal, 2 for stereo, 0 for silent
640 char *useSilentList=pinf->samplehist_zflag.GetSize()==nblocks ? pinf->samplehist_zflag.Get() : NULL;
// run while a full chunk of input is queued and the output is still short of 'want'
641 while (pinf->samplesin.Available()/(int)sizeof(WDL_FFT_REAL) >= sz &&
642 pinf->samplesout.Available() < want*(int)sizeof(WDL_FFT_REAL))
// advance the circular history index
644 int histpos;
645 if ((histpos=++pinf->hist_pos) >= nblocks) histpos=pinf->hist_pos=0;
647 // get samples from input, to history
648 WDL_FFT_REAL *optr = pinf->samplehist.WDL_CONVO_GETALIGNED()+histpos*m_fft_size*2;
// the raw chunk is read to optr+sz first, then spread out into slot pairs from optr[0] on
650 pinf->samplesin.GetToBuf(0,optr+sz,in_needed*sizeof(WDL_FFT_REAL));
651 pinf->samplesin.Advance(in_needed*sizeof(WDL_FFT_REAL));
654 bool mono_input_mode=false;
// nonzflag becomes true once any sample in the chunk exceeds the silence threshold
656 bool nonzflag=false;
657 if (mono_impulse_mode)
// 2x mode: channel ch goes into the even slots, channel ch+1 into the odd slots
659 if (++pinf2->hist_pos >= nblocks) pinf2->hist_pos=0;
660 pinf2->samplesin.GetToBuf(0,workbuf2,sz*sizeof(WDL_FFT_REAL));
661 pinf2->samplesin.Advance(sz*sizeof(WDL_FFT_REAL));
662 int i;
663 for (i = 0; i < sz; i ++) // unpack samples
665 WDL_FFT_REAL f = optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
666 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
667 f=optr[i*2+1]=denormal_filter_aggressive(workbuf2[i]);
668 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
671 else
// mono-input mode: the next channel has its own impulse channel (srcc is not the
// last one) and its queued input matches this chunk (CompareQueueToBuf returned false)
673 if (allow_mono_input_mode &&
674 pinf2 &&
675 srcc<m_impdata.GetSize()-1 &&
676 !CompareQueueToBuf(&pinf2->samplesin,optr+sz,sz*sizeof(WDL_FFT_REAL))
679 mono_input_mode=true;
681 else
// once the inputs differ, mono-input mode stays off for the rest of this call
683 allow_mono_input_mode=false;
686 int i;
687 for (i = 0; i < sz; i ++) // unpack samples
689 WDL_FFT_REAL f=optr[i*2]=denormal_filter_aggressive(optr[sz+i]);
690 optr[i*2+1]=0.0;
691 if (!nonzflag && (f<-CONVOENGINE_SILENCE_THRESH || f>CONVOENGINE_SILENCE_THRESH)) nonzflag=true;
// mono-input mode is abandoned if there is no flag list or any older history block is flagged 2
695 int i;
696 for (i = 1; mono_input_mode && i < nblocks; i ++) // start @ 1, since hist[histpos] is no longer used for here
698 int srchistpos = histpos-i;
699 if (srchistpos < 0) srchistpos += nblocks;
700 if (!useSilentList || useSilentList[srchistpos]==2) mono_input_mode=false;
// zero the upper half of the block (zero padding) before transforming
703 if (nonzflag||!useSilentList) memset(optr+sz*2,0,sz*2*sizeof(WDL_FFT_REAL));
706 #ifdef WDLCONVO_ZL_ACCOUNTING
707 m_zl_fftcnt++;
708 #endif
// silent chunks are not transformed
710 if (nonzflag) WDL_fft((WDL_FFT_COMPLEX*)optr,m_fft_size,0);
712 if (useSilentList) useSilentList[histpos]=nonzflag ? (mono_input_mode ? 1 : 2) : 0;
// mzfl = minimum impulse-block flag required for a block to be used (see loop below)
714 int mzfl=2;
715 if (mono_input_mode)
717 mzfl=1;
// the next channel's identical input is consumed without being transformed again
719 pinf2->samplesin.Advance(sz*sizeof(WDL_FFT_REAL));
721 // save a valid copy in sample hist incase we switch from mono to stereo
722 if (++pinf2->hist_pos >= nblocks) pinf2->hist_pos=0;
723 if (pinf2->samplehist_zflag.GetSize()==nblocks)
724 pinf2->samplehist_zflag.Get()[pinf2->hist_pos] = nonzflag ? 1 : 0;
726 WDL_FFT_REAL *optr2 = pinf2->samplehist.WDL_CONVO_GETALIGNED()+pinf2->hist_pos*m_fft_size*2;
727 memcpy(optr2,optr,m_fft_size*2*sizeof(WDL_FFT_REAL));
// applycnt counts how many block products went into workbuf2
730 int applycnt=0;
731 char *useImpSilentList=m_impdata.Get(srcc)->zflag.GetSize() == nblocks ? m_impdata.Get(srcc)->zflag.Get() : NULL;
733 WDL_CONVO_IMPULSEBUFf *impulseptr=m_impdata.Get(srcc)->imp.WDL_CONVO_GETALIGNED();
// impulse block i is multiplied with the input block from i steps ago
734 for (i = 0; i < nblocks; i ++, impulseptr+=m_fft_size*2)
736 int srchistpos = histpos-i;
737 if (srchistpos < 0) srchistpos += nblocks;
739 if (useImpSilentList && useImpSilentList[i]<mzfl) continue;
740 if (useSilentList && !useSilentList[srchistpos]) continue; // silent block
742 WDL_FFT_REAL *samplehist=pinf->samplehist.WDL_CONVO_GETALIGNED() + m_fft_size*srchistpos*2;
// first product overwrites workbuf2 (CplxMul2), later ones accumulate (CplxMul3)
744 if (applycnt++) // add to output
745 WDL_CONVO_CplxMul3((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
746 else // replace output
747 WDL_CONVO_CplxMul2((WDL_FFT_COMPLEX*)workbuf2,(WDL_FFT_COMPLEX*)samplehist,(WDL_CONVO_IMPULSEBUFCPLXf*)impulseptr,m_fft_size);
// nothing contributed: output is silence; otherwise inverse-transform the sum
750 if (!applycnt)
751 memset(workbuf2,0,m_fft_size*2*sizeof(WDL_FFT_REAL));
752 else
753 WDL_fft((WDL_FFT_COMPLEX*)workbuf2,m_fft_size,1);
755 WDL_FFT_REAL *olhist=pinf->overlaphist.Get(); // errors from last time
// p1 walks the first half of the result (emitted now), p3 the second half
// (stored in overlaphist and added in on the next step), p1o is the packed output
756 WDL_FFT_REAL *p1=workbuf2,*p3=workbuf2+m_fft_size,*p1o=workbuf2;
758 if (mono_impulse_mode||mono_input_mode)
// two-channel result: even slots belong to channel ch, odd slots to channel ch+1
760 WDL_FFT_REAL *p2o=workbuf2+m_fft_size*2;
761 WDL_FFT_REAL *olhist2=pinf2->overlaphist.Get(); // errors from last time
// each pass handles two output samples per channel
762 int s=sz/2;
763 while (s--)
765 p2o[0] = p1[1]+olhist2[0];
766 p2o[1] = p1[3]+olhist2[1];
767 p1o[0] = p1[0]+olhist[0];
768 p1o[1] = p1[2]+olhist[1];
769 p1o+=2;
770 p2o+=2;
771 p1+=4;
773 olhist[0]=p3[0];
774 olhist[1]=p3[2];
775 olhist2[0]=p3[1];
776 olhist2[1]=p3[3];
777 p3+=4;
779 olhist+=2;
780 olhist2+=2;
782 // add samples to output
783 pinf->samplesout.Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
784 pinf2->samplesout.Add(workbuf2+m_fft_size*2,sz*sizeof(WDL_FFT_REAL));
786 else
// single-channel result: only the even slots are used
788 int s=sz/2;
789 while (s--)
791 p1o[0] = p1[0]+olhist[0];
792 p1o[1] = p1[2]+olhist[1];
793 p1o+=2;
794 p1+=4;
796 olhist[0]=p3[0];
797 olhist[1]=p3[2];
798 p3+=4;
800 olhist+=2;
802 // add samples to output
803 pinf->samplesout.Add(workbuf2,sz*sizeof(WDL_FFT_REAL));
805 } // while available
// in 2x mode channel ch+1 was handled together with ch, so skip it
807 if (mono_impulse_mode) ch++;
// result = smallest available output sample count over all channels
810 int mv = want;
811 for (ch=0;ch<m_proc_nch;ch++)
813 const ProcChannelInfo *pinf = m_proc.Get(ch);
814 int v = pinf ? pinf->samplesout.Available()/sizeof(WDL_FFT_REAL) : 0;
815 if (!ch || v<mv)mv=v;
817 return mv;
820 WDL_FFT_REAL **WDL_ConvolutionEngine::Get()
822 WDL_FFT_REAL **ret = m_get_tmpptrs.ResizeOK(m_proc_nch,false);
823 if (WDL_NORMALLY(ret))
824 for (int x = 0; x < m_proc_nch; x ++) ret[x]=(WDL_FFT_REAL *)m_proc.Get(x)->samplesout.Get();
825 return ret;
828 void WDL_ConvolutionEngine::Advance(int len)
830 for (int ch = 0; ch < m_proc_nch; ch ++)
832 ProcChannelInfo *pinf = m_proc.Get(ch);
833 pinf->samplesout.Advance(len*sizeof(WDL_FFT_REAL));
834 pinf->samplesout.Compact();
840 /****************************************************************
841 ** low latency version
844 WDL_ConvolutionEngine_Div::WDL_ConvolutionEngine_Div()
846 #ifdef TIMING
847 timingInit();
848 #endif
849 for (int x = 0; x < 2; x ++) m_sout.Add(new WDL_Queue);
850 m_need_feedsilence=true;
853 int WDL_ConvolutionEngine_Div::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
855 m_need_feedsilence=true;
857 m_engines.Empty(true);
858 if (maxfft_size<0)maxfft_size=-maxfft_size;
859 maxfft_size*=2;
860 if (!maxfft_size || maxfft_size>32768) maxfft_size=32768;
863 const int MAX_SIZE_FOR_BRUTE=64;
865 int fftsize = MAX_SIZE_FOR_BRUTE;
866 int impulsechunksize = MAX_SIZE_FOR_BRUTE;
868 if (known_blocksize && !(known_blocksize&(known_blocksize-1)) && known_blocksize>MAX_SIZE_FOR_BRUTE*2)
870 fftsize=known_blocksize/2;
871 impulsechunksize=known_blocksize/2;
873 if (latency_allowed*2 > fftsize)
875 int x = 16;
876 while (x <= latency_allowed) x*=2;
877 if (x>32768) x=32768;
878 fftsize=impulsechunksize=x;
881 int offs=0;
882 int samplesleft=impulse->impulses[0].GetSize()-impulse_offset;
883 if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
887 WDL_ConvolutionEngine *eng=new WDL_ConvolutionEngine;
889 bool wantBrute = !latency_allowed && !offs;
890 if (impulsechunksize*(wantBrute ? 2 : 3) >= samplesleft) impulsechunksize=samplesleft; // early-out, no point going to a larger FFT (since if we did this, we wouldnt have enough samples for a complete next pass)
891 if (fftsize>=maxfft_size) { impulsechunksize=samplesleft; fftsize=maxfft_size; } // if FFTs are as large as possible, finish up
893 eng->SetImpulse(impulse,fftsize,offs+impulse_offset,impulsechunksize, wantBrute);
894 eng->m_zl_delaypos = offs;
895 eng->m_zl_dumpage=0;
896 m_engines.Add(eng);
898 #ifdef WDLCONVO_ZL_ACCOUNTING
899 wdl_log("ce%d: offs=%d, len=%d, fftsize=%d\n",m_engines.GetSize(),offs,impulsechunksize,fftsize);
900 #endif
902 samplesleft -= impulsechunksize;
903 offs+=impulsechunksize;
905 #if 1 // this seems about 10% faster (maybe due to better cache use from less sized ffts used?)
906 impulsechunksize=offs*3;
907 fftsize=offs*2;
908 #else
909 impulsechunksize=fftsize;
911 fftsize*=2;
912 #endif
914 while (samplesleft > 0);
916 return GetLatency();
919 int WDL_ConvolutionEngine_Div::GetLatency()
921 return m_engines.GetSize() ? m_engines.Get(0)->GetLatency() : 0;
925 void WDL_ConvolutionEngine_Div::Reset()
927 int x;
928 for (x = 0; x < m_engines.GetSize(); x ++)
930 WDL_ConvolutionEngine *eng=m_engines.Get(x);
931 eng->Reset();
933 for (x = 0; x < m_sout.GetSize(); x ++)
935 m_sout.Get(x)->Clear();
938 m_need_feedsilence=true;
941 WDL_ConvolutionEngine_Div::~WDL_ConvolutionEngine_Div()
943 #ifdef TIMING
944 timingPrint();
945 #endif
946 m_engines.Empty(true);
947 m_sout.Empty(true);
950 void WDL_ConvolutionEngine_Div::Add(WDL_FFT_REAL **bufs, int len, int nch)
952 while (m_sout.GetSize() < nch)
953 m_sout.Add(new WDL_Queue);
954 while (m_sout.GetSize() > nch)
955 m_sout.Delete(m_sout.GetSize()-1,true);
957 bool ns=m_need_feedsilence;
958 m_need_feedsilence=false;
960 int x;
961 for (x = 0; x < m_engines.GetSize(); x ++)
963 WDL_ConvolutionEngine *eng=m_engines.Get(x);
964 if (ns)
966 eng->m_zl_dumpage = (x>0 && x < m_engines.GetSize()-1) ? (eng->GetLatency()/4) : 0; // reduce max number of ffts per block by staggering them
968 if (eng->m_zl_dumpage>0)
969 eng->Add(NULL,eng->m_zl_dumpage,nch); // added silence to input (to control when fft happens)
972 eng->Add(bufs,len,nch);
974 if (ns) eng->AddSilenceToOutput(eng->m_zl_delaypos); // add silence to output (to delay output to its correct time)
978 WDL_FFT_REAL **WDL_ConvolutionEngine_Div::Get()
980 WDL_FFT_REAL **ret = m_get_tmpptrs.ResizeOK(m_sout.GetSize(),false);
981 if (WDL_NORMALLY(ret))
982 for (int x = 0; x < m_sout.GetSize(); x ++) ret[x]=(WDL_FFT_REAL *)m_sout.Get(x)->Get();
983 return ret;
986 void WDL_ConvolutionEngine_Div::Advance(int len)
988 int x;
989 for (x = 0; x < m_sout.GetSize(); x ++)
991 WDL_Queue *q = m_sout.Get(x);
992 q->Advance(len*sizeof(WDL_FFT_REAL));
993 q->Compact();
997 int WDL_ConvolutionEngine_Div::Avail(int wantSamples)
999 #ifdef TIMING
1000 timingEnter(1);
1001 #endif
1002 int wso=wantSamples;
1003 int x;
1004 #ifdef WDLCONVO_ZL_ACCOUNTING
1005 int cnt=0;
1006 static int maxcnt=-1;
1007 int h=0;
1008 #endif
1009 for (x = 0; x < m_engines.GetSize(); x ++)
1011 WDL_ConvolutionEngine *eng=m_engines.Get(x);
1012 #ifdef WDLCONVO_ZL_ACCOUNTING
1013 eng->m_zl_fftcnt=0;
1014 #endif
1015 int a=eng->Avail(wso+eng->m_zl_dumpage) - eng->m_zl_dumpage;
1016 #ifdef WDLCONVO_ZL_ACCOUNTING
1017 cnt += !!eng->m_zl_fftcnt;
1019 #if 0
1020 if (eng->m_zl_fftcnt)
1021 h|=1<<x;
1023 if (eng->m_zl_fftcnt && x==m_engines.GetSize()-1 && cnt>1)
1025 wdl_log("fft flags=%08x (%08x=max)\n",h,1<<x);
1027 #endif
1028 #endif
1029 if (a < wantSamples) wantSamples=a;
1032 #ifdef WDLCONVO_ZL_ACCOUNTING
1033 static DWORD lastt=0;
1034 if (cnt>maxcnt)maxcnt=cnt;
1035 if (GetTickCount()>lastt+1000)
1037 lastt=GetTickCount();
1038 wdl_log("maxcnt=%d\n",maxcnt);
1039 maxcnt=-1;
1041 #endif
1042 if (wantSamples>0)
1044 const int add_sz = wantSamples*sizeof(WDL_FFT_REAL);
1045 for (x =0; x < m_sout.GetSize(); x ++)
1047 void *add = m_sout.Get(x)->Add(NULL,add_sz);
1048 if (WDL_NORMALLY(add != NULL)) memset(add,0,add_sz);
1051 for (x = 0; x < m_engines.GetSize(); x ++)
1053 WDL_ConvolutionEngine *eng=m_engines.Get(x);
1054 if (eng->m_zl_dumpage>0) { eng->Advance(eng->m_zl_dumpage); eng->m_zl_dumpage=0; }
1056 WDL_FFT_REAL **p=eng->Get();
1057 if (p)
1059 int i;
1060 for (i =0; i < m_sout.GetSize(); i ++)
1062 WDL_Queue *q = m_sout.Get(i);
1063 const int qsz = q->Available();
1064 if (WDL_NORMALLY(qsz >= add_sz))
1066 WDL_FFT_REAL *o=(WDL_FFT_REAL *)((char *)q->Get() + qsz - add_sz);
1067 const WDL_FFT_REAL *in=p[i];
1068 int j=wantSamples;
1069 while (j-->0) *o++ += *in++;
1073 eng->Advance(wantSamples);
1076 #ifdef TIMING
1077 timingLeave(1);
1078 #endif
1080 WDL_Queue *q0 = m_sout.Get(0);
1081 int av=WDL_NORMALLY(q0 != NULL) ? (int) (q0->Available()/sizeof(WDL_FFT_REAL)) : 0;
1082 return av>wso ? wso : av;
1087 /****************************************************************
1088 ** threaded low latency version
1091 #ifdef WDL_CONVO_THREAD
1093 #ifdef _WIN32
1095 #define WDL_CONVO_thread_state (m_thread && m_thread_state)
1097 #else
1099 #define WDL_CONVO_thread_state m_thread_state
// Prepares a flag/condition/mutex triple for use as a signal: the flag
// starts out cleared, and the mutex and condition variable are created with
// default attributes.
static void WDL_CONVO_cond_init(bool *isSignal, pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  const pthread_mutexattr_t *const default_mutex_attr = NULL;
  const pthread_condattr_t *const default_cond_attr = NULL;

  *isSignal = false;
  pthread_mutex_init(mutex, default_mutex_attr);
  pthread_cond_init(cond, default_cond_attr);
}
// Raises the signal: under the mutex, sets the flag and signals the
// condition variable. If the flag is already set nothing is done.
static void WDL_CONVO_cond_signal(bool *isSignal, pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  pthread_mutex_lock(mutex);

  const bool already_pending = *isSignal;
  if (!already_pending)
  {
    *isSignal = true;
    pthread_cond_signal(cond);
  }

  pthread_mutex_unlock(mutex);
}
// Blocks until the signal flag is set, then clears it again before
// returning. The flag is re-checked after every wake-up, and the mutex is
// released on return.
static void WDL_CONVO_cond_wait(bool *isSignal, pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  pthread_mutex_lock(mutex);

  for (;;)
  {
    if (*isSignal) break;
    pthread_cond_wait(cond, mutex);
  }
  *isSignal = false;

  pthread_mutex_unlock(mutex);
}
// Releases a condition variable and its mutex, condition variable first.
static void WDL_CONVO_cond_destroy(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  (void)pthread_cond_destroy(cond);
  (void)pthread_mutex_destroy(mutex);
}
1133 #endif // _WIN32
1135 WDL_ConvolutionEngine_Thread::WDL_ConvolutionEngine_Thread()
1137 m_proc_nch=2;
1138 m_need_feedsilence=true;
1140 m_thread_enable = true;
1141 #ifdef _WIN32
1142 m_thread = NULL;
1143 m_signal_thread = CreateEvent(NULL, FALSE, FALSE, NULL);
1144 m_signal_main = CreateEvent(NULL, FALSE, FALSE, NULL);
1146 m_thread_state = m_signal_thread && m_signal_main;
1147 #else
1148 m_thread_state = false;
1149 WDL_CONVO_cond_init(&m_signal_thread, &m_signal_thread_cond, &m_signal_thread_mutex);
1150 WDL_CONVO_cond_init(&m_signal_main, &m_signal_main_cond, &m_signal_main_mutex);
1151 #endif
1154 int WDL_ConvolutionEngine_Thread::SetImpulse(WDL_ImpulseBuffer *impulse, int maxfft_size, int known_blocksize, int max_imp_size, int impulse_offset, int latency_allowed)
1156 if (!m_thread_enable) CloseThread();
1157 Reset();
1159 if (maxfft_size<0)maxfft_size=-maxfft_size;
1160 if (!maxfft_size || maxfft_size>16384) maxfft_size=16384;
1162 int samplesleft=impulse->GetLength()-impulse_offset;
1163 if (max_imp_size>0 && samplesleft>max_imp_size) samplesleft=max_imp_size;
1165 int impulsechunksize = maxfft_size;
1166 if (impulsechunksize >= samplesleft || !m_thread_enable) impulsechunksize=samplesleft;
1167 m_zl_engine.SetImpulse(impulse, maxfft_size, known_blocksize, impulsechunksize, impulse_offset, latency_allowed);
1169 samplesleft -= impulsechunksize;
1170 m_thread_engine.SetImpulse(impulse, maxfft_size*2, impulse_offset + impulsechunksize, samplesleft);
1171 m_thread_engine.m_zl_delaypos = samplesleft > 0 ? impulsechunksize : -1;
1172 m_thread_engine.m_zl_dumpage=0;
1174 return GetLatency();
1177 void WDL_ConvolutionEngine_Thread::Reset()
1179 if (WDL_CONVO_thread_state)
1181 #ifdef _WIN32
1182 SetEvent(m_signal_thread);
1183 WaitForSingleObject(m_signal_main, INFINITE);
1184 #else
1185 WDL_CONVO_cond_signal(&m_signal_thread, &m_signal_thread_cond, &m_signal_thread_mutex);
1186 WDL_CONVO_cond_wait(&m_signal_main, &m_signal_main_cond, &m_signal_main_mutex);
1187 #endif
1190 m_zl_engine.Reset();
1191 m_thread_engine.Reset();
1193 int x;
1194 for (x = 0; x < WDL_CONVO_MAX_PROC_NCH; x ++)
1196 m_samplesin[x].Clear();
1197 m_samplesin2[x].Clear();
1198 m_samplesout[x].Clear();
1199 m_samplesout2[x].Clear();
1202 m_need_feedsilence=true;
1205 WDL_ConvolutionEngine_Thread::~WDL_ConvolutionEngine_Thread()
1207 CloseThread();
1208 #ifdef _WIN32
1209 if (m_signal_thread) CloseHandle(m_signal_thread);
1210 if (m_signal_main) CloseHandle(m_signal_main);
1211 #else
1212 WDL_CONVO_cond_destroy(&m_signal_thread_cond, &m_signal_thread_mutex);
1213 WDL_CONVO_cond_destroy(&m_signal_main_cond, &m_signal_main_mutex);
1214 #endif
1217 void WDL_ConvolutionEngine_Thread::Add(WDL_FFT_REAL **bufs, int len, int nch)
1219 m_proc_nch=nch;
1221 if (m_thread_engine.m_zl_delaypos >= 0 && CreateThread())
1223 int ch;
1224 m_samplesin_lock.Enter();
1225 for (ch = 0; ch < nch; ch ++)
1227 m_samplesin[ch].Add(bufs ? bufs[ch] : NULL,len*sizeof(WDL_FFT_REAL));
1229 m_samplesin_lock.Leave();
1230 #ifdef _WIN32
1231 SetEvent(m_signal_thread);
1232 #else
1233 WDL_CONVO_cond_signal(&m_signal_thread, &m_signal_thread_cond, &m_signal_thread_mutex);
1234 #endif
1237 m_zl_engine.Add(bufs,len,nch);
1240 WDL_FFT_REAL **WDL_ConvolutionEngine_Thread::Get()
1242 int x;
1243 for (x = 0; x < m_proc_nch; x ++)
1245 m_get_tmpptrs[x]=(WDL_FFT_REAL *)m_samplesout2[x].Get();
1247 return m_get_tmpptrs;
1250 void WDL_ConvolutionEngine_Thread::Advance(int len)
1252 int x;
1253 for (x = 0; x < m_proc_nch; x ++)
1255 m_samplesout2[x].Advance(len*sizeof(WDL_FFT_REAL));
1256 m_samplesout2[x].Compact();
1260 int WDL_ConvolutionEngine_Thread::Avail(int wantSamples)
1262 int wso=wantSamples;
1263 int x;
1265 int av=m_samplesout2[0].Available()/sizeof(WDL_FFT_REAL);
1266 if (av >= wantSamples) return av;
1267 wantSamples -= av;
1269 av=m_zl_engine.Avail(wantSamples);
1270 if (av < wantSamples) wantSamples=av;
1272 if (m_thread_engine.m_zl_delaypos >= 0)
1274 m_samplesout_lock.Enter();
1275 av=m_samplesout[0].Available();
1276 m_samplesout_lock.Leave();
1277 while (av < wantSamples)
1279 int a;
1280 if (WDL_CONVO_thread_state)
1282 #ifdef _WIN32
1283 SetEvent(m_signal_thread);
1284 WaitForSingleObject(m_signal_main, INFINITE);
1285 #else
1286 WDL_CONVO_cond_signal(&m_signal_thread, &m_signal_thread_cond, &m_signal_thread_mutex);
1287 WDL_CONVO_cond_wait(&m_signal_main, &m_signal_main_cond, &m_signal_main_mutex);
1288 #endif
1290 m_samplesout_lock.Enter();
1291 a=m_samplesout[0].Available();
1292 m_samplesout_lock.Leave();
1294 else
1296 a=av;
1298 if (a>av) av=a; else wantSamples=av;
1302 if (wantSamples>0)
1304 WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
1305 for (x =0; x < m_proc_nch; x ++)
1307 memset(tp[x]=(WDL_FFT_REAL*)m_samplesout2[x].Add(NULL,wantSamples*sizeof(WDL_FFT_REAL)),0,wantSamples*sizeof(WDL_FFT_REAL));
1310 WDL_FFT_REAL **p=m_zl_engine.Get();
1311 if (p)
1313 int i;
1314 for (i =0; i < m_proc_nch; i ++)
1316 WDL_FFT_REAL *o=tp[i];
1317 WDL_FFT_REAL *in=p[i];
1318 int j=wantSamples;
1319 while (j-->0) *o++ += *in++;
1322 m_zl_engine.Advance(wantSamples);
1324 if (m_thread_engine.m_zl_delaypos >= 0)
1326 m_samplesout_lock.Enter();
1328 int i;
1329 for (i =0; i < m_proc_nch; i ++)
1331 WDL_FFT_REAL *o=tp[i];
1332 WDL_FFT_REAL *in=(WDL_FFT_REAL *)m_samplesout[i].Get();
1333 int j=wantSamples;
1334 while (j-->0) *o++ += *in++;
1336 m_samplesout[i].Advance(wantSamples*sizeof(WDL_FFT_REAL));
1337 m_samplesout[i].Compact();
1340 m_samplesout_lock.Leave();
1344 av=m_samplesout2[0].Available()/sizeof(WDL_FFT_REAL);
1345 return av>wso ? wso : av;
1348 bool WDL_ConvolutionEngine_Thread::CreateThread()
1350 #ifdef _WIN32
1351 if (!m_thread && m_thread_state)
1353 m_thread = ::CreateThread(NULL, 0, ThreadProc, this, 0, NULL);
1354 if (m_thread)
1355 SetThreadPriority(m_thread, THREAD_PRIORITY_ABOVE_NORMAL);
1356 else
1357 m_thread_state = false;
1359 #else
1360 if (!m_thread_state)
1362 m_thread_state = true;
1364 m_thread=0;
1365 pthread_create(&m_thread,NULL,ThreadProc,this);
1367 static const int prio = 1;
1368 int pol;
1369 struct sched_param param;
1370 if (!pthread_getschedparam(m_thread,&pol,&param))
1373 param.sched_priority = 31 + prio;
1374 int mt=sched_get_priority_min(pol);
1375 if (param.sched_priority<mt||param.sched_priority > (mt=sched_get_priority_max(pol)))param.sched_priority=mt;
1377 pthread_setschedparam(m_thread,pol,&param);
1380 #endif
1381 return m_thread_state;
1384 void WDL_ConvolutionEngine_Thread::CloseThread()
1386 #ifdef _WIN32
1387 if (m_thread)
1389 const bool state = m_thread_state;
1390 if (state)
1392 m_thread_state = false;
1393 SetEvent(m_signal_thread);
1394 WaitForSingleObject(m_thread, INFINITE);
1396 if (CloseHandle(m_thread)) m_thread_state = state;
1397 m_thread = NULL;
1399 #else
1400 if (m_thread_state)
1402 m_thread_state = false;
1403 WDL_CONVO_cond_signal(&m_signal_thread, &m_signal_thread_cond, &m_signal_thread_mutex);
1404 void *tmp;
1405 pthread_join(m_thread,&tmp);
1406 pthread_detach(m_thread);
1408 #endif
1411 #ifdef _WIN32
1412 DWORD WINAPI WDL_ConvolutionEngine_Thread::ThreadProc(LPVOID lpParam)
1413 #else
1414 void *WDL_ConvolutionEngine_Thread::ThreadProc(void *lpParam)
1415 #endif
1417 WDL_ConvolutionEngine_Thread* _this = (WDL_ConvolutionEngine_Thread*)lpParam;
1421 #ifdef _WIN32
1422 if (WaitForSingleObject(_this->m_signal_thread, INFINITE) != WAIT_OBJECT_0)
1424 _this->m_thread_state = false;
1426 #else
1427 WDL_CONVO_cond_wait(&_this->m_signal_thread, &_this->m_signal_thread_cond, &_this->m_signal_thread_mutex);
1428 #endif
1430 if (_this->m_thread_state)
1432 int avail, av, x;
1434 _this->m_samplesin_lock.Enter();
1435 avail = _this->m_samplesin[0].Available();
1436 while (avail > 0)
1438 int sz;
1439 for (x = 0; x < _this->m_proc_nch; x ++)
1441 void *buf=NULL;
1442 sz=_this->m_samplesin[x].GetPtr(0,&buf);
1443 _this->m_samplesin2[x].Add(buf,sz);
1444 _this->m_samplesin[x].Advance(sz);
1446 avail -= sz;
1448 _this->m_samplesin_lock.Leave();
1450 av = avail = _this->m_samplesin2[0].Available();
1451 while (avail > 0)
1453 WDL_FFT_REAL *tp[WDL_CONVO_MAX_PROC_NCH];
1454 int sz;
1455 for (x = 0; x < _this->m_proc_nch; x ++)
1457 sz=_this->m_samplesin2[x].GetPtr(0,(void**)&tp[x]);
1459 _this->m_thread_engine.Add(tp,sz/sizeof(WDL_FFT_REAL),_this->m_proc_nch);
1460 for (x = 0; x < _this->m_proc_nch; x ++)
1462 _this->m_samplesin2[x].Advance(sz);
1464 if (_this->m_need_feedsilence)
1466 if (_this->m_thread_engine.m_zl_delaypos > 0)
1468 _this->m_thread_engine.AddSilenceToOutput(_this->m_thread_engine.m_zl_delaypos); // add silence to output (to delay output to its correct time)
1470 _this->m_need_feedsilence=false;
1472 avail -= sz;
1475 if (av) av = _this->m_thread_engine.Avail(av/sizeof(WDL_FFT_REAL));
1476 if (av)
1478 WDL_FFT_REAL **p=_this->m_thread_engine.Get();
1479 _this->m_samplesout_lock.Enter();
1480 for (x = 0; x < _this->m_proc_nch; x ++)
1482 _this->m_samplesout[x].Add(p[x],av*sizeof(WDL_FFT_REAL));
1484 _this->m_samplesout_lock.Leave();
1485 _this->m_thread_engine.Advance(av);
1489 #ifdef _WIN32
1490 SetEvent(_this->m_signal_main);
1491 #else
1492 WDL_CONVO_cond_signal(&_this->m_signal_main, &_this->m_signal_main_cond, &_this->m_signal_main_mutex);
1493 #endif
1495 while (_this->m_thread_state);
1497 #ifndef _WIN32
1498 pthread_exit(0);
1499 #endif
1500 return 0;
1503 #endif // WDL_CONVO_THREAD
1506 #ifdef WDL_TEST_CONVO
1508 #include <stdio.h>
1510 int main(int argc, char **argv)
1512 if (argc!=5)
1514 printf("usage: convoengine fftsize implen oneoffs pingoffs\n");
1515 return -1;
1518 int fftsize=atoi(argv[1]);
1519 int implen = atoi(argv[2]);
1520 int oneoffs = atoi(argv[3]);
1521 int pingoffs=atoi(argv[4]);
1523 if (implen < 1 || oneoffs < 0 || oneoffs >= implen || pingoffs < 0)
1525 printf("invalid parameters\n");
1526 return -1;
1529 WDL_ImpulseBuffer imp;
1530 imp.nch=1;
1531 memset(imp.impulses[0].Resize(implen),0,implen*sizeof(WDL_FFT_REAL));
1532 imp.impulses[0].Get()[oneoffs]=1.0;
1535 #if WDL_TEST_CONVO==2
1536 WDL_ConvolutionEngine_Div engine;
1537 #else
1538 WDL_ConvolutionEngine engine;
1539 #endif
1540 engine.SetImpulse(&imp,fftsize);
1541 WDL_TypedBuf<WDL_FFT_REAL> m_tmpbuf;
1542 memset(m_tmpbuf.Resize(pingoffs+1),0,pingoffs*sizeof(WDL_FFT_REAL));
1543 m_tmpbuf.Get()[pingoffs]=1.0;
1544 WDL_FFT_REAL *p=m_tmpbuf.Get();
1545 engine.Add(&p,pingoffs+1,1);
1547 p=m_tmpbuf.Resize(4096);
1548 memset(p,0,m_tmpbuf.GetSize()*sizeof(WDL_FFT_REAL));
1550 int avail;
1551 while ((avail=engine.Avail(pingoffs+oneoffs + 8192)) < pingoffs+oneoffs + 8192)
1553 engine.Add(&p,4096,1);
1555 WDL_FFT_REAL **output = engine.Get();
1556 if (!output || !*output)
1558 printf("cant get output\n");
1559 return -1;
1561 int x;
1562 for (x = 0; x < avail; x ++)
1564 WDL_FFT_REAL val=output[0][x];
1565 WDL_FFT_REAL expval = (x==pingoffs+oneoffs) ? 1.0:0.0;
1566 if (fabs(val-expval)>0.000000001)
1568 printf("%d: %.4fdB - %f %f\n",x,log10(max(val,0.000000000001))*20.0 - log10(max(expval,0.000000000001))*20.0,val,expval);
1572 return 0;
1575 #endif
1578 int WDL_ImpulseBuffer::SetLength(int samples)
1580 const int nch = impulses.list.GetSize();
1581 if (!nch) return 0;
1582 for (int x=0;x<nch;x++)
1584 int cursz=impulses[x].GetSize();
1585 if (cursz!=samples)
1587 impulses[x].Resize(samples,false);
1589 if (impulses[x].GetSize()!=samples) // validate length!
1591 // ERROR! FREE ALL!
1592 for(x=0;x<impulses.list.GetSize();x++) impulses[x].Resize(0);
1593 return 0;
1597 if (cursz<samples)
1598 memset(impulses[x].Get()+cursz,0,(samples-cursz)*sizeof(WDL_FFT_REAL));
1600 return impulses[0].GetSize();
1604 void WDL_ImpulseBuffer::SetNumChannels(int usench, bool duplicateExisting)
1606 if (usench<1) usench=1;
1608 const int old_nch = impulses.list.GetSize();
1609 if (usench > old_nch)
1611 while (impulses.list.GetSize() < usench)
1612 impulses.list.Add(new WDL_CONVO_IMPULSE_TYPED_BUF);
1614 const int len = SetLength(GetLength());
1616 int x,ax=0;
1617 if (duplicateExisting && len>0 && old_nch>0) for(x=old_nch;x<usench;x++)
1619 memcpy(impulses[x].Get(),impulses[ax].Get(),len*sizeof(WDL_FFT_REAL)); // duplicate channels
1620 if (++ax>=old_nch) ax=0;
1623 else while (usench<impulses.list.GetSize())
1625 impulses.list.Delete(impulses.list.GetSize()-1,true);
1630 void WDL_ImpulseBuffer::Set(const WDL_FFT_REAL** bufs, int samples, int usench)
1632 #ifdef WDL_CONVO_USE_CONST_HEAP_BUF
1633 if (usench<1) usench=1;
1635 const int old_nch = impulses.list.GetSize();
1636 if (usench > old_nch)
1638 while (impulses.list.GetSize() < usench)
1639 impulses.list.Add(new WDL_CONVO_IMPULSE_TYPED_BUF);
1641 else while (usench<impulses.list.GetSize())
1643 impulses.list.Delete(impulses.list.GetSize()-1,true);
1646 for (int x = 0; x < usench; ++x) impulses[x].Set(bufs[x], samples);
1648 #else
1649 SetLength(samples);
1650 SetNumChannels(usench,false);
1651 usench = GetNumChannels();
1652 if (GetLength() > 0) for (int x = 0; x < usench; ++x)
1654 memcpy(impulses[x].Get(), bufs[x], samples * sizeof(WDL_FFT_REAL));
1656 #endif