dlzma: some small API fixes (no functional changes); added streaming example
[iv.d.git] / follin / resampler.d
blobe9e493ade4ca5dc67009939a5efe9d77f54123ae
1 /* Copyright (C) 2007-2008 Jean-Marc Valin
2 * Copyright (C) 2008 Thorvald Natvig
3 * D port by Ketmar // Invisible Vector
5 * Arbitrary resampling code
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
11 * 1. Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
34 /* A-a-a-and now... D port is covered by the following license!
36 * This program is free software: you can redistribute it and/or modify
37 * it under the terms of the GNU General Public License as published by
38 * the Free Software Foundation, version 3 of the License ONLY.
40 * This program is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43 * GNU General Public License for more details.
45 * You should have received a copy of the GNU General Public License
46 * along with this program. If not, see <http://www.gnu.org/licenses/>.
48 module iv.follin.resampler /*is aliced*/;
49 import iv.alice;
52 The design goals of this code are:
53 - Very fast algorithm
54 - SIMD-friendly algorithm
55 - Low memory requirement
56 - Good *perceptual* quality (and not best SNR)
58 Warning: This resampler is relatively new. Although I think I got rid of
59 all the major bugs and I don't expect the API to change anymore, there
60 may be something I've missed. So use with caution.
62 This algorithm is based on this original resampling algorithm:
63 Smith, Julius O. Digital Audio Resampling Home Page
64 Center for Computer Research in Music and Acoustics (CCRMA),
65 Stanford University, 2007.
66 Web published at http://www-ccrma.stanford.edu/~jos/resample/.
68 There is one main difference, though. This resampler uses cubic
69 interpolation instead of linear interpolation in the above paper. This
70 makes the table much smaller and makes it possible to compute that table
71 on a per-stream basis. In turn, being able to tweak the table for each
72 stream makes it possible to both reduce complexity on simple ratios
73 (e.g. 2/3), and get rid of the rounding operations in the inner loop.
74 The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
76 version = sincresample_use_full_table;
77 version(X86) {
78 version(sincresample_disable_sse) {
79 } else {
80 version(D_PIC) {} else version = sincresample_use_sse;
85 // ////////////////////////////////////////////////////////////////////////// //
86 public struct SpeexResampler {
87 public:
88 alias Quality = int;
89 enum : uint {
90 Fastest = 0,
91 Voip = 3,
92 Default = 4,
93 Desktop = 5,
94 Music = 8,
95 Best = 10,
98 enum Error {
99 OK = 0,
100 NoMemory,
101 BadState,
102 BadArgument,
103 BadData,
106 private:
107 nothrow @trusted @nogc:
108 alias ResamplerFn = int function (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen);
110 private:
111 uint inRate;
112 uint outRate;
113 uint numRate; // from
114 uint denRate; // to
116 Quality srQuality;
117 uint chanCount;
118 uint filterLen;
119 uint memAllocSize;
120 uint bufferSize;
121 int intAdvance;
122 int fracAdvance;
123 float cutoff;
124 uint oversample;
125 bool started;
127 // these are per-channel
128 int[64] lastSample;
129 uint[64] sampFracNum;
130 uint[64] magicSamples;
132 float* mem;
133 uint realMemLen; // how much memory really allocated
134 float* sincTable;
135 uint sincTableLen;
136 uint realSincTableLen; // how much memory really allocated
137 ResamplerFn resampler;
139 int inStride;
140 int outStride;
142 public:
143 static string errorStr (int err) {
144 switch (err) with (Error) {
145 case OK: return "success";
146 case NoMemory: return "memory allocation failed";
147 case BadState: return "bad resampler state";
148 case BadArgument: return "invalid argument";
149 case BadData: return "bad data passed";
150 default:
152 return "unknown error";
155 public:
156 @disable this (this);
157 ~this () { deinit(); }
159 @property bool inited () const pure { return (resampler !is null); }
161 void deinit () {
162 import core.stdc.stdlib : free;
163 if (mem !is null) { free(mem); mem = null; }
164 if (sincTable !is null) { free(sincTable); sincTable = null; }
166 memAllocSize = realMemLen = 0;
167 sincTableLen = realSincTableLen = 0;
168 resampler = null;
169 started = false;
171 inRate = outRate = numRate = denRate = 0;
172 srQuality = cast(Quality)666;
173 chanCount = 0;
174 filterLen = 0;
175 memAllocSize = 0;
176 bufferSize = 0;
177 intAdvance = 0;
178 fracAdvance = 0;
179 cutoff = 0;
180 oversample = 0;
181 started = 0;
183 mem = null;
184 realMemLen = 0; // how much memory really allocated
185 sincTable = null;
186 sincTableLen = 0;
187 realSincTableLen = 0; // how much memory really allocated
188 resampler = null;
190 inStride = outStride = 0;
193 /** Create a new resampler with integer input and output rates.
195 * Params:
196 * chans = Number of channels to be processed
197 * inRate = Input sampling rate (integer number of Hz).
198 * outRate = Output sampling rate (integer number of Hz).
199 * aquality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
201 * Returns:
202 * 0 or error code
204 Error setup (uint chans, uint ainRate, uint aoutRate, Quality aquality/*, usize line=__LINE__*/) {
205 //{ import core.stdc.stdio; printf("init: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
206 import core.stdc.stdlib : malloc, free;
208 deinit();
209 if (aquality < 0) aquality = 0;
210 if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
211 if (chans < 1 || chans > 16) return Error.BadArgument;
213 started = false;
214 inRate = 0;
215 outRate = 0;
216 numRate = 0;
217 denRate = 0;
218 srQuality = cast(Quality)666; // it's ok
219 sincTableLen = 0;
220 memAllocSize = 0;
221 filterLen = 0;
222 mem = null;
223 resampler = null;
225 cutoff = 1.0f;
226 chanCount = chans;
227 inStride = 1;
228 outStride = 1;
230 bufferSize = 160;
232 // per channel data
233 lastSample[] = 0;
234 magicSamples[] = 0;
235 sampFracNum[] = 0;
237 setQuality(aquality);
238 setRate(ainRate, aoutRate);
240 if (auto filterErr = updateFilter()) { deinit(); return filterErr; }
241 skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
243 return Error.OK;
246 /** Set (change) the input/output sampling rates (integer value).
248 * Params:
249 * ainRate = Input sampling rate (integer number of Hz).
250 * aoutRate = Output sampling rate (integer number of Hz).
252 * Returns:
253 * 0 or error code
255 Error setRate (uint ainRate, uint aoutRate/*, usize line=__LINE__*/) {
256 //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
257 if (inRate == ainRate && outRate == aoutRate) return Error.OK;
258 //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ratioNum, ratioDen, cast(uint)line); }
260 uint oldDen = denRate;
261 inRate = ainRate;
262 outRate = aoutRate;
263 auto div = gcd(ainRate, aoutRate);
264 numRate = ainRate/div;
265 denRate = aoutRate/div;
267 if (oldDen > 0) {
268 foreach (ref v; sampFracNum.ptr[0..chanCount]) {
269 v = v*denRate/oldDen;
270 // safety net
271 if (v >= denRate) v = denRate-1;
275 return (inited ? updateFilter() : Error.OK);
278 /** Get the current input/output sampling rates (integer value).
280 * Params:
281 * ainRate = Input sampling rate (integer number of Hz) copied.
282 * aoutRate = Output sampling rate (integer number of Hz) copied.
284 void getRate (out uint ainRate, out uint aoutRate) {
285 ainRate = inRate;
286 aoutRate = outRate;
289 @property uint getInRate () { return inRate; }
290 @property uint getOutRate () { return outRate; }
292 @property uint getChans () { return chanCount; }
294 /** Get the current resampling ratio. This will be reduced to the least common denominator.
296 * Params:
297 * ratioNum = Numerator of the sampling rate ratio copied
298 * ratioDen = Denominator of the sampling rate ratio copied
300 void getRatio (out uint ratioNum, out uint ratioDen) {
301 ratioNum = numRate;
302 ratioDen = denRate;
305 /** Set (change) the conversion quality.
307 * Params:
308 * quality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
310 * Returns:
311 * 0 or error code
313 Error setQuality (Quality aquality) {
314 if (aquality < 0) aquality = 0;
315 if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
316 if (srQuality == aquality) return Error.OK;
317 srQuality = aquality;
318 return (inited ? updateFilter() : Error.OK);
321 /** Get the conversion quality.
323 * Returns:
324 * Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
326 int getQuality () { return srQuality; }
328 /** Get the latency introduced by the resampler measured in input samples.
330 * Returns:
331 * Input latency;
333 int inputLatency () { return filterLen/2; }
335 /** Get the latency introduced by the resampler measured in output samples.
337 * Returns:
338 * Output latency.
340 int outputLatency () { return ((filterLen/2)*denRate+(numRate>>1))/numRate; }
342 /* Make sure that the first samples to go out of the resamplers don't have
343 * leading zeros. This is only useful before starting to use a newly created
344 * resampler. It is recommended to use that when resampling an audio file, as
345 * it will generate a file with the same length. For real-time processing,
346 * it is probably easier not to use this call (so that the output duration
347 * is the same for the first frame).
349 * Setup/reset sequence will automatically call this, so it is private.
351 private void skipZeros () { foreach (immutable i; 0..chanCount) lastSample.ptr[i] = filterLen/2; }
353 static struct Data {
354 const(float)[] dataIn;
355 float[] dataOut;
356 uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
357 uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
360 /** Resample (an interleaved) float array. The input and output buffers must *not* overlap.
361 * `data.dataIn` can be empty, but `data.dataOut` can't.
362 * Function will return number of consumed samples (*not* *frames*!) in `data.inputSamplesUsed`,
363 * and number of produced samples in `data.outputSamplesUsed`.
364 * You should provide enough samples for all channels, and all channels will be processed.
366 * Params:
367 * data = input and output buffers, number of frames consumed and produced
369 * Returns:
370 * 0 or error code
372 Error process(string mode="interleaved") (ref Data data) {
373 static assert(mode == "interleaved" || mode == "sequential");
375 data.inputSamplesUsed = data.outputSamplesUsed = 0;
376 if (!inited) return Error.BadState;
378 if (data.dataIn.length%chanCount || data.dataOut.length < 1 || data.dataOut.length%chanCount) return Error.BadData;
379 if (data.dataIn.length > uint.max/4 || data.dataOut.length > uint.max/4) return Error.BadData;
381 static if (mode == "interleaved") {
382 inStride = outStride = chanCount;
383 } else {
384 inStride = outStride = 1;
386 uint iofs = 0, oofs = 0;
387 immutable uint idclen = cast(uint)(data.dataIn.length/chanCount);
388 immutable uint odclen = cast(uint)(data.dataOut.length/chanCount);
389 foreach (immutable i; 0..chanCount) {
390 data.inputSamplesUsed = idclen;
391 data.outputSamplesUsed = odclen;
392 if (data.dataIn.length) {
393 processOneChannel(i, data.dataIn.ptr+iofs, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
394 } else {
395 processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
397 static if (mode == "interleaved") {
398 ++iofs;
399 ++oofs;
400 } else {
401 iofs += idclen;
402 oofs += odclen;
405 data.inputSamplesUsed *= chanCount;
406 data.outputSamplesUsed *= chanCount;
407 return Error.OK;
411 //HACK for libswresample
412 // return -1 or number of outframes
413 int swrconvert (float** outbuf, int outframes, const(float)**inbuf, int inframes) {
414 if (!inited || outframes < 1 || inframes < 0) return -1;
415 inStride = outStride = 1;
416 Data data;
417 foreach (immutable i; 0..chanCount) {
418 data.dataIn = (inframes ? inbuf[i][0..inframes] : null);
419 data.dataOut = (outframes ? outbuf[i][0..outframes] : null);
420 data.inputSamplesUsed = inframes;
421 data.outputSamplesUsed = outframes;
422 if (inframes > 0) {
423 processOneChannel(i, data.dataIn.ptr, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
424 } else {
425 processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
428 return data.outputSamplesUsed;
431 /// Reset a resampler so a new (unrelated) stream can be processed.
432 void reset () {
433 lastSample[] = 0;
434 magicSamples[] = 0;
435 sampFracNum[] = 0;
436 //foreach (immutable i; 0..chanCount*(filterLen-1)) mem[i] = 0;
437 if (mem !is null) mem[0..chanCount*(filterLen-1)] = 0;
438 skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
441 private:
442 Error processOneChannel (uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen) {
443 uint ilen = *indataLen;
444 uint olen = *outdataLen;
445 float* x = mem+chanIdx*memAllocSize;
446 immutable int filterOfs = filterLen-1;
447 immutable uint xlen = memAllocSize-filterOfs;
448 immutable int istride = inStride;
449 if (magicSamples.ptr[chanIdx]) olen -= magic(chanIdx, &outdata, olen);
450 if (!magicSamples.ptr[chanIdx]) {
451 while (ilen && olen) {
452 uint ichunk = (ilen > xlen ? xlen : ilen);
453 uint ochunk = olen;
454 if (indata !is null) {
455 //foreach (immutable j; 0..ichunk) x[j+filterOfs] = indata[j*istride];
456 if (istride == 1) {
457 x[filterOfs..filterOfs+ichunk] = indata[0..ichunk];
458 } else {
459 auto sp = indata;
460 auto dp = x+filterOfs;
461 foreach (immutable j; 0..ichunk) { *dp++ = *sp; sp += istride; }
463 } else {
464 //foreach (immutable j; 0..ichunk) x[j+filterOfs] = 0;
465 x[filterOfs..filterOfs+ichunk] = 0;
467 processNative(chanIdx, &ichunk, outdata, &ochunk);
468 ilen -= ichunk;
469 olen -= ochunk;
470 outdata += ochunk*outStride;
471 if (indata !is null) indata += ichunk*istride;
474 *indataLen -= ilen;
475 *outdataLen -= olen;
476 return Error.OK;
479 Error processNative (uint chanIdx, uint* indataLen, float* outdata, uint* outdataLen) {
480 immutable N = filterLen;
481 int outSample = 0;
482 float* x = mem+chanIdx*memAllocSize;
483 uint ilen;
484 started = true;
485 // call the right resampler through the function ptr
486 outSample = resampler(this, chanIdx, x, indataLen, outdata, outdataLen);
487 if (lastSample.ptr[chanIdx] < cast(int)*indataLen) *indataLen = lastSample.ptr[chanIdx];
488 *outdataLen = outSample;
489 lastSample.ptr[chanIdx] -= *indataLen;
490 ilen = *indataLen;
491 foreach (immutable j; 0..N-1) x[j] = x[j+ilen];
492 return Error.OK;
495 int magic (uint chanIdx, float **outdata, uint outdataLen) {
496 uint tempInLen = magicSamples.ptr[chanIdx];
497 float* x = mem+chanIdx*memAllocSize;
498 processNative(chanIdx, &tempInLen, *outdata, &outdataLen);
499 magicSamples.ptr[chanIdx] -= tempInLen;
500 // if we couldn't process all "magic" input samples, save the rest for next time
501 if (magicSamples.ptr[chanIdx]) {
502 immutable N = filterLen;
503 foreach (immutable i; 0..magicSamples.ptr[chanIdx]) x[N-1+i] = x[N-1+i+tempInLen];
505 *outdata += outdataLen*outStride;
506 return outdataLen;
509 Error updateFilter () {
510 uint oldFilterLen = filterLen;
511 uint oldAllocSize = memAllocSize;
512 bool useDirect;
513 uint minSincTableLen;
514 uint minAllocSize;
516 intAdvance = numRate/denRate;
517 fracAdvance = numRate%denRate;
518 oversample = qualityMap.ptr[srQuality].oversample;
519 filterLen = qualityMap.ptr[srQuality].baseLength;
521 if (numRate > denRate) {
522 // down-sampling
523 cutoff = qualityMap.ptr[srQuality].downsampleBandwidth*denRate/numRate;
524 // FIXME: divide the numerator and denominator by a certain amount if they're too large
525 filterLen = filterLen*numRate/denRate;
526 // round up to make sure we have a multiple of 8 for SSE
527 filterLen = ((filterLen-1)&(~0x7))+8;
528 if (2*denRate < numRate) oversample >>= 1;
529 if (4*denRate < numRate) oversample >>= 1;
530 if (8*denRate < numRate) oversample >>= 1;
531 if (16*denRate < numRate) oversample >>= 1;
532 if (oversample < 1) oversample = 1;
533 } else {
534 // up-sampling
535 cutoff = qualityMap.ptr[srQuality].upsampleBandwidth;
538 // choose the resampling type that requires the least amount of memory
539 version(sincresample_use_full_table) {
540 useDirect = true;
541 if (int.max/float.sizeof/denRate < filterLen) goto fail;
542 } else {
543 useDirect = (filterLen*denRate <= filterLen*oversample+8 && int.max/float.sizeof/denRate >= filterLen);
546 if (useDirect) {
547 minSincTableLen = filterLen*denRate;
548 } else {
549 if ((int.max/float.sizeof-8)/oversample < filterLen) goto fail;
550 minSincTableLen = filterLen*oversample+8;
553 if (sincTableLen < minSincTableLen) {
554 import core.stdc.stdlib : realloc;
555 auto nslen = cast(uint)(minSincTableLen*float.sizeof);
556 if (nslen > realSincTableLen) {
557 if (nslen < 512*1024) nslen = 512*1024; // inc to 3 mb?
558 auto x = cast(float*)realloc(sincTable, nslen);
559 if (!x) goto fail;
560 sincTable = x;
561 realSincTableLen = nslen;
563 sincTableLen = minSincTableLen;
566 if (useDirect) {
567 foreach (int i; 0..denRate) {
568 foreach (int j; 0..filterLen) {
569 sincTable[i*filterLen+j] = sinc(cutoff, ((j-cast(int)filterLen/2+1)-(cast(float)i)/denRate), filterLen, qualityMap.ptr[srQuality].windowFunc);
572 if (srQuality > 8) {
573 resampler = &resamplerBasicDirect!double;
574 } else {
575 resampler = &resamplerBasicDirect!float;
577 } else {
578 foreach (immutable int i; -4..cast(int)(oversample*filterLen+4)) {
579 sincTable[i+4] = sinc(cutoff, (i/cast(float)oversample-filterLen/2), filterLen, qualityMap.ptr[srQuality].windowFunc);
581 if (srQuality > 8) {
582 resampler = &resamplerBasicInterpolate!double;
583 } else {
584 resampler = &resamplerBasicInterpolate!float;
588 /* Here's the place where we update the filter memory to take into account
589 the change in filter length. It's probably the messiest part of the code
590 due to handling of lots of corner cases. */
592 // adding bufferSize to filterLen won't overflow here because filterLen could be multiplied by float.sizeof above
593 minAllocSize = filterLen-1+bufferSize;
594 if (minAllocSize > memAllocSize) {
595 import core.stdc.stdlib : realloc;
596 if (int.max/float.sizeof/chanCount < minAllocSize) goto fail;
597 auto nslen = cast(uint)(chanCount*minAllocSize*mem[0].sizeof);
598 if (nslen > realMemLen) {
599 if (nslen < 16384) nslen = 16384;
600 auto x = cast(float*)realloc(mem, nslen);
601 if (x is null) goto fail;
602 mem = x;
603 realMemLen = nslen;
605 memAllocSize = minAllocSize;
607 if (!started) {
608 //foreach (i=0;i<chanCount*memAllocSize;i++) mem[i] = 0;
609 mem[0..chanCount*memAllocSize] = 0;
610 } else if (filterLen > oldFilterLen) {
611 // increase the filter length
612 foreach_reverse (uint i; 0..chanCount) {
613 uint j;
614 uint olen = oldFilterLen;
616 // try and remove the magic samples as if nothing had happened
617 //FIXME: this is wrong but for now we need it to avoid going over the array bounds
618 olen = oldFilterLen+2*magicSamples.ptr[i];
619 for (j = oldFilterLen-1+magicSamples.ptr[i]; j--; ) mem[i*memAllocSize+j+magicSamples.ptr[i]] = mem[i*oldAllocSize+j];
620 //for (j = 0; j < magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = 0;
621 mem[i*memAllocSize..i*memAllocSize+magicSamples.ptr[i]] = 0;
622 magicSamples.ptr[i] = 0;
624 if (filterLen > olen) {
625 // if the new filter length is still bigger than the "augmented" length
626 // copy data going backward
627 for (j = 0; j < olen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = mem[i*memAllocSize+(olen-2-j)];
628 // then put zeros for lack of anything better
629 for (; j < filterLen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = 0;
630 // adjust lastSample
631 lastSample.ptr[i] += (filterLen-olen)/2;
632 } else {
633 // put back some of the magic!
634 magicSamples.ptr[i] = (olen-filterLen)/2;
635 for (j = 0; j < filterLen-1+magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
638 } else if (filterLen < oldFilterLen) {
639 // reduce filter length, this a bit tricky
640 // we need to store some of the memory as "magic" samples so they can be used directly as input the next time(s)
641 foreach (immutable i; 0..chanCount) {
642 uint j;
643 uint oldMagic = magicSamples.ptr[i];
644 magicSamples.ptr[i] = (oldFilterLen-filterLen)/2;
645 // we must copy some of the memory that's no longer used
646 // copy data going backward
647 for (j = 0; j < filterLen-1+magicSamples.ptr[i]+oldMagic; ++j) {
648 mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
650 magicSamples.ptr[i] += oldMagic;
653 return Error.OK;
655 fail:
656 resampler = null;
657 /* mem may still contain consumed input samples for the filter.
658 Restore filterLen so that filterLen-1 still points to the position after
659 the last of these samples. */
660 filterLen = oldFilterLen;
661 return Error.NoMemory;
666 // ////////////////////////////////////////////////////////////////////////// //
667 static immutable double[68] kaiser12Table = [
668 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
669 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
670 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
671 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
672 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
673 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
674 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
675 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
676 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
677 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
678 0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
679 0.00001000, 0.00000000];
681 static immutable double[36] kaiser10Table = [
682 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
683 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
684 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
685 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
686 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
687 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000];
689 static immutable double[36] kaiser8Table = [
690 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
691 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
692 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
693 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
694 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
695 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000];
697 static immutable double[36] kaiser6Table = [
698 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
699 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
700 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
701 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
702 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
703 0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000];
705 struct FuncDef {
706 immutable(double)* table;
707 int oversample;
710 static immutable FuncDef Kaiser12 = FuncDef(kaiser12Table.ptr, 64);
711 static immutable FuncDef Kaiser10 = FuncDef(kaiser10Table.ptr, 32);
712 static immutable FuncDef Kaiser8 = FuncDef(kaiser8Table.ptr, 32);
713 static immutable FuncDef Kaiser6 = FuncDef(kaiser6Table.ptr, 32);
716 struct QualityMapping {
717 int baseLength;
718 int oversample;
719 float downsampleBandwidth;
720 float upsampleBandwidth;
721 immutable FuncDef* windowFunc;
725 /* This table maps conversion quality to internal parameters. There are two
726 reasons that explain why the up-sampling bandwidth is larger than the
727 down-sampling bandwidth:
728 1) When up-sampling, we can assume that the spectrum is already attenuated
729 close to the Nyquist rate (from an A/D or a previous resampling filter)
730 2) Any aliasing that occurs very close to the Nyquist rate will be masked
731 by the sinusoids/noise just below the Nyquist rate (guaranteed only for
732 up-sampling).
734 static immutable QualityMapping[11] qualityMap = [
735 QualityMapping( 8, 4, 0.830f, 0.860f, &Kaiser6 ), /* Q0 */
736 QualityMapping( 16, 4, 0.850f, 0.880f, &Kaiser6 ), /* Q1 */
737 QualityMapping( 32, 4, 0.882f, 0.910f, &Kaiser6 ), /* Q2 */ /* 82.3% cutoff ( ~60 dB stop) 6 */
738 QualityMapping( 48, 8, 0.895f, 0.917f, &Kaiser8 ), /* Q3 */ /* 84.9% cutoff ( ~80 dB stop) 8 */
739 QualityMapping( 64, 8, 0.921f, 0.940f, &Kaiser8 ), /* Q4 */ /* 88.7% cutoff ( ~80 dB stop) 8 */
740 QualityMapping( 80, 16, 0.922f, 0.940f, &Kaiser10), /* Q5 */ /* 89.1% cutoff (~100 dB stop) 10 */
741 QualityMapping( 96, 16, 0.940f, 0.945f, &Kaiser10), /* Q6 */ /* 91.5% cutoff (~100 dB stop) 10 */
742 QualityMapping(128, 16, 0.950f, 0.950f, &Kaiser10), /* Q7 */ /* 93.1% cutoff (~100 dB stop) 10 */
743 QualityMapping(160, 16, 0.960f, 0.960f, &Kaiser10), /* Q8 */ /* 94.5% cutoff (~100 dB stop) 10 */
744 QualityMapping(192, 32, 0.968f, 0.968f, &Kaiser12), /* Q9 */ /* 95.5% cutoff (~100 dB stop) 10 */
745 QualityMapping(256, 32, 0.975f, 0.975f, &Kaiser12), /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
749 nothrow @trusted @nogc:
750 /*8, 24, 40, 56, 80, 104, 128, 160, 200, 256, 320*/
751 double computeFunc (float x, immutable FuncDef* func) {
752 version(Posix) import core.stdc.math : lrintf;
753 import std.math : floor;
754 //double[4] interp;
755 float y = x*func.oversample;
756 version(Posix) {
757 int ind = cast(int)lrintf(floor(y));
758 } else {
759 int ind = cast(int)(floor(y));
761 float frac = (y-ind);
762 immutable f2 = frac*frac;
763 immutable f3 = f2*frac;
764 double interp3 = -0.1666666667*frac+0.1666666667*(f3);
765 double interp2 = frac+0.5*(f2)-0.5*(f3);
766 //double interp2 = 1.0f-0.5f*frac-f2+0.5f*f3;
767 double interp0 = -0.3333333333*frac+0.5*(f2)-0.1666666667*(f3);
768 // just to make sure we don't have rounding problems
769 double interp1 = 1.0f-interp3-interp2-interp0;
770 //sum = frac*accum[1]+(1-frac)*accum[2];
771 return interp0*func.table[ind]+interp1*func.table[ind+1]+interp2*func.table[ind+2]+interp3*func.table[ind+3];
775 // the slow way of computing a sinc for the table; should improve that some day
776 float sinc (float cutoff, float x, int N, immutable FuncDef *windowFunc) {
777 version(LittleEndian) {
778 align(1) union temp_float { align(1): float f; uint n; }
779 } else {
780 static T fabs(T) (T n) pure { static if (__VERSION__ > 2067) pragma(inline, true); return (n < 0 ? -n : n); }
782 import std.math : sin, PI;
783 version(LittleEndian) {
784 temp_float txx = void;
785 txx.f = x;
786 txx.n &= 0x7fff_ffff; // abs
787 if (txx.f < 1.0e-6f) return cutoff;
788 if (txx.f > 0.5f*N) return 0;
789 } else {
790 if (fabs(x) < 1.0e-6f) return cutoff;
791 if (fabs(x) > 0.5f*N) return 0;
793 //FIXME: can it really be any slower than this?
794 immutable float xx = x*cutoff;
795 immutable pixx = PI*xx;
796 version(LittleEndian) {
797 return cutoff*sin(pixx)/pixx*computeFunc(2.0*txx.f/N, windowFunc);
798 } else {
799 return cutoff*sin(pixx)/pixx*computeFunc(fabs(2.0*x/N), windowFunc);
804 void cubicCoef (in float frac, float* interp) {
805 immutable f2 = frac*frac;
806 immutable f3 = f2*frac;
807 // compute interpolation coefficients; i'm not sure whether this corresponds to cubic interpolation but I know it's MMSE-optimal on a sinc
808 interp[0] = -0.16667f*frac+0.16667f*f3;
809 interp[1] = frac+0.5f*f2-0.5f*f3;
810 //interp[2] = 1.0f-0.5f*frac-f2+0.5f*f3;
811 interp[3] = -0.33333f*frac+0.5f*f2-0.16667f*f3;
812 // just to make sure we don't have rounding problems
813 interp[2] = 1.0-interp[0]-interp[1]-interp[3];
817 // ////////////////////////////////////////////////////////////////////////// //
818 int resamplerBasicDirect(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen)
819 if (is(T == float) || is(T == double))
821 auto N = st.filterLen;
822 static if (is(T == double)) assert(N%4 == 0);
823 int outSample = 0;
824 int lastSample = st.lastSample.ptr[chanIdx];
825 uint sampFracNum = st.sampFracNum.ptr[chanIdx];
826 const(float)* sincTable = st.sincTable;
827 immutable outStride = st.outStride;
828 immutable intAdvance = st.intAdvance;
829 immutable fracAdvance = st.fracAdvance;
830 immutable denRate = st.denRate;
831 T sum = void;
832 while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
833 const(float)* sinct = &sincTable[sampFracNum*N];
834 const(float)* iptr = &indata[lastSample];
835 static if (is(T == float)) {
836 // at least 2x speedup with SSE here (but for unrolled loop)
837 if (N%4 == 0) {
838 version(sincresample_use_sse) {
839 //align(64) __gshared float[4] zero = 0;
840 align(64) __gshared float[4+128] zeroesBuf = 0; // dmd cannot into such aligns, alas
841 __gshared uint zeroesptr = 0;
842 if (zeroesptr == 0) {
843 zeroesptr = cast(uint)zeroesBuf.ptr;
844 if (zeroesptr&0x3f) zeroesptr = (zeroesptr|0x3f)+1;
846 //assert((zeroesptr&0x3f) == 0, "wtf?!");
847 asm nothrow @safe @nogc {
848 mov ECX,[N];
849 shr ECX,2;
850 mov EAX,[zeroesptr];
851 movaps XMM0,[EAX];
852 mov EAX,[sinct];
853 mov EBX,[iptr];
854 mov EDX,16;
855 align 8;
856 rbdseeloop:
857 movups XMM1,[EAX];
858 movups XMM2,[EBX];
859 mulps XMM1,XMM2;
860 addps XMM0,XMM1;
861 add EAX,EDX;
862 add EBX,EDX;
863 dec ECX;
864 jnz rbdseeloop;
865 // store result in sum
866 movhlps XMM1,XMM0; // now low part of XMM1 contains high part of XMM0
867 addps XMM0,XMM1; // low part of XMM0 is ok
868 movaps XMM1,XMM0;
869 shufps XMM1,XMM0,0b_01_01_01_01; // 2nd float of XMM0 goes to the 1st float of XMM1
870 addss XMM0,XMM1;
871 movss [sum],XMM0;
874 float sum1 = 0;
875 foreach (immutable j; 0..N) sum1 += sinct[j]*iptr[j];
876 import std.math;
877 if (fabs(sum-sum1) > 0.000001f) {
878 import core.stdc.stdio;
879 printf("sum=%f; sum1=%f\n", sum, sum1);
880 assert(0);
883 } else {
884 // no SSE; for my i3 unrolled loop is almost of the speed of SSE code
885 T[4] accum = 0;
886 foreach (immutable j; 0..N/4) {
887 accum.ptr[0] += *sinct++ * *iptr++;
888 accum.ptr[1] += *sinct++ * *iptr++;
889 accum.ptr[2] += *sinct++ * *iptr++;
890 accum.ptr[3] += *sinct++ * *iptr++;
892 sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
894 } else {
895 sum = 0;
896 foreach (immutable j; 0..N) sum += *sinct++ * *iptr++;
898 outdata[outStride*outSample++] = sum;
899 } else {
900 if (N%4 == 0) {
901 //TODO: write SSE code here!
902 // for my i3 unrolled loop is ~2 times faster
903 T[4] accum = 0;
904 foreach (immutable j; 0..N/4) {
905 accum.ptr[0] += cast(double)*sinct++ * cast(double)*iptr++;
906 accum.ptr[1] += cast(double)*sinct++ * cast(double)*iptr++;
907 accum.ptr[2] += cast(double)*sinct++ * cast(double)*iptr++;
908 accum.ptr[3] += cast(double)*sinct++ * cast(double)*iptr++;
910 sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
911 } else {
912 sum = 0;
913 foreach (immutable j; 0..N) sum += cast(double)*sinct++ * cast(double)*iptr++;
915 outdata[outStride*outSample++] = cast(float)sum;
917 lastSample += intAdvance;
918 sampFracNum += fracAdvance;
919 if (sampFracNum >= denRate) {
920 sampFracNum -= denRate;
921 ++lastSample;
924 st.lastSample.ptr[chanIdx] = lastSample;
925 st.sampFracNum.ptr[chanIdx] = sampFracNum;
926 return outSample;
930 int resamplerBasicInterpolate(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen)
931 if (is(T == float) || is(T == double))
933 immutable N = st.filterLen;
934 assert(N%4 == 0);
935 int outSample = 0;
936 int lastSample = st.lastSample.ptr[chanIdx];
937 uint sampFracNum = st.sampFracNum.ptr[chanIdx];
938 immutable outStride = st.outStride;
939 immutable intAdvance = st.intAdvance;
940 immutable fracAdvance = st.fracAdvance;
941 immutable denRate = st.denRate;
942 float sum;
944 float[4] interp = void;
945 T[4] accum = void;
946 while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
947 const(float)* iptr = &indata[lastSample];
948 const int offset = sampFracNum*st.oversample/st.denRate;
949 const float frac = (cast(float)((sampFracNum*st.oversample)%st.denRate))/st.denRate;
950 accum[] = 0;
951 //TODO: optimize!
952 foreach (immutable j; 0..N) {
953 immutable T currIn = iptr[j];
954 accum.ptr[0] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-2]);
955 accum.ptr[1] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-1]);
956 accum.ptr[2] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset]);
957 accum.ptr[3] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset+1]);
960 cubicCoef(frac, interp.ptr);
961 sum = (interp.ptr[0]*accum.ptr[0])+(interp.ptr[1]*accum.ptr[1])+(interp.ptr[2]*accum.ptr[2])+(interp.ptr[3]*accum.ptr[3]);
963 outdata[outStride*outSample++] = sum;
964 lastSample += intAdvance;
965 sampFracNum += fracAdvance;
966 if (sampFracNum >= denRate) {
967 sampFracNum -= denRate;
968 ++lastSample;
972 st.lastSample.ptr[chanIdx] = lastSample;
973 st.sampFracNum.ptr[chanIdx] = sampFracNum;
974 return outSample;
978 // ////////////////////////////////////////////////////////////////////////// //
979 uint gcd (uint a, uint b) pure {
980 if (a == 0) return b;
981 if (b == 0) return a;
982 for (;;) {
983 if (a > b) {
984 a %= b;
985 if (a == 0) return b;
986 if (a == 1) return 1;
987 } else {
988 b %= a;
989 if (b == 0) return a;
990 if (b == 1) return 1;
996 // ////////////////////////////////////////////////////////////////////////// //
997 // very simple and cheap cubic upsampler
998 struct CubicUpsampler {
999 public:
1000 nothrow @trusted @nogc:
1001 float[2] curposfrac; // current position offset [0..1)
1002 float step; // how long we should move on one step?
1003 float[4][2] data; // -1..3
1004 uint[2] drain;
1006 void reset () {
1007 curposfrac[] = 0.0f;
1008 foreach (ref d; data) d[] = 0.0f;
1009 drain[] = 0;
1012 bool setup (float astep) {
1013 if (astep >= 1.0f) return false;
1014 step = astep;
1015 return true;
1019 static struct Data {
1020 const(float)[] dataIn;
1021 float[] dataOut;
1022 uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1023 uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1027 SpeexResampler.Error process (ref SpeexResampler.Data d) {
1028 d.inputSamplesUsed = d.outputSamplesUsed = 0;
1029 if (d.dataOut.length < 2) return SpeexResampler.Error.OK;
1030 foreach (uint cidx; 0..2) {
1031 uint inleft = cast(uint)d.dataIn.length/2;
1032 uint outleft = cast(uint)d.dataOut.length/2;
1033 processChannel(inleft, outleft, (d.dataIn.length ? d.dataIn.ptr+cidx : null), (d.dataOut.length ? d.dataOut.ptr+cidx : null), cidx);
1034 d.outputSamplesUsed += cast(uint)(d.dataOut.length/2)-outleft;
1035 d.inputSamplesUsed += cast(uint)(d.dataIn.length/2)-inleft;
1037 return SpeexResampler.Error.OK;
1040 private void processChannel (ref uint inleft, ref uint outleft, const(float)* dataIn, float* dataOut, uint cidx) {
1041 if (outleft == 0) return;
1042 if (inleft == 0 && drain.ptr[cidx] <= 1) return;
1043 auto dt = data.ptr[cidx].ptr;
1044 auto drn = drain.ptr+cidx;
1045 auto cpf = curposfrac.ptr+cidx;
1046 immutable float st = step;
1047 for (;;) {
1048 // fill buffer
1049 while ((*drn) < 4) {
1050 if (inleft == 0) return;
1051 dt[(*drn)++] = *dataIn;
1052 dataIn += 2;
1053 --inleft;
1055 if (outleft == 0) return;
1056 --outleft;
1057 // cubic interpolation
1058 /*version(none)*/ {
1059 // interpolate between y1 and y2
1060 immutable float mu = (*cpf); // how far we are moved from y1 to y2
1061 immutable float mu2 = mu*mu; // wow
1062 immutable float y0 = dt[0], y1 = dt[1], y2 = dt[2], y3 = dt[3];
1063 version(complex_cubic) {
1064 immutable float z0 = 0.5*y3;
1065 immutable float z1 = 0.5*y0;
1066 immutable float a0 = 1.5*y1-z1-1.5*y2+z0;
1067 immutable float a1 = y0-2.5*y1+2*y2-z0;
1068 immutable float a2 = 0.5*y2-z1;
1069 } else {
1070 immutable float a0 = y3-y2-y0+y1;
1071 immutable float a1 = y0-y1-a0;
1072 immutable float a2 = y2-y0;
1074 *dataOut = a0*mu*mu2+a1*mu2+a2*mu+y1;
1075 }// else *dataOut = dt[1];
1076 dataOut += 2;
1077 if (((*cpf) += st) >= 1.0f) {
1078 (*cpf) -= 1.0f;
1079 dt[0] = dt[1];
1080 dt[1] = dt[2];
1081 dt[2] = dt[3];
1082 dt[3] = 0.0f;
1083 --(*drn); // will request more input bytes