follin/resampler.d

   1 /* Copyright (C) 2007-2008 Jean-Marc Valin
   2  * Copyright (C) 2008      Thorvald Natvig
   3  * D port by Ketmar // Invisible Vector
   4  *
   5  * Arbitrary resampling code
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are
   9  * met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright notice,
  12  * this list of conditions and the following disclaimer.
  13  *
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  * notice, this list of conditions and the following disclaimer in the
  16  * documentation and/or other materials provided with the distribution.
  17  *
  18  * 3. The name of the author may not be used to endorse or promote products
  19  * derived from this software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  25  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  29  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 /* A-a-a-and now... D port is covered by the following license!
  35  *
  36  * This program is free software: you can redistribute it and/or modify
  37  * it under the terms of the GNU General Public License as published by
  38  * the Free Software Foundation, version 3 of the License ONLY.
  39  *
  40  * This program is distributed in the hope that it will be useful,
  41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  43  * GNU General Public License for more details.
  44  *
  45  * You should have received a copy of the GNU General Public License
  46  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  47  */
  48 module iv.follin.resampler /*is aliced*/;
  49 import iv.alice;
  50
  51 /*
  52    The design goals of this code are:
  53       - Very fast algorithm
  54       - SIMD-friendly algorithm
  55       - Low memory requirement
  56       - Good *perceptual* quality (and not best SNR)
  57
  58    Warning: This resampler is relatively new. Although I think I got rid of
  59    all the major bugs and I don't expect the API to change anymore, there
  60    may be something I've missed. So use with caution.
  61
  62    This algorithm is based on this original resampling algorithm:
  63    Smith, Julius O. Digital Audio Resampling Home Page
  64    Center for Computer Research in Music and Acoustics (CCRMA),
  65    Stanford University, 2007.
  66    Web published at http://www-ccrma.stanford.edu/~jos/resample/.
  67
  68    There is one main difference, though. This resampler uses cubic
  69    interpolation instead of linear interpolation in the above paper. This
  70    makes the table much smaller and makes it possible to compute that table
  71    on a per-stream basis. In turn, being able to tweak the table for each
  72    stream makes it possible to both reduce complexity on simple ratios
  73    (e.g. 2/3), and get rid of the rounding operations in the inner loop.
  74    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
  75 */
  76 version = sincresample_use_full_table;
  77 version(X86) {
  78   version(sincresample_disable_sse) {
  79   } else {
  80     version(D_PIC) {} else version = sincresample_use_sse;
  81   }
  82 }
  83
  84
  85 // ////////////////////////////////////////////////////////////////////////// //
  86 public struct SpeexResampler {
  87 public:
  88   alias Quality = int;
  89   enum : uint {
  90     Fastest = 0,
  91     Voip = 3,
  92     Default = 4,
  93     Desktop = 5,
  94     Music = 8,
  95     Best = 10,
  96   }
  97
  98   enum Error {
  99     OK = 0,
 100     NoMemory,
 101     BadState,
 102     BadArgument,
 103     BadData,
 104   }
 105
 106 private:
 107 nothrow @trusted @nogc:
 108   alias ResamplerFn = int function (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen);
 109
 110 private:
 111   uint inRate;
 112   uint outRate;
 113   uint numRate; // from
 114   uint denRate; // to
 115
 116   Quality srQuality;
 117   uint chanCount;
 118   uint filterLen;
 119   uint memAllocSize;
 120   uint bufferSize;
 121   int intAdvance;
 122   int fracAdvance;
 123   float cutoff;
 124   uint oversample;
 125   bool started;
 126
 127   // these are per-channel
 128   int[64] lastSample;
 129   uint[64] sampFracNum;
 130   uint[64] magicSamples;
 131
 132   float* mem;
 133   uint realMemLen; // how much memory really allocated
 134   float* sincTable;
 135   uint sincTableLen;
 136   uint realSincTableLen; // how much memory really allocated
 137   ResamplerFn resampler;
 138
 139   int inStride;
 140   int outStride;
 141
 142 public:
 143   static string errorStr (int err) {
 144     switch (err) with (Error) {
 145       case OK: return "success";
 146       case NoMemory: return "memory allocation failed";
 147       case BadState: return "bad resampler state";
 148       case BadArgument: return "invalid argument";
 149       case BadData: return "bad data passed";
 150       default:
 151     }
 152     return "unknown error";
 153   }
 154
 155 public:
 156   @disable this (this);
 157   ~this () { deinit(); }
 158
 159   @property bool inited () const pure { return (resampler !is null); }
 160
 161   void deinit () {
 162     import core.stdc.stdlib : free;
 163     if (mem !is null) { free(mem); mem = null; }
 164     if (sincTable !is null) { free(sincTable); sincTable = null; }
 165     /*
 166     memAllocSize = realMemLen = 0;
 167     sincTableLen = realSincTableLen = 0;
 168     resampler = null;
 169     started = false;
 170     */
 171     inRate = outRate = numRate = denRate = 0;
 172     srQuality = cast(Quality)666;
 173     chanCount = 0;
 174     filterLen = 0;
 175     memAllocSize = 0;
 176     bufferSize = 0;
 177     intAdvance = 0;
 178     fracAdvance = 0;
 179     cutoff = 0;
 180     oversample = 0;
 181     started = 0;
 182
 183     mem = null;
 184     realMemLen = 0; // how much memory really allocated
 185     sincTable = null;
 186     sincTableLen = 0;
 187     realSincTableLen = 0; // how much memory really allocated
 188     resampler = null;
 189
 190     inStride = outStride = 0;
 191   }
 192
 193   /** Create a new resampler with integer input and output rates.
 194    *
 195    * Params:
 196    *  chans = Number of channels to be processed
 197    *  inRate = Input sampling rate (integer number of Hz).
 198    *  outRate = Output sampling rate (integer number of Hz).
 199    *  aquality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 200    *
 201    * Returns:
 202    *  0 or error code
 203    */
 204   Error setup (uint chans, uint ainRate, uint aoutRate, Quality aquality/*, usize line=__LINE__*/) {
 205     //{ import core.stdc.stdio; printf("init: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
 206     import core.stdc.stdlib : malloc, free;
 207
 208     deinit();
 209     if (aquality < 0) aquality = 0;
 210     if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
 211     if (chans < 1 || chans > 16) return Error.BadArgument;
 212
 213     started = false;
 214     inRate = 0;
 215     outRate = 0;
 216     numRate = 0;
 217     denRate = 0;
 218     srQuality = cast(Quality)666; // it's ok
 219     sincTableLen = 0;
 220     memAllocSize = 0;
 221     filterLen = 0;
 222     mem = null;
 223     resampler = null;
 224
 225     cutoff = 1.0f;
 226     chanCount = chans;
 227     inStride = 1;
 228     outStride = 1;
 229
 230     bufferSize = 160;
 231
 232     // per channel data
 233     lastSample[] = 0;
 234     magicSamples[] = 0;
 235     sampFracNum[] = 0;
 236
 237     setQuality(aquality);
 238     setRate(ainRate, aoutRate);
 239
 240     if (auto filterErr = updateFilter()) { deinit(); return filterErr; }
 241     skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
 242
 243     return Error.OK;
 244   }
 245
 246   /** Set (change) the input/output sampling rates (integer value).
 247    *
 248    * Params:
 249    *  ainRate = Input sampling rate (integer number of Hz).
 250    *  aoutRate = Output sampling rate (integer number of Hz).
 251    *
 252    * Returns:
 253    *  0 or error code
 254    */
 255   Error setRate (uint ainRate, uint aoutRate/*, usize line=__LINE__*/) {
 256     //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
 257     if (inRate == ainRate && outRate == aoutRate) return Error.OK;
 258     //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ratioNum, ratioDen, cast(uint)line); }
 259
 260     uint oldDen = denRate;
 261     inRate = ainRate;
 262     outRate = aoutRate;
 263     auto div = gcd(ainRate, aoutRate);
 264     numRate = ainRate/div;
 265     denRate = aoutRate/div;
 266
 267     if (oldDen > 0) {
 268       foreach (ref v; sampFracNum.ptr[0..chanCount]) {
 269         v = v*denRate/oldDen;
 270         // safety net
 271         if (v >= denRate) v = denRate-1;
 272       }
 273     }
 274
 275     return (inited ? updateFilter() : Error.OK);
 276   }
 277
 278   /** Get the current input/output sampling rates (integer value).
 279    *
 280    * Params:
 281    *  ainRate = Input sampling rate (integer number of Hz) copied.
 282    *  aoutRate = Output sampling rate (integer number of Hz) copied.
 283    */
 284   void getRate (out uint ainRate, out uint aoutRate) {
 285     ainRate = inRate;
 286     aoutRate = outRate;
 287   }
 288
 289   @property uint getInRate () { return inRate; }
 290   @property uint getOutRate () { return outRate; }
 291
 292   @property uint getChans () { return chanCount; }
 293
 294   /** Get the current resampling ratio. This will be reduced to the least common denominator.
 295    *
 296    * Params:
 297    *  ratioNum = Numerator of the sampling rate ratio copied
 298    *  ratioDen = Denominator of the sampling rate ratio copied
 299    */
 300   void getRatio (out uint ratioNum, out uint ratioDen) {
 301     ratioNum = numRate;
 302     ratioDen = denRate;
 303   }
 304
 305   /** Set (change) the conversion quality.
 306    *
 307    * Params:
 308    *  quality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 309    *
 310    * Returns:
 311    *  0 or error code
 312    */
 313   Error setQuality (Quality aquality) {
 314     if (aquality < 0) aquality = 0;
 315     if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
 316     if (srQuality == aquality) return Error.OK;
 317     srQuality = aquality;
 318     return (inited ? updateFilter() : Error.OK);
 319   }
 320
 321   /** Get the conversion quality.
 322    *
 323    * Returns:
 324    *  Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 325    */
 326   int getQuality () { return srQuality; }
 327
 328   /** Get the latency introduced by the resampler measured in input samples.
 329    *
 330    * Returns:
 331    *  Input latency;
 332    */
 333   int inputLatency () { return filterLen/2; }
 334
 335   /** Get the latency introduced by the resampler measured in output samples.
 336    *
 337    * Returns:
 338    *  Output latency.
 339    */
 340   int outputLatency () { return ((filterLen/2)*denRate+(numRate>>1))/numRate; }
 341
 342   /* Make sure that the first samples to go out of the resamplers don't have
 343    * leading zeros. This is only useful before starting to use a newly created
 344    * resampler. It is recommended to use that when resampling an audio file, as
 345    * it will generate a file with the same length. For real-time processing,
 346    * it is probably easier not to use this call (so that the output duration
 347    * is the same for the first frame).
 348    *
 349    * Setup/reset sequence will automatically call this, so it is private.
 350    */
 351   private void skipZeros () { foreach (immutable i; 0..chanCount) lastSample.ptr[i] = filterLen/2; }
 352
 353   static struct Data {
 354     const(float)[] dataIn;
 355     float[] dataOut;
 356     uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
 357     uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
 358   }
 359
 360   /** Resample (an interleaved) float array. The input and output buffers must *not* overlap.
 361    * `data.dataIn` can be empty, but `data.dataOut` can't.
 362    * Function will return number of consumed samples (*not* *frames*!) in `data.inputSamplesUsed`,
 363    * and number of produced samples in `data.outputSamplesUsed`.
 364    * You should provide enough samples for all channels, and all channels will be processed.
 365    *
 366    * Params:
 367    *  data = input and output buffers, number of frames consumed and produced
 368    *
 369    * Returns:
 370    *  0 or error code
 371    */
 372   Error process(string mode="interleaved") (ref Data data) {
 373     static assert(mode == "interleaved" || mode == "sequential");
 374
 375     data.inputSamplesUsed = data.outputSamplesUsed = 0;
 376     if (!inited) return Error.BadState;
 377
 378     if (data.dataIn.length%chanCount || data.dataOut.length < 1 || data.dataOut.length%chanCount) return Error.BadData;
 379     if (data.dataIn.length > uint.max/4 || data.dataOut.length > uint.max/4) return Error.BadData;
 380
 381     static if (mode == "interleaved") {
 382       inStride = outStride = chanCount;
 383     } else {
 384       inStride = outStride = 1;
 385     }
 386     uint iofs = 0, oofs = 0;
 387     immutable uint idclen = cast(uint)(data.dataIn.length/chanCount);
 388     immutable uint odclen = cast(uint)(data.dataOut.length/chanCount);
 389     foreach (immutable i; 0..chanCount) {
 390       data.inputSamplesUsed = idclen;
 391       data.outputSamplesUsed = odclen;
 392       if (data.dataIn.length) {
 393         processOneChannel(i, data.dataIn.ptr+iofs, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
 394       } else {
 395         processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
 396       }
 397       static if (mode == "interleaved") {
 398         ++iofs;
 399         ++oofs;
 400       } else {
 401         iofs += idclen;
 402         oofs += odclen;
 403       }
 404     }
 405     data.inputSamplesUsed *= chanCount;
 406     data.outputSamplesUsed *= chanCount;
 407     return Error.OK;
 408   }
 409
 410
 411   //HACK for libswresample
 412   // return -1 or number of outframes
 413   int swrconvert (float** outbuf, int outframes, const(float)**inbuf, int inframes) {
 414     if (!inited || outframes < 1 || inframes < 0) return -1;
 415     inStride = outStride = 1;
 416     Data data;
 417     foreach (immutable i; 0..chanCount) {
 418       data.dataIn = (inframes ? inbuf[i][0..inframes] : null);
 419       data.dataOut = (outframes ? outbuf[i][0..outframes] : null);
 420       data.inputSamplesUsed = inframes;
 421       data.outputSamplesUsed = outframes;
 422       if (inframes > 0) {
 423         processOneChannel(i, data.dataIn.ptr, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
 424       } else {
 425         processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
 426       }
 427     }
 428     return data.outputSamplesUsed;
 429   }
 430
 431   /// Reset a resampler so a new (unrelated) stream can be processed.
 432   void reset () {
 433     lastSample[] = 0;
 434     magicSamples[] = 0;
 435     sampFracNum[] = 0;
 436     //foreach (immutable i; 0..chanCount*(filterLen-1)) mem[i] = 0;
 437     if (mem !is null) mem[0..chanCount*(filterLen-1)] = 0;
 438     skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
 439   }
 440
 441 private:
 442   Error processOneChannel (uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen) {
 443     uint ilen = *indataLen;
 444     uint olen = *outdataLen;
 445     float* x = mem+chanIdx*memAllocSize;
 446     immutable int filterOfs = filterLen-1;
 447     immutable uint xlen = memAllocSize-filterOfs;
 448     immutable int istride = inStride;
 449     if (magicSamples.ptr[chanIdx]) olen -= magic(chanIdx, &outdata, olen);
 450     if (!magicSamples.ptr[chanIdx]) {
 451       while (ilen && olen) {
 452         uint ichunk = (ilen > xlen ? xlen : ilen);
 453         uint ochunk = olen;
 454         if (indata !is null) {
 455           //foreach (immutable j; 0..ichunk) x[j+filterOfs] = indata[j*istride];
 456           if (istride == 1) {
 457             x[filterOfs..filterOfs+ichunk] = indata[0..ichunk];
 458           } else {
 459             auto sp = indata;
 460             auto dp = x+filterOfs;
 461             foreach (immutable j; 0..ichunk) { *dp++ = *sp; sp += istride; }
 462           }
 463         } else {
 464           //foreach (immutable j; 0..ichunk) x[j+filterOfs] = 0;
 465           x[filterOfs..filterOfs+ichunk] = 0;
 466         }
 467         processNative(chanIdx, &ichunk, outdata, &ochunk);
 468         ilen -= ichunk;
 469         olen -= ochunk;
 470         outdata += ochunk*outStride;
 471         if (indata !is null) indata += ichunk*istride;
 472       }
 473     }
 474     *indataLen -= ilen;
 475     *outdataLen -= olen;
 476     return Error.OK;
 477   }
 478
 479   Error processNative (uint chanIdx, uint* indataLen, float* outdata, uint* outdataLen) {
 480     immutable N = filterLen;
 481     int outSample = 0;
 482     float* x = mem+chanIdx*memAllocSize;
 483     uint ilen;
 484     started = true;
 485     // call the right resampler through the function ptr
 486     outSample = resampler(this, chanIdx, x, indataLen, outdata, outdataLen);
 487     if (lastSample.ptr[chanIdx] < cast(int)*indataLen) *indataLen = lastSample.ptr[chanIdx];
 488     *outdataLen = outSample;
 489     lastSample.ptr[chanIdx] -= *indataLen;
 490     ilen = *indataLen;
 491     foreach (immutable j; 0..N-1) x[j] = x[j+ilen];
 492     return Error.OK;
 493   }
 494
 495   int magic (uint chanIdx, float **outdata, uint outdataLen) {
 496     uint tempInLen = magicSamples.ptr[chanIdx];
 497     float* x = mem+chanIdx*memAllocSize;
 498     processNative(chanIdx, &tempInLen, *outdata, &outdataLen);
 499     magicSamples.ptr[chanIdx] -= tempInLen;
 500     // if we couldn't process all "magic" input samples, save the rest for next time
 501     if (magicSamples.ptr[chanIdx]) {
 502       immutable N = filterLen;
 503       foreach (immutable i; 0..magicSamples.ptr[chanIdx]) x[N-1+i] = x[N-1+i+tempInLen];
 504     }
 505     *outdata += outdataLen*outStride;
 506     return outdataLen;
 507   }
 508
 509   Error updateFilter () {
 510     uint oldFilterLen = filterLen;
 511     uint oldAllocSize = memAllocSize;
 512     bool useDirect;
 513     uint minSincTableLen;
 514     uint minAllocSize;
 515
 516     intAdvance = numRate/denRate;
 517     fracAdvance = numRate%denRate;
 518     oversample = qualityMap.ptr[srQuality].oversample;
 519     filterLen = qualityMap.ptr[srQuality].baseLength;
 520
 521     if (numRate > denRate) {
 522       // down-sampling
 523       cutoff = qualityMap.ptr[srQuality].downsampleBandwidth*denRate/numRate;
 524       // FIXME: divide the numerator and denominator by a certain amount if they're too large
 525       filterLen = filterLen*numRate/denRate;
 526       // round up to make sure we have a multiple of 8 for SSE
 527       filterLen = ((filterLen-1)&(~0x7))+8;
 528       if (2*denRate < numRate) oversample >>= 1;
 529       if (4*denRate < numRate) oversample >>= 1;
 530       if (8*denRate < numRate) oversample >>= 1;
 531       if (16*denRate < numRate) oversample >>= 1;
 532       if (oversample < 1) oversample = 1;
 533     } else {
 534       // up-sampling
 535       cutoff = qualityMap.ptr[srQuality].upsampleBandwidth;
 536     }
 537
 538     // choose the resampling type that requires the least amount of memory
 539     version(sincresample_use_full_table) {
 540       useDirect = true;
 541       if (int.max/float.sizeof/denRate < filterLen) goto fail;
 542     } else {
 543       useDirect = (filterLen*denRate <= filterLen*oversample+8 && int.max/float.sizeof/denRate >= filterLen);
 544     }
 545
 546     if (useDirect) {
 547       minSincTableLen = filterLen*denRate;
 548     } else {
 549       if ((int.max/float.sizeof-8)/oversample < filterLen) goto fail;
 550       minSincTableLen = filterLen*oversample+8;
 551     }
 552
 553     if (sincTableLen < minSincTableLen) {
 554       import core.stdc.stdlib : realloc;
 555       auto nslen = cast(uint)(minSincTableLen*float.sizeof);
 556       if (nslen > realSincTableLen) {
 557         if (nslen < 512*1024) nslen = 512*1024; // inc to 3 mb?
 558         auto x = cast(float*)realloc(sincTable, nslen);
 559         if (!x) goto fail;
 560         sincTable = x;
 561         realSincTableLen = nslen;
 562       }
 563       sincTableLen = minSincTableLen;
 564     }
 565
 566     if (useDirect) {
 567       foreach (int i; 0..denRate) {
 568         foreach (int j; 0..filterLen) {
 569           sincTable[i*filterLen+j] = sinc(cutoff, ((j-cast(int)filterLen/2+1)-(cast(float)i)/denRate), filterLen, qualityMap.ptr[srQuality].windowFunc);
 570         }
 571       }
 572       if (srQuality > 8) {
 573         resampler = &resamplerBasicDirect!double;
 574       } else {
 575         resampler = &resamplerBasicDirect!float;
 576       }
 577     } else {
 578       foreach (immutable int i; -4..cast(int)(oversample*filterLen+4)) {
 579         sincTable[i+4] = sinc(cutoff, (i/cast(float)oversample-filterLen/2), filterLen, qualityMap.ptr[srQuality].windowFunc);
 580       }
 581       if (srQuality > 8) {
 582         resampler = &resamplerBasicInterpolate!double;
 583       } else {
 584         resampler = &resamplerBasicInterpolate!float;
 585       }
 586     }
 587
 588     /* Here's the place where we update the filter memory to take into account
 589        the change in filter length. It's probably the messiest part of the code
 590        due to handling of lots of corner cases. */
 591
 592     // adding bufferSize to filterLen won't overflow here because filterLen could be multiplied by float.sizeof above
 593     minAllocSize = filterLen-1+bufferSize;
 594     if (minAllocSize > memAllocSize) {
 595       import core.stdc.stdlib : realloc;
 596       if (int.max/float.sizeof/chanCount < minAllocSize) goto fail;
 597       auto nslen = cast(uint)(chanCount*minAllocSize*mem[0].sizeof);
 598       if (nslen > realMemLen) {
 599         if (nslen < 16384) nslen = 16384;
 600         auto x = cast(float*)realloc(mem, nslen);
 601         if (x is null) goto fail;
 602         mem = x;
 603         realMemLen = nslen;
 604       }
 605       memAllocSize = minAllocSize;
 606     }
 607     if (!started) {
 608       //foreach (i=0;i<chanCount*memAllocSize;i++) mem[i] = 0;
 609       mem[0..chanCount*memAllocSize] = 0;
 610     } else if (filterLen > oldFilterLen) {
 611       // increase the filter length
 612       foreach_reverse (uint i; 0..chanCount) {
 613         uint j;
 614         uint olen = oldFilterLen;
 615         {
 616           // try and remove the magic samples as if nothing had happened
 617           //FIXME: this is wrong but for now we need it to avoid going over the array bounds
 618           olen = oldFilterLen+2*magicSamples.ptr[i];
 619           for (j = oldFilterLen-1+magicSamples.ptr[i]; j--; ) mem[i*memAllocSize+j+magicSamples.ptr[i]] = mem[i*oldAllocSize+j];
 620           //for (j = 0; j < magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = 0;
 621           mem[i*memAllocSize..i*memAllocSize+magicSamples.ptr[i]] = 0;
 622           magicSamples.ptr[i] = 0;
 623         }
 624         if (filterLen > olen) {
 625           // if the new filter length is still bigger than the "augmented" length
 626           // copy data going backward
 627           for (j = 0; j < olen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = mem[i*memAllocSize+(olen-2-j)];
 628           // then put zeros for lack of anything better
 629           for (; j < filterLen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = 0;
 630           // adjust lastSample
 631           lastSample.ptr[i] += (filterLen-olen)/2;
 632         } else {
 633           // put back some of the magic!
 634           magicSamples.ptr[i] = (olen-filterLen)/2;
 635           for (j = 0; j < filterLen-1+magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
 636         }
 637       }
 638     } else if (filterLen < oldFilterLen) {
 639       // reduce filter length, this a bit tricky
 640       // we need to store some of the memory as "magic" samples so they can be used directly as input the next time(s)
 641       foreach (immutable i; 0..chanCount) {
 642         uint j;
 643         uint oldMagic = magicSamples.ptr[i];
 644         magicSamples.ptr[i] = (oldFilterLen-filterLen)/2;
 645         // we must copy some of the memory that's no longer used
 646         // copy data going backward
 647         for (j = 0; j < filterLen-1+magicSamples.ptr[i]+oldMagic; ++j) {
 648           mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
 649         }
 650         magicSamples.ptr[i] += oldMagic;
 651       }
 652     }
 653     return Error.OK;
 654
 655   fail:
 656     resampler = null;
 657     /* mem may still contain consumed input samples for the filter.
 658        Restore filterLen so that filterLen-1 still points to the position after
 659        the last of these samples. */
 660     filterLen = oldFilterLen;
 661     return Error.NoMemory;
 662   }
 663 }
 664
 665
 666 // ////////////////////////////////////////////////////////////////////////// //
 667 static immutable double[68] kaiser12Table = [
 668   0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
 669   0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
 670   0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
 671   0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
 672   0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
 673   0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
 674   0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
 675   0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
 676   0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
 677   0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
 678   0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
 679   0.00001000, 0.00000000];
 680
 681 static immutable double[36] kaiser10Table = [
 682   0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
 683   0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
 684   0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
 685   0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
 686   0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
 687   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000];
 688
 689 static immutable double[36] kaiser8Table = [
 690   0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
 691   0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
 692   0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
 693   0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
 694   0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
 695   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000];
 696
 697 static immutable double[36] kaiser6Table = [
 698   0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
 699   0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
 700   0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
 701   0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
 702   0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
 703   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000];
 704
 705 struct FuncDef {
 706   immutable(double)* table;
 707   int oversample;
 708 }
 709
 710 static immutable FuncDef Kaiser12 = FuncDef(kaiser12Table.ptr, 64);
 711 static immutable FuncDef Kaiser10 = FuncDef(kaiser10Table.ptr, 32);
 712 static immutable FuncDef Kaiser8 = FuncDef(kaiser8Table.ptr, 32);
 713 static immutable FuncDef Kaiser6 = FuncDef(kaiser6Table.ptr, 32);
 714
 715
 716 struct QualityMapping {
 717   int baseLength;
 718   int oversample;
 719   float downsampleBandwidth;
 720   float upsampleBandwidth;
 721   immutable FuncDef* windowFunc;
 722 }
 723
 724
 725 /* This table maps conversion quality to internal parameters. There are two
 726    reasons that explain why the up-sampling bandwidth is larger than the
 727    down-sampling bandwidth:
 728    1) When up-sampling, we can assume that the spectrum is already attenuated
 729       close to the Nyquist rate (from an A/D or a previous resampling filter)
 730    2) Any aliasing that occurs very close to the Nyquist rate will be masked
 731       by the sinusoids/noise just below the Nyquist rate (guaranteed only for
 732       up-sampling).
 733 */
 734 static immutable QualityMapping[11] qualityMap = [
 735   QualityMapping(  8,  4, 0.830f, 0.860f, &Kaiser6 ), /* Q0 */
 736   QualityMapping( 16,  4, 0.850f, 0.880f, &Kaiser6 ), /* Q1 */
 737   QualityMapping( 32,  4, 0.882f, 0.910f, &Kaiser6 ), /* Q2 */  /* 82.3% cutoff ( ~60 dB stop) 6  */
 738   QualityMapping( 48,  8, 0.895f, 0.917f, &Kaiser8 ), /* Q3 */  /* 84.9% cutoff ( ~80 dB stop) 8  */
 739   QualityMapping( 64,  8, 0.921f, 0.940f, &Kaiser8 ), /* Q4 */  /* 88.7% cutoff ( ~80 dB stop) 8  */
 740   QualityMapping( 80, 16, 0.922f, 0.940f, &Kaiser10), /* Q5 */  /* 89.1% cutoff (~100 dB stop) 10 */
 741   QualityMapping( 96, 16, 0.940f, 0.945f, &Kaiser10), /* Q6 */  /* 91.5% cutoff (~100 dB stop) 10 */
 742   QualityMapping(128, 16, 0.950f, 0.950f, &Kaiser10), /* Q7 */  /* 93.1% cutoff (~100 dB stop) 10 */
 743   QualityMapping(160, 16, 0.960f, 0.960f, &Kaiser10), /* Q8 */  /* 94.5% cutoff (~100 dB stop) 10 */
 744   QualityMapping(192, 32, 0.968f, 0.968f, &Kaiser12), /* Q9 */  /* 95.5% cutoff (~100 dB stop) 10 */
 745   QualityMapping(256, 32, 0.975f, 0.975f, &Kaiser12), /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
 746 ];
 747
 748
 749 nothrow @trusted @nogc:
 750 /*8, 24, 40, 56, 80, 104, 128, 160, 200, 256, 320*/
 751 double computeFunc (float x, immutable FuncDef* func) {
 752   version(Posix) import core.stdc.math : lrintf;
 753   import std.math : floor;
 754   //double[4] interp;
 755   float y = x*func.oversample;
 756   version(Posix) {
 757     int ind = cast(int)lrintf(floor(y));
 758   } else {
 759     int ind = cast(int)(floor(y));
 760   }
 761   float frac = (y-ind);
 762   immutable f2 = frac*frac;
 763   immutable f3 = f2*frac;
 764   double interp3 = -0.1666666667*frac+0.1666666667*(f3);
 765   double interp2 = frac+0.5*(f2)-0.5*(f3);
 766   //double interp2 = 1.0f-0.5f*frac-f2+0.5f*f3;
 767   double interp0 = -0.3333333333*frac+0.5*(f2)-0.1666666667*(f3);
 768   // just to make sure we don't have rounding problems
 769   double interp1 = 1.0f-interp3-interp2-interp0;
 770   //sum = frac*accum[1]+(1-frac)*accum[2];
 771   return interp0*func.table[ind]+interp1*func.table[ind+1]+interp2*func.table[ind+2]+interp3*func.table[ind+3];
 772 }
 773
 774
 775 // the slow way of computing a sinc for the table; should improve that some day
 776 float sinc (float cutoff, float x, int N, immutable FuncDef *windowFunc) {
 777   version(LittleEndian) {
 778     align(1) union temp_float { align(1): float f; uint n; }
 779   } else {
 780     static T fabs(T) (T n) pure { static if (__VERSION__ > 2067) pragma(inline, true); return (n < 0 ? -n : n); }
 781   }
 782   import std.math : sin, PI;
 783   version(LittleEndian) {
 784     temp_float txx = void;
 785     txx.f = x;
 786     txx.n &= 0x7fff_ffff; // abs
 787     if (txx.f < 1.0e-6f) return cutoff;
 788     if (txx.f > 0.5f*N) return 0;
 789   } else {
 790     if (fabs(x) < 1.0e-6f) return cutoff;
 791     if (fabs(x) > 0.5f*N) return 0;
 792   }
 793   //FIXME: can it really be any slower than this?
 794   immutable float xx = x*cutoff;
 795   immutable pixx = PI*xx;
 796   version(LittleEndian) {
 797     return cutoff*sin(pixx)/pixx*computeFunc(2.0*txx.f/N, windowFunc);
 798   } else {
 799     return cutoff*sin(pixx)/pixx*computeFunc(fabs(2.0*x/N), windowFunc);
 800   }
 801 }
 802
 803
 804 void cubicCoef (in float frac, float* interp) {
 805   immutable f2 = frac*frac;
 806   immutable f3 = f2*frac;
 807   // compute interpolation coefficients; i'm not sure whether this corresponds to cubic interpolation but I know it's MMSE-optimal on a sinc
 808   interp[0] =  -0.16667f*frac+0.16667f*f3;
 809   interp[1] = frac+0.5f*f2-0.5f*f3;
 810   //interp[2] = 1.0f-0.5f*frac-f2+0.5f*f3;
 811   interp[3] = -0.33333f*frac+0.5f*f2-0.16667f*f3;
 812   // just to make sure we don't have rounding problems
 813   interp[2] = 1.0-interp[0]-interp[1]-interp[3];
 814 }
 815
 816
 817 // ////////////////////////////////////////////////////////////////////////// //
 818 int resamplerBasicDirect(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen)
 819 if (is(T == float) || is(T == double))
 820 {
 821   auto N = st.filterLen;
 822   static if (is(T == double)) assert(N%4 == 0);
 823   int outSample = 0;
 824   int lastSample = st.lastSample.ptr[chanIdx];
 825   uint sampFracNum = st.sampFracNum.ptr[chanIdx];
 826   const(float)* sincTable = st.sincTable;
 827   immutable outStride = st.outStride;
 828   immutable intAdvance = st.intAdvance;
 829   immutable fracAdvance = st.fracAdvance;
 830   immutable denRate = st.denRate;
 831   T sum = void;
 832   while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
 833     const(float)* sinct = &sincTable[sampFracNum*N];
 834     const(float)* iptr = &indata[lastSample];
 835     static if (is(T == float)) {
 836       // at least 2x speedup with SSE here (but for unrolled loop)
 837       if (N%4 == 0) {
 838         version(sincresample_use_sse) {
 839           //align(64) __gshared float[4] zero = 0;
 840           align(64) __gshared float[4+128] zeroesBuf = 0; // dmd cannot into such aligns, alas
 841           __gshared uint zeroesptr = 0;
 842           if (zeroesptr == 0) {
 843             zeroesptr = cast(uint)zeroesBuf.ptr;
 844             if (zeroesptr&0x3f) zeroesptr = (zeroesptr|0x3f)+1;
 845           }
 846           //assert((zeroesptr&0x3f) == 0, "wtf?!");
 847           asm nothrow @safe @nogc {
 848             mov       ECX,[N];
 849             shr       ECX,2;
 850             mov       EAX,[zeroesptr];
 851             movaps    XMM0,[EAX];
 852             mov       EAX,[sinct];
 853             mov       EBX,[iptr];
 854             mov       EDX,16;
 855             align 8;
 856            rbdseeloop:
 857             movups    XMM1,[EAX];
 858             movups    XMM2,[EBX];
 859             mulps     XMM1,XMM2;
 860             addps     XMM0,XMM1;
 861             add       EAX,EDX;
 862             add       EBX,EDX;
 863             dec       ECX;
 864             jnz       rbdseeloop;
 865             // store result in sum
 866             movhlps   XMM1,XMM0; // now low part of XMM1 contains high part of XMM0
 867             addps     XMM0,XMM1; // low part of XMM0 is ok
 868             movaps    XMM1,XMM0;
 869             shufps    XMM1,XMM0,0b_01_01_01_01; // 2nd float of XMM0 goes to the 1st float of XMM1
 870             addss     XMM0,XMM1;
 871             movss     [sum],XMM0;
 872           }
 873           /*
 874           float sum1 = 0;
 875           foreach (immutable j; 0..N) sum1 += sinct[j]*iptr[j];
 876           import std.math;
 877           if (fabs(sum-sum1) > 0.000001f) {
 878             import core.stdc.stdio;
 879             printf("sum=%f; sum1=%f\n", sum, sum1);
 880             assert(0);
 881           }
 882           */
 883         } else {
 884           // no SSE; for my i3 unrolled loop is almost of the speed of SSE code
 885           T[4] accum = 0;
 886           foreach (immutable j; 0..N/4) {
 887             accum.ptr[0] += *sinct++ * *iptr++;
 888             accum.ptr[1] += *sinct++ * *iptr++;
 889             accum.ptr[2] += *sinct++ * *iptr++;
 890             accum.ptr[3] += *sinct++ * *iptr++;
 891           }
 892           sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
 893         }
 894       } else {
 895         sum = 0;
 896         foreach (immutable j; 0..N) sum += *sinct++ * *iptr++;
 897       }
 898       outdata[outStride*outSample++] = sum;
 899     } else {
 900       if (N%4 == 0) {
 901         //TODO: write SSE code here!
 902         // for my i3 unrolled loop is ~2 times faster
 903         T[4] accum = 0;
 904         foreach (immutable j; 0..N/4) {
 905           accum.ptr[0] += cast(double)*sinct++ * cast(double)*iptr++;
 906           accum.ptr[1] += cast(double)*sinct++ * cast(double)*iptr++;
 907           accum.ptr[2] += cast(double)*sinct++ * cast(double)*iptr++;
 908           accum.ptr[3] += cast(double)*sinct++ * cast(double)*iptr++;
 909         }
 910         sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
 911       } else {
 912         sum = 0;
 913         foreach (immutable j; 0..N) sum += cast(double)*sinct++ * cast(double)*iptr++;
 914       }
 915       outdata[outStride*outSample++] = cast(float)sum;
 916     }
 917     lastSample += intAdvance;
 918     sampFracNum += fracAdvance;
 919     if (sampFracNum >= denRate) {
 920       sampFracNum -= denRate;
 921       ++lastSample;
 922     }
 923   }
 924   st.lastSample.ptr[chanIdx] = lastSample;
 925   st.sampFracNum.ptr[chanIdx] = sampFracNum;
 926   return outSample;
 927 }
 928
 929
 930 int resamplerBasicInterpolate(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen)
 931 if (is(T == float) || is(T == double))
 932 {
 933   immutable N = st.filterLen;
 934   assert(N%4 == 0);
 935   int outSample = 0;
 936   int lastSample = st.lastSample.ptr[chanIdx];
 937   uint sampFracNum = st.sampFracNum.ptr[chanIdx];
 938   immutable outStride = st.outStride;
 939   immutable intAdvance = st.intAdvance;
 940   immutable fracAdvance = st.fracAdvance;
 941   immutable denRate = st.denRate;
 942   float sum;
 943
 944   float[4] interp = void;
 945   T[4] accum = void;
 946   while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
 947     const(float)* iptr = &indata[lastSample];
 948     const int offset = sampFracNum*st.oversample/st.denRate;
 949     const float frac = (cast(float)((sampFracNum*st.oversample)%st.denRate))/st.denRate;
 950     accum[] = 0;
 951     //TODO: optimize!
 952     foreach (immutable j; 0..N) {
 953       immutable T currIn = iptr[j];
 954       accum.ptr[0] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-2]);
 955       accum.ptr[1] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-1]);
 956       accum.ptr[2] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset]);
 957       accum.ptr[3] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset+1]);
 958     }
 959
 960     cubicCoef(frac, interp.ptr);
 961     sum = (interp.ptr[0]*accum.ptr[0])+(interp.ptr[1]*accum.ptr[1])+(interp.ptr[2]*accum.ptr[2])+(interp.ptr[3]*accum.ptr[3]);
 962
 963     outdata[outStride*outSample++] = sum;
 964     lastSample += intAdvance;
 965     sampFracNum += fracAdvance;
 966     if (sampFracNum >= denRate) {
 967       sampFracNum -= denRate;
 968       ++lastSample;
 969     }
 970   }
 971
 972   st.lastSample.ptr[chanIdx] = lastSample;
 973   st.sampFracNum.ptr[chanIdx] = sampFracNum;
 974   return outSample;
 975 }
 976
 977
 978 // ////////////////////////////////////////////////////////////////////////// //
 979 uint gcd (uint a, uint b) pure {
 980   if (a == 0) return b;
 981   if (b == 0) return a;
 982   for (;;) {
 983     if (a > b) {
 984       a %= b;
 985       if (a == 0) return b;
 986       if (a == 1) return 1;
 987     } else {
 988       b %= a;
 989       if (b == 0) return a;
 990       if (b == 1) return 1;
 991     }
 992   }
 993 }
 994
 995
 996 // ////////////////////////////////////////////////////////////////////////// //
 997 // very simple and cheap cubic upsampler
 998 struct CubicUpsampler {
 999 public:
1000 nothrow @trusted @nogc:
1001   float[2] curposfrac; // current position offset [0..1)
1002   float step; // how long we should move on one step?
1003   float[4][2] data; // -1..3
1004   uint[2] drain;
1005
1006   void reset () {
1007     curposfrac[] = 0.0f;
1008     foreach (ref d; data) d[] = 0.0f;
1009     drain[] = 0;
1010   }
1011
1012   bool setup (float astep) {
1013     if (astep >= 1.0f) return false;
1014     step = astep;
1015     return true;
1016   }
1017
1018   /*
1019   static struct Data {
1020     const(float)[] dataIn;
1021     float[] dataOut;
1022     uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1023     uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1024   }
1025   */
1026
1027   SpeexResampler.Error process (ref SpeexResampler.Data d) {
1028     d.inputSamplesUsed = d.outputSamplesUsed = 0;
1029     if (d.dataOut.length < 2) return SpeexResampler.Error.OK;
1030     foreach (uint cidx; 0..2) {
1031       uint inleft = cast(uint)d.dataIn.length/2;
1032       uint outleft = cast(uint)d.dataOut.length/2;
1033       processChannel(inleft, outleft, (d.dataIn.length ? d.dataIn.ptr+cidx : null), (d.dataOut.length ? d.dataOut.ptr+cidx : null), cidx);
1034       d.outputSamplesUsed += cast(uint)(d.dataOut.length/2)-outleft;
1035       d.inputSamplesUsed += cast(uint)(d.dataIn.length/2)-inleft;
1036     }
1037     return SpeexResampler.Error.OK;
1038   }
1039
1040   private void processChannel (ref uint inleft, ref uint outleft, const(float)* dataIn, float* dataOut, uint cidx) {
1041     if (outleft == 0) return;
1042     if (inleft == 0 && drain.ptr[cidx] <= 1) return;
1043     auto dt = data.ptr[cidx].ptr;
1044     auto drn = drain.ptr+cidx;
1045     auto cpf = curposfrac.ptr+cidx;
1046     immutable float st = step;
1047     for (;;) {
1048       // fill buffer
1049       while ((*drn) < 4) {
1050         if (inleft == 0) return;
1051         dt[(*drn)++] = *dataIn;
1052         dataIn += 2;
1053         --inleft;
1054       }
1055       if (outleft == 0) return;
1056       --outleft;
1057       // cubic interpolation
1058       /*version(none)*/ {
1059         // interpolate between y1 and y2
1060         immutable float mu = (*cpf); // how far we are moved from y1 to y2
1061         immutable float mu2 = mu*mu; // wow
1062         immutable float y0 = dt[0], y1 = dt[1], y2 = dt[2], y3 = dt[3];
1063         version(complex_cubic) {
1064           immutable float z0 = 0.5*y3;
1065           immutable float z1 = 0.5*y0;
1066           immutable float a0 = 1.5*y1-z1-1.5*y2+z0;
1067           immutable float a1 = y0-2.5*y1+2*y2-z0;
1068           immutable float a2 = 0.5*y2-z1;
1069         } else {
1070           immutable float a0 = y3-y2-y0+y1;
1071           immutable float a1 = y0-y1-a0;
1072           immutable float a2 = y2-y0;
1073         }
1074         *dataOut = a0*mu*mu2+a1*mu2+a2*mu+y1;
1075       }// else *dataOut = dt[1];
1076       dataOut += 2;
1077       if (((*cpf) += st) >= 1.0f) {
1078         (*cpf) -= 1.0f;
1079         dt[0] = dt[1];
1080         dt[1] = dt[2];
1081         dt[2] = dt[3];
1082         dt[3] = 0.0f;
1083         --(*drn); // will request more input bytes
1084       }
1085     }
1086   }
1087 }