follin/resampler.d

   1 /* Copyright (C) 2007-2008 Jean-Marc Valin
   2  * Copyright (C) 2008      Thorvald Natvig
   3  * D port by Ketmar // Invisible Vector
   4  *
   5  * Arbitrary resampling code
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are
   9  * met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright notice,
  12  * this list of conditions and the following disclaimer.
  13  *
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  * notice, this list of conditions and the following disclaimer in the
  16  * documentation and/or other materials provided with the distribution.
  17  *
  18  * 3. The name of the author may not be used to endorse or promote products
  19  * derived from this software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  25  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  29  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 /* A-a-a-and now... D port is covered by the following license!
  35  *
  36  * This program is free software: you can redistribute it and/or modify
  37  * it under the terms of the GNU General Public License as published by
  38  * the Free Software Foundation, either version 3 of the License, or
  39  * (at your option) any later version.
  40  *
  41  * This program is distributed in the hope that it will be useful,
  42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44  * GNU General Public License for more details.
  45  *
  46  * You should have received a copy of the GNU General Public License
  47  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  48  */
  49 module iv.follin.resampler /*is aliced*/;
  50 import iv.alice;
  51
  52 /*
  53    The design goals of this code are:
  54       - Very fast algorithm
  55       - SIMD-friendly algorithm
  56       - Low memory requirement
  57       - Good *perceptual* quality (and not best SNR)
  58
  59    Warning: This resampler is relatively new. Although I think I got rid of
  60    all the major bugs and I don't expect the API to change anymore, there
  61    may be something I've missed. So use with caution.
  62
  63    This algorithm is based on this original resampling algorithm:
  64    Smith, Julius O. Digital Audio Resampling Home Page
  65    Center for Computer Research in Music and Acoustics (CCRMA),
  66    Stanford University, 2007.
  67    Web published at http://www-ccrma.stanford.edu/~jos/resample/.
  68
  69    There is one main difference, though. This resampler uses cubic
  70    interpolation instead of linear interpolation in the above paper. This
  71    makes the table much smaller and makes it possible to compute that table
  72    on a per-stream basis. In turn, being able to tweak the table for each
  73    stream makes it possible to both reduce complexity on simple ratios
  74    (e.g. 2/3), and get rid of the rounding operations in the inner loop.
  75    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
  76 */
  77 version = sincresample_use_full_table;
  78 version(X86) {
  79   version(sincresample_disable_sse) {
  80   } else {
  81     version(D_PIC) {} else version = sincresample_use_sse;
  82   }
  83 }
  84
  85
  86 // ////////////////////////////////////////////////////////////////////////// //
  87 public struct SpeexResampler {
  88 public:
  89   alias Quality = int;
  90   enum : uint {
  91     Fastest = 0,
  92     Voip = 3,
  93     Default = 4,
  94     Desktop = 5,
  95     Music = 8,
  96     Best = 10,
  97   }
  98
  99   enum Error {
 100     OK = 0,
 101     NoMemory,
 102     BadState,
 103     BadArgument,
 104     BadData,
 105   }
 106
 107 private:
 108 nothrow @trusted @nogc:
 109   alias ResamplerFn = int function (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen);
 110
 111 private:
 112   uint inRate;
 113   uint outRate;
 114   uint numRate; // from
 115   uint denRate; // to
 116
 117   Quality srQuality;
 118   uint chanCount;
 119   uint filterLen;
 120   uint memAllocSize;
 121   uint bufferSize;
 122   int intAdvance;
 123   int fracAdvance;
 124   float cutoff;
 125   uint oversample;
 126   bool started;
 127
 128   // these are per-channel
 129   int[64] lastSample;
 130   uint[64] sampFracNum;
 131   uint[64] magicSamples;
 132
 133   float* mem;
 134   uint realMemLen; // how much memory really allocated
 135   float* sincTable;
 136   uint sincTableLen;
 137   uint realSincTableLen; // how much memory really allocated
 138   ResamplerFn resampler;
 139
 140   int inStride;
 141   int outStride;
 142
 143 public:
 144   static string errorStr (int err) {
 145     switch (err) with (Error) {
 146       case OK: return "success";
 147       case NoMemory: return "memory allocation failed";
 148       case BadState: return "bad resampler state";
 149       case BadArgument: return "invalid argument";
 150       case BadData: return "bad data passed";
 151       default:
 152     }
 153     return "unknown error";
 154   }
 155
 156 public:
 157   @disable this (this);
 158   ~this () { deinit(); }
 159
 160   @property bool inited () const pure { return (resampler !is null); }
 161
 162   void deinit () {
 163     import core.stdc.stdlib : free;
 164     if (mem !is null) { free(mem); mem = null; }
 165     if (sincTable !is null) { free(sincTable); sincTable = null; }
 166     /*
 167     memAllocSize = realMemLen = 0;
 168     sincTableLen = realSincTableLen = 0;
 169     resampler = null;
 170     started = false;
 171     */
 172     inRate = outRate = numRate = denRate = 0;
 173     srQuality = cast(Quality)666;
 174     chanCount = 0;
 175     filterLen = 0;
 176     memAllocSize = 0;
 177     bufferSize = 0;
 178     intAdvance = 0;
 179     fracAdvance = 0;
 180     cutoff = 0;
 181     oversample = 0;
 182     started = 0;
 183
 184     mem = null;
 185     realMemLen = 0; // how much memory really allocated
 186     sincTable = null;
 187     sincTableLen = 0;
 188     realSincTableLen = 0; // how much memory really allocated
 189     resampler = null;
 190
 191     inStride = outStride = 0;
 192   }
 193
 194   /** Create a new resampler with integer input and output rates.
 195    *
 196    * Params:
 197    *  chans = Number of channels to be processed
 198    *  inRate = Input sampling rate (integer number of Hz).
 199    *  outRate = Output sampling rate (integer number of Hz).
 200    *  aquality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 201    *
 202    * Returns:
 203    *  0 or error code
 204    */
 205   Error setup (uint chans, uint ainRate, uint aoutRate, Quality aquality/*, usize line=__LINE__*/) {
 206     //{ import core.stdc.stdio; printf("init: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
 207     import core.stdc.stdlib : malloc, free;
 208
 209     deinit();
 210     if (aquality < 0) aquality = 0;
 211     if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
 212     if (chans < 1 || chans > 16) return Error.BadArgument;
 213
 214     started = false;
 215     inRate = 0;
 216     outRate = 0;
 217     numRate = 0;
 218     denRate = 0;
 219     srQuality = cast(Quality)666; // it's ok
 220     sincTableLen = 0;
 221     memAllocSize = 0;
 222     filterLen = 0;
 223     mem = null;
 224     resampler = null;
 225
 226     cutoff = 1.0f;
 227     chanCount = chans;
 228     inStride = 1;
 229     outStride = 1;
 230
 231     bufferSize = 160;
 232
 233     // per channel data
 234     lastSample[] = 0;
 235     magicSamples[] = 0;
 236     sampFracNum[] = 0;
 237
 238     setQuality(aquality);
 239     setRate(ainRate, aoutRate);
 240
 241     if (auto filterErr = updateFilter()) { deinit(); return filterErr; }
 242     skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
 243
 244     return Error.OK;
 245   }
 246
 247   /** Set (change) the input/output sampling rates (integer value).
 248    *
 249    * Params:
 250    *  ainRate = Input sampling rate (integer number of Hz).
 251    *  aoutRate = Output sampling rate (integer number of Hz).
 252    *
 253    * Returns:
 254    *  0 or error code
 255    */
 256   Error setRate (uint ainRate, uint aoutRate/*, usize line=__LINE__*/) {
 257     //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ainRate, aoutRate, cast(uint)line); }
 258     if (inRate == ainRate && outRate == aoutRate) return Error.OK;
 259     //{ import core.stdc.stdio; printf("changing rate: %u -> %u at %u\n", ratioNum, ratioDen, cast(uint)line); }
 260
 261     uint oldDen = denRate;
 262     inRate = ainRate;
 263     outRate = aoutRate;
 264     auto div = gcd(ainRate, aoutRate);
 265     numRate = ainRate/div;
 266     denRate = aoutRate/div;
 267
 268     if (oldDen > 0) {
 269       foreach (ref v; sampFracNum.ptr[0..chanCount]) {
 270         v = v*denRate/oldDen;
 271         // safety net
 272         if (v >= denRate) v = denRate-1;
 273       }
 274     }
 275
 276     return (inited ? updateFilter() : Error.OK);
 277   }
 278
 279   /** Get the current input/output sampling rates (integer value).
 280    *
 281    * Params:
 282    *  ainRate = Input sampling rate (integer number of Hz) copied.
 283    *  aoutRate = Output sampling rate (integer number of Hz) copied.
 284    */
 285   void getRate (out uint ainRate, out uint aoutRate) {
 286     ainRate = inRate;
 287     aoutRate = outRate;
 288   }
 289
 290   @property uint getInRate () { return inRate; }
 291   @property uint getOutRate () { return outRate; }
 292
 293   @property uint getChans () { return chanCount; }
 294
 295   /** Get the current resampling ratio. This will be reduced to the least common denominator.
 296    *
 297    * Params:
 298    *  ratioNum = Numerator of the sampling rate ratio copied
 299    *  ratioDen = Denominator of the sampling rate ratio copied
 300    */
 301   void getRatio (out uint ratioNum, out uint ratioDen) {
 302     ratioNum = numRate;
 303     ratioDen = denRate;
 304   }
 305
 306   /** Set (change) the conversion quality.
 307    *
 308    * Params:
 309    *  quality = Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 310    *
 311    * Returns:
 312    *  0 or error code
 313    */
 314   Error setQuality (Quality aquality) {
 315     if (aquality < 0) aquality = 0;
 316     if (aquality > SpeexResampler.Best) aquality = SpeexResampler.Best;
 317     if (srQuality == aquality) return Error.OK;
 318     srQuality = aquality;
 319     return (inited ? updateFilter() : Error.OK);
 320   }
 321
 322   /** Get the conversion quality.
 323    *
 324    * Returns:
 325    *  Resampling quality between 0 and 10, where 0 has poor quality and 10 has very high quality.
 326    */
 327   int getQuality () { return srQuality; }
 328
 329   /** Get the latency introduced by the resampler measured in input samples.
 330    *
 331    * Returns:
 332    *  Input latency;
 333    */
 334   int inputLatency () { return filterLen/2; }
 335
 336   /** Get the latency introduced by the resampler measured in output samples.
 337    *
 338    * Returns:
 339    *  Output latency.
 340    */
 341   int outputLatency () { return ((filterLen/2)*denRate+(numRate>>1))/numRate; }
 342
 343   /* Make sure that the first samples to go out of the resamplers don't have
 344    * leading zeros. This is only useful before starting to use a newly created
 345    * resampler. It is recommended to use that when resampling an audio file, as
 346    * it will generate a file with the same length. For real-time processing,
 347    * it is probably easier not to use this call (so that the output duration
 348    * is the same for the first frame).
 349    *
 350    * Setup/reset sequence will automatically call this, so it is private.
 351    */
 352   private void skipZeros () { foreach (immutable i; 0..chanCount) lastSample.ptr[i] = filterLen/2; }
 353
 354   static struct Data {
 355     const(float)[] dataIn;
 356     float[] dataOut;
 357     uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
 358     uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
 359   }
 360
 361   /** Resample (an interleaved) float array. The input and output buffers must *not* overlap.
 362    * `data.dataIn` can be empty, but `data.dataOut` can't.
 363    * Function will return number of consumed samples (*not* *frames*!) in `data.inputSamplesUsed`,
 364    * and number of produced samples in `data.outputSamplesUsed`.
 365    * You should provide enough samples for all channels, and all channels will be processed.
 366    *
 367    * Params:
 368    *  data = input and output buffers, number of frames consumed and produced
 369    *
 370    * Returns:
 371    *  0 or error code
 372    */
 373   Error process(string mode="interleaved") (ref Data data) {
 374     static assert(mode == "interleaved" || mode == "sequential");
 375
 376     data.inputSamplesUsed = data.outputSamplesUsed = 0;
 377     if (!inited) return Error.BadState;
 378
 379     if (data.dataIn.length%chanCount || data.dataOut.length < 1 || data.dataOut.length%chanCount) return Error.BadData;
 380     if (data.dataIn.length > uint.max/4 || data.dataOut.length > uint.max/4) return Error.BadData;
 381
 382     static if (mode == "interleaved") {
 383       inStride = outStride = chanCount;
 384     } else {
 385       inStride = outStride = 1;
 386     }
 387     uint iofs = 0, oofs = 0;
 388     immutable uint idclen = cast(uint)(data.dataIn.length/chanCount);
 389     immutable uint odclen = cast(uint)(data.dataOut.length/chanCount);
 390     foreach (immutable i; 0..chanCount) {
 391       data.inputSamplesUsed = idclen;
 392       data.outputSamplesUsed = odclen;
 393       if (data.dataIn.length) {
 394         processOneChannel(i, data.dataIn.ptr+iofs, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
 395       } else {
 396         processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr+oofs, &data.outputSamplesUsed);
 397       }
 398       static if (mode == "interleaved") {
 399         ++iofs;
 400         ++oofs;
 401       } else {
 402         iofs += idclen;
 403         oofs += odclen;
 404       }
 405     }
 406     data.inputSamplesUsed *= chanCount;
 407     data.outputSamplesUsed *= chanCount;
 408     return Error.OK;
 409   }
 410
 411
 412   //HACK for libswresample
 413   // return -1 or number of outframes
 414   int swrconvert (float** outbuf, int outframes, const(float)**inbuf, int inframes) {
 415     if (!inited || outframes < 1 || inframes < 0) return -1;
 416     inStride = outStride = 1;
 417     Data data;
 418     foreach (immutable i; 0..chanCount) {
 419       data.dataIn = (inframes ? inbuf[i][0..inframes] : null);
 420       data.dataOut = (outframes ? outbuf[i][0..outframes] : null);
 421       data.inputSamplesUsed = inframes;
 422       data.outputSamplesUsed = outframes;
 423       if (inframes > 0) {
 424         processOneChannel(i, data.dataIn.ptr, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
 425       } else {
 426         processOneChannel(i, null, &data.inputSamplesUsed, data.dataOut.ptr, &data.outputSamplesUsed);
 427       }
 428     }
 429     return data.outputSamplesUsed;
 430   }
 431
 432   /// Reset a resampler so a new (unrelated) stream can be processed.
 433   void reset () {
 434     lastSample[] = 0;
 435     magicSamples[] = 0;
 436     sampFracNum[] = 0;
 437     //foreach (immutable i; 0..chanCount*(filterLen-1)) mem[i] = 0;
 438     if (mem !is null) mem[0..chanCount*(filterLen-1)] = 0;
 439     skipZeros(); // make sure that the first samples to go out of the resamplers don't have leading zeros
 440   }
 441
 442 private:
 443   Error processOneChannel (uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen) {
 444     uint ilen = *indataLen;
 445     uint olen = *outdataLen;
 446     float* x = mem+chanIdx*memAllocSize;
 447     immutable int filterOfs = filterLen-1;
 448     immutable uint xlen = memAllocSize-filterOfs;
 449     immutable int istride = inStride;
 450     if (magicSamples.ptr[chanIdx]) olen -= magic(chanIdx, &outdata, olen);
 451     if (!magicSamples.ptr[chanIdx]) {
 452       while (ilen && olen) {
 453         uint ichunk = (ilen > xlen ? xlen : ilen);
 454         uint ochunk = olen;
 455         if (indata !is null) {
 456           //foreach (immutable j; 0..ichunk) x[j+filterOfs] = indata[j*istride];
 457           if (istride == 1) {
 458             x[filterOfs..filterOfs+ichunk] = indata[0..ichunk];
 459           } else {
 460             auto sp = indata;
 461             auto dp = x+filterOfs;
 462             foreach (immutable j; 0..ichunk) { *dp++ = *sp; sp += istride; }
 463           }
 464         } else {
 465           //foreach (immutable j; 0..ichunk) x[j+filterOfs] = 0;
 466           x[filterOfs..filterOfs+ichunk] = 0;
 467         }
 468         processNative(chanIdx, &ichunk, outdata, &ochunk);
 469         ilen -= ichunk;
 470         olen -= ochunk;
 471         outdata += ochunk*outStride;
 472         if (indata !is null) indata += ichunk*istride;
 473       }
 474     }
 475     *indataLen -= ilen;
 476     *outdataLen -= olen;
 477     return Error.OK;
 478   }
 479
 480   Error processNative (uint chanIdx, uint* indataLen, float* outdata, uint* outdataLen) {
 481     immutable N = filterLen;
 482     int outSample = 0;
 483     float* x = mem+chanIdx*memAllocSize;
 484     uint ilen;
 485     started = true;
 486     // call the right resampler through the function ptr
 487     outSample = resampler(this, chanIdx, x, indataLen, outdata, outdataLen);
 488     if (lastSample.ptr[chanIdx] < cast(int)*indataLen) *indataLen = lastSample.ptr[chanIdx];
 489     *outdataLen = outSample;
 490     lastSample.ptr[chanIdx] -= *indataLen;
 491     ilen = *indataLen;
 492     foreach (immutable j; 0..N-1) x[j] = x[j+ilen];
 493     return Error.OK;
 494   }
 495
 496   int magic (uint chanIdx, float **outdata, uint outdataLen) {
 497     uint tempInLen = magicSamples.ptr[chanIdx];
 498     float* x = mem+chanIdx*memAllocSize;
 499     processNative(chanIdx, &tempInLen, *outdata, &outdataLen);
 500     magicSamples.ptr[chanIdx] -= tempInLen;
 501     // if we couldn't process all "magic" input samples, save the rest for next time
 502     if (magicSamples.ptr[chanIdx]) {
 503       immutable N = filterLen;
 504       foreach (immutable i; 0..magicSamples.ptr[chanIdx]) x[N-1+i] = x[N-1+i+tempInLen];
 505     }
 506     *outdata += outdataLen*outStride;
 507     return outdataLen;
 508   }
 509
 510   Error updateFilter () {
 511     uint oldFilterLen = filterLen;
 512     uint oldAllocSize = memAllocSize;
 513     bool useDirect;
 514     uint minSincTableLen;
 515     uint minAllocSize;
 516
 517     intAdvance = numRate/denRate;
 518     fracAdvance = numRate%denRate;
 519     oversample = qualityMap.ptr[srQuality].oversample;
 520     filterLen = qualityMap.ptr[srQuality].baseLength;
 521
 522     if (numRate > denRate) {
 523       // down-sampling
 524       cutoff = qualityMap.ptr[srQuality].downsampleBandwidth*denRate/numRate;
 525       // FIXME: divide the numerator and denominator by a certain amount if they're too large
 526       filterLen = filterLen*numRate/denRate;
 527       // round up to make sure we have a multiple of 8 for SSE
 528       filterLen = ((filterLen-1)&(~0x7))+8;
 529       if (2*denRate < numRate) oversample >>= 1;
 530       if (4*denRate < numRate) oversample >>= 1;
 531       if (8*denRate < numRate) oversample >>= 1;
 532       if (16*denRate < numRate) oversample >>= 1;
 533       if (oversample < 1) oversample = 1;
 534     } else {
 535       // up-sampling
 536       cutoff = qualityMap.ptr[srQuality].upsampleBandwidth;
 537     }
 538
 539     // choose the resampling type that requires the least amount of memory
 540     version(sincresample_use_full_table) {
 541       useDirect = true;
 542       if (int.max/float.sizeof/denRate < filterLen) goto fail;
 543     } else {
 544       useDirect = (filterLen*denRate <= filterLen*oversample+8 && int.max/float.sizeof/denRate >= filterLen);
 545     }
 546
 547     if (useDirect) {
 548       minSincTableLen = filterLen*denRate;
 549     } else {
 550       if ((int.max/float.sizeof-8)/oversample < filterLen) goto fail;
 551       minSincTableLen = filterLen*oversample+8;
 552     }
 553
 554     if (sincTableLen < minSincTableLen) {
 555       import core.stdc.stdlib : realloc;
 556       auto nslen = cast(uint)(minSincTableLen*float.sizeof);
 557       if (nslen > realSincTableLen) {
 558         if (nslen < 512*1024) nslen = 512*1024; // inc to 3 mb?
 559         auto x = cast(float*)realloc(sincTable, nslen);
 560         if (!x) goto fail;
 561         sincTable = x;
 562         realSincTableLen = nslen;
 563       }
 564       sincTableLen = minSincTableLen;
 565     }
 566
 567     if (useDirect) {
 568       foreach (int i; 0..denRate) {
 569         foreach (int j; 0..filterLen) {
 570           sincTable[i*filterLen+j] = sinc(cutoff, ((j-cast(int)filterLen/2+1)-(cast(float)i)/denRate), filterLen, qualityMap.ptr[srQuality].windowFunc);
 571         }
 572       }
 573       if (srQuality > 8) {
 574         resampler = &resamplerBasicDirect!double;
 575       } else {
 576         resampler = &resamplerBasicDirect!float;
 577       }
 578     } else {
 579       foreach (immutable int i; -4..cast(int)(oversample*filterLen+4)) {
 580         sincTable[i+4] = sinc(cutoff, (i/cast(float)oversample-filterLen/2), filterLen, qualityMap.ptr[srQuality].windowFunc);
 581       }
 582       if (srQuality > 8) {
 583         resampler = &resamplerBasicInterpolate!double;
 584       } else {
 585         resampler = &resamplerBasicInterpolate!float;
 586       }
 587     }
 588
 589     /* Here's the place where we update the filter memory to take into account
 590        the change in filter length. It's probably the messiest part of the code
 591        due to handling of lots of corner cases. */
 592
 593     // adding bufferSize to filterLen won't overflow here because filterLen could be multiplied by float.sizeof above
 594     minAllocSize = filterLen-1+bufferSize;
 595     if (minAllocSize > memAllocSize) {
 596       import core.stdc.stdlib : realloc;
 597       if (int.max/float.sizeof/chanCount < minAllocSize) goto fail;
 598       auto nslen = cast(uint)(chanCount*minAllocSize*mem[0].sizeof);
 599       if (nslen > realMemLen) {
 600         if (nslen < 16384) nslen = 16384;
 601         auto x = cast(float*)realloc(mem, nslen);
 602         if (x is null) goto fail;
 603         mem = x;
 604         realMemLen = nslen;
 605       }
 606       memAllocSize = minAllocSize;
 607     }
 608     if (!started) {
 609       //foreach (i=0;i<chanCount*memAllocSize;i++) mem[i] = 0;
 610       mem[0..chanCount*memAllocSize] = 0;
 611     } else if (filterLen > oldFilterLen) {
 612       // increase the filter length
 613       foreach_reverse (uint i; 0..chanCount) {
 614         uint j;
 615         uint olen = oldFilterLen;
 616         {
 617           // try and remove the magic samples as if nothing had happened
 618           //FIXME: this is wrong but for now we need it to avoid going over the array bounds
 619           olen = oldFilterLen+2*magicSamples.ptr[i];
 620           for (j = oldFilterLen-1+magicSamples.ptr[i]; j--; ) mem[i*memAllocSize+j+magicSamples.ptr[i]] = mem[i*oldAllocSize+j];
 621           //for (j = 0; j < magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = 0;
 622           mem[i*memAllocSize..i*memAllocSize+magicSamples.ptr[i]] = 0;
 623           magicSamples.ptr[i] = 0;
 624         }
 625         if (filterLen > olen) {
 626           // if the new filter length is still bigger than the "augmented" length
 627           // copy data going backward
 628           for (j = 0; j < olen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = mem[i*memAllocSize+(olen-2-j)];
 629           // then put zeros for lack of anything better
 630           for (; j < filterLen-1; ++j) mem[i*memAllocSize+(filterLen-2-j)] = 0;
 631           // adjust lastSample
 632           lastSample.ptr[i] += (filterLen-olen)/2;
 633         } else {
 634           // put back some of the magic!
 635           magicSamples.ptr[i] = (olen-filterLen)/2;
 636           for (j = 0; j < filterLen-1+magicSamples.ptr[i]; ++j) mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
 637         }
 638       }
 639     } else if (filterLen < oldFilterLen) {
 640       // reduce filter length, this a bit tricky
 641       // we need to store some of the memory as "magic" samples so they can be used directly as input the next time(s)
 642       foreach (immutable i; 0..chanCount) {
 643         uint j;
 644         uint oldMagic = magicSamples.ptr[i];
 645         magicSamples.ptr[i] = (oldFilterLen-filterLen)/2;
 646         // we must copy some of the memory that's no longer used
 647         // copy data going backward
 648         for (j = 0; j < filterLen-1+magicSamples.ptr[i]+oldMagic; ++j) {
 649           mem[i*memAllocSize+j] = mem[i*memAllocSize+j+magicSamples.ptr[i]];
 650         }
 651         magicSamples.ptr[i] += oldMagic;
 652       }
 653     }
 654     return Error.OK;
 655
 656   fail:
 657     resampler = null;
 658     /* mem may still contain consumed input samples for the filter.
 659        Restore filterLen so that filterLen-1 still points to the position after
 660        the last of these samples. */
 661     filterLen = oldFilterLen;
 662     return Error.NoMemory;
 663   }
 664 }
 665
 666
 667 // ////////////////////////////////////////////////////////////////////////// //
 668 static immutable double[68] kaiser12Table = [
 669   0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
 670   0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
 671   0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
 672   0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014,
 673   0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490,
 674   0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546,
 675   0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178,
 676   0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947,
 677   0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
 678   0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
 679   0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
 680   0.00001000, 0.00000000];
 681
 682 static immutable double[36] kaiser10Table = [
 683   0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
 684   0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
 685   0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
 686   0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
 687   0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
 688   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000];
 689
 690 static immutable double[36] kaiser8Table = [
 691   0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
 692   0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
 693   0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
 694   0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
 695   0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
 696   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000];
 697
 698 static immutable double[36] kaiser6Table = [
 699   0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
 700   0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
 701   0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
 702   0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
 703   0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
 704   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000];
 705
 706 struct FuncDef {
 707   immutable(double)* table;
 708   int oversample;
 709 }
 710
 711 static immutable FuncDef Kaiser12 = FuncDef(kaiser12Table.ptr, 64);
 712 static immutable FuncDef Kaiser10 = FuncDef(kaiser10Table.ptr, 32);
 713 static immutable FuncDef Kaiser8 = FuncDef(kaiser8Table.ptr, 32);
 714 static immutable FuncDef Kaiser6 = FuncDef(kaiser6Table.ptr, 32);
 715
 716
 717 struct QualityMapping {
 718   int baseLength;
 719   int oversample;
 720   float downsampleBandwidth;
 721   float upsampleBandwidth;
 722   immutable FuncDef* windowFunc;
 723 }
 724
 725
 726 /* This table maps conversion quality to internal parameters. There are two
 727    reasons that explain why the up-sampling bandwidth is larger than the
 728    down-sampling bandwidth:
 729    1) When up-sampling, we can assume that the spectrum is already attenuated
 730       close to the Nyquist rate (from an A/D or a previous resampling filter)
 731    2) Any aliasing that occurs very close to the Nyquist rate will be masked
 732       by the sinusoids/noise just below the Nyquist rate (guaranteed only for
 733       up-sampling).
 734 */
 735 static immutable QualityMapping[11] qualityMap = [
 736   QualityMapping(  8,  4, 0.830f, 0.860f, &Kaiser6 ), /* Q0 */
 737   QualityMapping( 16,  4, 0.850f, 0.880f, &Kaiser6 ), /* Q1 */
 738   QualityMapping( 32,  4, 0.882f, 0.910f, &Kaiser6 ), /* Q2 */  /* 82.3% cutoff ( ~60 dB stop) 6  */
 739   QualityMapping( 48,  8, 0.895f, 0.917f, &Kaiser8 ), /* Q3 */  /* 84.9% cutoff ( ~80 dB stop) 8  */
 740   QualityMapping( 64,  8, 0.921f, 0.940f, &Kaiser8 ), /* Q4 */  /* 88.7% cutoff ( ~80 dB stop) 8  */
 741   QualityMapping( 80, 16, 0.922f, 0.940f, &Kaiser10), /* Q5 */  /* 89.1% cutoff (~100 dB stop) 10 */
 742   QualityMapping( 96, 16, 0.940f, 0.945f, &Kaiser10), /* Q6 */  /* 91.5% cutoff (~100 dB stop) 10 */
 743   QualityMapping(128, 16, 0.950f, 0.950f, &Kaiser10), /* Q7 */  /* 93.1% cutoff (~100 dB stop) 10 */
 744   QualityMapping(160, 16, 0.960f, 0.960f, &Kaiser10), /* Q8 */  /* 94.5% cutoff (~100 dB stop) 10 */
 745   QualityMapping(192, 32, 0.968f, 0.968f, &Kaiser12), /* Q9 */  /* 95.5% cutoff (~100 dB stop) 10 */
 746   QualityMapping(256, 32, 0.975f, 0.975f, &Kaiser12), /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
 747 ];
 748
 749
 750 nothrow @trusted @nogc:
 751 /*8, 24, 40, 56, 80, 104, 128, 160, 200, 256, 320*/
 752 double computeFunc (float x, immutable FuncDef* func) {
 753   version(Posix) import core.stdc.math : lrintf;
 754   import std.math : floor;
 755   //double[4] interp;
 756   float y = x*func.oversample;
 757   version(Posix) {
 758     int ind = cast(int)lrintf(floor(y));
 759   } else {
 760     int ind = cast(int)(floor(y));
 761   }
 762   float frac = (y-ind);
 763   immutable f2 = frac*frac;
 764   immutable f3 = f2*frac;
 765   double interp3 = -0.1666666667*frac+0.1666666667*(f3);
 766   double interp2 = frac+0.5*(f2)-0.5*(f3);
 767   //double interp2 = 1.0f-0.5f*frac-f2+0.5f*f3;
 768   double interp0 = -0.3333333333*frac+0.5*(f2)-0.1666666667*(f3);
 769   // just to make sure we don't have rounding problems
 770   double interp1 = 1.0f-interp3-interp2-interp0;
 771   //sum = frac*accum[1]+(1-frac)*accum[2];
 772   return interp0*func.table[ind]+interp1*func.table[ind+1]+interp2*func.table[ind+2]+interp3*func.table[ind+3];
 773 }
 774
 775
 776 // the slow way of computing a sinc for the table; should improve that some day
 777 float sinc (float cutoff, float x, int N, immutable FuncDef *windowFunc) {
 778   version(LittleEndian) {
 779     align(1) union temp_float { align(1): float f; uint n; }
 780   } else {
 781     static T fabs(T) (T n) pure { static if (__VERSION__ > 2067) pragma(inline, true); return (n < 0 ? -n : n); }
 782   }
 783   import std.math : sin, PI;
 784   version(LittleEndian) {
 785     temp_float txx = void;
 786     txx.f = x;
 787     txx.n &= 0x7fff_ffff; // abs
 788     if (txx.f < 1.0e-6f) return cutoff;
 789     if (txx.f > 0.5f*N) return 0;
 790   } else {
 791     if (fabs(x) < 1.0e-6f) return cutoff;
 792     if (fabs(x) > 0.5f*N) return 0;
 793   }
 794   //FIXME: can it really be any slower than this?
 795   immutable float xx = x*cutoff;
 796   immutable pixx = PI*xx;
 797   version(LittleEndian) {
 798     return cutoff*sin(pixx)/pixx*computeFunc(2.0*txx.f/N, windowFunc);
 799   } else {
 800     return cutoff*sin(pixx)/pixx*computeFunc(fabs(2.0*x/N), windowFunc);
 801   }
 802 }
 803
 804
 805 void cubicCoef (in float frac, float* interp) {
 806   immutable f2 = frac*frac;
 807   immutable f3 = f2*frac;
 808   // compute interpolation coefficients; i'm not sure whether this corresponds to cubic interpolation but I know it's MMSE-optimal on a sinc
 809   interp[0] =  -0.16667f*frac+0.16667f*f3;
 810   interp[1] = frac+0.5f*f2-0.5f*f3;
 811   //interp[2] = 1.0f-0.5f*frac-f2+0.5f*f3;
 812   interp[3] = -0.33333f*frac+0.5f*f2-0.16667f*f3;
 813   // just to make sure we don't have rounding problems
 814   interp[2] = 1.0-interp[0]-interp[1]-interp[3];
 815 }
 816
 817
 818 // ////////////////////////////////////////////////////////////////////////// //
 819 int resamplerBasicDirect(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint* indataLen, float* outdata, uint* outdataLen)
 820 if (is(T == float) || is(T == double))
 821 {
 822   auto N = st.filterLen;
 823   static if (is(T == double)) assert(N%4 == 0);
 824   int outSample = 0;
 825   int lastSample = st.lastSample.ptr[chanIdx];
 826   uint sampFracNum = st.sampFracNum.ptr[chanIdx];
 827   const(float)* sincTable = st.sincTable;
 828   immutable outStride = st.outStride;
 829   immutable intAdvance = st.intAdvance;
 830   immutable fracAdvance = st.fracAdvance;
 831   immutable denRate = st.denRate;
 832   T sum = void;
 833   while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
 834     const(float)* sinct = &sincTable[sampFracNum*N];
 835     const(float)* iptr = &indata[lastSample];
 836     static if (is(T == float)) {
 837       // at least 2x speedup with SSE here (but for unrolled loop)
 838       if (N%4 == 0) {
 839         version(sincresample_use_sse) {
 840           //align(64) __gshared float[4] zero = 0;
 841           align(64) __gshared float[4+128] zeroesBuf = 0; // dmd cannot into such aligns, alas
 842           __gshared uint zeroesptr = 0;
 843           if (zeroesptr == 0) {
 844             zeroesptr = cast(uint)zeroesBuf.ptr;
 845             if (zeroesptr&0x3f) zeroesptr = (zeroesptr|0x3f)+1;
 846           }
 847           //assert((zeroesptr&0x3f) == 0, "wtf?!");
 848           asm nothrow @safe @nogc {
 849             mov       ECX,[N];
 850             shr       ECX,2;
 851             mov       EAX,[zeroesptr];
 852             movaps    XMM0,[EAX];
 853             mov       EAX,[sinct];
 854             mov       EBX,[iptr];
 855             mov       EDX,16;
 856             align 8;
 857            rbdseeloop:
 858             movups    XMM1,[EAX];
 859             movups    XMM2,[EBX];
 860             mulps     XMM1,XMM2;
 861             addps     XMM0,XMM1;
 862             add       EAX,EDX;
 863             add       EBX,EDX;
 864             dec       ECX;
 865             jnz       rbdseeloop;
 866             // store result in sum
 867             movhlps   XMM1,XMM0; // now low part of XMM1 contains high part of XMM0
 868             addps     XMM0,XMM1; // low part of XMM0 is ok
 869             movaps    XMM1,XMM0;
 870             shufps    XMM1,XMM0,0b_01_01_01_01; // 2nd float of XMM0 goes to the 1st float of XMM1
 871             addss     XMM0,XMM1;
 872             movss     [sum],XMM0;
 873           }
 874           /*
 875           float sum1 = 0;
 876           foreach (immutable j; 0..N) sum1 += sinct[j]*iptr[j];
 877           import std.math;
 878           if (fabs(sum-sum1) > 0.000001f) {
 879             import core.stdc.stdio;
 880             printf("sum=%f; sum1=%f\n", sum, sum1);
 881             assert(0);
 882           }
 883           */
 884         } else {
 885           // no SSE; for my i3 unrolled loop is almost of the speed of SSE code
 886           T[4] accum = 0;
 887           foreach (immutable j; 0..N/4) {
 888             accum.ptr[0] += *sinct++ * *iptr++;
 889             accum.ptr[1] += *sinct++ * *iptr++;
 890             accum.ptr[2] += *sinct++ * *iptr++;
 891             accum.ptr[3] += *sinct++ * *iptr++;
 892           }
 893           sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
 894         }
 895       } else {
 896         sum = 0;
 897         foreach (immutable j; 0..N) sum += *sinct++ * *iptr++;
 898       }
 899       outdata[outStride*outSample++] = sum;
 900     } else {
 901       if (N%4 == 0) {
 902         //TODO: write SSE code here!
 903         // for my i3 unrolled loop is ~2 times faster
 904         T[4] accum = 0;
 905         foreach (immutable j; 0..N/4) {
 906           accum.ptr[0] += cast(double)*sinct++ * cast(double)*iptr++;
 907           accum.ptr[1] += cast(double)*sinct++ * cast(double)*iptr++;
 908           accum.ptr[2] += cast(double)*sinct++ * cast(double)*iptr++;
 909           accum.ptr[3] += cast(double)*sinct++ * cast(double)*iptr++;
 910         }
 911         sum = accum.ptr[0]+accum.ptr[1]+accum.ptr[2]+accum.ptr[3];
 912       } else {
 913         sum = 0;
 914         foreach (immutable j; 0..N) sum += cast(double)*sinct++ * cast(double)*iptr++;
 915       }
 916       outdata[outStride*outSample++] = cast(float)sum;
 917     }
 918     lastSample += intAdvance;
 919     sampFracNum += fracAdvance;
 920     if (sampFracNum >= denRate) {
 921       sampFracNum -= denRate;
 922       ++lastSample;
 923     }
 924   }
 925   st.lastSample.ptr[chanIdx] = lastSample;
 926   st.sampFracNum.ptr[chanIdx] = sampFracNum;
 927   return outSample;
 928 }
 929
 930
 931 int resamplerBasicInterpolate(T) (ref SpeexResampler st, uint chanIdx, const(float)* indata, uint *indataLen, float *outdata, uint *outdataLen)
 932 if (is(T == float) || is(T == double))
 933 {
 934   immutable N = st.filterLen;
 935   assert(N%4 == 0);
 936   int outSample = 0;
 937   int lastSample = st.lastSample.ptr[chanIdx];
 938   uint sampFracNum = st.sampFracNum.ptr[chanIdx];
 939   immutable outStride = st.outStride;
 940   immutable intAdvance = st.intAdvance;
 941   immutable fracAdvance = st.fracAdvance;
 942   immutable denRate = st.denRate;
 943   float sum;
 944
 945   float[4] interp = void;
 946   T[4] accum = void;
 947   while (!(lastSample >= cast(int)(*indataLen) || outSample >= cast(int)(*outdataLen))) {
 948     const(float)* iptr = &indata[lastSample];
 949     const int offset = sampFracNum*st.oversample/st.denRate;
 950     const float frac = (cast(float)((sampFracNum*st.oversample)%st.denRate))/st.denRate;
 951     accum[] = 0;
 952     //TODO: optimize!
 953     foreach (immutable j; 0..N) {
 954       immutable T currIn = iptr[j];
 955       accum.ptr[0] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-2]);
 956       accum.ptr[1] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset-1]);
 957       accum.ptr[2] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset]);
 958       accum.ptr[3] += currIn*(st.sincTable[4+(j+1)*st.oversample-offset+1]);
 959     }
 960
 961     cubicCoef(frac, interp.ptr);
 962     sum = (interp.ptr[0]*accum.ptr[0])+(interp.ptr[1]*accum.ptr[1])+(interp.ptr[2]*accum.ptr[2])+(interp.ptr[3]*accum.ptr[3]);
 963
 964     outdata[outStride*outSample++] = sum;
 965     lastSample += intAdvance;
 966     sampFracNum += fracAdvance;
 967     if (sampFracNum >= denRate) {
 968       sampFracNum -= denRate;
 969       ++lastSample;
 970     }
 971   }
 972
 973   st.lastSample.ptr[chanIdx] = lastSample;
 974   st.sampFracNum.ptr[chanIdx] = sampFracNum;
 975   return outSample;
 976 }
 977
 978
 979 // ////////////////////////////////////////////////////////////////////////// //
 980 uint gcd (uint a, uint b) pure {
 981   if (a == 0) return b;
 982   if (b == 0) return a;
 983   for (;;) {
 984     if (a > b) {
 985       a %= b;
 986       if (a == 0) return b;
 987       if (a == 1) return 1;
 988     } else {
 989       b %= a;
 990       if (b == 0) return a;
 991       if (b == 1) return 1;
 992     }
 993   }
 994 }
 995
 996
 997 // ////////////////////////////////////////////////////////////////////////// //
 998 // very simple and cheap cubic upsampler
 999 struct CubicUpsampler {
1000 public:
1001 nothrow @trusted @nogc:
1002   float[2] curposfrac; // current position offset [0..1)
1003   float step; // how long we should move on one step?
1004   float[4][2] data; // -1..3
1005   uint[2] drain;
1006
1007   void reset () {
1008     curposfrac[] = 0.0f;
1009     foreach (ref d; data) d[] = 0.0f;
1010     drain[] = 0;
1011   }
1012
1013   bool setup (float astep) {
1014     if (astep >= 1.0f) return false;
1015     step = astep;
1016     return true;
1017   }
1018
1019   /*
1020   static struct Data {
1021     const(float)[] dataIn;
1022     float[] dataOut;
1023     uint inputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1024     uint outputSamplesUsed; // out value, in samples (i.e. multiplied by channel count)
1025   }
1026   */
1027
1028   SpeexResampler.Error process (ref SpeexResampler.Data d) {
1029     d.inputSamplesUsed = d.outputSamplesUsed = 0;
1030     if (d.dataOut.length < 2) return SpeexResampler.Error.OK;
1031     foreach (uint cidx; 0..2) {
1032       uint inleft = cast(uint)d.dataIn.length/2;
1033       uint outleft = cast(uint)d.dataOut.length/2;
1034       processChannel(inleft, outleft, (d.dataIn.length ? d.dataIn.ptr+cidx : null), (d.dataOut.length ? d.dataOut.ptr+cidx : null), cidx);
1035       d.outputSamplesUsed += cast(uint)(d.dataOut.length/2)-outleft;
1036       d.inputSamplesUsed += cast(uint)(d.dataIn.length/2)-inleft;
1037     }
1038     return SpeexResampler.Error.OK;
1039   }
1040
1041   private void processChannel (ref uint inleft, ref uint outleft, const(float)* dataIn, float* dataOut, uint cidx) {
1042     if (outleft == 0) return;
1043     if (inleft == 0 && drain.ptr[cidx] <= 1) return;
1044     auto dt = data.ptr[cidx].ptr;
1045     auto drn = drain.ptr+cidx;
1046     auto cpf = curposfrac.ptr+cidx;
1047     immutable float st = step;
1048     for (;;) {
1049       // fill buffer
1050       while ((*drn) < 4) {
1051         if (inleft == 0) return;
1052         dt[(*drn)++] = *dataIn;
1053         dataIn += 2;
1054         --inleft;
1055       }
1056       if (outleft == 0) return;
1057       --outleft;
1058       // cubic interpolation
1059       /*version(none)*/ {
1060         // interpolate between y1 and y2
1061         immutable float mu = (*cpf); // how far we are moved from y1 to y2
1062         immutable float mu2 = mu*mu; // wow
1063         immutable float y0 = dt[0], y1 = dt[1], y2 = dt[2], y3 = dt[3];
1064         version(complex_cubic) {
1065           immutable float z0 = 0.5*y3;
1066           immutable float z1 = 0.5*y0;
1067           immutable float a0 = 1.5*y1-z1-1.5*y2+z0;
1068           immutable float a1 = y0-2.5*y1+2*y2-z0;
1069           immutable float a2 = 0.5*y2-z1;
1070         } else {
1071           immutable float a0 = y3-y2-y0+y1;
1072           immutable float a1 = y0-y1-a0;
1073           immutable float a2 = y2-y0;
1074         }
1075         *dataOut = a0*mu*mu2+a1*mu2+a2*mu+y1;
1076       }// else *dataOut = dt[1];
1077       dataOut += 2;
1078       if (((*cpf) += st) >= 1.0f) {
1079         (*cpf) -= 1.0f;
1080         dt[0] = dt[1];
1081         dt[1] = dt[2];
1082         dt[2] = dt[3];
1083         dt[3] = 0.0f;
1084         --(*drn); // will request more input bytes
1085       }
1086     }
1087   }
1088 }