1 ////////////////////////////////////////////////////////////////////////////////
3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
4 /// while maintaining the original pitch by using a time domain WSOLA-like
5 /// method with several performance-increasing tweaks.
7 /// Note : MMX optimized functions reside in a separate, platform-specific
8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
10 /// Author : Copyright (c) Olli Parviainen
11 /// Author e-mail : oparviai @ iki.fi
12 /// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
14 ////////////////////////////////////////////////////////////////////////////////
16 // Last changed : $Date$
17 // File revision : $Revision$
21 ////////////////////////////////////////////////////////////////////////////////
25 // SoundTouch audio processing library
26 // Copyright (c) Olli Parviainen
28 // This library is free software; you can redistribute it and/or
29 // modify it under the terms of the GNU Lesser General Public
30 // License as published by the Free Software Foundation; either
31 // version 2.1 of the License, or (at your option) any later version.
33 // This library is distributed in the hope that it will be useful,
34 // but WITHOUT ANY WARRANTY; without even the implied warranty of
35 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 // Lesser General Public License for more details.
38 // You should have received a copy of the GNU Lesser General Public
39 // License along with this library; if not, write to the Free Software
40 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42 ////////////////////////////////////////////////////////////////////////////////
52 #include "cpu_detect.h"
53 #include "TDStretch.h"
55 using namespace soundtouch
;
58 #define min(a,b) ((a > b) ? b : a)
59 #define max(a,b) ((a < b) ? b : a)
64 /*****************************************************************************
66 * Constant definitions
68 *****************************************************************************/
71 #define MAX_SCAN_DELTA 124
73 // Table for the hierarchical mixing position seeking algorithm
74 int scanOffsets
[4][24]={
75 { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
76 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
77 {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
78 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
79 { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
80 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
81 { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
84 /*****************************************************************************
86 * Implementation of the class 'TDStretch'
88 *****************************************************************************/
91 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer
)
95 bMidBufferDirty
= FALSE
;
98 pRefMidBufferUnaligned
= NULL
;
101 setParameters(44100, DEFAULT_SEQUENCE_MS
, DEFAULT_SEEKWINDOW_MS
, DEFAULT_OVERLAP_MS
);
109 TDStretch::~TDStretch()
112 delete[] pRefMidBufferUnaligned
;
118 // Sets routine control parameters. These control are certain time constants
119 // defining how the sound is stretched to the desired duration.
121 // 'sampleRate' = sample rate of the sound
122 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
123 // 'seekwindowMS' = seeking window length for scanning the best overlapping
124 // position (default = 28 ms)
125 // 'overlapMS' = overlapping length (default = 12 ms)
127 void TDStretch::setParameters(uint aSampleRate
, uint aSequenceMS
,
128 uint aSeekWindowMS
, uint aOverlapMS
)
130 this->sampleRate
= aSampleRate
;
131 this->sequenceMs
= aSequenceMS
;
132 this->seekWindowMs
= aSeekWindowMS
;
133 this->overlapMs
= aOverlapMS
;
135 seekLength
= (sampleRate
* seekWindowMs
) / 1000;
136 seekWindowLength
= (sampleRate
* sequenceMs
) / 1000;
138 maxOffset
= seekLength
;
140 calculateOverlapLength(overlapMs
);
142 // set tempo to recalculate 'sampleReq'
149 /// Get routine control parameters, see setParameters() function.
150 /// Any of the parameters to this function can be NULL, in such case corresponding parameter
151 /// value isn't returned.
152 void TDStretch::getParameters(uint
*pSampleRate
, uint
*pSequenceMs
, uint
*pSeekWindowMs
, uint
*pOverlapMs
)
156 *pSampleRate
= sampleRate
;
161 *pSequenceMs
= sequenceMs
;
166 *pSeekWindowMs
= seekWindowMs
;
171 *pOverlapMs
= overlapMs
;
176 // Overlaps samples in 'midBuffer' with the samples in 'input'
177 void TDStretch::overlapMono(SAMPLETYPE
*output
, const SAMPLETYPE
*input
) const
181 for (i
= 0; i
< (int)overlapLength
; i
++)
183 itemp
= overlapLength
- i
;
184 output
[i
] = (input
[i
] * i
+ pMidBuffer
[i
] * itemp
) / overlapLength
; // >> overlapDividerBits;
190 void TDStretch::clearMidBuffer()
194 memset(pMidBuffer
, 0, 2 * sizeof(SAMPLETYPE
) * overlapLength
);
195 bMidBufferDirty
= FALSE
;
200 void TDStretch::clearInput()
207 // Clears the sample buffers
208 void TDStretch::clear()
210 outputBuffer
.clear();
217 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
219 void TDStretch::enableQuickSeek(BOOL enable
)
225 // Returns nonzero if the quick seeking algorithm is enabled.
226 BOOL
TDStretch::isQuickSeekEnabled() const
232 // Seeks for the optimal overlap-mixing position.
233 uint
TDStretch::seekBestOverlapPosition(const SAMPLETYPE
*refPos
)
240 return seekBestOverlapPositionStereoQuick(refPos
);
244 return seekBestOverlapPositionStereo(refPos
);
252 return seekBestOverlapPositionMonoQuick(refPos
);
256 return seekBestOverlapPositionMono(refPos
);
264 // Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at position
266 inline void TDStretch::overlap(SAMPLETYPE
*output
, const SAMPLETYPE
*input
, uint ovlPos
) const
271 overlapStereo(output
, input
+ 2 * ovlPos
);
274 overlapMono(output
, input
+ ovlPos
);
281 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
284 // The best position is determined as the position where the two overlapped
285 // sample sequences are 'most alike', in terms of the highest cross-correlation
286 // value over the overlapping period
287 uint
TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE
*refPos
)
290 LONG_SAMPLETYPE bestCorr
, corr
;
293 // Slopes the amplitudes of the 'midBuffer' samples
294 precalcCorrReferenceStereo();
299 // Scans for the best correlation value by testing each possible position
300 // over the permitted range.
301 for (i
= 0; i
< seekLength
; i
++)
303 // Calculates correlation value for the mixing position corresponding
305 corr
= calcCrossCorrStereo(refPos
+ 2 * i
, pRefMidBuffer
);
307 // Checks for the highest correlation value
314 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
315 clearCrossCorrState();
321 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
324 // The best position is determined as the position where the two overlapped
325 // sample sequences are 'most alike', in terms of the highest cross-correlation
326 // value over the overlapping period
327 uint
TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE
*refPos
)
331 LONG_SAMPLETYPE bestCorr
, corr
;
332 uint scanCount
, corrOffset
, tempOffset
;
334 // Slopes the amplitude of the 'midBuffer' samples
335 precalcCorrReferenceStereo();
342 // Scans for the best correlation value using four-pass hierarchical search.
344 // The look-up table 'scans' has hierarchical position adjusting steps.
345 // In first pass the routine searhes for the highest correlation with
346 // relatively coarse steps, then rescans the neighbourhood of the highest
347 // correlation with better resolution and so on.
348 for (scanCount
= 0;scanCount
< 4; scanCount
++)
351 while (scanOffsets
[scanCount
][j
])
353 tempOffset
= corrOffset
+ scanOffsets
[scanCount
][j
];
354 if (tempOffset
>= seekLength
) break;
356 // Calculates correlation value for the mixing position corresponding
358 corr
= calcCrossCorrStereo(refPos
+ 2 * tempOffset
, pRefMidBuffer
);
360 // Checks for the highest correlation value
364 bestOffs
= tempOffset
;
368 corrOffset
= bestOffs
;
370 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
371 clearCrossCorrState();
378 // Seeks for the optimal overlap-mixing position. The 'mono' version of the
381 // The best position is determined as the position where the two overlapped
382 // sample sequences are 'most alike', in terms of the highest cross-correlation
383 // value over the overlapping period
384 uint
TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE
*refPos
)
387 LONG_SAMPLETYPE bestCorr
, corr
;
389 const SAMPLETYPE
*compare
;
391 // Slopes the amplitude of the 'midBuffer' samples
392 precalcCorrReferenceMono();
397 // Scans for the best correlation value by testing each possible position
398 // over the permitted range.
399 for (tempOffset
= 0; tempOffset
< seekLength
; tempOffset
++)
401 compare
= refPos
+ tempOffset
;
403 // Calculates correlation value for the mixing position corresponding
405 corr
= calcCrossCorrMono(pRefMidBuffer
, compare
);
407 // Checks for the highest correlation value
411 bestOffs
= tempOffset
;
414 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
415 clearCrossCorrState();
421 // Seeks for the optimal overlap-mixing position. The 'mono' version of the
424 // The best position is determined as the position where the two overlapped
425 // sample sequences are 'most alike', in terms of the highest cross-correlation
426 // value over the overlapping period
427 uint
TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE
*refPos
)
431 LONG_SAMPLETYPE bestCorr
, corr
;
432 uint scanCount
, corrOffset
, tempOffset
;
434 // Slopes the amplitude of the 'midBuffer' samples
435 precalcCorrReferenceMono();
442 // Scans for the best correlation value using four-pass hierarchical search.
444 // The look-up table 'scans' has hierarchical position adjusting steps.
445 // In first pass the routine searhes for the highest correlation with
446 // relatively coarse steps, then rescans the neighbourhood of the highest
447 // correlation with better resolution and so on.
448 for (scanCount
= 0;scanCount
< 4; scanCount
++)
451 while (scanOffsets
[scanCount
][j
])
453 tempOffset
= corrOffset
+ scanOffsets
[scanCount
][j
];
454 if (tempOffset
>= seekLength
) break;
456 // Calculates correlation value for the mixing position corresponding
458 corr
= calcCrossCorrMono(refPos
+ tempOffset
, pRefMidBuffer
);
460 // Checks for the highest correlation value
464 bestOffs
= tempOffset
;
468 corrOffset
= bestOffs
;
470 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
471 clearCrossCorrState();
477 /// clear cross correlation routine state if necessary
478 void TDStretch::clearCrossCorrState()
480 // default implementation is empty.
484 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
485 // tempo, larger faster tempo.
486 void TDStretch::setTempo(float newTempo
)
492 // Calculate ideal skip length (according to tempo value)
493 nominalSkip
= tempo
* (seekWindowLength
- overlapLength
);
495 intskip
= (int)(nominalSkip
+ 0.5f
);
497 // Calculate how many samples are needed in the 'inputBuffer' to
498 // process another batch of samples
499 sampleReq
= max(intskip
+ overlapLength
, seekWindowLength
) + maxOffset
;
504 // Sets the number of channels, 1 = mono, 2 = stereo
505 void TDStretch::setChannels(uint numChannels
)
507 if (channels
== numChannels
) return;
508 assert(numChannels
== 1 || numChannels
== 2);
510 channels
= numChannels
;
511 inputBuffer
.setChannels(channels
);
512 outputBuffer
.setChannels(channels
);
516 // nominal tempo, no need for processing, just pass the samples through
518 void TDStretch::processNominalTempo()
520 assert(tempo
== 1.0f
);
524 // If there are samples in pMidBuffer waiting for overlapping,
525 // do a single sliding overlapping with them in order to prevent a
526 // clicking distortion in the output sound
527 if (inputBuffer
.numSamples() < overlapLength
)
529 // wait until we've got overlapLength input samples
532 // Mix the samples in the beginning of 'inputBuffer' with the
533 // samples in 'midBuffer' using sliding overlapping
534 overlap(outputBuffer
.ptrEnd(overlapLength
), inputBuffer
.ptrBegin(), 0);
535 outputBuffer
.putSamples(overlapLength
);
536 inputBuffer
.receiveSamples(overlapLength
);
538 // now we've caught the nominal sample flow and may switch to
542 // Simply bypass samples from input to output
543 outputBuffer
.moveSamples(inputBuffer
);
547 // Processes as many processing frames of the samples 'inputBuffer', store
548 // the result into 'outputBuffer'
549 void TDStretch::processSamples()
551 uint ovlSkip
, offset
;
556 // tempo not changed from the original, so bypass the processing
557 processNominalTempo();
561 if (bMidBufferDirty
== FALSE
)
563 // if midBuffer is empty, move the first samples of the input stream
565 if (inputBuffer
.numSamples() < overlapLength
)
567 // wait until we've got overlapLength samples
570 memcpy(pMidBuffer
, inputBuffer
.ptrBegin(), channels
* overlapLength
* sizeof(SAMPLETYPE
));
571 inputBuffer
.receiveSamples(overlapLength
);
572 bMidBufferDirty
= TRUE
;
575 // Process samples as long as there are enough samples in 'inputBuffer'
576 // to form a processing frame.
577 while (inputBuffer
.numSamples() >= sampleReq
)
579 // If tempo differs from the normal ('SCALE'), scan for the best overlapping
581 offset
= seekBestOverlapPosition(inputBuffer
.ptrBegin());
583 // Mix the samples in the 'inputBuffer' at position of 'offset' with the
584 // samples in 'midBuffer' using sliding overlapping
585 // ... first partially overlap with the end of the previous sequence
586 // (that's in 'midBuffer')
587 overlap(outputBuffer
.ptrEnd(overlapLength
), inputBuffer
.ptrBegin(), offset
);
588 outputBuffer
.putSamples(overlapLength
);
590 // ... then copy sequence samples from 'inputBuffer' to output
591 temp
= (seekWindowLength
- 2 * overlapLength
);// & 0xfffffffe;
594 outputBuffer
.putSamples(inputBuffer
.ptrBegin() + channels
* (offset
+ overlapLength
), temp
);
597 // Copies the end of the current sequence from 'inputBuffer' to
598 // 'midBuffer' for being mixed with the beginning of the next
599 // processing sequence and so on
600 assert(offset
+ seekWindowLength
<= inputBuffer
.numSamples());
601 memcpy(pMidBuffer
, inputBuffer
.ptrBegin() + channels
* (offset
+ seekWindowLength
- overlapLength
),
602 channels
* sizeof(SAMPLETYPE
) * overlapLength
);
603 bMidBufferDirty
= TRUE
;
605 // Remove the processed samples from the input buffer. Update
606 // the difference between integer & nominal skip step to 'skipFract'
607 // in order to prevent the error from accumulating over time.
608 skipFract
+= nominalSkip
; // real skip size
609 ovlSkip
= (int)skipFract
; // rounded to integer skip
610 skipFract
-= ovlSkip
; // maintain the fraction part, i.e. real vs. integer skip
611 inputBuffer
.receiveSamples(ovlSkip
);
616 // Adds 'numsamples' pcs of samples from the 'samples' memory position into
617 // the input of the object.
618 void TDStretch::putSamples(const SAMPLETYPE
*samples
, uint numSamples
)
620 // Add the samples into the input buffer
621 inputBuffer
.putSamples(samples
, numSamples
);
622 // Process the samples in input buffer
628 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
629 void TDStretch::acceptNewOverlapLength(uint newOverlapLength
)
633 prevOvl
= overlapLength
;
634 overlapLength
= newOverlapLength
;
636 if (overlapLength
> prevOvl
)
639 delete[] pRefMidBufferUnaligned
;
641 pMidBuffer
= new SAMPLETYPE
[overlapLength
* 2];
642 bMidBufferDirty
= TRUE
;
645 pRefMidBufferUnaligned
= new SAMPLETYPE
[2 * overlapLength
+ 16 / sizeof(SAMPLETYPE
)];
646 // ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency
647 pRefMidBuffer
= (SAMPLETYPE
*)((((ulong
)pRefMidBufferUnaligned
) + 15) & -16);
651 TDStretch
* TDStretch::newInstance()
655 uExtensions
= detectCPUextensions();
657 // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU
660 // MMX routines available only with integer sample types
661 if (uExtensions
& SUPPORT_MMX
)
663 return ::new TDStretchMMX
;
670 if (uExtensions
& SUPPORT_SSE
)
673 return ::new TDStretchSSE
;
680 if (uExtensions
& SUPPORT_3DNOW
)
683 return ::new TDStretch3DNow
;
686 #endif // ALLOW_3DNOW
689 // ISA optimizations not supported, use plain C version
690 return ::new TDStretch
;
695 //////////////////////////////////////////////////////////////////////////////
697 // Integer arithmetics specific algorithm implementations.
699 //////////////////////////////////////////////////////////////////////////////
701 #ifdef INTEGER_SAMPLES
703 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
704 // is faster to calculate
705 void TDStretch::precalcCorrReferenceStereo()
710 for (i
=0 ; i
< (int)overlapLength
;i
++)
712 temp
= i
* (overlapLength
- i
);
715 temp2
= (pMidBuffer
[cnt2
] * temp
) / slopingDivider
;
716 pRefMidBuffer
[cnt2
] = (short)(temp2
);
717 temp2
= (pMidBuffer
[cnt2
+ 1] * temp
) / slopingDivider
;
718 pRefMidBuffer
[cnt2
+ 1] = (short)(temp2
);
723 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
724 // is faster to calculate
725 void TDStretch::precalcCorrReferenceMono()
731 for (i
=0 ; i
< (int)overlapLength
;i
++)
733 temp
= i
* (overlapLength
- i
);
734 temp2
= (pMidBuffer
[i
] * temp
) / slopingDivider
;
735 pRefMidBuffer
[i
] = (short)temp2
;
740 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
741 // version of the routine.
742 void TDStretch::overlapStereo(short *output
, const short *input
) const
748 for (i
= 0; i
< (int)overlapLength
; i
++)
750 temp
= (short)(overlapLength
- i
);
752 output
[cnt2
] = (input
[cnt2
] * i
+ pMidBuffer
[cnt2
] * temp
) / overlapLength
;
753 output
[cnt2
+ 1] = (input
[cnt2
+ 1] * i
+ pMidBuffer
[cnt2
+ 1] * temp
) / overlapLength
;
758 /// Calculates overlap period length in samples.
759 /// Integer version rounds overlap length to closest power of 2
760 /// for a divide scaling operation.
761 void TDStretch::calculateOverlapLength(uint overlapMs
)
765 overlapDividerBits
= _getClosest2Power((sampleRate
* overlapMs
) / 1000.0);
766 if (overlapDividerBits
> 9) overlapDividerBits
= 9;
767 if (overlapDividerBits
< 4) overlapDividerBits
= 4;
768 newOvl
= (uint
)pow(2, overlapDividerBits
);
770 acceptNewOverlapLength(newOvl
);
772 // calculate sloping divider so that crosscorrelation operation won't
773 // overflow 32-bit register. Max. sum of the crosscorrelation sum without
774 // divider would be 2^30*(N^3-N)/3, where N = overlap length
775 slopingDivider
= (newOvl
* newOvl
- 1) / 3;
779 long TDStretch::calcCrossCorrMono(const short *mixingPos
, const short *compare
) const
785 for (i
= 1; i
< overlapLength
; i
++)
787 corr
+= (mixingPos
[i
] * compare
[i
]) >> overlapDividerBits
;
794 long TDStretch::calcCrossCorrStereo(const short *mixingPos
, const short *compare
) const
800 for (i
= 2; i
< 2 * overlapLength
; i
+= 2)
802 corr
+= (mixingPos
[i
] * compare
[i
] +
803 mixingPos
[i
+ 1] * compare
[i
+ 1]) >> overlapDividerBits
;
809 #endif // INTEGER_SAMPLES
811 //////////////////////////////////////////////////////////////////////////////
813 // Floating point arithmetics specific algorithm implementations.
819 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
820 // is faster to calculate
821 void TDStretch::precalcCorrReferenceStereo()
826 for (i
=0 ; i
< (int)overlapLength
;i
++)
828 temp
= (float)i
* (float)(overlapLength
- i
);
830 pRefMidBuffer
[cnt2
] = (float)(pMidBuffer
[cnt2
] * temp
);
831 pRefMidBuffer
[cnt2
+ 1] = (float)(pMidBuffer
[cnt2
+ 1] * temp
);
836 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
837 // is faster to calculate
838 void TDStretch::precalcCorrReferenceMono()
843 for (i
=0 ; i
< (int)overlapLength
;i
++)
845 temp
= (float)i
* (float)(overlapLength
- i
);
846 pRefMidBuffer
[i
] = (float)(pMidBuffer
[i
] * temp
);
851 // SSE-optimized version of the function overlapStereo
852 void TDStretch::overlapStereo(float *output
, const float *input
) const
860 fScale
= 1.0f
/ (float)overlapLength
;
862 for (i
= 0; i
< (int)overlapLength
; i
++)
864 fTemp
= (float)(overlapLength
- i
) * fScale
;
865 fi
= (float)i
* fScale
;
867 output
[cnt2
+ 0] = input
[cnt2
+ 0] * fi
+ pMidBuffer
[cnt2
+ 0] * fTemp
;
868 output
[cnt2
+ 1] = input
[cnt2
+ 1] * fi
+ pMidBuffer
[cnt2
+ 1] * fTemp
;
873 /// Calculates overlap period length in samples.
874 void TDStretch::calculateOverlapLength(uint overlapMs
)
878 newOvl
= (sampleRate
* overlapMs
) / 1000;
879 if (newOvl
< 16) newOvl
= 16;
881 acceptNewOverlapLength(newOvl
);
886 double TDStretch::calcCrossCorrMono(const float *mixingPos
, const float *compare
) const
892 for (i
= 1; i
< overlapLength
; i
++)
894 corr
+= mixingPos
[i
] * compare
[i
];
901 double TDStretch::calcCrossCorrStereo(const float *mixingPos
, const float *compare
) const
907 for (i
= 2; i
< 2 * overlapLength
; i
+= 2)
909 corr
+= mixingPos
[i
] * compare
[i
] +
910 mixingPos
[i
+ 1] * compare
[i
+ 1];
916 #endif // FLOAT_SAMPLES