libs/rubberband/rubberband/RubberBandStretcher.h

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     Rubber Band
   5     An audio time-stretching and pitch-shifting library.
   6     Copyright 2007-2008 Chris Cannam.
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License as
  10     published by the Free Software Foundation; either version 2 of the
  11     License, or (at your option) any later version.  See the file
  12     COPYING included with this distribution for more information.
  13 */
  14
  15 #ifndef _RUBBERBANDSTRETCHER_H_
  16 #define _RUBBERBANDSTRETCHER_H_
  17
  18 #define RUBBERBAND_VERSION "1.3.0-gpl"
  19 #define RUBBERBAND_API_MAJOR_VERSION 2
  20 #define RUBBERBAND_API_MINOR_VERSION 0
  21
#include <cstddef>
#include <vector>
  23
  24 /**
  25  * @mainpage RubberBand
  26  *
  27  * The Rubber Band API is contained in the single class
  28  * RubberBand::RubberBandStretcher.
  29  *
  30  * Threading notes for real-time applications:
  31  *
  32  * Multiple instances of RubberBandStretcher may be created and used
  33  * in separate threads concurrently.  However, for any single instance
  34  * of RubberBandStretcher, you may not call process() more than once
  35  * concurrently, and you may not change the time or pitch ratio while
  36  * a process() call is being executed (if the stretcher was created in
  37  * "real-time mode"; in "offline mode" you can't change the ratios
  38  * during use anyway).
  39  *
  40  * So you can run process() in its own thread if you like, but if you
  41  * want to change ratios dynamically from a different thread, you will
  42  * need some form of mutex in your code.  Changing the time or pitch
  43  * ratio is real-time safe except in extreme circumstances, so for
  44  * most applications that may change these dynamically it probably
  45  * makes most sense to do so from the same thread as calls process(),
  46  * even if that is a real-time thread.
  47  */
  48
  49 namespace RubberBand
  50 {
  51
  52 class RubberBandStretcher
  53 {
  54 public:
  55     /**
  56      * Processing options for the timestretcher.  The preferred
  57      * options should normally be set in the constructor, as a bitwise
  58      * OR of the option flags.  The default value (DefaultOptions) is
  59      * intended to give good results in most situations.
  60      *
  61      * 1. Flags prefixed \c OptionProcess determine how the timestretcher
  62      * will be invoked.  These options may not be changed after
  63      * construction.
  64      *
  65      *   \li \c OptionProcessOffline - Run the stretcher in offline
  66      *   mode.  In this mode the input data needs to be provided
  67      *   twice, once to study(), which calculates a stretch profile
  68      *   for the audio, and once to process(), which stretches it.
  69      *
  70      *   \li \c OptionProcessRealTime - Run the stretcher in real-time
  71      *   mode.  In this mode only process() should be called, and the
  72      *   stretcher adjusts dynamically in response to the input audio.
  73      *
  74      * The Process setting is likely to depend on your architecture:
  75      * non-real-time operation on seekable files: Offline; real-time
  76      * or streaming operation: RealTime.
  77      *
  78      * 2. Flags prefixed \c OptionStretch control the profile used for
  79      * variable timestretching.  Rubber Band always adjusts the
  80      * stretch profile to minimise stretching of busy broadband
  81      * transient sounds, but the degree to which it does so is
  82      * adjustable.  These options may not be changed after
  83      * construction.
  84      *
  85      *   \li \c OptionStretchElastic - Only meaningful in offline
  86      *   mode, and the default in that mode.  The audio will be
  87      *   stretched at a variable rate, aimed at preserving the quality
  88      *   of transient sounds as much as possible.  The timings of low
  89      *   activity regions between transients may be less exact than
  90      *   when the precise flag is set.
  91      *
  92      *   \li \c OptionStretchPrecise - Although still using a variable
  93      *   stretch rate, the audio will be stretched so as to maintain
  94      *   as close as possible to a linear stretch ratio throughout.
  95      *   Timing may be better than when using \c OptionStretchElastic, at
  96      *   slight cost to the sound quality of transients.  This setting
  97      *   is always used when running in real-time mode.
  98      *
  99      * 3. Flags prefixed \c OptionTransients control the component
 100      * frequency phase-reset mechanism that may be used at transient
 101      * points to provide clarity and realism to percussion and other
 102      * significant transient sounds.  These options may be changed
 103      * after construction when running in real-time mode, but not when
 104      * running in offline mode.
 105      *
 106      *   \li \c OptionTransientsCrisp - Reset component phases at the
 107      *   peak of each transient (the start of a significant note or
 108      *   percussive event).  This, the default setting, usually
 109      *   results in a clear-sounding output; but it is not always
 110      *   consistent, and may cause interruptions in stable sounds
 111      *   present at the same time as transient events.
 112      *
 113      *   \li \c OptionTransientsMixed - Reset component phases at the
 114      *   peak of each transient, outside a frequency range typical of
 115      *   musical fundamental frequencies.  The results may be more
 116      *   regular for mixed stable and percussive notes than
 117      *   \c OptionTransientsCrisp, but with a "phasier" sound.  The
 118      *   balance may sound very good for certain types of music and
 119      *   fairly bad for others.
 120      *
 121      *   \li \c OptionTransientsSmooth - Do not reset component phases
 122      *   at any point.  The results will be smoother and more regular
 123      *   but may be less clear than with either of the other
 124      *   transients flags.
 125      *
 126      * 4. Flags prefixed \c OptionPhase control the adjustment of
 127      * component frequency phases from one analysis window to the next
 128      * during non-transient segments.  These options may be changed at
 129      * any time.
 130      *
 131      *   \li \c OptionPhaseLaminar - Adjust phases when stretching in
 132      *   such a way as to try to retain the continuity of phase
 133      *   relationships between adjacent frequency bins whose phases
 134      *   are behaving in similar ways.  This, the default setting,
 135      *   should give good results in most situations.
 136      *
 137      *   \li \c OptionPhaseIndependent - Adjust the phase in each
 138      *   frequency bin independently from its neighbours.  This
 139      *   usually results in a slightly softer, phasier sound.
 140      *
 141      * 5. Flags prefixed \c OptionThreading control the threading
 142      * model of the stretcher.  These options may not be changed after
 143      * construction.
 144      *
 145      *   \li \c OptionThreadingAuto - Permit the stretcher to
 146      *   determine its own threading model.  Usually this means using
 147      *   one processing thread per audio channel in offline mode if
 148      *   the stretcher is able to determine that more than one CPU is
 149      *   available, and one thread only in realtime mode.
 150      *
 151      *   \li \c OptionThreadingNever - Never use more than one thread.
 152      *
 153      *   \li \c OptionThreadingAlways - Use multiple threads in any
 154      *   situation where \c OptionThreadingAuto would do so, except omit
 155      *   the check for multiple CPUs and instead assume it to be true.
 156      *
 157      * 6. Flags prefixed \c OptionWindow control the window size for
 158      * FFT processing.  The window size actually used will depend on
 159      * many factors, but it can be influenced.  These options may not
 160      * be changed after construction.
 161      *
 162      *   \li \c OptionWindowStandard - Use the default window size.
 163      *   The actual size will vary depending on other parameters.
 164      *   This option is expected to produce better results than the
 165      *   other window options in most situations.
 166      *
 167      *   \li \c OptionWindowShort - Use a shorter window.  This may
 168      *   result in crisper sound for audio that depends strongly on
 169      *   its timing qualities.
 170      *
 171      *   \li \c OptionWindowLong - Use a longer window.  This is
 172      *   likely to result in a smoother sound at the expense of
 173      *   clarity and timing.
 174      *
 175      * 7. Flags prefixed \c OptionFormant control the handling of
 176      * formant shape (spectral envelope) when pitch-shifting.  These
 177      * options may be changed at any time.
 178      *
 179      *   \li \c OptionFormantShifted - Apply no special formant
 180      *   processing.  The spectral envelope will be pitch shifted as
 181      *   normal.
 182      *
 183      *   \li \c OptionFormantPreserved - Preserve the spectral
 184      *   envelope of the unshifted signal.  This permits shifting the
 185      *   note frequency without so substantially affecting the
 186      *   perceived pitch profile of the voice or instrument.
 187      *
 188      * 8. Flags prefixed \c OptionPitch control the method used for
 189      * pitch shifting.  These options may be changed at any time.
 190      * They are only effective in realtime mode; in offline mode, the
 191      * pitch-shift method is fixed.
 192      *
 193      *   \li \c OptionPitchHighSpeed - Use a method with a CPU cost
 194      *   that is relatively moderate and predictable.  This may
 195      *   sound less clear than OptionPitchHighQuality, especially
 196      *   for large pitch shifts.
 197
 198      *   \li \c OptionPitchHighQuality - Use the highest quality
 199      *   method for pitch shifting.  This method has a CPU cost
 200      *   approximately proportional to the required frequency shift.
 201
 202      *   \li \c OptionPitchHighConsistency - Use the method that gives
 203      *   greatest consistency when used to create small variations in
 204      *   pitch around the 1.0-ratio level.  Unlike the previous two
 205      *   options, this avoids discontinuities when moving across the
 206      *   1.0 pitch scale in real-time; it also consumes more CPU than
 207      *   the others in the case where the pitch scale is exactly 1.0.
 208      */
 209
 210     enum Option {
 211
 212         OptionProcessOffline       = 0x00000000,
 213         OptionProcessRealTime      = 0x00000001,
 214
 215         OptionStretchElastic       = 0x00000000,
 216         OptionStretchPrecise       = 0x00000010,
 217
 218         OptionTransientsCrisp      = 0x00000000,
 219         OptionTransientsMixed      = 0x00000100,
 220         OptionTransientsSmooth     = 0x00000200,
 221
 222         OptionPhaseLaminar         = 0x00000000,
 223         OptionPhaseIndependent     = 0x00002000,
 224
 225         OptionThreadingAuto        = 0x00000000,
 226         OptionThreadingNever       = 0x00010000,
 227         OptionThreadingAlways      = 0x00020000,
 228
 229         OptionWindowStandard       = 0x00000000,
 230         OptionWindowShort          = 0x00100000,
 231         OptionWindowLong           = 0x00200000,
 232
 233         OptionFormantShifted       = 0x00000000,
 234         OptionFormantPreserved     = 0x01000000,
 235
 236         OptionPitchHighSpeed       = 0x00000000,
 237         OptionPitchHighQuality     = 0x02000000,
 238         OptionPitchHighConsistency = 0x04000000
 239     };
 240
 241     typedef int Options;
 242
 243     enum PresetOption {
 244         DefaultOptions             = 0x00000000,
 245         PercussiveOptions          = 0x00102000
 246     };
 247
 248     /**
 249      * Construct a time and pitch stretcher object to run at the given
 250      * sample rate, with the given number of channels.  Processing
 251      * options and the time and pitch scaling ratios may be provided.
 252      * The time and pitch ratios may be changed after construction,
 253      * but most of the options may not.  See the option documentation
 254      * above for more details.
 255      */
 256     RubberBandStretcher(size_t sampleRate,
 257                         size_t channels,
 258                         Options options = DefaultOptions,
 259                         double initialTimeRatio = 1.0,
 260                         double initialPitchScale = 1.0);
 261     ~RubberBandStretcher();
 262
 263     /**
 264      * Reset the stretcher's internal buffers.  The stretcher should
 265      * subsequently behave as if it had just been constructed
 266      * (although retaining the current time and pitch ratio).
 267      */
 268     void reset();
 269
 270     /**
 271      * Set the time ratio for the stretcher.  This is the ratio of
 272      * stretched to unstretched duration -- not tempo.  For example, a
 273      * ratio of 2.0 would make the audio twice as long (i.e. halve the
 274      * tempo); 0.5 would make it half as long (i.e. double the tempo);
 275      * 1.0 would leave the duration unaffected.
 276      *
 277      * If the stretcher was constructed in Offline mode, the time
 278      * ratio is fixed throughout operation; this function may be
 279      * called any number of times between construction (or a call to
 280      * reset()) and the first call to study() or process(), but may
 281      * not be called after study() or process() has been called.
 282      *
 283      * If the stretcher was constructed in RealTime mode, the time
 284      * ratio may be varied during operation; this function may be
 285      * called at any time, so long as it is not called concurrently
 286      * with process().  You should either call this function from the
 287      * same thread as process(), or provide your own mutex or similar
 288      * mechanism to ensure that setTimeRatio and process() cannot be
 289      * run at once (there is no internal mutex for this purpose).
 290      */
 291     void setTimeRatio(double ratio);
 292
 293     /**
 294      * Set the pitch scaling ratio for the stretcher.  This is the
 295      * ratio of target frequency to source frequency.  For example, a
 296      * ratio of 2.0 would shift up by one octave; 0.5 down by one
 297      * octave; or 1.0 leave the pitch unaffected.
 298      *
 299      * To put this in musical terms, a pitch scaling ratio
 300      * corresponding to a shift of S equal-tempered semitones (where S
 301      * is positive for an upwards shift and negative for downwards) is
 302      * pow(2.0, S / 12.0).
 303      *
 304      * If the stretcher was constructed in Offline mode, the pitch
 305      * scaling ratio is fixed throughout operation; this function may
 306      * be called any number of times between construction (or a call
 307      * to reset()) and the first call to study() or process(), but may
 308      * not be called after study() or process() has been called.
 309      *
 310      * If the stretcher was constructed in RealTime mode, the pitch
 311      * scaling ratio may be varied during operation; this function may
 312      * be called at any time, so long as it is not called concurrently
 313      * with process().  You should either call this function from the
 314      * same thread as process(), or provide your own mutex or similar
 315      * mechanism to ensure that setPitchScale and process() cannot be
 316      * run at once (there is no internal mutex for this purpose).
 317      */
 318     void setPitchScale(double scale);
 319
 320     /**
 321      * Return the last time ratio value that was set (either on
 322      * construction or with setTimeRatio()).
 323      */
 324     double getTimeRatio() const;
 325
 326     /**
 327      * Return the last pitch scaling ratio value that was set (either
 328      * on construction or with setPitchScale()).
 329      */
 330     double getPitchScale() const;
 331
 332     /**
 333      * Return the processing latency of the stretcher.  This is the
 334      * number of audio samples that one would have to discard at the
 335      * start of the output in order to ensure that the resulting audio
 336      * aligned with the input audio at the start.  In Offline mode,
 337      * latency is automatically adjusted for and the result is zero.
 338      * In RealTime mode, the latency may depend on the time and pitch
 339      * ratio and other options.
 340      */
 341     size_t getLatency() const;
 342
 343     /**
 344      * Change an OptionTransients configuration setting.  This may be
 345      * called at any time in RealTime mode.  It may not be called in
 346      * Offline mode (for which the transients option is fixed on
 347      * construction).
 348      */
 349     void setTransientsOption(Options options);
 350
 351     /**
 352      * Change an OptionPhase configuration setting.  This may be
 353      * called at any time in any mode.
 354      *
 355      * Note that if running multi-threaded in Offline mode, the change
 356      * may not take effect immediately if processing is already under
 357      * way when this function is called.
 358      */
 359     void setPhaseOption(Options options);
 360
 361     /**
 362      * Change an OptionFormant configuration setting.  This may be
 363      * called at any time in any mode.
 364      *
 365      * Note that if running multi-threaded in Offline mode, the change
 366      * may not take effect immediately if processing is already under
 367      * way when this function is called.
 368      */
 369     void setFormantOption(Options options);
 370
 371     /**
 372      * Change an OptionPitch configuration setting.  This may be
 373      * called at any time in RealTime mode.  It may not be called in
 374      * Offline mode (for which the transients option is fixed on
 375      * construction).
 376      */
 377     void setPitchOption(Options options);
 378
 379     /**
 380      * Tell the stretcher exactly how many input samples it will
 381      * receive.  This is only useful in Offline mode, when it allows
 382      * the stretcher to ensure that the number of output samples is
 383      * exactly correct.  In RealTime mode no such guarantee is
 384      * possible and this value is ignored.
 385      */
 386     void setExpectedInputDuration(size_t samples);
 387
 388     /**
 389      * Ask the stretcher how many audio sample frames should be
 390      * provided as input in order to ensure that some more output
 391      * becomes available.  Normal usage consists of querying this
 392      * function, providing that number of samples to process(),
 393      * reading the output using available() and retrieve(), and then
 394      * repeating.
 395      *
 396      * Note that this value is only relevant to process(), not to
 397      * study() (to which you may pass any number of samples at a time,
 398      * and from which there is no output).
 399      */
 400      size_t getSamplesRequired() const;
 401
 402     /**
 403      * Tell the stretcher the maximum number of sample frames that you
 404      * will ever be passing in to a single process() call. If you
 405      * don't call this function, the stretcher will assume that you
 406      * never pass in more samples than getSamplesRequired() suggested
 407      * you should.  You should not pass in more samples than that
 408      * unless you have called setMaxProcessSize first.
 409      *
 410      * This function may not be called after the first call to study()
 411      * or process().
 412      *
 413      * Note that this value is only relevant to process(), not to
 414      * study() (to which you may pass any number of samples at a time,
 415      * and from which there is no output).
 416      */
 417     void setMaxProcessSize(size_t samples);
 418
 419     /**
 420      * Provide a block of "samples" sample frames for the stretcher to
 421      * study and calculate a stretch profile from.
 422      *
 423      * This is only meaningful in Offline mode, and is required if
 424      * running in that mode.  You should pass the entire input through
 425      * study() before any process() calls are made, as a sequence of
 426      * blocks in individual study() calls, or as a single large block.
 427      *
 428      * "input" should point to de-interleaved audio data with one
 429      * float array per channel.  "samples" supplies the number of
 430      * audio sample frames available in "input".  If "samples" is
 431      * zero, "input" may be NULL.
 432      *
 433      * Set "final" to true if this is the last block of data that will
 434      * be provided to study() before the first process() call.
 435      */
 436     void study(const float *const *input, size_t samples, bool final);
 437
 438     /**
 439      * Provide a block of "samples" sample frames for processing.
 440      * See also getSamplesRequired() and setMaxProcessSize().
 441      *
 442      * Set "final" to true if this is the last block of input data.
 443      */
 444     void process(const float *const *input, size_t samples, bool final);
 445
 446     /**
 447      * Ask the stretcher how many audio sample frames of output data
 448      * are available for reading (via retrieve()).
 449      *
 450      * This function returns 0 if no frames are available: this
 451      * usually means more input data needs to be provided, but if the
 452      * stretcher is running in threaded mode it may just mean that not
 453      * enough data has yet been processed.  Call getSamplesRequired()
 454      * to discover whether more input is needed.
 455      *
 456      * This function returns -1 if all data has been fully processed
 457      * and all output read, and the stretch process is now finished.
 458      */
 459     int available() const;
 460
 461     /**
 462      * Obtain some processed output data from the stretcher.  Up to
 463      * "samples" samples will be stored in the output arrays (one per
 464      * channel for de-interleaved audio data) pointed to by "output".
 465      * The return value is the actual number of sample frames
 466      * retrieved.
 467      */
 468     size_t retrieve(float *const *output, size_t samples) const;
 469
 470     /**
 471      * Return the value of internal frequency cutoff value n.
 472      *
 473      * This function is not for general use.
 474      */
 475     float getFrequencyCutoff(int n) const;
 476
 477     /**
 478      * Set the value of internal frequency cutoff n to f Hz.
 479      *
 480      * This function is not for general use.
 481      */
 482     void setFrequencyCutoff(int n, float f);
 483
 484     /**
 485      * Retrieve the value of the internal input block increment value.
 486      *
 487      * This function is provided for diagnostic purposes only.
 488      */
 489     size_t getInputIncrement() const;
 490
 491     /**
 492      * In offline mode, retrieve the sequence of internal block
 493      * increments for output, for the entire audio data, provided the
 494      * stretch profile has been calculated.  In realtime mode,
 495      * retrieve any output increments that have accumulated since the
 496      * last call to getOutputIncrements, to a limit of 16.
 497      *
 498      * This function is provided for diagnostic purposes only.
 499      */
 500     std::vector<int> getOutputIncrements() const;
 501
 502     /**
 503      * In offline mode, retrieve the sequence of internal phase reset
 504      * detection function values, for the entire audio data, provided
 505      * the stretch profile has been calculated.  In realtime mode,
 506      * retrieve any phase reset points that have accumulated since the
 507      * last call to getPhaseResetCurve, to a limit of 16.
 508      *
 509      * This function is provided for diagnostic purposes only.
 510      */
 511     std::vector<float> getPhaseResetCurve() const;
 512
 513     /**
 514      * In offline mode, retrieve the sequence of internal frames for
 515      * which exact timing has been sought, for the entire audio data,
 516      * provided the stretch profile has been calculated.  In realtime
 517      * mode, return an empty sequence.
 518      *
 519      * This function is provided for diagnostic purposes only.
 520      */
 521     std::vector<int> getExactTimePoints() const;
 522
 523     /**
 524      * Return the number of channels this stretcher was constructed
 525      * with.
 526      */
 527     size_t getChannelCount() const;
 528
 529     /**
 530      * Force the stretcher to calculate a stretch profile.  Normally
 531      * this happens automatically for the first process() call in
 532      * offline mode.
 533      *
 534      * This function is provided for diagnostic purposes only.
 535      */
 536     void calculateStretch();
 537
 538     /**
 539      * Set the level of debug output.  The value may be from 0 (errors
 540      * only) to 3 (very verbose, with audible ticks in the output at
 541      * phase reset points).  The default is whatever has been set
 542      * using setDefaultDebugLevel, or 0 if that function has not been
 543      * called.
 544      */
 545     void setDebugLevel(int level);
 546
 547     /**
 548      * Set the default level of debug output for subsequently
 549      * constructed stretchers.
 550      *
 551      * @see setDebugLevel
 552      */
 553     static void setDefaultDebugLevel(int level);
 554
 555 protected:
 556     class Impl;
 557     Impl *m_d;
 558 };
 559
 560 }
 561
 562 #endif