dom/media/DynamicResampler.h

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
   4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #ifndef MOZILLA_DYNAMIC_RESAMPLER_H_
   7 #define MOZILLA_DYNAMIC_RESAMPLER_H_
   8
   9 #include "AudioRingBuffer.h"
  10 #include "AudioSegment.h"
  11
  12 #include <speex/speex_resampler.h>
  13
  14 namespace mozilla {
  15
  16 const uint32_t STEREO = 2;
  17
  18 /**
  19  * DynamicResampler allows updating on the fly the output sample rate and the
  20  * number of channels. In addition to that, it maintains an internal buffer for
  21  * the input data and allows pre-buffering as well. The Resample() method
  22  * strives to provide the requested number of output frames by using the input
  23  * data including any pre-buffering. If this is not possible then it will not
  24  * attempt to resample and it will return failure.
  25  *
  26  * Input data buffering makes use of the AudioRingBuffer. The capacity of the
  27  * buffer is 100ms of float audio and it is pre-allocated at the constructor.
  28  * No extra allocations take place when the input is appended. In addition to
  29  * that, due to special feature of AudioRingBuffer, no extra copies take place
  30  * when the input data is fed to the resampler.
  31  *
  32  * The sample format must be set before using any method. If the provided sample
  33  * format is of type short the pre-allocated capacity of the input buffer
  34  * becomes 200ms of short audio.
  35  *
  36  * The DynamicResampler is not thread-safe, so all the methods appart from the
  37  * constructor must be called on the same thread.
  38  */
  39 class DynamicResampler final {
  40  public:
  41   /**
  42    * Provide the initial input and output rate and the amount of pre-buffering.
  43    * The channel count will be set to stereo. Memory allocation will take
  44    * place. The input buffer is non-interleaved.
  45    */
  46   DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
  47                    uint32_t aPreBufferFrames = 0);
  48   ~DynamicResampler();
  49
  50   /**
  51    * Set the sample format type to float or short.
  52    */
  53   void SetSampleFormat(AudioSampleFormat aFormat);
  54   uint32_t GetOutRate() const { return mOutRate; }
  55   uint32_t GetChannels() const { return mChannels; }
  56
  57   /**
  58    * Append `aInFrames` number of frames from `aInBuffer` to the internal input
  59    * buffer. Memory copy/move takes place.
  60    */
  61   void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames);
  62   void AppendInput(const nsTArray<const int16_t*>& aInBuffer,
  63                    uint32_t aInFrames);
  64   /**
  65    * Append `aInFrames` number of frames of silence to the internal input
  66    * buffer. Memory copy/move takes place.
  67    */
  68   void AppendInputSilence(const uint32_t aInFrames);
  69   /**
  70    * Return the number of frames stored in the internal input buffer.
  71    */
  72   uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
  73   /**
  74    * Return the number of frames left to store in the internal input buffer.
  75    */
  76   uint32_t InFramesLeftToBuffer(uint32_t aChannelIndex) const;
  77
  78   /*
  79    * Resampler as much frame is needed from the internal input buffer to the
  80    * `aOutBuffer` in order to provide all `aOutFrames` and return true. If there
  81    * not enough input frames to provide the requested output frames no
  82    * resampling is attempted and false is returned.
  83    */
  84   bool Resample(float* aOutBuffer, uint32_t* aOutFrames,
  85                 uint32_t aChannelIndex);
  86   bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames,
  87                 uint32_t aChannelIndex);
  88
  89   /**
  90    * Update the output rate or/and the channel count. If a value is not updated
  91    * compared to the current one nothing happens. Changing the `aOutRate`
  92    * results in recalculation in the resampler. Changing `aChannels` results in
  93    * the reallocation of the internal input buffer with the exception of
  94    * changes between mono to stereo and vice versa where no reallocation takes
  95    * place. A stereo internal input buffer is always maintained even if the
  96    * sound is mono.
  97    */
  98   void UpdateResampler(uint32_t aOutRate, uint32_t aChannels);
  99
 100   /**
 101    * Returns true if the resampler has enough input data to provide to the
 102    * output of the `Resample()` method `aOutFrames` number of frames. This is a
 103    * way to know in advance if the `Resampler` method will return true or false
 104    * given that nothing changes in between.
 105    */
 106   bool CanResample(uint32_t aOutFrames) const;
 107
 108  private:
 109   template <typename T>
 110   void AppendInputInternal(const nsTArray<const T*>& aInBuffer,
 111                            uint32_t aInFrames) {
 112     MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
 113     for (uint32_t i = 0; i < mChannels; ++i) {
 114       PushInFrames(aInBuffer[i], aInFrames, i);
 115     }
 116   }
 117
 118   void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
 119                         float* aOutBuffer, uint32_t* aOutFrames,
 120                         uint32_t aChannelIndex);
 121   void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
 122                         int16_t* aOutBuffer, uint32_t* aOutFrames,
 123                         uint32_t aChannelIndex);
 124
 125   template <typename T>
 126   bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames,
 127                         uint32_t aChannelIndex) {
 128     MOZ_ASSERT(mInRate);
 129     MOZ_ASSERT(mOutRate);
 130     MOZ_ASSERT(mChannels);
 131     MOZ_ASSERT(aChannelIndex <= mChannels);
 132     MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length());
 133     MOZ_ASSERT(aOutFrames);
 134     MOZ_ASSERT(*aOutFrames);
 135
 136     // Not enough input, don't do anything
 137     if (!EnoughInFrames(*aOutFrames, aChannelIndex)) {
 138       *aOutFrames = 0;
 139       return false;
 140     }
 141
 142     if (mInRate == mOutRate) {
 143       mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, *aOutFrames));
 144       // Workaround to avoid discontinuity when the speex resampler operates
 145       // again. Feed it with the last 20 frames to warm up the internal memory
 146       // of the resampler and then skip memory equals to resampler's input
 147       // latency.
 148       mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames);
 149       return true;
 150     }
 151
 152     uint32_t totalOutFramesNeeded = *aOutFrames;
 153
 154     mInternalInBuffer[aChannelIndex].ReadNoCopy(
 155         [this, &aOutBuffer, &totalOutFramesNeeded,
 156          aChannelIndex](const Span<const T>& aInBuffer) -> uint32_t {
 157           if (!totalOutFramesNeeded) {
 158             return 0;
 159           }
 160           uint32_t outFramesResampled = totalOutFramesNeeded;
 161           uint32_t inFrames = aInBuffer.Length();
 162           ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
 163                            &outFramesResampled, aChannelIndex);
 164           aOutBuffer += outFramesResampled;
 165           totalOutFramesNeeded -= outFramesResampled;
 166           mInputTail[aChannelIndex].StoreTail<T>(aInBuffer);
 167           return inFrames;
 168         });
 169
 170     MOZ_ASSERT(totalOutFramesNeeded == 0);
 171     return true;
 172   }
 173
 174   bool EnoughInFrames(uint32_t aOutFrames, uint32_t aChannelIndex) const;
 175
 176   template <typename T>
 177   void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
 178                     uint32_t aChannelIndex) {
 179     MOZ_ASSERT(aInBuffer);
 180     MOZ_ASSERT(aInFrames);
 181     MOZ_ASSERT(mChannels);
 182     MOZ_ASSERT(aChannelIndex <= mChannels);
 183     MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length());
 184     mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
 185   }
 186
 187   void WarmUpResampler(bool aSkipLatency);
 188
 189  public:
 190   const uint32_t mInRate;
 191   const uint32_t mPreBufferFrames;
 192
 193  private:
 194   uint32_t mChannels = 0;
 195   uint32_t mOutRate;
 196
 197   AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
 198
 199   SpeexResamplerState* mResampler = nullptr;
 200   AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
 201
 202   class TailBuffer {
 203    public:
 204     template <typename T>
 205     T* Buffer() {
 206       return reinterpret_cast<T*>(mBuffer);
 207     }
 208     /* Store the MAXSIZE last elements of the buffer. */
 209     template <typename T>
 210     void StoreTail(const Span<const T>& aInBuffer) {
 211       StoreTail(aInBuffer.data(), aInBuffer.size());
 212     }
 213     template <typename T>
 214     void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
 215       if (aInFrames >= MAXSIZE) {
 216         PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
 217         mSize = MAXSIZE;
 218       } else {
 219         PodCopy(Buffer<T>(), aInBuffer, aInFrames);
 220         mSize = aInFrames;
 221       }
 222     }
 223     uint32_t Length() { return mSize; }
 224     static const uint32_t MAXSIZE = 20;
 225
 226    private:
 227     float mBuffer[MAXSIZE] = {};
 228     uint32_t mSize = 0;
 229   };
 230   AutoTArray<TailBuffer, STEREO> mInputTail;
 231 };
 232
 233 /**
 234  * AudioChunkList provides a way to have preallocated audio buffers in
 235  * AudioSegment. The idea is that the amount of  AudioChunks is created in
 236  * advance. Each AudioChunk is able to hold a specific amount of audio
 237  * (capacity). The total capacity of AudioChunkList is specified by the number
 238  * of AudioChunks. The important aspect of the AudioChunkList is that
 239  * preallocates everything and reuse the same chunks similar to a ring buffer.
 240  *
 241  * Why the whole AudioChunk is preallocated and not some raw memory buffer? This
 242  * is due to the limitations of MediaTrackGraph. The way that MTG works depends
 243  * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists
 244  * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the
 245  * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio
 246  * data was in a different form, the only way to use it from the audio thread
 247  * would be to create the AudioChunk there. That would result in a copy
 248  * operation (not very important) and most of all an allocation of the audio
 249  * buffer in the audio thread. This happens in many places inside MTG it's a bad
 250  * practice, though, and it has been avoided due to the AudioChunkList.
 251  *
 252  * After construction the sample format must be set, when it is available. It
 253  * can be set in the audio thread. Before setting the sample format is not
 254  * possible to use any method of AudioChunkList.
 255  *
 256  * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128
 257  * frames of float audio. Nevertheless, the sample format is not available at
 258  * that point. Thus if the sample format is set to short, the capacity of each
 259  * chunk changes to 256 number of frames, and the total duration becomes twice
 260  * big. There are methods to get the chunk capacity and total capacity in frames
 261  * and must always be used.
 262  *
 263  * Two things to note. First, when the channel count changes everything is
 264  * recreated which means reallocations. Second, the total capacity might differs
 265  * from the requested total capacity for two reasons. First, if the sample
 266  * format is set to short and second because the number of chunks in the list
 267  * divides exactly the final total capacity. The corresponding method must
 268  * always be used to query the total capacity.
 269  */
 270 class AudioChunkList {
 271  public:
 272   /**
 273    * Constructor, the final total duration might be different from the requested
 274    * `aTotalDuration`. Memory allocation takes place.
 275    */
 276   AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels,
 277                  const PrincipalHandle& aPrincipalHandle);
 278   AudioChunkList(const AudioChunkList&) = delete;
 279   AudioChunkList(AudioChunkList&&) = delete;
 280   ~AudioChunkList() = default;
 281
 282   /**
 283    * Set sample format. It must be done before any other method being used.
 284    */
 285   void SetSampleFormat(AudioSampleFormat aFormat);
 286   /**
 287    * Get the next available AudioChunk. The duration of the chunk will be zero
 288    * and the volume 1.0. However, the buffers will be there ready to be written.
 289    * Please note, that a reference of the preallocated chunk is returned. Thus
 290    * it _must not be consumed_ directly. If the chunk needs to be consumed it
 291    * must be copied to a temporary chunk first. For example:
 292    * ```
 293    *   AudioChunk& chunk = audioChunklist.GetNext();
 294    *   // Set up the chunk
 295    *   AudioChunk tmp = chunk;
 296    *   audioSegment.AppendAndConsumeChunk(std::move(tmp));
 297    * ```
 298    * This way no memory allocation or copy, takes place.
 299    */
 300   AudioChunk& GetNext();
 301
 302   /**
 303    * Get the capacity of each individual AudioChunk in the list.
 304    */
 305   uint32_t ChunkCapacity() const {
 306     MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
 307                mSampleFormat == AUDIO_FORMAT_FLOAT32);
 308     return mChunkCapacity;
 309   }
 310   /**
 311    * Get the total capacity of AudioChunkList.
 312    */
 313   uint32_t TotalCapacity() const {
 314     MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
 315                mSampleFormat == AUDIO_FORMAT_FLOAT32);
 316     return CheckedInt<uint32_t>(mChunkCapacity * mChunks.Length()).value();
 317   }
 318
 319   /**
 320    * Update the channel count of the AudioChunkList. Memory allocation is
 321    * taking place.
 322    */
 323   void Update(uint32_t aChannels);
 324
 325  private:
 326   void IncrementIndex() {
 327     ++mIndex;
 328     mIndex = CheckedInt<uint32_t>(mIndex % mChunks.Length()).value();
 329   }
 330   void CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels);
 331   void UpdateToMonoOrStereo(uint32_t aChannels);
 332
 333  private:
 334   const PrincipalHandle mPrincipalHandle;
 335   nsTArray<AudioChunk> mChunks;
 336   uint32_t mIndex = 0;
 337   uint32_t mChunkCapacity = WEBAUDIO_BLOCK_SIZE;
 338   AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
 339 };
 340
 341 /**
 342  * Audio Resampler is a resampler able to change the output rate and channels
 343  * count on the fly. The API is simple and it is based in AudioSegment in order
 344  * to be used MTG. All memory allocations, for input and output buffers, happen
 345  * in the constructor and when channel count changes. The memory is recycled in
 346  * order to avoid reallocations. It also supports prebuffering of silence. It
 347  * consists of DynamicResampler and AudioChunkList so please read their
 348  * documentation if you are interested in more details.
 349  *
 350  * The output buffer is preallocated  and returned in the form of AudioSegment.
 351  * The intention is to be used directly in a MediaTrack. Since an AudioChunk
 352  * must no be "shared" in order to be written, the AudioSegment returned by
 353  * resampler method must be cleaned up in order to be able for the `AudioChunk`s
 354  * that it consists of to be reused. For `MediaTrack::mSegment` this happens
 355  * every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus
 356  * memory capacity of 100ms has been preallocated for internal input and output
 357  * buffering.
 358  */
 359 class AudioResampler final {
 360  public:
 361   AudioResampler(uint32_t aInRate, uint32_t aOutRate, uint32_t aPreBufferFrames,
 362                  const PrincipalHandle& aPrincipalHandle);
 363
 364   /**
 365    * Append input data into the resampler internal buffer. Copy/move of the
 366    * memory is taking place. Also, the channel count will change according to
 367    * the channel count of the chunks.
 368    */
 369   void AppendInput(const AudioSegment& aInSegment);
 370   /**
 371    * Get the number of frames that can be read from the internal input buffer
 372    * before it becomes empty.
 373    */
 374   uint32_t InputReadableFrames() const;
 375   /**
 376    * Get the number of frames that can be written to the internal input buffer
 377    * before it becomes full.
 378    */
 379   uint32_t InputWritableFrames() const;
 380
 381   /*
 382    * Reguest `aOutFrames` of audio in the output sample rate. The internal
 383    * buffered input is used. If there is no enough input for that amount of
 384    * output and empty AudioSegment is returned
 385    */
 386   AudioSegment Resample(uint32_t aOutFrames);
 387
 388   /*
 389    * Updates the output rate that will be used by the resampler.
 390    */
 391   void UpdateOutRate(uint32_t aOutRate) {
 392     Update(aOutRate, mResampler.GetChannels());
 393   }
 394
 395  private:
 396   void UpdateChannels(uint32_t aChannels) {
 397     Update(mResampler.GetOutRate(), aChannels);
 398   }
 399   void Update(uint32_t aOutRate, uint32_t aChannels);
 400
 401  private:
 402   DynamicResampler mResampler;
 403   AudioChunkList mOutputChunks;
 404   bool mIsSampleFormatSet = false;
 405 };
 406
 407 }  // namespace mozilla
 408
 409 #endif  // MOZILLA_DYNAMIC_RESAMPLER_H_