1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef MOZILLA_AUDIOSEGMENT_H_
7 #define MOZILLA_AUDIOSEGMENT_H_
9 #include <speex/speex_resampler.h>
10 #include "MediaTrackGraph.h"
11 #include "MediaSegment.h"
12 #include "AudioSampleFormat.h"
13 #include "AudioChannelFormat.h"
14 #include "SharedBuffer.h"
15 #include "WebAudioUtils.h"
16 #include "nsAutoRef.h"
17 #ifdef MOZILLA_INTERNAL_API
18 # include "mozilla/TimeStamp.h"
25 } // namespace mozilla
26 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk
)
29 * This allows compilation of nsTArray<AudioSegment> and
30 * AutoTArray<AudioSegment> since without it, static analysis fails on the
31 * mChunks member being a non-memmovable AutoTArray.
33 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
34 * never come into effect.
36 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment
)
41 class SharedChannelArrayBuffer
: public ThreadSharedObject
{
43 explicit SharedChannelArrayBuffer(nsTArray
<nsTArray
<T
> >&& aBuffers
)
44 : mBuffers(std::move(aBuffers
)) {}
46 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
) const override
{
48 amount
+= mBuffers
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
49 for (size_t i
= 0; i
< mBuffers
.Length(); i
++) {
50 amount
+= mBuffers
[i
].ShallowSizeOfExcludingThis(aMallocSizeOf
);
56 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
57 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
60 nsTArray
<nsTArray
<T
> > mBuffers
;
/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size.
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
75 template <typename SrcT
, typename DestT
>
76 static void InterleaveAndConvertBuffer(const SrcT
* const* aSourceChannels
,
77 uint32_t aLength
, float aVolume
,
78 uint32_t aChannels
, DestT
* aOutput
) {
79 DestT
* output
= aOutput
;
80 for (size_t i
= 0; i
< aLength
; ++i
) {
81 for (size_t channel
= 0; channel
< aChannels
; ++channel
) {
82 float v
= AudioSampleToFloat(aSourceChannels
[channel
][i
]) * aVolume
;
83 *output
= FloatToAudioSample
<DestT
>(v
);
/**
 * Deinterleave aSourceBuffer (aFrames frames of aChannels interleaved
 * samples) into the planar destination arrays aOutput, converting the
 * sample format from SrcT to DestT on the way.
 *
 * @param aSourceBuffer  aFrames*aChannels interleaved source samples
 * @param aFrames        number of frames
 * @param aChannels      number of interleaved channels
 * @param aOutput        aChannels destination pointers, each to aFrames samples
 */
template <typename SrcT, typename DestT>
static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                                         uint32_t aFrames, uint32_t aChannels,
                                         DestT** aOutput) {
  for (size_t i = 0; i < aChannels; i++) {
    // Channel i's first sample sits at offset i; subsequent samples are
    // aChannels apart in the interleaved layout.
    size_t interleavedIndex = i;
    for (size_t j = 0; j < aFrames; j++) {
      ConvertAudioSample(aSourceBuffer[interleavedIndex], aOutput[i][j]);
      interleavedIndex += aChannels;
    }
  }
}
102 class SilentChannel
{
104 static const int AUDIO_PROCESSING_FRAMES
= 640; /* > 10ms of 48KHz audio */
106 gZeroChannel
[MAX_AUDIO_SAMPLE_SIZE
* AUDIO_PROCESSING_FRAMES
];
107 // We take advantage of the fact that zero in float and zero in int have the
108 // same all-zeros bit layout.
109 template <typename T
>
110 static const T
* ZeroChannel();
114 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
115 * interleave the channel data. A total of aOutputChannels*aDuration
116 * interleaved samples will be copied to a channel buffer in aOutput.
118 template <typename SrcT
, typename DestT
>
119 void DownmixAndInterleave(const nsTArray
<const SrcT
*>& aChannelData
,
120 int32_t aDuration
, float aVolume
,
121 uint32_t aOutputChannels
, DestT
* aOutput
) {
122 if (aChannelData
.Length() == aOutputChannels
) {
123 InterleaveAndConvertBuffer(aChannelData
.Elements(), aDuration
, aVolume
,
124 aOutputChannels
, aOutput
);
126 AutoTArray
<SrcT
*, GUESS_AUDIO_CHANNELS
> outputChannelData
;
128 SilentChannel::AUDIO_PROCESSING_FRAMES
* GUESS_AUDIO_CHANNELS
>
130 outputChannelData
.SetLength(aOutputChannels
);
131 outputBuffers
.SetLength(aDuration
* aOutputChannels
);
132 for (uint32_t i
= 0; i
< aOutputChannels
; i
++) {
133 outputChannelData
[i
] = outputBuffers
.Elements() + aDuration
* i
;
135 AudioChannelsDownMix(aChannelData
, outputChannelData
.Elements(),
136 aOutputChannels
, aDuration
);
137 InterleaveAndConvertBuffer(outputChannelData
.Elements(), aDuration
, aVolume
,
138 aOutputChannels
, aOutput
);
143 * An AudioChunk represents a multi-channel buffer of audio samples.
144 * It references an underlying ThreadSharedObject which manages the lifetime
145 * of the buffer. An AudioChunk maintains its own duration and channel data
146 * pointers so it can represent a subinterval of a buffer without copying.
147 * An AudioChunk can store its individual channels anywhere; it maintains
148 * separate pointers to each channel's buffer.
151 typedef mozilla::AudioSampleFormat SampleFormat
;
154 void SliceTo(TrackTime aStart
, TrackTime aEnd
) {
155 MOZ_ASSERT(aStart
>= 0 && aStart
< aEnd
&& aEnd
<= mDuration
,
156 "Slice out of bounds");
158 MOZ_ASSERT(aStart
< INT32_MAX
,
159 "Can't slice beyond 32-bit sample lengths");
160 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
161 mChannelData
[channel
] = AddAudioSampleOffset(
162 mChannelData
[channel
], mBufferFormat
, int32_t(aStart
));
165 mDuration
= aEnd
- aStart
;
167 TrackTime
GetDuration() const { return mDuration
; }
168 bool CanCombineWithFollowing(const AudioChunk
& aOther
) const {
169 if (aOther
.mBuffer
!= mBuffer
) {
175 if (aOther
.mVolume
!= mVolume
) {
178 if (aOther
.mPrincipalHandle
!= mPrincipalHandle
) {
181 NS_ASSERTION(aOther
.mBufferFormat
== mBufferFormat
,
182 "Wrong metadata about buffer");
183 NS_ASSERTION(aOther
.mChannelData
.Length() == mChannelData
.Length(),
184 "Mismatched channel count");
185 if (mDuration
> INT32_MAX
) {
188 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
189 if (aOther
.mChannelData
[channel
] !=
190 AddAudioSampleOffset(mChannelData
[channel
], mBufferFormat
,
191 int32_t(mDuration
))) {
197 bool IsNull() const { return mBuffer
== nullptr; }
198 void SetNull(TrackTime aDuration
) {
200 mChannelData
.Clear();
201 mDuration
= aDuration
;
203 mBufferFormat
= AUDIO_FORMAT_SILENCE
;
204 mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
207 uint32_t ChannelCount() const { return mChannelData
.Length(); }
209 bool IsMuted() const { return mVolume
== 0.0f
; }
211 size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf
) const {
212 return SizeOfExcludingThis(aMallocSizeOf
, true);
215 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
, bool aUnshared
) const {
219 // - mBuffer - Can hold data that is also in the decoded audio queue. If it
220 // is not shared, or unshared == false it gets counted.
221 if (mBuffer
&& (!aUnshared
|| !mBuffer
->IsShared())) {
222 amount
+= mBuffer
->SizeOfIncludingThis(aMallocSizeOf
);
225 // Memory in the array is owned by mBuffer.
226 amount
+= mChannelData
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
230 template <typename T
>
231 const nsTArray
<const T
*>& ChannelData() const {
232 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
233 return *reinterpret_cast<const AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
>*>(
238 * ChannelFloatsForWrite() should be used only when mBuffer is owned solely
239 * by the calling thread.
241 template <typename T
>
242 T
* ChannelDataForWrite(size_t aChannel
) {
243 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
244 MOZ_ASSERT(!mBuffer
->IsShared());
245 return static_cast<T
*>(const_cast<void*>(mChannelData
[aChannel
]));
248 const PrincipalHandle
& GetPrincipalHandle() const { return mPrincipalHandle
; }
250 TrackTime mDuration
= 0; // in frames within the buffer
251 RefPtr
<ThreadSharedObject
> mBuffer
; // the buffer object whose lifetime is
252 // managed; null means data is all zeroes
253 // one pointer per channel; empty if and only if mBuffer is null
254 CopyableAutoTArray
<const void*, GUESS_AUDIO_CHANNELS
> mChannelData
;
255 float mVolume
= 1.0f
; // volume multiplier to apply
256 // format of frames in mBuffer (or silence if mBuffer is null)
257 SampleFormat mBufferFormat
= AUDIO_FORMAT_SILENCE
;
258 // principalHandle for the data in this chunk.
259 // This can be compared to an nsIPrincipal* when back on main thread.
260 PrincipalHandle mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
264 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
265 * The audio rate is determined by the track, not stored in this class.
267 class AudioSegment
: public MediaSegmentBase
<AudioSegment
, AudioChunk
> {
268 // The channel count that MaxChannelCount() returned last time it was called.
269 uint32_t mMemoizedMaxChannelCount
= 0;
272 typedef mozilla::AudioSampleFormat SampleFormat
;
274 AudioSegment() : MediaSegmentBase
<AudioSegment
, AudioChunk
>(AUDIO
) {}
276 AudioSegment(AudioSegment
&& aSegment
) = default;
278 AudioSegment(const AudioSegment
&) = delete;
279 AudioSegment
& operator=(const AudioSegment
&) = delete;
281 ~AudioSegment() = default;
283 // Resample the whole segment in place. `aResampler` is an instance of a
284 // resampler, initialized with `aResamplerChannelCount` channels. If this
285 // function finds a chunk with more channels, `aResampler` is destroyed and a
286 // new resampler is created, and `aResamplerChannelCount` is updated with the
287 // new channel count value.
288 template <typename T
>
289 void Resample(nsAutoRef
<SpeexResamplerState
>& aResampler
,
290 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
294 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
295 AutoTArray
<nsTArray
<T
>, GUESS_AUDIO_CHANNELS
> output
;
296 AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
> bufferPtrs
;
298 // If this chunk is null, don't bother resampling, just alter its duration
300 c
.mDuration
= (c
.mDuration
* aOutRate
) / aInRate
;
301 mDuration
+= c
.mDuration
;
304 uint32_t channels
= c
.mChannelData
.Length();
305 // This might introduce a discontinuity, but a channel count change in the
306 // middle of a stream is not that common. This also initializes the
307 // resampler as late as possible.
308 if (channels
!= *aResamplerChannelCount
) {
309 SpeexResamplerState
* state
=
310 speex_resampler_init(channels
, aInRate
, aOutRate
,
311 SPEEX_RESAMPLER_QUALITY_DEFAULT
, nullptr);
313 aResampler
.own(state
);
314 *aResamplerChannelCount
= channels
;
316 output
.SetLength(channels
);
317 bufferPtrs
.SetLength(channels
);
318 uint32_t inFrames
= c
.mDuration
;
319 // Round up to allocate; the last frame may not be used.
320 NS_ASSERTION((UINT32_MAX
- aInRate
+ 1) / c
.mDuration
>= aOutRate
,
322 uint32_t outSize
= (c
.mDuration
* aOutRate
+ aInRate
- 1) / aInRate
;
323 for (uint32_t i
= 0; i
< channels
; i
++) {
324 T
* out
= output
[i
].AppendElements(outSize
);
325 uint32_t outFrames
= outSize
;
327 const T
* in
= static_cast<const T
*>(c
.mChannelData
[i
]);
328 dom::WebAudioUtils::SpeexResamplerProcess(aResampler
.get(), i
, in
,
329 &inFrames
, out
, &outFrames
);
330 MOZ_ASSERT(inFrames
== c
.mDuration
);
333 output
[i
].SetLength(outFrames
);
335 MOZ_ASSERT(channels
> 0);
336 c
.mDuration
= output
[0].Length();
337 c
.mBuffer
= new mozilla::SharedChannelArrayBuffer
<T
>(std::move(output
));
338 for (uint32_t i
= 0; i
< channels
; i
++) {
339 c
.mChannelData
[i
] = bufferPtrs
[i
];
341 mDuration
+= c
.mDuration
;
345 void ResampleChunks(nsAutoRef
<SpeexResamplerState
>& aResampler
,
346 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
348 void AppendFrames(already_AddRefed
<ThreadSharedObject
> aBuffer
,
349 const nsTArray
<const float*>& aChannelData
,
351 const PrincipalHandle
& aPrincipalHandle
) {
352 AudioChunk
* chunk
= AppendChunk(aDuration
);
353 chunk
->mBuffer
= aBuffer
;
355 MOZ_ASSERT(chunk
->mBuffer
|| aChannelData
.IsEmpty(),
356 "Appending invalid data ?");
358 for (uint32_t channel
= 0; channel
< aChannelData
.Length(); ++channel
) {
359 chunk
->mChannelData
.AppendElement(aChannelData
[channel
]);
361 chunk
->mBufferFormat
= AUDIO_FORMAT_FLOAT32
;
362 chunk
->mPrincipalHandle
= aPrincipalHandle
;
364 void AppendFrames(already_AddRefed
<ThreadSharedObject
> aBuffer
,
365 const nsTArray
<const int16_t*>& aChannelData
,
367 const PrincipalHandle
& aPrincipalHandle
) {
368 AudioChunk
* chunk
= AppendChunk(aDuration
);
369 chunk
->mBuffer
= aBuffer
;
371 MOZ_ASSERT(chunk
->mBuffer
|| aChannelData
.IsEmpty(),
372 "Appending invalid data ?");
374 for (uint32_t channel
= 0; channel
< aChannelData
.Length(); ++channel
) {
375 chunk
->mChannelData
.AppendElement(aChannelData
[channel
]);
377 chunk
->mBufferFormat
= AUDIO_FORMAT_S16
;
378 chunk
->mPrincipalHandle
= aPrincipalHandle
;
380 // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
382 AudioChunk
* AppendAndConsumeChunk(AudioChunk
* aChunk
) {
383 AudioChunk
* chunk
= AppendChunk(aChunk
->mDuration
);
384 chunk
->mBuffer
= std::move(aChunk
->mBuffer
);
385 chunk
->mChannelData
= std::move(aChunk
->mChannelData
);
387 MOZ_ASSERT(chunk
->mBuffer
|| aChunk
->mChannelData
.IsEmpty(),
388 "Appending invalid data ?");
390 chunk
->mVolume
= aChunk
->mVolume
;
391 chunk
->mBufferFormat
= aChunk
->mBufferFormat
;
392 chunk
->mPrincipalHandle
= aChunk
->mPrincipalHandle
;
395 void ApplyVolume(float aVolume
);
396 // Mix the segment into a mixer, interleaved. This is useful to output a
397 // segment to a system audio callback. It up or down mixes to aChannelCount
399 void WriteTo(AudioMixer
& aMixer
, uint32_t aChannelCount
,
400 uint32_t aSampleRate
);
401 // Mix the segment into a mixer, keeping it planar, up or down mixing to
402 // aChannelCount channels.
403 void Mix(AudioMixer
& aMixer
, uint32_t aChannelCount
, uint32_t aSampleRate
);
405 // Returns the maximum channel count across all chunks in this segment.
406 // Should there be no chunk with a channel count we return the memoized return
407 // value from last time this method was called.
408 uint32_t MaxChannelCount() {
409 uint32_t channelCount
= 0;
410 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
411 if (ci
->ChannelCount()) {
412 channelCount
= std::max(channelCount
, ci
->ChannelCount());
415 if (channelCount
== 0) {
416 return mMemoizedMaxChannelCount
;
418 return mMemoizedMaxChannelCount
= channelCount
;
421 static Type
StaticType() { return AUDIO
; }
423 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
424 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
428 template <typename SrcT
>
429 void WriteChunk(AudioChunk
& aChunk
, uint32_t aOutputChannels
,
430 AudioDataValue
* aOutputBuffer
) {
431 AutoTArray
<const SrcT
*, GUESS_AUDIO_CHANNELS
> channelData
;
433 channelData
= aChunk
.ChannelData
<SrcT
>().Clone();
435 if (channelData
.Length() < aOutputChannels
) {
436 // Up-mix. Note that this might actually make channelData have more
437 // than aOutputChannels temporarily.
438 AudioChannelsUpMix(&channelData
, aOutputChannels
,
439 SilentChannel::ZeroChannel
<SrcT
>());
441 if (channelData
.Length() > aOutputChannels
) {
443 DownmixAndInterleave(channelData
, aChunk
.mDuration
, aChunk
.mVolume
,
444 aOutputChannels
, aOutputBuffer
);
446 InterleaveAndConvertBuffer(channelData
.Elements(), aChunk
.mDuration
,
447 aChunk
.mVolume
, aOutputChannels
, aOutputBuffer
);
451 } // namespace mozilla
453 #endif /* MOZILLA_AUDIOSEGMENT_H_ */