dom/media/AudioSegment.h

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
   4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #ifndef MOZILLA_AUDIOSEGMENT_H_
   7 #define MOZILLA_AUDIOSEGMENT_H_
   8
   9 #include <speex/speex_resampler.h>
  10 #include "MediaSegment.h"
  11 #include "AudioSampleFormat.h"
  12 #include "AudioChannelFormat.h"
  13 #include "SharedBuffer.h"
  14 #include "WebAudioUtils.h"
  15 #include "mozilla/ScopeExit.h"
  16 #include "nsAutoRef.h"
  17 #ifdef MOZILLA_INTERNAL_API
  18 #  include "mozilla/TimeStamp.h"
  19 #endif
  20 #include <float.h>
  21
  22 namespace mozilla {
  23 struct AudioChunk;
  24 class AudioSegment;
  25 }  // namespace mozilla
  26 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk)
  27
  28 /**
  29  * This allows compilation of nsTArray<AudioSegment> and
  30  * AutoTArray<AudioSegment> since without it, static analysis fails on the
  31  * mChunks member being a non-memmovable AutoTArray.
  32  *
  33  * Note that AudioSegment(const AudioSegment&) is deleted, so this should
  34  * never come into effect.
  35  */
  36 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment)
  37
  38 namespace mozilla {
  39
  40 template <typename T>
  41 class SharedChannelArrayBuffer : public ThreadSharedObject {
  42  public:
  43   explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers)
  44       : mBuffers(std::move(aBuffers)) {}
  45
  46   size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
  47     size_t amount = 0;
  48     amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
  49     for (size_t i = 0; i < mBuffers.Length(); i++) {
  50       amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
  51     }
  52
  53     return amount;
  54   }
  55
  56   size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
  57     return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  58   }
  59
  60   nsTArray<nsTArray<T> > mBuffers;
  61 };
  62
  63 class AudioMixer;
  64
  65 /**
  66  * For auto-arrays etc, guess this as the common number of channels.
  67  */
  68 const int GUESS_AUDIO_CHANNELS = 2;
  69
  70 // We ensure that the graph advances in steps that are multiples of the Web
  71 // Audio block size
  72 const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
  73 const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
  74
  75 template <typename SrcT, typename DestT>
  76 static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
  77                                        uint32_t aLength, float aVolume,
  78                                        uint32_t aChannels, DestT* aOutput) {
  79   DestT* output = aOutput;
  80   for (size_t i = 0; i < aLength; ++i) {
  81     for (size_t channel = 0; channel < aChannels; ++channel) {
  82       float v =
  83           ConvertAudioSample<float>(aSourceChannels[channel][i]) * aVolume;
  84       *output = FloatToAudioSample<DestT>(v);
  85       ++output;
  86     }
  87   }
  88 }
  89
  90 template <typename SrcT, typename DestT>
  91 static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
  92                                          uint32_t aFrames, uint32_t aChannels,
  93                                          DestT** aOutput) {
  94   for (size_t i = 0; i < aChannels; i++) {
  95     size_t interleavedIndex = i;
  96     for (size_t j = 0; j < aFrames; j++) {
  97       aOutput[i][j] =
  98           ConvertAudioSample<DestT>(aSourceBuffer[interleavedIndex]);
  99       interleavedIndex += aChannels;
 100     }
 101   }
 102 }
 103
/**
 * Declares a shared block of zero bytes and a typed accessor for it. Only
 * declarations live here; the definitions are not part of this header.
 */
class SilentChannel {
 public:
  // Number of frames the zero channel is sized for.
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  // Byte storage sized for AUDIO_PROCESSING_FRAMES samples of the largest
  // sample type (MAX_AUDIO_SAMPLE_SIZE bytes each).
  static const uint8_t
      gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  // NOTE(review): presumably returns gZeroChannel viewed as samples of type
  // T -- confirm against the definition.
  template <typename T>
  static const T* ZeroChannel();
};
 114
 115 /**
 116  * Given an array of input channels (aChannelData), downmix to aOutputChannels,
 117  * interleave the channel data. A total of aOutputChannels*aDuration
 118  * interleaved samples will be copied to a channel buffer in aOutput.
 119  */
 120 template <typename SrcT, typename DestT>
 121 void DownmixAndInterleave(Span<const SrcT* const> aChannelData,
 122                           int32_t aDuration, float aVolume,
 123                           uint32_t aOutputChannels, DestT* aOutput) {
 124   if (aChannelData.Length() == aOutputChannels) {
 125     InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume,
 126                                aOutputChannels, aOutput);
 127   } else {
 128     AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData;
 129     AutoTArray<SrcT,
 130                SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
 131         outputBuffers;
 132     outputChannelData.SetLength(aOutputChannels);
 133     outputBuffers.SetLength(aDuration * aOutputChannels);
 134     for (uint32_t i = 0; i < aOutputChannels; i++) {
 135       outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
 136     }
 137     AudioChannelsDownMix<SrcT, SrcT>(aChannelData, outputChannelData,
 138                                      aDuration);
 139     InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume,
 140                                aOutputChannels, aOutput);
 141   }
 142 }
 143
 144 /**
 145  * An AudioChunk represents a multi-channel buffer of audio samples.
 146  * It references an underlying ThreadSharedObject which manages the lifetime
 147  * of the buffer. An AudioChunk maintains its own duration and channel data
 148  * pointers so it can represent a subinterval of a buffer without copying.
 149  * An AudioChunk can store its individual channels anywhere; it maintains
 150  * separate pointers to each channel's buffer.
 151  */
 152 struct AudioChunk {
 153   using SampleFormat = mozilla::AudioSampleFormat;
 154
 155   AudioChunk() = default;
 156
 157   template <typename T>
 158   AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer,
 159              const nsTArray<const T*>& aChannelData, TrackTime aDuration,
 160              PrincipalHandle aPrincipalHandle)
 161       : mDuration(aDuration),
 162         mBuffer(aBuffer),
 163         mBufferFormat(AudioSampleTypeToFormat<T>::Format),
 164         mPrincipalHandle(std::move(aPrincipalHandle)) {
 165     MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?");
 166     for (const T* data : aChannelData) {
 167       mChannelData.AppendElement(data);
 168     }
 169   }
 170
 171   // Generic methods
 172   void SliceTo(TrackTime aStart, TrackTime aEnd) {
 173     MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start");
 174     MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range");
 175     MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end");
 176
 177     if (mBuffer) {
 178       MOZ_ASSERT(aStart < INT32_MAX,
 179                  "Can't slice beyond 32-bit sample lengths");
 180       for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
 181         mChannelData[channel] = AddAudioSampleOffset(
 182             mChannelData[channel], mBufferFormat, int32_t(aStart));
 183       }
 184     }
 185     mDuration = aEnd - aStart;
 186   }
 187   TrackTime GetDuration() const { return mDuration; }
 188   bool CanCombineWithFollowing(const AudioChunk& aOther) const {
 189     if (aOther.mBuffer != mBuffer) {
 190       return false;
 191     }
 192     if (!mBuffer) {
 193       return true;
 194     }
 195     if (aOther.mVolume != mVolume) {
 196       return false;
 197     }
 198     if (aOther.mPrincipalHandle != mPrincipalHandle) {
 199       return false;
 200     }
 201     NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
 202                  "Wrong metadata about buffer");
 203     NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
 204                  "Mismatched channel count");
 205     if (mDuration > INT32_MAX) {
 206       return false;
 207     }
 208     for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
 209       if (aOther.mChannelData[channel] !=
 210           AddAudioSampleOffset(mChannelData[channel], mBufferFormat,
 211                                int32_t(mDuration))) {
 212         return false;
 213       }
 214     }
 215     return true;
 216   }
 217   bool IsNull() const { return mBuffer == nullptr; }
 218   void SetNull(TrackTime aDuration) {
 219     mBuffer = nullptr;
 220     mChannelData.Clear();
 221     mDuration = aDuration;
 222     mVolume = 1.0f;
 223     mBufferFormat = AUDIO_FORMAT_SILENCE;
 224     mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
 225   }
 226
 227   uint32_t ChannelCount() const { return mChannelData.Length(); }
 228
 229   bool IsMuted() const { return mVolume == 0.0f; }
 230
 231   size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const {
 232     return SizeOfExcludingThis(aMallocSizeOf, true);
 233   }
 234
 235   size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const {
 236     size_t amount = 0;
 237
 238     // Possibly owned:
 239     // - mBuffer - Can hold data that is also in the decoded audio queue. If it
 240     //             is not shared, or unshared == false it gets counted.
 241     if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
 242       amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
 243     }
 244
 245     // Memory in the array is owned by mBuffer.
 246     amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
 247     return amount;
 248   }
 249
 250   template <typename T>
 251   Span<const T* const> ChannelData() const {
 252     MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
 253     return Span(reinterpret_cast<const T* const*>(mChannelData.Elements()),
 254                 mChannelData.Length());
 255   }
 256
 257   /**
 258    * ChannelFloatsForWrite() should be used only when mBuffer is owned solely
 259    * by the calling thread.
 260    */
 261   template <typename T>
 262   T* ChannelDataForWrite(size_t aChannel) {
 263     MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
 264     MOZ_ASSERT(!mBuffer->IsShared());
 265     return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));
 266   }
 267
 268   template <typename T>
 269   static AudioChunk FromInterleavedBuffer(
 270       const T* aBuffer, size_t aFrames, uint32_t aChannels,
 271       const PrincipalHandle& aPrincipalHandle) {
 272     CheckedInt<size_t> bufferSize(sizeof(T));
 273     bufferSize *= aFrames;
 274     bufferSize *= aChannels;
 275     RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
 276
 277     AutoTArray<T*, 8> deinterleaved;
 278     if (aChannels == 1) {
 279       PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames);
 280       deinterleaved.AppendElement(static_cast<T*>(buffer->Data()));
 281     } else {
 282       deinterleaved.SetLength(aChannels);
 283       T* samples = static_cast<T*>(buffer->Data());
 284
 285       size_t offset = 0;
 286       for (uint32_t i = 0; i < aChannels; ++i) {
 287         deinterleaved[i] = samples + offset;
 288         offset += aFrames;
 289       }
 290
 291       DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames),
 292                                    aChannels, deinterleaved.Elements());
 293     }
 294
 295     AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData;
 296     channelData.AppendElements(deinterleaved);
 297     return AudioChunk(buffer.forget(), channelData,
 298                       static_cast<TrackTime>(aFrames), aPrincipalHandle);
 299   }
 300
 301   const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }
 302
 303   // aOutputChannels must contain pointers to channel data of length mDuration.
 304   void DownMixTo(Span<AudioDataValue* const> aOutputChannels) const;
 305
 306   TrackTime mDuration = 0;             // in frames within the buffer
 307   RefPtr<ThreadSharedObject> mBuffer;  // the buffer object whose lifetime is
 308                                        // managed; null means data is all zeroes
 309   // one pointer per channel; empty if and only if mBuffer is null
 310   CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData;
 311   float mVolume = 1.0f;  // volume multiplier to apply
 312   // format of frames in mBuffer (or silence if mBuffer is null)
 313   SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;
 314   // principalHandle for the data in this chunk.
 315   // This can be compared to an nsIPrincipal* when back on main thread.
 316   PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
 317 };
 318
 319 /**
 320  * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 321  * The audio rate is determined by the track, not stored in this class.
 322  */
 323 class AudioSegment final : public MediaSegmentBase<AudioSegment, AudioChunk> {
 324   // The channel count that MaxChannelCount() returned last time it was called.
 325   uint32_t mMemoizedMaxChannelCount = 0;
 326
 327  public:
 328   typedef mozilla::AudioSampleFormat SampleFormat;
 329
 330   AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}
 331
 332   AudioSegment(AudioSegment&& aSegment) = default;
 333
 334   AudioSegment(const AudioSegment&) = delete;
 335   AudioSegment& operator=(const AudioSegment&) = delete;
 336
 337   ~AudioSegment() = default;
 338
 339   // Resample the whole segment in place.  `aResampler` is an instance of a
 340   // resampler, initialized with `aResamplerChannelCount` channels. If this
 341   // function finds a chunk with more channels, `aResampler` is destroyed and a
 342   // new resampler is created, and `aResamplerChannelCount` is updated with the
 343   // new channel count value.
 344   void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
 345                       uint32_t* aResamplerChannelCount, uint32_t aInRate,
 346                       uint32_t aOutRate);
 347
 348   template <typename T>
 349   void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
 350                     const nsTArray<const T*>& aChannelData, TrackTime aDuration,
 351                     const PrincipalHandle& aPrincipalHandle) {
 352     AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData,
 353                                      aDuration, aPrincipalHandle));
 354   }
 355   void AppendSegment(const AudioSegment* aSegment) {
 356     MOZ_ASSERT(aSegment);
 357
 358     for (const AudioChunk& c : aSegment->mChunks) {
 359       AudioChunk* chunk = AppendChunk(c.GetDuration());
 360       chunk->mBuffer = c.mBuffer;
 361       chunk->mChannelData = c.mChannelData;
 362       chunk->mBufferFormat = c.mBufferFormat;
 363       chunk->mPrincipalHandle = c.mPrincipalHandle;
 364     }
 365   }
 366   template <typename T>
 367   void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames,
 368                                    uint32_t aChannels,
 369                                    const PrincipalHandle& aPrincipalHandle) {
 370     AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>(
 371         aBuffer, aFrames, aChannels, aPrincipalHandle));
 372   }
 373   // Write the segement data into an interleaved buffer. Do mixing if the
 374   // AudioChunk's channel count in the segment is different from aChannels.
 375   // Returns sample count of the converted audio data. The converted data will
 376   // be stored into aBuffer.
 377   size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
 378                                   uint32_t aChannels) const;
 379   // Consumes aChunk, and append it to the segment if its duration is not zero.
 380   void AppendAndConsumeChunk(AudioChunk&& aChunk) {
 381     AudioChunk unused;
 382     AudioChunk* chunk = &unused;
 383
 384     // Always consume aChunk. The chunk's mBuffer can be non-null even if its
 385     // duration is 0.
 386     auto consume = MakeScopeExit([&] {
 387       chunk->mBuffer = std::move(aChunk.mBuffer);
 388       chunk->mChannelData = std::move(aChunk.mChannelData);
 389
 390       MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(),
 391                  "Appending invalid data ?");
 392
 393       chunk->mVolume = aChunk.mVolume;
 394       chunk->mBufferFormat = aChunk.mBufferFormat;
 395       chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle);
 396     });
 397
 398     if (aChunk.GetDuration() == 0) {
 399       return;
 400     }
 401
 402     if (!mChunks.IsEmpty() &&
 403         mChunks.LastElement().CanCombineWithFollowing(aChunk)) {
 404       mChunks.LastElement().mDuration += aChunk.GetDuration();
 405       mDuration += aChunk.GetDuration();
 406       return;
 407     }
 408
 409     chunk = AppendChunk(aChunk.mDuration);
 410   }
 411   void ApplyVolume(float aVolume);
 412   // Mix the segment into a mixer, keeping it planar, up or down mixing to
 413   // aChannelCount channels.
 414   void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
 415
 416   // Returns the maximum channel count across all chunks in this segment.
 417   // Should there be no chunk with a channel count we return the memoized return
 418   // value from last time this method was called.
 419   uint32_t MaxChannelCount() {
 420     uint32_t channelCount = 0;
 421     for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
 422       if (ci->ChannelCount()) {
 423         channelCount = std::max(channelCount, ci->ChannelCount());
 424       }
 425     }
 426     if (channelCount == 0) {
 427       return mMemoizedMaxChannelCount;
 428     }
 429     return mMemoizedMaxChannelCount = channelCount;
 430   }
 431
 432   static Type StaticType() { return AUDIO; }
 433
 434   size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
 435     return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
 436   }
 437
 438   PrincipalHandle GetOldestPrinciple() const {
 439     const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0];
 440     return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE;
 441   }
 442
 443   // Iterate on each chunks until the input function returns true.
 444   template <typename Function>
 445   void IterateOnChunks(const Function&& aFunction) {
 446     for (uint32_t idx = 0; idx < mChunks.Length(); idx++) {
 447       if (aFunction(&mChunks[idx])) {
 448         return;
 449       }
 450     }
 451   }
 452
 453  private:
 454   template <typename T>
 455   void Resample(nsAutoRef<SpeexResamplerState>& aResampler,
 456                 uint32_t* aResamplerChannelCount, uint32_t aInRate,
 457                 uint32_t aOutRate);
 458 };
 459
 460 template <typename SrcT>
 461 void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,
 462                 float aVolume, AudioDataValue* aOutputBuffer) {
 463   CopyableAutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;
 464   channelData.AppendElements(aChunk.ChannelData<SrcT>());
 465
 466   if (channelData.Length() < aOutputChannels) {
 467     // Up-mix. Note that this might actually make channelData have more
 468     // than aOutputChannels temporarily.
 469     AudioChannelsUpMix(&channelData, aOutputChannels,
 470                        SilentChannel::ZeroChannel<SrcT>());
 471   }
 472   if (channelData.Length() > aOutputChannels) {
 473     // Down-mix.
 474     DownmixAndInterleave<SrcT>(channelData, aChunk.mDuration, aVolume,
 475                                aOutputChannels, aOutputBuffer);
 476   } else {
 477     InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration,
 478                                aVolume, aOutputChannels, aOutputBuffer);
 479   }
 480 }
 481
 482 }  // namespace mozilla
 483
 484 #endif /* MOZILLA_AUDIOSEGMENT_H_ */