[gecko.git] dom/media/AudioSegment.h
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_AUDIOSEGMENT_H_
#define MOZILLA_AUDIOSEGMENT_H_

#include "MediaSegment.h"
#include "AudioSampleFormat.h"
#include "AudioChannelFormat.h"
#include "SharedBuffer.h"
#include "WebAudioUtils.h"
#ifdef MOZILLA_INTERNAL_API
#include "mozilla/TimeStamp.h"
#endif
#include <float.h>
namespace mozilla {
struct AudioChunk;
class AudioSegment;
} // namespace mozilla

DECLARE_USE_COPY_CONSTRUCTORS(mozilla::AudioChunk)

/**
 * This allows compilation of nsTArray<AudioSegment> and
 * AutoTArray<AudioSegment> since without it, static analysis fails on the
 * mChunks member being a non-memmovable AutoTArray.
 *
 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
 * never come into effect.
 */
DECLARE_USE_COPY_CONSTRUCTORS(mozilla::AudioSegment)

namespace mozilla {
template<typename T>
class SharedChannelArrayBuffer : public ThreadSharedObject {
public:
  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >* aBuffers)
  {
    mBuffers.SwapElements(*aBuffers);
  }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
  {
    size_t amount = 0;
    amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
    for (size_t i = 0; i < mBuffers.Length(); i++) {
      amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
    }
    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
  {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

  nsTArray<nsTArray<T> > mBuffers;
};
class AudioMixer;

/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size.
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
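// With WEBAUDIO_BLOCK_SIZE_BITS == 7, WEBAUDIO_BLOCK_SIZE works out to
// 1 << 7 == 128 frames, the Web Audio API render quantum.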
template <typename SrcT, typename DestT>
static void
InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
                           uint32_t aLength, float aVolume,
                           uint32_t aChannels,
                           DestT* aOutput)
{
  DestT* output = aOutput;
  for (size_t i = 0; i < aLength; ++i) {
    for (size_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}
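/*
 * Illustrative sketch of what InterleaveAndConvertBuffer does (hypothetical
 * buffers; the int16_t conversion is assumed to come from the
 * FloatToAudioSample specializations in AudioSampleFormat.h): planar stereo
 * input
 *   L: L0 L1 L2 ...   R: R0 R1 R2 ...
 * becomes the interleaved output L0 R0 L1 R1 L2 R2 ..., with each sample
 * converted to the destination format and scaled by aVolume.
 *
 *   float left[128], right[128];              // filled elsewhere
 *   const float* channels[2] = { left, right };
 *   int16_t interleaved[2 * 128];
 *   InterleaveAndConvertBuffer(channels, 128, 1.0f, 2, interleaved);
 */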
template <typename SrcT, typename DestT>
static void
DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                             uint32_t aFrames, uint32_t aChannels,
                             DestT** aOutput)
{
  for (size_t i = 0; i < aChannels; i++) {
    size_t interleavedIndex = i;
    for (size_t j = 0; j < aFrames; j++) {
      ConvertAudioSample(aSourceBuffer[interleavedIndex],
                         aOutput[i][j]);
      interleavedIndex += aChannels;
    }
  }
}
class SilentChannel
{
public:
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  template<typename T>
  static const T* ZeroChannel();
};
/**
 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
 * interleave the channel data. A total of aOutputChannels*aDuration
 * interleaved samples will be copied to a channel buffer in aOutput.
 */
template <typename SrcT, typename DestT>
void
DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData,
                     int32_t aDuration, float aVolume, uint32_t aOutputChannels,
                     DestT* aOutput)
{
  if (aChannelData.Length() == aOutputChannels) {
    InterleaveAndConvertBuffer(aChannelData.Elements(),
                               aDuration, aVolume, aOutputChannels, aOutput);
  } else {
    AutoTArray<SrcT*,GUESS_AUDIO_CHANNELS> outputChannelData;
    AutoTArray<SrcT, SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> outputBuffers;
    outputChannelData.SetLength(aOutputChannels);
    outputBuffers.SetLength(aDuration * aOutputChannels);
    for (uint32_t i = 0; i < aOutputChannels; i++) {
      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
    }
    AudioChannelsDownMix(aChannelData,
                         outputChannelData.Elements(),
                         aOutputChannels,
                         aDuration);
    InterleaveAndConvertBuffer(outputChannelData.Elements(),
                               aDuration, aVolume, aOutputChannels, aOutput);
  }
}
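/*
 * Illustrative usage sketch: downmix four planar float channels to stereo and
 * interleave into an int16_t buffer. The channel pointers and frame count are
 * hypothetical; only the DownmixAndInterleave contract above is assumed.
 *
 *   const float* quad[4] = { fl, fr, rl, rr };   // 256 frames per channel
 *   nsTArray<const float*> channels;
 *   channels.AppendElements(quad, 4);
 *   int16_t interleaved[2 * 256];                // aOutputChannels * aDuration
 *   DownmixAndInterleave(channels, 256, 1.0f, 2, interleaved);
 */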
/**
 * An AudioChunk represents a multi-channel buffer of audio samples.
 * It references an underlying ThreadSharedObject which manages the lifetime
 * of the buffer. An AudioChunk maintains its own duration and channel data
 * pointers so it can represent a subinterval of a buffer without copying.
 * An AudioChunk can store its individual channels anywhere; it maintains
 * separate pointers to each channel's buffer.
 */
struct AudioChunk {
  typedef mozilla::AudioSampleFormat SampleFormat;

  // Generic methods
  void SliceTo(StreamTime aStart, StreamTime aEnd)
  {
    MOZ_ASSERT(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
               "Slice out of bounds");
    if (mBuffer) {
      MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
        mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
                                                     mBufferFormat, int32_t(aStart));
      }
    }
    mDuration = aEnd - aStart;
  }
  StreamTime GetDuration() const { return mDuration; }
  bool CanCombineWithFollowing(const AudioChunk& aOther) const
  {
    if (aOther.mBuffer != mBuffer) {
      return false;
    }
    if (!mBuffer) {
      return true;
    }
    if (aOther.mVolume != mVolume) {
      return false;
    }
    if (aOther.mPrincipalHandle != mPrincipalHandle) {
      return false;
    }
    NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                 "Wrong metadata about buffer");
    NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
                 "Mismatched channel count");
    if (mDuration > INT32_MAX) {
      return false;
    }
    for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
      if (aOther.mChannelData[channel] !=
          AddAudioSampleOffset(mChannelData[channel], mBufferFormat,
                               int32_t(mDuration))) {
        return false;
      }
    }
    return true;
  }
  bool IsNull() const {
    return mBuffer == nullptr;
  }
  void SetNull(StreamTime aDuration)
  {
    mBuffer = nullptr;
    mChannelData.Clear();
    mDuration = aDuration;
    mVolume = 1.0f;
    mBufferFormat = AUDIO_FORMAT_SILENCE;
    mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  }

  size_t ChannelCount() const { return mChannelData.Length(); }
  bool IsMuted() const { return mVolume == 0.0f; }

  size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const
  {
    return SizeOfExcludingThis(aMallocSizeOf, true);
  }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const
  {
    size_t amount = 0;

    // Possibly owned:
    // - mBuffer - Can hold data that is also in the decoded audio queue. If it
    //             is not shared, or aUnshared == false, it gets counted.
    if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
      amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Memory in the array is owned by mBuffer.
    amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
    return amount;
  }
  template<typename T>
  const nsTArray<const T*>& ChannelData() const
  {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    return *reinterpret_cast<const AutoTArray<const T*,GUESS_AUDIO_CHANNELS>*>
      (&mChannelData);
  }
  /**
   * ChannelDataForWrite() should be used only when mBuffer is owned solely
   * by the calling thread.
   */
  template<typename T>
  T* ChannelDataForWrite(size_t aChannel)
  {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    MOZ_ASSERT(!mBuffer->IsShared());
    return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));
  }
  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }

  StreamTime mDuration = 0; // in frames within the buffer
  RefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
  // one pointer per channel; empty if and only if mBuffer is null
  AutoTArray<const void*,GUESS_AUDIO_CHANNELS> mChannelData;
  float mVolume = 1.0f; // volume multiplier to apply
  // format of frames in mBuffer (or silence if mBuffer is null)
  SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;
#ifdef MOZILLA_INTERNAL_API
  mozilla::TimeStamp mTimeStamp; // time at which this has been fetched from the MediaEngine
#endif
  // principalHandle for the data in this chunk.
  // This can be compared to an nsIPrincipal* when back on main thread.
  PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
};
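/*
 * Illustrative sketch of the "subinterval without copying" idea described in
 * the comment above (hypothetical values; only the AudioChunk interface is
 * assumed):
 *
 *   AudioChunk chunk = ...;        // e.g. 512 frames of float data
 *   chunk.SliceTo(128, 384);       // now represents frames [128, 384)
 *   // chunk.GetDuration() == 256; the per-channel pointers were advanced by
 *   // 128 samples and the underlying shared buffer was not copied.
 */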
/**
 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 * The audio rate is determined by the track, not stored in this class.
 */
class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
public:
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  AudioSegment(AudioSegment&& aSegment)
    : MediaSegmentBase<AudioSegment, AudioChunk>(Move(aSegment))
  {}

  AudioSegment(const AudioSegment&) = delete;
  AudioSegment& operator=(const AudioSegment&) = delete;

  ~AudioSegment() {}
  // Resample the whole segment in place.
  template<typename T>
  void Resample(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
  {
    mDuration = 0;
#ifdef DEBUG
    uint32_t segmentChannelCount = ChannelCount();
#endif

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
      AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
      AudioChunk& c = *ci;
      // If this chunk is null, don't bother resampling, just alter its duration
      if (c.IsNull()) {
        c.mDuration = (c.mDuration * aOutRate) / aInRate;
        mDuration += c.mDuration;
        continue;
      }
      uint32_t channels = c.mChannelData.Length();
      MOZ_ASSERT(channels == segmentChannelCount);
      output.SetLength(channels);
      bufferPtrs.SetLength(channels);
      uint32_t inFrames = c.mDuration;
      // Round up to allocate; the last frame may not be used.
      NS_ASSERTION((UINT32_MAX - aInRate + 1) / c.mDuration >= aOutRate,
                   "Dropping samples");
      uint32_t outSize = (c.mDuration * aOutRate + aInRate - 1) / aInRate;
      for (uint32_t i = 0; i < channels; i++) {
        T* out = output[i].AppendElements(outSize);
        uint32_t outFrames = outSize;

        const T* in = static_cast<const T*>(c.mChannelData[i]);
        dom::WebAudioUtils::SpeexResamplerProcess(aResampler, i,
                                                  in, &inFrames,
                                                  out, &outFrames);
        MOZ_ASSERT(inFrames == c.mDuration);

        bufferPtrs[i] = out;
        output[i].SetLength(outFrames);
      }
      MOZ_ASSERT(channels > 0);
      c.mDuration = output[0].Length();
      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(&output);
      for (uint32_t i = 0; i < channels; i++) {
        c.mChannelData[i] = bufferPtrs[i];
      }
      mDuration += c.mDuration;
    }
  }
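  // Worked example of the round-up above (hypothetical rates and lengths): for
  // a 1024-frame chunk resampled from 44100 Hz to 48000 Hz,
  //   outSize = (1024 * 48000 + 44100 - 1) / 44100 = 1115,
  // i.e. ceil(1024 * 48000 / 44100), so the allocation is never one frame
  // short even when the rate ratio does not divide evenly.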
  void ResampleChunks(SpeexResamplerState* aResampler,
                      uint32_t aInRate,
                      uint32_t aOutRate);

  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const float*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;

    MOZ_ASSERT(chunk->mBuffer || aChannelData.IsEmpty(), "Appending invalid data ?");

    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const int16_t*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;

    MOZ_ASSERT(chunk->mBuffer || aChannelData.IsEmpty(), "Appending invalid data ?");

    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mBufferFormat = AUDIO_FORMAT_S16;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
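  /*
   * Illustrative usage sketch for AppendFrames (hypothetical caller code; the
   * segment, principalHandle, channel count and frame count are examples, and
   * any ThreadSharedObject that owns the channel data would do just as well as
   * SharedChannelArrayBuffer):
   *
   *   nsTArray<nsTArray<float>> planar;          // 2 channels x 480 frames
   *   planar.SetLength(2);
   *   planar[0].SetLength(480);
   *   planar[1].SetLength(480);
   *   RefPtr<SharedChannelArrayBuffer<float>> buf =
   *     new SharedChannelArrayBuffer<float>(&planar);   // takes the arrays
   *   AutoTArray<const float*, 2> channels;
   *   channels.AppendElement(buf->mBuffers[0].Elements());
   *   channels.AppendElement(buf->mBuffers[1].Elements());
   *   segment.AppendFrames(buf.forget(), channels, 480, principalHandle);
   */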
  // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
  // in the segment.
  AudioChunk* AppendAndConsumeChunk(AudioChunk* aChunk)
  {
    AudioChunk* chunk = AppendChunk(aChunk->mDuration);
    chunk->mBuffer = aChunk->mBuffer.forget();
    chunk->mChannelData.SwapElements(aChunk->mChannelData);

    MOZ_ASSERT(chunk->mBuffer || aChunk->mChannelData.IsEmpty(), "Appending invalid data ?");

    chunk->mVolume = aChunk->mVolume;
    chunk->mBufferFormat = aChunk->mBufferFormat;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aChunk->mPrincipalHandle;
    return chunk;
  }
  void ApplyVolume(float aVolume);
  // Mix the segment into a mixer, interleaved. This is useful to output a
  // segment to a system audio callback. It up or down mixes to aChannelCount
  // channels.
  void WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aChannelCount,
               uint32_t aSampleRate);
  // Mix the segment into a mixer, keeping it planar, up or down mixing to
  // aChannelCount channels.
  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
  int ChannelCount() {
    NS_WARNING_ASSERTION(
      !mChunks.IsEmpty(),
      "Cannot query channel count on an AudioSegment with no chunks.");
    // Find the first chunk that has non-zero channels. A chunk that has zero
    // channels is just silence and we can simply discard it.
    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      if (ci->ChannelCount()) {
        return ci->ChannelCount();
      }
    }
    return 0;
  }
  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
  {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }
};
template<typename SrcT>
void WriteChunk(AudioChunk& aChunk,
                uint32_t aOutputChannels,
                AudioDataValue* aOutputBuffer)
{
  AutoTArray<const SrcT*,GUESS_AUDIO_CHANNELS> channelData;

  channelData = aChunk.ChannelData<SrcT>();

  if (channelData.Length() < aOutputChannels) {
    // Up-mix. Note that this might actually make channelData have more
    // than aOutputChannels temporarily.
    AudioChannelsUpMix(&channelData, aOutputChannels, SilentChannel::ZeroChannel<SrcT>());
  }
  if (channelData.Length() > aOutputChannels) {
    // Down-mix.
    DownmixAndInterleave(channelData, aChunk.mDuration,
                         aChunk.mVolume, aOutputChannels, aOutputBuffer);
  } else {
    InterleaveAndConvertBuffer(channelData.Elements(),
                               aChunk.mDuration, aChunk.mVolume,
                               aOutputChannels,
                               aOutputBuffer);
  }
}
} // namespace mozilla

#endif /* MOZILLA_AUDIOSEGMENT_H_ */