/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_AUDIOSEGMENT_H_
#define MOZILLA_AUDIOSEGMENT_H_

#include <speex/speex_resampler.h>
#include "MediaTrackGraph.h"
#include "MediaSegment.h"
#include "AudioSampleFormat.h"
#include "AudioChannelFormat.h"
#include "SharedBuffer.h"
#include "WebAudioUtils.h"
#include "mozilla/ScopeExit.h"
#include "nsAutoRef.h"
#ifdef MOZILLA_INTERNAL_API
#  include "mozilla/TimeStamp.h"
#endif
#include <float.h>

namespace mozilla {
struct AudioChunk;
class AudioSegment;
}  // namespace mozilla
MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk)

/**
 * This allows compilation of nsTArray<AudioSegment> and
 * AutoTArray<AudioSegment> since without it, static analysis fails on the
 * mChunks member being a non-memmovable AutoTArray.
 *
 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
 * never come into effect.
 */
MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment)

namespace mozilla {
template <typename T>
class SharedChannelArrayBuffer : public ThreadSharedObject {
 public:
  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers)
      : mBuffers(std::move(aBuffers)) {}

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = 0;
    amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
    for (size_t i = 0; i < mBuffers.Length(); i++) {
      amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

  nsTArray<nsTArray<T> > mBuffers;
};

class AudioMixer;
/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size.
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
template <typename SrcT, typename DestT>
static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
                                       uint32_t aLength, float aVolume,
                                       uint32_t aChannels, DestT* aOutput) {
  DestT* output = aOutput;
  for (size_t i = 0; i < aLength; ++i) {
    for (size_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i]) * aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}
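
// Illustrative usage sketch (comment only, not compiled here): interleaving
// two planar float channels into an int16 buffer at half volume. The variable
// names and frame count below are hypothetical.
//
//   const float* planar[2] = {left, right};  // 128 frames per channel
//   int16_t interleaved[2 * 128];
//   InterleaveAndConvertBuffer(planar, 128, 0.5f, 2, interleaved);
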
template <typename SrcT, typename DestT>
static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                                         uint32_t aFrames, uint32_t aChannels,
                                         DestT** aOutput) {
  for (size_t i = 0; i < aChannels; i++) {
    size_t interleavedIndex = i;
    for (size_t j = 0; j < aFrames; j++) {
      ConvertAudioSample(aSourceBuffer[interleavedIndex], aOutput[i][j]);
      interleavedIndex += aChannels;
    }
  }
}
class SilentChannel {
 public:
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  static const uint8_t
      gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  template <typename T>
  static const T* ZeroChannel();
};
/**
 * Given an array of input channels (aChannelData), downmix to aOutputChannels
 * and interleave the channel data. A total of aOutputChannels*aDuration
 * interleaved samples will be copied to a channel buffer in aOutput.
 */
template <typename SrcT, typename DestT>
void DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData,
                          int32_t aDuration, float aVolume,
                          uint32_t aOutputChannels, DestT* aOutput) {
  if (aChannelData.Length() == aOutputChannels) {
    InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume,
                               aOutputChannels, aOutput);
  } else {
    AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData;
    AutoTArray<SrcT,
               SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
        outputBuffers;
    outputChannelData.SetLength(aOutputChannels);
    outputBuffers.SetLength(aDuration * aOutputChannels);
    for (uint32_t i = 0; i < aOutputChannels; i++) {
      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
    }
    AudioChannelsDownMix(aChannelData, outputChannelData.Elements(),
                         aOutputChannels, aDuration);
    InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume,
                               aOutputChannels, aOutput);
  }
}
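
// Illustrative usage sketch (comment only): downmixing a hypothetical
// four-channel planar float buffer to stereo interleaved output at unity
// volume. All names below are assumptions for the example.
//
//   AutoTArray<const float*, 4> channels;  // four planar channel pointers,
//                                          // 128 frames each
//   AudioDataValue interleaved[2 * 128];   // aOutputChannels * aDuration
//   DownmixAndInterleave(channels, 128, 1.0f, 2, interleaved);
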
/**
 * An AudioChunk represents a multi-channel buffer of audio samples.
 * It references an underlying ThreadSharedObject which manages the lifetime
 * of the buffer. An AudioChunk maintains its own duration and channel data
 * pointers so it can represent a subinterval of a buffer without copying.
 * An AudioChunk can store its individual channels anywhere; it maintains
 * separate pointers to each channel's buffer.
 */
struct AudioChunk {
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioChunk() = default;

  template <typename T>
  AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer,
             const nsTArray<const T*>& aChannelData, TrackTime aDuration,
             PrincipalHandle aPrincipalHandle)
      : mDuration(aDuration),
        mBuffer(aBuffer),
        mBufferFormat(AudioSampleTypeToFormat<T>::Format),
        mPrincipalHandle(std::move(aPrincipalHandle)) {
    MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?");
    for (const T* data : aChannelData) {
      mChannelData.AppendElement(data);
    }
  }

  // Generic methods
  void SliceTo(TrackTime aStart, TrackTime aEnd) {
    MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start");
    MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range");
    MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end");

    if (mBuffer) {
      MOZ_ASSERT(aStart < INT32_MAX,
                 "Can't slice beyond 32-bit sample lengths");
      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
        mChannelData[channel] = AddAudioSampleOffset(
            mChannelData[channel], mBufferFormat, int32_t(aStart));
      }
    }
    mDuration = aEnd - aStart;
  }

  TrackTime GetDuration() const { return mDuration; }
  bool CanCombineWithFollowing(const AudioChunk& aOther) const {
    if (aOther.mBuffer != mBuffer) {
      return false;
    }
    if (!mBuffer) {
      return true;
    }
    if (aOther.mVolume != mVolume) {
      return false;
    }
    if (aOther.mPrincipalHandle != mPrincipalHandle) {
      return false;
    }
    NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                 "Wrong metadata about buffer");
    NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
                 "Mismatched channel count");
    if (mDuration > INT32_MAX) {
      return false;
    }
    for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
      if (aOther.mChannelData[channel] !=
          AddAudioSampleOffset(mChannelData[channel], mBufferFormat,
                               int32_t(mDuration))) {
        return false;
      }
    }
    return true;
  }

  bool IsNull() const { return mBuffer == nullptr; }

  void SetNull(TrackTime aDuration) {
    mBuffer = nullptr;
    mChannelData.Clear();
    mDuration = aDuration;
    mVolume = 1.0f;
    mBufferFormat = AUDIO_FORMAT_SILENCE;
    mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  }

  uint32_t ChannelCount() const { return mChannelData.Length(); }

  bool IsMuted() const { return mVolume == 0.0f; }

  size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const {
    return SizeOfExcludingThis(aMallocSizeOf, true);
  }
  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const {
    size_t amount = 0;

    // Possibly owned:
    // - mBuffer - Can hold data that is also in the decoded audio queue. If it
    //   is not shared, or unshared == false, it gets counted.
    if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
      amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Memory in the array is owned by mBuffer.
    amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
    return amount;
  }

  template <typename T>
  const nsTArray<const T*>& ChannelData() const {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    return *reinterpret_cast<const AutoTArray<const T*, GUESS_AUDIO_CHANNELS>*>(
        &mChannelData);
  }
  /**
   * ChannelDataForWrite() should be used only when mBuffer is owned solely
   * by the calling thread.
   */
  template <typename T>
  T* ChannelDataForWrite(size_t aChannel) {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    MOZ_ASSERT(!mBuffer->IsShared());
    return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));
  }
  template <typename T>
  static AudioChunk FromInterleavedBuffer(
      const T* aBuffer, size_t aFrames, uint32_t aChannels,
      const PrincipalHandle& aPrincipalHandle) {
    CheckedInt<size_t> bufferSize(sizeof(T));
    bufferSize *= aFrames;
    bufferSize *= aChannels;
    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);

    AutoTArray<T*, 8> deinterleaved;
    if (aChannels == 1) {
      PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames);
      deinterleaved.AppendElement(static_cast<T*>(buffer->Data()));
    } else {
      deinterleaved.SetLength(aChannels);
      T* samples = static_cast<T*>(buffer->Data());

      size_t offset = 0;
      for (uint32_t i = 0; i < aChannels; ++i) {
        deinterleaved[i] = samples + offset;
        offset += aFrames;
      }

      DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames),
                                   aChannels, deinterleaved.Elements());
    }

    AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData;
    channelData.AppendElements(deinterleaved);
    return AudioChunk(buffer.forget(), channelData,
                      static_cast<TrackTime>(aFrames), aPrincipalHandle);
  }
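
  // Illustrative usage sketch (comment only): building a stereo chunk from a
  // hypothetical interleaved int16 capture buffer. The data is copied and
  // deinterleaved into a new SharedBuffer owned by the returned chunk, so the
  // source buffer can be reused afterwards.
  //
  //   const int16_t* captured = ...;  // frames * 2 interleaved samples
  //   AudioChunk chunk = AudioChunk::FromInterleavedBuffer(
  //       captured, frames, 2, PRINCIPAL_HANDLE_NONE);
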
  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }

  TrackTime mDuration = 0;             // in frames within the buffer
  RefPtr<ThreadSharedObject> mBuffer;  // the buffer object whose lifetime is
                                       // managed; null means data is all zeroes
  // one pointer per channel; empty if and only if mBuffer is null
  CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData;
  float mVolume = 1.0f;  // volume multiplier to apply
  // format of frames in mBuffer (or silence if mBuffer is null)
  SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;
  // principalHandle for the data in this chunk.
  // This can be compared to an nsIPrincipal* when back on main thread.
  PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
};
/**
 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 * The audio rate is determined by the track, not stored in this class.
 */
class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
  // The channel count that MaxChannelCount() returned last time it was called.
  uint32_t mMemoizedMaxChannelCount = 0;

 public:
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  AudioSegment(AudioSegment&& aSegment) = default;

  AudioSegment(const AudioSegment&) = delete;
  AudioSegment& operator=(const AudioSegment&) = delete;

  ~AudioSegment() = default;

  // Resample the whole segment in place. `aResampler` is an instance of a
  // resampler, initialized with `aResamplerChannelCount` channels. If this
  // function finds a chunk with a different channel count, `aResampler` is
  // destroyed, a new resampler is created, and `aResamplerChannelCount` is
  // updated with the new channel count value.
  template <typename T>
  void Resample(nsAutoRef<SpeexResamplerState>& aResampler,
                uint32_t* aResamplerChannelCount, uint32_t aInRate,
                uint32_t aOutRate) {
    mDuration = 0;

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
      AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
      AudioChunk& c = *ci;
      // If this chunk is null, don't bother resampling, just alter its
      // duration.
      if (c.IsNull()) {
        c.mDuration = (c.mDuration * aOutRate) / aInRate;
        mDuration += c.mDuration;
        continue;
      }
      uint32_t channels = c.mChannelData.Length();
      // This might introduce a discontinuity, but a channel count change in the
      // middle of a stream is not that common. This also initializes the
      // resampler as late as possible.
      if (channels != *aResamplerChannelCount) {
        SpeexResamplerState* state =
            speex_resampler_init(channels, aInRate, aOutRate,
                                 SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
        MOZ_ASSERT(state);
        aResampler.own(state);
        *aResamplerChannelCount = channels;
      }
      output.SetLength(channels);
      bufferPtrs.SetLength(channels);
      uint32_t inFrames = c.mDuration;
      // Round up to allocate; the last frame may not be used.
      NS_ASSERTION((UINT64_MAX - aInRate + 1) / c.mDuration >= aOutRate,
                   "Dropping samples");
      uint32_t outSize =
          (static_cast<uint64_t>(c.mDuration) * aOutRate + aInRate - 1) /
          aInRate;
      for (uint32_t i = 0; i < channels; i++) {
        T* out = output[i].AppendElements(outSize);
        uint32_t outFrames = outSize;

        const T* in = static_cast<const T*>(c.mChannelData[i]);
        dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), i, in,
                                                  &inFrames, out, &outFrames);
        MOZ_ASSERT(inFrames == c.mDuration);

        bufferPtrs[i] = out;
        output[i].SetLength(outFrames);
      }
      MOZ_ASSERT(channels > 0);
      c.mDuration = output[0].Length();
      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(std::move(output));
      for (uint32_t i = 0; i < channels; i++) {
        c.mChannelData[i] = bufferPtrs[i];
      }
      mDuration += c.mDuration;
    }
  }
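
  // Illustrative usage sketch (comment only): resampling a segment of float
  // chunks from 44100 Hz to 48000 Hz while keeping the resampler state across
  // calls. `resampler` and `channelCount` are hypothetical caller-owned
  // variables that persist between segments.
  //
  //   nsAutoRef<SpeexResamplerState> resampler;
  //   uint32_t channelCount = 0;
  //   segment.Resample<float>(resampler, &channelCount, 44100, 48000);
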
  void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
                      uint32_t* aResamplerChannelCount, uint32_t aInRate,
                      uint32_t aOutRate);

  template <typename T>
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const T*>& aChannelData, TrackTime aDuration,
                    const PrincipalHandle& aPrincipalHandle) {
    AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData,
                                     aDuration, aPrincipalHandle));
  }

  void AppendSegment(const AudioSegment* aSegment) {
    MOZ_ASSERT(aSegment);

    for (const AudioChunk& c : aSegment->mChunks) {
      AudioChunk* chunk = AppendChunk(c.GetDuration());
      chunk->mBuffer = c.mBuffer;
      chunk->mChannelData = c.mChannelData;
      chunk->mBufferFormat = c.mBufferFormat;
      chunk->mPrincipalHandle = c.mPrincipalHandle;
    }
  }

  template <typename T>
  void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames,
                                   uint32_t aChannels,
                                   const PrincipalHandle& aPrincipalHandle) {
    AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>(
        aBuffer, aFrames, aChannels, aPrincipalHandle));
  }
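
  // Illustrative usage sketch (comment only): appending a hypothetical
  // interleaved stereo microphone buffer to this segment. The chunk takes a
  // copy of the data, so `micData` can be reused afterwards.
  //
  //   segment.AppendFromInterleavedBuffer(micData, frames, 2,
  //                                       PRINCIPAL_HANDLE_NONE);
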
  // Write the segment data into an interleaved buffer. Do mixing if the
  // AudioChunk's channel count in the segment is different from aChannels.
  // Returns the sample count of the converted audio data. The converted data
  // will be stored into aBuffer.
  size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
                                  uint32_t aChannels) const;
  // Consumes aChunk, and appends it to the segment if its duration is not zero.
  void AppendAndConsumeChunk(AudioChunk&& aChunk) {
    AudioChunk unused;
    AudioChunk* chunk = &unused;

    // Always consume aChunk. The chunk's mBuffer can be non-null even if its
    // duration is 0.
    auto consume = MakeScopeExit([&] {
      chunk->mBuffer = std::move(aChunk.mBuffer);
      chunk->mChannelData = std::move(aChunk.mChannelData);

      MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(),
                 "Appending invalid data ?");

      chunk->mVolume = aChunk.mVolume;
      chunk->mBufferFormat = aChunk.mBufferFormat;
      chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle);
    });

    if (aChunk.GetDuration() == 0) {
      return;
    }

    if (!mChunks.IsEmpty() &&
        mChunks.LastElement().CanCombineWithFollowing(aChunk)) {
      mChunks.LastElement().mDuration += aChunk.GetDuration();
      mDuration += aChunk.GetDuration();
      return;
    }

    chunk = AppendChunk(aChunk.mDuration);
  }
  void ApplyVolume(float aVolume);
  // Mix the segment into a mixer, interleaved. This is useful to output a
  // segment to a system audio callback. It up- or down-mixes to aChannelCount
  // channels.
  void WriteTo(AudioMixer& aMixer, uint32_t aChannelCount,
               uint32_t aSampleRate);
  // Mix the segment into a mixer, keeping it planar, up- or down-mixing to
  // aChannelCount channels.
  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
  // Returns the maximum channel count across all chunks in this segment.
  // If no chunk has a channel count, returns the memoized value from the last
  // time this method was called.
  uint32_t MaxChannelCount() {
    uint32_t channelCount = 0;
    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      if (ci->ChannelCount()) {
        channelCount = std::max(channelCount, ci->ChannelCount());
      }
    }
    if (channelCount == 0) {
      return mMemoizedMaxChannelCount;
    }
    return mMemoizedMaxChannelCount = channelCount;
  }
  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

  PrincipalHandle GetOldestPrinciple() const {
    const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0];
    return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE;
  }
  // Iterate over the chunks until the given function returns true.
  template <typename Function>
  void IterateOnChunks(const Function&& aFunction) {
    for (uint32_t idx = 0; idx < mChunks.Length(); idx++) {
      if (aFunction(&mChunks[idx])) {
        return;
      }
    }
  }
};
template <typename SrcT>
void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,
                float aVolume, AudioDataValue* aOutputBuffer) {
  AutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;

  channelData = aChunk.ChannelData<SrcT>().Clone();

  if (channelData.Length() < aOutputChannels) {
    // Up-mix. Note that this might actually make channelData have more
    // than aOutputChannels temporarily.
    AudioChannelsUpMix(&channelData, aOutputChannels,
                       SilentChannel::ZeroChannel<SrcT>());
  }
  if (channelData.Length() > aOutputChannels) {
    // Down-mix.
    DownmixAndInterleave(channelData, aChunk.mDuration, aVolume,
                         aOutputChannels, aOutputBuffer);
  } else {
    InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration,
                               aVolume, aOutputChannels, aOutputBuffer);
  }
}
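
// Illustrative usage sketch (comment only): interleaving one float-format
// chunk into a caller-provided output buffer at unity volume. `outBuffer` is
// hypothetical and must hold at least aOutputChannels * chunk.mDuration
// samples.
//
//   AudioDataValue* outBuffer = ...;
//   WriteChunk<float>(chunk, 2, 1.0f, outBuffer);
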
}  // namespace mozilla

#endif /* MOZILLA_AUDIOSEGMENT_H_ */