dom/media/AudioSegment.h
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_AUDIOSEGMENT_H_
#define MOZILLA_AUDIOSEGMENT_H_

#include <speex/speex_resampler.h>
#include "MediaTrackGraph.h"
#include "MediaSegment.h"
#include "AudioSampleFormat.h"
#include "AudioChannelFormat.h"
#include "SharedBuffer.h"
#include "WebAudioUtils.h"
#include "nsAutoRef.h"
#ifdef MOZILLA_INTERNAL_API
#  include "mozilla/TimeStamp.h"
#endif
#include <float.h>

namespace mozilla {
struct AudioChunk;
class AudioSegment;
}  // namespace mozilla
MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk)
/**
 * This allows compilation of nsTArray<AudioSegment> and
 * AutoTArray<AudioSegment> since without it, static analysis fails on the
 * mChunks member being a non-memmovable AutoTArray.
 *
 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
 * never come into effect.
 */
MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment)

namespace mozilla {
template <typename T>
class SharedChannelArrayBuffer : public ThreadSharedObject {
 public:
  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers)
      : mBuffers(std::move(aBuffers)) {}

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = 0;
    amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
    for (size_t i = 0; i < mBuffers.Length(); i++) {
      amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

  nsTArray<nsTArray<T> > mBuffers;
};

class AudioMixer;
/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
template <typename SrcT, typename DestT>
static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
                                       uint32_t aLength, float aVolume,
                                       uint32_t aChannels, DestT* aOutput) {
  DestT* output = aOutput;
  for (size_t i = 0; i < aLength; ++i) {
    for (size_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i]) * aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}
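// Illustrative usage sketch, not part of the original header: interleave two
// planar float channels into int16 samples at unity volume. The data, sizes,
// and the name ExampleInterleave are invented; the #if 0 guard keeps the
// sketch out of any real build.
#if 0
static void ExampleInterleave() {
  const float left[4] = {0.0f, 0.25f, 0.5f, 1.0f};
  const float right[4] = {1.0f, 0.5f, 0.25f, 0.0f};
  const float* channels[2] = {left, right};
  int16_t interleaved[4 * 2];  // frames * channels, written as L/R pairs
  InterleaveAndConvertBuffer(channels, 4, 1.0f, 2, interleaved);
}
#endif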
template <typename SrcT, typename DestT>
static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                                         uint32_t aFrames, uint32_t aChannels,
                                         DestT** aOutput) {
  for (size_t i = 0; i < aChannels; i++) {
    size_t interleavedIndex = i;
    for (size_t j = 0; j < aFrames; j++) {
      ConvertAudioSample(aSourceBuffer[interleavedIndex], aOutput[i][j]);
      interleavedIndex += aChannels;
    }
  }
}
class SilentChannel {
 public:
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  static const uint8_t
      gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  template <typename T>
  static const T* ZeroChannel();
};
/**
 * Given an array of input channels (aChannelData), downmix to aOutputChannels
 * and interleave the channel data. A total of aOutputChannels*aDuration
 * interleaved samples will be copied to a channel buffer in aOutput.
 */
template <typename SrcT, typename DestT>
void DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData,
                          int32_t aDuration, float aVolume,
                          uint32_t aOutputChannels, DestT* aOutput) {
  if (aChannelData.Length() == aOutputChannels) {
    InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume,
                               aOutputChannels, aOutput);
  } else {
    AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData;
    AutoTArray<SrcT,
               SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
        outputBuffers;
    outputChannelData.SetLength(aOutputChannels);
    outputBuffers.SetLength(aDuration * aOutputChannels);
    for (uint32_t i = 0; i < aOutputChannels; i++) {
      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
    }
    AudioChannelsDownMix(aChannelData, outputChannelData.Elements(),
                         aOutputChannels, aDuration);
    InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume,
                               aOutputChannels, aOutput);
  }
}
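// Illustrative usage sketch, not part of the original header: fold four planar
// float channels down to stereo int16 output. The buffers, values, and the
// name ExampleDownmixAndInterleave are invented; the #if 0 guard keeps the
// sketch out of any real build.
#if 0
static void ExampleDownmixAndInterleave() {
  const int32_t frames = 4;
  float quad[4][frames] = {};  // 4 channels x 4 frames, all silence here
  AutoTArray<const float*, 4> channels;
  for (uint32_t c = 0; c < 4; c++) {
    channels.AppendElement(&quad[c][0]);
  }
  int16_t interleaved[frames * 2];  // stereo output, written as L/R pairs
  DownmixAndInterleave(channels, frames, 1.0f, 2, interleaved);
}
#endif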
/**
 * An AudioChunk represents a multi-channel buffer of audio samples.
 * It references an underlying ThreadSharedObject which manages the lifetime
 * of the buffer. An AudioChunk maintains its own duration and channel data
 * pointers so it can represent a subinterval of a buffer without copying.
 * An AudioChunk can store its individual channels anywhere; it maintains
 * separate pointers to each channel's buffer.
 */
struct AudioChunk {
  typedef mozilla::AudioSampleFormat SampleFormat;

  // Generic methods
  void SliceTo(TrackTime aStart, TrackTime aEnd) {
    MOZ_ASSERT(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
               "Slice out of bounds");
    if (mBuffer) {
      MOZ_ASSERT(aStart < INT32_MAX,
                 "Can't slice beyond 32-bit sample lengths");
      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
        mChannelData[channel] = AddAudioSampleOffset(
            mChannelData[channel], mBufferFormat, int32_t(aStart));
      }
    }
    mDuration = aEnd - aStart;
  }
  TrackTime GetDuration() const { return mDuration; }
  bool CanCombineWithFollowing(const AudioChunk& aOther) const {
    if (aOther.mBuffer != mBuffer) {
      return false;
    }
    if (!mBuffer) {
      return true;
    }
    if (aOther.mVolume != mVolume) {
      return false;
    }
    if (aOther.mPrincipalHandle != mPrincipalHandle) {
      return false;
    }
    NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                 "Wrong metadata about buffer");
    NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
                 "Mismatched channel count");
    if (mDuration > INT32_MAX) {
      return false;
    }
    for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
      if (aOther.mChannelData[channel] !=
          AddAudioSampleOffset(mChannelData[channel], mBufferFormat,
                               int32_t(mDuration))) {
        return false;
      }
    }
    return true;
  }
  bool IsNull() const { return mBuffer == nullptr; }
  void SetNull(TrackTime aDuration) {
    mBuffer = nullptr;
    mChannelData.Clear();
    mDuration = aDuration;
    mVolume = 1.0f;
    mBufferFormat = AUDIO_FORMAT_SILENCE;
    mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  }

  uint32_t ChannelCount() const { return mChannelData.Length(); }

  bool IsMuted() const { return mVolume == 0.0f; }

  size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const {
    return SizeOfExcludingThis(aMallocSizeOf, true);
  }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const {
    size_t amount = 0;

    // Possibly owned:
    // - mBuffer - Can hold data that is also in the decoded audio queue. It is
    //   counted if it is not shared, or if aUnshared is false.
    if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
      amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Memory in the array is owned by mBuffer.
    amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
    return amount;
  }
  template <typename T>
  const nsTArray<const T*>& ChannelData() const {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    return *reinterpret_cast<const AutoTArray<const T*, GUESS_AUDIO_CHANNELS>*>(
        &mChannelData);
  }

  /**
   * ChannelDataForWrite() should be used only when mBuffer is owned solely
   * by the calling thread.
   */
  template <typename T>
  T* ChannelDataForWrite(size_t aChannel) {
    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    MOZ_ASSERT(!mBuffer->IsShared());
    return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));
  }

  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }
  TrackTime mDuration = 0;             // in frames within the buffer
  RefPtr<ThreadSharedObject> mBuffer;  // the buffer object whose lifetime is
                                       // managed; null means data is all zeroes
  // one pointer per channel; empty if and only if mBuffer is null
  CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData;
  float mVolume = 1.0f;  // volume multiplier to apply
  // format of frames in mBuffer (or silence if mBuffer is null)
  SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;
  // principalHandle for the data in this chunk.
  // This can be compared to an nsIPrincipal* when back on main thread.
  PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
};
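// Illustrative usage sketch, not part of the original header: represent a
// sub-interval of a chunk without copying by slicing it. The chunk here is
// silent (null buffer) and the frame counts are invented; the #if 0 guard
// keeps the sketch out of any real build.
#if 0
static void ExampleSliceChunk() {
  AudioChunk chunk;
  chunk.SetNull(128);     // 128 frames of silence
  chunk.SliceTo(32, 96);  // keep frames [32, 96)
  MOZ_ASSERT(chunk.GetDuration() == 64);
  MOZ_ASSERT(chunk.IsNull());
}
#endif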
/**
 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 * The audio rate is determined by the track, not stored in this class.
 */
class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
  // The channel count that MaxChannelCount() returned last time it was called.
  uint32_t mMemoizedMaxChannelCount = 0;

 public:
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  AudioSegment(AudioSegment&& aSegment) = default;

  AudioSegment(const AudioSegment&) = delete;
  AudioSegment& operator=(const AudioSegment&) = delete;

  ~AudioSegment() = default;
  // Resample the whole segment in place. `aResampler` is an instance of a
  // resampler, initialized with `aResamplerChannelCount` channels. If this
  // function finds a chunk with a different channel count, `aResampler` is
  // destroyed, a new resampler is created, and `aResamplerChannelCount` is
  // updated to the new channel count.
  template <typename T>
  void Resample(nsAutoRef<SpeexResamplerState>& aResampler,
                uint32_t* aResamplerChannelCount, uint32_t aInRate,
                uint32_t aOutRate) {
    mDuration = 0;

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
      AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
      AudioChunk& c = *ci;
      // If this chunk is null, don't bother resampling, just alter its duration
      if (c.IsNull()) {
        c.mDuration = (c.mDuration * aOutRate) / aInRate;
        mDuration += c.mDuration;
        continue;
      }
      uint32_t channels = c.mChannelData.Length();
      // This might introduce a discontinuity, but a channel count change in the
      // middle of a stream is not that common. This also initializes the
      // resampler as late as possible.
      if (channels != *aResamplerChannelCount) {
        SpeexResamplerState* state =
            speex_resampler_init(channels, aInRate, aOutRate,
                                 SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
        MOZ_ASSERT(state);
        aResampler.own(state);
        *aResamplerChannelCount = channels;
      }
      output.SetLength(channels);
      bufferPtrs.SetLength(channels);
      uint32_t inFrames = c.mDuration;
      // Round up to allocate; the last frame may not be used.
      NS_ASSERTION((UINT32_MAX - aInRate + 1) / c.mDuration >= aOutRate,
                   "Dropping samples");
      uint32_t outSize = (c.mDuration * aOutRate + aInRate - 1) / aInRate;
      for (uint32_t i = 0; i < channels; i++) {
        T* out = output[i].AppendElements(outSize);
        uint32_t outFrames = outSize;

        const T* in = static_cast<const T*>(c.mChannelData[i]);
        dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), i, in,
                                                  &inFrames, out, &outFrames);
        MOZ_ASSERT(inFrames == c.mDuration);

        bufferPtrs[i] = out;
        output[i].SetLength(outFrames);
      }
      MOZ_ASSERT(channels > 0);
      c.mDuration = output[0].Length();
      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(std::move(output));
      for (uint32_t i = 0; i < channels; i++) {
        c.mChannelData[i] = bufferPtrs[i];
      }
      mDuration += c.mDuration;
    }
  }
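  // Illustrative usage sketch, not part of the original header: resample a
  // float segment from 44100 Hz to 48000 Hz. Resample() creates (or re-creates)
  // the Speex resampler the first time it sees a chunk whose channel count does
  // not match aResamplerChannelCount. The names and rates are invented; the
  // #if 0 guard keeps the sketch out of any real build.
#if 0
  static void ExampleResample(AudioSegment& aSegment) {
    nsAutoRef<SpeexResamplerState> resampler;
    uint32_t resamplerChannelCount = 0;  // no resampler created yet
    aSegment.Resample<float>(resampler, &resamplerChannelCount, 44100, 48000);
  }
#endif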
  void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
                      uint32_t* aResamplerChannelCount, uint32_t aInRate,
                      uint32_t aOutRate);
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const float*>& aChannelData,
                    int32_t aDuration,
                    const PrincipalHandle& aPrincipalHandle) {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;

    MOZ_ASSERT(chunk->mBuffer || aChannelData.IsEmpty(),
               "Appending invalid data ?");

    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const int16_t*>& aChannelData,
                    int32_t aDuration,
                    const PrincipalHandle& aPrincipalHandle) {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;

    MOZ_ASSERT(chunk->mBuffer || aChannelData.IsEmpty(),
               "Appending invalid data ?");

    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mBufferFormat = AUDIO_FORMAT_S16;
    chunk->mPrincipalHandle = aPrincipalHandle;
  }
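  // Illustrative usage sketch, not part of the original header: wrap planar
  // float data in a SharedChannelArrayBuffer and append it to a segment. The
  // channel count, frame count, and the name ExampleAppendFrames are invented;
  // the #if 0 guard keeps the sketch out of any real build.
#if 0
  static void ExampleAppendFrames(AudioSegment& aSegment,
                                  const PrincipalHandle& aPrincipal) {
    const uint32_t frames = 128;
    nsTArray<nsTArray<float>> planar;
    planar.SetLength(2);
    planar[0].SetLength(frames);  // left channel; real code would fill samples
    planar[1].SetLength(frames);  // right channel; real code would fill samples

    RefPtr<SharedChannelArrayBuffer<float>> buffer =
        new SharedChannelArrayBuffer<float>(std::move(planar));
    AutoTArray<const float*, GUESS_AUDIO_CHANNELS> channelData;
    channelData.AppendElement(buffer->mBuffers[0].Elements());
    channelData.AppendElement(buffer->mBuffers[1].Elements());

    aSegment.AppendFrames(buffer.forget(), channelData, frames, aPrincipal);
  }
#endif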
  // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
  // in the segment.
  AudioChunk* AppendAndConsumeChunk(AudioChunk* aChunk) {
    AudioChunk* chunk = AppendChunk(aChunk->mDuration);
    chunk->mBuffer = std::move(aChunk->mBuffer);
    chunk->mChannelData = std::move(aChunk->mChannelData);

    MOZ_ASSERT(chunk->mBuffer || aChunk->mChannelData.IsEmpty(),
               "Appending invalid data ?");

    chunk->mVolume = aChunk->mVolume;
    chunk->mBufferFormat = aChunk->mBufferFormat;
    chunk->mPrincipalHandle = aChunk->mPrincipalHandle;
    return chunk;
  }
  void ApplyVolume(float aVolume);
  // Mix the segment into a mixer, interleaved. This is useful to output a
  // segment to a system audio callback. It up- or down-mixes to aChannelCount
  // channels.
  void WriteTo(AudioMixer& aMixer, uint32_t aChannelCount,
               uint32_t aSampleRate);
  // Mix the segment into a mixer, keeping it planar, up- or down-mixing to
  // aChannelCount channels.
  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);

  // Returns the maximum channel count across all chunks in this segment.
  // If no chunk has a channel count, returns the value memoized from the last
  // time this method was called.
  uint32_t MaxChannelCount() {
    uint32_t channelCount = 0;
    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      if (ci->ChannelCount()) {
        channelCount = std::max(channelCount, ci->ChannelCount());
      }
    }
    if (channelCount == 0) {
      return mMemoizedMaxChannelCount;
    }
    return mMemoizedMaxChannelCount = channelCount;
  }
  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }
};
template <typename SrcT>
void WriteChunk(AudioChunk& aChunk, uint32_t aOutputChannels,
                AudioDataValue* aOutputBuffer) {
  AutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;

  channelData = aChunk.ChannelData<SrcT>().Clone();

  if (channelData.Length() < aOutputChannels) {
    // Up-mix. Note that this might actually make channelData have more
    // than aOutputChannels temporarily.
    AudioChannelsUpMix(&channelData, aOutputChannels,
                       SilentChannel::ZeroChannel<SrcT>());
  }
  if (channelData.Length() > aOutputChannels) {
    // Down-mix.
    DownmixAndInterleave(channelData, aChunk.mDuration, aChunk.mVolume,
                         aOutputChannels, aOutputBuffer);
  } else {
    InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration,
                               aChunk.mVolume, aOutputChannels, aOutputBuffer);
  }
}
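// Illustrative usage sketch, not part of the original header: write one
// FLOAT32 chunk into an interleaved stereo buffer of AudioDataValue (the
// platform sample type). The buffer sizing and the name ExampleWriteChunk are
// invented; the #if 0 guard keeps the sketch out of any real build.
#if 0
static void ExampleWriteChunk(AudioChunk& aChunk) {
  AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 2> interleaved;
  interleaved.SetLength(aChunk.GetDuration() * 2);  // frames * 2 channels
  WriteChunk<float>(aChunk, 2, interleaved.Elements());
}
#endif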
}  // namespace mozilla

#endif /* MOZILLA_AUDIOSEGMENT_H_ */