1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef MOZILLA_AUDIOSEGMENT_H_
7 #define MOZILLA_AUDIOSEGMENT_H_
9 #include <speex/speex_resampler.h>
10 #include "MediaTrackGraph.h"
11 #include "MediaSegment.h"
12 #include "AudioSampleFormat.h"
13 #include "AudioChannelFormat.h"
14 #include "SharedBuffer.h"
15 #include "WebAudioUtils.h"
16 #include "mozilla/ScopeExit.h"
17 #include "nsAutoRef.h"
18 #ifdef MOZILLA_INTERNAL_API
19 # include "mozilla/TimeStamp.h"
26 } // namespace mozilla
// Opts mozilla::AudioChunk, and further down mozilla::AudioSegment, into
// relocation through their move constructors. The macro is defined outside
// this file; its name is the only evidence here for what it does.
27 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk
)
30 * This allows compilation of nsTArray<AudioSegment> and
31 * AutoTArray<AudioSegment> since without it, static analysis fails on the
32 * mChunks member being a non-memmovable AutoTArray.
34 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
35 * never come into effect.
37 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment
)
// A ThreadSharedObject that owns one nsTArray<T> per audio channel (mBuffers).
// AudioSegment::Resample() wraps its per-channel output arrays in an instance
// of this class and installs it as the chunk's mBuffer.
// NOTE(review): T is a template parameter; the `template` header line is not
// part of this excerpt.
42 class SharedChannelArrayBuffer
: public ThreadSharedObject
{
// Takes ownership of aBuffers by moving it into mBuffers.
44 explicit SharedChannelArrayBuffer(nsTArray
<nsTArray
<T
> >&& aBuffers
)
45 : mBuffers(std::move(aBuffers
)) {}
// Memory reporter hook: adds the shallow size of the outer array and then the
// shallow size of each inner per-channel array to `amount`.
// NOTE(review): the declaration of `amount` and the return statement are not
// visible in this excerpt.
47 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
) const override
{
49 amount
+= mBuffers
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
50 for (size_t i
= 0; i
< mBuffers
.Length(); i
++) {
51 amount
+= mBuffers
[i
].ShallowSizeOfExcludingThis(aMallocSizeOf
);
// Memory reporter hook: size of this object itself plus everything counted by
// SizeOfExcludingThis().
57 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
58 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
// One sample array per channel.
61 nsTArray
<nsTArray
<T
> > mBuffers
;
// Channel count assumed to be the common case; used to size the inline
// storage of auto-arrays that hold per-channel data.
const int GUESS_AUDIO_CHANNELS = 2;

// The graph advances in steps that are multiples of the Web Audio block size.
// The block size is a power of two, expressed through its bit count.
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1u << WEBAUDIO_BLOCK_SIZE_BITS;
76 template <typename SrcT
, typename DestT
>
77 static void InterleaveAndConvertBuffer(const SrcT
* const* aSourceChannels
,
78 uint32_t aLength
, float aVolume
,
79 uint32_t aChannels
, DestT
* aOutput
) {
80 DestT
* output
= aOutput
;
81 for (size_t i
= 0; i
< aLength
; ++i
) {
82 for (size_t channel
= 0; channel
< aChannels
; ++channel
) {
83 float v
= AudioSampleToFloat(aSourceChannels
[channel
][i
]) * aVolume
;
84 *output
= FloatToAudioSample
<DestT
>(v
);
// Splits an interleaved buffer into planar per-channel buffers, converting
// each sample with ConvertAudioSample().
// For channel i and frame j the sample is read from
// aSourceBuffer[i + j * aChannels] and written to aOutput[i][j].
// NOTE(review): the declaration of the aOutput parameter sits on a line that
// is not part of this excerpt.
90 template <typename SrcT
, typename DestT
>
91 static void DeinterleaveAndConvertBuffer(const SrcT
* aSourceBuffer
,
92 uint32_t aFrames
, uint32_t aChannels
,
// Outer loop: one pass per channel.
94 for (size_t i
= 0; i
< aChannels
; i
++) {
// Channel i starts at offset i within the first interleaved frame.
95 size_t interleavedIndex
= i
;
96 for (size_t j
= 0; j
< aFrames
; j
++) {
97 ConvertAudioSample(aSourceBuffer
[interleavedIndex
], aOutput
[i
][j
]);
// Step to the same channel in the next frame.
98 interleavedIndex
+= aChannels
;
// Provides a shared buffer (gZeroChannel) that callers use as a channel of
// silence, e.g. as the filler channel passed to AudioChannelsUpMix() in
// AudioSegment::WriteChunk().
103 class SilentChannel
{
// Number of frames the zero buffer covers.
105 static const int AUDIO_PROCESSING_FRAMES
= 640; /* > 10ms of 48KHz audio */
// Storage sized for AUDIO_PROCESSING_FRAMES samples of the largest sample
// format. NOTE(review): the element type is declared on a line that is not
// part of this excerpt.
107 gZeroChannel
[MAX_AUDIO_SAMPLE_SIZE
* AUDIO_PROCESSING_FRAMES
];
108 // We take advantage of the fact that zero in float and zero in int have the
109 // same all-zeros bit layout.
// Typed accessor for the zero buffer; only declared here.
110 template <typename T
>
111 static const T
* ZeroChannel();
115 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
116 * interleave the channel data. A total of aOutputChannels*aDuration
117 * interleaved samples will be copied to a channel buffer in aOutput.
// Produces aOutputChannels * aDuration interleaved samples in aOutput from
// the planar input channels in aChannelData, applying aVolume.
//
// If the input already has exactly aOutputChannels channels it is interleaved
// directly. The remaining statements mix into a scratch planar buffer first
// and then interleave that.
// NOTE(review): the line joining the two paths and the declaration of
// `outputBuffers` are not part of this excerpt.
119 template <typename SrcT
, typename DestT
>
120 void DownmixAndInterleave(const nsTArray
<const SrcT
*>& aChannelData
,
121 int32_t aDuration
, float aVolume
,
122 uint32_t aOutputChannels
, DestT
* aOutput
) {
// Channel counts already match: no mixing needed.
123 if (aChannelData
.Length() == aOutputChannels
) {
124 InterleaveAndConvertBuffer(aChannelData
.Elements(), aDuration
, aVolume
,
125 aOutputChannels
, aOutput
);
// Per-output-channel pointers into the scratch storage.
127 AutoTArray
<SrcT
*, GUESS_AUDIO_CHANNELS
> outputChannelData
;
129 SilentChannel::AUDIO_PROCESSING_FRAMES
* GUESS_AUDIO_CHANNELS
>
131 outputChannelData
.SetLength(aOutputChannels
);
// Scratch storage holds aDuration samples for each output channel.
132 outputBuffers
.SetLength(aDuration
* aOutputChannels
);
// Channel i uses the i-th aDuration-long slice of the scratch storage.
133 for (uint32_t i
= 0; i
< aOutputChannels
; i
++) {
134 outputChannelData
[i
] = outputBuffers
.Elements() + aDuration
* i
;
// Mix into the scratch channels, then interleave them with the volume applied.
136 AudioChannelsDownMix(aChannelData
, outputChannelData
.Elements(),
137 aOutputChannels
, aDuration
);
138 InterleaveAndConvertBuffer(outputChannelData
.Elements(), aDuration
, aVolume
,
139 aOutputChannels
, aOutput
);
144 * An AudioChunk represents a multi-channel buffer of audio samples.
145 * It references an underlying ThreadSharedObject which manages the lifetime
146 * of the buffer. An AudioChunk maintains its own duration and channel data
147 * pointers so it can represent a subinterval of a buffer without copying.
148 * An AudioChunk can store its individual channels anywhere; it maintains
149 * separate pointers to each channel's buffer.
// Local alias for the sample format enumeration.
152 typedef mozilla::AudioSampleFormat SampleFormat
;
// Default construction relies on the in-class member initializers (zero
// duration, volume 1.0f, AUDIO_FORMAT_SILENCE, no principal).
154 AudioChunk() = default;
// Builds a chunk of aDuration frames whose channels are the pointers in
// aChannelData; the sample format is derived from T.
// The assertion requires that the buffer is null exactly when aChannelData is
// empty.
// NOTE(review): one member initializer line (between mDuration and
// mBufferFormat) is not part of this excerpt; presumably it stores aBuffer in
// mBuffer.
156 template <typename T
>
157 AudioChunk(already_AddRefed
<ThreadSharedObject
> aBuffer
,
158 const nsTArray
<const T
*>& aChannelData
, TrackTime aDuration
,
159 PrincipalHandle aPrincipalHandle
)
160 : mDuration(aDuration
),
162 mBufferFormat(AudioSampleTypeToFormat
<T
>::Format
),
163 mPrincipalHandle(std::move(aPrincipalHandle
)) {
164 MOZ_ASSERT(!mBuffer
== aChannelData
.IsEmpty(), "Appending invalid data ?");
// Store each typed channel pointer in the untyped mChannelData array.
165 for (const T
* data
: aChannelData
) {
166 mChannelData
.AppendElement(data
);
// Narrows this chunk to the frame range [aStart, aEnd) of its current
// contents: every channel pointer is advanced by aStart samples and the
// duration becomes aEnd - aStart.
// Preconditions (asserted): 0 <= aStart < aEnd <= mDuration.
// NOTE(review): two closing braces follow the loop in the original numbering,
// so the pointer adjustment appears to sit inside a guard whose opening line
// is not part of this excerpt.
171 void SliceTo(TrackTime aStart
, TrackTime aEnd
) {
172 MOZ_ASSERT(aStart
>= 0, "Slice out of bounds: invalid start");
173 MOZ_ASSERT(aStart
< aEnd
, "Slice out of bounds: invalid range");
174 MOZ_ASSERT(aEnd
<= mDuration
, "Slice out of bounds: invalid end");
// The offset is passed on as int32_t below, hence the 32-bit bound.
177 MOZ_ASSERT(aStart
< INT32_MAX
,
178 "Can't slice beyond 32-bit sample lengths");
179 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
180 mChannelData
[channel
] = AddAudioSampleOffset(
181 mChannelData
[channel
], mBufferFormat
, int32_t(aStart
));
184 mDuration
= aEnd
- aStart
;
186 TrackTime
GetDuration() const { return mDuration
; }
// Decides whether aOther, placed directly after this chunk, could be merged
// into it (the caller then just extends this chunk's duration).
// The visible checks compare the buffer object, the volume and the principal
// handle, reject durations above INT32_MAX, and compare each of aOther's
// channel pointers with this chunk's pointer advanced by mDuration samples.
// NOTE(review): the bodies of the branches and the final return are on lines
// that are not part of this excerpt.
187 bool CanCombineWithFollowing(const AudioChunk
& aOther
) const {
188 if (aOther
.mBuffer
!= mBuffer
) {
194 if (aOther
.mVolume
!= mVolume
) {
197 if (aOther
.mPrincipalHandle
!= mPrincipalHandle
) {
// Past this point both chunks reference the same buffer object, so format and
// channel count are expected to agree.
200 NS_ASSERTION(aOther
.mBufferFormat
== mBufferFormat
,
201 "Wrong metadata about buffer");
202 NS_ASSERTION(aOther
.mChannelData
.Length() == mChannelData
.Length(),
203 "Mismatched channel count");
// The offset below is computed with a 32-bit value.
204 if (mDuration
> INT32_MAX
) {
// aOther must start exactly where this chunk's data ends, on every channel.
207 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
208 if (aOther
.mChannelData
[channel
] !=
209 AddAudioSampleOffset(mChannelData
[channel
], mBufferFormat
,
210 int32_t(mDuration
))) {
216 bool IsNull() const { return mBuffer
== nullptr; }
// Turns this chunk into aDuration frames of silence: the channel pointers are
// cleared, the format becomes AUDIO_FORMAT_SILENCE and the principal handle is
// reset to PRINCIPAL_HANDLE_NONE.
// NOTE(review): two statements of the original body are not part of this
// excerpt; presumably they reset mBuffer and mVolume.
217 void SetNull(TrackTime aDuration
) {
219 mChannelData
.Clear();
220 mDuration
= aDuration
;
222 mBufferFormat
= AUDIO_FORMAT_SILENCE
;
223 mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
226 uint32_t ChannelCount() const { return mChannelData
.Length(); }
228 bool IsMuted() const { return mVolume
== 0.0f
; }
230 size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf
) const {
231 return SizeOfExcludingThis(aMallocSizeOf
, true);
// Memory reporting: sums the heap memory attributed to this chunk.
// aUnshared == true  -> count mBuffer only if it is not shared.
// aUnshared == false -> always count mBuffer when present.
// The channel pointer array's own storage is always added.
// NOTE(review): the declaration of `amount` and the return statement are not
// part of this excerpt.
234 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
, bool aUnshared
) const {
238 // - mBuffer - Can hold data that is also in the decoded audio queue. If it
239 // is not shared, or unshared == false it gets counted.
240 if (mBuffer
&& (!aUnshared
|| !mBuffer
->IsShared())) {
241 amount
+= mBuffer
->SizeOfIncludingThis(aMallocSizeOf
);
244 // Memory in the array is owned by mBuffer.
245 amount
+= mChannelData
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
// Typed read-only view of the channel pointers. T must match the chunk's
// sample format (asserted).
// The untyped pointer array is reinterpreted as an array of `const T*`
// rather than copied.
// NOTE(review): the operand of the reinterpret_cast is on a line that is not
// part of this excerpt.
249 template <typename T
>
250 const nsTArray
<const T
*>& ChannelData() const {
251 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
252 return *reinterpret_cast<const AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
>*>(
257 * ChannelDataForWrite() should be used only when mBuffer is owned solely
258 * by the calling thread.
260 template <typename T
>
261 T
* ChannelDataForWrite(size_t aChannel
) {
262 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
263 MOZ_ASSERT(!mBuffer
->IsShared());
264 return static_cast<T
*>(const_cast<void*>(mChannelData
[aChannel
]));
// Builds an AudioChunk of aFrames frames and aChannels channels from the
// interleaved samples in aBuffer. The samples are copied into a newly created
// SharedBuffer in planar layout.
// NOTE(review): several lines of the original body are not part of this
// excerpt, including the branch structure around the mono case and the
// computation of `offset`.
267 template <typename T
>
268 static AudioChunk
FromInterleavedBuffer(
269 const T
* aBuffer
, size_t aFrames
, uint32_t aChannels
,
270 const PrincipalHandle
& aPrincipalHandle
) {
// Total byte size is sizeof(T) * aFrames * aChannels, computed with CheckedInt.
271 CheckedInt
<size_t> bufferSize(sizeof(T
));
272 bufferSize
*= aFrames
;
273 bufferSize
*= aChannels
;
274 RefPtr
<SharedBuffer
> buffer
= SharedBuffer::Create(bufferSize
);
// Writable per-channel pointers into the new buffer.
276 AutoTArray
<T
*, 8> deinterleaved
;
// Mono input is already planar: copy it straight into the buffer.
277 if (aChannels
== 1) {
278 PodCopy(static_cast<T
*>(buffer
->Data()), aBuffer
, aFrames
);
279 deinterleaved
.AppendElement(static_cast<T
*>(buffer
->Data()));
// Multi-channel input: give every channel its own region of the buffer and
// deinterleave into those regions.
281 deinterleaved
.SetLength(aChannels
);
282 T
* samples
= static_cast<T
*>(buffer
->Data());
285 for (uint32_t i
= 0; i
< aChannels
; ++i
) {
286 deinterleaved
[i
] = samples
+ offset
;
// The frame count is narrowed to uint32_t for this call.
290 DeinterleaveAndConvertBuffer(aBuffer
, static_cast<uint32_t>(aFrames
),
291 aChannels
, deinterleaved
.Elements());
// The AudioChunk constructor takes pointers-to-const, so copy them over.
294 AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
> channelData
;
295 channelData
.AppendElements(deinterleaved
);
296 return AudioChunk(buffer
.forget(), channelData
,
297 static_cast<TrackTime
>(aFrames
), aPrincipalHandle
);
300 const PrincipalHandle
& GetPrincipalHandle() const { return mPrincipalHandle
; }
// Data members of AudioChunk. The in-class initializers describe an empty,
// silent chunk: zero duration, no buffer, volume 1.0f, AUDIO_FORMAT_SILENCE
// and PRINCIPAL_HANDLE_NONE.
302 TrackTime mDuration
= 0; // in frames within the buffer
303 RefPtr
<ThreadSharedObject
> mBuffer
; // the buffer object whose lifetime is
304 // managed; null means data is all zeroes
305 // one pointer per channel; empty if and only if mBuffer is null
306 CopyableAutoTArray
<const void*, GUESS_AUDIO_CHANNELS
> mChannelData
;
307 float mVolume
= 1.0f
; // volume multiplier to apply
308 // format of frames in mBuffer (or silence if mBuffer is null)
309 SampleFormat mBufferFormat
= AUDIO_FORMAT_SILENCE
;
310 // principalHandle for the data in this chunk.
311 // This can be compared to an nsIPrincipal* when back on main thread.
312 PrincipalHandle mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
316 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
317 * The audio rate is determined by the track, not stored in this class.
// Segment type whose chunks are AudioChunks; built on
// MediaSegmentBase<AudioSegment, AudioChunk>.
319 class AudioSegment
: public MediaSegmentBase
<AudioSegment
, AudioChunk
> {
320 // The channel count that MaxChannelCount() returned last time it was called.
321 uint32_t mMemoizedMaxChannelCount
= 0;
// Local alias for the sample format enumeration.
324 typedef mozilla::AudioSampleFormat SampleFormat
;
// Creates an empty segment tagged with the AUDIO segment type.
326 AudioSegment() : MediaSegmentBase
<AudioSegment
, AudioChunk
>(AUDIO
) {}
// Segments can be move-constructed; copy construction and copy assignment are
// deleted.
328 AudioSegment(AudioSegment
&& aSegment
) = default;
330 AudioSegment(const AudioSegment
&) = delete;
331 AudioSegment
& operator=(const AudioSegment
&) = delete;
333 ~AudioSegment() = default;
335 // Resample the whole segment in place. `aResampler` is an instance of a
336 // resampler, initialized with `aResamplerChannelCount` channels. If this
337 // function finds a chunk whose channel count differs from that value, a new
338 // resampler is created and handed to `aResampler`, and
339 // `aResamplerChannelCount` is updated with the new channel count value.
// T is the sample type of the chunks' channel data. Each non-null chunk gets
// a new SharedChannelArrayBuffer<T> holding the resampled samples, and the
// segment duration is accumulated from the chunks' new durations.
// NOTE(review): many lines of the original body are not part of this excerpt
// (the remaining parameters, the binding of `c`, the null-chunk branch
// structure, the computation of `outSize` and the filling of `bufferPtrs`).
340 template <typename T
>
341 void Resample(nsAutoRef
<SpeexResamplerState
>& aResampler
,
342 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
346 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
// Per-chunk output: one sample array per channel, plus pointers to them.
347 AutoTArray
<nsTArray
<T
>, GUESS_AUDIO_CHANNELS
> output
;
348 AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
> bufferPtrs
;
350 // If this chunk is null, don't bother resampling, just alter its duration
// Scale the duration by the rate ratio (integer division).
352 c
.mDuration
= (c
.mDuration
* aOutRate
) / aInRate
;
353 mDuration
+= c
.mDuration
;
356 uint32_t channels
= c
.mChannelData
.Length();
357 // This might introduce a discontinuity, but a channel count change in the
358 // middle of a stream is not that common. This also initializes the
359 // resampler as late as possible.
360 if (channels
!= *aResamplerChannelCount
) {
361 SpeexResamplerState
* state
=
362 speex_resampler_init(channels
, aInRate
, aOutRate
,
363 SPEEX_RESAMPLER_QUALITY_DEFAULT
, nullptr);
// aResampler takes ownership of the new state.
365 aResampler
.own(state
);
366 *aResamplerChannelCount
= channels
;
368 output
.SetLength(channels
);
369 bufferPtrs
.SetLength(channels
);
370 uint32_t inFrames
= c
.mDuration
;
371 // Round up to allocate; the last frame may not be used.
// Guards the 64-bit multiplication below against overflow.
372 NS_ASSERTION((UINT64_MAX
- aInRate
+ 1) / c
.mDuration
>= aOutRate
,
375 (static_cast<uint64_t>(c
.mDuration
) * aOutRate
+ aInRate
- 1) /
// Resample each channel separately into its own output array.
377 for (uint32_t i
= 0; i
< channels
; i
++) {
378 T
* out
= output
[i
].AppendElements(outSize
);
379 uint32_t outFrames
= outSize
;
381 const T
* in
= static_cast<const T
*>(c
.mChannelData
[i
]);
382 dom::WebAudioUtils::SpeexResamplerProcess(aResampler
.get(), i
, in
,
383 &inFrames
, out
, &outFrames
);
// The whole input of the chunk is expected to be consumed.
384 MOZ_ASSERT(inFrames
== c
.mDuration
);
// Trim the array to the number of frames actually produced.
387 output
[i
].SetLength(outFrames
);
389 MOZ_ASSERT(channels
> 0);
// The chunk now lasts as many frames as were produced for channel 0.
390 c
.mDuration
= output
[0].Length();
391 c
.mBuffer
= new mozilla::SharedChannelArrayBuffer
<T
>(std::move(output
));
// Point the chunk's channels at the resampled data.
392 for (uint32_t i
= 0; i
< channels
; i
++) {
393 c
.mChannelData
[i
] = bufferPtrs
[i
];
395 mDuration
+= c
.mDuration
;
// Declaration only; the definition is not in this file. The leading
// parameters mirror those of Resample() above.
// NOTE(review): the rest of the parameter list is on lines that are not part
// of this excerpt.
399 void ResampleChunks(nsAutoRef
<SpeexResamplerState
>& aResampler
,
400 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
403 template <typename T
>
404 void AppendFrames(already_AddRefed
<ThreadSharedObject
> aBuffer
,
405 const nsTArray
<const T
*>& aChannelData
, TrackTime aDuration
,
406 const PrincipalHandle
& aPrincipalHandle
) {
407 AppendAndConsumeChunk(AudioChunk(std::move(aBuffer
), aChannelData
,
408 aDuration
, aPrincipalHandle
));
410 void AppendSegment(const AudioSegment
* aSegment
) {
411 MOZ_ASSERT(aSegment
);
413 for (const AudioChunk
& c
: aSegment
->mChunks
) {
414 AudioChunk
* chunk
= AppendChunk(c
.GetDuration());
415 chunk
->mBuffer
= c
.mBuffer
;
416 chunk
->mChannelData
= c
.mChannelData
;
417 chunk
->mBufferFormat
= c
.mBufferFormat
;
418 chunk
->mPrincipalHandle
= c
.mPrincipalHandle
;
// Appends aFrames frames of interleaved audio from aBuffer to the segment.
// The samples are converted to a chunk by AudioChunk::FromInterleavedBuffer()
// and that chunk is handed to AppendAndConsumeChunk().
// NOTE(review): the aChannels parameter is declared on a line that is not
// part of this excerpt.
421 template <typename T
>
422 void AppendFromInterleavedBuffer(const T
* aBuffer
, size_t aFrames
,
424 const PrincipalHandle
& aPrincipalHandle
) {
425 AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer
<T
>(
426 aBuffer
, aFrames
, aChannels
, aPrincipalHandle
));
428 // Write the segment data into an interleaved buffer. Do mixing if the
429 // AudioChunk's channel count in the segment is different from aChannels.
430 // Returns sample count of the converted audio data. The converted data will
431 // be stored into aBuffer.
432 size_t WriteToInterleavedBuffer(nsTArray
<AudioDataValue
>& aBuffer
,
433 uint32_t aChannels
) const;
434 // Consumes aChunk, and append it to the segment if its duration is not zero.
// How it works: `chunk` starts out pointing at a throwaway chunk (`unused`).
// A scope-exit handler moves aChunk's buffer, channel data and principal
// handle, and copies its volume and format, into whatever `chunk` points at
// when the function returns. Only the last visible statement redirects
// `chunk` to a chunk newly appended to the segment.
// When the last chunk of the segment can be combined with aChunk, that last
// chunk and the segment are simply extended by aChunk's duration.
// NOTE(review): several lines of the original body are not part of this
// excerpt (the declaration of `unused`, the end of the scope-exit lambda and
// the early returns after the two checks).
435 void AppendAndConsumeChunk(AudioChunk
&& aChunk
) {
437 AudioChunk
* chunk
= &unused
;
439 // Always consume aChunk. The chunk's mBuffer can be non-null even if its
441 auto consume
= MakeScopeExit([&] {
442 chunk
->mBuffer
= std::move(aChunk
.mBuffer
);
443 chunk
->mChannelData
= std::move(aChunk
.mChannelData
);
// Channel pointers without a buffer object would be invalid.
445 MOZ_ASSERT(chunk
->mBuffer
|| chunk
->mChannelData
.IsEmpty(),
446 "Appending invalid data ?");
448 chunk
->mVolume
= aChunk
.mVolume
;
449 chunk
->mBufferFormat
= aChunk
.mBufferFormat
;
450 chunk
->mPrincipalHandle
= std::move(aChunk
.mPrincipalHandle
);
// Zero-length chunks are not appended.
453 if (aChunk
.GetDuration() == 0) {
// Contiguous with the previous chunk: extend it instead of adding a chunk.
457 if (!mChunks
.IsEmpty() &&
458 mChunks
.LastElement().CanCombineWithFollowing(aChunk
)) {
459 mChunks
.LastElement().mDuration
+= aChunk
.GetDuration();
460 mDuration
+= aChunk
.GetDuration();
// Otherwise add a new chunk; the scope-exit handler fills it in.
464 chunk
= AppendChunk(aChunk
.mDuration
);
// Declarations only; the definitions are not in this file.
// NOTE(review): presumably applies aVolume to the chunks of this segment;
// confirm against the definition.
466 void ApplyVolume(float aVolume
);
467 // Mix the segment into a mixer, interleaved. This is useful to output a
468 // segment to a system audio callback. It up or down mixes to aChannelCount
470 void WriteTo(AudioMixer
& aMixer
, uint32_t aChannelCount
,
471 uint32_t aSampleRate
);
472 // Mix the segment into a mixer, keeping it planar, up or down mixing to
473 // aChannelCount channels.
474 void Mix(AudioMixer
& aMixer
, uint32_t aChannelCount
, uint32_t aSampleRate
);
476 // Returns the maximum channel count across all chunks in this segment.
477 // Should there be no chunk with a channel count we return the memoized return
478 // value from last time this method was called.
479 uint32_t MaxChannelCount() {
480 uint32_t channelCount
= 0;
481 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
482 if (ci
->ChannelCount()) {
483 channelCount
= std::max(channelCount
, ci
->ChannelCount());
486 if (channelCount
== 0) {
487 return mMemoizedMaxChannelCount
;
489 return mMemoizedMaxChannelCount
= channelCount
;
492 static Type
StaticType() { return AUDIO
; }
494 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
495 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
498 PrincipalHandle
GetOldestPrinciple() const {
499 const AudioChunk
* chunk
= mChunks
.IsEmpty() ? nullptr : &mChunks
[0];
500 return chunk
? chunk
->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE
;
503 // Iterate on each chunks until the input function returns true.
504 template <typename Function
>
505 void IterateOnChunks(const Function
&& aFunction
) {
506 for (uint32_t idx
= 0; idx
< mChunks
.Length(); idx
++) {
507 if (aFunction(&mChunks
[idx
])) {
// Writes one chunk to aOutputBuffer as interleaved AudioDataValue samples
// with aOutputChannels channels, applying aVolume.
// The chunk's channel pointers are copied first. If there are fewer channels
// than requested they are up-mixed, with SilentChannel's zero buffer as
// filler. If there are then more channels than requested the data is
// down-mixed and interleaved; the last visible statement interleaves directly.
// NOTE(review): the lines joining these branches are not part of this
// excerpt.
514 template <typename SrcT
>
515 void WriteChunk(const AudioChunk
& aChunk
, uint32_t aOutputChannels
,
516 float aVolume
, AudioDataValue
* aOutputBuffer
) {
// Work on a copy of the pointers so the chunk itself is left untouched.
517 AutoTArray
<const SrcT
*, GUESS_AUDIO_CHANNELS
> channelData
;
519 channelData
= aChunk
.ChannelData
<SrcT
>().Clone();
521 if (channelData
.Length() < aOutputChannels
) {
522 // Up-mix. Note that this might actually make channelData have more
523 // than aOutputChannels temporarily.
524 AudioChannelsUpMix(&channelData
, aOutputChannels
,
525 SilentChannel::ZeroChannel
<SrcT
>());
527 if (channelData
.Length() > aOutputChannels
) {
529 DownmixAndInterleave(channelData
, aChunk
.mDuration
, aVolume
,
530 aOutputChannels
, aOutputBuffer
);
532 InterleaveAndConvertBuffer(channelData
.Elements(), aChunk
.mDuration
,
533 aVolume
, aOutputChannels
, aOutputBuffer
);
537 } // namespace mozilla
539 #endif /* MOZILLA_AUDIOSEGMENT_H_ */