1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef MOZILLA_AUDIOSEGMENT_H_
7 #define MOZILLA_AUDIOSEGMENT_H_
9 #include <speex/speex_resampler.h>
10 #include "MediaTrackGraph.h"
11 #include "MediaSegment.h"
12 #include "AudioSampleFormat.h"
13 #include "AudioChannelFormat.h"
14 #include "SharedBuffer.h"
15 #include "WebAudioUtils.h"
16 #include "nsAutoRef.h"
17 #ifdef MOZILLA_INTERNAL_API
18 # include "mozilla/TimeStamp.h"
25 } // namespace mozilla
26 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk
)
29 * This allows compilation of nsTArray<AudioSegment> and
30 * AutoTArray<AudioSegment> since without it, static analysis fails on the
31 * mChunks member being a non-memmovable AutoTArray.
33 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
34 * never come into effect.
36 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment
)
41 class SharedChannelArrayBuffer
: public ThreadSharedObject
{
43 explicit SharedChannelArrayBuffer(nsTArray
<nsTArray
<T
> >&& aBuffers
)
44 : mBuffers(std::move(aBuffers
)) {}
46 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
) const override
{
48 amount
+= mBuffers
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
49 for (size_t i
= 0; i
< mBuffers
.Length(); i
++) {
50 amount
+= mBuffers
[i
].ShallowSizeOfExcludingThis(aMallocSizeOf
);
56 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
57 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
60 nsTArray
<nsTArray
<T
> > mBuffers
;
/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size.
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
75 template <typename SrcT
, typename DestT
>
76 static void InterleaveAndConvertBuffer(const SrcT
* const* aSourceChannels
,
77 uint32_t aLength
, float aVolume
,
78 uint32_t aChannels
, DestT
* aOutput
) {
79 DestT
* output
= aOutput
;
80 for (size_t i
= 0; i
< aLength
; ++i
) {
81 for (size_t channel
= 0; channel
< aChannels
; ++channel
) {
82 float v
= AudioSampleToFloat(aSourceChannels
[channel
][i
]) * aVolume
;
83 *output
= FloatToAudioSample
<DestT
>(v
);
/**
 * Deinterleave aSourceBuffer (aFrames frames of aChannels interleaved
 * samples) into the planar destination arrays aOutput, converting the
 * sample format from SrcT to DestT on the way.
 *
 * @param aSourceBuffer  aFrames*aChannels interleaved source samples
 * @param aFrames        number of frames
 * @param aChannels      number of interleaved channels
 * @param aOutput        aChannels destination pointers, each to aFrames samples
 */
template <typename SrcT, typename DestT>
static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
                                         uint32_t aFrames, uint32_t aChannels,
                                         DestT** aOutput) {
  for (size_t i = 0; i < aChannels; i++) {
    // Channel i's first sample sits at offset i; subsequent samples are
    // aChannels apart in the interleaved layout.
    size_t interleavedIndex = i;
    for (size_t j = 0; j < aFrames; j++) {
      ConvertAudioSample(aSourceBuffer[interleavedIndex], aOutput[i][j]);
      interleavedIndex += aChannels;
    }
  }
}
102 class SilentChannel
{
104 static const int AUDIO_PROCESSING_FRAMES
= 640; /* > 10ms of 48KHz audio */
106 gZeroChannel
[MAX_AUDIO_SAMPLE_SIZE
* AUDIO_PROCESSING_FRAMES
];
107 // We take advantage of the fact that zero in float and zero in int have the
108 // same all-zeros bit layout.
109 template <typename T
>
110 static const T
* ZeroChannel();
114 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
115 * interleave the channel data. A total of aOutputChannels*aDuration
116 * interleaved samples will be copied to a channel buffer in aOutput.
118 template <typename SrcT
, typename DestT
>
119 void DownmixAndInterleave(const nsTArray
<const SrcT
*>& aChannelData
,
120 int32_t aDuration
, float aVolume
,
121 uint32_t aOutputChannels
, DestT
* aOutput
) {
122 if (aChannelData
.Length() == aOutputChannels
) {
123 InterleaveAndConvertBuffer(aChannelData
.Elements(), aDuration
, aVolume
,
124 aOutputChannels
, aOutput
);
126 AutoTArray
<SrcT
*, GUESS_AUDIO_CHANNELS
> outputChannelData
;
128 SilentChannel::AUDIO_PROCESSING_FRAMES
* GUESS_AUDIO_CHANNELS
>
130 outputChannelData
.SetLength(aOutputChannels
);
131 outputBuffers
.SetLength(aDuration
* aOutputChannels
);
132 for (uint32_t i
= 0; i
< aOutputChannels
; i
++) {
133 outputChannelData
[i
] = outputBuffers
.Elements() + aDuration
* i
;
135 AudioChannelsDownMix(aChannelData
, outputChannelData
.Elements(),
136 aOutputChannels
, aDuration
);
137 InterleaveAndConvertBuffer(outputChannelData
.Elements(), aDuration
, aVolume
,
138 aOutputChannels
, aOutput
);
143 * An AudioChunk represents a multi-channel buffer of audio samples.
144 * It references an underlying ThreadSharedObject which manages the lifetime
145 * of the buffer. An AudioChunk maintains its own duration and channel data
146 * pointers so it can represent a subinterval of a buffer without copying.
147 * An AudioChunk can store its individual channels anywhere; it maintains
148 * separate pointers to each channel's buffer.
151 typedef mozilla::AudioSampleFormat SampleFormat
;
154 void SliceTo(TrackTime aStart
, TrackTime aEnd
) {
155 MOZ_ASSERT(aStart
>= 0 && aStart
< aEnd
&& aEnd
<= mDuration
,
156 "Slice out of bounds");
158 MOZ_ASSERT(aStart
< INT32_MAX
,
159 "Can't slice beyond 32-bit sample lengths");
160 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
161 mChannelData
[channel
] = AddAudioSampleOffset(
162 mChannelData
[channel
], mBufferFormat
, int32_t(aStart
));
165 mDuration
= aEnd
- aStart
;
167 TrackTime
GetDuration() const { return mDuration
; }
168 bool CanCombineWithFollowing(const AudioChunk
& aOther
) const {
169 if (aOther
.mBuffer
!= mBuffer
) {
175 if (aOther
.mVolume
!= mVolume
) {
178 if (aOther
.mPrincipalHandle
!= mPrincipalHandle
) {
181 NS_ASSERTION(aOther
.mBufferFormat
== mBufferFormat
,
182 "Wrong metadata about buffer");
183 NS_ASSERTION(aOther
.mChannelData
.Length() == mChannelData
.Length(),
184 "Mismatched channel count");
185 if (mDuration
> INT32_MAX
) {
188 for (uint32_t channel
= 0; channel
< mChannelData
.Length(); ++channel
) {
189 if (aOther
.mChannelData
[channel
] !=
190 AddAudioSampleOffset(mChannelData
[channel
], mBufferFormat
,
191 int32_t(mDuration
))) {
197 bool IsNull() const { return mBuffer
== nullptr; }
198 void SetNull(TrackTime aDuration
) {
200 mChannelData
.Clear();
201 mDuration
= aDuration
;
203 mBufferFormat
= AUDIO_FORMAT_SILENCE
;
204 mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
207 uint32_t ChannelCount() const { return mChannelData
.Length(); }
209 bool IsMuted() const { return mVolume
== 0.0f
; }
211 size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf
) const {
212 return SizeOfExcludingThis(aMallocSizeOf
, true);
215 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
, bool aUnshared
) const {
219 // - mBuffer - Can hold data that is also in the decoded audio queue. If it
220 // is not shared, or unshared == false it gets counted.
221 if (mBuffer
&& (!aUnshared
|| !mBuffer
->IsShared())) {
222 amount
+= mBuffer
->SizeOfIncludingThis(aMallocSizeOf
);
225 // Memory in the array is owned by mBuffer.
226 amount
+= mChannelData
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
230 template <typename T
>
231 const nsTArray
<const T
*>& ChannelData() const {
232 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
233 return *reinterpret_cast<const AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
>*>(
238 * ChannelFloatsForWrite() should be used only when mBuffer is owned solely
239 * by the calling thread.
241 template <typename T
>
242 T
* ChannelDataForWrite(size_t aChannel
) {
243 MOZ_ASSERT(AudioSampleTypeToFormat
<T
>::Format
== mBufferFormat
);
244 MOZ_ASSERT(!mBuffer
->IsShared());
245 return static_cast<T
*>(const_cast<void*>(mChannelData
[aChannel
]));
248 const PrincipalHandle
& GetPrincipalHandle() const { return mPrincipalHandle
; }
250 TrackTime mDuration
= 0; // in frames within the buffer
251 RefPtr
<ThreadSharedObject
> mBuffer
; // the buffer object whose lifetime is
252 // managed; null means data is all zeroes
253 // one pointer per channel; empty if and only if mBuffer is null
254 CopyableAutoTArray
<const void*, GUESS_AUDIO_CHANNELS
> mChannelData
;
255 float mVolume
= 1.0f
; // volume multiplier to apply
256 // format of frames in mBuffer (or silence if mBuffer is null)
257 SampleFormat mBufferFormat
= AUDIO_FORMAT_SILENCE
;
258 // principalHandle for the data in this chunk.
259 // This can be compared to an nsIPrincipal* when back on main thread.
260 PrincipalHandle mPrincipalHandle
= PRINCIPAL_HANDLE_NONE
;
264 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
265 * The audio rate is determined by the track, not stored in this class.
267 class AudioSegment
: public MediaSegmentBase
<AudioSegment
, AudioChunk
> {
268 // The channel count that MaxChannelCount() returned last time it was called.
269 uint32_t mMemoizedMaxChannelCount
= 0;
272 typedef mozilla::AudioSampleFormat SampleFormat
;
274 AudioSegment() : MediaSegmentBase
<AudioSegment
, AudioChunk
>(AUDIO
) {}
276 AudioSegment(AudioSegment
&& aSegment
) = default;
278 AudioSegment(const AudioSegment
&) = delete;
279 AudioSegment
& operator=(const AudioSegment
&) = delete;
281 ~AudioSegment() = default;
283 // Resample the whole segment in place. `aResampler` is an instance of a
284 // resampler, initialized with `aResamplerChannelCount` channels. If this
285 // function finds a chunk with more channels, `aResampler` is destroyed and a
286 // new resampler is created, and `aResamplerChannelCount` is updated with the
287 // new channel count value.
288 template <typename T
>
289 void Resample(nsAutoRef
<SpeexResamplerState
>& aResampler
,
290 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
294 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
295 AutoTArray
<nsTArray
<T
>, GUESS_AUDIO_CHANNELS
> output
;
296 AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
> bufferPtrs
;
298 // If this chunk is null, don't bother resampling, just alter its duration
300 c
.mDuration
= (c
.mDuration
* aOutRate
) / aInRate
;
301 mDuration
+= c
.mDuration
;
304 uint32_t channels
= c
.mChannelData
.Length();
305 // This might introduce a discontinuity, but a channel count change in the
306 // middle of a stream is not that common. This also initializes the
307 // resampler as late as possible.
308 if (channels
!= *aResamplerChannelCount
) {
309 SpeexResamplerState
* state
=
310 speex_resampler_init(channels
, aInRate
, aOutRate
,
311 SPEEX_RESAMPLER_QUALITY_DEFAULT
, nullptr);
313 aResampler
.own(state
);
314 *aResamplerChannelCount
= channels
;
316 output
.SetLength(channels
);
317 bufferPtrs
.SetLength(channels
);
318 uint32_t inFrames
= c
.mDuration
;
319 // Round up to allocate; the last frame may not be used.
320 NS_ASSERTION((UINT32_MAX
- aInRate
+ 1) / c
.mDuration
>= aOutRate
,
322 uint32_t outSize
= (c
.mDuration
* aOutRate
+ aInRate
- 1) / aInRate
;
323 for (uint32_t i
= 0; i
< channels
; i
++) {
324 T
* out
= output
[i
].AppendElements(outSize
);
325 uint32_t outFrames
= outSize
;
327 const T
* in
= static_cast<const T
*>(c
.mChannelData
[i
]);
328 dom::WebAudioUtils::SpeexResamplerProcess(aResampler
.get(), i
, in
,
329 &inFrames
, out
, &outFrames
);
330 MOZ_ASSERT(inFrames
== c
.mDuration
);
333 output
[i
].SetLength(outFrames
);
335 MOZ_ASSERT(channels
> 0);
336 c
.mDuration
= output
[0].Length();
337 c
.mBuffer
= new mozilla::SharedChannelArrayBuffer
<T
>(std::move(output
));
338 for (uint32_t i
= 0; i
< channels
; i
++) {
339 c
.mChannelData
[i
] = bufferPtrs
[i
];
341 mDuration
+= c
.mDuration
;
345 void ResampleChunks(nsAutoRef
<SpeexResamplerState
>& aResampler
,
346 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
348 void AppendFrames(already_AddRefed
<ThreadSharedObject
> aBuffer
,
349 const nsTArray
<const float*>& aChannelData
,
351 const PrincipalHandle
& aPrincipalHandle
) {
352 AudioChunk
* chunk
= AppendChunk(aDuration
);
353 chunk
->mBuffer
= aBuffer
;
355 MOZ_ASSERT(chunk
->mBuffer
|| aChannelData
.IsEmpty(),
356 "Appending invalid data ?");
358 for (uint32_t channel
= 0; channel
< aChannelData
.Length(); ++channel
) {
359 chunk
->mChannelData
.AppendElement(aChannelData
[channel
]);
361 chunk
->mBufferFormat
= AUDIO_FORMAT_FLOAT32
;
362 chunk
->mPrincipalHandle
= aPrincipalHandle
;
364 void AppendFrames(already_AddRefed
<ThreadSharedObject
> aBuffer
,
365 const nsTArray
<const int16_t*>& aChannelData
,
367 const PrincipalHandle
& aPrincipalHandle
) {
368 AudioChunk
* chunk
= AppendChunk(aDuration
);
369 chunk
->mBuffer
= aBuffer
;
371 MOZ_ASSERT(chunk
->mBuffer
|| aChannelData
.IsEmpty(),
372 "Appending invalid data ?");
374 for (uint32_t channel
= 0; channel
< aChannelData
.Length(); ++channel
) {
375 chunk
->mChannelData
.AppendElement(aChannelData
[channel
]);
377 chunk
->mBufferFormat
= AUDIO_FORMAT_S16
;
378 chunk
->mPrincipalHandle
= aPrincipalHandle
;
380 // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
382 AudioChunk
* AppendAndConsumeChunk(AudioChunk
* aChunk
) {
383 AudioChunk
* chunk
= AppendChunk(aChunk
->mDuration
);
384 chunk
->mBuffer
= std::move(aChunk
->mBuffer
);
385 chunk
->mChannelData
= std::move(aChunk
->mChannelData
);
387 MOZ_ASSERT(chunk
->mBuffer
|| aChunk
->mChannelData
.IsEmpty(),
388 "Appending invalid data ?");
390 chunk
->mVolume
= aChunk
->mVolume
;
391 chunk
->mBufferFormat
= aChunk
->mBufferFormat
;
392 chunk
->mPrincipalHandle
= aChunk
->mPrincipalHandle
;
395 void ApplyVolume(float aVolume
);
396 // Mix the segment into a mixer, interleaved. This is useful to output a
397 // segment to a system audio callback. It up or down mixes to aChannelCount
399 void WriteTo(AudioMixer
& aMixer
, uint32_t aChannelCount
,
400 uint32_t aSampleRate
);
401 // Mix the segment into a mixer, keeping it planar, up or down mixing to
402 // aChannelCount channels.
403 void Mix(AudioMixer
& aMixer
, uint32_t aChannelCount
, uint32_t aSampleRate
);
405 // Returns the maximum channel count across all chunks in this segment.
406 // Should there be no chunk with a channel count we return the memoized return
407 // value from last time this method was called.
408 uint32_t MaxChannelCount() {
409 uint32_t channelCount
= 0;
410 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
411 if (ci
->ChannelCount()) {
412 channelCount
= std::max(channelCount
, ci
->ChannelCount());
415 if (channelCount
== 0) {
416 return mMemoizedMaxChannelCount
;
418 return mMemoizedMaxChannelCount
= channelCount
;
421 static Type
StaticType() { return AUDIO
; }
423 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const override
{
424 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
428 template <typename SrcT
>
429 void WriteChunk(AudioChunk
& aChunk
, uint32_t aOutputChannels
,
430 AudioDataValue
* aOutputBuffer
) {
431 AutoTArray
<const SrcT
*, GUESS_AUDIO_CHANNELS
> channelData
;
433 channelData
= aChunk
.ChannelData
<SrcT
>().Clone();
435 if (channelData
.Length() < aOutputChannels
) {
436 // Up-mix. Note that this might actually make channelData have more
437 // than aOutputChannels temporarily.
438 AudioChannelsUpMix(&channelData
, aOutputChannels
,
439 SilentChannel::ZeroChannel
<SrcT
>());
441 if (channelData
.Length() > aOutputChannels
) {
443 DownmixAndInterleave(channelData
, aChunk
.mDuration
, aChunk
.mVolume
,
444 aOutputChannels
, aOutputBuffer
);
446 InterleaveAndConvertBuffer(channelData
.Elements(), aChunk
.mDuration
,
447 aChunk
.mVolume
, aOutputChannels
, aOutputBuffer
);
451 } // namespace mozilla
453 #endif /* MOZILLA_AUDIOSEGMENT_H_ */