1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "AudioSegment.h"
7 #include "AudioMixer.h"
8 #include "AudioChannelFormat.h"
9 #include "MediaTrackGraph.h" // for nsAutoRefTraits<SpeexResamplerState>
10 #include <speex/speex_resampler.h>
// Definition of SilentChannel::gZeroChannel: an array of
// MAX_AUDIO_SAMPLE_SIZE * SilentChannel::AUDIO_PROCESSING_FRAMES elements,
// all initialized to zero. The ZeroChannel<T>() definitions in this file
// return typed pointers to this same array.
// NOTE(review): the line carrying the element type of this definition is not
// visible in this view -- confirm it against the declaration in the header.
15 SilentChannel::gZeroChannel
[MAX_AUDIO_SAMPLE_SIZE
*
16 SilentChannel::AUDIO_PROCESSING_FRAMES
] = {0};
// Returns SilentChannel::gZeroChannel reinterpreted as a pointer to
// const float. No copy is made; the returned pointer aliases gZeroChannel.
19 const float* SilentChannel::ZeroChannel
<float>() {
20 return reinterpret_cast<const float*>(SilentChannel::gZeroChannel
);
// Returns SilentChannel::gZeroChannel reinterpreted as a pointer to
// const int16_t. No copy is made; the returned pointer aliases gZeroChannel.
24 const int16_t* SilentChannel::ZeroChannel
<int16_t>() {
25 return reinterpret_cast<const int16_t*>(SilentChannel::gZeroChannel
);
// Scales the volume of the whole segment: walks every chunk and multiplies
// its mVolume by aVolume. Only the per-chunk volume field is modified here.
28 void AudioSegment::ApplyVolume(float aVolume
) {
29 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
30 ci
->mVolume
*= aVolume
;
// Resamples every chunk of this segment from aInRate to aOutRate, with T as
// the sample type of the chunks' channel data.
//
// aResampler             Holder of the Speex resampler state. A new state is
//                        created with speex_resampler_init() and handed to
//                        this holder whenever a chunk's channel count differs
//                        from *aResamplerChannelCount.
// aResamplerChannelCount In/out: the channel count the resampler was last set
//                        up for; updated when the resampler is re-created.
// aInRate, aOutRate      Input and output sample rates.
//
// Each chunk's new duration is added to the segment's mDuration.
// NOTE(review): several lines of this function are not visible in this view
// (the end of the signature, the per-chunk reference `c`, the null-chunk
// test, and the declaration of `outSize`); confirm against the full file.
35 void AudioSegment::Resample(nsAutoRef
<SpeexResamplerState
>& aResampler
,
36 uint32_t* aResamplerChannelCount
, uint32_t aInRate
,
40 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
41 AutoTArray
<nsTArray
<T
>, GUESS_AUDIO_CHANNELS
> output
;
42 AutoTArray
<const T
*, GUESS_AUDIO_CHANNELS
> bufferPtrs
;
44 // If this chunk is null, don't bother resampling, just alter its duration
// The duration is rescaled by aOutRate / aInRate using integer arithmetic.
46 c
.mDuration
= (c
.mDuration
* aOutRate
) / aInRate
;
47 mDuration
+= c
.mDuration
;
50 uint32_t channels
= c
.mChannelData
.Length();
51 // This might introduce a discontinuity, but a channel count change in the
52 // middle of a stream is not that common. This also initializes the
53 // resampler as late as possible.
54 if (channels
!= *aResamplerChannelCount
) {
55 SpeexResamplerState
* state
=
56 speex_resampler_init(channels
, aInRate
, aOutRate
,
57 SPEEX_RESAMPLER_QUALITY_DEFAULT
, nullptr);
// aResampler takes the new state; remember which channel count it is for.
59 aResampler
.own(state
);
60 *aResamplerChannelCount
= channels
;
62 output
.SetLength(channels
);
63 bufferPtrs
.SetLength(channels
);
64 uint32_t inFrames
= c
.mDuration
;
65 // Round up to allocate; the last frame may not be used.
66 NS_ASSERTION((UINT64_MAX
- aInRate
+ 1) / c
.mDuration
>= aOutRate
,
// The expression below is ceil(c.mDuration * aOutRate / aInRate), evaluated
// in 64-bit arithmetic.
69 (static_cast<uint64_t>(c
.mDuration
) * aOutRate
+ aInRate
- 1) / aInRate
;
// Resample each channel into its own output array.
70 for (uint32_t i
= 0; i
< channels
; i
++) {
71 T
* out
= output
[i
].AppendElements(outSize
);
72 uint32_t outFrames
= outSize
;
74 const T
* in
= static_cast<const T
*>(c
.mChannelData
[i
]);
75 dom::WebAudioUtils::SpeexResamplerProcess(aResampler
.get(), i
, in
,
76 &inFrames
, out
, &outFrames
);
// The resampler is expected to have consumed the whole input chunk.
77 MOZ_ASSERT(inFrames
== c
.mDuration
);
// Trim the array to the frame count reported back through outFrames.
80 output
[i
].SetLength(outFrames
);
82 MOZ_ASSERT(channels
> 0);
// The chunk's new duration is the length of the first output channel.
83 c
.mDuration
= output
[0].Length();
// Move the resampled arrays into a new shared buffer assigned to the chunk.
84 c
.mBuffer
= new mozilla::SharedChannelArrayBuffer
<T
>(std::move(output
));
// Repoint the chunk's channel pointers at the entries of bufferPtrs.
// NOTE(review): bufferPtrs is presumably filled inside the channel loop on a
// line not visible in this view -- confirm.
85 for (uint32_t i
= 0; i
< channels
; i
++) {
86 c
.mChannelData
[i
] = bufferPtrs
[i
];
88 mDuration
+= c
.mDuration
;
// Resamples all chunks of this segment from aInRate to aOutRate by
// dispatching to Resample<T>(), with T chosen from the sample format found in
// the chunks. aResampler, aResamplerChannelCount and both rates are forwarded
// unchanged to Resample<T>().
//
//   AUDIO_FORMAT_SILENCE, AUDIO_FORMAT_FLOAT32 -> Resample<float>
//   AUDIO_FORMAT_S16                           -> Resample<int16_t>
92 void AudioSegment::ResampleChunks(nsAutoRef
<SpeexResamplerState
>& aResampler
,
93 uint32_t* aResamplerChannelCount
,
94 uint32_t aInRate
, uint32_t aOutRate
) {
// NOTE(review): the body of this empty-segment check is not visible in this
// view; presumably it returns early -- confirm.
95 if (mChunks
.IsEmpty()) {
// Start from "silence" and take the format of a chunk that is not silent.
99 AudioSampleFormat format
= AUDIO_FORMAT_SILENCE
;
100 for (ChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
101 if (ci
->mBufferFormat
!= AUDIO_FORMAT_SILENCE
) {
102 format
= ci
->mBufferFormat
;
107 // If the format is silence at this point, all the chunks are silent. The
108 // actual function we use does not matter, it's just a matter of changing
109 // the chunks duration.
110 case AUDIO_FORMAT_SILENCE
:
111 case AUDIO_FORMAT_FLOAT32
:
112 Resample
<float>(aResampler
, aResamplerChannelCount
, aInRate
, aOutRate
);
114 case AUDIO_FORMAT_S16
:
115 Resample
<int16_t>(aResampler
, aResamplerChannelCount
, aInRate
, aOutRate
);
// Writes the audio of this segment into aBuffer as interleaved samples with
// aChannels channels per frame.
//
// aBuffer   Destination array. Its capacity is grown if it is too small, and
//           its final length is set to the number of samples written.
// aChannels Number of channels in the interleaved output.
//
// NOTE(review): the return statements and the declaration of `offset` are not
// visible in this view; presumably `offset` starts at 0 and is the value
// returned -- confirm against the full file.
123 size_t AudioSegment::WriteToInterleavedBuffer(nsTArray
<AudioDataValue
>& aBuffer
,
124 uint32_t aChannels
) const {
126 if (GetDuration() <= 0) {
127 MOZ_ASSERT(GetDuration() == 0);
131 // Calculate how many samples are in this segment
// (frames * aChannels, with overflow tracked by CheckedInt).
132 size_t frames
= static_cast<size_t>(GetDuration());
133 CheckedInt
<size_t> samples(frames
);
134 samples
*= static_cast<size_t>(aChannels
);
135 MOZ_ASSERT(samples
.isValid());
136 if (!samples
.isValid()) {
140 // Enlarge buffer space if needed
141 if (samples
.value() > aBuffer
.Capacity()) {
142 aBuffer
.SetCapacity(samples
.value());
// The length is set and then cleared while the storage is kept; the chunk
// loop below writes through Elements() + offset, and the final length is set
// once all chunks have been written.
144 aBuffer
.SetLengthAndRetainStorage(samples
.value());
145 aBuffer
.ClearAndRetainStorage();
147 // Convert the de-interleaved chunks into an interleaved buffer. Note that
148 // we may upmix or downmix the audio data if the channel count of a chunk
149 // does not match aChannels.
150 for (ConstChunkIterator
ci(*this); !ci
.IsEnded(); ci
.Next()) {
151 const AudioChunk
& c
= *ci
;
152 size_t samplesInChunk
= static_cast<size_t>(c
.mDuration
) * aChannels
;
153 switch (c
.mBufferFormat
) {
154 case AUDIO_FORMAT_S16
:
155 WriteChunk
<int16_t>(c
, aChannels
, c
.mVolume
,
156 aBuffer
.Elements() + offset
);
158 case AUDIO_FORMAT_FLOAT32
:
159 WriteChunk
<float>(c
, aChannels
, c
.mVolume
, aBuffer
.Elements() + offset
);
161 case AUDIO_FORMAT_SILENCE
:
162 PodZero(aBuffer
.Elements() + offset
, samplesInChunk
);
// Any other format is unexpected; its samples are zeroed just like silence.
165 MOZ_ASSERT_UNREACHABLE("Unknown format");
166 PodZero(aBuffer
.Elements() + offset
, samplesInChunk
);
169 offset
+= samplesInChunk
;
// The per-chunk sample counts must add up to the total computed up front.
171 MOZ_DIAGNOSTIC_ASSERT(samples
.value() == offset
,
172 "Segment's duration is incorrect");
173 aBuffer
.SetLengthAndRetainStorage(offset
);
177 // This helps to safely get a pointer to the position we want to start
178 // writing a planar audio buffer, depending on the channel and the offset in the
// buffer.
//
// aData          Start of the planar buffer.
// aLengthSamples Total length of the buffer in samples, across all channels.
// aChannelCount  Number of channels in the buffer. It is used as a divisor,
//                so it must be non-zero.
// aChannel       Index of the channel to address.
// aOffsetSamples Offset, in samples, from the start of that channel.
//
// Returns aData + (aLengthSamples / aChannelCount) * aChannel + aOffsetSamples.
// The bounds check is a MOZ_ASSERT only.
// NOTE(review): the declaration of the aChannel parameter is on a line that
// is not visible in this view.
180 static AudioDataValue
* PointerForOffsetInChannel(AudioDataValue
* aData
,
181 size_t aLengthSamples
,
182 uint32_t aChannelCount
,
184 uint32_t aOffsetSamples
) {
185 size_t samplesPerChannel
= aLengthSamples
/ aChannelCount
;
186 size_t beginningOfChannel
= samplesPerChannel
* aChannel
;
187 MOZ_ASSERT(aChannel
* samplesPerChannel
+ aOffsetSamples
< aLengthSamples
,
188 "Offset request out of bounds.");
189 return aData
+ beginningOfChannel
+ aOffsetSamples
;
// Writes the audio of aChunk, whose samples are of type SrcT, into the
// channel buffers in aOutputChannels. aChunk.mDuration frames are processed
// per output channel.
//
// If the chunk has more channels than aOutputChannels, the channels are
// down-mixed with AudioChannelsDownMix() and each output channel is then
// scaled by aChunk.mVolume. Otherwise each channel is converted with
// ConvertAudioSamplesWithScale().
// NOTE(review): the trailing arguments of the ConvertAudioSamplesWithScale()
// call are not visible in this view.
192 template <typename SrcT
>
193 static void DownMixChunk(const AudioChunk
& aChunk
,
194 Span
<AudioDataValue
* const> aOutputChannels
) {
195 Span
<const SrcT
* const> channelData
= aChunk
.ChannelData
<SrcT
>();
196 uint32_t frameCount
= aChunk
.mDuration
;
197 if (channelData
.Length() > aOutputChannels
.Length()) {
199 AudioChannelsDownMix(channelData
, aOutputChannels
, frameCount
);
200 for (AudioDataValue
* outChannel
: aOutputChannels
) {
201 ScaleAudioSamples(outChannel
, frameCount
, aChunk
.mVolume
);
204 // The channel count is already what we want.
// NOTE(review): this loop indexes channelData with every output channel
// index, so it relies on the chunk having at least as many channels as
// aOutputChannels.
205 for (uint32_t channel
= 0; channel
< aOutputChannels
.Length(); channel
++) {
206 ConvertAudioSamplesWithScale(channelData
[channel
],
207 aOutputChannels
[channel
], frameCount
,
// Writes this chunk's audio into the channel buffers in aOutputChannelPtrs,
// dispatching on mBufferFormat:
//   AUDIO_FORMAT_FLOAT32 -> DownMixChunk<float>
//   AUDIO_FORMAT_S16     -> DownMixChunk<int16_t>
//   AUDIO_FORMAT_SILENCE -> each output channel is filled with mDuration zeros
213 void AudioChunk::DownMixTo(
214 Span
<AudioDataValue
* const> aOutputChannelPtrs
) const {
215 switch (mBufferFormat
) {
216 case AUDIO_FORMAT_FLOAT32
:
217 DownMixChunk
<float>(*this, aOutputChannelPtrs
);
219 case AUDIO_FORMAT_S16
:
220 DownMixChunk
<int16_t>(*this, aOutputChannelPtrs
);
222 case AUDIO_FORMAT_SILENCE
:
223 for (AudioDataValue
* outChannel
: aOutputChannelPtrs
) {
224 std::fill_n(outChannel
, mDuration
, static_cast<AudioDataValue
>(0));
227 // Avoid `default:` so that `-Wswitch` catches missing enumerators at
// compile time.
230 MOZ_ASSERT_UNREACHABLE("buffer format");
// Renders this segment into a temporary planar buffer with aOutputChannels
// channels and passes that buffer to aMixer.
//
// aMixer          Receives the rendered buffer through its Mix() method.
// aOutputChannels Number of channels to produce. Chunks with fewer channels
//                 are up-mixed first; DownMixTo() then writes the output.
// aSampleRate     Forwarded unchanged to aMixer.Mix().
//
// NOTE(review): some lines of this function are not visible in this view
// (the name of the buffer declared first, the declaration of `frames`, the
// empty-duration branch and the setup of upMixChunk).
233 void AudioSegment::Mix(AudioMixer
& aMixer
, uint32_t aOutputChannels
,
234 uint32_t aSampleRate
) {
235 AutoTArray
<AudioDataValue
,
236 SilentChannel::AUDIO_PROCESSING_FRAMES
* GUESS_AUDIO_CHANNELS
>
238 AudioChunk upMixChunk
;
239 uint32_t offsetSamples
= 0;
240 uint32_t duration
= GetDuration();
243 MOZ_ASSERT(duration
== 0);
// Total size of the planar buffer: `duration` frames for each output channel.
// NOTE(review): this product is computed in 32 bits; presumably segment
// durations passed here are small enough that it cannot overflow -- confirm.
247 uint32_t outBufferLength
= duration
* aOutputChannels
;
248 buf
.SetLength(outBufferLength
);
250 AutoTArray
<AudioDataValue
*, GUESS_AUDIO_CHANNELS
> outChannelPtrs
;
251 outChannelPtrs
.SetLength(aOutputChannels
);
// offsetSamples advances by each chunk's frame count, so every chunk is
// written right after the previous one within each channel.
254 for (ChunkIterator
ci(*this); !ci
.IsEnded();
255 ci
.Next(), offsetSamples
+= frames
) {
256 const AudioChunk
& c
= *ci
;
257 frames
= c
.mDuration
;
// Point each output channel at the write position for this chunk.
258 for (uint32_t channel
= 0; channel
< aOutputChannels
; channel
++) {
259 outChannelPtrs
[channel
] =
260 PointerForOffsetInChannel(buf
.Elements(), outBufferLength
,
261 aOutputChannels
, channel
, offsetSamples
);
264 // If the chunk is silent, simply write the right number of silence in the
// output buffers.
266 if (c
.mBufferFormat
== AUDIO_FORMAT_SILENCE
) {
267 for (AudioDataValue
* outChannel
: outChannelPtrs
) {
268 PodZero(outChannel
, frames
);
272 // We need to upmix and downmix appropriately, depending on the
273 // desired input and output channels.
274 const AudioChunk
* downMixInput
= &c
;
275 if (c
.ChannelCount() < aOutputChannels
) {
// Extend upMixChunk's channel list to aOutputChannels entries, passing
// SilentChannel::gZeroChannel as the zero channel.
// NOTE(review): upMixChunk is presumably initialized from `c` on a line not
// visible in this view -- confirm.
278 AudioChannelsUpMix
<void>(&upMixChunk
.mChannelData
, aOutputChannels
,
279 SilentChannel::gZeroChannel
);
280 downMixInput
= &upMixChunk
;
282 downMixInput
->DownMixTo(outChannelPtrs
);
// After the loop, offsetSamples must equal the number of frames per channel.
286 MOZ_ASSERT(offsetSamples
== outBufferLength
/ aOutputChannels
,
287 "We forgot to write some samples?");
// Hand the planar buffer and its frame count to the mixer.
288 aMixer
.Mix(buf
.Elements(), aOutputChannels
, offsetSamples
, aSampleRate
);
292 } // namespace mozilla