Bug 1856663 - Add more chunks for Android mochitest-plain. r=jmaher,taskgraph-reviewe...
[gecko.git] / dom / media / DynamicResampler.h
blobf8c5aff0e4d619b079cbdd2bd4f68c5937f0ff21
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef MOZILLA_DYNAMIC_RESAMPLER_H_
7 #define MOZILLA_DYNAMIC_RESAMPLER_H_
9 #include "AudioRingBuffer.h"
10 #include "AudioSegment.h"
12 #include <speex/speex_resampler.h>
14 namespace mozilla {
/** Channel count of a stereo layout; also the inline capacity of the
 * per-channel arrays declared in this header. */
constexpr uint32_t STEREO = 2;
18 /**
19 * DynamicResampler allows updating on the fly the output sample rate and the
20 * number of channels. In addition to that, it maintains an internal buffer for
21 * the input data and allows pre-buffering as well. The Resample() method
22 * strives to provide the requested number of output frames by using the input
23 * data including any pre-buffering. If this is not possible then it will not
24 * attempt to resample and it will return failure.
26 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
27 * buffer is 100ms of float audio and it is pre-allocated at the constructor.
28 * No extra allocations take place when the input is appended. In addition to
29 * that, due to special feature of AudioRingBuffer, no extra copies take place
30 * when the input data is fed to the resampler.
32 * The sample format must be set before using any method. If the provided sample
33 * format is of type short the pre-allocated capacity of the input buffer
34 * becomes 200ms of short audio.
36 * The DynamicResampler is not thread-safe, so all the methods appart from the
37 * constructor must be called on the same thread.
39 class DynamicResampler final {
40 public:
41 /**
42 * Provide the initial input and output rate and the amount of pre-buffering.
43 * The channel count will be set to stereo. Memory allocation will take
44 * place. The input buffer is non-interleaved.
46 DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
47 uint32_t aPreBufferFrames = 0);
48 ~DynamicResampler();
50 /**
51 * Set the sample format type to float or short.
53 void SetSampleFormat(AudioSampleFormat aFormat);
54 uint32_t GetOutRate() const { return mOutRate; }
55 uint32_t GetChannels() const { return mChannels; }
57 /**
58 * Append `aInFrames` number of frames from `aInBuffer` to the internal input
59 * buffer. Memory copy/move takes place.
61 void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames);
62 void AppendInput(const nsTArray<const int16_t*>& aInBuffer,
63 uint32_t aInFrames);
64 /**
65 * Append `aInFrames` number of frames of silence to the internal input
66 * buffer. Memory copy/move takes place.
68 void AppendInputSilence(const uint32_t aInFrames);
69 /**
70 * Return the number of frames stored in the internal input buffer.
72 uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
73 /**
74 * Return the number of frames left to store in the internal input buffer.
76 uint32_t InFramesLeftToBuffer(uint32_t aChannelIndex) const;
79 * Resampler as much frame is needed from the internal input buffer to the
80 * `aOutBuffer` in order to provide all `aOutFrames` and return true. If there
81 * not enough input frames to provide the requested output frames no
82 * resampling is attempted and false is returned.
84 bool Resample(float* aOutBuffer, uint32_t* aOutFrames,
85 uint32_t aChannelIndex);
86 bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames,
87 uint32_t aChannelIndex);
89 /**
90 * Update the output rate or/and the channel count. If a value is not updated
91 * compared to the current one nothing happens. Changing the `aOutRate`
92 * results in recalculation in the resampler. Changing `aChannels` results in
93 * the reallocation of the internal input buffer with the exception of
94 * changes between mono to stereo and vice versa where no reallocation takes
95 * place. A stereo internal input buffer is always maintained even if the
96 * sound is mono.
98 void UpdateResampler(uint32_t aOutRate, uint32_t aChannels);
101 * Returns true if the resampler has enough input data to provide to the
102 * output of the `Resample()` method `aOutFrames` number of frames. This is a
103 * way to know in advance if the `Resampler` method will return true or false
104 * given that nothing changes in between.
106 bool CanResample(uint32_t aOutFrames) const;
108 private:
109 template <typename T>
110 void AppendInputInternal(const nsTArray<const T*>& aInBuffer,
111 uint32_t aInFrames) {
112 MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
113 for (uint32_t i = 0; i < mChannels; ++i) {
114 PushInFrames(aInBuffer[i], aInFrames, i);
118 void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
119 float* aOutBuffer, uint32_t* aOutFrames,
120 uint32_t aChannelIndex);
121 void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
122 int16_t* aOutBuffer, uint32_t* aOutFrames,
123 uint32_t aChannelIndex);
125 template <typename T>
126 bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames,
127 uint32_t aChannelIndex) {
128 MOZ_ASSERT(mInRate);
129 MOZ_ASSERT(mOutRate);
130 MOZ_ASSERT(mChannels);
131 MOZ_ASSERT(aChannelIndex <= mChannels);
132 MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length());
133 MOZ_ASSERT(aOutFrames);
134 MOZ_ASSERT(*aOutFrames);
136 // Not enough input, don't do anything
137 if (!EnoughInFrames(*aOutFrames, aChannelIndex)) {
138 *aOutFrames = 0;
139 return false;
142 if (mInRate == mOutRate) {
143 mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, *aOutFrames));
144 // Workaround to avoid discontinuity when the speex resampler operates
145 // again. Feed it with the last 20 frames to warm up the internal memory
146 // of the resampler and then skip memory equals to resampler's input
147 // latency.
148 mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames);
149 return true;
152 uint32_t totalOutFramesNeeded = *aOutFrames;
154 mInternalInBuffer[aChannelIndex].ReadNoCopy(
155 [this, &aOutBuffer, &totalOutFramesNeeded,
156 aChannelIndex](const Span<const T>& aInBuffer) -> uint32_t {
157 if (!totalOutFramesNeeded) {
158 return 0;
160 uint32_t outFramesResampled = totalOutFramesNeeded;
161 uint32_t inFrames = aInBuffer.Length();
162 ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
163 &outFramesResampled, aChannelIndex);
164 aOutBuffer += outFramesResampled;
165 totalOutFramesNeeded -= outFramesResampled;
166 mInputTail[aChannelIndex].StoreTail<T>(aInBuffer);
167 return inFrames;
170 MOZ_ASSERT(totalOutFramesNeeded == 0);
171 return true;
174 bool EnoughInFrames(uint32_t aOutFrames, uint32_t aChannelIndex) const;
176 template <typename T>
177 void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
178 uint32_t aChannelIndex) {
179 MOZ_ASSERT(aInBuffer);
180 MOZ_ASSERT(aInFrames);
181 MOZ_ASSERT(mChannels);
182 MOZ_ASSERT(aChannelIndex <= mChannels);
183 MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length());
184 mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
187 void WarmUpResampler(bool aSkipLatency);
189 public:
190 const uint32_t mInRate;
191 const uint32_t mPreBufferFrames;
193 private:
194 uint32_t mChannels = 0;
195 uint32_t mOutRate;
197 AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
199 SpeexResamplerState* mResampler = nullptr;
200 AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
202 class TailBuffer {
203 public:
204 template <typename T>
205 T* Buffer() {
206 return reinterpret_cast<T*>(mBuffer);
208 /* Store the MAXSIZE last elements of the buffer. */
209 template <typename T>
210 void StoreTail(const Span<const T>& aInBuffer) {
211 StoreTail(aInBuffer.data(), aInBuffer.size());
213 template <typename T>
214 void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
215 if (aInFrames >= MAXSIZE) {
216 PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
217 mSize = MAXSIZE;
218 } else {
219 PodCopy(Buffer<T>(), aInBuffer, aInFrames);
220 mSize = aInFrames;
223 uint32_t Length() { return mSize; }
224 static const uint32_t MAXSIZE = 20;
226 private:
227 float mBuffer[MAXSIZE] = {};
228 uint32_t mSize = 0;
230 AutoTArray<TailBuffer, STEREO> mInputTail;
234 * AudioChunkList provides a way to have preallocated audio buffers in
235 * AudioSegment. The idea is that the amount of AudioChunks is created in
236 * advance. Each AudioChunk is able to hold a specific amount of audio
237 * (capacity). The total capacity of AudioChunkList is specified by the number
238 * of AudioChunks. The important aspect of the AudioChunkList is that
239 * preallocates everything and reuse the same chunks similar to a ring buffer.
241 * Why the whole AudioChunk is preallocated and not some raw memory buffer? This
242 * is due to the limitations of MediaTrackGraph. The way that MTG works depends
243 * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists
244 * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the
245 * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio
246 * data was in a different form, the only way to use it from the audio thread
247 * would be to create the AudioChunk there. That would result in a copy
248 * operation (not very important) and most of all an allocation of the audio
249 * buffer in the audio thread. This happens in many places inside MTG it's a bad
250 * practice, though, and it has been avoided due to the AudioChunkList.
252 * After construction the sample format must be set, when it is available. It
253 * can be set in the audio thread. Before setting the sample format is not
254 * possible to use any method of AudioChunkList.
256 * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128
257 * frames of float audio. Nevertheless, the sample format is not available at
258 * that point. Thus if the sample format is set to short, the capacity of each
259 * chunk changes to 256 number of frames, and the total duration becomes twice
260 * big. There are methods to get the chunk capacity and total capacity in frames
261 * and must always be used.
263 * Two things to note. First, when the channel count changes everything is
264 * recreated which means reallocations. Second, the total capacity might differs
265 * from the requested total capacity for two reasons. First, if the sample
266 * format is set to short and second because the number of chunks in the list
267 * divides exactly the final total capacity. The corresponding method must
268 * always be used to query the total capacity.
270 class AudioChunkList {
271 public:
273 * Constructor, the final total duration might be different from the requested
274 * `aTotalDuration`. Memory allocation takes place.
276 AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels,
277 const PrincipalHandle& aPrincipalHandle);
278 AudioChunkList(const AudioChunkList&) = delete;
279 AudioChunkList(AudioChunkList&&) = delete;
280 ~AudioChunkList() = default;
283 * Set sample format. It must be done before any other method being used.
285 void SetSampleFormat(AudioSampleFormat aFormat);
287 * Get the next available AudioChunk. The duration of the chunk will be zero
288 * and the volume 1.0. However, the buffers will be there ready to be written.
289 * Please note, that a reference of the preallocated chunk is returned. Thus
290 * it _must not be consumed_ directly. If the chunk needs to be consumed it
291 * must be copied to a temporary chunk first. For example:
292 * ```
293 * AudioChunk& chunk = audioChunklist.GetNext();
294 * // Set up the chunk
295 * AudioChunk tmp = chunk;
296 * audioSegment.AppendAndConsumeChunk(std::move(tmp));
297 * ```
298 * This way no memory allocation or copy, takes place.
300 AudioChunk& GetNext();
303 * Get the capacity of each individual AudioChunk in the list.
305 uint32_t ChunkCapacity() const {
306 MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
307 mSampleFormat == AUDIO_FORMAT_FLOAT32);
308 return mChunkCapacity;
311 * Get the total capacity of AudioChunkList.
313 uint32_t TotalCapacity() const {
314 MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
315 mSampleFormat == AUDIO_FORMAT_FLOAT32);
316 return CheckedInt<uint32_t>(mChunkCapacity * mChunks.Length()).value();
320 * Update the channel count of the AudioChunkList. Memory allocation is
321 * taking place.
323 void Update(uint32_t aChannels);
325 private:
326 void IncrementIndex() {
327 ++mIndex;
328 mIndex = CheckedInt<uint32_t>(mIndex % mChunks.Length()).value();
330 void CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels);
331 void UpdateToMonoOrStereo(uint32_t aChannels);
333 private:
334 const PrincipalHandle mPrincipalHandle;
335 nsTArray<AudioChunk> mChunks;
336 uint32_t mIndex = 0;
337 uint32_t mChunkCapacity = WEBAUDIO_BLOCK_SIZE;
338 AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
342 * Audio Resampler is a resampler able to change the output rate and channels
343 * count on the fly. The API is simple and it is based in AudioSegment in order
344 * to be used MTG. All memory allocations, for input and output buffers, happen
345 * in the constructor and when channel count changes. The memory is recycled in
346 * order to avoid reallocations. It also supports prebuffering of silence. It
347 * consists of DynamicResampler and AudioChunkList so please read their
348 * documentation if you are interested in more details.
350 * The output buffer is preallocated and returned in the form of AudioSegment.
351 * The intention is to be used directly in a MediaTrack. Since an AudioChunk
352 * must no be "shared" in order to be written, the AudioSegment returned by
353 * resampler method must be cleaned up in order to be able for the `AudioChunk`s
354 * that it consists of to be reused. For `MediaTrack::mSegment` this happens
355 * every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus
356 * memory capacity of 100ms has been preallocated for internal input and output
357 * buffering.
359 class AudioResampler final {
360 public:
361 AudioResampler(uint32_t aInRate, uint32_t aOutRate, uint32_t aPreBufferFrames,
362 const PrincipalHandle& aPrincipalHandle);
365 * Append input data into the resampler internal buffer. Copy/move of the
366 * memory is taking place. Also, the channel count will change according to
367 * the channel count of the chunks.
369 void AppendInput(const AudioSegment& aInSegment);
371 * Get the number of frames that can be read from the internal input buffer
372 * before it becomes empty.
374 uint32_t InputReadableFrames() const;
376 * Get the number of frames that can be written to the internal input buffer
377 * before it becomes full.
379 uint32_t InputWritableFrames() const;
382 * Reguest `aOutFrames` of audio in the output sample rate. The internal
383 * buffered input is used. If there is no enough input for that amount of
384 * output and empty AudioSegment is returned
386 AudioSegment Resample(uint32_t aOutFrames);
389 * Updates the output rate that will be used by the resampler.
391 void UpdateOutRate(uint32_t aOutRate) {
392 Update(aOutRate, mResampler.GetChannels());
395 private:
396 void UpdateChannels(uint32_t aChannels) {
397 Update(mResampler.GetOutRate(), aChannels);
399 void Update(uint32_t aOutRate, uint32_t aChannels);
401 private:
402 DynamicResampler mResampler;
403 AudioChunkList mOutputChunks;
404 bool mIsSampleFormatSet = false;
407 } // namespace mozilla
409 #endif // MOZILLA_DYNAMIC_RESAMPLER_H_