Bug 1856663 - Add more chunks for Android mochitest-plain. r=jmaher,taskgraph-reviewe...
[gecko.git] / dom / media / AudioConverter.cpp
blob1f5860804318e43641c046c5ed4a242f107cc718
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "AudioConverter.h"
8 #include <speex/speex_resampler.h>
9 #include <string.h>
10 #include <cmath>
13 * Parts derived from MythTV AudioConvert Class
14 * Created by Jean-Yves Avenard.
16 * Copyright (C) Bubblestuff Pty Ltd 2013
17 * Copyright (C) foobum@gmail.com 2010
20 namespace mozilla {
22 AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
23 : mIn(aIn), mOut(aOut), mResampler(nullptr) {
24 MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut),
25 "The conversion is not supported");
26 mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap);
27 if (aIn.Rate() != aOut.Rate()) {
28 RecreateResampler();
32 AudioConverter::~AudioConverter() {
33 if (mResampler) {
34 speex_resampler_destroy(mResampler);
35 mResampler = nullptr;
39 bool AudioConverter::CanConvert(const AudioConfig& aIn,
40 const AudioConfig& aOut) {
41 if (aIn.Format() != aOut.Format() ||
42 aIn.Interleaved() != aOut.Interleaved()) {
43 NS_WARNING("No format conversion is supported at this stage");
44 return false;
46 if (aIn.Channels() != aOut.Channels() && aOut.Channels() > 2) {
47 NS_WARNING(
48 "Only down/upmixing to mono or stereo is supported at this stage");
49 return false;
51 if (!aOut.Interleaved()) {
52 NS_WARNING("planar audio format not supported");
53 return false;
55 return true;
58 bool AudioConverter::CanWorkInPlace() const {
59 bool needDownmix = mIn.Channels() > mOut.Channels();
60 bool needUpmix = mIn.Channels() < mOut.Channels();
61 bool canDownmixInPlace =
62 mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
63 mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
64 bool needResample = mIn.Rate() != mOut.Rate();
65 bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
66 // We should be able to work in place if 1s of audio input takes less space
67 // than 1s of audio output. However, as we downmix before resampling we can't
68 // perform any upsampling in place (e.g. if incoming rate >= outgoing rate)
69 return !needUpmix && (!needDownmix || canDownmixInPlace) &&
70 (!needResample || canResampleInPlace);
73 size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn,
74 size_t aFrames) {
75 if (!aFrames) {
76 return 0;
78 if (mIn.Channels() > mOut.Channels()) {
79 return DownmixAudio(aOut, aIn, aFrames);
80 } else if (mIn.Channels() < mOut.Channels()) {
81 return UpmixAudio(aOut, aIn, aFrames);
82 } else if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) {
83 ReOrderInterleavedChannels(aOut, aIn, aFrames);
84 } else if (aIn != aOut) {
85 memmove(aOut, aIn, FramesOutToBytes(aFrames));
87 return aFrames;
90 // Reorder interleaved channels.
91 // Can work in place (e.g aOut == aIn).
92 template <class AudioDataType>
93 void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
94 uint32_t aFrames, uint32_t aChannels,
95 const uint8_t* aChannelOrderMap) {
96 MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS);
97 AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS];
98 for (uint32_t i = 0; i < aFrames; i++) {
99 for (uint32_t j = 0; j < aChannels; j++) {
100 val[j] = aIn[aChannelOrderMap[j]];
102 for (uint32_t j = 0; j < aChannels; j++) {
103 aOut[j] = val[j];
105 aOut += aChannels;
106 aIn += aChannels;
110 void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
111 size_t aFrames) const {
112 MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
113 MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());
115 if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 ||
116 mOut.Layout() == mIn.Layout()) {
117 // If channel count is 1, planar and non-planar formats are the same or
118 // there's nothing to reorder, or if we don't know how to re-order.
119 if (aOut != aIn) {
120 memmove(aOut, aIn, FramesOutToBytes(aFrames));
122 return;
125 uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
126 switch (bits) {
127 case 8:
128 _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn, aFrames,
129 mIn.Channels(), mChannelOrderMap.Elements());
130 break;
131 case 16:
132 _ReOrderInterleavedChannels((int16_t*)aOut, (const int16_t*)aIn, aFrames,
133 mIn.Channels(), mChannelOrderMap.Elements());
134 break;
135 default:
136 MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
137 _ReOrderInterleavedChannels((int32_t*)aOut, (const int32_t*)aIn, aFrames,
138 mIn.Channels(), mChannelOrderMap.Elements());
139 break;
// Saturates a 32-bit value to the signed 16-bit range [-32768, 32767].
static inline int16_t clipTo15(int32_t aX) {
  if (aX < -32768) {
    return -32768;
  }
  if (aX > 32767) {
    return 32767;
  }
  return static_cast<int16_t>(aX);
}
// Naive channel-count conversion for interleaved audio.
// For every frame, the channels present on both sides are copied as-is;
// surplus input channels are dropped and surplus output channels are filled
// with silence (0).
//
// aOut / aOutChannels: destination buffer and its channel count.
// aIn / aInChannels:   source buffer and its channel count.
// aFrames:             number of frames to convert.
//
// In-place use (aIn == aOut):
//  - dropping channels is performed in place: frames are walked forward and a
//    write never lands past the sample being read, so no input is clobbered
//    before it is consumed;
//  - equal channel counts is a no-op;
//  - adding channels cannot be done by a forward walk, so the buffer is left
//    untouched, as it always was.
template <typename TYPE>
static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
                          int32_t aInChannels, int32_t aFrames) {
  if (aIn == aOut && aOutChannels >= aInChannels) {
    return;
  }
  const int32_t commonChannels = std::min(aInChannels, aOutChannels);

  for (int32_t i = 0; i < aFrames; i++) {
    const TYPE* inFrame = aIn + i * aInChannels;
    TYPE* outFrame = aOut + i * aOutChannels;
    for (int32_t j = 0; j < commonChannels; j++) {
      outFrame[j] = inFrame[j];
    }
    // Silence the output channels that have no input counterpart. This loop
    // does not run when channels are being dropped.
    for (int32_t j = commonChannels; j < aOutChannels; j++) {
      outFrame[j] = 0;
    }
  }
}
167 size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn,
168 size_t aFrames) const {
169 MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
170 mIn.Format() == AudioConfig::FORMAT_FLT);
171 MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels());
172 MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
173 mOut.Layout() == AudioConfig::ChannelLayout(1));
175 uint32_t inChannels = mIn.Channels();
176 uint32_t outChannels = mOut.Channels();
178 if (inChannels == outChannels) {
179 if (aOut != aIn) {
180 memmove(aOut, aIn, FramesOutToBytes(aFrames));
182 return aFrames;
185 if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) {
186 // Dumb copy dropping extra channels.
187 if (mIn.Format() == AudioConfig::FORMAT_FLT) {
188 dumbUpDownMix(static_cast<float*>(aOut), outChannels,
189 static_cast<const float*>(aIn), inChannels, aFrames);
190 } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
191 dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels,
192 static_cast<const int16_t*>(aIn), inChannels, aFrames);
193 } else {
194 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
196 return aFrames;
199 MOZ_ASSERT(
200 mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()),
201 "Can only downmix input data in SMPTE layout");
202 if (inChannels > 2) {
203 if (mIn.Format() == AudioConfig::FORMAT_FLT) {
204 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
205 // 5-8.
206 static const float dmatrix[6][8][2] = {
207 /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}},
208 /*4*/
209 {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}},
210 /*5*/
211 {{0.6510f, 0},
212 {0, 0.6510f},
213 {0.4600f, 0.4600f},
214 {0.5636f, 0.3254f},
215 {0.3254f, 0.5636f}},
216 /*6*/
217 {{0.5290f, 0},
218 {0, 0.5290f},
219 {0.3741f, 0.3741f},
220 {0.3741f, 0.3741f},
221 {0.4582f, 0.2645f},
222 {0.2645f, 0.4582f}},
223 /*7*/
224 {{0.4553f, 0},
225 {0, 0.4553f},
226 {0.3220f, 0.3220f},
227 {0.3220f, 0.3220f},
228 {0.2788f, 0.2788f},
229 {0.3943f, 0.2277f},
230 {0.2277f, 0.3943f}},
231 /*8*/
232 {{0.3886f, 0},
233 {0, 0.3886f},
234 {0.2748f, 0.2748f},
235 {0.2748f, 0.2748f},
236 {0.3366f, 0.1943f},
237 {0.1943f, 0.3366f},
238 {0.3366f, 0.1943f},
239 {0.1943f, 0.3366f}},
241 // Re-write the buffer with downmixed data
242 const float* in = static_cast<const float*>(aIn);
243 float* out = static_cast<float*>(aOut);
244 for (uint32_t i = 0; i < aFrames; i++) {
245 float sampL = 0.0;
246 float sampR = 0.0;
247 for (uint32_t j = 0; j < inChannels; j++) {
248 sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
249 sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
251 if (outChannels == 2) {
252 *out++ = sampL;
253 *out++ = sampR;
254 } else {
255 *out++ = (sampL + sampR) * 0.5;
258 } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
259 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
260 // 5-8. Coefficients in Q14.
261 static const int16_t dmatrix[6][8][2] = {
262 /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
263 /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
264 /*5*/
265 {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
266 /*6*/
267 {{8668, 0},
268 {0, 8668},
269 {6129, 6129},
270 {6129, 6129},
271 {7507, 4335},
272 {4335, 7507}},
273 /*7*/
274 {{7459, 0},
275 {0, 7459},
276 {5275, 5275},
277 {5275, 5275},
278 {4568, 4568},
279 {6460, 3731},
280 {3731, 6460}},
281 /*8*/
282 {{6368, 0},
283 {0, 6368},
284 {4502, 4502},
285 {4502, 4502},
286 {5514, 3184},
287 {3184, 5514},
288 {5514, 3184},
289 {3184, 5514}}};
290 // Re-write the buffer with downmixed data
291 const int16_t* in = static_cast<const int16_t*>(aIn);
292 int16_t* out = static_cast<int16_t*>(aOut);
293 for (uint32_t i = 0; i < aFrames; i++) {
294 int32_t sampL = 0;
295 int32_t sampR = 0;
296 for (uint32_t j = 0; j < inChannels; j++) {
297 sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
298 sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
300 sampL = clipTo15((sampL + 8192) >> 14);
301 sampR = clipTo15((sampR + 8192) >> 14);
302 if (outChannels == 2) {
303 *out++ = sampL;
304 *out++ = sampR;
305 } else {
306 *out++ = (sampL + sampR) * 0.5;
309 } else {
310 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
312 return aFrames;
315 MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1);
316 if (mIn.Format() == AudioConfig::FORMAT_FLT) {
317 const float* in = static_cast<const float*>(aIn);
318 float* out = static_cast<float*>(aOut);
319 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
320 float sample = 0.0;
321 // The sample of the buffer would be interleaved.
322 sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
323 *out++ = sample;
325 } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
326 const int16_t* in = static_cast<const int16_t*>(aIn);
327 int16_t* out = static_cast<int16_t*>(aOut);
328 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
329 int32_t sample = 0.0;
330 // The sample of the buffer would be interleaved.
331 sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
332 *out++ = sample;
334 } else {
335 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
337 return aFrames;
340 size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn,
341 size_t aFrames) {
342 if (!mResampler) {
343 return 0;
345 uint32_t outframes = ResampleRecipientFrames(aFrames);
346 uint32_t inframes = aFrames;
348 int error;
349 if (mOut.Format() == AudioConfig::FORMAT_FLT) {
350 const float* in = reinterpret_cast<const float*>(aIn);
351 float* out = reinterpret_cast<float*>(aOut);
352 error = speex_resampler_process_interleaved_float(mResampler, in, &inframes,
353 out, &outframes);
354 } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
355 const int16_t* in = reinterpret_cast<const int16_t*>(aIn);
356 int16_t* out = reinterpret_cast<int16_t*>(aOut);
357 error = speex_resampler_process_interleaved_int(mResampler, in, &inframes,
358 out, &outframes);
359 } else {
360 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
361 error = RESAMPLER_ERR_ALLOC_FAILED;
363 MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS);
364 if (error != RESAMPLER_ERR_SUCCESS) {
365 speex_resampler_destroy(mResampler);
366 mResampler = nullptr;
367 return 0;
369 MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
370 return outframes;
373 void AudioConverter::RecreateResampler() {
374 if (mResampler) {
375 speex_resampler_destroy(mResampler);
377 int error;
378 mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
379 SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
381 if (error == RESAMPLER_ERR_SUCCESS) {
382 speex_resampler_skip_zeros(mResampler);
383 } else {
384 NS_WARNING("Failed to initialize resampler.");
385 mResampler = nullptr;
389 size_t AudioConverter::DrainResampler(void* aOut) {
390 if (!mResampler) {
391 return 0;
393 int frames = speex_resampler_get_input_latency(mResampler);
394 AlignedByteBuffer buffer(FramesOutToBytes(frames));
395 if (!buffer) {
396 // OOM
397 return 0;
399 frames = ResampleAudio(aOut, buffer.Data(), frames);
400 // Tore down the resampler as it's easier than handling follow-up.
401 RecreateResampler();
402 return frames;
405 size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn,
406 size_t aFrames) const {
407 MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
408 mIn.Format() == AudioConfig::FORMAT_FLT);
409 MOZ_ASSERT(mIn.Channels() < mOut.Channels());
410 MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
411 MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
413 if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() ||
414 mOut.Channels() != 2) {
415 // Dumb copy the channels and insert silence for the extra channels.
416 if (mIn.Format() == AudioConfig::FORMAT_FLT) {
417 dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
418 static_cast<const float*>(aIn), mIn.Channels(), aFrames);
419 } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
420 dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
421 static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
422 } else {
423 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
425 return aFrames;
428 // Upmix mono to stereo.
429 // This is a very dumb mono to stereo upmixing, power levels are preserved
430 // following the calculation: left = right = -3dB*mono.
431 if (mIn.Format() == AudioConfig::FORMAT_FLT) {
432 const float m3db = std::sqrt(0.5); // -3dB = sqrt(1/2)
433 const float* in = static_cast<const float*>(aIn);
434 float* out = static_cast<float*>(aOut);
435 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
436 float sample = in[fIdx] * m3db;
437 // The samples of the buffer would be interleaved.
438 *out++ = sample;
439 *out++ = sample;
441 } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
442 const int16_t* in = static_cast<const int16_t*>(aIn);
443 int16_t* out = static_cast<int16_t*>(aOut);
444 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
445 int16_t sample =
446 ((int32_t)in[fIdx] * 11585) >> 14; // close enough to i*sqrt(0.5)
447 // The samples of the buffer would be interleaved.
448 *out++ = sample;
449 *out++ = sample;
451 } else {
452 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
455 return aFrames;
458 size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const {
459 if (!aFrames && mIn.Rate() != mOut.Rate()) {
460 if (!mResampler) {
461 return 0;
463 // We drain by pushing in get_input_latency() samples of 0
464 aFrames = speex_resampler_get_input_latency(mResampler);
466 return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
469 size_t AudioConverter::FramesOutToSamples(size_t aFrames) const {
470 return aFrames * mOut.Channels();
473 size_t AudioConverter::SamplesInToFrames(size_t aSamples) const {
474 return aSamples / mIn.Channels();
477 size_t AudioConverter::FramesOutToBytes(size_t aFrames) const {
478 return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format());
480 } // namespace mozilla