/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "AudioConverter.h"
8 #include <speex/speex_resampler.h>
/*
 * Parts derived from MythTV AudioConvert Class
 * Created by Jean-Yves Avenard.
 *
 * Copyright (C) Bubblestuff Pty Ltd 2013
 * Copyright (C) foobum@gmail.com 2010
 */
22 AudioConverter::AudioConverter(const AudioConfig
& aIn
, const AudioConfig
& aOut
)
23 : mIn(aIn
), mOut(aOut
), mResampler(nullptr) {
24 MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn
, aOut
),
25 "The conversion is not supported");
26 mIn
.Layout().MappingTable(mOut
.Layout(), &mChannelOrderMap
);
27 if (aIn
.Rate() != aOut
.Rate()) {
32 AudioConverter::~AudioConverter() {
34 speex_resampler_destroy(mResampler
);
39 bool AudioConverter::CanConvert(const AudioConfig
& aIn
,
40 const AudioConfig
& aOut
) {
41 if (aIn
.Format() != aOut
.Format() ||
42 aIn
.Interleaved() != aOut
.Interleaved()) {
43 NS_WARNING("No format conversion is supported at this stage");
46 if (aIn
.Channels() != aOut
.Channels() && aOut
.Channels() > 2) {
48 "Only down/upmixing to mono or stereo is supported at this stage");
51 if (!aOut
.Interleaved()) {
52 NS_WARNING("planar audio format not supported");
58 bool AudioConverter::CanWorkInPlace() const {
59 bool needDownmix
= mIn
.Channels() > mOut
.Channels();
60 bool needUpmix
= mIn
.Channels() < mOut
.Channels();
61 bool canDownmixInPlace
=
62 mIn
.Channels() * AudioConfig::SampleSize(mIn
.Format()) >=
63 mOut
.Channels() * AudioConfig::SampleSize(mOut
.Format());
64 bool needResample
= mIn
.Rate() != mOut
.Rate();
65 bool canResampleInPlace
= mIn
.Rate() >= mOut
.Rate();
66 // We should be able to work in place if 1s of audio input takes less space
67 // than 1s of audio output. However, as we downmix before resampling we can't
68 // perform any upsampling in place (e.g. if incoming rate >= outgoing rate)
69 return !needUpmix
&& (!needDownmix
|| canDownmixInPlace
) &&
70 (!needResample
|| canResampleInPlace
);
73 size_t AudioConverter::ProcessInternal(void* aOut
, const void* aIn
,
78 if (mIn
.Channels() > mOut
.Channels()) {
79 return DownmixAudio(aOut
, aIn
, aFrames
);
80 } else if (mIn
.Channels() < mOut
.Channels()) {
81 return UpmixAudio(aOut
, aIn
, aFrames
);
82 } else if (mIn
.Layout() != mOut
.Layout() && CanReorderAudio()) {
83 ReOrderInterleavedChannels(aOut
, aIn
, aFrames
);
84 } else if (aIn
!= aOut
) {
85 memmove(aOut
, aIn
, FramesOutToBytes(aFrames
));
90 // Reorder interleaved channels.
91 // Can work in place (e.g aOut == aIn).
92 template <class AudioDataType
>
93 void _ReOrderInterleavedChannels(AudioDataType
* aOut
, const AudioDataType
* aIn
,
94 uint32_t aFrames
, uint32_t aChannels
,
95 const uint8_t* aChannelOrderMap
) {
96 MOZ_DIAGNOSTIC_ASSERT(aChannels
<= AudioConfig::ChannelLayout::MAX_CHANNELS
);
97 AudioDataType val
[AudioConfig::ChannelLayout::MAX_CHANNELS
];
98 for (uint32_t i
= 0; i
< aFrames
; i
++) {
99 for (uint32_t j
= 0; j
< aChannels
; j
++) {
100 val
[j
] = aIn
[aChannelOrderMap
[j
]];
102 for (uint32_t j
= 0; j
< aChannels
; j
++) {
110 void AudioConverter::ReOrderInterleavedChannels(void* aOut
, const void* aIn
,
111 size_t aFrames
) const {
112 MOZ_DIAGNOSTIC_ASSERT(mIn
.Channels() == mOut
.Channels());
113 MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());
115 if (mChannelOrderMap
.IsEmpty() || mOut
.Channels() == 1 ||
116 mOut
.Layout() == mIn
.Layout()) {
117 // If channel count is 1, planar and non-planar formats are the same or
118 // there's nothing to reorder, or if we don't know how to re-order.
120 memmove(aOut
, aIn
, FramesOutToBytes(aFrames
));
125 uint32_t bits
= AudioConfig::FormatToBits(mOut
.Format());
128 _ReOrderInterleavedChannels((uint8_t*)aOut
, (const uint8_t*)aIn
, aFrames
,
129 mIn
.Channels(), mChannelOrderMap
.Elements());
132 _ReOrderInterleavedChannels((int16_t*)aOut
, (const int16_t*)aIn
, aFrames
,
133 mIn
.Channels(), mChannelOrderMap
.Elements());
136 MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut
.Format()) == 4);
137 _ReOrderInterleavedChannels((int32_t*)aOut
, (const int32_t*)aIn
, aFrames
,
138 mIn
.Channels(), mChannelOrderMap
.Elements());
// Saturates a 32-bit intermediate value to the signed 16-bit sample range.
// Values below -32768 become -32768, values above 32767 become 32767, and
// anything in between is returned unchanged.
static inline int16_t clipTo15(int32_t aX) {
  return aX < -32768 ? -32768 : aX <= 32767 ? aX : 32767;
}
// Naive channel-count conversion for interleaved audio.
//
// For every frame, the first min(aInChannels, aOutChannels) channels are
// copied verbatim from aIn to aOut. When the output has more channels than
// the input, the remaining output channels of the frame are filled with
// silence (0). When the output has fewer channels, the extra input channels
// are dropped.
//
// aOut         destination buffer, aFrames * aOutChannels samples.
// aOutChannels number of interleaved channels in aOut.
// aIn          source buffer, aFrames * aInChannels samples.
// aInChannels  number of interleaved channels in aIn.
// aFrames      number of frames to convert.
template <typename TYPE>
static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
                          int32_t aInChannels, int32_t aFrames) {
  // NOTE(review): original lines 150-152 are absent from this listing; if
  // they held a guard (e.g. for aliasing buffers), restore it here.
  int32_t commonChannels = std::min(aInChannels, aOutChannels);

  for (int32_t i = 0; i < aFrames; i++) {
    for (int32_t j = 0; j < commonChannels; j++) {
      aOut[i * aOutChannels + j] = aIn[i * aInChannels + j];
    }
    // Silence the output channels that have no input counterpart. The
    // previous bound (aInChannels - aOutChannels) was negative whenever this
    // path applied, so the extra channels were left uninitialised; it also
    // indexed from channel 0 instead of from the first extra channel.
    for (int32_t j = commonChannels; j < aOutChannels; j++) {
      aOut[i * aOutChannels + j] = 0;
    }
  }
}
167 size_t AudioConverter::DownmixAudio(void* aOut
, const void* aIn
,
168 size_t aFrames
) const {
169 MOZ_DIAGNOSTIC_ASSERT(mIn
.Format() == AudioConfig::FORMAT_S16
||
170 mIn
.Format() == AudioConfig::FORMAT_FLT
);
171 MOZ_DIAGNOSTIC_ASSERT(mIn
.Channels() >= mOut
.Channels());
172 MOZ_DIAGNOSTIC_ASSERT(mOut
.Layout() == AudioConfig::ChannelLayout(2) ||
173 mOut
.Layout() == AudioConfig::ChannelLayout(1));
175 uint32_t inChannels
= mIn
.Channels();
176 uint32_t outChannels
= mOut
.Channels();
178 if (inChannels
== outChannels
) {
180 memmove(aOut
, aIn
, FramesOutToBytes(aFrames
));
185 if (!mIn
.Layout().IsValid() || !mOut
.Layout().IsValid()) {
186 // Dumb copy dropping extra channels.
187 if (mIn
.Format() == AudioConfig::FORMAT_FLT
) {
188 dumbUpDownMix(static_cast<float*>(aOut
), outChannels
,
189 static_cast<const float*>(aIn
), inChannels
, aFrames
);
190 } else if (mIn
.Format() == AudioConfig::FORMAT_S16
) {
191 dumbUpDownMix(static_cast<int16_t*>(aOut
), outChannels
,
192 static_cast<const int16_t*>(aIn
), inChannels
, aFrames
);
194 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
200 mIn
.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn
.Layout()),
201 "Can only downmix input data in SMPTE layout");
202 if (inChannels
> 2) {
203 if (mIn
.Format() == AudioConfig::FORMAT_FLT
) {
204 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
206 static const float dmatrix
[6][8][2] = {
207 /*3*/ {{0.5858f
, 0}, {0, 0.5858f
}, {0.4142f
, 0.4142f
}},
209 {{0.4226f
, 0}, {0, 0.4226f
}, {0.366f
, 0.2114f
}, {0.2114f
, 0.366f
}},
241 // Re-write the buffer with downmixed data
242 const float* in
= static_cast<const float*>(aIn
);
243 float* out
= static_cast<float*>(aOut
);
244 for (uint32_t i
= 0; i
< aFrames
; i
++) {
247 for (uint32_t j
= 0; j
< inChannels
; j
++) {
248 sampL
+= in
[i
* inChannels
+ j
] * dmatrix
[inChannels
- 3][j
][0];
249 sampR
+= in
[i
* inChannels
+ j
] * dmatrix
[inChannels
- 3][j
][1];
251 if (outChannels
== 2) {
255 *out
++ = (sampL
+ sampR
) * 0.5;
258 } else if (mIn
.Format() == AudioConfig::FORMAT_S16
) {
259 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
260 // 5-8. Coefficients in Q14.
261 static const int16_t dmatrix
[6][8][2] = {
262 /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
263 /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
265 {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
290 // Re-write the buffer with downmixed data
291 const int16_t* in
= static_cast<const int16_t*>(aIn
);
292 int16_t* out
= static_cast<int16_t*>(aOut
);
293 for (uint32_t i
= 0; i
< aFrames
; i
++) {
296 for (uint32_t j
= 0; j
< inChannels
; j
++) {
297 sampL
+= in
[i
* inChannels
+ j
] * dmatrix
[inChannels
- 3][j
][0];
298 sampR
+= in
[i
* inChannels
+ j
] * dmatrix
[inChannels
- 3][j
][1];
300 sampL
= clipTo15((sampL
+ 8192) >> 14);
301 sampR
= clipTo15((sampR
+ 8192) >> 14);
302 if (outChannels
== 2) {
306 *out
++ = (sampL
+ sampR
) * 0.5;
310 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
315 MOZ_DIAGNOSTIC_ASSERT(inChannels
== 2 && outChannels
== 1);
316 if (mIn
.Format() == AudioConfig::FORMAT_FLT
) {
317 const float* in
= static_cast<const float*>(aIn
);
318 float* out
= static_cast<float*>(aOut
);
319 for (size_t fIdx
= 0; fIdx
< aFrames
; ++fIdx
) {
321 // The sample of the buffer would be interleaved.
322 sample
= (in
[fIdx
* inChannels
] + in
[fIdx
* inChannels
+ 1]) * 0.5;
325 } else if (mIn
.Format() == AudioConfig::FORMAT_S16
) {
326 const int16_t* in
= static_cast<const int16_t*>(aIn
);
327 int16_t* out
= static_cast<int16_t*>(aOut
);
328 for (size_t fIdx
= 0; fIdx
< aFrames
; ++fIdx
) {
329 int32_t sample
= 0.0;
330 // The sample of the buffer would be interleaved.
331 sample
= (in
[fIdx
* inChannels
] + in
[fIdx
* inChannels
+ 1]) * 0.5;
335 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
340 size_t AudioConverter::ResampleAudio(void* aOut
, const void* aIn
,
345 uint32_t outframes
= ResampleRecipientFrames(aFrames
);
346 uint32_t inframes
= aFrames
;
349 if (mOut
.Format() == AudioConfig::FORMAT_FLT
) {
350 const float* in
= reinterpret_cast<const float*>(aIn
);
351 float* out
= reinterpret_cast<float*>(aOut
);
352 error
= speex_resampler_process_interleaved_float(mResampler
, in
, &inframes
,
354 } else if (mOut
.Format() == AudioConfig::FORMAT_S16
) {
355 const int16_t* in
= reinterpret_cast<const int16_t*>(aIn
);
356 int16_t* out
= reinterpret_cast<int16_t*>(aOut
);
357 error
= speex_resampler_process_interleaved_int(mResampler
, in
, &inframes
,
360 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
361 error
= RESAMPLER_ERR_ALLOC_FAILED
;
363 MOZ_ASSERT(error
== RESAMPLER_ERR_SUCCESS
);
364 if (error
!= RESAMPLER_ERR_SUCCESS
) {
365 speex_resampler_destroy(mResampler
);
366 mResampler
= nullptr;
369 MOZ_ASSERT(inframes
== aFrames
, "Some frames will be dropped");
373 void AudioConverter::RecreateResampler() {
375 speex_resampler_destroy(mResampler
);
378 mResampler
= speex_resampler_init(mOut
.Channels(), mIn
.Rate(), mOut
.Rate(),
379 SPEEX_RESAMPLER_QUALITY_DEFAULT
, &error
);
381 if (error
== RESAMPLER_ERR_SUCCESS
) {
382 speex_resampler_skip_zeros(mResampler
);
384 NS_WARNING("Failed to initialize resampler.");
385 mResampler
= nullptr;
389 size_t AudioConverter::DrainResampler(void* aOut
) {
393 int frames
= speex_resampler_get_input_latency(mResampler
);
394 AlignedByteBuffer
buffer(FramesOutToBytes(frames
));
399 frames
= ResampleAudio(aOut
, buffer
.Data(), frames
);
400 // Tore down the resampler as it's easier than handling follow-up.
405 size_t AudioConverter::UpmixAudio(void* aOut
, const void* aIn
,
406 size_t aFrames
) const {
407 MOZ_ASSERT(mIn
.Format() == AudioConfig::FORMAT_S16
||
408 mIn
.Format() == AudioConfig::FORMAT_FLT
);
409 MOZ_ASSERT(mIn
.Channels() < mOut
.Channels());
410 MOZ_ASSERT(mIn
.Channels() == 1, "Can only upmix mono for now");
411 MOZ_ASSERT(mOut
.Channels() == 2, "Can only upmix to stereo for now");
413 if (!mIn
.Layout().IsValid() || !mOut
.Layout().IsValid() ||
414 mOut
.Channels() != 2) {
415 // Dumb copy the channels and insert silence for the extra channels.
416 if (mIn
.Format() == AudioConfig::FORMAT_FLT
) {
417 dumbUpDownMix(static_cast<float*>(aOut
), mOut
.Channels(),
418 static_cast<const float*>(aIn
), mIn
.Channels(), aFrames
);
419 } else if (mIn
.Format() == AudioConfig::FORMAT_S16
) {
420 dumbUpDownMix(static_cast<int16_t*>(aOut
), mOut
.Channels(),
421 static_cast<const int16_t*>(aIn
), mIn
.Channels(), aFrames
);
423 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
428 // Upmix mono to stereo.
429 // This is a very dumb mono to stereo upmixing, power levels are preserved
430 // following the calculation: left = right = -3dB*mono.
431 if (mIn
.Format() == AudioConfig::FORMAT_FLT
) {
432 const float m3db
= std::sqrt(0.5); // -3dB = sqrt(1/2)
433 const float* in
= static_cast<const float*>(aIn
);
434 float* out
= static_cast<float*>(aOut
);
435 for (size_t fIdx
= 0; fIdx
< aFrames
; ++fIdx
) {
436 float sample
= in
[fIdx
] * m3db
;
437 // The samples of the buffer would be interleaved.
441 } else if (mIn
.Format() == AudioConfig::FORMAT_S16
) {
442 const int16_t* in
= static_cast<const int16_t*>(aIn
);
443 int16_t* out
= static_cast<int16_t*>(aOut
);
444 for (size_t fIdx
= 0; fIdx
< aFrames
; ++fIdx
) {
446 ((int32_t)in
[fIdx
] * 11585) >> 14; // close enough to i*sqrt(0.5)
447 // The samples of the buffer would be interleaved.
452 MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
458 size_t AudioConverter::ResampleRecipientFrames(size_t aFrames
) const {
459 if (!aFrames
&& mIn
.Rate() != mOut
.Rate()) {
463 // We drain by pushing in get_input_latency() samples of 0
464 aFrames
= speex_resampler_get_input_latency(mResampler
);
466 return (uint64_t)aFrames
* mOut
.Rate() / mIn
.Rate() + 1;
469 size_t AudioConverter::FramesOutToSamples(size_t aFrames
) const {
470 return aFrames
* mOut
.Channels();
473 size_t AudioConverter::SamplesInToFrames(size_t aSamples
) const {
474 return aSamples
/ mIn
.Channels();
477 size_t AudioConverter::FramesOutToBytes(size_t aFrames
) const {
478 return FramesOutToSamples(aFrames
) * AudioConfig::SampleSize(mOut
.Format());
480 } // namespace mozilla