dom/media/AudioConverter.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "AudioConverter.h"
   8 #include <speex/speex_resampler.h>
   9 #include <string.h>
  10 #include <cmath>
  11
  12 /*
  13  *  Parts derived from MythTV AudioConvert Class
  14  *  Created by Jean-Yves Avenard.
  15  *
  16  *  Copyright (C) Bubblestuff Pty Ltd 2013
  17  *  Copyright (C) foobum@gmail.com 2010
  18  */
  19
  20 namespace mozilla {
  21
  22 AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
  23     : mIn(aIn), mOut(aOut), mResampler(nullptr) {
  24   MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut),
  25                         "The conversion is not supported");
  26   mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap);
  27   if (aIn.Rate() != aOut.Rate()) {
  28     RecreateResampler();
  29   }
  30 }
  31
  32 AudioConverter::~AudioConverter() {
  33   if (mResampler) {
  34     speex_resampler_destroy(mResampler);
  35     mResampler = nullptr;
  36   }
  37 }
  38
  39 bool AudioConverter::CanConvert(const AudioConfig& aIn,
  40                                 const AudioConfig& aOut) {
  41   if (aIn.Format() != aOut.Format() ||
  42       aIn.Interleaved() != aOut.Interleaved()) {
  43     NS_WARNING("No format conversion is supported at this stage");
  44     return false;
  45   }
  46   if (aIn.Channels() != aOut.Channels() && aOut.Channels() > 2) {
  47     NS_WARNING(
  48         "Only down/upmixing to mono or stereo is supported at this stage");
  49     return false;
  50   }
  51   if (!aOut.Interleaved()) {
  52     NS_WARNING("planar audio format not supported");
  53     return false;
  54   }
  55   return true;
  56 }
  57
  58 bool AudioConverter::CanWorkInPlace() const {
  59   bool needDownmix = mIn.Channels() > mOut.Channels();
  60   bool needUpmix = mIn.Channels() < mOut.Channels();
  61   bool canDownmixInPlace =
  62       mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
  63       mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
  64   bool needResample = mIn.Rate() != mOut.Rate();
  65   bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
  66   // We should be able to work in place if 1s of audio input takes less space
  67   // than 1s of audio output. However, as we downmix before resampling we can't
  68   // perform any upsampling in place (e.g. if incoming rate >= outgoing rate)
  69   return !needUpmix && (!needDownmix || canDownmixInPlace) &&
  70          (!needResample || canResampleInPlace);
  71 }
  72
  73 size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn,
  74                                        size_t aFrames) {
  75   if (!aFrames) {
  76     return 0;
  77   }
  78   if (mIn.Channels() > mOut.Channels()) {
  79     return DownmixAudio(aOut, aIn, aFrames);
  80   } else if (mIn.Channels() < mOut.Channels()) {
  81     return UpmixAudio(aOut, aIn, aFrames);
  82   } else if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) {
  83     ReOrderInterleavedChannels(aOut, aIn, aFrames);
  84   } else if (aIn != aOut) {
  85     memmove(aOut, aIn, FramesOutToBytes(aFrames));
  86   }
  87   return aFrames;
  88 }
  89
  90 // Reorder interleaved channels.
  91 // Can work in place (e.g aOut == aIn).
  92 template <class AudioDataType>
  93 void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
  94                                  uint32_t aFrames, uint32_t aChannels,
  95                                  const uint8_t* aChannelOrderMap) {
  96   MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS);
  97   AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS];
  98   for (uint32_t i = 0; i < aFrames; i++) {
  99     for (uint32_t j = 0; j < aChannels; j++) {
 100       val[j] = aIn[aChannelOrderMap[j]];
 101     }
 102     for (uint32_t j = 0; j < aChannels; j++) {
 103       aOut[j] = val[j];
 104     }
 105     aOut += aChannels;
 106     aIn += aChannels;
 107   }
 108 }
 109
 110 void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
 111                                                 size_t aFrames) const {
 112   MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
 113   MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());
 114
 115   if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 ||
 116       mOut.Layout() == mIn.Layout()) {
 117     // If channel count is 1, planar and non-planar formats are the same or
 118     // there's nothing to reorder, or if we don't know how to re-order.
 119     if (aOut != aIn) {
 120       memmove(aOut, aIn, FramesOutToBytes(aFrames));
 121     }
 122     return;
 123   }
 124
 125   uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
 126   switch (bits) {
 127     case 8:
 128       _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn, aFrames,
 129                                   mIn.Channels(), mChannelOrderMap.Elements());
 130       break;
 131     case 16:
 132       _ReOrderInterleavedChannels((int16_t*)aOut, (const int16_t*)aIn, aFrames,
 133                                   mIn.Channels(), mChannelOrderMap.Elements());
 134       break;
 135     default:
 136       MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
 137       _ReOrderInterleavedChannels((int32_t*)aOut, (const int32_t*)aIn, aFrames,
 138                                   mIn.Channels(), mChannelOrderMap.Elements());
 139       break;
 140   }
 141 }
 142
 // Saturates aX to the signed 16-bit range [-32768, 32767].
 static inline int16_t clipTo15(int32_t aX) {
   if (aX < -32768) {
     return -32768;
   }
   if (aX > 32767) {
     return 32767;
   }
   return static_cast<int16_t>(aX);
 }
 146
 // Naive channel count conversion for interleaved audio, used when no channel
 // layout information is available.
 //
 // For each of the aFrames frames, the first min(aInChannels, aOutChannels)
 // samples are copied from aIn to aOut. When the output has more channels than
 // the input, the additional output channels are filled with silence (0); when
 // it has fewer, the extra input channels are dropped.
 //
 // aOut may be the same buffer as aIn:
 //  - with equal channel counts there is nothing to do;
 //  - when dropping channels the frames are compacted in place (every write
 //    lands at or before the sample being read, so a forward pass is safe);
 //  - when adding channels the buffer is left untouched, since expanding
 //    forward would overwrite input that has not been read yet.
 template <typename TYPE>
 static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
                           int32_t aInChannels, int32_t aFrames) {
   if (aIn == aOut && aOutChannels >= aInChannels) {
     return;
   }
   const int32_t commonChannels = std::min(aInChannels, aOutChannels);

   for (int32_t i = 0; i < aFrames; i++) {
     const TYPE* inFrame = aIn + i * aInChannels;
     TYPE* outFrame = aOut + i * aOutChannels;
     for (int32_t j = 0; j < commonChannels; j++) {
       outFrame[j] = inFrame[j];
     }
     // Silence the channels that have no counterpart in the input. This loop
     // does not run when downmixing (commonChannels == aOutChannels).
     for (int32_t j = commonChannels; j < aOutChannels; j++) {
       outFrame[j] = 0;
     }
   }
 }
 166
 167 size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn,
 168                                     size_t aFrames) const {
 169   MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
 170                         mIn.Format() == AudioConfig::FORMAT_FLT);
 171   MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels());
 172   MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
 173                         mOut.Layout() == AudioConfig::ChannelLayout(1));
 174
 175   uint32_t inChannels = mIn.Channels();
 176   uint32_t outChannels = mOut.Channels();
 177
 178   if (inChannels == outChannels) {
 179     if (aOut != aIn) {
 180       memmove(aOut, aIn, FramesOutToBytes(aFrames));
 181     }
 182     return aFrames;
 183   }
 184
 185   if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) {
 186     // Dumb copy dropping extra channels.
 187     if (mIn.Format() == AudioConfig::FORMAT_FLT) {
 188       dumbUpDownMix(static_cast<float*>(aOut), outChannels,
 189                     static_cast<const float*>(aIn), inChannels, aFrames);
 190     } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
 191       dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels,
 192                     static_cast<const int16_t*>(aIn), inChannels, aFrames);
 193     } else {
 194       MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 195     }
 196     return aFrames;
 197   }
 198
 199   MOZ_ASSERT(
 200       mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()),
 201       "Can only downmix input data in SMPTE layout");
 202   if (inChannels > 2) {
 203     if (mIn.Format() == AudioConfig::FORMAT_FLT) {
 204       // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
 205       // 5-8.
 206       static const float dmatrix[6][8][2] = {
 207           /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}},
 208           /*4*/
 209           {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}},
 210           /*5*/
 211           {{0.6510f, 0},
 212            {0, 0.6510f},
 213            {0.4600f, 0.4600f},
 214            {0.5636f, 0.3254f},
 215            {0.3254f, 0.5636f}},
 216           /*6*/
 217           {{0.5290f, 0},
 218            {0, 0.5290f},
 219            {0.3741f, 0.3741f},
 220            {0.3741f, 0.3741f},
 221            {0.4582f, 0.2645f},
 222            {0.2645f, 0.4582f}},
 223           /*7*/
 224           {{0.4553f, 0},
 225            {0, 0.4553f},
 226            {0.3220f, 0.3220f},
 227            {0.3220f, 0.3220f},
 228            {0.2788f, 0.2788f},
 229            {0.3943f, 0.2277f},
 230            {0.2277f, 0.3943f}},
 231           /*8*/
 232           {{0.3886f, 0},
 233            {0, 0.3886f},
 234            {0.2748f, 0.2748f},
 235            {0.2748f, 0.2748f},
 236            {0.3366f, 0.1943f},
 237            {0.1943f, 0.3366f},
 238            {0.3366f, 0.1943f},
 239            {0.1943f, 0.3366f}},
 240       };
 241       // Re-write the buffer with downmixed data
 242       const float* in = static_cast<const float*>(aIn);
 243       float* out = static_cast<float*>(aOut);
 244       for (uint32_t i = 0; i < aFrames; i++) {
 245         float sampL = 0.0;
 246         float sampR = 0.0;
 247         for (uint32_t j = 0; j < inChannels; j++) {
 248           sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
 249           sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
 250         }
 251         if (outChannels == 2) {
 252           *out++ = sampL;
 253           *out++ = sampR;
 254         } else {
 255           *out++ = (sampL + sampR) * 0.5;
 256         }
 257       }
 258     } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
 259       // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
 260       // 5-8. Coefficients in Q14.
 261       static const int16_t dmatrix[6][8][2] = {
 262           /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
 263           /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
 264           /*5*/
 265           {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
 266           /*6*/
 267           {{8668, 0},
 268            {0, 8668},
 269            {6129, 6129},
 270            {6129, 6129},
 271            {7507, 4335},
 272            {4335, 7507}},
 273           /*7*/
 274           {{7459, 0},
 275            {0, 7459},
 276            {5275, 5275},
 277            {5275, 5275},
 278            {4568, 4568},
 279            {6460, 3731},
 280            {3731, 6460}},
 281           /*8*/
 282           {{6368, 0},
 283            {0, 6368},
 284            {4502, 4502},
 285            {4502, 4502},
 286            {5514, 3184},
 287            {3184, 5514},
 288            {5514, 3184},
 289            {3184, 5514}}};
 290       // Re-write the buffer with downmixed data
 291       const int16_t* in = static_cast<const int16_t*>(aIn);
 292       int16_t* out = static_cast<int16_t*>(aOut);
 293       for (uint32_t i = 0; i < aFrames; i++) {
 294         int32_t sampL = 0;
 295         int32_t sampR = 0;
 296         for (uint32_t j = 0; j < inChannels; j++) {
 297           sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
 298           sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
 299         }
 300         sampL = clipTo15((sampL + 8192) >> 14);
 301         sampR = clipTo15((sampR + 8192) >> 14);
 302         if (outChannels == 2) {
 303           *out++ = sampL;
 304           *out++ = sampR;
 305         } else {
 306           *out++ = (sampL + sampR) * 0.5;
 307         }
 308       }
 309     } else {
 310       MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 311     }
 312     return aFrames;
 313   }
 314
 315   MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1);
 316   if (mIn.Format() == AudioConfig::FORMAT_FLT) {
 317     const float* in = static_cast<const float*>(aIn);
 318     float* out = static_cast<float*>(aOut);
 319     for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
 320       float sample = 0.0;
 321       // The sample of the buffer would be interleaved.
 322       sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
 323       *out++ = sample;
 324     }
 325   } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
 326     const int16_t* in = static_cast<const int16_t*>(aIn);
 327     int16_t* out = static_cast<int16_t*>(aOut);
 328     for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
 329       int32_t sample = 0.0;
 330       // The sample of the buffer would be interleaved.
 331       sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
 332       *out++ = sample;
 333     }
 334   } else {
 335     MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 336   }
 337   return aFrames;
 338 }
 339
 340 size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn,
 341                                      size_t aFrames) {
 342   if (!mResampler) {
 343     return 0;
 344   }
 345   uint32_t outframes = ResampleRecipientFrames(aFrames);
 346   uint32_t inframes = aFrames;
 347
 348   int error;
 349   if (mOut.Format() == AudioConfig::FORMAT_FLT) {
 350     const float* in = reinterpret_cast<const float*>(aIn);
 351     float* out = reinterpret_cast<float*>(aOut);
 352     error = speex_resampler_process_interleaved_float(mResampler, in, &inframes,
 353                                                       out, &outframes);
 354   } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
 355     const int16_t* in = reinterpret_cast<const int16_t*>(aIn);
 356     int16_t* out = reinterpret_cast<int16_t*>(aOut);
 357     error = speex_resampler_process_interleaved_int(mResampler, in, &inframes,
 358                                                     out, &outframes);
 359   } else {
 360     MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 361     error = RESAMPLER_ERR_ALLOC_FAILED;
 362   }
 363   MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS);
 364   if (error != RESAMPLER_ERR_SUCCESS) {
 365     speex_resampler_destroy(mResampler);
 366     mResampler = nullptr;
 367     return 0;
 368   }
 369   MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
 370   return outframes;
 371 }
 372
 373 void AudioConverter::RecreateResampler() {
 374   if (mResampler) {
 375     speex_resampler_destroy(mResampler);
 376   }
 377   int error;
 378   mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
 379                                     SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
 380
 381   if (error == RESAMPLER_ERR_SUCCESS) {
 382     speex_resampler_skip_zeros(mResampler);
 383   } else {
 384     NS_WARNING("Failed to initialize resampler.");
 385     mResampler = nullptr;
 386   }
 387 }
 388
 389 size_t AudioConverter::DrainResampler(void* aOut) {
 390   if (!mResampler) {
 391     return 0;
 392   }
 393   int frames = speex_resampler_get_input_latency(mResampler);
 394   AlignedByteBuffer buffer(FramesOutToBytes(frames));
 395   if (!buffer) {
 396     // OOM
 397     return 0;
 398   }
 399   frames = ResampleAudio(aOut, buffer.Data(), frames);
 400   // Tore down the resampler as it's easier than handling follow-up.
 401   RecreateResampler();
 402   return frames;
 403 }
 404
 405 size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn,
 406                                   size_t aFrames) const {
 407   MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
 408              mIn.Format() == AudioConfig::FORMAT_FLT);
 409   MOZ_ASSERT(mIn.Channels() < mOut.Channels());
 410   MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
 411   MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
 412
 413   if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() ||
 414       mOut.Channels() != 2) {
 415     // Dumb copy the channels and insert silence for the extra channels.
 416     if (mIn.Format() == AudioConfig::FORMAT_FLT) {
 417       dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
 418                     static_cast<const float*>(aIn), mIn.Channels(), aFrames);
 419     } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
 420       dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
 421                     static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
 422     } else {
 423       MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 424     }
 425     return aFrames;
 426   }
 427
 428   // Upmix mono to stereo.
 429   // This is a very dumb mono to stereo upmixing, power levels are preserved
 430   // following the calculation: left = right = -3dB*mono.
 431   if (mIn.Format() == AudioConfig::FORMAT_FLT) {
 432     const float m3db = std::sqrt(0.5);  // -3dB = sqrt(1/2)
 433     const float* in = static_cast<const float*>(aIn);
 434     float* out = static_cast<float*>(aOut);
 435     for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
 436       float sample = in[fIdx] * m3db;
 437       // The samples of the buffer would be interleaved.
 438       *out++ = sample;
 439       *out++ = sample;
 440     }
 441   } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
 442     const int16_t* in = static_cast<const int16_t*>(aIn);
 443     int16_t* out = static_cast<int16_t*>(aOut);
 444     for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
 445       int16_t sample =
 446           ((int32_t)in[fIdx] * 11585) >> 14;  // close enough to i*sqrt(0.5)
 447       // The samples of the buffer would be interleaved.
 448       *out++ = sample;
 449       *out++ = sample;
 450     }
 451   } else {
 452     MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
 453   }
 454
 455   return aFrames;
 456 }
 457
 458 size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const {
 459   if (!aFrames && mIn.Rate() != mOut.Rate()) {
 460     if (!mResampler) {
 461       return 0;
 462     }
 463     // We drain by pushing in get_input_latency() samples of 0
 464     aFrames = speex_resampler_get_input_latency(mResampler);
 465   }
 466   return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
 467 }
 468
 469 size_t AudioConverter::FramesOutToSamples(size_t aFrames) const {
 470   return aFrames * mOut.Channels();
 471 }
 472
 473 size_t AudioConverter::SamplesInToFrames(size_t aSamples) const {
 474   return aSamples / mIn.Channels();
 475 }
 476
 477 size_t AudioConverter::FramesOutToBytes(size_t aFrames) const {
 478   return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format());
 479 }
 480 }  // namespace mozilla