dom/media/AudioSegment.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
   4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "AudioSegment.h"
   7 #include "AudioMixer.h"
   8 #include "AudioChannelFormat.h"
   9 #include "MediaTrackGraph.h"  // for nsAutoRefTraits<SpeexResamplerState>
  10 #include <speex/speex_resampler.h>
  11
  12 namespace mozilla {
  13
  14 const uint8_t
  15     SilentChannel::gZeroChannel[MAX_AUDIO_SAMPLE_SIZE *
  16                                 SilentChannel::AUDIO_PROCESSING_FRAMES] = {0};
  17
  18 template <>
  19 const float* SilentChannel::ZeroChannel<float>() {
  20   return reinterpret_cast<const float*>(SilentChannel::gZeroChannel);
  21 }
  22
  23 template <>
  24 const int16_t* SilentChannel::ZeroChannel<int16_t>() {
  25   return reinterpret_cast<const int16_t*>(SilentChannel::gZeroChannel);
  26 }
  27
  28 void AudioSegment::ApplyVolume(float aVolume) {
  29   for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
  30     ci->mVolume *= aVolume;
  31   }
  32 }
  33
  34 template <typename T>
  35 void AudioSegment::Resample(nsAutoRef<SpeexResamplerState>& aResampler,
  36                             uint32_t* aResamplerChannelCount, uint32_t aInRate,
  37                             uint32_t aOutRate) {
  38   mDuration = 0;
  39
  40   for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
  41     AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
  42     AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
  43     AudioChunk& c = *ci;
  44     // If this chunk is null, don't bother resampling, just alter its duration
  45     if (c.IsNull()) {
  46       c.mDuration = (c.mDuration * aOutRate) / aInRate;
  47       mDuration += c.mDuration;
  48       continue;
  49     }
  50     uint32_t channels = c.mChannelData.Length();
  51     // This might introduce a discontinuity, but a channel count change in the
  52     // middle of a stream is not that common. This also initializes the
  53     // resampler as late as possible.
  54     if (channels != *aResamplerChannelCount) {
  55       SpeexResamplerState* state =
  56           speex_resampler_init(channels, aInRate, aOutRate,
  57                                SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
  58       MOZ_ASSERT(state);
  59       aResampler.own(state);
  60       *aResamplerChannelCount = channels;
  61     }
  62     output.SetLength(channels);
  63     bufferPtrs.SetLength(channels);
  64     uint32_t inFrames = c.mDuration;
  65     // Round up to allocate; the last frame may not be used.
  66     NS_ASSERTION((UINT64_MAX - aInRate + 1) / c.mDuration >= aOutRate,
  67                  "Dropping samples");
  68     uint32_t outSize =
  69         (static_cast<uint64_t>(c.mDuration) * aOutRate + aInRate - 1) / aInRate;
  70     for (uint32_t i = 0; i < channels; i++) {
  71       T* out = output[i].AppendElements(outSize);
  72       uint32_t outFrames = outSize;
  73
  74       const T* in = static_cast<const T*>(c.mChannelData[i]);
  75       dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), i, in,
  76                                                 &inFrames, out, &outFrames);
  77       MOZ_ASSERT(inFrames == c.mDuration);
  78
  79       bufferPtrs[i] = out;
  80       output[i].SetLength(outFrames);
  81     }
  82     MOZ_ASSERT(channels > 0);
  83     c.mDuration = output[0].Length();
  84     c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(std::move(output));
  85     for (uint32_t i = 0; i < channels; i++) {
  86       c.mChannelData[i] = bufferPtrs[i];
  87     }
  88     mDuration += c.mDuration;
  89   }
  90 }
  91
  92 void AudioSegment::ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
  93                                   uint32_t* aResamplerChannelCount,
  94                                   uint32_t aInRate, uint32_t aOutRate) {
  95   if (mChunks.IsEmpty()) {
  96     return;
  97   }
  98
  99   AudioSampleFormat format = AUDIO_FORMAT_SILENCE;
 100   for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
 101     if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) {
 102       format = ci->mBufferFormat;
 103     }
 104   }
 105
 106   switch (format) {
 107     // If the format is silence at this point, all the chunks are silent. The
 108     // actual function we use does not matter, it's just a matter of changing
 109     // the chunks duration.
 110     case AUDIO_FORMAT_SILENCE:
 111     case AUDIO_FORMAT_FLOAT32:
 112       Resample<float>(aResampler, aResamplerChannelCount, aInRate, aOutRate);
 113       break;
 114     case AUDIO_FORMAT_S16:
 115       Resample<int16_t>(aResampler, aResamplerChannelCount, aInRate, aOutRate);
 116       break;
 117     default:
 118       MOZ_ASSERT(false);
 119       break;
 120   }
 121 }
 122
 123 size_t AudioSegment::WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
 124                                               uint32_t aChannels) const {
 125   size_t offset = 0;
 126   if (GetDuration() <= 0) {
 127     MOZ_ASSERT(GetDuration() == 0);
 128     return offset;
 129   }
 130
 131   // Calculate how many samples in this segment
 132   size_t frames = static_cast<size_t>(GetDuration());
 133   CheckedInt<size_t> samples(frames);
 134   samples *= static_cast<size_t>(aChannels);
 135   MOZ_ASSERT(samples.isValid());
 136   if (!samples.isValid()) {
 137     return offset;
 138   }
 139
 140   // Enlarge buffer space if needed
 141   if (samples.value() > aBuffer.Capacity()) {
 142     aBuffer.SetCapacity(samples.value());
 143   }
 144   aBuffer.SetLengthAndRetainStorage(samples.value());
 145   aBuffer.ClearAndRetainStorage();
 146
 147   // Convert the de-interleaved chunks into an interleaved buffer. Note that
 148   // we may upmix or downmix the audio data if the channel in the chunks
 149   // mismatch with aChannels
 150   for (ConstChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
 151     const AudioChunk& c = *ci;
 152     size_t samplesInChunk = static_cast<size_t>(c.mDuration) * aChannels;
 153     switch (c.mBufferFormat) {
 154       case AUDIO_FORMAT_S16:
 155         WriteChunk<int16_t>(c, aChannels, c.mVolume,
 156                             aBuffer.Elements() + offset);
 157         break;
 158       case AUDIO_FORMAT_FLOAT32:
 159         WriteChunk<float>(c, aChannels, c.mVolume, aBuffer.Elements() + offset);
 160         break;
 161       case AUDIO_FORMAT_SILENCE:
 162         PodZero(aBuffer.Elements() + offset, samplesInChunk);
 163         break;
 164       default:
 165         MOZ_ASSERT_UNREACHABLE("Unknown format");
 166         PodZero(aBuffer.Elements() + offset, samplesInChunk);
 167         break;
 168     }
 169     offset += samplesInChunk;
 170   }
 171   MOZ_DIAGNOSTIC_ASSERT(samples.value() == offset,
 172                         "Segment's duration is incorrect");
 173   aBuffer.SetLengthAndRetainStorage(offset);
 174   return offset;
 175 }
 176
 177 // This helps to to safely get a pointer to the position we want to start
 178 // writing a planar audio buffer, depending on the channel and the offset in the
 179 // buffer.
 180 static AudioDataValue* PointerForOffsetInChannel(AudioDataValue* aData,
 181                                                  size_t aLengthSamples,
 182                                                  uint32_t aChannelCount,
 183                                                  uint32_t aChannel,
 184                                                  uint32_t aOffsetSamples) {
 185   size_t samplesPerChannel = aLengthSamples / aChannelCount;
 186   size_t beginningOfChannel = samplesPerChannel * aChannel;
 187   MOZ_ASSERT(aChannel * samplesPerChannel + aOffsetSamples < aLengthSamples,
 188              "Offset request out of bounds.");
 189   return aData + beginningOfChannel + aOffsetSamples;
 190 }
 191
 192 template <typename SrcT>
 193 static void DownMixChunk(const AudioChunk& aChunk,
 194                          Span<AudioDataValue* const> aOutputChannels) {
 195   Span<const SrcT* const> channelData = aChunk.ChannelData<SrcT>();
 196   uint32_t frameCount = aChunk.mDuration;
 197   if (channelData.Length() > aOutputChannels.Length()) {
 198     // Down mix.
 199     AudioChannelsDownMix(channelData, aOutputChannels, frameCount);
 200     for (AudioDataValue* outChannel : aOutputChannels) {
 201       ScaleAudioSamples(outChannel, frameCount, aChunk.mVolume);
 202     }
 203   } else {
 204     // The channel count is already what we want.
 205     for (uint32_t channel = 0; channel < aOutputChannels.Length(); channel++) {
 206       ConvertAudioSamplesWithScale(channelData[channel],
 207                                    aOutputChannels[channel], frameCount,
 208                                    aChunk.mVolume);
 209     }
 210   }
 211 }
 212
 213 void AudioChunk::DownMixTo(
 214     Span<AudioDataValue* const> aOutputChannelPtrs) const {
 215   switch (mBufferFormat) {
 216     case AUDIO_FORMAT_FLOAT32:
 217       DownMixChunk<float>(*this, aOutputChannelPtrs);
 218       return;
 219     case AUDIO_FORMAT_S16:
 220       DownMixChunk<int16_t>(*this, aOutputChannelPtrs);
 221       return;
 222     case AUDIO_FORMAT_SILENCE:
 223       for (AudioDataValue* outChannel : aOutputChannelPtrs) {
 224         std::fill_n(outChannel, mDuration, static_cast<AudioDataValue>(0));
 225       }
 226       return;
 227       // Avoid `default:` so that `-Wswitch` catches missing enumerators at
 228       // compile time.
 229   }
 230   MOZ_ASSERT_UNREACHABLE("buffer format");
 231 }
 232
 233 void AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
 234                        uint32_t aSampleRate) {
 235   AutoTArray<AudioDataValue,
 236              SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
 237       buf;
 238   AudioChunk upMixChunk;
 239   uint32_t offsetSamples = 0;
 240   uint32_t duration = GetDuration();
 241
 242   if (duration <= 0) {
 243     MOZ_ASSERT(duration == 0);
 244     return;
 245   }
 246
 247   uint32_t outBufferLength = duration * aOutputChannels;
 248   buf.SetLength(outBufferLength);
 249
 250   AutoTArray<AudioDataValue*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
 251   outChannelPtrs.SetLength(aOutputChannels);
 252
 253   uint32_t frames;
 254   for (ChunkIterator ci(*this); !ci.IsEnded();
 255        ci.Next(), offsetSamples += frames) {
 256     const AudioChunk& c = *ci;
 257     frames = c.mDuration;
 258     for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
 259       outChannelPtrs[channel] =
 260           PointerForOffsetInChannel(buf.Elements(), outBufferLength,
 261                                     aOutputChannels, channel, offsetSamples);
 262     }
 263
 264     // If the chunk is silent, simply write the right number of silence in the
 265     // buffers.
 266     if (c.mBufferFormat == AUDIO_FORMAT_SILENCE) {
 267       for (AudioDataValue* outChannel : outChannelPtrs) {
 268         PodZero(outChannel, frames);
 269       }
 270       continue;
 271     }
 272     // We need to upmix and downmix appropriately, depending on the
 273     // desired input and output channels.
 274     const AudioChunk* downMixInput = &c;
 275     if (c.ChannelCount() < aOutputChannels) {
 276       // Up-mix.
 277       upMixChunk = c;
 278       AudioChannelsUpMix<void>(&upMixChunk.mChannelData, aOutputChannels,
 279                                SilentChannel::gZeroChannel);
 280       downMixInput = &upMixChunk;
 281     }
 282     downMixInput->DownMixTo(outChannelPtrs);
 283   }
 284
 285   if (offsetSamples) {
 286     MOZ_ASSERT(offsetSamples == outBufferLength / aOutputChannels,
 287                "We forgot to write some samples?");
 288     aMixer.Mix(buf.Elements(), aOutputChannels, offsetSamples, aSampleRate);
 289   }
 290 }
 291
 292 }  // namespace mozilla