dom/media/gtest/TestAudioDecoderInputTrack.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
   5  * You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include <utility>
   8
   9 #include "AudioDecoderInputTrack.h"
  10 #include "gmock/gmock.h"
  11 #include "GraphDriver.h"
  12 #include "gtest/gtest.h"
  13 #include "MediaInfo.h"
  14 #include "MediaTrackGraphImpl.h"
  15 #include "nsThreadUtils.h"
  16 #include "VideoUtils.h"
  17 #include "WaitFor.h"
  18
  19 using namespace mozilla;
  20 using namespace mozilla::media;
  21 using testing::AssertionResult;
  22 using testing::NiceMock;
  23 using testing::Return;
  24
  25 constexpr uint32_t kNoFlags = 0;
  26 constexpr TrackRate kRate = 44100;
  27 constexpr uint32_t kChannels = 2;
  28
  29 class MockTestGraph : public MediaTrackGraphImpl {
  30  public:
  31   MockTestGraph(TrackRate aRate, uint32_t aChannels)
  32       : MediaTrackGraphImpl(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aRate,
  33                             aChannels, nullptr, NS_GetCurrentThread()) {
  34     ON_CALL(*this, OnGraphThread).WillByDefault(Return(true));
  35     // We have to call `Destroy()` manually in order to break the reference.
  36     // The reason we don't assign a null driver is because we would add a track
  37     // to the graph, then it would trigger graph's `EnsureNextIteration()` that
  38     // requires a non-null driver.
  39     SetCurrentDriver(new NiceMock<MockDriver>());
  40   }
  41
  42   MOCK_CONST_METHOD0(OnGraphThread, bool());
  43   MOCK_METHOD1(AppendMessage, void(UniquePtr<ControlMessage>));
  44
  45  protected:
  46   ~MockTestGraph() = default;
  47
  48   class MockDriver : public GraphDriver {
  49     NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MockDriver, override);
  50
  51     MockDriver() : GraphDriver(nullptr, nullptr, 0) {
  52       ON_CALL(*this, OnThread).WillByDefault(Return(true));
  53       ON_CALL(*this, ThreadRunning).WillByDefault(Return(true));
  54     }
  55
  56     MOCK_METHOD0(Start, void());
  57     MOCK_METHOD0(Shutdown, void());
  58     MOCK_METHOD0(IterationDuration, uint32_t());
  59     MOCK_METHOD0(EnsureNextIteration, void());
  60     MOCK_CONST_METHOD0(OnThread, bool());
  61     MOCK_CONST_METHOD0(ThreadRunning, bool());
  62
  63    protected:
  64     ~MockDriver() = default;
  65   };
  66
  67   bool mEnableFakeAppend = false;
  68 };
  69
  70 AudioData* CreateAudioDataFromInfo(uint32_t aFrames, const AudioInfo& aInfo) {
  71   AlignedAudioBuffer samples(aFrames * aInfo.mChannels);
  72   return new AudioData(0, TimeUnit::Zero(), std::move(samples), aInfo.mChannels,
  73                        aInfo.mRate);
  74 }
  75
  76 AudioDecoderInputTrack* CreateTrack(MediaTrackGraph* aGraph,
  77                                     nsISerialEventTarget* aThread,
  78                                     const AudioInfo& aInfo,
  79                                     float aPlaybackRate = 1.0,
  80                                     float aVolume = 1.0,
  81                                     bool aPreservesPitch = true) {
  82   return AudioDecoderInputTrack::Create(aGraph, aThread, aInfo, aPlaybackRate,
  83                                         aVolume, aPreservesPitch);
  84 }
  85
  86 class TestAudioDecoderInputTrack : public testing::Test {
  87  protected:
  88   void SetUp() override {
  89     mGraph = MakeRefPtr<NiceMock<MockTestGraph>>(kRate, kChannels);
  90
  91     mInfo.mRate = kRate;
  92     mInfo.mChannels = kChannels;
  93     mTrack = CreateTrack(mGraph, NS_GetCurrentThread(), mInfo);
  94     EXPECT_FALSE(mTrack->Ended());
  95   }
  96
  97   void TearDown() override {
  98     // This simulates the normal usage where the `Close()` is always be called
  99     // before the `Destroy()`.
 100     mTrack->Close();
 101     mTrack->Destroy();
 102     // Remove the reference of the track from the mock graph, and then release
 103     // the self-reference of mock graph.
 104     mGraph->RemoveTrackGraphThread(mTrack);
 105     mGraph->Destroy();
 106   }
 107
 108   AudioData* CreateAudioData(uint32_t aFrames) {
 109     return CreateAudioDataFromInfo(aFrames, mInfo);
 110   }
 111
 112   AudioSegment* GetTrackSegment() { return mTrack->GetData<AudioSegment>(); }
 113
 114   AssertionResult ExpectSegmentNonSilence(const char* aStartExpr,
 115                                           const char* aEndExpr,
 116                                           TrackTime aStart, TrackTime aEnd) {
 117     AudioSegment checkedRange;
 118     checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd);
 119     if (!checkedRange.IsNull()) {
 120       return testing::AssertionSuccess();
 121     }
 122     return testing::AssertionFailure()
 123            << "segment [" << aStart << ":" << aEnd << "] should be non-silence";
 124   }
 125
 126   AssertionResult ExpectSegmentSilence(const char* aStartExpr,
 127                                        const char* aEndExpr, TrackTime aStart,
 128                                        TrackTime aEnd) {
 129     AudioSegment checkedRange;
 130     checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd);
 131     if (checkedRange.IsNull()) {
 132       return testing::AssertionSuccess();
 133     }
 134     return testing::AssertionFailure()
 135            << "segment [" << aStart << ":" << aEnd << "] should be silence";
 136   }
 137
 138   RefPtr<MockTestGraph> mGraph;
 139   RefPtr<AudioDecoderInputTrack> mTrack;
 140   AudioInfo mInfo;
 141 };
 142
 143 TEST_F(TestAudioDecoderInputTrack, BasicAppendData) {
 144   // Start from [0:10] and each time we move the time by 10ms.
 145   // Expected: outputDuration=10, outputFrames=0, outputSilence=10
 146   TrackTime start = 0;
 147   TrackTime end = 10;
 148   mTrack->ProcessInput(start, end, kNoFlags);
 149   EXPECT_EQ(mTrack->GetEnd(), end);
 150   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 151
 152   // Expected: outputDuration=20, outputFrames=5, outputSilence=15
 153   RefPtr<AudioData> audio1 = CreateAudioData(5);
 154   mTrack->AppendData(audio1, nullptr);
 155   start = end;
 156   end += 10;
 157   mTrack->ProcessInput(start, end, kNoFlags);
 158   EXPECT_EQ(mTrack->GetEnd(), end);
 159   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + audio1->Frames());
 160   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + audio1->Frames(), end);
 161
 162   // Expected: outputDuration=30, outputFrames=15, outputSilence=15
 163   RefPtr<AudioData> audio2 = CreateAudioData(10);
 164   mTrack->AppendData(audio2, nullptr);
 165   start = end;
 166   end += 10;
 167   mTrack->ProcessInput(start, end, kNoFlags);
 168   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 169   EXPECT_EQ(mTrack->GetEnd(), end);
 170
 171   // Expected : sent all data, track should be ended in the next iteration and
 172   // fill slience in this iteration.
 173   mTrack->NotifyEndOfStream();
 174   start = end;
 175   end += 10;
 176   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 177   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 178   EXPECT_EQ(mTrack->GetEnd(), end);
 179   EXPECT_FALSE(mTrack->Ended());
 180
 181   // Expected : track ended
 182   start = end;
 183   end += 10;
 184   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 185   EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() + audio2->Frames());
 186 }
 187
 188 TEST_F(TestAudioDecoderInputTrack, ClearFuture) {
 189   // Start from [0:10] and each time we move the time by 10ms.
 190   // Expected: appended=30, expected duration=10
 191   RefPtr<AudioData> audio1 = CreateAudioData(30);
 192   mTrack->AppendData(audio1, nullptr);
 193   TrackTime start = 0;
 194   TrackTime end = 10;
 195   mTrack->ProcessInput(start, end, kNoFlags);
 196   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 197
 198   // In next iteration [10:20], we would consume the remaining data that was
 199   // appended in the previous iteration.
 200   start = end;
 201   end += 10;
 202   mTrack->ProcessInput(start, end, kNoFlags);
 203   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 204
 205   // Clear future data which is the remaining 10 frames so the track would
 206   // only output silence.
 207   mTrack->ClearFutureData();
 208   start = end;
 209   end += 10;
 210   mTrack->ProcessInput(start, end, kNoFlags);
 211   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 212
 213   // Test appending data again, to see if we can append data correctly after
 214   // calling `ClearFutureData()`.
 215   RefPtr<AudioData> audio2 = CreateAudioData(10);
 216   mTrack->AppendData(audio2, nullptr);
 217   start = end;
 218   end += 10;
 219   mTrack->ProcessInput(start, end, kNoFlags);
 220   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 221
 222   // Run another iteration that should only contains silence because the data
 223   // we appended only enough for one iteration.
 224   start = end;
 225   end += 10;
 226   mTrack->ProcessInput(start, end, kNoFlags);
 227   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 228
 229   // Clear future data would also remove the EOS.
 230   mTrack->NotifyEndOfStream();
 231   mTrack->ClearFutureData();
 232   start = end;
 233   end += 10;
 234   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 235   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 236   EXPECT_FALSE(mTrack->Ended());
 237
 238   // As EOS has been removed, in next iteration the track would still be
 239   // running.
 240   start = end;
 241   end += 10;
 242   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 243   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 244   EXPECT_FALSE(mTrack->Ended());
 245   EXPECT_EQ(mTrack->WrittenFrames(),
 246             (audio1->Frames() - 10 /* got clear */) + audio2->Frames());
 247 }
 248
 249 TEST_F(TestAudioDecoderInputTrack, InputRateChange) {
 250   // Start from [0:10] and each time we move the time by 10ms.
 251   // Expected: appended=10, expected duration=10
 252   RefPtr<AudioData> audio1 = CreateAudioData(10);
 253   mTrack->AppendData(audio1, nullptr);
 254   TrackTime start = 0;
 255   TrackTime end = 10;
 256   mTrack->ProcessInput(start, end, kNoFlags);
 257   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 258
 259   // Change input sample rate to the half, input data should be resampled and
 260   // its duration would become longer.
 261   // Expected: appended=10 + 5,
 262   //           expected duration=10 + 5*2 (resampled)
 263   mInfo.mRate = kRate / 2;
 264   RefPtr<AudioData> audioHalfSampleRate = CreateAudioData(5);
 265   mTrack->AppendData(audioHalfSampleRate, nullptr);
 266   start = end;
 267   end += 10;
 268   mTrack->ProcessInput(start, end, kNoFlags);
 269   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 270
 271   // Change input sample rate to the double, input data should be resampled and
 272   // its duration would become shorter.
 273   // Expected: appended=10 + 10 + 10,
 274   //           expected duration=10 + 10 + 10/2(resampled) + 5(silence)
 275   mInfo.mRate = kRate * 2;
 276   RefPtr<AudioData> audioDoubleSampleRate = CreateAudioData(10);
 277   TrackTime expectedDuration = audioDoubleSampleRate->Frames() / 2;
 278   mTrack->AppendData(audioDoubleSampleRate, nullptr);
 279   start = end;
 280   end += 10;
 281   mTrack->ProcessInput(start, end, kNoFlags);
 282   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + expectedDuration);
 283   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + expectedDuration, end);
 284   EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() +
 285                                          audioHalfSampleRate->Frames() * 2 +
 286                                          audioDoubleSampleRate->Frames() / 2);
 287 }
 288
 289 TEST_F(TestAudioDecoderInputTrack, ChannelChange) {
 290   // Start from [0:10] and each time we move the time by 10ms.
 291   // Track was initialized in stero.
 292   EXPECT_EQ(mTrack->NumberOfChannels(), uint32_t(2));
 293
 294   // But first audio data is mono, so the `NumberOfChannels()` changes to
 295   // reflect the maximum channel in the audio segment.
 296   mInfo.mChannels = 1;
 297   RefPtr<AudioData> audioMono = CreateAudioData(10);
 298   mTrack->AppendData(audioMono, nullptr);
 299   TrackTime start = 0;
 300   TrackTime end = 10;
 301   mTrack->ProcessInput(start, end, kNoFlags);
 302   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 303   EXPECT_EQ(mTrack->NumberOfChannels(), audioMono->mChannels);
 304
 305   // Then append audio data with 5 channels.
 306   mInfo.mChannels = 5;
 307   RefPtr<AudioData> audioWithFiveChannels = CreateAudioData(10);
 308   mTrack->AppendData(audioWithFiveChannels, nullptr);
 309   start = end;
 310   end += 10;
 311   mTrack->ProcessInput(start, end, kNoFlags);
 312   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 313   EXPECT_EQ(mTrack->NumberOfChannels(), audioWithFiveChannels->mChannels);
 314   EXPECT_EQ(mTrack->WrittenFrames(),
 315             audioMono->Frames() + audioWithFiveChannels->Frames());
 316 }
 317
 318 TEST_F(TestAudioDecoderInputTrack, VolumeChange) {
 319   // In order to run the volume change directly without using a real graph.
 320   // one for setting the track's volume, another for the track destruction.
 321   EXPECT_CALL(*mGraph, AppendMessage)
 322       .Times(2)
 323       .WillOnce([](UniquePtr<ControlMessage> aMessage) { aMessage->Run(); })
 324       .WillOnce([](UniquePtr<ControlMessage> aMessage) {});
 325
 326   // The default volume is 1.0.
 327   float expectedVolume = 1.0;
 328   RefPtr<AudioData> audio = CreateAudioData(20);
 329   TrackTime start = 0;
 330   TrackTime end = 10;
 331   mTrack->AppendData(audio, nullptr);
 332   mTrack->ProcessInput(start, end, kNoFlags);
 333   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 334   EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume);
 335
 336   // After setting volume on the track, the data in the output chunk should be
 337   // changed as well.
 338   expectedVolume = 0.1;
 339   mTrack->SetVolume(expectedVolume);
 340   SpinEventLoopUntil<ProcessFailureBehavior::IgnoreAndContinue>(
 341       "TEST_F(TestAudioDecoderInputTrack, VolumeChange)"_ns,
 342       [&] { return mTrack->Volume() == expectedVolume; });
 343   start = end;
 344   end += 10;
 345   mTrack->ProcessInput(start, end, kNoFlags);
 346   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 347   EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume);
 348 }
 349
 350 TEST_F(TestAudioDecoderInputTrack, BatchedData) {
 351   uint32_t appendedFrames = 0;
 352   RefPtr<AudioData> audio = CreateAudioData(10);
 353   for (size_t idx = 0; idx < 50; idx++) {
 354     mTrack->AppendData(audio, nullptr);
 355     appendedFrames += audio->Frames();
 356   }
 357
 358   // First we need to call `ProcessInput` at least once to drain the track's
 359   // SPSC queue, otherwise we're not able to push the batched data later.
 360   TrackTime start = 0;
 361   TrackTime end = 10;
 362   uint32_t expectedFrames = end - start;
 363   mTrack->ProcessInput(start, end, kNoFlags);
 364   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 365
 366   // The batched data would be pushed to the graph thread in around 10ms after
 367   // the track first time started to batch data, which we can't control here.
 368   // Therefore, we need to wait until the batched data gets cleared.
 369   SpinEventLoopUntil<ProcessFailureBehavior::IgnoreAndContinue>(
 370       "TEST_F(TestAudioDecoderInputTrack, BatchedData)"_ns,
 371       [&] { return !mTrack->HasBatchedData(); });
 372
 373   // Check that we received all the remainging data previously appended.
 374   start = end;
 375   end = start + (appendedFrames - expectedFrames);
 376   mTrack->ProcessInput(start, end, kNoFlags);
 377   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
 378
 379   // Check that we received no more data than previously appended.
 380   start = end;
 381   end += 10;
 382   mTrack->ProcessInput(start, end, kNoFlags);
 383   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
 384   EXPECT_EQ(mTrack->WrittenFrames(), appendedFrames);
 385 }
 386
 387 TEST_F(TestAudioDecoderInputTrack, OutputAndEndEvent) {
 388   // Append an audio and EOS, the output event should notify the amount of
 389   // frames that is equal to the amount of audio we appended.
 390   RefPtr<AudioData> audio = CreateAudioData(10);
 391   MozPromiseHolder<GenericPromise> holder;
 392   RefPtr<GenericPromise> p = holder.Ensure(__func__);
 393   MediaEventListener outputListener =
 394       mTrack->OnOutput().Connect(NS_GetCurrentThread(), [&](TrackTime aFrame) {
 395         EXPECT_EQ(aFrame, audio->Frames());
 396         holder.Resolve(true, __func__);
 397       });
 398   mTrack->AppendData(audio, nullptr);
 399   mTrack->NotifyEndOfStream();
 400   TrackTime start = 0;
 401   TrackTime end = 10;
 402   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 403   Unused << WaitFor(p);
 404
 405   // Track should end in this iteration, so the end event should be notified.
 406   p = holder.Ensure(__func__);
 407   MediaEventListener endListener = mTrack->OnEnd().Connect(
 408       NS_GetCurrentThread(), [&]() { holder.Resolve(true, __func__); });
 409   start = end;
 410   end += 10;
 411   mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
 412   Unused << WaitFor(p);
 413   outputListener.Disconnect();
 414   endListener.Disconnect();
 415 }
 416
 417 TEST_F(TestAudioDecoderInputTrack, PlaybackRateChange) {
 418   // In order to run the playback change directly without using a real graph.
 419   // one for setting the track's playback, another for the track destruction.
 420   EXPECT_CALL(*mGraph, AppendMessage)
 421       .Times(2)
 422       .WillOnce([](UniquePtr<ControlMessage> aMessage) { aMessage->Run(); })
 423       .WillOnce([](UniquePtr<ControlMessage> aMessage) {});
 424
 425   // Changing the playback rate.
 426   float expectedPlaybackRate = 2.0;
 427   mTrack->SetPlaybackRate(expectedPlaybackRate);
 428   SpinEventLoopUntil<ProcessFailureBehavior::IgnoreAndContinue>(
 429       "TEST_F(TestAudioDecoderInputTrack, PlaybackRateChange)"_ns,
 430       [&] { return mTrack->PlaybackRate() == expectedPlaybackRate; });
 431
 432   // Time stretcher in the track would usually need certain amount of data
 433   // before it outputs the time-stretched result. As we're in testing, we would
 434   // only append data once, so signal an EOS after appending data, in order to
 435   // ask the track to flush all samples from the time strecther.
 436   RefPtr<AudioData> audio = CreateAudioData(100);
 437   mTrack->AppendData(audio, nullptr);
 438   mTrack->NotifyEndOfStream();
 439
 440   // Playback rate is 2x, so we should only get 1/2x sample frames, another 1/2
 441   // should be silence.
 442   TrackTime start = 0;
 443   TrackTime end = audio->Frames();
 444   mTrack->ProcessInput(start, end, kNoFlags);
 445   EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, audio->Frames() / 2);
 446   EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + audio->Frames() / 2, end);
 447 }