From af70041101543cfb65a2aed42f7e41e38a5d977b Mon Sep 17 00:00:00 2001 From: Karl Tomlinson Date: Wed, 10 Apr 2024 22:50:38 +0000 Subject: [PATCH] Bug 1887774 pass a track to EnsureAudioProcessing() r=pehrsons This would be used in a subsequent changeset to configure the AudioProcessing. Differential Revision: https://phabricator.services.mozilla.com/D206871 --- dom/media/MediaTrackGraphImpl.h | 2 + dom/media/gtest/TestAudioInputProcessing.cpp | 53 +++++++++++++-------- dom/media/webrtc/MediaEngineWebRTCAudio.cpp | 71 +++++++++++++++------------- dom/media/webrtc/MediaEngineWebRTCAudio.h | 11 +++-- 4 files changed, 78 insertions(+), 59 deletions(-) diff --git a/dom/media/MediaTrackGraphImpl.h b/dom/media/MediaTrackGraphImpl.h index e733b961ff7a..68a73ac0aabf 100644 --- a/dom/media/MediaTrackGraphImpl.h +++ b/dom/media/MediaTrackGraphImpl.h @@ -1115,12 +1115,14 @@ class MediaTrackGraphImpl : public MediaTrackGraph, const float mGlobalVolume; #ifdef DEBUG + protected: /** * Used to assert when AppendMessage() runs control messages synchronously. */ bool mCanRunMessagesSynchronously; #endif + private: /** * The graph's main-thread observable graph time. * Updated by the stable state runnable after each iteration. diff --git a/dom/media/gtest/TestAudioInputProcessing.cpp b/dom/media/gtest/TestAudioInputProcessing.cpp index 93524122ebc7..901718d64c32 100644 --- a/dom/media/gtest/TestAudioInputProcessing.cpp +++ b/dom/media/gtest/TestAudioInputProcessing.cpp @@ -30,11 +30,21 @@ class MockGraph : public MediaTrackGraphImpl { void Init(uint32_t aChannels) { MediaTrackGraphImpl::Init(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aChannels); - // Remove this graph's driver since it holds a ref. If no AppendMessage - // takes place, the driver never starts. This will also make sure no-one - // tries to use it. We are still kept alive by the self-ref. Destroy() must - // be called to break that cycle. - SetCurrentDriver(nullptr); + + MonitorAutoLock lock(mMonitor); + // We don't need a graph driver. Advance to + // LIFECYCLE_WAITING_FOR_TRACK_DESTRUCTION so that the driver never + // starts. Graph control messages run as in shutdown, synchronously. + // This permits the main thread part of track initialization through + // AudioProcessingTrack::Create(). + mLifecycleState = LIFECYCLE_WAITING_FOR_TRACK_DESTRUCTION; +#ifdef DEBUG + mCanRunMessagesSynchronously = true; +#endif + // Remove this graph's driver since it holds a ref. We are still kept + // alive by the self-ref. Destroy() must be called to break that cycle if + // no tracks are created and destroyed. + mDriver = nullptr; } MOCK_CONST_METHOD0(OnGraphThread, bool()); @@ -53,6 +63,7 @@ TEST(TestAudioInputProcessing, Buffering) const uint32_t channels = 1; auto graph = MakeRefPtr>(rate); graph->Init(channels); + RefPtr track = AudioProcessingTrack::Create(graph); auto aip = MakeRefPtr(channels); @@ -90,7 +101,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 0); @@ -109,7 +120,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 0); @@ -127,7 +138,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 0); @@ -148,7 +159,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 32); @@ -162,7 +173,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 32); @@ -181,7 +192,7 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 64); @@ -199,14 +210,14 @@ TEST(TestAudioInputProcessing, Buffering) AudioSegment input; generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), processedTime); EXPECT_EQ(aip->NumBufferedFrames(graph), 0); } aip->Stop(graph); - graph->Destroy(); + track->Destroy(); } TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) @@ -215,6 +226,7 @@ TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) const uint32_t channels = 2; auto graph = MakeRefPtr>(rate); graph->Init(channels); + RefPtr track = AudioProcessingTrack::Create(graph); auto aip = MakeRefPtr(channels); AudioGenerator generator(channels, rate); @@ -284,7 +296,7 @@ TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) AudioSegment output; { AudioSegment data; - aip->Process(graph, 0, 4800, &input, &data); + aip->Process(track, 0, 4800, &input, &data); EXPECT_EQ(input.GetDuration(), 4800); EXPECT_EQ(data.GetDuration(), 4800); @@ -292,7 +304,7 @@ TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) EXPECT_EQ(aip->NumBufferedFrames(graph), 480); AudioSegment dummy; dummy.AppendNullData(480); - aip->Process(graph, 0, 480, &dummy, &data); + aip->Process(track, 0, 480, &dummy, &data); EXPECT_EQ(dummy.GetDuration(), 480); EXPECT_EQ(data.GetDuration(), 480 + 4800); @@ -309,7 +321,7 @@ TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) EXPECT_EQ(aip->IsPassThrough(graph), true); { AudioSegment output; - aip->Process(graph, 0, 4800, &input, &output); + aip->Process(track, 0, 4800, &input, &output); EXPECT_EQ(input.GetDuration(), 4800); EXPECT_EQ(output.GetDuration(), 4800); @@ -317,7 +329,7 @@ TEST(TestAudioInputProcessing, ProcessDataWithDifferentPrincipals) } aip->Stop(graph); - graph->Destroy(); + track->Destroy(); } TEST(TestAudioInputProcessing, Downmixing) @@ -326,6 +338,7 @@ TEST(TestAudioInputProcessing, Downmixing) const uint32_t channels = 4; auto graph = MakeRefPtr>(rate); graph->Init(channels); + RefPtr track = AudioProcessingTrack::Create(graph); auto aip = MakeRefPtr(channels); @@ -359,7 +372,7 @@ TEST(TestAudioInputProcessing, Downmixing) // downmix to mono, scaling the input by 1/4 in the process. // We can't compare the input and output signal because the sine is going to // be mangledui - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime); EXPECT_EQ(output.MaxChannelCount(), 1u); @@ -390,7 +403,7 @@ TEST(TestAudioInputProcessing, Downmixing) nextTime += MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames); generator.Generate(input, nextTime - processedTime); - aip->Process(graph, processedTime, nextTime, &input, &output); + aip->Process(track, processedTime, nextTime, &input, &output); EXPECT_EQ(input.GetDuration(), nextTime - processedTime); EXPECT_EQ(output.GetDuration(), nextTime - processedTime); // This time, no downmix: 4 channels of input, 4 channels of output @@ -406,5 +419,5 @@ TEST(TestAudioInputProcessing, Downmixing) } aip->Stop(graph); - graph->Destroy(); + track->Destroy(); } diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp index df2d86854163..282cf44786ee 100644 --- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp +++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp @@ -584,10 +584,11 @@ void AudioInputProcessing::Stop(MediaTrackGraph* aGraph) { // // The D(N) frames of data are just forwarded from input to output without any // processing -void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, - GraphTime aTo, AudioSegment* aInput, +void AudioInputProcessing::Process(AudioProcessingTrack* aTrack, + GraphTime aFrom, GraphTime aTo, + AudioSegment* aInput, AudioSegment* aOutput) { - aGraph->AssertOnGraphThread(); + aTrack->AssertOnGraphThread(); MOZ_ASSERT(aFrom <= aTo); MOZ_ASSERT(!mEnded); @@ -596,10 +597,11 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, return; } + MediaTrackGraph* graph = aTrack->Graph(); if (!mEnabled) { LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64 " frames of silence to output (disabled)", - aGraph, aGraph->CurrentDriver(), this, need); + graph, graph->CurrentDriver(), this, need); aOutput->AppendNullData(need); return; } @@ -607,11 +609,11 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, MOZ_ASSERT(aInput->GetDuration() == need, "Wrong data length from input port source"); - if (IsPassThrough(aGraph)) { + if (IsPassThrough(graph)) { LOG_FRAME( "(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64 " frames of input data to output directly (PassThrough)", - aGraph, aGraph->CurrentDriver(), this, aInput->GetDuration()); + graph, graph->CurrentDriver(), this, aInput->GetDuration()); aOutput->AppendSegment(aInput); return; } @@ -620,7 +622,7 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, // packetizer. No need to change the pre-buffering since the rate is always // the same. The frames left in the packetizer would be replaced by null // data and then transferred to mSegment. - EnsurePacketizer(aGraph); + EnsurePacketizer(aTrack); // Preconditions of the audio-processing logic. MOZ_ASSERT(static_cast(mSegment.GetDuration()) + @@ -632,10 +634,10 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, MOZ_ASSERT(mSegment.GetDuration() >= 1); MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize); - PacketizeAndProcess(aGraph, *aInput); + PacketizeAndProcess(aTrack, *aInput); LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64 " frames of data now, after packetizing and processing", - aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration()); + graph, graph->CurrentDriver(), this, mSegment.GetDuration()); // By setting pre-buffering to the number of frames of one packet, and // because the maximum number of frames stuck in the packetizer before @@ -646,8 +648,7 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, mSegment.RemoveLeading(need); LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64 " frames of data to output, leaving %" PRId64 " frames in buffer", - aGraph, aGraph->CurrentDriver(), this, need, - mSegment.GetDuration()); + graph, graph->CurrentDriver(), this, need, mSegment.GetDuration()); // Postconditions of the audio-processing logic. MOZ_ASSERT(static_cast(mSegment.GetDuration()) + @@ -657,16 +658,16 @@ void AudioInputProcessing::Process(MediaTrackGraph* aGraph, GraphTime aFrom, MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize); } -void AudioInputProcessing::ProcessOutputData(MediaTrackGraph* aGraph, +void AudioInputProcessing::ProcessOutputData(AudioProcessingTrack* aTrack, const AudioChunk& aChunk) { MOZ_ASSERT(aChunk.ChannelCount() > 0); - aGraph->AssertOnGraphThread(); + aTrack->AssertOnGraphThread(); - if (!mEnabled || IsPassThrough(aGraph)) { + if (!mEnabled || IsPassThrough(aTrack->Graph())) { return; } - TrackRate sampleRate = aGraph->GraphRate(); + TrackRate sampleRate = aTrack->mSampleRate; uint32_t framesPerPacket = GetPacketSize(sampleRate); // in frames // Downmix from aChannels to MAX_CHANNELS if needed. uint32_t channelCount = @@ -704,7 +705,7 @@ void AudioInputProcessing::ProcessOutputData(MediaTrackGraph* aGraph, if (mOutputBufferFrameCount == framesPerPacket) { // Have a complete packet. Analyze it. - EnsureAudioProcessing(aGraph); + EnsureAudioProcessing(aTrack); for (uint32_t channel = 0; channel < channelCount; channel++) { channelPtrs[channel] = &mOutputBuffer[channel * framesPerPacket]; } @@ -721,14 +722,15 @@ void AudioInputProcessing::ProcessOutputData(MediaTrackGraph* aGraph, } // Only called if we're not in passthrough mode -void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraph* aGraph, +void AudioInputProcessing::PacketizeAndProcess(AudioProcessingTrack* aTrack, const AudioSegment& aSegment) { - MOZ_ASSERT(!IsPassThrough(aGraph), + MediaTrackGraph* graph = aTrack->Graph(); + MOZ_ASSERT(!IsPassThrough(graph), "This should be bypassed when in PassThrough mode."); MOZ_ASSERT(mEnabled); MOZ_ASSERT(mPacketizerInput); MOZ_ASSERT(mPacketizerInput->mPacketSize == - GetPacketSize(aGraph->GraphRate())); + GetPacketSize(aTrack->mSampleRate)); // Calculate number of the pending frames in mChunksInPacketizer. auto pendingFrames = [&]() { @@ -770,7 +772,7 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraph* aGraph, LOG_FRAME( "(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. " "Packetizer has %u frames (enough for %u packets) now", - aGraph, aGraph->CurrentDriver(), this, frameCount, + graph, graph->CurrentDriver(), this, frameCount, mPacketizerInput->FramesAvailable(), mPacketizerInput->PacketsAvailable()); @@ -828,10 +830,10 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraph* aGraph, deinterleavedPacketizedInputDataChannelPointers.Elements()); } - StreamConfig inputConfig(aGraph->GraphRate(), channelCountInput); + StreamConfig inputConfig(aTrack->mSampleRate, channelCountInput); StreamConfig outputConfig = inputConfig; - EnsureAudioProcessing(aGraph); + EnsureAudioProcessing(aTrack); // Bug 1404965: Get the right delay here, it saves some work down the line. mAudioProcessing->set_stream_delay_ms(0); @@ -937,7 +939,7 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraph* aGraph, "(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of " "packetized audio, leaving %u frames in packetizer (%" PRId64 " frames in mChunksInPacketizer)", - aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize, + graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize, mPacketizerInput->FramesAvailable(), pendingFrames()); // Postcondition of the Principal-labelling logic. @@ -993,10 +995,11 @@ TrackTime AudioInputProcessing::NumBufferedFrames( return mSegment.GetDuration(); } -void AudioInputProcessing::EnsurePacketizer(MediaTrackGraph* aGraph) { - aGraph->AssertOnGraphThread(); +void AudioInputProcessing::EnsurePacketizer(AudioProcessingTrack* aTrack) { + aTrack->AssertOnGraphThread(); MOZ_ASSERT(mEnabled); - MOZ_ASSERT(!IsPassThrough(aGraph)); + MediaTrackGraph* graph = aTrack->Graph(); + MOZ_ASSERT(!IsPassThrough(graph)); uint32_t channelCount = GetRequestedInputChannelCount(); MOZ_ASSERT(channelCount > 0); @@ -1008,7 +1011,7 @@ void AudioInputProcessing::EnsurePacketizer(MediaTrackGraph* aGraph) { // need to change pre-buffering since the packet size is the same as the old // one, since the rate is a constant. MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize == - GetPacketSize(aGraph->GraphRate())); + GetPacketSize(aTrack->mSampleRate)); bool needPreBuffering = !mPacketizerInput; if (mPacketizerInput) { const TrackTime numBufferedFrames = @@ -1018,23 +1021,23 @@ void AudioInputProcessing::EnsurePacketizer(MediaTrackGraph* aGraph) { mChunksInPacketizer.clear(); } - mPacketizerInput.emplace(GetPacketSize(aGraph->GraphRate()), channelCount); + mPacketizerInput.emplace(GetPacketSize(aTrack->mSampleRate), channelCount); if (needPreBuffering) { LOG_FRAME( "(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of " "silence as pre-buffering", - aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize); + graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize); AudioSegment buffering; buffering.AppendNullData( static_cast(mPacketizerInput->mPacketSize)); - PacketizeAndProcess(aGraph, buffering); + PacketizeAndProcess(aTrack, buffering); } } -void AudioInputProcessing::EnsureAudioProcessing(MediaTrackGraph* aGraph) { - aGraph->AssertOnGraphThread(); +void AudioInputProcessing::EnsureAudioProcessing(AudioProcessingTrack* aTrack) { + aTrack->AssertOnGraphThread(); if (!mAudioProcessing) { TRACE("AudioProcessing creation"); @@ -1137,7 +1140,7 @@ void AudioProcessingTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, MOZ_ASSERT(mInputs.Length() == 1); AudioSegment data; DeviceInputConsumerTrack::GetInputSourceData(data, aFrom, aTo); - mInputProcessing->Process(Graph(), aFrom, aTo, &data, + mInputProcessing->Process(this, aFrom, aTo, &data, GetData()); } MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo); @@ -1153,7 +1156,7 @@ void AudioProcessingTrack::NotifyOutputData(MediaTrackGraph* aGraph, MOZ_ASSERT(mGraph == aGraph, "Cannot feed audio output to another graph"); AssertOnGraphThread(); if (mInputProcessing) { - mInputProcessing->ProcessOutputData(aGraph, aChunk); + mInputProcessing->ProcessOutputData(this, aChunk); } } diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.h b/dom/media/webrtc/MediaEngineWebRTCAudio.h index 9cc0b6ea4ccd..a63120070af0 100644 --- a/dom/media/webrtc/MediaEngineWebRTCAudio.h +++ b/dom/media/webrtc/MediaEngineWebRTCAudio.h @@ -108,10 +108,11 @@ class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource { class AudioInputProcessing : public AudioDataListener { public: explicit AudioInputProcessing(uint32_t aMaxChannelCount); - void Process(MediaTrackGraph* aGraph, GraphTime aFrom, GraphTime aTo, + void Process(AudioProcessingTrack* aTrack, GraphTime aFrom, GraphTime aTo, AudioSegment* aInput, AudioSegment* aOutput); - void ProcessOutputData(MediaTrackGraph* aGraph, const AudioChunk& aChunk); + void ProcessOutputData(AudioProcessingTrack* aTrack, + const AudioChunk& aChunk); bool IsVoiceInput(MediaTrackGraph* aGraph) const override { // If we're passing data directly without AEC or any other process, this // means that all voice-processing has been disabled intentionaly. In this @@ -130,7 +131,7 @@ class AudioInputProcessing : public AudioDataListener { void Disconnect(MediaTrackGraph* aGraph) override; - void PacketizeAndProcess(MediaTrackGraph* aGraph, + void PacketizeAndProcess(AudioProcessingTrack* aTrack, const AudioSegment& aSegment); uint32_t GetRequestedInputChannelCount(); @@ -163,8 +164,8 @@ class AudioInputProcessing : public AudioDataListener { void PassThroughChanged(MediaTrackGraph* aGraph); void RequestedInputChannelCountChanged(MediaTrackGraph* aGraph, CubebUtils::AudioDeviceID aDeviceId); - void EnsurePacketizer(MediaTrackGraph* aGraph); - void EnsureAudioProcessing(MediaTrackGraph* aGraph); + void EnsurePacketizer(AudioProcessingTrack* aTrack); + void EnsureAudioProcessing(AudioProcessingTrack* aTrack); void ResetAudioProcessing(MediaTrackGraph* aGraph); PrincipalHandle GetCheckedPrincipal(const AudioSegment& aSegment); // This implements the processing algoritm to apply to the input (e.g. a -- 2.11.4.GIT