1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
6 #define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#include <list>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/files/file.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "ipc/ipc_platform_file.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"
25 // A WebRtcAudioDeviceImpl instance implements the abstract interface
26 // webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
27 // VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
28 // Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
29 // session id that tells which device to use. The user can then call
30 // WebRtcAudioDeviceImpl::StartPlayout() and
31 // WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
32 // and start audio rendering and capturing in the browser process. IPC is
33 // utilized to set up the media streams.
37 // using namespace webrtc;
40 // scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
41 // external_adm = new WebRtcAudioDeviceImpl();
42 // external_adm->SetSessionId(session_id);
43 // VoiceEngine* voe = VoiceEngine::Create();
44 // VoEBase* base = VoEBase::GetInterface(voe);
45 // base->Init(external_adm);
46 // int ch = base->CreateChannel();
48 // base->StartReceive(ch)
49 // base->StartPlayout(ch);
50 // base->StartSending(ch);
52 // <== full-duplex audio session with AGC enabled ==>
54 // base->DeleteChannel(ch);
57 // VoiceEngine::Delete(voe);
60 // webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
62 // RegisterAudioCallback(this)
// webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
// implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
67 // Creates and initializes the AudioOutputDevice and AudioInputDevice
71 // Enables the adaptive analog mode of the AGC which ensures that a
72 // suitable microphone volume level will be set. This scheme will affect
73 // the actual microphone control slider.
77 // It aims to maintain a constant speech loudness level from the microphone.
78 // This is done by both controlling the analog microphone gain and applying
79 // digital gain. The microphone gain on the sound card is slowly
80 // increased/decreased during speech only. By observing the microphone control
81 // slider you can see it move when you speak. If you scream, the slider moves
82 // downwards and then upwards again when you return to normal. It is not
83 // uncommon that the slider hits the maximum. This means that the maximum
84 // analog gain is not large enough to give the desired loudness. Nevertheless,
85 // we can in general still attain the desired loudness. If the microphone
86 // control slider is moved manually, the gain adaptation restarts and returns
87 // to roughly the same position as before the change if the circumstances are
88 // still the same. When the input microphone signal causes saturation, the
89 // level is decreased dramatically and has to re-adapt towards the old level.
90 // The adaptation is a slowly varying process and at the beginning of capture
91 // this is noticed by a slow increase in volume. Smaller changes in microphone
92 // input level is leveled out by the built-in digital control. For larger
93 // differences we need to rely on the slow adaptation.
94 // See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
96 // AGC implementation details:
98 // The adaptive analog mode of the AGC is always enabled for desktop platforms
101 // Before recording starts, the ADM enables AGC on the AudioInputDevice.
103 // A capture session with AGC is started up as follows (simplified):
107 // ADM::StartRecording()
108 // AudioInputDevice::InitializeOnIOThread()
109 // AudioInputHostMsg_CreateStream(..., agc=true) [IPC]
111 // [IPC to the browser]
113 // AudioInputRendererHost::OnCreateStream()
114 // AudioInputController::CreateLowLatency()
115 // AudioInputController::DoSetAutomaticGainControl(true)
116 // AudioInputStream::SetAutomaticGainControl(true)
118 // AGC is now enabled in the media layer and streaming starts (details omitted).
119 // The figure below illustrates the AGC scheme which is active in combination
120 // with the default media flow explained earlier.
124 // AudioInputStream::(Capture thread loop)
125 // AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
126 // AudioInputData::OnData(..., volume)
127 // AudioInputController::OnData(..., volume)
128 // AudioInputSyncWriter::Write(..., volume)
130 // [volume | size | data] is sent to the renderer [shared memory]
134 // AudioInputDevice::AudioThreadCallback::Process()
135 // WebRtcAudioDeviceImpl::Capture(..., volume)
136 // AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has made a decision that the microphone level should change.
142 // if (new_volume != 0)
143 // AudioInputDevice::SetVolume(new_volume)
144 // AudioInputHostMsg_SetVolume(new_volume) [IPC]
146 // [IPC to the browser]
148 // AudioInputRendererHost::OnSetVolume()
149 // AudioInputController::SetVolume()
150 // AudioInputStream::SetVolume(scaled_volume)
152 // Here we set the new microphone level in the media layer and at the same time
153 // read the new setting (we might not get exactly what is set).
155 // AudioInputData::OnData(..., updated_volume)
156 // AudioInputController::OnData(..., updated_volume)
159 // This process repeats until we stop capturing data. Note that, a common
160 // steady state is that the volume control reaches its max and the new_volume
161 // value from the AGC is zero. A loud voice input is required to break this
162 // state and start lowering the level again.
164 // Implementation notes:
166 // - This class must be created and destroyed on the main render thread and
167 // most methods are called on the same thread. However, some methods are
168 // also called on a Libjingle worker thread. RenderData is called on the
169 // AudioOutputDevice thread and CaptureData on the AudioInputDevice thread.
170 // To summarize: this class lives on four different threads.
171 // - The webrtc::AudioDeviceModule is reference counted.
172 // - AGC is only supported in combination with the WASAPI-based audio layer
173 // on Windows, i.e., it is not supported on Windows XP.
// - All volume levels required for the AGC scheme are transferred in a
//   normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//   (WebRTC client and media layer). This approach ensures that we can avoid
//   transferring maximum levels between the renderer and the browser.
182 class WebRtcAudioCapturer
;
183 class WebRtcAudioRenderer
;
185 // TODO(xians): Move the following two interfaces to webrtc so that
186 // libjingle can own references to the renderer and capturer.
187 class WebRtcAudioRendererSource
{
189 // Callback to get the rendered data.
190 virtual void RenderData(media::AudioBus
* audio_bus
,
192 int audio_delay_milliseconds
) = 0;
194 // Callback to notify the client that the renderer is going away.
195 virtual void RemoveAudioRenderer(WebRtcAudioRenderer
* renderer
) = 0;
198 virtual ~WebRtcAudioRendererSource() {}
201 class PeerConnectionAudioSink
{
203 // Callback to deliver the captured interleaved data.
204 // |channels| contains a vector of WebRtc VoE channels.
205 // |audio_data| is the pointer to the audio data.
206 // |sample_rate| is the sample frequency of audio data.
207 // |number_of_channels| is the number of channels reflecting the order of
208 // surround sound channels.
209 // |audio_delay_milliseconds| is recording delay value.
210 // |current_volume| is current microphone volume, in range of |0, 255].
211 // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
213 // The return value is the new microphone volume, in the range of |0, 255].
214 // When the volume does not need to be updated, it returns 0.
215 virtual int OnData(const int16
* audio_data
,
217 int number_of_channels
,
218 int number_of_frames
,
219 const std::vector
<int>& channels
,
220 int audio_delay_milliseconds
,
222 bool need_audio_processing
,
223 bool key_pressed
) = 0;
225 // Set the format for the capture audio parameters.
226 // This is called when the capture format has changed, and it must be called
227 // on the same thread as calling CaptureData().
228 virtual void OnSetFormat(const media::AudioParameters
& params
) = 0;
231 virtual ~PeerConnectionAudioSink() {}
234 // TODO(xians): Merge this interface with WebRtcAudioRendererSource.
235 // The reason why we could not do it today is that WebRtcAudioRendererSource
236 // gets the data by pulling, while the data is pushed into
237 // WebRtcPlayoutDataSource::Sink.
238 class WebRtcPlayoutDataSource
{
242 // Callback to get the playout data.
243 // Called on the render audio thread.
244 virtual void OnPlayoutData(media::AudioBus
* audio_bus
,
246 int audio_delay_milliseconds
) = 0;
248 // Callback to notify the sink that the source has changed.
249 // Called on the main render thread.
250 virtual void OnPlayoutDataSourceChanged() = 0;
256 // Adds/Removes the sink of WebRtcAudioRendererSource to the ADM.
257 // These methods are used by the MediaStreamAudioProcesssor to get the
258 // rendered data for AEC.
259 virtual void AddPlayoutSink(Sink
* sink
) = 0;
260 virtual void RemovePlayoutSink(Sink
* sink
) = 0;
263 virtual ~WebRtcPlayoutDataSource() {}
266 // Note that this class inherits from webrtc::AudioDeviceModule but due to
267 // the high number of non-implemented methods, we move the cruft over to the
268 // WebRtcAudioDeviceNotImpl.
269 class CONTENT_EXPORT WebRtcAudioDeviceImpl
270 : NON_EXPORTED_BASE(public PeerConnectionAudioSink
),
271 NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl
),
272 NON_EXPORTED_BASE(public WebRtcAudioRendererSource
),
273 NON_EXPORTED_BASE(public WebRtcPlayoutDataSource
) {
275 // The maximum volume value WebRtc uses.
276 static const int kMaxVolumeLevel
= 255;
278 // Instances of this object are created on the main render thread.
279 WebRtcAudioDeviceImpl();
281 // webrtc::RefCountedModule implementation.
282 // The creator must call AddRef() after construction and use Release()
283 // to release the reference and delete this object.
284 // Called on the main render thread.
285 virtual int32_t AddRef() OVERRIDE
;
286 virtual int32_t Release() OVERRIDE
;
288 // webrtc::AudioDeviceModule implementation.
289 // All implemented methods are called on the main render thread unless
290 // anything else is stated.
292 virtual int32_t RegisterAudioCallback(webrtc::AudioTransport
* audio_callback
)
295 virtual int32_t Init() OVERRIDE
;
296 virtual int32_t Terminate() OVERRIDE
;
297 virtual bool Initialized() const OVERRIDE
;
299 virtual int32_t PlayoutIsAvailable(bool* available
) OVERRIDE
;
300 virtual bool PlayoutIsInitialized() const OVERRIDE
;
301 virtual int32_t RecordingIsAvailable(bool* available
) OVERRIDE
;
302 virtual bool RecordingIsInitialized() const OVERRIDE
;
304 // All Start/Stop methods are called on a libJingle worker thread.
305 virtual int32_t StartPlayout() OVERRIDE
;
306 virtual int32_t StopPlayout() OVERRIDE
;
307 virtual bool Playing() const OVERRIDE
;
308 virtual int32_t StartRecording() OVERRIDE
;
309 virtual int32_t StopRecording() OVERRIDE
;
310 virtual bool Recording() const OVERRIDE
;
312 // Called on the AudioInputDevice worker thread.
313 virtual int32_t SetMicrophoneVolume(uint32_t volume
) OVERRIDE
;
315 // TODO(henrika): sort out calling thread once we start using this API.
316 virtual int32_t MicrophoneVolume(uint32_t* volume
) const OVERRIDE
;
318 virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume
) const OVERRIDE
;
319 virtual int32_t MinMicrophoneVolume(uint32_t* min_volume
) const OVERRIDE
;
320 virtual int32_t StereoPlayoutIsAvailable(bool* available
) const OVERRIDE
;
321 virtual int32_t StereoRecordingIsAvailable(bool* available
) const OVERRIDE
;
322 virtual int32_t PlayoutDelay(uint16_t* delay_ms
) const OVERRIDE
;
323 virtual int32_t RecordingDelay(uint16_t* delay_ms
) const OVERRIDE
;
324 virtual int32_t RecordingSampleRate(uint32_t* sample_rate
) const OVERRIDE
;
325 virtual int32_t PlayoutSampleRate(uint32_t* sample_rate
) const OVERRIDE
;
327 // Sets the |renderer_|, returns false if |renderer_| already exists.
328 // Called on the main renderer thread.
329 bool SetAudioRenderer(WebRtcAudioRenderer
* renderer
);
331 // Adds/Removes the capturer to the ADM.
332 // TODO(xians): Remove these two methods once the ADM does not need to pass
333 // hardware information up to WebRtc.
334 void AddAudioCapturer(const scoped_refptr
<WebRtcAudioCapturer
>& capturer
);
335 void RemoveAudioCapturer(const scoped_refptr
<WebRtcAudioCapturer
>& capturer
);
337 // Gets the default capturer, which is the last capturer in |capturers_|.
338 // The method can be called by both Libjingle thread and main render thread.
339 scoped_refptr
<WebRtcAudioCapturer
> GetDefaultCapturer() const;
341 // Gets paired device information of the capture device for the audio
342 // renderer. This is used to pass on a session id, sample rate and buffer
343 // size to a webrtc audio renderer (either local or remote), so that audio
344 // will be rendered to a matching output device.
345 // Returns true if the capture device has a paired output device, otherwise
346 // false. Note that if there are more than one open capture device the
347 // function will not be able to pick an appropriate device and return false.
348 bool GetAuthorizedDeviceInfoForAudioRenderer(
349 int* session_id
, int* output_sample_rate
, int* output_buffer_size
);
351 const scoped_refptr
<WebRtcAudioRenderer
>& renderer() const {
355 // Enables the Aec dump. If the default capturer exists, it will call
356 // StartAecDump() on the capturer and pass the ownership of the file to
357 // WebRtc. Otherwise it will hold the file until a capturer is added.
358 void EnableAecDump(base::File aec_dump_file
);
360 // Disables the Aec dump. When this method is called, the ongoing Aec dump
361 // on WebRtc will be stopped.
362 void DisableAecDump();
365 typedef std::list
<scoped_refptr
<WebRtcAudioCapturer
> > CapturerList
;
366 typedef std::list
<WebRtcPlayoutDataSource::Sink
*> PlayoutDataSinkList
;
369 // Make destructor private to ensure that we can only be deleted by Release().
370 virtual ~WebRtcAudioDeviceImpl();
372 // PeerConnectionAudioSink implementation.
374 // Called on the AudioInputDevice worker thread.
375 virtual int OnData(const int16
* audio_data
,
377 int number_of_channels
,
378 int number_of_frames
,
379 const std::vector
<int>& channels
,
380 int audio_delay_milliseconds
,
382 bool need_audio_processing
,
383 bool key_pressed
) OVERRIDE
;
385 // Called on the AudioInputDevice worker thread.
386 virtual void OnSetFormat(const media::AudioParameters
& params
) OVERRIDE
;
388 // WebRtcAudioRendererSource implementation.
390 // Called on the AudioOutputDevice worker thread.
391 virtual void RenderData(media::AudioBus
* audio_bus
,
393 int audio_delay_milliseconds
) OVERRIDE
;
395 // Called on the main render thread.
396 virtual void RemoveAudioRenderer(WebRtcAudioRenderer
* renderer
) OVERRIDE
;
398 // WebRtcPlayoutDataSource implementation.
399 virtual void AddPlayoutSink(WebRtcPlayoutDataSource::Sink
* sink
) OVERRIDE
;
400 virtual void RemovePlayoutSink(WebRtcPlayoutDataSource::Sink
* sink
) OVERRIDE
;
402 // Helper to start the Aec dump if the default capturer exists.
403 void MaybeStartAecDump();
405 // Used to DCHECK that we are called on the correct thread.
406 base::ThreadChecker thread_checker_
;
410 // List of captures which provides access to the native audio input layer
411 // in the browser process.
412 CapturerList capturers_
;
414 // Provides access to the audio renderer in the browser process.
415 scoped_refptr
<WebRtcAudioRenderer
> renderer_
;
417 // A list of raw pointer of WebRtcPlayoutDataSource::Sink objects which want
418 // to get the playout data, the sink need to call RemovePlayoutSink()
419 // before it goes away.
420 PlayoutDataSinkList playout_sinks_
;
422 // Weak reference to the audio callback.
423 // The webrtc client defines |audio_transport_callback_| by calling
424 // RegisterAudioCallback().
425 webrtc::AudioTransport
* audio_transport_callback_
;
427 // Cached value of the current audio delay on the input/capture side.
430 // Cached value of the current audio delay on the output/renderer side.
431 int output_delay_ms_
;
433 // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
434 // |recording_| and |microphone_volume_|.
435 mutable base::Lock lock_
;
437 // Used to protect the racing of calling OnData() since there can be more
438 // than one input stream calling OnData().
439 mutable base::Lock capture_callback_lock_
;
445 // Stores latest microphone volume received in a CaptureData() callback.
446 // Range is [0, 255].
447 uint32_t microphone_volume_
;
449 // Buffer used for temporary storage during render callback.
450 // It is only accessed by the audio render thread.
451 std::vector
<int16
> render_buffer_
;
453 // Used for start the Aec dump on the default capturer.
454 base::File aec_dump_file_
;
456 // Flag to tell if audio processing is enabled in MediaStreamAudioProcessor.
457 const bool is_audio_track_processing_enabled_
;
459 DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl
);
462 } // namespace content
464 #endif // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_