// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "media/filters/ffmpeg_audio_decoder.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "media/base/audio_bus.h"
12 #include "media/base/audio_decoder_config.h"
13 #include "media/base/audio_timestamp_helper.h"
14 #include "media/base/bind_to_loop.h"
15 #include "media/base/data_buffer.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/pipeline.h"
19 #include "media/ffmpeg/ffmpeg_common.h"
20 #include "media/filters/ffmpeg_glue.h"
// Helper structure for managing multiple decoded audio frames per packet.
struct QueuedAudioBuffer {
  AudioDecoder::Status status;
  scoped_refptr<DataBuffer> buffer;
};

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result, int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions to meet to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->IsEndOfStream();
}

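// All members start out null/zeroed; the actual FFmpeg state is created later
// by Initialize() and ConfigureDecoder().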
FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::MessageLoopProxy>& message_loop)
    : message_loop_(message_loop),
      weak_factory_(this),
      demuxer_stream_(NULL),
      codec_context_(NULL),
      bits_per_channel_(0),
      channel_layout_(CHANNEL_LAYOUT_NONE),
      channels_(0),
      samples_per_second_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0),
      av_frame_(NULL) {
}

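// Stores the demuxer stream, configures an FFmpeg decoder for its audio
// config, and reports the result through |status_cb| on the decoder's message
// loop.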
void FFmpegAudioDecoder::Initialize(
    DemuxerStream* stream,
    const PipelineStatusCB& status_cb,
    const StatisticsCB& statistics_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);

  FFmpegGlue::InitializeFFmpeg();

  if (demuxer_stream_) {
    // TODO(scherkus): initialization currently happens more than once in
    // PipelineIntegrationTest.BasicPlayback.
    LOG(ERROR) << "Initialize has already been called.";
  }

  weak_this_ = weak_factory_.GetWeakPtr();
  demuxer_stream_ = stream;

  if (!ConfigureDecoder()) {
    initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
    return;
  }

  statistics_cb_ = statistics_cb;
  initialize_cb.Run(PIPELINE_OK);
}

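// Returns a previously queued buffer immediately if one is available;
// otherwise requests more data from the demuxer. Only one read may be in
// flight at a time.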
void FFmpegAudioDecoder::Read(const ReadCB& read_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb.is_null());
  CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";

  read_cb_ = BindToCurrentLoop(read_cb);

  // If we don't have any queued audio from the last packet we decoded, ask for
  // more data from the demuxer to satisfy this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

int FFmpegAudioDecoder::bits_per_channel() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return bits_per_channel_;
}

ChannelLayout FFmpegAudioDecoder::channel_layout() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return channel_layout_;
}

int FFmpegAudioDecoder::samples_per_second() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return samples_per_second_;
}

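// Flushes FFmpeg's internal buffers, clears queued output and timestamp state,
// then signals completion asynchronously on the decoder's message loop.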
void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  base::Closure reset_cb = BindToCurrentLoop(closure);

  avcodec_flush_buffers(codec_context_);
  ResetTimestampState();
  queued_audio_.clear();
  reset_cb.Run();
}

FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  // TODO(scherkus): should we require Stop() to be called? this might end up
  // getting called on a random thread due to refcounting.
  ReleaseFFmpegResources();
}

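// Issues a single demuxer read; the result is delivered to BufferReady().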
void FFmpegAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(
      &FFmpegAudioDecoder::BufferReady, weak_this_));
}

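// Demuxer read callback. Handles aborts and config changes, validates input
// timestamps, then runs the decode loop and returns the first decoded buffer
// (or asks the demuxer for more data if the packet didn't yield a full frame).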
void FFmpegAudioDecoder::BufferReady(
    DemuxerStream::Status status,
    const scoped_refptr<DecoderBuffer>& input) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb_.is_null());
  DCHECK(queued_audio_.empty());
  DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;

  if (status == DemuxerStream::kAborted) {
    DCHECK(!input.get());
    base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
    return;
  }

  if (status == DemuxerStream::kConfigChanged) {
    DCHECK(!input.get());

    // Send an "end of stream" buffer to the decode loop
    // to output any remaining data still in the decoder.
    RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);

    DVLOG(1) << "Config changed.";

    if (!ConfigureDecoder()) {
      base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
      return;
    }

    ResetTimestampState();

    if (queued_audio_.empty()) {
      ReadFromDemuxerStream();
      return;
    }

    base::ResetAndReturn(&read_cb_).Run(
        queued_audio_.front().status, queued_audio_.front().buffer);
    queued_audio_.pop_front();
    return;
  }

  DCHECK_EQ(status, DemuxerStream::kOk);

  // Make sure we are notified if http://crbug.com/49709 returns. The issue
  // also occurs with some damaged files.
  if (!input->IsEndOfStream() && input->GetTimestamp() == kNoTimestamp() &&
      output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
    return;
  }

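  // Vorbis streams may start with a negative timestamp; the samples covering
  // that negative region are dropped below. As an illustrative example, an
  // input timestamp of -10 ms at 44100 Hz gives
  // frames_to_drop = floor(0.5 + 0.010 * 44100) = 441, so the first
  // 441 * bytes_per_frame_ decoded bytes are discarded.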
  bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
  if (!input->IsEndOfStream()) {
    if (last_input_timestamp_ == kNoTimestamp()) {
      if (is_vorbis && (input->GetTimestamp() < base::TimeDelta())) {
        // Drop frames for negative timestamps as outlined in section A.2
        // of the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
        int frames_to_drop = floor(
            0.5 + -input->GetTimestamp().InSecondsF() * samples_per_second_);
        output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
      } else {
        last_input_timestamp_ = input->GetTimestamp();
      }
    } else if (input->GetTimestamp() != kNoTimestamp()) {
      if (input->GetTimestamp() < last_input_timestamp_) {
        base::TimeDelta diff = input->GetTimestamp() - last_input_timestamp_;
        DVLOG(1) << "Input timestamps are not monotonically increasing! "
                 << " ts " << input->GetTimestamp().InMicroseconds() << " us"
                 << " diff " << diff.InMicroseconds() << " us";
        base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
        return;
      }

      last_input_timestamp_ = input->GetTimestamp();
    }
  }

  RunDecodeLoop(input, false);

  // We exhausted the provided packet, but it wasn't enough for a frame. Ask
  // for more data in order to fulfill this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Execute the callback to return the first frame we decoded.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

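// Translates the demuxer's AudioDecoderConfig into an AVCodecContext, opens
// the codec, and caches the channel count, sample rate, and sample format used
// later to detect unsupported midstream configuration changes.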
bool FFmpegAudioDecoder::ConfigureDecoder() {
  const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();

  if (!config.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config.codec()
                << " channel layout: " << config.channel_layout()
                << " bits per channel: " << config.bits_per_channel()
                << " samples per second: " << config.samples_per_second();
    return false;
  }

  if (config.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  if (codec_context_ &&
      (bits_per_channel_ != config.bits_per_channel() ||
       channel_layout_ != config.channel_layout() ||
       samples_per_second_ != config.samples_per_second())) {
    DVLOG(1) << "Unsupported config change :";
    DVLOG(1) << "\tbits_per_channel : " << bits_per_channel_
             << " -> " << config.bits_per_channel();
    DVLOG(1) << "\tchannel_layout : " << channel_layout_
             << " -> " << config.channel_layout();
    DVLOG(1) << "\tsample_rate : " << samples_per_second_
             << " -> " << config.samples_per_second();
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize the AVCodecContext structure.
  codec_context_ = avcodec_alloc_context3(NULL);
  AudioDecoderConfigToAVCodecContext(config, codec_context_);

  // MP3 decodes to S16P, which we don't support; tell it to use S16 instead.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Ensure avcodec_open2() respected our format request.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    DLOG(ERROR) << "Unable to configure a supported sample format: "
                << codec_context_->sample_fmt;
    return false;
  }

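  // Note on the wrapper below: planar float (FLTP) gets one AudioBus channel
  // per codec channel, while interleaved float (FLT) is wrapped as a single
  // "channel"; RunDecodeLoop() compensates by scaling its frame counts by the
  // codec channel count in the FLT case.
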
  // Some codecs will only output float data, so we need to convert to integer
  // before returning the decoded buffer.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
      codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
    // Preallocate the AudioBus for float conversions. We can treat interleaved
    // float data as a single planar channel since our output is expected in an
    // interleaved format anyway.
    int channels = codec_context_->channels;
    if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
      channels = 1;
    converter_bus_ = AudioBus::CreateWrapper(channels);
  }

  av_frame_ = avcodec_alloc_frame();
  bits_per_channel_ = config.bits_per_channel();
  channel_layout_ = config.channel_layout();
  samples_per_second_ = config.samples_per_second();
  output_timestamp_helper_.reset(new AudioTimestampHelper(
      config.bytes_per_frame(), config.samples_per_second()));
  bytes_per_frame_ = config.bytes_per_frame();

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  av_sample_format_ = codec_context_->sample_fmt;

  return true;
}

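// Frees the codec context (including its extradata) and the decoding frame
// allocated by ConfigureDecoder().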
void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  if (codec_context_) {
    av_free(codec_context_->extradata);
    avcodec_close(codec_context_);
    av_free(codec_context_);
    codec_context_ = NULL;
  }
  if (av_frame_) {
    av_free(av_frame_);
    av_frame_ = NULL;
  }
}

void FFmpegAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_bytes_to_drop_ = 0;
}

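// Decodes every frame contained in |input|, queueing one entry per decoded
// frame in |queued_audio_|. An end of stream buffer is queued once the decoder
// is drained, unless |skip_eos_append| is set (used when flushing the decoder
// across a config change).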
void FFmpegAudioDecoder::RunDecodeLoop(
    const scoped_refptr<DecoderBuffer>& input,
    bool skip_eos_append) {
  AVPacket packet;
  av_init_packet(&packet);
  if (input->IsEndOfStream()) {
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(input->GetData());
    packet.size = input->GetDataSize();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    // Reset frame to default values.
    avcodec_get_frame_defaults(av_frame_);

    int frame_decoded = 0;
    int result = avcodec_decode_audio4(
        codec_context_, av_frame_, &frame_decoded, &packet);

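    // avcodec_decode_audio4() returns the number of bytes it consumed from the
    // packet (negative on error); |frame_decoded| is non-zero when a complete
    // frame has been written to |av_frame_|.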
    if (result < 0) {
      DCHECK(!input->IsEndOfStream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(ERROR)
          << "Error decoding an audio frame with timestamp: "
          << input->GetTimestamp().InMicroseconds() << " us, duration: "
          << input->GetDuration().InMicroseconds() << " us, packet size: "
          << input->GetDataSize() << " bytes";

      // TODO(dalecurtis): We should return a kDecodeError here instead:
      // http://crbug.com/145276
      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !input->IsEndOfStream()) {
      DCHECK(input->GetTimestamp() != kNoTimestamp());
      if (output_bytes_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(input->GetTimestamp());
      }
    }

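    // The timestamp helper converts byte counts into time once the base
    // timestamp is known. For example, with 16-bit stereo audio at 44100 Hz
    // (bytes_per_frame_ == 4), 17640 decoded bytes span 4410 frames, i.e. a
    // duration of 100 ms.
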
    int decoded_audio_size = 0;
#ifdef CHROMIUM_NO_AVFRAME_CHANNELS
    int channels = av_get_channel_layout_nb_channels(
        av_frame_->channel_layout);
#else
    int channels = av_frame_->channels;
#endif
    if (frame_decoded) {
      if (av_frame_->sample_rate != samples_per_second_ ||
          channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << channels << " vs " << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
        queued_audio_.push_back(queue_entry);
        break;
      }

      decoded_audio_size = av_samples_get_buffer_size(
          NULL, codec_context_->channels, av_frame_->nb_samples,
          codec_context_->sample_fmt, 1);
      // If we're decoding into float, adjust audio size.
      if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
        DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
               codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
        decoded_audio_size *=
            static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
      }
    }

    int start_sample = 0;
    if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
      start_sample = dropped_size / bytes_per_frame_;
      decoded_audio_size -= dropped_size;
      output_bytes_to_drop_ -= dropped_size;
    }

    scoped_refptr<DataBuffer> output;
    if (decoded_audio_size > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      // Convert float data using an AudioBus.
      if (converter_bus_) {
        // Set up the AudioBus as a wrapper of the AVFrame data and then use
        // AudioBus::ToInterleaved() to convert the data as necessary.
        int skip_frames = start_sample;
        int total_frames = av_frame_->nb_samples;
        int frames_to_interleave = decoded_audio_size / bytes_per_frame_;
        if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
          DCHECK_EQ(converter_bus_->channels(), 1);
          total_frames *= codec_context_->channels;
          skip_frames *= codec_context_->channels;
          frames_to_interleave *= codec_context_->channels;
        }

        converter_bus_->set_frames(total_frames);
        for (int i = 0; i < converter_bus_->channels(); ++i) {
          converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
              av_frame_->extended_data[i]));
        }

        output = new DataBuffer(decoded_audio_size);
        output->set_data_size(decoded_audio_size);

        DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames);
        converter_bus_->ToInterleavedPartial(
            skip_frames, frames_to_interleave, bits_per_channel_ / 8,
            output->writable_data());
      } else {
        output = DataBuffer::CopyFrom(
            av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
            decoded_audio_size);
      }
      output->set_timestamp(output_timestamp_helper_->GetTimestamp());
      output->set_duration(
          output_timestamp_helper_->GetDuration(decoded_audio_size));
      output_timestamp_helper_->AddBytes(decoded_audio_size);
    } else if (IsEndOfStream(result, decoded_audio_size, input) &&
               !skip_eos_append) {
      DCHECK_EQ(packet.size, 0);
      output = DataBuffer::CreateEOSBuffer();
    }

    if (output.get()) {
      QueuedAudioBuffer queue_entry = { kOk, output };
      queued_audio_.push_back(queue_entry);
    }

    // Decoding finished successfully, update statistics.
    if (result > 0) {
      PipelineStatistics statistics;
      statistics.audio_bytes_decoded = result;
      statistics_cb_.Run(statistics);
    }
  } while (packet.size > 0);
}

}  // namespace media