// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "media/filters/ffmpeg_audio_decoder.h"
8 #include "base/callback_helpers.h"
9 #include "base/location.h"
10 #include "base/message_loop/message_loop_proxy.h"
11 #include "media/base/audio_bus.h"
12 #include "media/base/audio_decoder_config.h"
13 #include "media/base/audio_timestamp_helper.h"
14 #include "media/base/bind_to_loop.h"
15 #include "media/base/data_buffer.h"
16 #include "media/base/decoder_buffer.h"
17 #include "media/base/demuxer.h"
18 #include "media/base/pipeline.h"
19 #include "media/ffmpeg/ffmpeg_common.h"
20 #include "media/filters/ffmpeg_glue.h"
// Helper structure for managing multiple decoded audio frames per packet.
struct QueuedAudioBuffer {
  AudioDecoder::Status status;
  scoped_refptr<DataBuffer> buffer;
};

// Returns true if the decode result was end of stream.
static inline bool IsEndOfStream(int result, int decoded_size,
                                 const scoped_refptr<DecoderBuffer>& input) {
  // Three conditions to meet to declare end of stream for this decoder:
  // 1. FFmpeg didn't read anything.
  // 2. FFmpeg didn't output anything.
  // 3. An end of stream buffer is received.
  return result == 0 && decoded_size == 0 && input->IsEndOfStream();
}

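// All members start out null/zeroed; the actual FFmpeg state is created later
// by Initialize() and ConfigureDecoder().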
FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::MessageLoopProxy>& message_loop)
    : message_loop_(message_loop),
      weak_factory_(this),
      demuxer_stream_(NULL),
      codec_context_(NULL),
      bits_per_channel_(0),
      channel_layout_(CHANNEL_LAYOUT_NONE),
      channels_(0),
      samples_per_second_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0),
      av_frame_(NULL) {
}

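// Stores the demuxer stream, configures an FFmpeg decoder for its audio
// config, and reports the result through |status_cb| on the decoder's message
// loop.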
void FFmpegAudioDecoder::Initialize(
    DemuxerStream* stream,
    const PipelineStatusCB& status_cb,
    const StatisticsCB& statistics_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);

  FFmpegGlue::InitializeFFmpeg();

  if (demuxer_stream_) {
    // TODO(scherkus): initialization currently happens more than once in
    // PipelineIntegrationTest.BasicPlayback.
    LOG(ERROR) << "Initialize has already been called.";
  }

  weak_this_ = weak_factory_.GetWeakPtr();
  demuxer_stream_ = stream;

  if (!ConfigureDecoder()) {
    initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
    return;
  }

  statistics_cb_ = statistics_cb;
  initialize_cb.Run(PIPELINE_OK);
}

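// Returns a previously queued buffer immediately if one is available;
// otherwise requests more data from the demuxer. Only one read may be in
// flight at a time.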
void FFmpegAudioDecoder::Read(const ReadCB& read_cb) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb.is_null());
  CHECK(read_cb_.is_null()) << "Overlapping decodes are not supported.";

  read_cb_ = BindToCurrentLoop(read_cb);

  // If we don't have any queued audio from the last packet we decoded, ask for
  // more data from the demuxer to satisfy this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

int FFmpegAudioDecoder::bits_per_channel() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return bits_per_channel_;
}

ChannelLayout FFmpegAudioDecoder::channel_layout() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return channel_layout_;
}

int FFmpegAudioDecoder::samples_per_second() {
  DCHECK(message_loop_->BelongsToCurrentThread());
  return samples_per_second_;
}

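// Flushes FFmpeg's internal buffers, clears queued output and timestamp state,
// then signals completion asynchronously on the decoder's message loop.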
void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  base::Closure reset_cb = BindToCurrentLoop(closure);

  avcodec_flush_buffers(codec_context_);
  ResetTimestampState();
  queued_audio_.clear();
  reset_cb.Run();
}

FFmpegAudioDecoder::~FFmpegAudioDecoder() {
  // TODO(scherkus): should we require Stop() to be called? this might end up
  // getting called on a random thread due to refcounting.
  ReleaseFFmpegResources();
}

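// Issues a single demuxer read; the result is delivered to BufferReady().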
void FFmpegAudioDecoder::ReadFromDemuxerStream() {
  DCHECK(!read_cb_.is_null());
  demuxer_stream_->Read(base::Bind(
      &FFmpegAudioDecoder::BufferReady, weak_this_));
}

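// Demuxer read callback. Handles aborts and config changes, validates input
// timestamps, then runs the decode loop and returns the first decoded buffer
// (or asks the demuxer for more data if the packet didn't yield a full frame).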
void FFmpegAudioDecoder::BufferReady(
    DemuxerStream::Status status,
    const scoped_refptr<DecoderBuffer>& input) {
  DCHECK(message_loop_->BelongsToCurrentThread());
  DCHECK(!read_cb_.is_null());
  DCHECK(queued_audio_.empty());
  DCHECK_EQ(status != DemuxerStream::kOk, !input.get()) << status;

  if (status == DemuxerStream::kAborted) {
    DCHECK(!input.get());
    base::ResetAndReturn(&read_cb_).Run(kAborted, NULL);
    return;
  }

  if (status == DemuxerStream::kConfigChanged) {
    DCHECK(!input.get());

    // Send an "end of stream" buffer to the decode loop
    // to output any remaining data still in the decoder.
    RunDecodeLoop(DecoderBuffer::CreateEOSBuffer(), true);

    DVLOG(1) << "Config changed.";

    if (!ConfigureDecoder()) {
      base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
      return;
    }

    ResetTimestampState();

    if (queued_audio_.empty()) {
      ReadFromDemuxerStream();
      return;
    }

    base::ResetAndReturn(&read_cb_).Run(
        queued_audio_.front().status, queued_audio_.front().buffer);
    queued_audio_.pop_front();
    return;
  }

  DCHECK_EQ(status, DemuxerStream::kOk);

  // Make sure we are notified if http://crbug.com/49709 returns. The issue
  // also occurs with some damaged files.
  if (!input->IsEndOfStream() && input->GetTimestamp() == kNoTimestamp() &&
      output_timestamp_helper_->base_timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
    return;
  }

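  // Vorbis streams may start with a negative timestamp; the samples covering
  // that negative region are dropped below. As an illustrative example, an
  // input timestamp of -10 ms at 44100 Hz gives
  // frames_to_drop = floor(0.5 + 0.010 * 44100) = 441, so the first
  // 441 * bytes_per_frame_ decoded bytes are discarded.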
  bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
  if (!input->IsEndOfStream()) {
    if (last_input_timestamp_ == kNoTimestamp()) {
      if (is_vorbis && (input->GetTimestamp() < base::TimeDelta())) {
        // Drop frames for negative timestamps as outlined in section A.2
        // of the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
        int frames_to_drop = floor(
            0.5 + -input->GetTimestamp().InSecondsF() * samples_per_second_);
        output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
      } else {
        last_input_timestamp_ = input->GetTimestamp();
      }
    } else if (input->GetTimestamp() != kNoTimestamp()) {
      if (input->GetTimestamp() < last_input_timestamp_) {
        base::TimeDelta diff = input->GetTimestamp() - last_input_timestamp_;
        DVLOG(1) << "Input timestamps are not monotonically increasing! "
                 << " ts " << input->GetTimestamp().InMicroseconds() << " us"
                 << " diff " << diff.InMicroseconds() << " us";
        base::ResetAndReturn(&read_cb_).Run(kDecodeError, NULL);
        return;
      }

      last_input_timestamp_ = input->GetTimestamp();
    }
  }

  RunDecodeLoop(input, false);

  // We exhausted the provided packet, but it wasn't enough for a frame. Ask
  // for more data in order to fulfill this read.
  if (queued_audio_.empty()) {
    ReadFromDemuxerStream();
    return;
  }

  // Execute the callback to return the first frame we decoded.
  base::ResetAndReturn(&read_cb_).Run(
      queued_audio_.front().status, queued_audio_.front().buffer);
  queued_audio_.pop_front();
}

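// Translates the demuxer's AudioDecoderConfig into an AVCodecContext, opens
// the codec, and caches the channel count, sample rate, and sample format used
// later to detect unsupported midstream configuration changes.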
bool FFmpegAudioDecoder::ConfigureDecoder() {
  const AudioDecoderConfig& config = demuxer_stream_->audio_decoder_config();

  if (!config.IsValidConfig()) {
    DLOG(ERROR) << "Invalid audio stream -"
                << " codec: " << config.codec()
                << " channel layout: " << config.channel_layout()
                << " bits per channel: " << config.bits_per_channel()
                << " samples per second: " << config.samples_per_second();
    return false;
  }

  if (config.is_encrypted()) {
    DLOG(ERROR) << "Encrypted audio stream not supported";
    return false;
  }

  if (codec_context_ &&
      (bits_per_channel_ != config.bits_per_channel() ||
       channel_layout_ != config.channel_layout() ||
       samples_per_second_ != config.samples_per_second())) {
    DVLOG(1) << "Unsupported config change :";
    DVLOG(1) << "\tbits_per_channel : " << bits_per_channel_
             << " -> " << config.bits_per_channel();
    DVLOG(1) << "\tchannel_layout : " << channel_layout_
             << " -> " << config.channel_layout();
    DVLOG(1) << "\tsample_rate : " << samples_per_second_
             << " -> " << config.samples_per_second();
    return false;
  }

  // Release existing decoder resources if necessary.
  ReleaseFFmpegResources();

  // Initialize the AVCodecContext structure.
  codec_context_ = avcodec_alloc_context3(NULL);
  AudioDecoderConfigToAVCodecContext(config, codec_context_);

  // MP3 decodes to S16P, which we don't support; tell it to use S16 instead.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;

  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
  if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) {
    DLOG(ERROR) << "Could not initialize audio decoder: "
                << codec_context_->codec_id;
    return false;
  }

  // Ensure avcodec_open2() respected our format request.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
    DLOG(ERROR) << "Unable to configure a supported sample format: "
                << codec_context_->sample_fmt;
    return false;
  }

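  // Note on the wrapper below: planar float (FLTP) gets one AudioBus channel
  // per codec channel, while interleaved float (FLT) is wrapped as a single
  // "channel"; RunDecodeLoop() compensates by scaling its frame counts by the
  // codec channel count in the FLT case.
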
  // Some codecs will only output float data, so we need to convert to integer
  // before returning the decoded buffer.
  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
      codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
    // Preallocate the AudioBus for float conversions. We can treat interleaved
    // float data as a single planar channel since our output is expected in an
    // interleaved format anyway.
    int channels = codec_context_->channels;
    if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT)
      channels = 1;
    converter_bus_ = AudioBus::CreateWrapper(channels);
  }

  av_frame_ = avcodec_alloc_frame();
  bits_per_channel_ = config.bits_per_channel();
  channel_layout_ = config.channel_layout();
  samples_per_second_ = config.samples_per_second();
  output_timestamp_helper_.reset(new AudioTimestampHelper(
      config.bytes_per_frame(), config.samples_per_second()));
  bytes_per_frame_ = config.bytes_per_frame();

  // Store initial values to guard against midstream configuration changes.
  channels_ = codec_context_->channels;
  av_sample_format_ = codec_context_->sample_fmt;

  return true;
}

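// Frees the codec context (including its extradata) and the decoding frame
// allocated by ConfigureDecoder().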
void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  if (codec_context_) {
    av_free(codec_context_->extradata);
    avcodec_close(codec_context_);
    av_free(codec_context_);
    codec_context_ = NULL;
  }
  if (av_frame_) {
    av_free(av_frame_);
    av_frame_ = NULL;
  }
}

void FFmpegAudioDecoder::ResetTimestampState() {
  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
  last_input_timestamp_ = kNoTimestamp();
  output_bytes_to_drop_ = 0;
}

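// Decodes every frame contained in |input|, queueing one entry per decoded
// frame in |queued_audio_|. An end of stream buffer is queued once the decoder
// is drained, unless |skip_eos_append| is set (used when flushing the decoder
// across a config change).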
void FFmpegAudioDecoder::RunDecodeLoop(
    const scoped_refptr<DecoderBuffer>& input,
    bool skip_eos_append) {
  AVPacket packet;
  av_init_packet(&packet);
  if (input->IsEndOfStream()) {
    packet.data = NULL;
    packet.size = 0;
  } else {
    packet.data = const_cast<uint8*>(input->GetData());
    packet.size = input->GetDataSize();
  }

  // Each audio packet may contain several frames, so we must call the decoder
  // until we've exhausted the packet. Regardless of the packet size we always
  // want to hand it to the decoder at least once, otherwise we would end up
  // skipping end of stream packets since they have a size of zero.
  do {
    // Reset frame to default values.
    avcodec_get_frame_defaults(av_frame_);

    int frame_decoded = 0;
    int result = avcodec_decode_audio4(
        codec_context_, av_frame_, &frame_decoded, &packet);

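    // avcodec_decode_audio4() returns the number of bytes it consumed from the
    // packet (negative on error); |frame_decoded| is non-zero when a complete
    // frame has been written to |av_frame_|.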
    if (result < 0) {
      DCHECK(!input->IsEndOfStream())
          << "End of stream buffer produced an error! "
          << "This is quite possibly a bug in the audio decoder not handling "
          << "end of stream AVPackets correctly.";

      DLOG(ERROR)
          << "Error decoding an audio frame with timestamp: "
          << input->GetTimestamp().InMicroseconds() << " us, duration: "
          << input->GetDuration().InMicroseconds() << " us, packet size: "
          << input->GetDataSize() << " bytes";

      // TODO(dalecurtis): We should return a kDecodeError here instead:
      // http://crbug.com/145276
      break;
    }

    // Update packet size and data pointer in case we need to call the decoder
    // with the remaining bytes from this packet.
    packet.size -= result;
    packet.data += result;

    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
        !input->IsEndOfStream()) {
      DCHECK(input->GetTimestamp() != kNoTimestamp());
      if (output_bytes_to_drop_ > 0) {
        // Currently Vorbis is the only codec that causes us to drop samples.
        // If we have to drop samples it always means the timeline starts at 0.
        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
      } else {
        output_timestamp_helper_->SetBaseTimestamp(input->GetTimestamp());
      }
    }

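    // The timestamp helper converts byte counts into time once the base
    // timestamp is known. For example, with 16-bit stereo audio at 44100 Hz
    // (bytes_per_frame_ == 4), 17640 decoded bytes span 4410 frames, i.e. a
    // duration of 100 ms.
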
    int decoded_audio_size = 0;
#ifdef CHROMIUM_NO_AVFRAME_CHANNELS
    int channels = av_get_channel_layout_nb_channels(
        av_frame_->channel_layout);
#else
    int channels = av_frame_->channels;
#endif
    if (frame_decoded) {
      if (av_frame_->sample_rate != samples_per_second_ ||
          channels != channels_ ||
          av_frame_->format != av_sample_format_) {
        DLOG(ERROR) << "Unsupported midstream configuration change!"
                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
                    << samples_per_second_
                    << ", Channels: " << channels << " vs " << channels_
                    << ", Sample Format: " << av_frame_->format << " vs "
                    << av_sample_format_;

        // This is an unrecoverable error, so bail out.
        QueuedAudioBuffer queue_entry = { kDecodeError, NULL };
        queued_audio_.push_back(queue_entry);
        break;
      }

      decoded_audio_size = av_samples_get_buffer_size(
          NULL, codec_context_->channels, av_frame_->nb_samples,
          codec_context_->sample_fmt, 1);
      // If we're decoding into float, adjust audio size.
      if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) {
        DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT ||
               codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP);
        decoded_audio_size *=
            static_cast<float>(bits_per_channel_ / 8) / sizeof(float);
      }
    }

    int start_sample = 0;
    if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
      start_sample = dropped_size / bytes_per_frame_;
      decoded_audio_size -= dropped_size;
      output_bytes_to_drop_ -= dropped_size;
    }

    scoped_refptr<DataBuffer> output;
    if (decoded_audio_size > 0) {
      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
          << "Decoder didn't output full frames";

      // Convert float data using an AudioBus.
      if (converter_bus_) {
        // Set up the AudioBus as a wrapper of the AVFrame data and then use
        // AudioBus::ToInterleaved() to convert the data as necessary.
        int skip_frames = start_sample;
        int total_frames = av_frame_->nb_samples;
        int frames_to_interleave = decoded_audio_size / bytes_per_frame_;
        if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) {
          DCHECK_EQ(converter_bus_->channels(), 1);
          total_frames *= codec_context_->channels;
          skip_frames *= codec_context_->channels;
          frames_to_interleave *= codec_context_->channels;
        }

        converter_bus_->set_frames(total_frames);
        for (int i = 0; i < converter_bus_->channels(); ++i) {
          converter_bus_->SetChannelData(i, reinterpret_cast<float*>(
              av_frame_->extended_data[i]));
        }

        output = new DataBuffer(decoded_audio_size);
        output->set_data_size(decoded_audio_size);

        DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames);
        converter_bus_->ToInterleavedPartial(
            skip_frames, frames_to_interleave, bits_per_channel_ / 8,
            output->writable_data());
      } else {
        output = DataBuffer::CopyFrom(
            av_frame_->extended_data[0] + start_sample * bytes_per_frame_,
            decoded_audio_size);
      }
      output->set_timestamp(output_timestamp_helper_->GetTimestamp());
      output->set_duration(
          output_timestamp_helper_->GetDuration(decoded_audio_size));
      output_timestamp_helper_->AddBytes(decoded_audio_size);
    } else if (IsEndOfStream(result, decoded_audio_size, input) &&
               !skip_eos_append) {
      DCHECK_EQ(packet.size, 0);
      output = DataBuffer::CreateEOSBuffer();
    }

    if (output.get()) {
      QueuedAudioBuffer queue_entry = { kOk, output };
      queued_audio_.push_back(queue_entry);
    }

    // Decoding finished successfully, update statistics.
    if (result > 0) {
      PipelineStatistics statistics;
      statistics.audio_bytes_decoded = result;
      statistics_cb_.Run(statistics);
    }
  } while (packet.size > 0);
}

}  // namespace media