libmedia/ffmpeg/MediaParserFfmpeg.cpp

   1 // MediaParserFfmpeg.cpp: FFMPEG media parsers, for Gnash
   2 //
   3 //   Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 //
   5 // This program is free software; you can redistribute it and/or modify
   6 // it under the terms of the GNU General Public License as published by
   7 // the Free Software Foundation; either version 3 of the License, or
   8 // (at your option) any later version.
   9 //
  10 // This program is distributed in the hope that it will be useful,
  11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 // GNU General Public License for more details.
  14 //
  15 // You should have received a copy of the GNU General Public License
  16 // along with this program; if not, write to the Free Software
  17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  18 //
  19
  20 #include "ffmpegHeaders.h"
  21 #include "MediaParserFfmpeg.h"
  22 #include "GnashException.h"
  23 #include "log.h"
  24 #include "IOChannel.h"
  25
  26 //#define GNASH_ALLOW_VCODEC_ENV 1
  27 // Set this to enable a special GNASH_DEFAULT_VCODEC environment variable, which
  28 // is used as a default when the video codec can't be detected. This is a quick
  29 // hack to make MJPEG HTTP videos work (which can't be detected as their MIME
  30 // type is just "mixed/multipart"). Perhaps the codec will be configurable via
  31 // ActionScript sometime. - Udo
  32
  33 namespace gnash {
  34 namespace media {
  35 namespace ffmpeg {
  36
  37 namespace {
  38
  39         // Used to calculate a decimal value from a ffmpeg fraction
  40         inline double as_double(AVRational time)
  41         {
  42                 return time.num / static_cast<double>(time.den);
  43         }
  44
  45 } // anonymous namespace
  46
  47
  48 int
  49 MediaParserFfmpeg::readPacketWrapper(void* opaque, boost::uint8_t* buf, int buf_size)
  50 {
  51         MediaParserFfmpeg* p = static_cast<MediaParserFfmpeg*>(opaque);
  52         return p->readPacket(buf, buf_size);
  53 }
  54
  55 boost::int64_t
  56 MediaParserFfmpeg::seekMediaWrapper(void *opaque, boost::int64_t offset, int whence)
  57 {
  58         MediaParserFfmpeg* p = static_cast<MediaParserFfmpeg*>(opaque);
  59         return p->seekMedia(offset, whence);
  60 }
  61
  62 AVInputFormat*
  63 MediaParserFfmpeg::probeStream()
  64 {
  65     const size_t probeSize = 2048;
  66     const size_t bufSize = probeSize + FF_INPUT_BUFFER_PADDING_SIZE;
  67
  68         boost::scoped_array<boost::uint8_t> buffer(new boost::uint8_t[bufSize]);
  69
  70         assert(_stream->tell() == static_cast<std::streampos>(0));
  71         size_t actuallyRead = _stream->read(buffer.get(), probeSize);
  72
  73     // Fill any padding with 0s.
  74     std::fill(buffer.get() + actuallyRead, buffer.get() + bufSize, 0);
  75
  76         _stream->seek(0);
  77
  78         if (actuallyRead < 1)
  79         {
  80                 throw IOException(_("MediaParserFfmpeg could not read probe data "
  81                     "from input"));
  82         }
  83
  84         // Probe the file to detect the format
  85         AVProbeData probe_data;
  86         probe_data.filename = "";
  87         probe_data.buf = buffer.get();
  88     probe_data.buf_size = actuallyRead;
  89
  90     AVInputFormat* ret = av_probe_input_format(&probe_data, 1);
  91         return ret;
  92 }
  93
  94 bool
  95 MediaParserFfmpeg::seek(boost::uint32_t& pos)
  96 {
  97         LOG_ONCE(log_unimpl("MediaParserFfmpeg::seek()"));
  98         return false;
  99
 100         log_debug("MediaParserFfmpeg::seek(%d) TESTING", pos);
 101
 102         AVStream* videostream = _formatCtx->streams[_videoStreamIndex];
 103         double timebase = static_cast<double>(videostream->time_base.num / videostream->time_base.den);
 104         long newpos = static_cast<long>(pos / timebase);
 105
 106         if (av_seek_frame(_formatCtx, _videoStreamIndex, newpos, 0) < 0)
 107         {
 108                 log_error(_("%s: seeking failed"), __FUNCTION__);
 109                 return 0;
 110         }
 111
 112         AVPacket Packet;
 113         av_init_packet(&Packet);
 114         double newtime = 0;
 115         while (newtime == 0)
 116         {
 117                 if (av_read_frame(_formatCtx, &Packet) < 0)
 118                 {
 119                         log_error("Error in av_read_frame (while seeking)");
 120                         av_seek_frame(_formatCtx, -1, 0, AVSEEK_FLAG_BACKWARD);
 121                         //av_free_packet( &Packet );
 122                         return 0; // ??
 123                 }
 124
 125                 newtime = timebase * static_cast<double>(_formatCtx->streams[_videoStreamIndex]->cur_dts);
 126         }
 127
 128         //av_free_packet( &Packet );
 129         av_seek_frame(_formatCtx, _videoStreamIndex, newpos, 0);
 130
 131         newtime = static_cast<boost::int32_t>(newtime / 1000.0);
 132         log_debug("Seek requested to time %d triggered seek to key frame at "
 133             "time %d", pos, newtime);
 134         pos = newtime;
 135
 136     return true;
 137 }
 138
 139 bool
 140 MediaParserFfmpeg::parseVideoFrame(AVPacket& packet)
 141 {
 142         assert(packet.stream_index == _videoStreamIndex);
 143         assert(_videoStream);
 144
 145         // packet.dts is "decompression" timestamp
 146         // packet.pts is "presentation" timestamp
 147         // Dunno why we use dts, and don't understand the magic formula either...
 148         //
 149         // From ffmpeg dox:
 150         //    pkt->pts can be AV_NOPTS_VALUE if the video format has B frames,
 151         //    so it is better to rely on pkt->dts if you do not decompress the payload.
 152         //
 153         boost::uint64_t timestamp = static_cast<boost::uint64_t>(packet.dts * as_double(_videoStream->time_base) * 1000.0);
 154
 155 #if 0
 156         LOG_ONCE( log_unimpl("%s", __PRETTY_FUNCTION__) );
 157         return false;
 158 #else
 159
 160         // flags, for keyframe
 161         //bool isKeyFrame = packet.flags&PKT_FLAG_KEY;
 162
 163         // TODO: FIXME: *2 is an hack to avoid libavcodec reading past end of allocated space
 164         //       we might do proper padding or (better) avoid the copy as a whole by making
 165         //       EncodedVideoFrame virtual.
 166         size_t allocSize = packet.size*2;
 167         boost::uint8_t* data = new boost::uint8_t[allocSize];
 168         std::copy(packet.data, packet.data+packet.size, data);
 169         std::auto_ptr<EncodedVideoFrame> frame(new EncodedVideoFrame(data, packet.size, 0, timestamp));
 170
 171         pushEncodedVideoFrame(frame);
 172
 173         return true;
 174 #endif
 175 }
 176
 177 bool
 178 MediaParserFfmpeg::parseAudioFrame(AVPacket& packet)
 179 {
 180         assert(packet.stream_index == _audioStreamIndex);
 181         assert(_audioStream);
 182
 183         // packet.dts is "decompression" timestamp
 184         // packet.pts is "presentation" timestamp
 185         // Dunno why we use dts, and don't understand the magic formula either...
 186         //
 187         // From ffmpeg dox:
 188         //    pkt->pts can be AV_NOPTS_VALUE if the video format has B frames,
 189         //    so it is better to rely on pkt->dts if you do not decompress the payload.
 190         //
 191
 192         boost::uint64_t dts = packet.dts;
 193     if ( dts == static_cast<boost::uint64_t>(AV_NOPTS_VALUE) ) {
 194         // We'll take 'nopts' value as zero.
 195         // Would likely be better to make it use timestamp
 196         // of previous frame, if any.
 197         //
 198         // For now, this handling fixes warnings like:
 199         //   mdb:93, lastbuf:0 skiping granule 0
 200         //   mdb:93, lastbuf:0 skiping granule 0
 201         // When playing: http://downloads.bbc.co.uk/news/nol/shared/spl/hi/audio_slideshow/kenadamptw/slideshow_629.swf
 202         //
 203         LOG_ONCE(log_error("FIXME: FFmpeg packet decompression "
 204                     "timestamp has no value, taking as zero"));
 205         dts = 0;
 206     }
 207         boost::uint64_t timestamp = static_cast<boost::uint64_t>(dts * as_double(_audioStream->time_base) * 1000.0);
 208     //log_debug("On getting audio frame with timestamp %d, duration is %d", timestamp, _audioStream->duration);
 209
 210         std::auto_ptr<EncodedAudioFrame> frame ( new EncodedAudioFrame );
 211
 212         // TODO: FIXME: *2 is an hack to avoid libavcodec reading past end of allocated space
 213         //       we might do proper padding or (better) avoid the copy as a whole by making
 214         //       EncodedVideoFrame virtual.
 215         size_t allocSize = packet.size*2;
 216         boost::uint8_t* data = new boost::uint8_t[allocSize];
 217         std::copy(packet.data, packet.data+packet.size, data);
 218
 219         frame->data.reset(data);
 220         frame->dataSize = packet.size;
 221         frame->timestamp = timestamp;
 222
 223         pushEncodedAudioFrame(frame);
 224
 225         return true;
 226 }
 227
 228 bool
 229 MediaParserFfmpeg::parseNextFrame()
 230 {
 231         // lock the stream while reading from it, so actionscript
 232         // won't mess with the parser on seek  or on getBytesLoaded
 233         boost::mutex::scoped_lock streamLock(_streamMutex);
 234
 235         if ( _parsingComplete )
 236         {
 237                 //log_debug("MediaParserFfmpeg::parseNextFrame: parsing "
 238         //"complete, nothing to do");
 239                 return false;
 240         }
 241
 242         // position the stream where we left parsing as
 243         // it could be somewhere else for reading a specific
 244         // or seeking.
 245         //_stream->seek(_lastParsedPosition);
 246
 247         assert(_formatCtx);
 248
 249         AVPacket packet;
 250
 251         //log_debug("av_read_frame call");
 252         int rc = av_read_frame(_formatCtx, &packet);
 253
 254         // Update _lastParsedPosition, even in case of error..
 255         boost::uint64_t curPos = _stream->tell();
 256         if ( curPos > _lastParsedPosition )
 257         {
 258                 _lastParsedPosition = curPos;
 259         }
 260
 261         //log_debug("av_read_frame returned %d", rc);
 262         if ( rc < 0 )
 263         {
 264         log_error(_("MediaParserFfmpeg::parseNextFrame: "
 265             "Problems parsing next frame "
 266             "(av_read_frame returned %d)."
 267             " We'll consider the stream fully parsed."), rc);
 268         _parsingComplete=true; // No point in parsing over
 269         return false;
 270         }
 271
 272         bool ret = false;
 273
 274         if ( packet.stream_index == _videoStreamIndex )
 275         {
 276                 ret = parseVideoFrame(packet);
 277         }
 278         else if ( packet.stream_index == _audioStreamIndex )
 279         {
 280                 ret = parseAudioFrame(packet);
 281         }
 282         else
 283         {
 284                 ret = false; // redundant..
 285                 log_debug("MediaParserFfmpeg::parseNextFrame: unknown stream index %d",
 286                 packet.stream_index);
 287         }
 288
 289         av_free_packet(&packet);
 290
 291         // Check if EOF was reached
 292         if ( _stream->eof() )
 293         {
 294                 log_debug("MediaParserFfmpeg::parseNextFrame: at eof after "
 295                 "av_read_frame");
 296                 _parsingComplete=true;
 297         }
 298
 299         return ret;
 300 }
 301
 302 bool
 303 MediaParserFfmpeg::parseNextChunk()
 304 {
 305         if ( ! parseNextFrame() ) return false;
 306         return true;
 307 }
 308
 309 boost::uint64_t
 310 MediaParserFfmpeg::getBytesLoaded() const
 311 {
 312         //log_unimpl("%s", __PRETTY_FUNCTION__);
 313         return _lastParsedPosition;
 314 }
 315
 316 MediaParserFfmpeg::MediaParserFfmpeg(std::auto_ptr<IOChannel> stream)
 317         :
 318         MediaParser(stream),
 319         _nextVideoFrame(0),
 320         _nextAudioFrame(0),
 321         _inputFmt(0),
 322         _formatCtx(0),
 323         _videoStreamIndex(-1),
 324         _videoStream(0),
 325         _audioStreamIndex(-1),
 326         _audioStream(0),
 327         _lastParsedPosition(0)
 328 {
 329         initializeParser();
 330
 331         startParserThread();
 332 }
 333
 334 /*private*/
 335 void
 336 MediaParserFfmpeg::initializeParser()
 337 {
 338     av_register_all(); // TODO: needs to be invoked only once ?
 339
 340     _byteIOCxt.buffer = NULL;
 341
 342     _inputFmt = probeStream();
 343 #ifdef GNASH_ALLOW_VCODEC_ENV
 344     if ( ! _inputFmt ) {
 345         char* defcodec = getenv("GNASH_DEFAULT_VCODEC");
 346         if (defcodec && strlen(defcodec))
 347             _inputFmt = av_find_input_format(defcodec);
 348
 349     }
 350 #endif
 351     if ( ! _inputFmt ) {
 352         throw MediaException("MediaParserFfmpeg couldn't figure out input "
 353                              "format");
 354     }
 355
 356 // av_alloc_format_context was deprecated on
 357 // 2009-02-08 (r17047) in favor of avformat_alloc_context()
 358 #if !defined (LIBAVCODEC_VERSION_MAJOR) || LIBAVCODEC_VERSION_MAJOR < 52
 359     _formatCtx = av_alloc_format_context();
 360 #else
 361     _formatCtx = avformat_alloc_context();
 362 #endif
 363
 364     assert(_formatCtx);
 365
 366     // Setup the filereader/seeker mechanism.
 367     // 7th argument (NULL) is the writer function,
 368     // which isn't needed.
 369     _byteIOBuffer.reset( new unsigned char[byteIOBufferSize] );
 370     init_put_byte(&_byteIOCxt,
 371                   _byteIOBuffer.get(), // buffer
 372                   byteIOBufferSize, // buffer size
 373                   0, // write flags
 374                   this, // opaque pointer to pass to the callbacks
 375                   MediaParserFfmpeg::readPacketWrapper, // packet reader callback
 376                   NULL, // packet writer callback
 377                   MediaParserFfmpeg::seekMediaWrapper // seeker callback
 378                   );
 379
 380     _byteIOCxt.is_streamed = 1;
 381
 382     // Open the stream. the 4th argument is the filename, which we ignore.
 383     if(av_open_input_stream(&_formatCtx, &_byteIOCxt, "", _inputFmt, NULL) < 0)
 384     {
 385         throw IOException("MediaParserFfmpeg couldn't open input stream");
 386     }
 387
 388     log_debug("Parsing FFMPEG media file: format:%s; nstreams:%d",
 389         _inputFmt->name, _formatCtx->nb_streams);
 390
 391     if ( _formatCtx->title[0] )
 392         log_debug(_("  Title:'%s'"), _formatCtx->title);
 393     if ( _formatCtx->author[0] )
 394         log_debug(_("  Author:'%s'"), _formatCtx->author);
 395     if ( _formatCtx->copyright[0] )
 396         log_debug(_("  Copyright:'%s'"), _formatCtx->copyright);
 397     if ( _formatCtx->comment[0] )
 398         log_debug(_("  Comment:'%s'"), _formatCtx->comment);
 399     if ( _formatCtx->album[0] )
 400         log_debug(_("  Album:'%s'"), _formatCtx->album);
 401
 402     // Find first audio and video stream
 403     for (unsigned int i = 0; i < static_cast<unsigned int>(_formatCtx->nb_streams); i++)
 404         {
 405             AVStream* stream = _formatCtx->streams[i];
 406             if ( ! stream ) {
 407                 log_debug("Stream %d of FFMPEG media file is null ?", i);
 408                 continue;
 409             }
 410
 411             AVCodecContext* enc = stream->codec;
 412             if ( ! enc ) {
 413                 log_debug("Stream %d of FFMPEG media file has no codec info", i);
 414                 continue;
 415             }
 416
 417             switch (enc->codec_type) {
 418             case CODEC_TYPE_AUDIO:
 419                 if (_audioStreamIndex < 0) {
 420                     _audioStreamIndex = i;
 421                     _audioStream = _formatCtx->streams[i];
 422                     log_debug(_("  Using stream %d for audio: codec id %d"),
 423                               i, _audioStream->codec->codec_id);
 424                     // codec_name will only be filled by avcodec_find_decoder (later);
 425                 }
 426                 break;
 427
 428             case CODEC_TYPE_VIDEO:
 429                 if (_videoStreamIndex < 0) {
 430                     _videoStreamIndex = i;
 431                     _videoStream = _formatCtx->streams[i];
 432                     log_debug(_("  Using stream %d for video: codec id %d"),
 433                               i, _videoStream->codec->codec_id);
 434                     // codec_name will only be filled by avcodec_find_decoder (later);
 435                 }
 436                 break;
 437             default:
 438                 break;
 439             }
 440         }
 441
 442     // Create VideoInfo
 443     if ( _videoStream) {
 444         const int codec = static_cast<int>(_videoStream->codec->codec_id);
 445         boost::uint16_t width = _videoStream->codec->width;
 446         boost::uint16_t height = _videoStream->codec->height;
 447         boost::uint16_t frameRate = static_cast<boost::uint16_t>(as_double(_videoStream->r_frame_rate));
 448 #if !defined(HAVE_LIBAVFORMAT_AVFORMAT_H) && !defined(HAVE_FFMPEG_AVCODEC_H)
 449         boost::uint64_t duration = _videoStream->codec_info_duration;
 450 #else
 451         boost::uint64_t duration = _videoStream->duration;
 452 #endif
 453         if (duration == AV_NOPTS_VALUE) {
 454             log_error("Duration of video stream unknown");
 455             duration=0; // TODO: guess!
 456         } else {
 457             duration = duration / as_double(_videoStream->time_base); // TODO: check this
 458         }
 459
 460         _videoInfo.reset(new VideoInfo(codec, width, height, frameRate,
 461                     duration, CODEC_TYPE_CUSTOM /*codec type*/));
 462
 463         // NOTE: AVCodecContext.extradata : void* for 51.11.0, uint8_t* for 51.38.0
 464         _videoInfo->extra.reset(new ExtraVideoInfoFfmpeg(
 465                      (uint8_t*)_videoStream->codec->extradata,
 466                      _videoStream->codec->extradata_size));
 467
 468     }
 469
 470     // Create AudioInfo
 471     if ( _audioStream) {
 472         const int codec = static_cast<int>(_audioStream->codec->codec_id);
 473         boost::uint16_t sampleRate = _audioStream->codec->sample_rate;
 474         boost::uint16_t sampleSize = SampleFormatToSampleSize(_audioStream->codec->sample_fmt);
 475         bool stereo = (_audioStream->codec->channels == 2);
 476 #if !defined(HAVE_LIBAVFORMAT_AVFORMAT_H) && !defined(HAVE_FFMPEG_AVCODEC_H)
 477         boost::uint64_t duration = _audioStream->codec_info_duration;
 478 #else
 479         boost::uint64_t duration = _audioStream->duration;
 480 #endif
 481         if (duration == AV_NOPTS_VALUE) {
 482             log_error("Duration of audio stream unknown to ffmpeg");
 483             duration=0; // TODO: guess!
 484         }
 485         else {
 486             duration = duration / as_double(_audioStream->time_base); // TODO: check this
 487         }
 488
 489         _audioInfo.reset(new AudioInfo(codec, sampleRate, sampleSize, stereo,
 490                     duration, CODEC_TYPE_CUSTOM /*codec type*/));
 491
 492         // NOTE: AVCodecContext.extradata : void* for 51.11.0, uint8_t* for 51.38.0
 493         _audioInfo->extra.reset(new ExtraAudioInfoFfmpeg(
 494                      (uint8_t*)_audioStream->codec->extradata,
 495                      _audioStream->codec->extradata_size));
 496
 497     }
 498
 499
 500 }
 501
 502 MediaParserFfmpeg::~MediaParserFfmpeg()
 503 {
 504         stopParserThread();
 505
 506         if ( _formatCtx )
 507         {
 508                 // TODO: check if this is correct (should we create RIIA classes for ffmpeg stuff?)
 509                 //av_close_input_file(_formatCtx); // NOTE: this one triggers a mismatched free/delete on _byteIOBuffer with libavformat.so.52 !
 510                 av_free(_formatCtx);
 511         }
 512
 513         if ( _inputFmt )
 514         {
 515                 // TODO: check if this is correct (should we create RIIA classes for ffmpeg stuff?)
 516                 //av_free(_inputFmt); // it seems this one blows up, could be due to av_free(_formatCtx) above
 517         }
 518
 519 }
 520
 521 int
 522 MediaParserFfmpeg::readPacket(boost::uint8_t* buf, int buf_size)
 523 {
 524         //GNASH_REPORT_FUNCTION;
 525         //log_debug("readPacket(%d)", buf_size);
 526
 527         size_t ret = _stream->read(static_cast<void*>(buf), buf_size);
 528
 529         return ret;
 530
 531 }
 532
 533 boost::int64_t
 534 MediaParserFfmpeg::seekMedia(boost::int64_t offset, int whence)
 535 {
 536         //GNASH_REPORT_FUNCTION;
 537         //log_debug("::seekMedia(%1%, %2%)", offset, whence);
 538
 539         assert(_stream.get());
 540
 541         if (whence == SEEK_SET)
 542         {
 543                 // Offset is absolute new position in the file
 544                 if ( offset < 0 ) {
 545                         boost::format fmt = boost::format(
 546                                 _("MediaParserFfmpeg couldn't parse input format: "
 547                                 "tried to seek at negative offset %1%."))
 548                                 % offset;
 549                         throw MediaException(fmt.str());
 550                 }
 551                 _stream->seek(offset);
 552         }
 553         else if (whence == SEEK_CUR)
 554         {
 555                 // New position is offset + old position
 556                 _stream->seek(_stream->tell() + static_cast<std::streamoff>(offset));
 557         }
 558         else if (whence == SEEK_END)
 559         {
 560                 // New position is offset + end of file
 561                 log_unimpl("MediaParserFfmpeg seek from end of file");
 562                 // This is (most likely) a streamed file, so we can't seek to the end!
 563                 // Instead we seek to byteIOBufferSize bytes... seems to work fine...
 564                 _stream->seek(byteIOBufferSize);
 565
 566         }
 567         else
 568         {
 569                 // ffmpeg uses whence=AVSEEK_SIZE and offset=0 to request
 570                 // stream size !
 571                 log_unimpl("MediaParserFfmpeg: unsupported whence value %d", whence);
 572         }
 573
 574
 575         return _stream->tell();
 576 }
 577
 578 boost::uint16_t
 579 MediaParserFfmpeg::SampleFormatToSampleSize(SampleFormat fmt)
 580 {
 581         switch (fmt)
 582         {
 583                 case SAMPLE_FMT_U8: // unsigned 8 bits
 584                         return 1;
 585
 586                 case SAMPLE_FMT_S16: // signed 16 bits
 587                 case SAMPLE_FMT_FLT: // float
 588                         return 2;
 589
 590 #if !defined (LIBAVCODEC_VERSION_MAJOR) || LIBAVCODEC_VERSION_MAJOR < 52
 591 // Was dropped for version 52.0.0
 592                 case SAMPLE_FMT_S24: // signed 24 bits
 593                         return 3;
 594 #endif
 595
 596                 case SAMPLE_FMT_S32: // signed 32 bits
 597                         return 4;
 598
 599                 case SAMPLE_FMT_NONE:
 600                 default:
 601                         return 8; // arbitrary value
 602         }
 603 }
 604
 605
 606 } // gnash.media.ffmpeg namespace
 607 } // end of gnash::media namespace
 608 } // end of gnash namespace
 609
 610 #undef PADDING_BYTES
 611 #undef READ_CHUNKS