libmedia/FLVParser.cpp

   1 // FLVParser.cpp:  Flash Video file parser, for Gnash.
   2 //
   3 //   Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   4 //
   5 // This program is free software; you can redistribute it and/or modify
   6 // it under the terms of the GNU General Public License as published by
   7 // the Free Software Foundation; either version 3 of the License, or
   8 // (at your option) any later version.
   9 //
  10 // This program is distributed in the hope that it will be useful,
  11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 // GNU General Public License for more details.
  14 //
  15 // You should have received a copy of the GNU General Public License
  16 // along with this program; if not, write to the Free Software
  17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  18 //
  19
  20
  21 #include "FLVParser.h"
  22 #include "log.h"
  23 #include "utility.h"
  24 #include "GnashException.h"
  25 #include "IOChannel.h"
  26 #include "SimpleBuffer.h"
  27 #include "GnashAlgorithm.h"
  28
  29 #include <string>
  30 #include <iosfwd>
  31
  32 // Define the following macro to have seek() operations printed
  33 //#define GNASH_DEBUG_SEEK 1
  34
  35 namespace gnash {
  36 namespace media {
  37
  38
  39 const size_t FLVParser::paddingBytes;
  40 const boost::uint16_t FLVParser::FLVAudioTag::flv_audio_rates [] =
  41     { 5500, 11000, 22050, 44100 };
  42
  43 FLVParser::FLVParser(std::auto_ptr<IOChannel> lt)
  44         :
  45         MediaParser(lt),
  46         _lastParsedPosition(0),
  47         _nextPosToIndex(0),
  48         _nextAudioFrame(0),
  49         _nextVideoFrame(0),
  50         _audio(false),
  51         _video(false),
  52         _cuePoints(),
  53         _indexingCompleted(false)
  54 {
  55         if (!parseHeader()) {
  56                 throw MediaException("FLVParser couldn't parse header from input");
  57     }
  58
  59         startParserThread();
  60 }
  61
  62 FLVParser::~FLVParser()
  63 {
  64         stopParserThread();
  65 }
  66
  67
  68 // would be called by main thread
  69 bool
  70 FLVParser::seek(boost::uint32_t& time)
  71 {
  72
  73         boost::mutex::scoped_lock streamLock(_streamMutex);
  74         // we might obtain this lock while the parser is pushing the last
  75         // encoded frame on the queue, or while it is waiting on the wakeup
  76         // condition
  77
  78         // Setting _seekRequest to true will make the parser thread
  79         // take care of cleaning up the buffers before going on with
  80         // parsing, thus fixing the case in which streamLock was obtained
  81         // while the parser was pushing to queue
  82         _seekRequest = true;
  83
  84         if ( _cuePoints.empty() )
  85         {
  86                 log_debug("No known cue points yet, can't seek");
  87                 return false;
  88         }
  89
  90         CuePointsMap::iterator it = _cuePoints.lower_bound(time);
  91         if ( it == _cuePoints.end() )
  92         {
  93                 log_debug("No cue points greater or equal requested time %d", time);
  94                 return false;
  95         }
  96
  97         long lowerBoundPosition = it->second;
  98         log_debug("Seek requested to time %d triggered seek to cue point at "
  99             "position %d and time %d", time, it->second, it->first);
 100         time = it->first;
 101         _lastParsedPosition=lowerBoundPosition;
 102         _parsingComplete=false; // or NetStream will send the Play.Stop event...
 103
 104
 105         // Finally, clear the buffers.
 106         // The call will also wake the parse up if it was sleeping.
 107         // WARNING: a race condition might be pending here:
 108         // If we handled to do all the seek work in the *small*
 109         // time that the parser runs w/out mutex locked (ie:
 110         // after it unlocked the stream mutex and before it locked
 111         // the queue mutex), it will still push an old encoded frame
 112         // to the queue; if the pushed frame alone makes it block
 113         // again (bufferFull) we'll have a problem.
 114         // Note though, that a single frame can't reach a bufferFull
 115         // condition, as it takes at least two for anything != 0.
 116         //
 117         clearBuffers();
 118
 119         return true;
 120 }
 121
 122 // would be called by parser thread
 123 bool
 124 FLVParser::parseNextChunk()
 125 {
 126         bool indexOnly = bufferFull(); // won't lock, but our caller locked...
 127         return parseNextTag(indexOnly);
 128 }
 129
 130 // would be called by parser thread
 131 void
 132 FLVParser::indexAudioTag(const FLVTag& tag, boost::uint32_t thisTagPos)
 133 {
 134         if ( _videoInfo.get()) {
 135                 // if we have video we let that drive cue points
 136                 return;
 137         }
 138
 139         // we can theoretically seek anywhere, but
 140         // let's just keep 5 seconds of distance
 141         CuePointsMap::iterator it = _cuePoints.lower_bound(tag.timestamp);
 142         if ( it == _cuePoints.end() || it->first - tag.timestamp >= 5000)
 143         {
 144                 //log_debug("Added cue point at timestamp %d and position %d "
 145         //"(audio frame)", tag.timestamp, thisTagPos);
 146                 _cuePoints[tag.timestamp] = thisTagPos;
 147         }
 148 }
 149
 150 void
 151 FLVParser::indexVideoTag(const FLVTag& tag, const FLVVideoTag& videotag, boost::uint32_t thisTagPos)
 152 {
 153         if ( videotag.frametype != FLV_VIDEO_KEYFRAME ) {
 154                 return;
 155         }
 156
 157         //log_debug("Added cue point at timestamp %d and position %d "
 158     //"(key video frame)", tag.timestamp, thisTagPos);
 159         _cuePoints[tag.timestamp] = thisTagPos;
 160 }
 161
 162
 163 std::auto_ptr<EncodedAudioFrame>
 164 FLVParser::parseAudioTag(const FLVTag& flvtag, const FLVAudioTag& audiotag, boost::uint32_t thisTagPos)
 165 {
 166         std::auto_ptr<EncodedAudioFrame> frame;
 167
 168         if ( ! _audio ) {
 169                 log_error(_("Unexpected audio tag found at offset %d FLV stream "
 170                     "advertising no audio in header. We'll warn only once for "
 171                     "each FLV, expecting any further audio tag."), thisTagPos);
 172                 _audio = true; // TOCHECK: is this safe ?
 173         }
 174
 175         bool header = false;
 176         boost::uint32_t bodyLength = flvtag.body_size;
 177
 178         if (audiotag.codec == AUDIO_CODEC_AAC) {
 179                 boost::uint8_t packettype = _stream->read_byte();
 180                 header = (packettype == 0);
 181                 --bodyLength;
 182         }
 183
 184         frame = readAudioFrame(bodyLength-1, flvtag.timestamp);
 185         if ( ! frame.get() ) {
 186                 log_error("could not read audio frame?");
 187         }
 188
 189         // If this is the first audioframe no info about the
 190         // audio format has been noted, so we do that now
 191         if (!_audioInfo.get()) {
 192                 _audioInfo.reset(new AudioInfo(audiotag.codec, audiotag.samplerate,
 193                     audiotag.samplesize, audiotag.stereo, 0,
 194                     CODEC_TYPE_FLASH));
 195
 196         if (header) {
 197
 198             // The frame is 0-padded up to the end. It may be larger than
 199             // this if fewer bytes were read than requested, but it is
 200             // never smaller.
 201             const size_t bufSize = frame->dataSize + paddingBytes;
 202
 203             boost::uint8_t* data = new boost::uint8_t[bufSize];
 204
 205             std::copy(frame->data.get(), frame->data.get() + bufSize, data);
 206
 207                         _audioInfo->extra.reset(
 208                                 new ExtraAudioInfoFlv(data, frame->dataSize)
 209                         );
 210
 211                         // The FAAD decoder will reject us if we pass the header buffer.
 212                         // It will receive the header via the extra audio info anyway.
 213                         frame.reset();
 214                 }
 215         }
 216
 217         return frame;
 218 }
 219
 220 std::auto_ptr<EncodedVideoFrame>
 221 FLVParser::parseVideoTag(const FLVTag& flvtag, const FLVVideoTag& videotag, boost::uint32_t thisTagPos)
 222 {
 223         if ( ! _video ) {
 224                 log_error(_("Unexpected video tag found at offset %d of FLV stream "
 225                     "advertising no video in header. We'll warn only once per "
 226                     "FLV, expecting any further video tag."), thisTagPos);
 227                 _video = true; // TOCHECK: is this safe ?
 228         }
 229
 230         bool header = false;
 231         boost::uint32_t bodyLength = flvtag.body_size;
 232
 233         switch(videotag.codec) {
 234                 case VIDEO_CODEC_VP6:
 235                 case VIDEO_CODEC_VP6A:
 236                 {
 237                         _stream->read_byte();
 238                         --bodyLength;
 239                         break;
 240                 }
 241                 case VIDEO_CODEC_H264:
 242                 {
 243                         boost::uint8_t packettype = _stream->read_byte();
 244                         IF_VERBOSE_PARSE( log_debug(_("AVC packet type: %d"),
 245                         (unsigned)packettype) );
 246
 247                         header = (packettype == 0);
 248
 249                         // 24-bits value for composition time offset ignored for now.
 250                         boost::uint8_t tmp[3];
 251                         _stream->read(tmp, 3);
 252
 253                         bodyLength -= 4;
 254                         break;
 255                 }
 256                 default:
 257                         break;
 258         }
 259
 260         std::auto_ptr<EncodedVideoFrame> frame = readVideoFrame(bodyLength-1,
 261             flvtag.timestamp);
 262         if ( ! frame.get() ) {
 263                 log_error("could not read video frame?");
 264         }
 265
 266         // If this is the first videoframe no info about the
 267         // video format has been noted, so we do that now
 268         if ( ! _videoInfo.get() ) {
 269                 _videoInfo.reset(new VideoInfo(videotag.codec, 0, 0, 0, 0,
 270                     CODEC_TYPE_FLASH));
 271
 272                 if (header) {
 273             // The frame is 0-padded up to the end. It may be larger than
 274             // this if fewer bytes were read than requested, but it is
 275             // never smaller.
 276             const size_t bufSize = frame->dataSize() + paddingBytes;
 277
 278             boost::uint8_t* data = new boost::uint8_t[bufSize];
 279
 280             std::copy(frame->data(), frame->data() + bufSize, data);
 281                         _videoInfo->extra.reset(
 282                                 new ExtraVideoInfoFlv(data, frame->dataSize())
 283                         );
 284
 285                         // Don't bother emitting the header buffer.
 286                         frame.reset();
 287                 }
 288         }
 289         return frame;
 290 }
 291
 292
 293 // would be called by parser thread
 294 bool
 295 FLVParser::parseNextTag(bool index_only)
 296 {
 297         // lock the stream while reading from it, so actionscript
 298         // won't mess with the parser on seek  or on getBytesLoaded
 299         boost::mutex::scoped_lock streamLock(_streamMutex);
 300
 301         if ( index_only && _indexingCompleted ) return false;
 302         if ( _parsingComplete ) return false;
 303
 304         if ( _seekRequest )
 305         {
 306                 clearBuffers();
 307                 _seekRequest = false;
 308         }
 309
 310         boost::uint64_t& position = index_only ? _nextPosToIndex : _lastParsedPosition;
 311         bool& completed = index_only ? _indexingCompleted : _parsingComplete;
 312
 313         //log_debug("parseNextTag: _lastParsedPosition:%d, _nextPosToIndex:%d, index_only:%d", _lastParsedPosition, _nextPosToIndex, index_only);
 314
 315         unsigned long thisTagPos = position;
 316
 317         // Seek to next frame and skip the tag size
 318         //log_debug("FLVParser::parseNextTag seeking to %d", thisTagPos+4);
 319         if (!_stream->seek(thisTagPos+4))
 320         {
 321                 log_error("FLVParser::parseNextTag: can't seek to %d", thisTagPos+4);
 322
 323                 completed = true;
 324                 return false;
 325         }
 326         //log_debug("FLVParser::parseNextTag seeked to %d", thisTagPos+4);
 327
 328         // Read the tag info
 329         boost::uint8_t chunk[12];
 330         int actuallyRead = _stream->read(chunk, 12);
 331         if ( actuallyRead < 12 )
 332         {
 333                 if ( actuallyRead )
 334                         log_error("FLVParser::parseNextTag: can't read tag info "
 335                     "(needed 12 bytes, only got %d)", actuallyRead);
 336                 // else { assert(_stream->eof(); } ?
 337
 338                 completed = true;
 339
 340         // update bytes loaded
 341         boost::mutex::scoped_lock lock(_bytesLoadedMutex);
 342                 _bytesLoaded = _stream->tell();
 343                 return false;
 344         }
 345
 346         FLVTag flvtag(chunk);
 347
 348     // May be _lastParsedPosition OR _nextPosToIndex
 349     position += 15 + flvtag.body_size;
 350
 351         bool doIndex = (_lastParsedPosition+4 > _nextPosToIndex) || index_only;
 352         if ( _lastParsedPosition > _nextPosToIndex )
 353         {
 354                 //log_debug("::parseNextTag setting _nextPosToIndex=%d", _lastParsedPosition+4);
 355                 _nextPosToIndex = _lastParsedPosition;
 356         }
 357
 358         if ( position > _bytesLoaded ) {
 359                 boost::mutex::scoped_lock lock(_bytesLoadedMutex);
 360                 _bytesLoaded = position;
 361         }
 362
 363         // check for empty tag
 364         if (flvtag.body_size == 0) return true;
 365
 366         if (flvtag.type == FLV_AUDIO_TAG)
 367         {
 368                 FLVAudioTag audiotag(chunk[11]);
 369
 370                 if (doIndex) {
 371                         indexAudioTag(flvtag, thisTagPos);
 372                         if (index_only) {
 373                                 return true;
 374                         }
 375                 }
 376
 377
 378                 std::auto_ptr<EncodedAudioFrame> frame =
 379             parseAudioTag(flvtag, audiotag, thisTagPos);
 380                 if (!frame.get()) {
 381                         return false;
 382                 }
 383                 // Release the stream lock
 384                 // *before* pushing the frame as that
 385                 // might block us waiting for buffers flush
 386                 // the _qMutex...
 387                 // We've done using the stream for this tag parsing anyway
 388                 streamLock.unlock();
 389                 pushEncodedAudioFrame(frame);
 390         }
 391         else if (flvtag.type == FLV_VIDEO_TAG)
 392         {
 393                 FLVVideoTag videotag(chunk[11]);
 394
 395                 if (doIndex) {
 396                         indexVideoTag(flvtag, videotag, thisTagPos);
 397                         if (index_only) {
 398                                 return true;
 399                         }
 400                 }
 401
 402                 std::auto_ptr<EncodedVideoFrame> frame =
 403             parseVideoTag(flvtag, videotag, thisTagPos);
 404                 if (!frame.get()) {
 405                         return false;
 406                 }
 407
 408                 // Release the stream lock
 409                 // *before* pushing the frame as that
 410                 // might block us waiting for buffers flush
 411                 // the _qMutex...
 412                 streamLock.unlock();
 413                 pushEncodedVideoFrame(frame);
 414
 415         }
 416         else if (flvtag.type == FLV_META_TAG)
 417         {
 418                 if ( chunk[11] != 2 )
 419                 {
 420                         // ::processTags relies on the first AMF0 value being a string...
 421                         log_unimpl(_("First byte of FLV_META_TAG is %d, expected "
 422                         "0x02 (STRING AMF0 type)"),
 423                     static_cast<int>(chunk[11]));
 424                 }
 425                 // Extract information from the meta tag
 426                 std::auto_ptr<SimpleBuffer> metaTag(new SimpleBuffer(
 427                     flvtag.body_size-1));
 428                 size_t actuallyRead = _stream->read(metaTag->data(),
 429                 flvtag.body_size - 1);
 430
 431         if ( actuallyRead < flvtag.body_size-1 )
 432                 {
 433                         log_error("FLVParser::parseNextTag: can't read metaTag (%d) "
 434                     "body (needed %d bytes, only got %d)",
 435                                 FLV_META_TAG, flvtag.body_size, actuallyRead);
 436                         return false;
 437                 }
 438                 metaTag->resize(actuallyRead);
 439
 440                 boost::uint32_t terminus = getUInt24(metaTag->data() +
 441                 actuallyRead - 3);
 442
 443         if (terminus != 9) {
 444                         log_error(_("Corrupt FLV: Meta tag unterminated!"));
 445                 }
 446
 447                 boost::mutex::scoped_lock lock(_metaTagsMutex);
 448                 _metaTags.insert(std::make_pair(flvtag.timestamp, metaTag.release()));
 449         }
 450         else
 451         {
 452                 log_error(_("FLVParser::parseNextTag: unknown FLV tag type %d"),
 453                 (int)chunk[0]);
 454                 return false;
 455         }
 456
 457         _stream->read(chunk, 4);
 458         boost::uint32_t prevtagsize = chunk[0] << 24 | chunk[1] << 16 |
 459         chunk[2] << 8 | chunk[3];
 460         if (prevtagsize != flvtag.body_size + 11) {
 461                 log_error(_("Corrupt FLV: previous tag size record (%1%) unexpected "
 462                     "(actual size: %2%)"), prevtagsize, flvtag.body_size + 11);
 463         }
 464
 465         return true;
 466 }
 467
 468 // would be called by MAIN thread
 469 bool
 470 FLVParser::parseHeader()
 471 {
 472         assert(_stream->tell() == static_cast<std::streampos>(0));
 473
 474         // We only use 5 bytes of the header, because the last 4 bytes represent
 475     // an integer which is always 1.
 476         boost::uint8_t header[9];
 477         if ( _stream->read(header, 9) != 9 )
 478         {
 479                 log_error("FLVParser::parseHeader: couldn't read 9 bytes of header");
 480                 return false;
 481         }
 482
 483         _lastParsedPosition = _bytesLoaded = _nextPosToIndex = 9;
 484
 485         if (!std::equal(header, header + 3, "FLV")) {
 486                 return false;
 487         }
 488
 489         const boost::uint8_t version = header[3];
 490
 491         // Parse the audio+video bitmask
 492         _audio = header[4]&(1<<2);
 493         _video = header[4]&(1<<0);
 494
 495         log_debug("Parsing FLV version %d, audio:%d, video:%d",
 496                         (int)version, _audio, _video);
 497
 498         return true;
 499 }
 500
 501 inline boost::uint32_t
 502 FLVParser::getUInt24(boost::uint8_t* in)
 503 {
 504         // The bits are in big endian order
 505         return (in[0] << 16) | (in[1] << 8) | in[2];
 506 }
 507
 508 boost::uint64_t
 509 FLVParser::getBytesLoaded() const
 510 {
 511         boost::mutex::scoped_lock lock(_bytesLoadedMutex);
 512         return _bytesLoaded;
 513 }
 514
 515 // would be called by parser thread
 516 /*private*/
 517 std::auto_ptr<EncodedAudioFrame>
 518 FLVParser::readAudioFrame(boost::uint32_t dataSize, boost::uint32_t timestamp)
 519 {
 520
 521         std::auto_ptr<EncodedAudioFrame> frame(new EncodedAudioFrame);
 522
 523     const size_t bufSize = dataSize + paddingBytes;
 524
 525     boost::uint8_t* data = new boost::uint8_t[bufSize];
 526         const size_t bytesRead = _stream->read(data, dataSize);
 527
 528     std::fill(data + bytesRead, data + bufSize, 0);
 529
 530         if (bytesRead < dataSize) {
 531                 log_error("FLVParser::readAudioFrame: could only read %d/%d bytes",
 532                 bytesRead, dataSize);
 533         }
 534
 535         frame->dataSize = bytesRead;
 536         frame->timestamp = timestamp;
 537         frame->data.reset(data);
 538
 539         return frame;
 540 }
 541
 542 // would be called by parser thread
 543 /*private*/
 544 std::auto_ptr<EncodedVideoFrame>
 545 FLVParser::readVideoFrame(boost::uint32_t dataSize, boost::uint32_t timestamp)
 546 {
 547         std::auto_ptr<EncodedVideoFrame> frame;
 548
 549     const size_t bufSize = dataSize + paddingBytes;
 550
 551         boost::uint8_t* data = new boost::uint8_t[bufSize];
 552         const size_t bytesRead = _stream->read(data, dataSize);
 553
 554     std::fill(data + bytesRead, data + bufSize, 0);
 555
 556         // We won't need frameNum, so will set to zero...
 557         // TODO: fix this ?
 558         // NOTE: ownership of 'data' is transferred here
 559
 560         frame.reset(new EncodedVideoFrame(data, bytesRead, 0, timestamp));
 561         return frame;
 562 }
 563
 564
 565 void
 566 FLVParser::fetchMetaTags(OrderedMetaTags& tags, boost::uint64_t ts)
 567 {
 568         boost::mutex::scoped_lock lock(_metaTagsMutex);
 569         if (!_metaTags.empty()) {
 570         MetaTags::iterator it = _metaTags.upper_bound(ts);
 571
 572         // Copy the first value into the return container.
 573         std::transform(_metaTags.begin(), it, std::back_inserter(tags),
 574                 boost::bind(&MetaTags::value_type::second, _1));
 575
 576         _metaTags.erase(_metaTags.begin(), it);
 577         }
 578 }
 579
 580 } // end of gnash::media namespace
 581 } // end of gnash namespace