// backout 29799f914cab, Bug 917642 - [Helix] Please update the helix blobs
// [gecko.git] / content / media / MediaDecoderReader.h
// blob 86367e057d1e3b0d6ea5476c70df8303579d656d
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #if !defined(MediaDecoderReader_h_)
7 #define MediaDecoderReader_h_
9 #include <nsDeque.h>
10 #include "nsSize.h"
11 #include "mozilla/ReentrantMonitor.h"
12 #include "SharedBuffer.h"
13 #include "AudioSampleFormat.h"
14 #include "AbstractMediaDecoder.h"
15 #include "ImageTypes.h"
17 struct nsIntRect;
19 namespace mozilla {
21 namespace layers {
22 class Image;
23 class ImageContainer;
26 namespace dom {
27 class TimeRanges;
30 // Stores info relevant to presenting media frames.
31 class VideoInfo {
32 public:
33 VideoInfo()
34 : mDisplay(0,0),
35 mStereoMode(STEREO_MODE_MONO),
36 mHasVideo(false)
39 // Returns true if it's safe to use aPicture as the picture to be
40 // extracted inside a frame of size aFrame, and scaled up to and displayed
41 // at a size of aDisplay. You should validate the frame, picture, and
42 // display regions before using them to display video frames.
43 static bool ValidateVideoRegion(const nsIntSize& aFrame,
44 const nsIntRect& aPicture,
45 const nsIntSize& aDisplay);
47 // Size in pixels at which the video is rendered. This is after it has
48 // been scaled by its aspect ratio.
49 nsIntSize mDisplay;
51 // Indicates the frame layout for single track stereo videos.
52 StereoMode mStereoMode;
54 // True if we have an active video bitstream.
55 bool mHasVideo;
// Stores info relevant to presenting audio samples.
class AudioInfo {
public:
  // Defaults to CD-like stereo 44.1kHz with no active bitstream.
  AudioInfo()
    : mRate(44100),
      mChannels(2),
      mHasAudio(false)
  {}

  // Sample rate.
  uint32_t mRate;

  // Number of audio channels.
  uint32_t mChannels;

  // True if we have an active audio bitstream.
  bool mHasAudio;
};
76 class MediaInfo {
77 public:
78 bool HasVideo() const
80 return mVideo.mHasVideo;
83 bool HasAudio() const
85 return mAudio.mHasAudio;
88 bool HasValidMedia() const
90 return HasVideo() || HasAudio();
93 VideoInfo mVideo;
94 AudioInfo mAudio;
// Container that holds media samples.
class MediaData {
public:

  // Kind of payload this sample carries.
  enum Type {
    AUDIO_SAMPLES = 0,
    VIDEO_FRAME = 1
  };

  MediaData(Type aType,
            int64_t aOffset,
            int64_t aTimestamp,
            int64_t aDuration)
    : mType(aType),
      mOffset(aOffset),
      mTime(aTimestamp),
      mDuration(aDuration)
  {}

  virtual ~MediaData() {}

  // Type of contained data.
  const Type mType;

  // Approximate byte offset where this data was demuxed from its media.
  const int64_t mOffset;

  // Start time of sample, in microseconds.
  const int64_t mTime;

  // Duration of sample, in microseconds.
  const int64_t mDuration;

  // End time of the sample in microseconds, i.e. start time plus duration.
  int64_t GetEndTime() const { return mTime + mDuration; }
};
134 // Holds chunk a decoded audio frames.
135 class AudioData : public MediaData {
136 public:
138 AudioData(int64_t aOffset,
139 int64_t aTime,
140 int64_t aDuration,
141 uint32_t aFrames,
142 AudioDataValue* aData,
143 uint32_t aChannels)
144 : MediaData(AUDIO_SAMPLES, aOffset, aTime, aDuration),
145 mFrames(aFrames),
146 mChannels(aChannels),
147 mAudioData(aData)
149 MOZ_COUNT_CTOR(AudioData);
152 ~AudioData()
154 MOZ_COUNT_DTOR(AudioData);
157 // If mAudioBuffer is null, creates it from mAudioData.
158 void EnsureAudioBuffer();
160 const uint32_t mFrames;
161 const uint32_t mChannels;
162 // At least one of mAudioBuffer/mAudioData must be non-null.
163 // mChannels channels, each with mFrames frames
164 nsRefPtr<SharedBuffer> mAudioBuffer;
165 // mFrames frames, each with mChannels values
166 nsAutoArrayPtr<AudioDataValue> mAudioData;
// Forward declaration for the gralloc-backed buffer type used by one of the
// VideoData::Create overloads below.
namespace layers {
class GraphicBufferLocked;
} // namespace layers
173 // Holds a decoded video frame, in YCbCr format. These are queued in the reader.
174 class VideoData : public MediaData {
175 public:
176 typedef layers::ImageContainer ImageContainer;
177 typedef layers::Image Image;
179 // YCbCr data obtained from decoding the video. The index's are:
180 // 0 = Y
181 // 1 = Cb
182 // 2 = Cr
183 struct YCbCrBuffer {
184 struct Plane {
185 uint8_t* mData;
186 uint32_t mWidth;
187 uint32_t mHeight;
188 uint32_t mStride;
189 uint32_t mOffset;
190 uint32_t mSkip;
193 Plane mPlanes[3];
196 // Constructs a VideoData object. If aImage is nullptr, creates a new Image
197 // holding a copy of the YCbCr data passed in aBuffer. If aImage is not
198 // nullptr, it's stored as the underlying video image and aBuffer is assumed
199 // to point to memory within aImage so no copy is made. aTimecode is a codec
200 // specific number representing the timestamp of the frame of video data.
201 // Returns nsnull if an error occurs. This may indicate that memory couldn't
202 // be allocated to create the VideoData object, or it may indicate some
203 // problem with the input data (e.g. negative stride).
204 static VideoData* Create(VideoInfo& aInfo,
205 ImageContainer* aContainer,
206 Image* aImage,
207 int64_t aOffset,
208 int64_t aTime,
209 int64_t aDuration,
210 const YCbCrBuffer &aBuffer,
211 bool aKeyframe,
212 int64_t aTimecode,
213 nsIntRect aPicture);
215 // Variant that always makes a copy of aBuffer
216 static VideoData* Create(VideoInfo& aInfo,
217 ImageContainer* aContainer,
218 int64_t aOffset,
219 int64_t aTime,
220 int64_t aDuration,
221 const YCbCrBuffer &aBuffer,
222 bool aKeyframe,
223 int64_t aTimecode,
224 nsIntRect aPicture);
226 // Variant to create a VideoData instance given an existing aImage
227 static VideoData* Create(VideoInfo& aInfo,
228 Image* aImage,
229 int64_t aOffset,
230 int64_t aTime,
231 int64_t aDuration,
232 const YCbCrBuffer &aBuffer,
233 bool aKeyframe,
234 int64_t aTimecode,
235 nsIntRect aPicture);
237 static VideoData* Create(VideoInfo& aInfo,
238 ImageContainer* aContainer,
239 int64_t aOffset,
240 int64_t aTime,
241 int64_t aDuration,
242 layers::GraphicBufferLocked* aBuffer,
243 bool aKeyframe,
244 int64_t aTimecode,
245 nsIntRect aPicture);
247 static VideoData* CreateFromImage(VideoInfo& aInfo,
248 ImageContainer* aContainer,
249 int64_t aOffset,
250 int64_t aTime,
251 int64_t aDuration,
252 const nsRefPtr<Image>& aImage,
253 bool aKeyframe,
254 int64_t aTimecode,
255 nsIntRect aPicture);
257 // Creates a new VideoData identical to aOther, but with a different
258 // specified duration. All data from aOther is copied into the new
259 // VideoData. The new VideoData's mImage field holds a reference to
260 // aOther's mImage, i.e. the Image is not copied. This function is useful
261 // in reader backends that can't determine the duration of a VideoData
262 // until the next frame is decoded, i.e. it's a way to change the const
263 // duration field on a VideoData.
264 static VideoData* ShallowCopyUpdateDuration(VideoData* aOther,
265 int64_t aDuration);
267 // Constructs a duplicate VideoData object. This intrinsically tells the
268 // player that it does not need to update the displayed frame when this
269 // frame is played; this frame is identical to the previous.
270 static VideoData* CreateDuplicate(int64_t aOffset,
271 int64_t aTime,
272 int64_t aDuration,
273 int64_t aTimecode)
275 return new VideoData(aOffset, aTime, aDuration, aTimecode);
278 ~VideoData();
280 // Dimensions at which to display the video frame. The picture region
281 // will be scaled to this size. This is should be the picture region's
282 // dimensions scaled with respect to its aspect ratio.
283 const nsIntSize mDisplay;
285 // Codec specific internal time code. For Ogg based codecs this is the
286 // granulepos.
287 const int64_t mTimecode;
289 // This frame's image.
290 nsRefPtr<Image> mImage;
292 // When true, denotes that this frame is identical to the frame that
293 // came before; it's a duplicate. mBuffer will be empty.
294 const bool mDuplicate;
295 const bool mKeyframe;
297 public:
298 VideoData(int64_t aOffset,
299 int64_t aTime,
300 int64_t aDuration,
301 int64_t aTimecode);
303 VideoData(int64_t aOffset,
304 int64_t aTime,
305 int64_t aDuration,
306 bool aKeyframe,
307 int64_t aTimecode,
308 nsIntSize aDisplay);
312 // Thread and type safe wrapper around nsDeque.
313 template <class T>
314 class MediaQueueDeallocator : public nsDequeFunctor {
315 virtual void* operator() (void* anObject) {
316 delete static_cast<T*>(anObject);
317 return nullptr;
321 template <class T> class MediaQueue : private nsDeque {
322 public:
324 MediaQueue()
325 : nsDeque(new MediaQueueDeallocator<T>()),
326 mReentrantMonitor("mediaqueue"),
327 mEndOfStream(false)
330 ~MediaQueue() {
331 Reset();
334 inline int32_t GetSize() {
335 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
336 return nsDeque::GetSize();
339 inline void Push(T* aItem) {
340 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
341 nsDeque::Push(aItem);
344 inline void PushFront(T* aItem) {
345 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
346 nsDeque::PushFront(aItem);
349 inline T* Pop() {
350 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
351 return static_cast<T*>(nsDeque::Pop());
354 inline T* PopFront() {
355 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
356 return static_cast<T*>(nsDeque::PopFront());
359 inline T* Peek() {
360 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
361 return static_cast<T*>(nsDeque::Peek());
364 inline T* PeekFront() {
365 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
366 return static_cast<T*>(nsDeque::PeekFront());
369 inline void Empty() {
370 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
371 nsDeque::Empty();
374 inline void Erase() {
375 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
376 nsDeque::Erase();
379 void Reset() {
380 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
381 while (GetSize() > 0) {
382 T* x = PopFront();
383 delete x;
385 mEndOfStream = false;
388 bool AtEndOfStream() {
389 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
390 return GetSize() == 0 && mEndOfStream;
393 // Returns true if the media queue has had its last item added to it.
394 // This happens when the media stream has been completely decoded. Note this
395 // does not mean that the corresponding stream has finished playback.
396 bool IsFinished() {
397 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
398 return mEndOfStream;
401 // Informs the media queue that it won't be receiving any more items.
402 void Finish() {
403 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
404 mEndOfStream = true;
407 // Returns the approximate number of microseconds of items in the queue.
408 int64_t Duration() {
409 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
410 if (GetSize() < 2) {
411 return 0;
413 T* last = Peek();
414 T* first = PeekFront();
415 return last->mTime - first->mTime;
418 void LockedForEach(nsDequeFunctor& aFunctor) const {
419 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
420 ForEach(aFunctor);
423 // Extracts elements from the queue into aResult, in order.
424 // Elements whose start time is before aTime are ignored.
425 void GetElementsAfter(int64_t aTime, nsTArray<T*>* aResult) {
426 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
427 if (!GetSize())
428 return;
429 int32_t i;
430 for (i = GetSize() - 1; i > 0; --i) {
431 T* v = static_cast<T*>(ObjectAt(i));
432 if (v->GetEndTime() < aTime)
433 break;
435 // Elements less than i have a end time before aTime. It's also possible
436 // that the element at i has a end time before aTime, but that's OK.
437 for (; i < GetSize(); ++i) {
438 aResult->AppendElement(static_cast<T*>(ObjectAt(i)));
442 uint32_t FrameCount() {
443 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
444 uint32_t frames = 0;
445 for (int32_t i = 0; i < GetSize(); ++i) {
446 T* v = static_cast<T*>(ObjectAt(i));
447 frames += v->mFrames;
449 return frames;
452 private:
453 mutable ReentrantMonitor mReentrantMonitor;
455 // True when we've decoded the last frame of data in the
456 // bitstream for which we're queueing frame data.
457 bool mEndOfStream;
460 // Encapsulates the decoding and reading of media data. Reading can only be
461 // done on the decode thread. Never hold the decoder monitor when
462 // calling into this class. Unless otherwise specified, methods and fields of
463 // this class can only be accessed on the decode thread.
464 class MediaDecoderReader {
465 public:
466 MediaDecoderReader(AbstractMediaDecoder* aDecoder);
467 virtual ~MediaDecoderReader();
469 // Initializes the reader, returns NS_OK on success, or NS_ERROR_FAILURE
470 // on failure.
471 virtual nsresult Init(MediaDecoderReader* aCloneDonor) = 0;
473 // True if this reader is waiting media resource allocation
474 virtual bool IsWaitingMediaResources() { return false; }
475 // True when this reader need to become dormant state
476 virtual bool IsDormantNeeded() { return false; }
477 // Release media resources they should be released in dormant state
478 virtual void ReleaseMediaResources() {};
479 // Release the decoder during shutdown
480 virtual void ReleaseDecoder() {};
482 // Resets all state related to decoding, emptying all buffers etc.
483 virtual nsresult ResetDecode();
485 // Decodes an unspecified amount of audio data, enqueuing the audio data
486 // in mAudioQueue. Returns true when there's more audio to decode,
487 // false if the audio is finished, end of file has been reached,
488 // or an un-recoverable read error has occured.
489 virtual bool DecodeAudioData() = 0;
491 #ifdef MOZ_DASH
492 // Steps to carry out at the start of the |DecodeLoop|.
493 virtual void PrepareToDecode() { }
494 #endif
496 // Reads and decodes one video frame. Packets with a timestamp less
497 // than aTimeThreshold will be decoded (unless they're not keyframes
498 // and aKeyframeSkip is true), but will not be added to the queue.
499 virtual bool DecodeVideoFrame(bool &aKeyframeSkip,
500 int64_t aTimeThreshold) = 0;
502 virtual bool HasAudio() = 0;
503 virtual bool HasVideo() = 0;
505 // Read header data for all bitstreams in the file. Fills aInfo with
506 // the data required to present the media, and optionally fills *aTags
507 // with tag metadata from the file.
508 // Returns NS_OK on success, or NS_ERROR_FAILURE on failure.
509 virtual nsresult ReadMetadata(MediaInfo* aInfo,
510 MetadataTags** aTags) = 0;
512 // Stores the presentation time of the first frame we'd be able to play if
513 // we started playback at the current position. Returns the first video
514 // frame, if we have video.
515 virtual VideoData* FindStartTime(int64_t& aOutStartTime);
517 // Moves the decode head to aTime microseconds. aStartTime and aEndTime
518 // denote the start and end times of the media in usecs, and aCurrentTime
519 // is the current playback position in microseconds.
520 virtual nsresult Seek(int64_t aTime,
521 int64_t aStartTime,
522 int64_t aEndTime,
523 int64_t aCurrentTime) = 0;
525 // Called when the decode thread is started, before calling any other
526 // decode, read metadata, or seek functions. Do any thread local setup
527 // in this function.
528 virtual void OnDecodeThreadStart() {}
530 // Called when the decode thread is about to finish, after all calls to
531 // any other decode, read metadata, or seek functions. Any backend specific
532 // thread local tear down must be done in this function. Note that another
533 // decode thread could start up and run in future.
534 virtual void OnDecodeThreadFinish() {}
536 // Tell the reader that the data decoded are not for direct playback, so it
537 // can accept more files, in particular those which have more channels than
538 // available in the audio output.
539 void SetIgnoreAudioOutputFormat()
541 mIgnoreAudioOutputFormat = true;
544 protected:
545 // Queue of audio frames. This queue is threadsafe, and is accessed from
546 // the audio, decoder, state machine, and main threads.
547 MediaQueue<AudioData> mAudioQueue;
549 // Queue of video frames. This queue is threadsafe, and is accessed from
550 // the decoder, state machine, and main threads.
551 MediaQueue<VideoData> mVideoQueue;
553 public:
554 // Populates aBuffered with the time ranges which are buffered. aStartTime
555 // must be the presentation time of the first frame in the media, e.g.
556 // the media time corresponding to playback time/position 0. This function
557 // is called on the main, decode, and state machine threads.
559 // This base implementation in MediaDecoderReader estimates the time ranges
560 // buffered by interpolating the cached byte ranges with the duration
561 // of the media. Reader subclasses should override this method if they
562 // can quickly calculate the buffered ranges more accurately.
564 // The primary advantage of this implementation in the reader base class
565 // is that it's a fast approximation, which does not perform any I/O.
567 // The OggReader relies on this base implementation not performing I/O,
568 // since in FirefoxOS we can't do I/O on the main thread, where this is
569 // called.
570 virtual nsresult GetBuffered(dom::TimeRanges* aBuffered,
571 int64_t aStartTime);
573 class VideoQueueMemoryFunctor : public nsDequeFunctor {
574 public:
575 VideoQueueMemoryFunctor() : mResult(0) {}
577 virtual void* operator()(void* anObject);
579 int64_t mResult;
582 virtual int64_t VideoQueueMemoryInUse() {
583 VideoQueueMemoryFunctor functor;
584 mVideoQueue.LockedForEach(functor);
585 return functor.mResult;
588 class AudioQueueMemoryFunctor : public nsDequeFunctor {
589 public:
590 AudioQueueMemoryFunctor() : mResult(0) {}
592 virtual void* operator()(void* anObject) {
593 const AudioData* audioData = static_cast<const AudioData*>(anObject);
594 mResult += audioData->mFrames * audioData->mChannels * sizeof(AudioDataValue);
595 return nullptr;
598 int64_t mResult;
601 virtual int64_t AudioQueueMemoryInUse() {
602 AudioQueueMemoryFunctor functor;
603 mAudioQueue.LockedForEach(functor);
604 return functor.mResult;
607 // Only used by WebMReader and MediaOmxReader for now, so stub here rather
608 // than in every reader than inherits from MediaDecoderReader.
609 virtual void NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset) {}
611 virtual MediaQueue<AudioData>& AudioQueue() { return mAudioQueue; }
612 virtual MediaQueue<VideoData>& VideoQueue() { return mVideoQueue; }
614 // Returns a pointer to the decoder.
615 AbstractMediaDecoder* GetDecoder() {
616 return mDecoder;
619 AudioData* DecodeToFirstAudioData();
620 VideoData* DecodeToFirstVideoData();
622 protected:
623 // Pumps the decode until we reach frames required to play at time aTarget
624 // (usecs).
625 nsresult DecodeToTarget(int64_t aTarget);
627 // Reference to the owning decoder object.
628 AbstractMediaDecoder* mDecoder;
630 // Stores presentation info required for playback.
631 MediaInfo mInfo;
633 // Whether we should accept media that we know we can't play
634 // directly, because they have a number of channel higher than
635 // what we support.
636 bool mIgnoreAudioOutputFormat;
639 } // namespace mozilla
641 #endif