1 // MediaParser.h: Base class for media parsers
3 // Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
4 // Free Software Foundation, Inc.
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #ifndef GNASH_MEDIAPARSER_H
21 #define GNASH_MEDIAPARSER_H
23 #include <boost/scoped_array.hpp>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/thread/thread.hpp>
26 #include <boost/thread/condition.hpp>
27 #include <boost/thread/barrier.hpp>
32 #include <iosfwd> // for output operator forward declarations
33 #include <boost/optional.hpp>
35 #include "IOChannel.h" // for inlines
36 #include "dsodefs.h" // DSOEXPORT
38 // Undefine this to load/parse media files in main thread
39 #define LOAD_MEDIA_IN_A_SEPARATE_THREAD 1
61 /// Disposable interlaced frames
65 /// The type of the codec id passed in the AudioInfo or VideoInfo class
68 /// The internal flash codec ids
75 /// Video codec ids as defined in flash
78 /// H263/SVQ3 video codec
82 VIDEO_CODEC_SCREENVIDEO
= 3,
84 /// On2 VP6 video codec
87 /// On2 VP6 Alpha video codec
90 /// Screenvideo2 codec
91 VIDEO_CODEC_SCREENVIDEO2
= 6,
93 /// MPEG-4 Part 10, or Advanced Video Coding
96 // NOTE: if you add more elements here remember to
97 // also add them to the output operator!
/// Stream-insertion operator for videoCodecType values.
///
/// Only declared here; the definition lives outside this header. The note
/// inside the enum asks for every new enumerator to be handled there too.
///
/// @param os  Output stream to write to.
/// @param t   Video codec id to write.
/// @return    Presumably os, as usual for inserters -- TODO confirm in the definition.
100 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const videoCodecType
& t
);
102 /// Audio codec ids as defined in flash
104 /// For some encodings, audio data is organized
105 /// in logical frames. The structure of such frames
106 /// (header/payload) is codec dependent.
107 /// The actual size of each frame may not be known
108 /// w/out parsing the encoded stream, as it
109 /// might be specified in the header of each frame.
111 /// Other encodings are looser about frames. For these
112 /// you can define a frame any way you want, as long
113 /// as a frame doesn't contain partial samples.
115 /// For FFMPEG, you can NOT construct a parser for the
116 /// loosely-framed codecs.
118 /// Parser-needing codecs will be documented as such.
122 /// Signed Linear PCM, unspecified byte order
124 /// Use of this codec is deprecated (but still supported) due to
125 /// the unspecified byte order (you can only play >8bit samples
126 /// in a sane way when the endianness of encoding and decoding
129 /// 90% of the time the actual encoder ran on Windows, so
130 /// it is a good bet to guess for little-endian.
131 /// SampleSize may be 8 or 16 bits.
137 /// SWF support 2, 3, 4, and 5 bits / sample.
138 /// ADPCM "frames" consist of 4096 ADPCM codes per channel.
140 /// For streaming there is no concept of "seekSamples" like
141 /// MP3 streaming implements. Thus ADPCM is suboptimal for
142 /// streaming as it is difficult to match sound frames with
144 /// Uncompressed SampleSize is always 16 bit.
146 AUDIO_CODEC_ADPCM
= 1,
150 /// MP3 is supported for SWF4 and later.
151 /// MP3 sound is structured in frames consisting of a fixed-size
152 /// header (32Bit) and compressed sound data. Compressed sound
153 /// data always contains a fixed number of sound samples (576 or 1152).
154 /// For streaming sound an additional field is necessary (seekSamples)
155 /// to keep track of samples exceeding movie frame border.
157 /// MP3 header contains all necessary information to decode a single
158 /// frame. From this information one can derive the number of samples
159 /// and the frame's size.
160 /// Uncompressed SampleSize is always 16 bit.
164 /// Linear PCM, strictly little-endian
165 AUDIO_CODEC_UNCOMPRESSED
= 3,
167 /// Proprietary simple format. Presumably always 8 kHz mono, as the enumerator name says.
170 /// Data is organized in frames of 256 samples.
172 AUDIO_CODEC_NELLYMOSER_8HZ_MONO
= 5,
174 /// Proprietary simple format
177 /// Data is organized in frames of 256 samples.
179 AUDIO_CODEC_NELLYMOSER
= 6,
181 /// Advanced Audio Coding
182 AUDIO_CODEC_AAC
= 10,
184 /// Always 16kHz mono
185 AUDIO_CODEC_SPEEX
= 11
187 // NOTE: if you add more elements here remember to
188 // also add them to the output operator!
/// Stream-insertion operator for audioCodecType values.
///
/// Only declared here; the definition lives outside this header. The note
/// inside the enum asks for every new enumerator to be handled there too.
///
/// @param os  Output stream to write to.
/// @param t   Audio codec id to write.
/// @return    Presumably os, as usual for inserters -- TODO confirm in the definition.
191 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const audioCodecType
& t
);
193 /// Information about an audio stream
195 /// The information stored is codec-id,
196 /// samplerate, samplesize, stereo, duration and codec-type.
198 /// Additionally, an abstract ExtraInfo can be held.
205 /// Construct an AudioInfo object
209 /// To be interpreted as a media::audioCodecType if the typei
210 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
211 /// for codec information transfer between a MediaParser and a
212 /// AudioDecoder from the same %media handler module.
214 /// @param sampleRatei
215 /// Nominal sample rate.
216 /// @todo document units.
218 /// @param sampleSizei
219 /// Sample size, in bytes.
222 /// Sample type (stereo if true, mono otherwise).
223 /// @todo document if and how interpretation of sampleSizei changes
226 /// Nominal audio stream duration, in milliseconds.
229 /// Changes interpretation of the codeci parameter.
231 AudioInfo(int codeci
, boost::uint16_t sampleRatei
,
232 boost::uint16_t sampleSizei
, bool stereoi
,
233 boost::uint64_t durationi
, codecType typei
)
236 sampleRate(sampleRatei
),
237 sampleSize(sampleSizei
),
246 /// This has to be interpreted as audioCodecType if codecType type is CODEC_TYPE_FLASH
247 /// or interpretation is opaque and we rely on the assumption that the AudioInfo
248 /// creator and the AudioInfo user have a way to get a shared interpretation
252 boost::uint16_t sampleRate
;
254 /// Size of each sample, in bytes
255 boost::uint16_t sampleSize
;
259 boost::uint64_t duration
;
263 /// Extra info about an audio stream
265 /// Abstract class to hold any additional info
266 /// when required for proper decoder initialization.
270 virtual ~ExtraInfo() {}
273 /// Extra info about audio stream, if and when needed
275 /// Could be ExtraAudioInfoFlv or a media-handler specific info
277 std::auto_ptr
<ExtraInfo
> extra
;
280 /// Information about a video stream
282 /// The information stored is codec-id, width, height, framerate and duration.
284 /// Additionally, an abstract ExtraInfo can be held.
290 /// Construct a VideoInfo object
294 /// To be interpreted as a media::videoCodecType if the typei
295 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
296 /// for codec information transfer between a MediaParser and a
297 /// VideoDecoder from the same %media handler module.
300 /// Video frame width.
301 /// @todo check if still needed.
304 /// Video frame height.
305 /// @todo check if still needed.
307 /// @param frameRatei
308 /// Nominal video frame rate.
309 /// @todo document units.
312 /// Nominal video duration.
313 /// @todo check if still needed, if so document units!
316 /// Changes interpretation of the codeci parameter.
318 VideoInfo(int codeci
, boost::uint16_t widthi
, boost::uint16_t heighti
,
319 boost::uint16_t frameRatei
, boost::uint64_t durationi
,
325 frameRate(frameRatei
),
332 boost::uint16_t width
;
333 boost::uint16_t height
;
334 boost::uint16_t frameRate
;
335 boost::uint64_t duration
;
338 /// Extra info about a video stream
340 /// Abstract class to hold any additional info
341 /// when required for proper decoder initialization
345 virtual ~ExtraInfo() {}
348 /// Extra info about video stream, if and when needed
350 /// Could be ExtraVideoInfoFlv or a media-handler specific info
352 std::auto_ptr
<ExtraInfo
> extra
;
/// Stream-insertion operator for VideoInfo objects.
///
/// Only declared here; which VideoInfo fields get written is decided by the
/// definition, which is not part of this header.
///
/// @param os  Output stream to write to.
/// @param vi  Video stream description to write.
/// @return    Presumably os, as usual for inserters -- TODO confirm in the definition.
355 DSOEXPORT
std::ostream
& operator << (std::ostream
& os
, const VideoInfo
& vi
);
358 class EncodedExtraData
{
361 virtual ~EncodedExtraData() {}
365 /// An encoded video frame
366 class EncodedVideoFrame
370 /// Create an encoded video frame
373 /// Data buffer, ownership transferred
376 /// Size of the data buffer
382 /// Presentation timestamp, in milliseconds.
384 EncodedVideoFrame(boost::uint8_t* data
, boost::uint32_t size
,
385 unsigned int frameNum
,
386 boost::uint64_t timestamp
=0)
391 _timestamp(timestamp
)
394 /// Return a read-only pointer to the encoded data.
///
/// The buffer is held by the _data scoped_array member, so ownership stays
/// with this frame: callers must not free the returned pointer.
395 const boost::uint8_t* data() const { return _data
.get(); }
397 /// Return size of the data buffer (the _size member).
/// NOTE(review): presumably a byte count, matching the uint8_t buffer -- confirm.
398 boost::uint32_t dataSize() const { return _size
; }
400 /// Return video frame presentation timestamp
/// (documented on the constructor as being in milliseconds).
401 boost::uint64_t timestamp() const { return _timestamp
; }
403 /// Return video frame number (the _frameNum member).
404 unsigned frameNum() const { return _frameNum
; }
406 // FIXME: should have better encapsulation for this sort of stuff.
407 std::auto_ptr
<EncodedExtraData
> extradata
;
410 boost::uint32_t _size
;
411 boost::scoped_array
<boost::uint8_t> _data
;
412 unsigned int _frameNum
;
413 boost::uint64_t _timestamp
;
416 /// An encoded audio frame
417 class EncodedAudioFrame
420 boost::uint32_t dataSize
;
421 boost::scoped_array
<boost::uint8_t> data
;
422 boost::uint64_t timestamp
;
424 // FIXME: should have better encapsulation for this sort of stuff.
425 std::auto_ptr
<EncodedExtraData
> extradata
;
428 /// The MediaParser class provides cursor-based access to encoded %media frames
430 /// Cursor-based access allows seeking as close as possible to a specified time
431 /// and fetching frames from there on, sequentially.
432 /// See seek(), nextVideoFrame(), nextAudioFrame()
434 /// Input is received from an IOChannel object.
440 /// A container for executable MetaTags contained in media streams.
442 /// Presently only known in FLV.
443 typedef std::multimap
<boost::uint64_t, boost::shared_ptr
<SimpleBuffer
> >
446 typedef std::vector
<MetaTags::mapped_type
> OrderedMetaTags
;
/// Construct a parser reading its input from the given channel.
///
/// @param stream
///     Input channel. It is passed as a std::auto_ptr by value, so the
///     caller gives up ownership of the IOChannel.
///     NOTE(review): presumably ends up in the _stream member -- confirm
///     in the definition.
447 MediaParser(std::auto_ptr
<IOChannel
> stream
);
449 // Classes with virtual methods need a virtual destructor:
450 // without one, deleting a subclass object through a MediaParser
451 // pointer is undefined and typically skips the subclass destructor.
454 virtual ~MediaParser();
457 /// Seek to the closest possible position to the given one,
458 /// and report the new position through the same parameter.
461 /// @param time input/output parameter: on input the requested time, on
462 /// output the actual time seeked to.
/// NOTE(review): units are presumably milliseconds, like the other
/// timestamps in this header -- confirm.
464 /// @return true if the seek was valid, false otherwise.
///
/// Pure virtual: every concrete parser has to implement it.
466 virtual bool seek(boost::uint32_t& time
)=0;
468 /// Returns minimum length of available buffers in milliseconds
470 /// TODO: FIXME: NOTE: this is currently used by NetStream.bufferLength
471 /// but is bogus as it doesn't take the *current* playhead cursor time
472 /// into account. A proper way would be having a getLastBufferTime ()
473 /// interface here, returning minimum timestamp of last available
474 /// frames and let NetStream::bufferLength() use that with playhead
475 /// time to find out...
///
/// NOTE(review): presumably locks _qMutex itself, since a separate
/// getBufferLengthNoLock() exists for callers already holding it -- confirm.
477 DSOEXPORT
boost::uint64_t getBufferLength() const;
479 /// Tell whether both the audio and the video buffer are empty.
/// @return true if both buffers are empty, false otherwise.
481 /// NOTE: locks _qMutex
482 DSOEXPORT
bool isBufferEmpty() const;
484 /// Return the time we want the parser thread to maintain in the buffer
485 DSOEXPORT
boost::uint64_t getBufferTime() const
487 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
491 /// Set the time we want the parser thread to maintain in the buffer
494 /// Number of milliseconds to keep in the buffers.
496 DSOEXPORT
void setBufferTime(boost::uint64_t t
)
498 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
502 /// Get timestamp of the next frame available, if any
504 /// @param ts will be set to timestamp of next available frame
/// NOTE(review): whether ts is left untouched when false is returned is not
/// visible from this declaration -- confirm before relying on it.
505 /// @return false if no frame is available yet
507 /// NOTE: locks _qMutex
509 DSOEXPORT
bool nextFrameTimestamp(boost::uint64_t& ts
) const;
511 /// Get timestamp of the video frame which would be returned on nextVideoFrame
/// @param ts output parameter, presumably set to that timestamp when true
/// is returned (by analogy with nextFrameTimestamp) -- TODO confirm.
513 /// @return false if there is no video frame left
514 /// (either none or no more)
516 /// NOTE: locks _qMutex
518 DSOEXPORT
bool nextVideoFrameTimestamp(boost::uint64_t& ts
) const;
520 /// Returns the next video frame in the parsed buffer, advancing video cursor.
522 /// If no frame has been played before, the first frame is returned.
523 /// If there are no more frames in the parsed buffer NULL is returned;
524 /// check parsingCompleted() to know whether that result is final.
///
/// The frame is handed out in a std::auto_ptr, so the caller owns it.
527 DSOEXPORT
std::auto_ptr
<EncodedVideoFrame
> nextVideoFrame();
529 /// Get timestamp of the audio frame which would be returned on nextAudioFrame
/// @param ts output parameter, presumably set to that timestamp when true
/// is returned (by analogy with nextFrameTimestamp) -- TODO confirm.
531 /// @return false if there is no audio frame left
532 /// (either none or no more)
534 /// NOTE: locks _qMutex
536 DSOEXPORT
bool nextAudioFrameTimestamp(boost::uint64_t& ts
) const;
538 /// Returns the next audio frame in the parsed buffer, advancing audio cursor.
540 /// If no frame has been played before, the first frame is returned.
541 /// If there are no more frames in the parsed buffer NULL is returned;
542 /// check parsingCompleted() to know whether that result is final.
///
/// The frame is handed out in a std::auto_ptr, so the caller owns it.
545 DSOEXPORT
std::auto_ptr
<EncodedAudioFrame
> nextAudioFrame();
547 /// Returns the VideoInfo object describing the video stream
549 /// @return a pointer to the VideoInfo describing the video stream,
550 /// or zero if unknown (no video or not enough data parsed yet).
///
/// The object stays owned by the _videoInfo auto_ptr member; callers must
/// not delete it.
552 VideoInfo
* getVideoInfo() { return _videoInfo
.get(); }
554 /// Returns the AudioInfo object describing the audio stream
556 /// @return a pointer to the AudioInfo describing the audio stream,
557 /// or zero if unknown (no audio or not enough data parsed yet).
///
/// The object stays owned by the _audioInfo auto_ptr member; callers must
/// not delete it.
559 AudioInfo
* getAudioInfo() { return _audioInfo
.get(); }
561 /// Return true if parsing is completed
563 /// If this function returns true, any call to nextVideoFrame()
564 /// or nextAudioFrame() will always return NULL
566 /// TODO: make thread-safe
/// (the _parsingComplete flag is read here without taking any lock)
568 bool parsingCompleted() const { return _parsingComplete
; }
570 /// Return true if indexing is completed
572 /// If this function returns false, parseNextChunk will
573 /// be called even when buffers are full. Parsers
574 /// supporting indexing separated from parsing should
575 /// override this method and have parseNextChunk figure
576 /// if they only need to index or to parse based on bufferFull.
///
/// This base implementation always returns true.
578 virtual bool indexingCompleted() const { return true; }
580 /// Return number of bytes parsed so far
///
/// This base implementation always returns 0 and does not read the
/// _bytesLoaded member; parsers tracking progress need to override it.
581 virtual boost::uint64_t getBytesLoaded() const { return 0; }
583 /// Return total number of bytes in input
584 boost::uint64_t getBytesTotal() const
586 return _stream
->size();
589 /// Parse next chunk of input
591 /// The implementations are required to parse a small chunk
592 /// of input, so as to avoid blocking too long if parsing conditions
593 /// change (ie: seek or destruction requested)
595 /// When LOAD_MEDIA_IN_A_SEPARATE_THREAD is defined, this should
596 /// never be called by users (consider protected).
///
/// Pure virtual: every concrete parser has to implement it.
/// NOTE(review): the meaning of the bool result is not documented in this
/// header -- confirm against the implementations.
598 virtual bool parseNextChunk()=0;
600 /// Retrieve any parsed metadata tags up to a specified timestamp.
602 /// @param ts The latest timestamp to retrieve metadata for.
603 /// @param tags This is filled with shared pointers to metatags in
604 /// timestamp order. Ownership of the data is shared. It
605 /// is destroyed automatically along with the last owner.
607 /// Metadata is currently only parsed from FLV streams.
/// NOTE(review): the default implementation presumably does nothing --
/// confirm in the definition.
609 virtual void fetchMetaTags(OrderedMetaTags
& tags
, boost::uint64_t ts
);
611 /// Get ID3 data from the parsed stream if it exists.
613 /// It's best to do this only once parsingCompleted() returns true.
/// @return the ID3 data wrapped in a boost::optional; presumably an empty
/// optional when the stream carries none -- TODO confirm.
614 virtual boost::optional
<Id3Info
> getId3Info() const;
618 /// Subclasses *must* set the following variables: @{
620 /// Info about the video stream (if any)
621 std::auto_ptr
<VideoInfo
> _videoInfo
;
623 /// Info about the audio stream (if any)
624 std::auto_ptr
<AudioInfo
> _audioInfo
;
626 /// Whether the parsing is complete or not
627 bool _parsingComplete
;
629 /// Number of bytes loaded
630 boost::uint64_t _bytesLoaded
;
634 /// Start the parser thread
/// NOTE(review): presumably creates _parserThread running
/// parserLoopStarter -- confirm in the definition.
635 void startParserThread();
637 /// Stop the parser thread
639 /// This method should always be called
640 /// by the destructors of subclasses, to ensure
641 /// the parser thread won't attempt to access
642 /// structures that have already been destroyed.
644 void stopParserThread();
646 /// Clear the a/v buffers
649 /// Push an encoded audio frame to buffer.
651 /// Will wait on a condition if buffer is full or parsing was completed
///
/// @param frame
///     Frame to queue. It is passed as a std::auto_ptr by value, so the
///     caller gives up ownership of it.
653 void pushEncodedAudioFrame(std::auto_ptr
<EncodedAudioFrame
> frame
);
655 /// Push an encoded video frame to buffer.
657 /// Will wait on a condition if buffer is full or parsing was completed
///
/// @param frame
///     Frame to queue. It is passed as a std::auto_ptr by value, so the
///     caller gives up ownership of it.
659 void pushEncodedVideoFrame(std::auto_ptr
<EncodedVideoFrame
> frame
);
661 /// The stream used to access the file
662 std::auto_ptr
<IOChannel
> _stream
;
663 mutable boost::mutex _streamMutex
;
665 static void parserLoopStarter(MediaParser
* mp
)
670 /// The parser loop runs in a separate thread
671 /// and calls parseNextChunk until killed.
673 /// parseNextChunk is expected to push encoded frames
674 /// on the queue, which may trigger the thread to be
675 /// put to sleep when queues are full or parsing
680 bool parserThreadKillRequested() const
682 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
683 return _parserThreadKillRequested
;
686 boost::uint64_t _bufferTime
;
687 mutable boost::mutex _bufferTimeMutex
;
689 std::auto_ptr
<boost::thread
> _parserThread
;
690 boost::barrier _parserThreadStartBarrier
;
691 mutable boost::mutex _parserThreadKillRequestMutex
;
692 bool _parserThreadKillRequested
;
693 boost::condition _parserThreadWakeup
;
695 /// Wait on the _parserThreadWakeup condition if buffer is full
696 /// or parsing was completed.
698 /// Callers *must* pass a locked lock on _qMutex
///
/// @param qMutexLock
///     The caller's scoped_lock on _qMutex, already locked.
700 void waitIfNeeded(boost::mutex::scoped_lock
& qMutexLock
);
/// Wake up the parser thread.
/// NOTE(review): presumably notifies the _parserThreadWakeup condition that
/// waitIfNeeded() waits on -- confirm in the definition.
702 void wakeupParserThread();
704 /// mutex protecting access to the a/v encoded frames queues
705 mutable boost::mutex _qMutex
;
707 /// Mutex protecting _bytesLoaded (read by main, set by parser)
708 mutable boost::mutex _bytesLoadedMutex
;
710 /// Check whether the buffer is full, without locking the _qMutex
713 /// This is intended to be called by waitIfNeeded, which
714 /// is passed a locked lock on _qMutex, and by parseNextChunk
715 /// to determine whether to index only or also push on the queue.
717 bool bufferFull() const;
719 /// On seek, this flag will be set, while holding a lock on _streamMutex.
720 /// The parser, once it has obtained a lock on _streamMutex, will check this
721 /// flag; if found to be true it will clear the buffers and reset it to false.
726 typedef std::deque
<EncodedVideoFrame
*> VideoFrames
;
727 typedef std::deque
<EncodedAudioFrame
*> AudioFrames
;
729 /// Return pointer to next encoded video frame in buffer
731 /// If no video is present, or the queue is empty, 0 is returned
733 /// NOTE: Caller is expected to hold a lock on _qMutex
735 const EncodedVideoFrame
* peekNextVideoFrame() const;
737 /// Return pointer to next encoded audio frame in buffer
739 /// If no audio is present, or the queue is empty, 0 is returned
741 /// NOTE: Caller is expected to hold a lock on _qMutex
743 const EncodedAudioFrame
* peekNextAudioFrame() const;
746 /// Queue of video frames (the video buffer)
748 /// Elements owned by this class.
750 VideoFrames _videoFrames
;
752 /// Queue of audio frames (the audio buffer)
754 /// Elements owned by this class.
756 AudioFrames _audioFrames
;
758 void requestParserThreadKill()
760 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
761 _parserThreadKillRequested
=true;
762 _parserThreadWakeup
.notify_all();
765 /// Return the difference between the timestamps of the last and first audio frame
/// NOTE(review): the result for an empty queue and the locking expected from
/// the caller are not documented here -- confirm in the definition.
766 boost::uint64_t audioBufferLength() const;
768 /// Return the difference between the timestamps of the last and first video frame
/// NOTE(review): the result for an empty queue and the locking expected from
/// the caller are not documented here -- confirm in the definition.
769 boost::uint64_t videoBufferLength() const;
771 /// Variant of getBufferLength that does not lock _qMutex (the caller is expected to hold it already).
772 boost::uint64_t getBufferLengthNoLock() const;
777 } // gnash.media namespace
780 #endif // GNASH_MEDIAPARSER_H