1 // MediaParser.h: Base class for media parsers
3 // Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 #ifndef GNASH_MEDIAPARSER_H
20 #define GNASH_MEDIAPARSER_H
22 #include <boost/scoped_array.hpp>
23 #include <boost/shared_ptr.hpp>
24 #include <boost/thread/thread.hpp>
25 #include <boost/thread/condition.hpp>
26 #include <boost/thread/barrier.hpp>
31 #include <iosfwd> // for output operator forward declarations
32 #include <boost/optional.hpp>
34 #include "IOChannel.h" // for inlines
35 #include "dsodefs.h" // DSOEXPORT
37 // Undefine this to load/parse media files in main thread
38 #define LOAD_MEDIA_IN_A_SEPARATE_THREAD 1
60 /// Disposable interlaced frames
64 /// The type of the codec id passed in the AudioInfo or VideoInfo class
67 /// The internal flash codec ids
74 /// Video codec ids as defined in flash
77 /// H263/SVQ3 video codec
81 VIDEO_CODEC_SCREENVIDEO
= 3,
83 /// On2 VP6 video codec
86 /// On2 VP6 Alpha video codec
89 /// Screenvideo2 codec
90 VIDEO_CODEC_SCREENVIDEO2
= 6,
92 /// MPEG-4 Part 10, or Advanced Video Coding
95 // NOTE: if you add more elements here remember to
96 // also add them to the output operator!
99 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const videoCodecType
& t
);
101 /// Audio codec ids as defined in flash
103 /// For some encodings, audio data is organized
104 /// in logical frames. The structure of such frames
105 /// (header/payload) is codec dependent.
106 /// The actual size of each frame may not be known
107 /// w/out parsing the encoded stream, as it
108 /// might be specified in the header of each frame.
110 /// Other encodings are looser about frames. For these
111 /// you can define a frame any way you want, as long
112 /// as a frame doesn't contain partial samples.
114 /// For FFMPEG, you can NOT construct a parser for the
115 /// loosely-framed codecs.
117 /// Parser-needing codecs will be documented as such.
121 /// Signed Linear PCM, unspecified byte order
123 /// Use of this codec is deprecated (but still supported) due to
124 /// the unspecified byte order (you can only play >8bit samples
125 /// in a sane way when the endianness of encoding and decoding
128 /// 90% of the time the actual encoder ran on Windows, so
129 /// it is a good bet to guess for little-endian.
130 /// SampleSize may be 8 or 16 bits.
136 /// SWF support 2, 3, 4, and 5 bits / sample.
137 /// ADPCM "frames" consist of 4096 ADPCM codes per channel.
139 /// For streaming there is no concept of "seekSamples" like
140 /// MP3 streaming implements. Thus ADPCM is suboptimal for
141 /// streaming as it is difficult to match sound frames with
143 /// Uncompressed SampleSize is always 16 bit.
145 AUDIO_CODEC_ADPCM
= 1,
149 /// MP3 is supported for SWF4 and later.
150 /// MP3 sound is structured in frames consisting of a fixed-size
151 /// header (32Bit) and compressed sound data. Compressed sound
152 /// data always contains a fixed number of sound samples (576 or 1152).
153 /// For streaming sound an additional field is necessary (seekSamples)
154 /// to keep track of samples exceeding movie frame border.
156 /// MP3 header contains all necessary information to decode a single
157 /// frame. From this information one can derive the number of samples
158 /// and the frame's size.
159 /// Uncompressed SampleSize is always 16 bit.
163 /// Linear PCM, strictly little-endian
164 AUDIO_CODEC_UNCOMPRESSED
= 3,
166 /// Proprietary simple format. Always 5Khz mono ?
169 /// Data is organized in frames of 256 samples.
171 AUDIO_CODEC_NELLYMOSER_8HZ_MONO
= 5,
173 /// Proprietary simple format
176 /// Data is organized in frames of 256 samples.
178 AUDIO_CODEC_NELLYMOSER
= 6,
180 /// Advanced Audio Coding
181 AUDIO_CODEC_AAC
= 10,
183 /// Always 16kHz mono
184 AUDIO_CODEC_SPEEX
= 11
186 // NOTE: if you add more elements here remember to
187 // also add them to the output operator!
190 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const audioCodecType
& t
);
192 /// Information about an audio stream
194 /// The information stored is codec-id,
195 /// samplerate, samplesize, stereo, duration and codec-type.
197 /// Additionally, an abstract ExtraInfo can be held.
204 /// Construct an AudioInfo object
208 /// To be interpreted as a media::audioCodecType if the typei
209 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
210 /// for codec information transfer between a MediaParser and a
211 /// AudioDecoder from the same %media handler module.
213 /// @param sampleRatei
214 /// Nominal sample rate.
215 /// @todo document units.
217 /// @param sampleSizei
218 /// Sample size, in bytes.
221 /// Sample type (stereo if true, mono otherwise).
222 /// @todo document if and how interpretation of sampleSizei changes
225 /// Nominal audio stream duration, in milliseconds.
228 /// Changes interpretation of the codeci parameter.
230 AudioInfo(int codeci
, boost::uint16_t sampleRatei
,
231 boost::uint16_t sampleSizei
, bool stereoi
,
232 boost::uint64_t durationi
, codecType typei
)
235 sampleRate(sampleRatei
),
236 sampleSize(sampleSizei
),
245 /// This has to be interpreted as audioCodecType if codecType type is CODEC_TYPE_FLASH
246 /// or interpretation is opaque and we rely on the assumption that the AudioInfo
247 /// creator and the AudioInfo user have a way to get a shared interpretation
251 boost::uint16_t sampleRate
;
253 /// Size of each sample, in bytes
254 boost::uint16_t sampleSize
;
258 boost::uint64_t duration
;
262 /// Extra info about an audio stream
264 /// Abstract class to hold any additional info
265 /// when required for proper decoder initialization.
269 virtual ~ExtraInfo() {}
272 /// Extra info about the audio stream, if and when needed
274 /// Could be ExtraAudioInfoFlv or a media-handler specific info
276 std::auto_ptr
<ExtraInfo
> extra
;
279 /// Information about a video stream
281 /// The information stored is codec-id, width, height, framerate and duration.
283 /// Additionally, an abstract ExtraInfo can be held.
289 /// Construct a VideoInfo object
293 /// To be interpreted as a media::videoCodecType if the typei
294 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
295 /// for codec information transfer between a MediaParser and a
296 /// VideoDecoder from the same %media handler module.
299 /// Video frame width.
300 /// @todo check if still needed.
303 /// Video frame height.
304 /// @todo check if still needed.
306 /// @param frameRatei
307 /// Nominal video frame rate.
308 /// @todo document units.
311 /// Nominal video duration.
312 /// @todo check if still needed, if so document units!
315 /// Changes interpretation of the codeci parameter.
317 VideoInfo(int codeci
, boost::uint16_t widthi
, boost::uint16_t heighti
,
318 boost::uint16_t frameRatei
, boost::uint64_t durationi
,
324 frameRate(frameRatei
),
331 boost::uint16_t width
;
332 boost::uint16_t height
;
333 boost::uint16_t frameRate
;
334 boost::uint64_t duration
;
337 /// Extra info about a video stream
339 /// Abstract class to hold any additional info
340 /// when required for proper decoder initialization
344 virtual ~ExtraInfo() {}
347 /// Extra info about the video stream, if and when needed
349 /// Could be ExtraVideoInfoFlv or a media-handler specific info
351 std::auto_ptr
<ExtraInfo
> extra
;
354 DSOEXPORT
std::ostream
& operator << (std::ostream
& os
, const VideoInfo
& vi
);
357 class EncodedExtraData
{
360 virtual ~EncodedExtraData() {}
364 /// An encoded video frame
365 class EncodedVideoFrame
369 /// Create an encoded video frame
372 /// Data buffer, ownership transferred
375 /// Size of the data buffer
381 /// Presentation timestamp, in milliseconds.
383 EncodedVideoFrame(boost::uint8_t* data
, boost::uint32_t size
,
384 unsigned int frameNum
,
385 boost::uint64_t timestamp
=0)
390 _timestamp(timestamp
)
393 /// Return pointer to actual data. Ownership retained by this class.
394 const boost::uint8_t* data() const { return _data
.get(); }
396 /// Return size of data buffer.
397 boost::uint32_t dataSize() const { return _size
; }
399 /// Return video frame presentation timestamp
400 boost::uint64_t timestamp() const { return _timestamp
; }
402 /// Return video frame number
403 unsigned frameNum() const { return _frameNum
; }
405 // FIXME: should have better encapsulation for this sort of stuff.
406 std::auto_ptr
<EncodedExtraData
> extradata
;
409 boost::uint32_t _size
;
410 boost::scoped_array
<boost::uint8_t> _data
;
411 unsigned int _frameNum
;
412 boost::uint64_t _timestamp
;
415 /// An encoded audio frame
416 class EncodedAudioFrame
419 boost::uint32_t dataSize
;
420 boost::scoped_array
<boost::uint8_t> data
;
421 boost::uint64_t timestamp
;
423 // FIXME: should have better encapsulation for this sort of stuff.
424 std::auto_ptr
<EncodedExtraData
> extradata
;
427 /// The MediaParser class provides cursor-based access to encoded %media frames
429 /// Cursor-based access allows seeking as close as possible to a specified time
430 /// and fetching frames from there on, sequentially.
431 /// See seek(), nextVideoFrame(), nextAudioFrame()
433 /// Input is received from an IOChannel object.
439 /// A container for executable MetaTags contained in media streams.
441 /// Presently only known in FLV.
442 typedef std::multimap
<boost::uint64_t, boost::shared_ptr
<SimpleBuffer
> >
445 typedef std::vector
<MetaTags::mapped_type
> OrderedMetaTags
;
446 MediaParser(std::auto_ptr
<IOChannel
> stream
);
448 // Classes with virtual methods (virtual classes)
449 // must have a virtual destructor, or the destructors
450 // of subclasses will never be invoked, typically resulting
453 virtual ~MediaParser();
456 /// Seeks to the closest possible position to the given position,
457 /// and returns the new position.
460 /// @param time input/output parameter, input requests a time, output
461 /// returns the actual time seeked to.
463 /// @return true if the seek was valid, false otherwise.
465 virtual bool seek(boost::uint32_t& time
)=0;
467 /// Returns minimum length of available buffers in milliseconds
469 /// TODO: FIXME: NOTE: this is currently used by NetStream.bufferLength
470 /// but is bogus as it doesn't take the *current* playhead cursor time
471 /// into account. A proper way would be having a getLastBufferTime ()
472 /// interface here, returning minimum timestamp of last available
473 /// frames and let NetStream::bufferLength() use that with playhead
474 /// time to find out...
476 DSOEXPORT
boost::uint64_t getBufferLength() const;
478 /// Return true if both audio and video buffers are empty
480 /// NOTE: locks _qMutex
481 DSOEXPORT
bool isBufferEmpty() const;
483 /// Return the time we want the parser thread to maintain in the buffer
484 DSOEXPORT
boost::uint64_t getBufferTime() const
486 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
490 /// Set the time we want the parser thread to maintain in the buffer
493 /// Number of milliseconds to keep in the buffers.
495 DSOEXPORT
void setBufferTime(boost::uint64_t t
)
497 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
501 /// Get timestamp of the next frame available, if any
503 /// @param ts will be set to timestamp of next available frame
504 /// @return false if no frame is available yet
506 /// NOTE: locks _qMutex
508 DSOEXPORT
bool nextFrameTimestamp(boost::uint64_t& ts
) const;
510 /// Get timestamp of the video frame which would be returned on nextVideoFrame
512 /// @return false if there is no video frame left
513 /// (either none or no more)
515 /// NOTE: locks _qMutex
517 DSOEXPORT
bool nextVideoFrameTimestamp(boost::uint64_t& ts
) const;
519 /// Returns the next video frame in the parsed buffer, advancing video cursor.
521 /// If no frame has been played before the first frame is returned.
522 /// If there are no more frames in the parsed buffer NULL is returned.
523 /// You can check with parsingCompleted() to know whether this is due to
526 DSOEXPORT
std::auto_ptr
<EncodedVideoFrame
> nextVideoFrame();
528 /// Get timestamp of the audio frame which would be returned on nextAudioFrame
530 /// @return false if there is no audio frame left
531 /// (either none or no more)
533 /// NOTE: locks _qMutex
535 DSOEXPORT
bool nextAudioFrameTimestamp(boost::uint64_t& ts
) const;
537 /// Returns the next audio frame in the parsed buffer, advancing audio cursor.
539 /// If no frame has been played before the first frame is returned.
540 /// If there are no more frames in the parsed buffer NULL is returned.
541 /// You can check with parsingCompleted() to know whether this is due to
544 DSOEXPORT
std::auto_ptr
<EncodedAudioFrame
> nextAudioFrame();
546 /// Returns a VideoInfo class about the videostream
548 /// @return a VideoInfo class about the videostream,
549 /// or zero if unknown (no video or not enough data parsed yet).
551 VideoInfo
* getVideoInfo() { return _videoInfo
.get(); }
553 /// Returns a AudioInfo class about the audiostream
555 /// @return a AudioInfo class about the audiostream,
556 /// or zero if unknown (no audio or not enough data parsed yet).
558 AudioInfo
* getAudioInfo() { return _audioInfo
.get(); }
560 /// Return true of parsing is completed
562 /// If this function returns true, any call to nextVideoFrame()
563 /// or nextAudioFrame() will always return NULL
565 /// TODO: make thread-safe
567 bool parsingCompleted() const { return _parsingComplete
; }
569 /// Return true of indexing is completed
571 /// If this function returns false, parseNextChunk will
572 /// be called even when buffers are full. Parsers
573 /// supporting indexing separated from parsing should
574 /// override this method and have parseNextChunk figure
575 /// if they only need to index or to parse based on bufferFull.
577 virtual bool indexingCompleted() const { return true; }
579 /// Return number of bytes parsed so far
580 virtual boost::uint64_t getBytesLoaded() const { return 0; }
582 /// Return total number of bytes in input
583 boost::uint64_t getBytesTotal() const
585 return _stream
->size();
588 /// Parse next chunk of input
590 /// The implementations are required to parse a small chunk
591 /// of input, so to avoid blocking too much if parsing conditions
592 /// change (ie: seek or destruction requested)
594 /// When LOAD_MEDIA_IN_A_SEPARATE_THREAD is defined, this should
595 /// never be called by users (consider protected).
597 virtual bool parseNextChunk()=0;
599 /// Retrieve any parsed metadata tags up to a specified timestamp.
601 /// @param ts The latest timestamp to retrieve metadata for.
602 /// @param tags This is filled with shared pointers to metatags in
603 /// timestamp order. Ownership of the data is shared. It
604 /// is destroyed automatically along with the last owner.
606 /// Metadata is currently only parsed from FLV streams. The default
608 virtual void fetchMetaTags(OrderedMetaTags
& tags
, boost::uint64_t ts
);
610 /// Get ID3 data from the parsed stream if it exists.
612 /// It's best to do this only when parsingComplete is true.
613 virtual boost::optional
<Id3Info
> getId3Info() const;
617 /// Subclasses *must* set the following variables: @{
619 /// Info about the video stream (if any)
620 std::auto_ptr
<VideoInfo
> _videoInfo
;
622 /// Info about the audio stream (if any)
623 std::auto_ptr
<AudioInfo
> _audioInfo
;
625 /// Whether the parsing is complete or not
626 bool _parsingComplete
;
628 /// Number of bytes loaded
629 boost::uint64_t _bytesLoaded
;
633 /// Start the parser thread
634 void startParserThread();
636 /// Stop the parser thread
638 /// This method should be always called
639 /// by destructors of subclasses to ensure
640 /// the parser thread won't attempt to access
641 /// destroyed structures.
643 void stopParserThread();
645 /// Clear the a/v buffers
648 /// Push an encoded audio frame to buffer.
650 /// Will wait on a condition if buffer is full or parsing was completed
652 void pushEncodedAudioFrame(std::auto_ptr
<EncodedAudioFrame
> frame
);
654 /// Push an encoded video frame to buffer.
656 /// Will wait on a condition if buffer is full or parsing was completed
658 void pushEncodedVideoFrame(std::auto_ptr
<EncodedVideoFrame
> frame
);
660 /// The stream used to access the file
661 std::auto_ptr
<IOChannel
> _stream
;
662 mutable boost::mutex _streamMutex
;
664 static void parserLoopStarter(MediaParser
* mp
)
669 /// The parser loop runs in a separate thread
670 /// and calls parseNextChunk until killed.
672 /// parseNextChunk is expected to push encoded frames
673 /// on the queue, which may trigger the thread to be
674 /// put to sleep when queues are full or parsing
679 bool parserThreadKillRequested() const
681 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
682 return _parserThreadKillRequested
;
685 boost::uint64_t _bufferTime
;
686 mutable boost::mutex _bufferTimeMutex
;
688 std::auto_ptr
<boost::thread
> _parserThread
;
689 boost::barrier _parserThreadStartBarrier
;
690 mutable boost::mutex _parserThreadKillRequestMutex
;
691 bool _parserThreadKillRequested
;
692 boost::condition _parserThreadWakeup
;
694 /// Wait on the _parserThreadWakeup condition if buffer is full
695 /// or parsing was completed.
697 /// Callers *must* pass a locked lock on _qMutex
699 void waitIfNeeded(boost::mutex::scoped_lock
& qMutexLock
);
701 void wakeupParserThread();
703 /// mutex protecting access to the a/v encoded frames queues
704 mutable boost::mutex _qMutex
;
706 /// Mutex protecting _bytesLoaded (read by main, set by parser)
707 mutable boost::mutex _bytesLoadedMutex
;
709 /// Method to check if buffer is full w/out locking the _qMutex
712 /// This is intended for being called by waitIfNeeded, which
713 /// is passed a locked lock on _qMutex, and by parseNextChunk
714 /// to determine whether to index-only or also push on queue.
716 bool bufferFull() const;
718 /// On seek, this flag will be set, while holding a lock on _streamMutex.
719 /// The parser, once it has obtained a lock on _streamMutex, will check this
720 /// flag and, if it is found to be true, will clear the buffers and reset it to false.
725 typedef std::deque
<EncodedVideoFrame
*> VideoFrames
;
726 typedef std::deque
<EncodedAudioFrame
*> AudioFrames
;
728 /// Return pointer to next encoded video frame in buffer
730 /// If no video is present, or queue is empty, 0 is returned
732 /// NOTE: Caller is expected to hold a lock on _qMutex
734 const EncodedVideoFrame
* peekNextVideoFrame() const;
736 /// Return pointer to next encoded audio frame in buffer
738 /// If no audio is present, or queue is empty, 0 is returned
740 /// NOTE: Caller is expected to hold a lock on _qMutex
742 const EncodedAudioFrame
* peekNextAudioFrame() const;
745 /// Queue of video frames (the video buffer)
747 /// Elements owned by this class.
749 VideoFrames _videoFrames
;
751 /// Queue of audio frames (the audio buffer)
753 /// Elements owned by this class.
755 AudioFrames _audioFrames
;
757 void requestParserThreadKill()
759 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
760 _parserThreadKillRequested
=true;
761 _parserThreadWakeup
.notify_all();
764 /// Return diff between timestamp of last and first audio frame
765 boost::uint64_t audioBufferLength() const;
767 /// Return diff between timestamp of last and first video frame
768 boost::uint64_t videoBufferLength() const;
770 /// A getBufferLength method not locking the _qMutex (expected to be locked by caller already).
771 boost::uint64_t getBufferLengthNoLock() const;
776 } // gnash.media namespace
779 #endif // GNASH_MEDIAPARSER_H