1 // MediaParser.h: Base class for media parsers
3 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 #ifndef GNASH_MEDIAPARSER_H
20 #define GNASH_MEDIAPARSER_H
22 #include "IOChannel.h" // for inlines
23 #include "dsodefs.h" // DSOEXPORT
25 #include <boost/scoped_array.hpp>
26 #include <boost/shared_ptr.hpp>
27 #include <boost/thread/thread.hpp>
28 #include <boost/thread/condition.hpp>
29 #include <boost/thread/barrier.hpp>
34 #include <iosfwd> // for output operator forward declarations
36 // Undefine this to load/parse media files in main thread
37 #define LOAD_MEDIA_IN_A_SEPARATE_THREAD 1
56 /// Disposable interlaced frames
60 /// The type of the codec id passed in the AudioInfo or VideoInfo class
63 /// The internal flash codec ids
70 /// Video codec ids as defined in flash
73 /// H263/SVQ3 video codec
77 VIDEO_CODEC_SCREENVIDEO
= 3,
79 /// On2 VP6 video codec
82 /// On2 VP6 Alpha video codec
85 /// Screenvideo2 codec
86 VIDEO_CODEC_SCREENVIDEO2
= 6,
88 /// MPEG-4 Part 10, or Advanced Video Coding
91 // NOTE: if you add more elements here remember to
92 // also add them to the output operator!
/// Stream output operator for videoCodecType values.
///
/// @param os Stream to write to.
/// @param t  Video codec id to print.
/// @return presumably os, to allow chaining (definition not visible here).
95 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const videoCodecType
& t
);
97 /// Audio codec ids as defined in flash
99 /// For some encodings, audio data is organized
100 /// in logical frames. The structure of such frames
101 /// (header/payload) is codec dependent.
102 /// The actual size of each frame may not be known
103 /// w/out parsing the encoded stream, as it
104 /// might be specified in the header of each frame.
106 /// Other encodings are looser on frames. For these
107 /// you can define a frame any way you want, as long
108 /// as a frame doesn't contain partial samples.
110 /// For FFMPEG, you can NOT construct a parser for the
111 /// loosely-framed codecs.
113 /// Parser-needing codecs will be documented as such.
117 /// Linear PCM, unspecified byte order
119 /// Use of this codec is deprecated (but still supported) due to
120 /// the unspecified byte order (you can only play >8bit samples
121 /// in a sane way when the endianness of encoding and decoding
124 /// 90% of the time the actual encoder did run on windows, so
125 /// it is a good bet to guess for little-endian.
126 /// SampleSize may be 8 or 16 bits.
132 /// SWF support 2, 3, 4, and 5 bits / sample.
133 /// ADPCM "frames" consist of 4096 ADPCM codes per channel.
135 /// For streaming there is no concept of "seekSamples" like
136 /// MP3 streaming implements. Thus ADPCM is suboptimal for
137 /// streaming as it is difficult to match sound frames with
139 /// Uncompressed SampleSize is always 16 bit.
141 AUDIO_CODEC_ADPCM
= 1,
145 /// MP3 is supported for SWF4 and later.
146 /// MP3 sound is structured in frames consisting of a fixed-size
147 /// header (32Bit) and compressed sound data. Compressed sound
148 /// data always contains a fixed number of sound samples (576 or 1152).
149 /// For streaming sound an additional field is necessary (seekSamples)
150 /// to keep track of samples exceeding movie frame border.
152 /// MP3 header contains all necessary information to decode a single
153 /// frame. From this information one can derive the number of samples
154 /// and the frame's size.
155 /// Uncompressed SampleSize is always 16 bit.
159 /// Linear PCM, strictly little-endian
160 AUDIO_CODEC_UNCOMPRESSED
= 3,
162 /// Proprietary simple format. Always 5Khz mono ?
165 /// Data is organized in frames of 256 samples.
167 AUDIO_CODEC_NELLYMOSER_8HZ_MONO
= 5,
169 /// Proprietary simple format
172 /// Data is organized in frames of 256 samples.
174 AUDIO_CODEC_NELLYMOSER
= 6,
176 /// Advanced Audio Coding
177 AUDIO_CODEC_AAC
= 10,
179 /// Always 16kHz mono
180 AUDIO_CODEC_SPEEX
= 11
182 // NOTE: if you add more elements here remember to
183 // also add them to the output operator!
/// Stream output operator for audioCodecType values.
///
/// @param os Stream to write to.
/// @param t  Audio codec id to print.
/// @return presumably os, to allow chaining (definition not visible here).
186 DSOEXPORT
std::ostream
& operator<< (std::ostream
& os
, const audioCodecType
& t
);
188 /// Information about an audio stream
190 /// The information stored is codec-id,
191 /// samplerate, samplesize, stereo, duration and codec-type.
193 /// Additionally, an abstract ExtraInfo can be held.
200 /// Construct an AudioInfo object
204 /// To be interpreted as a media::audioCodecType if the typei
205 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
206 /// for codec information transfer between a MediaParser and a
207 /// AudioDecoder from the same %media handler module.
209 /// @param sampleRatei
210 /// Nominal sample rate.
211 /// @todo document units.
213 /// @param sampleSizei
214 /// Sample size, in bytes.
217 /// Sample type (stereo if true, mono otherwise).
218 /// @todo document if and how interpretation of sampleSizei changes
221 /// Nominal audio stream duration, in milliseconds.
224 /// Changes interpretation of the codeci parameter.
226 AudioInfo(int codeci
, boost::uint16_t sampleRatei
,
227 boost::uint16_t sampleSizei
, bool stereoi
,
228 boost::uint64_t durationi
, codecType typei
)
231 sampleRate(sampleRatei
),
232 sampleSize(sampleSizei
),
241 /// This has to be interpreted as audioCodecType if codecType type is CODEC_TYPE_FLASH
242 /// or interpretation is opaque and we rely on the assumption that the AudioInfo
243 /// creator and the AudioInfo user have a way to get a shared interpretation
247 boost::uint16_t sampleRate
;
249 /// Size of each sample, in bytes
250 boost::uint16_t sampleSize
;
254 boost::uint64_t duration
;
258 /// Extra info about an audio stream
260 /// Abstract class to hold any additional info
261 /// when required for proper decoder initialization.
265 virtual ~ExtraInfo() {}
268 /// Extra info about audio stream, if and when needed
270 /// Could be ExtraAudioInfoFlv or a media-handler specific info
272 std::auto_ptr
<ExtraInfo
> extra
;
275 /// Information about a video stream
277 /// The information stored is codec-id, width, height, framerate and duration.
279 /// Additionally, an abstract ExtraInfo can be held.
285 /// Construct a VideoInfo object
289 /// To be interpreted as a media::videoCodecType if the typei
290 /// parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
291 /// for codec information transfer between a MediaParser and a
292 /// VideoDecoder from the same %media handler module.
295 /// Video frame width.
296 /// @todo check if still needed.
299 /// Video frame height.
300 /// @todo check if still needed.
302 /// @param frameRatei
303 /// Nominal video frame rate.
304 /// @todo document units.
307 /// Nominal video duration.
308 /// @todo check if still needed, if so document units!
311 /// Changes interpretation of the codeci parameter.
313 VideoInfo(int codeci
, boost::uint16_t widthi
, boost::uint16_t heighti
,
314 boost::uint16_t frameRatei
, boost::uint64_t durationi
,
320 frameRate(frameRatei
),
327 boost::uint16_t width
;
328 boost::uint16_t height
;
329 boost::uint16_t frameRate
;
330 boost::uint64_t duration
;
333 /// Extra info about a video stream
335 /// Abstract class to hold any additional info
336 /// when required for proper decoder initialization
340 virtual ~ExtraInfo() {}
343 /// Extra info about video stream, if and when needed
345 /// Could be ExtraVideoInfoFlv or a media-handler specific info
347 std::auto_ptr
<ExtraInfo
> extra
;
/// Stream output operator for VideoInfo objects.
///
/// @param os Stream to write to.
/// @param vi Video stream information to print.
/// @return presumably os, to allow chaining (definition not visible here).
350 DSOEXPORT
std::ostream
& operator << (std::ostream
& os
, const VideoInfo
& vi
);
353 class EncodedExtraData
{
356 virtual ~EncodedExtraData() {}
360 /// An encoded video frame
361 class EncodedVideoFrame
365 /// Create an encoded video frame
368 /// Data buffer, ownership transferred
371 /// Size of the data buffer
377 /// Presentation timestamp, in milliseconds.
379 EncodedVideoFrame(boost::uint8_t* data
, boost::uint32_t size
,
380 unsigned int frameNum
,
381 boost::uint64_t timestamp
=0)
386 _timestamp(timestamp
)
389 /// Return a read-only pointer to the encoded data of this frame.
/// Ownership is retained by this class: the pointer comes from the
/// _data scoped_array, so callers must not free it.
390 const boost::uint8_t* data() const { return _data
.get(); }
392 /// Return size of the data buffer (the value stored in _size;
/// presumably in bytes — TODO confirm).
393 boost::uint32_t dataSize() const { return _size
; }
395 /// Return video frame presentation timestamp, as stored in _timestamp
/// (documented as milliseconds on the constructor parameter).
396 boost::uint64_t timestamp() const { return _timestamp
; }
398 /// Return video frame number, as stored in _frameNum.
399 unsigned frameNum() const { return _frameNum
; }
401 // FIXME: should have better encapsulation for this sort of stuff.
402 std::auto_ptr
<EncodedExtraData
> extradata
;
405 boost::uint32_t _size
;
406 boost::scoped_array
<boost::uint8_t> _data
;
407 unsigned int _frameNum
;
408 boost::uint64_t _timestamp
;
411 /// An encoded audio frame
412 class EncodedAudioFrame
415 boost::uint32_t dataSize
;
416 boost::scoped_array
<boost::uint8_t> data
;
417 boost::uint64_t timestamp
;
419 // FIXME: should have better encapsulation for this sort of stuff.
420 std::auto_ptr
<EncodedExtraData
> extradata
;
423 /// The MediaParser class provides cursor-based access to encoded %media frames
425 /// Cursor-based access allows seeking as close as possible to a specified time
426 /// and fetching frames from there on, sequentially.
427 /// See seek(), nextVideoFrame(), nextAudioFrame()
429 /// Input is received from an IOChannel object.
435 /// A container for executable MetaTags contained in media streams.
437 /// Presently only known in FLV.
438 typedef std::multimap
<boost::uint64_t, boost::shared_ptr
<SimpleBuffer
> >
441 typedef std::vector
<MetaTags::mapped_type
> OrderedMetaTags
;
/// Construct a MediaParser reading from the given input stream.
///
/// @param stream
///     Input channel. Passed as std::auto_ptr by value, so ownership is
///     handed over to the parser (presumably stored in _stream; the
///     definition is not visible in this header).
442 MediaParser(std::auto_ptr
<IOChannel
> stream
);
444 // Classes with virtual methods (virtual classes)
445 // must have a virtual destructor, or the destructors
446 // of subclasses will never be invoked, typically resulting
449 virtual ~MediaParser();
452 /// Seeks to the closest possible position to the given position,
453 /// and returns the new position.
456 /// @param time input/output parameter: on input the requested time, on
457 /// output the actual time seeked to.
459 /// @return true if the seek was valid, false otherwise.
461 virtual bool seek(boost::uint32_t& time
)=0;
463 /// Returns minimum length of available buffers in milliseconds
465 /// TODO: FIXME: NOTE: this is currently used by NetStream.bufferLength
466 /// but is bogus as it doesn't take the *current* playhead cursor time
467 /// into account. A proper way would be having a getLastBufferTime ()
468 /// interface here, returning minimum timestamp of last available
469 /// frames and let NetStream::bufferLength() use that with playhead
470 /// time to find out...
472 DSOEXPORT
boost::uint64_t getBufferLength() const;
474 /// Return the time we want the parser thread to maintain in the buffer
475 DSOEXPORT
boost::uint64_t getBufferTime() const
477 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
481 /// Set the time we want the parser thread to maintain in the buffer
484 /// Number of milliseconds to keep in the buffers.
486 DSOEXPORT
void setBufferTime(boost::uint64_t t
)
488 boost::mutex::scoped_lock
lock(_bufferTimeMutex
);
492 /// Get timestamp of the next frame available, if any
/// NOTE(review): how audio and video queues are combined to pick the
/// "next" frame is decided in the definition, not visible here.
494 /// @param ts will be set to timestamp of next available frame
495 /// @return false if no frame is available yet
497 DSOEXPORT
bool nextFrameTimestamp(boost::uint64_t& ts
) const;
499 /// Get timestamp of the video frame which would be returned by nextVideoFrame()
/// @param ts output parameter receiving the timestamp
///     (presumably only meaningful when true is returned — TODO confirm)
501 /// @return false if there is no video frame left
502 /// (either none or no more)
504 DSOEXPORT
bool nextVideoFrameTimestamp(boost::uint64_t& ts
) const;
506 /// Returns the next video frame in the parsed buffer, advancing video cursor.
508 /// If no frame has been played before, the first frame is returned.
509 /// If there are no more frames in the parsed buffer NULL is returned;
510 /// you can check with parsingCompleted() to know whether this is due to
513 DSOEXPORT
std::auto_ptr
<EncodedVideoFrame
> nextVideoFrame();
515 /// Get timestamp of the audio frame which would be returned by nextAudioFrame()
/// @param ts output parameter receiving the timestamp
///     (presumably only meaningful when true is returned — TODO confirm)
517 /// @return false if there is no audio frame left
518 /// (either none or no more)
520 DSOEXPORT
bool nextAudioFrameTimestamp(boost::uint64_t& ts
) const;
522 /// Returns the next audio frame in the parsed buffer, advancing audio cursor.
524 /// If no frame has been played before, the first frame is returned.
525 /// If there are no more frames in the parsed buffer NULL is returned;
526 /// you can check with parsingCompleted() to know whether this is due to
529 DSOEXPORT
std::auto_ptr
<EncodedAudioFrame
> nextAudioFrame();
531 /// Returns a pointer to the VideoInfo describing the video stream
/// The pointer is taken from the _videoInfo auto_ptr via get(), so
/// ownership stays with the parser.
533 /// @return a VideoInfo object about the video stream,
534 /// or zero if unknown (no video or not enough data parsed yet).
536 VideoInfo
* getVideoInfo() { return _videoInfo
.get(); }
538 /// Returns a pointer to the AudioInfo describing the audio stream
/// The pointer is taken from the _audioInfo auto_ptr via get(), so
/// ownership stays with the parser.
540 /// @return an AudioInfo object about the audio stream,
541 /// or zero if unknown (no audio or not enough data parsed yet).
543 AudioInfo
* getAudioInfo() { return _audioInfo
.get(); }
545 /// Return true if parsing is completed
547 /// If this function returns true, any call to nextVideoFrame()
548 /// or nextAudioFrame() will always return NULL
550 /// TODO: make thread-safe (reads _parsingComplete without taking a lock)
552 bool parsingCompleted() const { return _parsingComplete
; }
554 /// Return true if indexing is completed
556 /// If this function returns false, parseNextChunk will
557 /// be called even when buffers are full. Parsers
558 /// supporting indexing separated from parsing should
559 /// override this method and have parseNextChunk figure
560 /// if they only need to index or to parse based on bufferFull.
/// The default implementation always returns true.
562 virtual bool indexingCompleted() const { return true; }
564 /// Return number of bytes parsed so far
/// The default implementation always returns 0; subclasses may override.
565 virtual boost::uint64_t getBytesLoaded() const { return 0; }
567 /// Return total number of bytes in input
568 boost::uint64_t getBytesTotal() const
570 return _stream
->size();
573 /// Parse next chunk of input
575 /// The implementations are required to parse a small chunk
576 /// of input, so as to avoid blocking too much if parsing conditions
577 /// change (ie: seek or destruction requested)
579 /// When LOAD_MEDIA_IN_A_SEPARATE_THREAD is defined, this should
580 /// never be called by users (consider protected).
/// @return NOTE(review): the meaning of the boolean result is not
///     documented in this header — confirm against implementations.
582 virtual bool parseNextChunk()=0;
584 /// Retrieve any parsed metadata tags up to a specified timestamp.
586 /// @param ts The latest timestamp to retrieve metadata for.
587 /// @param tags This is filled with shared pointers to metatags in
588 /// timestamp order. Ownership of the data is shared. It
589 /// is destroyed automatically along with the last owner.
591 /// Metadata is currently only parsed from FLV streams. The default
593 virtual void fetchMetaTags(OrderedMetaTags
& tags
, boost::uint64_t ts
);
597 /// Subclasses *must* set the following variables: @{
599 /// Info about the video stream (if any)
600 std::auto_ptr
<VideoInfo
> _videoInfo
;
602 /// Info about the audio stream (if any)
603 std::auto_ptr
<AudioInfo
> _audioInfo
;
605 /// Whether the parsing is complete or not
606 bool _parsingComplete
;
608 /// Number of bytes loaded
609 boost::uint64_t _bytesLoaded
;
613 /// Start the parser thread
/// (presumably the thread held in _parserThread — definition not visible here)
614 void startParserThread();
616 /// Stop the parser thread
618 /// This method should always be called
619 /// by destructors of subclasses to ensure
620 /// the parser thread won't attempt to access
621 /// destroyed structures.
623 void stopParserThread();
625 /// Clear the a/v buffers
628 /// Push an encoded audio frame to buffer.
630 /// Will wait on a condition if buffer is full or parsing was completed
/// @param frame Frame to enqueue; passed as std::auto_ptr by value, so
///     ownership is handed over by the caller.
632 void pushEncodedAudioFrame(std::auto_ptr
<EncodedAudioFrame
> frame
);
634 /// Push an encoded video frame to buffer.
636 /// Will wait on a condition if buffer is full or parsing was completed
/// @param frame Frame to enqueue; passed as std::auto_ptr by value, so
///     ownership is handed over by the caller.
638 void pushEncodedVideoFrame(std::auto_ptr
<EncodedVideoFrame
> frame
);
640 /// Return pointer to next encoded video frame in buffer
642 /// If no video is present, or the queue is empty, 0 is returned
644 const EncodedVideoFrame
* peekNextVideoFrame() const;
646 /// Return pointer to next encoded audio frame in buffer
648 /// If no audio is present, or the queue is empty, 0 is returned
650 const EncodedAudioFrame
* peekNextAudioFrame() const;
652 /// The stream used to access the file
653 std::auto_ptr
<IOChannel
> _stream
;
654 mutable boost::mutex _streamMutex
;
656 static void parserLoopStarter(MediaParser
* mp
)
661 /// The parser loop runs in a separate thread
662 /// and calls parseNextChunk until killed.
664 /// parseNextChunk is expected to push encoded frames
665 /// on the queue, which may trigger the thread to be
666 /// put to sleep when queues are full or parsing
671 bool parserThreadKillRequested() const
673 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
674 return _parserThreadKillRequested
;
677 boost::uint64_t _bufferTime
;
678 mutable boost::mutex _bufferTimeMutex
;
680 std::auto_ptr
<boost::thread
> _parserThread
;
681 boost::barrier _parserThreadStartBarrier
;
682 mutable boost::mutex _parserThreadKillRequestMutex
;
683 bool _parserThreadKillRequested
;
684 boost::condition _parserThreadWakeup
;
686 /// Wait on the _parserThreadWakeup condition if buffer is full
687 /// or parsing was completed.
689 /// Callers *must* pass a locked lock on _qMutex
/// @param qMutexLock The caller's already-locked scoped_lock on _qMutex.
691 void waitIfNeeded(boost::mutex::scoped_lock
& qMutexLock
);
/// Wake up the parser thread.
/// NOTE(review): presumably notifies _parserThreadWakeup; the definition
/// is not visible in this header — confirm.
693 void wakeupParserThread();
695 /// mutex protecting access to the a/v encoded frames queues
696 mutable boost::mutex _qMutex
;
699 /// Mutex protecting _bytesLoaded (read by main, set by parser)
700 mutable boost::mutex _bytesLoadedMutex
;
702 /// Method to check if buffer is full w/out locking the _qMutex
705 /// This is intended to be called by waitIfNeeded, which
706 /// is passed a locked lock on _qMutex, and by parseNextChunk
707 /// to determine whether to index-only or also push on queue.
709 bool bufferFull() const;
711 /// On seek, this flag will be set while holding a lock on _streamMutex.
712 /// The parser, once it has obtained a lock on _streamMutex, will check this
713 /// flag and, if it is true, will clear the buffers and reset it to false.
717 typedef std::deque
<EncodedVideoFrame
*> VideoFrames
;
718 typedef std::deque
<EncodedAudioFrame
*> AudioFrames
;
720 /// Queue of video frames (the video buffer)
722 /// Elements owned by this class.
724 VideoFrames _videoFrames
;
726 /// Queue of audio frames (the audio buffer)
728 /// Elements owned by this class.
730 AudioFrames _audioFrames
;
732 void requestParserThreadKill()
734 boost::mutex::scoped_lock
lock(_parserThreadKillRequestMutex
);
735 _parserThreadKillRequested
=true;
736 _parserThreadWakeup
.notify_all();
739 /// Return the difference between the timestamps of the last and first
/// audio frames in the audio buffer.
740 boost::uint64_t audioBufferLength() const;
742 /// Return the difference between the timestamps of the last and first
/// video frames in the video buffer.
743 boost::uint64_t videoBufferLength() const;
745 /// A getBufferLength method not locking the _qMutex (expected to be locked by caller already).
/// Callers must therefore hold _qMutex before calling this.
746 boost::uint64_t getBufferLengthNoLock() const;
751 } // gnash.media namespace
754 #endif // GNASH_MEDIAPARSER_H