// MediaParser.h: Base class for media parsers
//
// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef GNASH_MEDIAPARSER_H
#define GNASH_MEDIAPARSER_H

#include <boost/scoped_array.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/thread/thread.hpp>
#include <boost/thread/condition.hpp>
#include <boost/thread/barrier.hpp>
#include <memory>
#include <deque>
#include <map>
#include <vector>
#include <iosfwd> // for output operator forward declarations
#include <boost/optional.hpp>

#include "IOChannel.h" // for inlines
#include "dsodefs.h" // DSOEXPORT

// Undefine this to load/parse media files in main thread
#define LOAD_MEDIA_IN_A_SEPARATE_THREAD 1

namespace gnash {
    class SimpleBuffer;
    namespace media {
        struct Id3Info;
    }
}

namespace gnash {
namespace media {

/// Video frame types
enum videoFrameType
{
    /// Key frames
    KEY_FRAME = 1,

    /// Inter frames (delta frames)
    INTER_FRAME = 2,

    /// Disposable inter frames
    DIS_INTER_FRAME = 3
};

/// The type of the codec id passed in the AudioInfo or VideoInfo class
enum codecType
{
    /// The internal flash codec ids
    CODEC_TYPE_FLASH,

    /// Custom codec ids
    CODEC_TYPE_CUSTOM
};

/// Video codec ids as defined in flash
enum videoCodecType
{
    /// H263/SVQ3 video codec
    VIDEO_CODEC_H263 = 2,

    /// Screenvideo codec
    VIDEO_CODEC_SCREENVIDEO = 3,

    /// On2 VP6 video codec
    VIDEO_CODEC_VP6 = 4,

    /// On2 VP6 Alpha video codec
    VIDEO_CODEC_VP6A = 5,

    /// Screenvideo2 codec
    VIDEO_CODEC_SCREENVIDEO2 = 6,

    /// MPEG-4 Part 10, or Advanced Video Coding
    VIDEO_CODEC_H264 = 7

    // NOTE: if you add more elements here remember to
    // also add them to the output operator!
};

DSOEXPORT std::ostream& operator<< (std::ostream& os, const videoCodecType& t);

/// Audio codec ids as defined in flash
//
/// For some encodings, audio data is organized
/// in logical frames. The structure of such frames
/// (header/payload) is codec dependent.
/// The actual size of each frame may not be known
/// without parsing the encoded stream, as it
/// might be specified in the header of each frame.
///
/// Other encodings are looser about frames. For these
/// you can define a frame any way you want, as long
/// as a frame doesn't contain partial samples.
///
/// For FFMPEG, you can NOT construct a parser for the
/// loosely-framed codecs.
///
/// Codecs needing a parser will be documented as such.
///
enum audioCodecType
{
    /// Signed Linear PCM, unspecified byte order
    //
    /// Use of this codec is deprecated (but still supported) due to
    /// the unspecified byte order (you can only play >8bit samples
    /// in a sane way when the endianness of the encoding and decoding
    /// hosts match).
    ///
    /// 90% of the time the actual encoder ran on Windows, so
    /// it is a good bet to guess little-endian.
    /// SampleSize may be 8 or 16 bits.
    ///
    AUDIO_CODEC_RAW = 0,

    /// ADPCM format
    //
    /// SWF supports 2, 3, 4, and 5 bits / sample.
    /// ADPCM "frames" consist of 4096 ADPCM codes per channel.
    ///
    /// For streaming there is no concept of "seekSamples" like
    /// MP3 streaming implements. Thus ADPCM is suboptimal for
    /// streaming, as it is difficult to match sound frames with
    /// movie frames.
    /// Uncompressed SampleSize is always 16 bit.
    ///
    AUDIO_CODEC_ADPCM = 1,

    /// MP3 format
    //
    /// MP3 is supported for SWF4 and later.
    /// MP3 sound is structured in frames consisting of a fixed-size
    /// header (32 bits) and compressed sound data. Compressed sound
    /// data always contains a fixed number of sound samples (576 or 1152).
    /// For streaming sound an additional field is necessary (seekSamples)
    /// to keep track of samples exceeding the movie frame border.
    ///
    /// The MP3 header contains all necessary information to decode a single
    /// frame. From this information one can derive the number of samples
    /// and the frame's size.
    /// Uncompressed SampleSize is always 16 bit.
    ///
    AUDIO_CODEC_MP3 = 2,

    /// Linear PCM, strictly little-endian
    AUDIO_CODEC_UNCOMPRESSED = 3,

    /// Proprietary simple format. Always 8kHz mono ?
    //
    /// SWF6 and later.
    /// Data is organized in frames of 256 samples.
    ///
    AUDIO_CODEC_NELLYMOSER_8HZ_MONO = 5,

    /// Proprietary simple format
    //
    /// SWF6 and later.
    /// Data is organized in frames of 256 samples.
    ///
    AUDIO_CODEC_NELLYMOSER = 6,

    /// Advanced Audio Coding
    AUDIO_CODEC_AAC = 10,

    /// Always 16kHz mono
    AUDIO_CODEC_SPEEX = 11

    // NOTE: if you add more elements here remember to
    // also add them to the output operator!
};

DSOEXPORT std::ostream& operator<< (std::ostream& os, const audioCodecType& t);

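// A minimal sketch (not part of the original header) of how a raw FLV
// "sound format" field relates to these ids when the codec type is
// CODEC_TYPE_FLASH; soundFormat is a hypothetical value read from an FLV
// audio tag header.
//
//   int soundFormat = 2;  // e.g. read from an FLV audio tag
//   audioCodecType codec = static_cast<audioCodecType>(soundFormat);
//   // codec == AUDIO_CODEC_MP3; the cast is only meaningful because the
//   // id space is CODEC_TYPE_FLASH, otherwise the number is opaque.
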
/// Information about an audio stream
//
/// The information stored is codec-id,
/// samplerate, samplesize, stereo, duration and codec-type.
///
/// Additionally, an abstract ExtraInfo can be held.
///
class AudioInfo
{
public:

    /// Construct an AudioInfo object
    //
    /// @param codeci
    ///     Audio codec id.
    ///     To be interpreted as a media::audioCodecType if the typei
    ///     parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
    ///     for codec information transfer between a MediaParser and an
    ///     AudioDecoder from the same %media handler module.
    ///
    /// @param sampleRatei
    ///     Nominal sample rate.
    ///     @todo document units.
    ///
    /// @param sampleSizei
    ///     Sample size, in bytes.
    ///
    /// @param stereoi
    ///     Sample type (stereo if true, mono otherwise).
    ///     @todo document if and how interpretation of sampleSizei changes
    ///
    /// @param durationi
    ///     Nominal audio stream duration, in milliseconds.
    ///
    /// @param typei
    ///     Changes interpretation of the codeci parameter.
    ///
    AudioInfo(int codeci, boost::uint16_t sampleRatei,
            boost::uint16_t sampleSizei, bool stereoi,
            boost::uint64_t durationi, codecType typei)
        :
        codec(codeci),
        sampleRate(sampleRatei),
        sampleSize(sampleSizei),
        stereo(stereoi),
        duration(durationi),
        type(typei)
    {
    }

    /// Codec identifier
    //
    /// This has to be interpreted as audioCodecType if codecType type is
    /// CODEC_TYPE_FLASH; otherwise interpretation is opaque and we rely on
    /// the assumption that the AudioInfo creator and the AudioInfo user
    /// have a way to get a shared interpretation.
    ///
    int codec;

    boost::uint16_t sampleRate;

    /// Size of each sample, in bytes
    boost::uint16_t sampleSize;

    bool stereo;

    boost::uint64_t duration;

    codecType type;

    /// Extra info about an audio stream
    //
    /// Abstract class to hold any additional info
    /// when required for proper decoder initialization.
    ///
    class ExtraInfo {
    public:
        virtual ~ExtraInfo() {}
    };

    /// Extra info about the audio stream, when needed
    //
    /// Could be ExtraAudioInfoFlv or a media-handler specific info
    ///
    std::auto_ptr<ExtraInfo> extra;
};

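// A minimal usage sketch (not part of the original header): how a parser
// might fill in an AudioInfo for an FLV MP3 stream. The concrete values
// shown (44100 Hz, 2-byte samples, stereo, unknown duration) are
// assumptions for illustration only.
//
//   std::auto_ptr<AudioInfo> info(new AudioInfo(
//       AUDIO_CODEC_MP3,     // codec id, interpreted as audioCodecType...
//       44100,               // nominal sample rate
//       2,                   // sample size, in bytes
//       true,                // stereo
//       0,                   // duration in ms (0 if unknown)
//       CODEC_TYPE_FLASH));  // ...because type is CODEC_TYPE_FLASH
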
/// Information about a video stream
//
/// The information stored is codec-id, width, height, framerate and duration.
///
/// Additionally, an abstract ExtraInfo can be held.
///
class VideoInfo
{
public:

    /// Construct a VideoInfo object
    //
    /// @param codeci
    ///     Video codec id.
    ///     To be interpreted as a media::videoCodecType if the typei
    ///     parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
    ///     for codec information transfer between a MediaParser and a
    ///     VideoDecoder from the same %media handler module.
    ///
    /// @param widthi
    ///     Video frame width.
    ///     @todo check if still needed.
    ///
    /// @param heighti
    ///     Video frame height.
    ///     @todo check if still needed.
    ///
    /// @param frameRatei
    ///     Nominal video frame rate.
    ///     @todo document units.
    ///
    /// @param durationi
    ///     Nominal video duration.
    ///     @todo check if still needed, if so document units!
    ///
    /// @param typei
    ///     Changes interpretation of the codeci parameter.
    ///
    VideoInfo(int codeci, boost::uint16_t widthi, boost::uint16_t heighti,
            boost::uint16_t frameRatei, boost::uint64_t durationi,
            codecType typei)
        :
        codec(codeci),
        width(widthi),
        height(heighti),
        frameRate(frameRatei),
        duration(durationi),
        type(typei)
    {
    }

    int codec;
    boost::uint16_t width;
    boost::uint16_t height;
    boost::uint16_t frameRate;
    boost::uint64_t duration;
    codecType type;

    /// Extra info about a video stream
    //
    /// Abstract class to hold any additional info
    /// when required for proper decoder initialization.
    ///
    class ExtraInfo {
    public:
        virtual ~ExtraInfo() {}
    };

    /// Extra info about the video stream, when needed
    //
    /// Could be ExtraVideoInfoFlv or a media-handler specific info
    ///
    std::auto_ptr<ExtraInfo> extra;
};

DSOEXPORT std::ostream& operator << (std::ostream& os, const VideoInfo& vi);

class EncodedExtraData {
public:
    virtual ~EncodedExtraData() {}
};

/// An encoded video frame
class EncodedVideoFrame
{
public:

    /// Create an encoded video frame
    //
    /// @param data
    ///     Data buffer, ownership transferred.
    ///
    /// @param size
    ///     Size of the data buffer.
    ///
    /// @param frameNum
    ///     Frame number.
    ///
    /// @param timestamp
    ///     Presentation timestamp, in milliseconds.
    ///
    EncodedVideoFrame(boost::uint8_t* data, boost::uint32_t size,
            unsigned int frameNum,
            boost::uint64_t timestamp=0)
        :
        _size(size),
        _data(data),
        _frameNum(frameNum),
        _timestamp(timestamp)
    {
    }

    /// Return pointer to actual data. Ownership retained by this class.
    const boost::uint8_t* data() const { return _data.get(); }

    /// Return size of data buffer.
    boost::uint32_t dataSize() const { return _size; }

    /// Return video frame presentation timestamp
    boost::uint64_t timestamp() const { return _timestamp; }

    /// Return video frame number
    unsigned frameNum() const { return _frameNum; }

    // FIXME: should have better encapsulation for this sort of stuff.
    std::auto_ptr<EncodedExtraData> extradata;

private:

    boost::uint32_t _size;
    boost::scoped_array<boost::uint8_t> _data;
    unsigned int _frameNum;
    boost::uint64_t _timestamp;
};

/// An encoded audio frame
class EncodedAudioFrame
{
public:
    boost::uint32_t dataSize;
    boost::scoped_array<boost::uint8_t> data;
    boost::uint64_t timestamp;

    // FIXME: should have better encapsulation for this sort of stuff.
    std::auto_ptr<EncodedExtraData> extradata;
};

/// The MediaParser class provides cursor-based access to encoded %media frames
//
/// Cursor-based access allows seeking as close as possible to a specified time
/// and fetching frames from there on, sequentially.
/// See seek(), nextVideoFrame(), nextAudioFrame().
///
/// Input is received from an IOChannel object.
///
class MediaParser
{
public:

    /// A container for executable MetaTags contained in media streams.
    //
    /// Presently only known in FLV.
    ///
    typedef std::multimap<boost::uint64_t, boost::shared_ptr<SimpleBuffer> >
        MetaTags;

    typedef std::vector<MetaTags::mapped_type> OrderedMetaTags;

    MediaParser(std::auto_ptr<IOChannel> stream);

    // Classes with virtual methods (virtual classes)
    // must have a virtual destructor, or the destructors
    // of subclasses will never be invoked, typically resulting
    // in memory leaks.
    //
    virtual ~MediaParser();

    /// \brief
    /// Seek to the closest possible position to the given one,
    /// and return the new position.
    //
    /// @param time
    ///     Input/output parameter: on input it requests a time,
    ///     on output it returns the actual time seeked to.
    ///
    /// @return true if the seek was valid, false otherwise.
    ///
    virtual bool seek(boost::uint32_t& time)=0;

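    // A minimal sketch (not from the original header) of how seek's in/out
    // parameter is meant to be used; "parser" is a hypothetical reference to
    // a concrete MediaParser subclass, and the units are assumed to be
    // milliseconds.
    //
    //   boost::uint32_t time = 5000;   // requested seek position
    //   if (parser.seek(time)) {
    //       // 'time' now holds the position actually seeked to,
    //       // which may differ from the requested one.
    //   }
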
    /// Return the minimum length of available buffers, in milliseconds
    //
    /// TODO: FIXME: NOTE: this is currently used by NetStream.bufferLength
    /// but is bogus as it doesn't take the *current* playhead cursor time
    /// into account. A proper way would be having a getLastBufferTime()
    /// interface here, returning the minimum timestamp of the last available
    /// frames, and letting NetStream::bufferLength() use that together with
    /// the playhead time to find out...
    ///
    DSOEXPORT boost::uint64_t getBufferLength() const;

    /// Return true if both audio and video buffers are empty
    //
    /// NOTE: locks _qMutex
    ///
    DSOEXPORT bool isBufferEmpty() const;

    /// Return the time we want the parser thread to maintain in the buffer
    DSOEXPORT boost::uint64_t getBufferTime() const
    {
        boost::mutex::scoped_lock lock(_bufferTimeMutex);
        return _bufferTime;
    }

    /// Set the time we want the parser thread to maintain in the buffer
    //
    /// @param t
    ///     Number of milliseconds to keep in the buffers.
    ///
    DSOEXPORT void setBufferTime(boost::uint64_t t)
    {
        boost::mutex::scoped_lock lock(_bufferTimeMutex);
        _bufferTime=t;
    }

    /// Get timestamp of the next frame available, if any
    //
    /// @param ts will be set to the timestamp of the next available frame
    /// @return false if no frame is available yet
    ///
    /// NOTE: locks _qMutex
    ///
    DSOEXPORT bool nextFrameTimestamp(boost::uint64_t& ts) const;

    /// Get timestamp of the video frame which would be returned by nextVideoFrame()
    //
    /// @return false if there is no video frame left
    ///         (either none at all or no more)
    ///
    /// NOTE: locks _qMutex
    ///
    DSOEXPORT bool nextVideoFrameTimestamp(boost::uint64_t& ts) const;

    /// Return the next video frame in the parsed buffer, advancing the video cursor.
    //
    /// If no frame has been played yet, the first frame is returned.
    /// If there are no more frames in the parsed buffer, NULL is returned;
    /// you can check with parsingCompleted() to know whether this is due to
    /// EOF being reached.
    ///
    DSOEXPORT std::auto_ptr<EncodedVideoFrame> nextVideoFrame();

    /// Get timestamp of the audio frame which would be returned by nextAudioFrame()
    //
    /// @return false if there is no audio frame left
    ///         (either none at all or no more)
    ///
    /// NOTE: locks _qMutex
    ///
    DSOEXPORT bool nextAudioFrameTimestamp(boost::uint64_t& ts) const;

    /// Return the next audio frame in the parsed buffer, advancing the audio cursor.
    //
    /// If no frame has been played yet, the first frame is returned.
    /// If there are no more frames in the parsed buffer, NULL is returned;
    /// you can check with parsingCompleted() to know whether this is due to
    /// EOF being reached.
    ///
    DSOEXPORT std::auto_ptr<EncodedAudioFrame> nextAudioFrame();

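    // A minimal consumption sketch (not from the original header), assuming
    // "parser" is a reference to a concrete MediaParser subclass obtained
    // from a media handler. It drains whatever audio frames have been
    // buffered so far and hands them to a hypothetical decode() helper.
    //
    //   boost::uint64_t ts;
    //   while (parser.nextAudioFrameTimestamp(ts)) {
    //       std::auto_ptr<EncodedAudioFrame> frame = parser.nextAudioFrame();
    //       if (!frame.get()) break;   // buffer drained (see parsingCompleted())
    //       decode(*frame);            // hypothetical consumer
    //   }
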
    /// Return a VideoInfo object describing the video stream
    //
    /// @return a VideoInfo object describing the video stream,
    ///         or zero if unknown (no video or not enough data parsed yet).
    ///
    VideoInfo* getVideoInfo() { return _videoInfo.get(); }

    /// Return an AudioInfo object describing the audio stream
    //
    /// @return an AudioInfo object describing the audio stream,
    ///         or zero if unknown (no audio or not enough data parsed yet).
    ///
    AudioInfo* getAudioInfo() { return _audioInfo.get(); }

    /// Return true if parsing is completed
    //
    /// If this function returns true, any call to nextVideoFrame()
    /// or nextAudioFrame() will always return NULL
    ///
    /// TODO: make thread-safe
    ///
    bool parsingCompleted() const { return _parsingComplete; }

    /// Return true if indexing is completed
    //
    /// If this function returns false, parseNextChunk will
    /// be called even when buffers are full. Parsers
    /// supporting indexing separated from parsing should
    /// override this method and have parseNextChunk figure out
    /// whether they only need to index or to parse, based on bufferFull.
    ///
    virtual bool indexingCompleted() const { return true; }

    /// Return number of bytes parsed so far
    virtual boost::uint64_t getBytesLoaded() const { return 0; }

    /// Return total number of bytes in input
    boost::uint64_t getBytesTotal() const
    {
        return _stream->size();
    }

    /// Parse next chunk of input
    //
    /// The implementations are required to parse a small chunk
    /// of input, so as to avoid blocking too much if parsing conditions
    /// change (i.e. seek or destruction requested).
    ///
    /// When LOAD_MEDIA_IN_A_SEPARATE_THREAD is defined, this should
    /// never be called by users (consider it protected).
    ///
    virtual bool parseNextChunk()=0;

    /// Retrieve any parsed metadata tags up to a specified timestamp.
    //
    /// @param ts   The latest timestamp to retrieve metadata for.
    /// @param tags This is filled with shared pointers to metatags in
    ///             timestamp order. Ownership of the data is shared. It
    ///             is destroyed automatically along with the last owner.
    ///
    /// Metadata is currently only parsed from FLV streams. The default
    /// implementation is a no-op.
    virtual void fetchMetaTags(OrderedMetaTags& tags, boost::uint64_t ts);

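    // A minimal sketch (not from the original header) of draining metadata
    // up to the current playhead position; "parser" and "playheadTime" are
    // hypothetical, and processMetaTag() stands in for whatever consumes
    // the SimpleBuffer payloads.
    //
    //   MediaParser::OrderedMetaTags tags;
    //   parser.fetchMetaTags(tags, playheadTime);
    //   for (MediaParser::OrderedMetaTags::iterator i = tags.begin(),
    //           e = tags.end(); i != e; ++i) {
    //       processMetaTag(*i);   // each element is a shared_ptr<SimpleBuffer>
    //   }
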
    /// Get ID3 data from the parsed stream if it exists.
    //
    /// It's best to do this only when parsingComplete is true.
    virtual boost::optional<Id3Info> getId3Info() const;

protected:

    /// Subclasses *must* set the following variables: @{

    /// Info about the video stream (if any)
    std::auto_ptr<VideoInfo> _videoInfo;

    /// Info about the audio stream (if any)
    std::auto_ptr<AudioInfo> _audioInfo;

    /// Whether the parsing is complete or not
    bool _parsingComplete;

    /// Number of bytes loaded
    boost::uint64_t _bytesLoaded;

    /// @}

    /// Start the parser thread
    void startParserThread();

    /// Stop the parser thread
    //
    /// This method should always be called
    /// by destructors of subclasses to ensure
    /// the parser thread won't attempt to access
    /// destroyed structures.
    ///
    void stopParserThread();

    /// Clear the a/v buffers
    void clearBuffers();

    /// Push an encoded audio frame to the buffer.
    //
    /// Will wait on a condition if the buffer is full or parsing was completed.
    ///
    void pushEncodedAudioFrame(std::auto_ptr<EncodedAudioFrame> frame);

    /// Push an encoded video frame to the buffer.
    //
    /// Will wait on a condition if the buffer is full or parsing was completed.
    ///
    void pushEncodedVideoFrame(std::auto_ptr<EncodedVideoFrame> frame);

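    // A hypothetical fragment (not from the original header) of a subclass's
    // parseNextChunk(), sketching how a freshly parsed frame is handed over;
    // readFrameData(), frameSize, frameNum and frameTimestamp are assumptions
    // standing in for whatever the concrete parser extracts from _stream.
    //
    //   boost::uint8_t* buf = new boost::uint8_t[frameSize];
    //   readFrameData(buf, frameSize);   // fill buf from the input stream
    //   std::auto_ptr<EncodedVideoFrame> frame(
    //       new EncodedVideoFrame(buf, frameSize, frameNum, frameTimestamp));
    //   pushEncodedVideoFrame(frame);    // may block until there is buffer space
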
    /// The stream used to access the file
    std::auto_ptr<IOChannel> _stream;

    mutable boost::mutex _streamMutex;

    static void parserLoopStarter(MediaParser* mp)
    {
        mp->parserLoop();
    }

    /// The parser loop runs in a separate thread
    /// and calls parseNextChunk until killed.
    //
    /// parseNextChunk is expected to push encoded frames
    /// on the queue, which may trigger the thread to be
    /// put to sleep when queues are full or parsing
    /// was completed.
    ///
    void parserLoop();

    bool parserThreadKillRequested() const
    {
        boost::mutex::scoped_lock lock(_parserThreadKillRequestMutex);
        return _parserThreadKillRequested;
    }

    boost::uint64_t _bufferTime;

    mutable boost::mutex _bufferTimeMutex;

    std::auto_ptr<boost::thread> _parserThread;
    boost::barrier _parserThreadStartBarrier;

    mutable boost::mutex _parserThreadKillRequestMutex;
    bool _parserThreadKillRequested;

    boost::condition _parserThreadWakeup;

    /// Wait on the _parserThreadWakeup condition if the buffer is full
    /// or parsing was completed.
    //
    /// Callers *must* pass a locked lock on _qMutex.
    ///
    void waitIfNeeded(boost::mutex::scoped_lock& qMutexLock);

    void wakeupParserThread();

    /// Mutex protecting access to the a/v encoded frame queues
    mutable boost::mutex _qMutex;

    /// Mutex protecting _bytesLoaded (read by main, set by parser)
    mutable boost::mutex _bytesLoadedMutex;

    /// Check whether the buffer is full without locking the _qMutex
    //
    /// This is intended to be called by waitIfNeeded, which
    /// is passed a locked lock on _qMutex, and by parseNextChunk
    /// to determine whether to index only or also push on the queue.
    ///
    bool bufferFull() const;

    /// On seek, this flag will be set, while holding a lock on _streamMutex.
    /// The parser, when it has obtained a lock on _streamMutex, will check this
    /// flag; if found to be true it will clear the buffers and reset it to false.
    bool _seekRequest;

private:

    typedef std::deque<EncodedVideoFrame*> VideoFrames;
    typedef std::deque<EncodedAudioFrame*> AudioFrames;

    /// Return pointer to the next encoded video frame in the buffer
    //
    /// If no video is present, or the queue is empty, 0 is returned.
    ///
    /// NOTE: Caller is expected to hold a lock on _qMutex
    ///
    const EncodedVideoFrame* peekNextVideoFrame() const;

    /// Return pointer to the next encoded audio frame in the buffer
    //
    /// If no audio is present, or the queue is empty, 0 is returned.
    ///
    /// NOTE: Caller is expected to hold a lock on _qMutex
    ///
    const EncodedAudioFrame* peekNextAudioFrame() const;

    /// Queue of video frames (the video buffer)
    //
    /// Elements owned by this class.
    ///
    VideoFrames _videoFrames;

    /// Queue of audio frames (the audio buffer)
    //
    /// Elements owned by this class.
    ///
    AudioFrames _audioFrames;

    void requestParserThreadKill()
    {
        boost::mutex::scoped_lock lock(_parserThreadKillRequestMutex);
        _parserThreadKillRequested=true;
        _parserThreadWakeup.notify_all();
    }

    /// Return diff between timestamps of the last and first audio frames
    boost::uint64_t audioBufferLength() const;

    /// Return diff between timestamps of the last and first video frames
    boost::uint64_t videoBufferLength() const;

    /// A getBufferLength method not locking the _qMutex (expected to be locked by the caller already).
    boost::uint64_t getBufferLengthNoLock() const;
};

} // gnash.media namespace
} // namespace gnash

#endif // GNASH_MEDIAPARSER_H