Add "make package" target
[gnash.git] / libmedia / MediaParser.h
blob267d70219e3f1eae895da633665c4dfd5225117f
// MediaParser.h: Base class for media parsers
//
// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

#ifndef GNASH_MEDIAPARSER_H
#define GNASH_MEDIAPARSER_H

#include "IOChannel.h" // for inlines
#include "dsodefs.h" // DSOEXPORT

#include <boost/scoped_array.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/thread/thread.hpp>
#include <boost/thread/condition.hpp>
#include <boost/thread/barrier.hpp>
#include <memory>
#include <deque>
#include <map>
#include <vector>
#include <iosfwd> // for output operator forward declarations

// Undefine this to load/parse media files in the main thread
#define LOAD_MEDIA_IN_A_SEPARATE_THREAD 1

// Forward declaration
namespace gnash {
    class SimpleBuffer;
}

namespace gnash {
namespace media {

/// Video frame types
enum videoFrameType
{
    /// Key frames
    KEY_FRAME = 1,

    /// Inter (predicted) frames
    INTER_FRAME = 2,

    /// Disposable inter frames
    DIS_INTER_FRAME = 3
};

/// The type of the codec id passed in the AudioInfo or VideoInfo class
enum codecType
{
    /// The internal Flash codec ids
    CODEC_TYPE_FLASH,

    /// Custom codec ids
    CODEC_TYPE_CUSTOM
};

/// Video codec ids as defined in Flash
enum videoCodecType
{
    /// H263/SVQ3 video codec
    VIDEO_CODEC_H263 = 2,

    /// Screen video codec
    VIDEO_CODEC_SCREENVIDEO = 3,

    /// On2 VP6 video codec
    VIDEO_CODEC_VP6 = 4,

    /// On2 VP6 Alpha video codec
    VIDEO_CODEC_VP6A = 5,

    /// Screen video version 2 codec
    VIDEO_CODEC_SCREENVIDEO2 = 6,

    /// MPEG-4 Part 10, or Advanced Video Coding (H.264)
    VIDEO_CODEC_H264 = 7

    // NOTE: if you add more elements here remember to
    // also add them to the output operator!
};

DSOEXPORT std::ostream& operator<< (std::ostream& os, const videoCodecType& t);

/// Audio codec ids as defined in Flash
//
/// For some encodings, audio data is organized
/// in logical frames. The structure of such frames
/// (header/payload) is codec dependent.
/// The actual size of each frame may not be known
/// without parsing the encoded stream, as it
/// might be specified in the header of each frame.
///
/// Other encodings are looser about frames. For these
/// you can define a frame any way you want, as long
/// as a frame doesn't contain partial samples.
///
/// For FFMPEG, you can NOT construct a parser for the
/// loosely-framed codecs.
///
/// Codecs that need a parser will be documented as such.
///
enum audioCodecType
{
    /// Linear PCM, unspecified byte order
    //
    /// Use of this codec is deprecated (but still supported) due to
    /// the unspecified byte order (you can only play >8 bit samples
    /// in a sane way when the endianness of the encoding and decoding
    /// hosts match).
    ///
    /// 90% of the time the actual encoder ran on Windows, so
    /// it is a good bet to guess little-endian.
    /// SampleSize may be 8 or 16 bits.
    ///
    AUDIO_CODEC_RAW = 0,

    /// ADPCM format
    //
    /// SWF supports 2, 3, 4, and 5 bits / sample.
    /// ADPCM "frames" consist of 4096 ADPCM codes per channel.
    ///
    /// For streaming there is no concept of "seekSamples" like
    /// MP3 streaming implements. Thus ADPCM is suboptimal for
    /// streaming, as it is difficult to match sound frames with
    /// movie frames.
    /// Uncompressed SampleSize is always 16 bit.
    ///
    AUDIO_CODEC_ADPCM = 1,

    /// MP3 format
    //
    /// MP3 is supported for SWF4 and later.
    /// MP3 sound is structured in frames consisting of a fixed-size
    /// header (32 bits) and compressed sound data. Compressed sound
    /// data always contains a fixed number of sound samples (576 or 1152).
    /// For streaming sound an additional field is necessary (seekSamples)
    /// to keep track of samples crossing movie frame borders.
    ///
    /// The MP3 header contains all information necessary to decode a single
    /// frame. From this information one can derive the number of samples
    /// and the frame's size.
    /// Uncompressed SampleSize is always 16 bit.
    ///
    AUDIO_CODEC_MP3 = 2,

    /// Linear PCM, strictly little-endian
    AUDIO_CODEC_UNCOMPRESSED = 3,

    /// Proprietary simple format. Always 8 kHz mono?
    //
    /// SWF6 and later.
    /// Data is organized in frames of 256 samples.
    ///
    AUDIO_CODEC_NELLYMOSER_8HZ_MONO = 5,

    /// Proprietary simple format
    //
    /// SWF6 and later.
    /// Data is organized in frames of 256 samples.
    ///
    AUDIO_CODEC_NELLYMOSER = 6,

    /// Advanced Audio Coding
    AUDIO_CODEC_AAC = 10,

    /// Always 16 kHz mono
    AUDIO_CODEC_SPEEX = 11

    // NOTE: if you add more elements here remember to
    // also add them to the output operator!
};

DSOEXPORT std::ostream& operator<< (std::ostream& os, const audioCodecType& t);

/// Information about an audio stream
//
/// The information stored is codec id,
/// sample rate, sample size, stereo flag, duration and codec type.
///
/// Additionally, an abstract ExtraInfo can be held.
///
class AudioInfo
{

public:

    /// Construct an AudioInfo object
    //
    /// @param codeci
    ///     Audio codec id.
    ///     To be interpreted as a media::audioCodecType if the typei
    ///     parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
    ///     for codec information transfer between a MediaParser and an
    ///     AudioDecoder from the same %media handler module.
    ///
    /// @param sampleRatei
    ///     Nominal sample rate.
    ///     @todo document units.
    ///
    /// @param sampleSizei
    ///     Sample size, in bytes.
    ///
    /// @param stereoi
    ///     Sample type (stereo if true, mono otherwise).
    ///     @todo document if and how interpretation of sampleSizei changes
    ///
    /// @param durationi
    ///     Nominal audio stream duration, in milliseconds.
    ///
    /// @param typei
    ///     Changes interpretation of the codeci parameter.
    ///
    AudioInfo(int codeci, boost::uint16_t sampleRatei,
            boost::uint16_t sampleSizei, bool stereoi,
            boost::uint64_t durationi, codecType typei)
        :
        codec(codeci),
        sampleRate(sampleRatei),
        sampleSize(sampleSizei),
        stereo(stereoi),
        duration(durationi),
        type(typei)
    {
    }

    /// Codec identifier
    //
    /// This has to be interpreted as an audioCodecType if the codecType
    /// member `type' is CODEC_TYPE_FLASH; otherwise the interpretation is
    /// opaque and we rely on the assumption that the AudioInfo creator and
    /// the AudioInfo user have a way to share an interpretation.
    ///
    int codec;

    boost::uint16_t sampleRate;

    /// Size of each sample, in bytes
    boost::uint16_t sampleSize;

    bool stereo;

    boost::uint64_t duration;

    codecType type;

    /// Extra info about an audio stream
    //
    /// Abstract class to hold any additional info
    /// when required for proper decoder initialization.
    ///
    class ExtraInfo {
    public:
        virtual ~ExtraInfo() {}
    };

    /// Extra info about the audio stream, when needed
    //
    /// Could be an ExtraAudioInfoFlv or a media-handler specific info.
    ///
    std::auto_ptr<ExtraInfo> extra;
};
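
// Example (illustrative only; the values below are hypothetical): an
// AudioInfo describing a 44.1 kHz stereo MP3 stream, using the Flash
// codec id namespace. sampleSize is in bytes (2 = 16-bit samples) and
// duration in milliseconds (0 used here as an "unknown" placeholder):
//
//   AudioInfo info(AUDIO_CODEC_MP3, 44100, 2, true, 0, CODEC_TYPE_FLASH);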

/// Information about a video stream
//
/// The information stored is codec id, width, height, frame rate and duration.
///
/// Additionally, an abstract ExtraInfo can be held.
///
class VideoInfo
{

public:

    /// Construct a VideoInfo object
    //
    /// @param codeci
    ///     Video codec id.
    ///     To be interpreted as a media::videoCodecType if the typei
    ///     parameter is CODEC_TYPE_FLASH; otherwise it's an opaque number to use
    ///     for codec information transfer between a MediaParser and a
    ///     VideoDecoder from the same %media handler module.
    ///
    /// @param widthi
    ///     Video frame width.
    ///     @todo check if still needed.
    ///
    /// @param heighti
    ///     Video frame height.
    ///     @todo check if still needed.
    ///
    /// @param frameRatei
    ///     Nominal video frame rate.
    ///     @todo document units.
    ///
    /// @param durationi
    ///     Nominal video duration.
    ///     @todo check if still needed, if so document units!
    ///
    /// @param typei
    ///     Changes interpretation of the codeci parameter.
    ///
    VideoInfo(int codeci, boost::uint16_t widthi, boost::uint16_t heighti,
            boost::uint16_t frameRatei, boost::uint64_t durationi,
            codecType typei)
        :
        codec(codeci),
        width(widthi),
        height(heighti),
        frameRate(frameRatei),
        duration(durationi),
        type(typei)
    {
    }

    int codec;
    boost::uint16_t width;
    boost::uint16_t height;
    boost::uint16_t frameRate;
    boost::uint64_t duration;
    codecType type;

    /// Extra info about a video stream
    //
    /// Abstract class to hold any additional info
    /// when required for proper decoder initialization.
    ///
    class ExtraInfo {
    public:
        virtual ~ExtraInfo() {}
    };

    /// Extra info about the video stream, when needed
    //
    /// Could be an ExtraVideoInfoFlv or a media-handler specific info.
    ///
    std::auto_ptr<ExtraInfo> extra;
};

DSOEXPORT std::ostream& operator<< (std::ostream& os, const VideoInfo& vi);

/// Abstract container for codec-specific extra data attached to a frame
class EncodedExtraData {

public:
    virtual ~EncodedExtraData() {}

};

/// An encoded video frame
class EncodedVideoFrame
{
public:

    /// Create an encoded video frame
    //
    /// @param data
    ///     Data buffer, ownership transferred.
    ///
    /// @param size
    ///     Size of the data buffer.
    ///
    /// @param frameNum
    ///     Frame number.
    ///
    /// @param timestamp
    ///     Presentation timestamp, in milliseconds.
    ///
    EncodedVideoFrame(boost::uint8_t* data, boost::uint32_t size,
            unsigned int frameNum,
            boost::uint64_t timestamp=0)
        :
        _size(size),
        _data(data),
        _frameNum(frameNum),
        _timestamp(timestamp)
    {}

    /// Return pointer to actual data. Ownership retained by this class.
    const boost::uint8_t* data() const { return _data.get(); }

    /// Return size of data buffer.
    boost::uint32_t dataSize() const { return _size; }

    /// Return video frame presentation timestamp
    boost::uint64_t timestamp() const { return _timestamp; }

    /// Return video frame number
    unsigned frameNum() const { return _frameNum; }

    // FIXME: should have better encapsulation for this sort of stuff.
    std::auto_ptr<EncodedExtraData> extradata;

private:

    boost::uint32_t _size;
    boost::scoped_array<boost::uint8_t> _data;
    unsigned int _frameNum;
    boost::uint64_t _timestamp;
};
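
// Example (illustrative only; bufSize, num and ts are placeholders): a
// parser wrapping a heap-allocated buffer in an EncodedVideoFrame. The
// frame takes ownership of the buffer and releases it through its
// boost::scoped_array member, so the caller must not delete it:
//
//   boost::uint8_t* buf = new boost::uint8_t[bufSize]; // filled elsewhere
//   std::auto_ptr<EncodedVideoFrame> frame(
//           new EncodedVideoFrame(buf, bufSize, num, ts));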

/// An encoded audio frame
class EncodedAudioFrame
{
public:
    boost::uint32_t dataSize;
    boost::scoped_array<boost::uint8_t> data;
    boost::uint64_t timestamp;

    // FIXME: should have better encapsulation for this sort of stuff.
    std::auto_ptr<EncodedExtraData> extradata;
};

/// The MediaParser class provides cursor-based access to encoded %media frames
//
/// Cursor-based access allows seeking as close as possible to a specified time
/// and fetching frames from there on, sequentially.
/// See seek(), nextVideoFrame(), nextAudioFrame().
///
/// Input is received from an IOChannel object.
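///
/// A minimal consumer sketch (illustrative only; a concrete parser subclass
/// is normally created by the %media handler in use rather than instantiated
/// directly, and `parser' below is a reference to it):
///
///     boost::uint32_t t = 2000;          // request a seek to ~2 seconds
///     if ( parser.seek(t) )              // t now holds the actual seek time
///     {
///         std::auto_ptr<EncodedVideoFrame> f = parser.nextVideoFrame();
///         while ( f.get() )
///         {
///             // decode/display f, presented at f->timestamp() milliseconds
///             f = parser.nextVideoFrame();
///         }
///         // A NULL frame may mean "nothing buffered yet" or end of input;
///         // check parsingCompleted() to tell the two apart.
///     }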
class MediaParser
{
public:

    /// A container for executable MetaTags contained in media streams.
    //
    /// Presently only known in FLV.
    ///
    typedef std::multimap<boost::uint64_t, boost::shared_ptr<SimpleBuffer> >
        MetaTags;

    typedef std::vector<MetaTags::mapped_type> OrderedMetaTags;

    MediaParser(std::auto_ptr<IOChannel> stream);

    // Classes with virtual methods (virtual classes)
    // must have a virtual destructor, or the destructors
    // of subclasses will never be invoked, typically resulting
    // in memory leaks.
    //
    virtual ~MediaParser();

    /// \brief
    /// Seek to the closest possible position to the given one
    /// and return the new position.
    //
    /// @param time
    ///     Input/output parameter: on input, the requested time;
    ///     on output, the actual time seeked to.
    ///
    /// @return true if the seek was valid, false otherwise.
    ///
    virtual bool seek(boost::uint32_t& time)=0;

    /// Returns the minimum length of the available buffers, in milliseconds
    //
    /// TODO: FIXME: NOTE: this is currently used by NetStream.bufferLength
    /// but is bogus as it doesn't take the *current* playhead cursor time
    /// into account. A proper way would be to have a getLastBufferTime()
    /// interface here, returning the minimum timestamp of the last available
    /// frames, and let NetStream::bufferLength() use that together with the
    /// playhead time to find out...
    ///
    DSOEXPORT boost::uint64_t getBufferLength() const;

    /// Return the time we want the parser thread to maintain in the buffer
    DSOEXPORT boost::uint64_t getBufferTime() const
    {
        boost::mutex::scoped_lock lock(_bufferTimeMutex);
        return _bufferTime;
    }

    /// Set the time we want the parser thread to maintain in the buffer
    //
    /// @param t
    ///     Number of milliseconds to keep in the buffers.
    ///
    DSOEXPORT void setBufferTime(boost::uint64_t t)
    {
        boost::mutex::scoped_lock lock(_bufferTimeMutex);
        _bufferTime = t;
    }

    /// Get the timestamp of the next frame available, if any
    //
    /// @param ts will be set to the timestamp of the next available frame
    /// @return false if no frame is available yet
    ///
    DSOEXPORT bool nextFrameTimestamp(boost::uint64_t& ts) const;

    /// Get the timestamp of the video frame which would be returned by nextVideoFrame()
    //
    /// @return false if there is no video frame left
    ///         (either none at all, or no more)
    ///
    DSOEXPORT bool nextVideoFrameTimestamp(boost::uint64_t& ts) const;

    /// Return the next video frame in the parsed buffer, advancing the video cursor.
    //
    /// If no frame has been played yet, the first frame is returned.
    /// If there are no more frames in the parsed buffer, NULL is returned;
    /// you can check parsingCompleted() to know whether this is due to
    /// EOF having been reached.
    ///
    DSOEXPORT std::auto_ptr<EncodedVideoFrame> nextVideoFrame();

    /// Get the timestamp of the audio frame which would be returned by nextAudioFrame()
    //
    /// @return false if there is no audio frame left
    ///         (either none at all, or no more)
    ///
    DSOEXPORT bool nextAudioFrameTimestamp(boost::uint64_t& ts) const;

    /// Return the next audio frame in the parsed buffer, advancing the audio cursor.
    //
    /// If no frame has been played yet, the first frame is returned.
    /// If there are no more frames in the parsed buffer, NULL is returned;
    /// you can check parsingCompleted() to know whether this is due to
    /// EOF having been reached.
    ///
    DSOEXPORT std::auto_ptr<EncodedAudioFrame> nextAudioFrame();

    /// Return a VideoInfo object describing the video stream
    //
    /// @return a VideoInfo object describing the video stream,
    ///         or zero if unknown (no video, or not enough data parsed yet).
    ///
    VideoInfo* getVideoInfo() { return _videoInfo.get(); }

    /// Return an AudioInfo object describing the audio stream
    //
    /// @return an AudioInfo object describing the audio stream,
    ///         or zero if unknown (no audio, or not enough data parsed yet).
    ///
    AudioInfo* getAudioInfo() { return _audioInfo.get(); }

    /// Return true if parsing is completed
    //
    /// If this function returns true, any call to nextVideoFrame()
    /// or nextAudioFrame() will always return NULL.
    ///
    /// TODO: make thread-safe
    ///
    bool parsingCompleted() const { return _parsingComplete; }

    /// Return true if indexing is completed
    //
    /// If this function returns false, parseNextChunk will
    /// be called even when buffers are full. Parsers
    /// supporting indexing separated from parsing should
    /// override this method and have parseNextChunk figure out
    /// whether they only need to index or to parse, based on bufferFull.
    ///
    virtual bool indexingCompleted() const { return true; }

    /// Return the number of bytes parsed so far
    virtual boost::uint64_t getBytesLoaded() const { return 0; }

    /// Return the total number of bytes in input
    boost::uint64_t getBytesTotal() const
    {
        return _stream->size();
    }

    /// Parse the next chunk of input
    //
    /// Implementations are required to parse a small chunk
    /// of input, so as to avoid blocking for too long if parsing conditions
    /// change (i.e. seek or destruction requested).
    ///
    /// When LOAD_MEDIA_IN_A_SEPARATE_THREAD is defined, this should
    /// never be called by users (consider it protected).
    ///
    virtual bool parseNextChunk()=0;
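
    // When LOAD_MEDIA_IN_A_SEPARATE_THREAD is NOT defined, the user of the
    // parser is expected to pump it. A sketch of such a loop (illustrative
    // only, interleaving with decoding/playback work left out):
    //
    //   while ( ! parser.parsingCompleted() )
    //   {
    //       parser.parseNextChunk();   // parse a small chunk of input
    //       // ... consume buffered frames here ...
    //   }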

    /// Retrieve any parsed metadata tags up to a specified timestamp.
    //
    /// @param tags This is filled with shared pointers to metatags in
    ///             timestamp order. Ownership of the data is shared; it
    ///             is destroyed automatically along with the last owner.
    ///
    /// @param ts   The latest timestamp to retrieve metadata for.
    ///
    /// Metadata is currently only parsed from FLV streams. The default
    /// implementation is a no-op.
    ///
    virtual void fetchMetaTags(OrderedMetaTags& tags, boost::uint64_t ts);
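
    // Usage sketch (illustrative only; playheadTimeMs is a placeholder for
    // the caller's playhead position): collect all tags up to the current
    // playhead time and process them in timestamp order.
    //
    //   MediaParser::OrderedMetaTags tags;
    //   parser.fetchMetaTags(tags, playheadTimeMs);
    //   for (MediaParser::OrderedMetaTags::iterator i = tags.begin(),
    //           e = tags.end(); i != e; ++i)
    //   {
    //       boost::shared_ptr<SimpleBuffer> buf = *i; // shared ownership
    //       // ... execute/inspect the encoded metadata in buf ...
    //   }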

protected:

    /// Subclasses *must* set the following variables: @{

    /// Info about the video stream (if any)
    std::auto_ptr<VideoInfo> _videoInfo;

    /// Info about the audio stream (if any)
    std::auto_ptr<AudioInfo> _audioInfo;

    /// Whether the parsing is complete or not
    bool _parsingComplete;

    /// Number of bytes loaded
    boost::uint64_t _bytesLoaded;

    /// @}

    /// Start the parser thread
    void startParserThread();

    /// Stop the parser thread
    //
    /// This method should always be called
    /// by destructors of subclasses to ensure
    /// the parser thread won't attempt to access
    /// destroyed structures.
    ///
    void stopParserThread();

    /// Clear the a/v buffers
    void clearBuffers();

    /// Push an encoded audio frame to the buffer.
    //
    /// Will wait on a condition if the buffer is full or parsing was completed.
    ///
    void pushEncodedAudioFrame(std::auto_ptr<EncodedAudioFrame> frame);

    /// Push an encoded video frame to the buffer.
    //
    /// Will wait on a condition if the buffer is full or parsing was completed.
    ///
    void pushEncodedVideoFrame(std::auto_ptr<EncodedVideoFrame> frame);

    /// Return a pointer to the next encoded video frame in the buffer
    //
    /// If no video is present, or the queue is empty, 0 is returned.
    ///
    const EncodedVideoFrame* peekNextVideoFrame() const;

    /// Return a pointer to the next encoded audio frame in the buffer
    //
    /// If no audio is present, or the queue is empty, 0 is returned.
    ///
    const EncodedAudioFrame* peekNextAudioFrame() const;

    /// The stream used to access the file
    std::auto_ptr<IOChannel> _stream;

    mutable boost::mutex _streamMutex;

    static void parserLoopStarter(MediaParser* mp)
    {
        mp->parserLoop();
    }

    /// The parser loop runs in a separate thread
    /// and calls parseNextChunk until killed.
    //
    /// parseNextChunk is expected to push encoded frames
    /// on the queue, which may trigger the thread to be
    /// put to sleep when queues are full or parsing
    /// was completed.
    ///
    void parserLoop();

    bool parserThreadKillRequested() const
    {
        boost::mutex::scoped_lock lock(_parserThreadKillRequestMutex);
        return _parserThreadKillRequested;
    }

    boost::uint64_t _bufferTime;
    mutable boost::mutex _bufferTimeMutex;

    std::auto_ptr<boost::thread> _parserThread;
    boost::barrier _parserThreadStartBarrier;
    mutable boost::mutex _parserThreadKillRequestMutex;
    bool _parserThreadKillRequested;
    boost::condition _parserThreadWakeup;

    /// Wait on the _parserThreadWakeup condition if the buffer is full
    /// or parsing was completed.
    //
    /// Callers *must* pass a locked lock on _qMutex.
    ///
    void waitIfNeeded(boost::mutex::scoped_lock& qMutexLock);
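
    // Typical call pattern from within the parser (illustrative sketch only):
    //
    //   boost::mutex::scoped_lock lock(_qMutex);
    //   waitIfNeeded(lock);  // may release and reacquire _qMutex while waiting
    //   // ... queue the newly parsed frame ...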

    void wakeupParserThread();

    /// Mutex protecting access to the a/v encoded frame queues
    mutable boost::mutex _qMutex;

    /// Mutex protecting _bytesLoaded (read by main, set by parser)
    mutable boost::mutex _bytesLoadedMutex;

    /// Method to check whether the buffer is full without locking the _qMutex
    //
    /// This is intended to be called by waitIfNeeded, which
    /// is passed a locked lock on _qMutex, and by parseNextChunk
    /// to determine whether to index-only or also push on the queue.
    ///
    bool bufferFull() const;

    /// On seek, this flag will be set, while holding a lock on _streamMutex.
    /// The parser, once it has obtained a lock on _streamMutex, will check this
    /// flag; if it is found to be true, it will clear the buffers and reset it
    /// to false.
    bool _seekRequest;

private:

    typedef std::deque<EncodedVideoFrame*> VideoFrames;
    typedef std::deque<EncodedAudioFrame*> AudioFrames;

    /// Queue of video frames (the video buffer)
    //
    /// Elements owned by this class.
    ///
    VideoFrames _videoFrames;

    /// Queue of audio frames (the audio buffer)
    //
    /// Elements owned by this class.
    ///
    AudioFrames _audioFrames;

    void requestParserThreadKill()
    {
        boost::mutex::scoped_lock lock(_parserThreadKillRequestMutex);
        _parserThreadKillRequested = true;
        _parserThreadWakeup.notify_all();
    }

    /// Return the difference between the timestamps of the last and first audio frames
    boost::uint64_t audioBufferLength() const;

    /// Return the difference between the timestamps of the last and first video frames
    boost::uint64_t videoBufferLength() const;

    /// A getBufferLength method not locking the _qMutex (expected to be locked by the caller already).
    boost::uint64_t getBufferLengthNoLock() const;

};

} // gnash.media namespace
} // namespace gnash

#endif // GNASH_MEDIAPARSER_H