1 // AudioDecoderFfmpeg.cpp: Audio decoding using the FFmpeg library.
3 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "AudioDecoderFfmpeg.h"
22 #include <cmath> // for std::ceil
23 #include <algorithm> // for std::copy, std::max
25 #include "MediaParserFfmpeg.h" // for ExtraAudioInfoFfmpeg
26 #include "FLVParser.h"
27 #include "SoundInfo.h"
28 #include "MediaParser.h" // for AudioInfo
30 //#define GNASH_DEBUG_AUDIO_DECODING
32 #define AVCODEC_DECODE_AUDIO avcodec_decode_audio2
/// Construct a decoder from an AudioInfo description.
///
/// The lines visible here only report the result: when info.type is
/// CODEC_TYPE_CUSTOM the id and name of _audioCodec are logged; the second
/// log_debug call additionally reports the flash codec value info.codec.
///
/// @param info  Description of the encoded audio stream.
///
/// NOTE(review): the member initializer list, the start of the body and the
/// branch separating the two log_debug calls are not visible in this listing.
/// Presumably setup(info) runs before the logging -- confirm against the
/// complete file.
38 AudioDecoderFfmpeg::AudioDecoderFfmpeg(const AudioInfo
& info
)
47 if (info
.type
== CODEC_TYPE_CUSTOM
) {
48 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %d (%s)"),
49 _audioCodec
->id
, _audioCodec
->name
);
51 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %d (%s) "
52 "for flash codec %d (%s)"),
53 _audioCodec
->id
, _audioCodec
->name
,
54 info
.codec
, (audioCodecType
)info
.codec
);
/// Construct a decoder from a SoundInfo description.
///
/// The visible lines log the name and id of _audioCodec once the codec has
/// been initialized.
///
/// @param info  Description of the sound to decode.
///
/// NOTE(review): the member initializer list and the start of the body are not
/// visible in this listing. Presumably setup(info) runs before the logging --
/// confirm against the complete file.
58 AudioDecoderFfmpeg::AudioDecoderFfmpeg(SoundInfo
& info
)
66 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %s (%d)"),
67 _audioCodec
->name
, _audioCodec
->id
);
/// Release the libavcodec resources held by the decoder.
///
/// The codec context is closed with avcodec_close and then freed with av_free;
/// the parser is closed with av_parser_close only when _parser is non-null.
///
/// NOTE(review): whether the avcodec_close / av_free pair is guarded by a null
/// check on _audioCodecCtx cannot be seen in this listing -- confirm.
70 AudioDecoderFfmpeg::~AudioDecoderFfmpeg()
74 avcodec_close(_audioCodecCtx
);
75 av_free(_audioCodecCtx
);
77 if (_parser
) av_parser_close(_parser
);
/// Configure the decoder from a SoundInfo description.
///
/// Visible steps, in order:
///  - register all libavcodec codecs;
///  - map info.getFormat() to a CodecID (the visible assignments are
///    CODEC_ID_PCM_U16LE, CODEC_ID_ADPCM_SWF for AUDIO_CODEC_ADPCM,
///    CODEC_ID_MP3 and CODEC_ID_AAC);
///  - look up the decoder with avcodec_find_decoder;
///  - initialise a parser with av_parser_init;
///  - allocate a codec context and open it with avcodec_open;
///  - set channels, sample rate and sample format on the context from info.
///
/// @param info  Description of the sound to decode.
/// @throws MediaException  for an unsupported format, a decoder that cannot be
///         found, a parser that cannot be initialised, a context that cannot
///         be allocated, or a failing avcodec_open.
///
/// NOTE(review): most case labels, the conditions guarding the throw
/// statements and the condition around the parser initialisation are not
/// visible in this listing -- confirm them against the complete file.
80 void AudioDecoderFfmpeg::setup(SoundInfo
& info
)
82 // Init the avdecoder-decoder
84 avcodec_register_all();// change this to only register need codec?
86 enum CodecID codec_id
;
88 switch(info
.getFormat()) {
90 codec_id
= CODEC_ID_PCM_U16LE
;
92 case AUDIO_CODEC_ADPCM
:
93 codec_id
= CODEC_ID_ADPCM_SWF
;
96 codec_id
= CODEC_ID_MP3
;
100 codec_id
= CODEC_ID_AAC
;
104 boost::format err
= boost::format(
105 _("Unsupported audio codec %d")) %
106 static_cast<int>(info
.getFormat());
107 throw MediaException(err
.str());
// Look up the libavcodec decoder matching the chosen codec id.
110 _audioCodec
= avcodec_find_decoder(codec_id
);
112 audioCodecType codec
= info
.getFormat();
113 boost::format err
= boost::format(
114 _("libavcodec could not find a decoder for codec %d (%s)")) %
115 static_cast<int>(codec
) % codec
;
116 throw MediaException(err
.str());
122 _parser
= av_parser_init(codec_id
);
124 throw MediaException(_("AudioDecoderFfmpeg can't initialize "
129 _audioCodecCtx
= avcodec_alloc_context();
130 if (!_audioCodecCtx
) {
131 throw MediaException(_("libavcodec couldn't allocate context"));
134 int ret
= avcodec_open(_audioCodecCtx
, _audioCodec
);
// Failure path: the context allocated above is freed before the exception is thrown.
// NOTE(review): _audioCodecCtx is not reset in the visible lines -- confirm it is not used afterwards.
136 av_free(_audioCodecCtx
);
138 boost::format err
= boost::format(
139 _("AudioDecoderFfmpeg: avcodec_open failed to initialize "
140 "FFmpeg codec %s (%d)")) % _audioCodec
->name
% (int)codec_id
;
141 throw MediaException(err
.str());
144 log_debug(_("AudioDecoder: initialized FFMPEG codec %s (%d)"),
145 _audioCodec
->name
, (int)codec_id
);
147 /// @todo do this only if !_needsParsing ?
148 switch (_audioCodecCtx
->codec
->id
)
// PCM input: channel count and sample rate are copied from info, and frame_size is set to 1.
153 case CODEC_ID_PCM_U16LE
:
154 _audioCodecCtx
->channels
= (info
.isStereo() ? 2 : 1);
155 _audioCodecCtx
->sample_rate
= info
.getSampleRate();
156 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
; // ?! arbitrary ?
157 _audioCodecCtx
->frame_size
= 1;
// Second branch (its label is not visible): same settings without frame_size.
161 _audioCodecCtx
->channels
= (info
.isStereo() ? 2 : 1);
162 _audioCodecCtx
->sample_rate
= info
.getSampleRate();
163 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
; // ?! arbitrary ?
/// Configure the decoder from an AudioInfo description.
///
/// Visible steps, in order:
///  - register all libavcodec codecs;
///  - choose a CodecID: for CODEC_TYPE_CUSTOM the value of info.codec is cast
///    to CodecID and _needsParsing is set to true; for CODEC_TYPE_FLASH the
///    flash codec is mapped (UNCOMPRESSED / RAW to CODEC_ID_PCM_S16LE when
///    info.sampleSize is 2, with a CODEC_ID_PCM_S8 assignment following,
///    presumably for the other case; ADPCM to CODEC_ID_ADPCM_SWF; MP3; AAC;
///    and NELLYMOSER when FFMPEG_NELLYMOSER is defined);
///  - look up the decoder with avcodec_find_decoder;
///  - initialise a parser with av_parser_init;
///  - allocate a codec context and point its extradata at the buffer held by
///    info.extra (ExtraAudioInfoFfmpeg or ExtraAudioInfoFlv);
///  - preset channels, sample rate and sample format, then call avcodec_open.
///
/// @param info  Description of the encoded audio stream.
/// @throws MediaException  for an unsupported flash codec, an unknown codec
///         type, a decoder that cannot be found, a parser that cannot be
///         initialised, a context that cannot be allocated, or a failing
///         avcodec_open.
///
/// NOTE(review): the switch heads, several else branches and the conditions
/// guarding the throw statements and the parser initialisation are not visible
/// in this listing -- confirm them against the complete file.
168 void AudioDecoderFfmpeg::setup(const AudioInfo
& info
)
170 // Init the avdecoder-decoder
172 avcodec_register_all();// change this to only register need codec?
174 enum CodecID codec_id
= CODEC_ID_NONE
;
176 if (info
.type
== CODEC_TYPE_CUSTOM
)
178 codec_id
= static_cast<CodecID
>(info
.codec
);
179 _needsParsing
=true; // @todo check this !
181 else if (info
.type
== CODEC_TYPE_FLASH
)
186 case AUDIO_CODEC_UNCOMPRESSED
:
187 case AUDIO_CODEC_RAW
:
188 if (info
.sampleSize
== 2) {
189 codec_id
= CODEC_ID_PCM_S16LE
;
191 codec_id
= CODEC_ID_PCM_S8
;
195 case AUDIO_CODEC_ADPCM
:
196 codec_id
= CODEC_ID_ADPCM_SWF
;
199 case AUDIO_CODEC_MP3
:
200 codec_id
= CODEC_ID_MP3
;
204 case AUDIO_CODEC_AAC
:
205 codec_id
= CODEC_ID_AAC
;
209 #ifdef FFMPEG_NELLYMOSER
210 // NOTE: bjacques found this failing in decodeFrame
211 // (but probably not Ffmpeg's fault, he said)
212 // I'd like to take a look at the testcase --strk
213 case AUDIO_CODEC_NELLYMOSER
:
214 codec_id
= CODEC_ID_NELLYMOSER
;
220 boost::format err
= boost::format(
221 _("AudioDecoderFfmpeg: unsupported flash audio "
223 info
.codec
% (audioCodecType
)info
.codec
;
224 throw MediaException(err
.str());
229 boost::format err
= boost::format(
230 _("AudioDecoderFfmpeg: unknown codec type %d "
231 "(should never happen)")) % info
.type
;
232 throw MediaException(err
.str());
// Look up the libavcodec decoder matching the chosen codec id.
235 _audioCodec
= avcodec_find_decoder(codec_id
);
238 if (info
.type
== CODEC_TYPE_FLASH
) {
239 boost::format err
= boost::format(
240 _("AudioDecoderFfmpeg: libavcodec could not find a decoder "
241 "for codec %d (%s)")) %
242 info
.codec
% static_cast<audioCodecType
>(info
.codec
);
243 throw MediaException(err
.str());
245 boost::format err
= boost::format(
246 _("AudioDecoderFfmpeg: libavcodec could not find a decoder "
247 "for ffmpeg codec id %s")) % codec_id
;
248 throw MediaException(err
.str());
255 #ifdef GNASH_DEBUG_AUDIO_DECODING
256 log_debug(" Initializing ffmpeg parser");
257 #endif // GNASH_DEBUG_AUDIO_DECODING
258 _parser
= av_parser_init(codec_id
);
261 if (info
.type
== CODEC_TYPE_FLASH
) {
263 _("AudioDecoderFfmpeg: could not initialize a parser for "
264 "flash codec id %d (%s)")) %
265 info
.codec
% (audioCodecType
)info
.codec
;
268 _("AudioDecoderFfmpeg: could not initialize a parser "
269 "for ffmpeg codec id %s")) % codec_id
;
271 throw MediaException(err
.str());
273 #ifdef GNASH_DEBUG_AUDIO_DECODING
274 log_debug(" Ffmpeg parser initialized");
275 #endif // GNASH_DEBUG_AUDIO_DECODING
278 // Create an audioCodecCtx from the ffmpeg parser if exists/possible
279 _audioCodecCtx
= avcodec_alloc_context();
280 if (!_audioCodecCtx
) {
281 throw MediaException(_("AudioDecoderFfmpeg: libavcodec couldn't "
282 "allocate context"));
// Hand codec-specific extra data to the context when the AudioInfo carries any.
// The pointers are stored as-is, not copied.
// NOTE(review): presumably the buffer owned by info.extra must outlive the codec context -- confirm.
285 if ( info
.extra
.get() )
287 if (dynamic_cast<ExtraAudioInfoFfmpeg
*>(info
.extra
.get())) {
288 const ExtraAudioInfoFfmpeg
& ei
=
289 static_cast<ExtraAudioInfoFfmpeg
&>(*info
.extra
);
290 _audioCodecCtx
->extradata
= ei
.data
;
291 _audioCodecCtx
->extradata_size
= ei
.dataSize
;
292 } else if (dynamic_cast<ExtraAudioInfoFlv
*>(info
.extra
.get())) {
293 ExtraAudioInfoFlv
* extra
=
294 static_cast<ExtraAudioInfoFlv
*>(info
.extra
.get());
295 _audioCodecCtx
->extradata
= extra
->data
.get();
296 _audioCodecCtx
->extradata_size
= extra
->size
;
300 // Setup known configurations for the audio codec context
301 // (should this be done only if ! _needsParsing?)
302 // NOTE: this is done before calling avcodec_open, as that might update
303 // some of the variables
309 case CODEC_ID_PCM_S8
:
310 // Either FFMPEG or the parser are getting this wrong.
311 _audioCodecCtx
->sample_rate
= info
.sampleRate
/ 2;
312 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
314 case CODEC_ID_PCM_S16LE
:
315 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
316 _audioCodecCtx
->sample_rate
= info
.sampleRate
;
320 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
321 _audioCodecCtx
->sample_rate
= info
.sampleRate
;
322 // was commented out (why?):
323 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
;
328 #ifdef GNASH_DEBUG_AUDIO_DECODING
329 log_debug(" Opening codec");
330 #endif // GNASH_DEBUG_AUDIO_DECODING
331 int ret
= avcodec_open(_audioCodecCtx
, _audioCodec
);
// Failure path: the context allocated above is freed before the exception is thrown.
333 //avcodec_close(_audioCodecCtx);
334 av_free(_audioCodecCtx
);
337 boost::format err
= boost::format(
338 _("AudioDecoderFfmpeg: avcodec_open failed to initialize "
339 "FFmpeg codec %s (%d)")) % _audioCodec
->name
% (int)codec_id
;
340 throw MediaException(err
.str());
/// Decode a block of encoded audio, splitting it into frames first.
///
/// In the visible loop, parseInput is called on the part of input that has not
/// been consumed yet, each parsed frame is decoded with decodeFrame, and the
/// decoded bytes are appended to a buffer that starts at
/// AVCODEC_MAX_AUDIO_FRAME_SIZE bytes and is reallocated when it is too small.
///
/// @param input         Encoded audio data.
/// @param inputSize     Number of bytes available at input.
/// @param outputSize    Receives the number of bytes accumulated in the
///                      returned buffer.
/// @param decodedBytes  Receives the number of input bytes consumed. It is set
///                      to inputSize on the visible error paths, which
///                      (per the in-body comments) gets the sound removed
///                      from the active sound list later on.
///
/// NOTE(review): the return type, the last parameter (the body assigns to a
/// variable named parse, presumably a bool parameter), the declaration of
/// retBufSize, the release of the old buffer after reallocation and the return
/// statement are not visible in this listing -- confirm them against the
/// complete file.
346 AudioDecoderFfmpeg::decode(const boost::uint8_t* input
,
347 boost::uint32_t inputSize
, boost::uint32_t&
348 outputSize
, boost::uint32_t& decodedBytes
,
351 //GNASH_REPORT_FUNCTION;
357 log_error("AudioDecoderFfmpeg::decode called with 'parse' "
358 "parameter off but we know we need parsing for this codec");
363 if ( !_needsParsing
)
365 assert(!_parser
); // so we can directly return here...
366 log_debug("AudioDecoderFfmpeg::decode called with 'parse' "
367 "parameter on but we know we don't need parsing for "
369 parse
= false; // let's believe in us !
// Output accumulator: a raw new[] buffer that grows on demand in the loop below.
373 size_t retCapacity
= AVCODEC_MAX_AUDIO_FRAME_SIZE
;
374 boost::uint8_t* retBuf
= new boost::uint8_t[retCapacity
];
377 #ifdef GNASH_DEBUG_AUDIO_DECODING
378 log_debug(" Parsing loop starts, input is %d bytes, retCapacity is %d "
379 "bytes", inputSize
, retCapacity
);
381 decodedBytes
= 0; // nothing decoded yet
382 while (decodedBytes
< inputSize
)
384 const boost::uint8_t* frame
=0; // parsed frame (pointer into input)
385 int framesize
; // parsed frame size
387 int consumed
= parseInput(input
+decodedBytes
,
388 inputSize
-decodedBytes
,
392 log_error(_("av_parser_parse returned %d. "
393 "Upgrading ffmpeg/libavcodec might fix this issue."),
395 // Setting data position to data size will get the sound removed
396 // from the active sound list later on.
397 decodedBytes
= inputSize
;
401 #ifdef GNASH_DEBUG_AUDIO_DECODING
402 log_debug(" parsed frame is %d bytes (consumed +%d = %d/%d)",
403 framesize
, consumed
, decodedBytes
+consumed
, inputSize
);
406 #if GNASH_PARANOIA_LEVEL > 1
409 // the returned frame pointer is inside the input buffer
410 assert(frame
== input
+decodedBytes
);
411 // the returned frame size is within the input size
412 assert(framesize
<= inputSize
);
416 // all good so far, keep going..
417 // (we might do this immediately, as we'll override decodedBytes
419 decodedBytes
+= consumed
;
423 // If nothing is consumed, this will fail. It can happen if a
424 // block is passed to the decoder when nothing can be
425 // parsed from the block. This is probably a malformed SWF.
426 //assert(decodedBytes == inputSize);
428 // NOTE: If this happens the caller sent us
429 // a block of data which is not composed
430 // by complete audio frames.
431 // Could be due to an error in the caller
432 // code, or to a malformed SWF...
433 // At time of writing this (2008-11-01)
434 // it is most likely an error in caller
435 // code (streaming sound/event sound)
436 // so we log an ERROR rather than a
437 // MALFORMED input. You can uncomment the
438 // abort below to check who is the caller
439 // with gdb. When callers are checked,
440 // we may turn this into a MALFORMED
441 // kind of error (DEFINESOUND, SOUNDSTREAMBLOCK
442 // or FLV AudioTag not containing full audio frames)
445 log_error("AudioDecoderFfmpeg: "
446 "could not find a complete frame in "
447 "the last %d bytes of input"
448 " (malformed SWF or FLV?)",
455 // Now, decode the frame. We use the ::decodeFrame specialized function
456 // here so resampling is done appropriately
457 boost::uint32_t outSize
= 0;
458 boost::scoped_array
<boost::uint8_t> outBuf(
459 decodeFrame(frame
, framesize
, outSize
));
463 // Setting data position to data size will get the sound removed
464 // from the active sound list later on.
465 decodedBytes
= inputSize
;
469 #ifdef GNASH_DEBUG_AUDIO_DECODING
470 log_debug(" decoded frame is %d bytes, would grow return "
471 "buffer size to %d bytes", outSize
,
472 retBufSize
+static_cast<unsigned int>(outSize
));
476 // Now append this data to the buffer we're going to return
479 // if the new data doesn't fit, reallocate the output
480 // TODO: can use the Buffer class here.. if we return it too...
481 if ( retBufSize
+(unsigned)outSize
> retCapacity
)
483 #ifdef GNASH_DEBUG_AUDIO_DECODING
484 log_debug(" output buffer won't be able to hold %d bytes, "
485 "capacity is only %d bytes",
486 retBufSize
+(unsigned)outSize
, retCapacity
);
// Keep the old buffer in tmp so its contents can be copied into the larger one.
489 boost::uint8_t* tmp
= retBuf
;
490 retCapacity
= std::max(retBufSize
+static_cast<size_t>(outSize
),
493 #ifdef GNASH_DEBUG_AUDIO_DECODING
494 log_debug(" reallocating it to hold up to %d bytes",
496 #endif // GNASH_DEBUG_AUDIO_DECODING
498 retBuf
= new boost::uint8_t[retCapacity
];
499 if ( retBufSize
) std::copy(tmp
, tmp
+retBufSize
, retBuf
);
502 std::copy(outBuf
.get(), outBuf
.get()+outSize
, retBuf
+retBufSize
);
503 retBufSize
+= static_cast<unsigned int>(outSize
);
507 outputSize
= retBufSize
;
/// Decode one already-delimited encoded audio frame.
///
/// Forwards the data pointer and size of the frame to decodeFrame and returns
/// its result unchanged.
///
/// @param ef          Encoded frame; ef.data and ef.dataSize are read.
/// @param outputSize  Receives the size reported by decodeFrame.
///
/// NOTE(review): the return type line is not visible in this listing.
513 AudioDecoderFfmpeg::decode(const EncodedAudioFrame
& ef
,
514 boost::uint32_t& outputSize
)
516 return decodeFrame(ef
.data
.get(), ef
.dataSize
, outputSize
);
/// Decode a single encoded frame and resample the result when required.
///
/// A scratch buffer of AVCODEC_MAX_AUDIO_FRAME_SIZE bytes is obtained with
/// av_malloc and filled through AVCODEC_DECODE_AUDIO. When
/// _resampler.init(_audioCodecCtx) returns true the decoded samples are
/// resampled into a new[] buffer sized for 44100 Hz, 2 channels and 2-byte
/// samples; in the other visible path the decoded bytes are copied into a
/// new[] buffer of exactly outSize bytes.
///
/// @param input       Encoded frame data.
/// @param inputSize   Number of bytes at input.
/// @param outputSize  Receives the size in bytes of the returned data.
/// @return Pointer to a buffer allocated with new[]. The multi-frame decode
///         routine in this file stores it in a boost::scoped_array.
///
/// NOTE(review): the return type, the conditions guarding the error logs, the
/// else keyword between the two output paths and the release of the av_malloc
/// scratch buffer are not visible in this listing -- confirm them against the
/// complete file.
520 AudioDecoderFfmpeg::decodeFrame(const boost::uint8_t* input
,
521 boost::uint32_t inputSize
, boost::uint32_t& outputSize
)
523 //GNASH_REPORT_FUNCTION;
527 const size_t bufsize
= AVCODEC_MAX_AUDIO_FRAME_SIZE
;
529 // TODO: make this a private member, to reuse (see NetStreamFfmpeg in 0.8.3)
530 boost::uint8_t* output
;
532 output
= reinterpret_cast<boost::uint8_t*>(av_malloc(bufsize
));
534 log_error(_("failed to allocate audio buffer."));
539 boost::int16_t* outPtr
= reinterpret_cast<boost::int16_t*>(output
);
541 // We initialize output size to the full size
542 // then decoding will eventually reduce it
543 int outSize
= bufsize
;
545 #ifdef GNASH_DEBUG_AUDIO_DECODING
546 log_debug("AudioDecoderFfmpeg: about to decode %d bytes; "
547 "ctx->channels:%d, avctx->frame_size:%d",
548 inputSize
, _audioCodecCtx
->channels
, _audioCodecCtx
->frame_size
);
551 // older ffmpeg versions didn't accept a const input..
552 int tmp
= AVCODEC_DECODE_AUDIO(_audioCodecCtx
, outPtr
, &outSize
,
555 #ifdef GNASH_DEBUG_AUDIO_DECODING
556 log_debug(" avcodec_decode_audio[2](ctx, bufptr, %d, input, %d) "
557 "returned %d; set frame_size=%d",
558 bufsize
, inputSize
, tmp
, outSize
);
562 log_error(_("avcodec_decode_audio returned %d. Upgrading "
563 "ffmpeg/libavcodec might fix this issue."), tmp
);
571 log_error(_("outputSize:%d after decoding %d bytes of input audio "
572 "data. Upgrading ffmpeg/libavcodec might fix this issue."),
573 outputSize
, inputSize
);
580 // Resampling is needed.
581 if (_resampler
.init(_audioCodecCtx
)) {
582 // Resampling is needed.
584 // Compute new size based on frame_size and
585 // resampling configuration
586 double resampleFactor
= (44100.0/_audioCodecCtx
->sample_rate
) * (2.0/_audioCodecCtx
->channels
);
587 bool stereo
= _audioCodecCtx
->channels
> 1 ? true : false;
// outSize is in bytes: shifting by 2 divides by 4 for stereo, shifting by 1 divides by 2 for mono.
588 int inSamples
= stereo
? outSize
>> 2 : outSize
>> 1;
590 int expectedMaxOutSamples
= std::ceil(inSamples
*resampleFactor
);
592 // *channels *sampleSize
593 int resampledFrameSize
= expectedMaxOutSamples
*2*2;
595 // Allocate just the required amount of bytes
596 boost::uint8_t* resampledOutput
= new boost::uint8_t[resampledFrameSize
];
598 #ifdef GNASH_DEBUG_AUDIO_DECODING
599 log_debug("Calling the resampler; resampleFactor:%d; "
600 "ouput to 44100hz, 2channels, %dbytes; "
601 "input is %dhz, %dchannels, %dbytes, %dsamples",
603 resampledFrameSize
, _audioCodecCtx
->sample_rate
,
604 _audioCodecCtx
->channels
, outSize
, inSamples
);
607 int outSamples
= _resampler
.resample(outPtr
, // input
608 reinterpret_cast<boost::int16_t*>(resampledOutput
), // output
609 inSamples
); // input..
611 #ifdef GNASH_DEBUG_AUDIO_DECODING
612 log_debug("resampler returned %d samples ", outSamples
);
615 // make sure to set outPtr *after* we use it as input to the resampler
616 outPtr
= reinterpret_cast<boost::int16_t*>(resampledOutput
);
// Sanity check: the resampler must not produce more samples than the buffer was sized for.
620 if (expectedMaxOutSamples
< outSamples
) {
621 log_error(" --- Computation of resampled samples (%d) < then the actual returned samples (%d)",
622 expectedMaxOutSamples
, outSamples
);
624 log_debug(" input frame size: %d", outSize
);
625 log_debug(" input sample rate: %d", _audioCodecCtx
->sample_rate
);
626 log_debug(" input channels: %d", _audioCodecCtx
->channels
);
627 log_debug(" input samples: %d", inSamples
);
629 log_debug(" output sample rate (assuming): %d", 44100);
630 log_debug(" output channels (assuming): %d", 2);
631 log_debug(" output samples: %d", outSamples
);
637 // Use the actual number of samples returned, multiplied
638 // to get size in bytes (not two-byte samples) and for
640 outSize
= outSamples
* 2 * 2;
// Other output path: copy the decoded bytes into a new[] buffer of exactly outSize bytes.
644 boost::uint8_t* newOutput
= new boost::uint8_t[outSize
];
645 std::memcpy(newOutput
, output
, outSize
);
646 outPtr
= reinterpret_cast<boost::int16_t*>(newOutput
);
650 outputSize
= outSize
;
651 return reinterpret_cast<uint8_t*>(outPtr
);
/// Locate the next frame inside a block of encoded input.
///
/// Two paths are visible. The first returns the result of av_parser_parse
/// called with _parser and _audioCodecCtx. The second does no real parsing:
/// it reports the start of input as the frame and caps the frame size at
/// maxFrameSize (96000 bytes).
///
/// @param input         Encoded audio data.
/// @param inputSize     Number of bytes available at input.
/// @param outFrame      Receives a pointer to the start of the frame.
/// @param outFrameSize  Receives the size of the frame in bytes.
///
/// NOTE(review): the return type, the condition selecting between the two
/// paths (presumably _needsParsing), the remaining av_parser_parse arguments
/// and the final return statement (presumably returning parsed) are not
/// visible in this listing -- confirm them against the complete file.
655 AudioDecoderFfmpeg::parseInput(const boost::uint8_t* input
,
656 boost::uint32_t inputSize
,
657 boost::uint8_t const ** outFrame
, int* outFrameSize
)
661 return av_parser_parse(_parser
, _audioCodecCtx
,
662 // as of 2008-10-28 SVN, ffmpeg doesn't
663 // accept a pointer to pointer to const..
664 const_cast<boost::uint8_t**>(outFrame
),
671 // democratic value for a chunk to decode...
672 // @todo this might be constrained by codec id, check that !
674 // NOTE: AVCODEC_MAX_AUDIO_FRAME_SIZE resulted bigger
675 // than avcodec_decode_audio could handle, resulting
676 // in eventSoundTest1.swf regression.
677 //static const unsigned int maxFrameSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
679 // NOTE: 1024 resulted too few
680 // to properly decode (or resample?) raw audio
681 // thus resulting noisy (bugs #21177 and #22284)
682 //static const unsigned int maxFrameSize = 1024;
684 // NOTE: 96000 was found to be the max returned
685 // by avcodec_decode_audio when passed anything
686 // bigger than that. Works fine with all of
687 // eventSoundTest1.swf, bug #21177 and bug #22284
689 static const unsigned int maxFrameSize
= 96000;
// Clamp the chunk handed to the decoder to maxFrameSize bytes.
691 int frameSize
= inputSize
< maxFrameSize
? inputSize
: maxFrameSize
;
693 // we assume the input is just a set of frames
694 // and we'll consume all
695 *outFrame
= input
; // frame always start on input
696 *outFrameSize
= frameSize
;
697 int parsed
= frameSize
;
703 } // gnash.media.ffmpeg namespace
704 } // gnash.media namespace