// AudioDecoderFfmpeg.cpp: Audio decoding using the FFmpeg library.
//
// Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
// Free Software Foundation, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "AudioDecoderFfmpeg.h"
23 #include <cmath> // for std::ceil
24 #include <algorithm> // for std::copy, std::max
26 #include "MediaParserFfmpeg.h" // for ExtraAudioInfoFfmpeg
27 #include "FLVParser.h"
28 #include "SoundInfo.h"
29 #include "MediaParser.h" // for AudioInfo
31 //#define GNASH_DEBUG_AUDIO_DECODING
// Select the libavcodec audio decoding entry point.
// libavcodec major version 53 and later: avcodec_decode_audio3
// (called with an AVPacket in decodeFrame); older versions:
// avcodec_decode_audio2 (called with the raw input buffer and its size).
#if LIBAVCODEC_VERSION_MAJOR >= 53
#define AVCODEC_DECODE_AUDIO avcodec_decode_audio3
#else
#define AVCODEC_DECODE_AUDIO avcodec_decode_audio2
#endif
43 AudioDecoderFfmpeg::AudioDecoderFfmpeg(const AudioInfo
& info
)
52 if (info
.type
== CODEC_TYPE_CUSTOM
) {
53 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %d (%s)"),
54 _audioCodec
->id
, _audioCodec
->name
);
56 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %d (%s) "
57 "for flash codec %d (%s)"),
58 _audioCodec
->id
, _audioCodec
->name
,
59 info
.codec
, (audioCodecType
)info
.codec
);
63 AudioDecoderFfmpeg::AudioDecoderFfmpeg(SoundInfo
& info
)
71 log_debug(_("AudioDecoderFfmpeg: initialized FFmpeg codec %s (%d)"),
72 _audioCodec
->name
, _audioCodec
->id
);
75 AudioDecoderFfmpeg::~AudioDecoderFfmpeg()
79 avcodec_close(_audioCodecCtx
);
80 av_free(_audioCodecCtx
);
82 if (_parser
) av_parser_close(_parser
);
85 void AudioDecoderFfmpeg::setup(SoundInfo
& info
)
87 // Init the avdecoder-decoder
89 avcodec_register_all();// change this to only register need codec?
91 enum CodecID codec_id
;
93 switch(info
.getFormat()) {
95 codec_id
= CODEC_ID_PCM_U16LE
;
97 case AUDIO_CODEC_ADPCM
:
98 codec_id
= CODEC_ID_ADPCM_SWF
;
100 case AUDIO_CODEC_MP3
:
101 codec_id
= CODEC_ID_MP3
;
104 case AUDIO_CODEC_AAC
:
105 codec_id
= CODEC_ID_AAC
;
109 boost::format err
= boost::format(
110 _("Unsupported audio codec %d")) %
111 static_cast<int>(info
.getFormat());
112 throw MediaException(err
.str());
115 _audioCodec
= avcodec_find_decoder(codec_id
);
117 audioCodecType codec
= info
.getFormat();
118 boost::format err
= boost::format(
119 _("libavcodec could not find a decoder for codec %d (%s)")) %
120 static_cast<int>(codec
) % codec
;
121 throw MediaException(err
.str());
127 _parser
= av_parser_init(codec_id
);
129 throw MediaException(_("AudioDecoderFfmpeg can't initialize "
134 _audioCodecCtx
= avcodec_alloc_context();
135 if (!_audioCodecCtx
) {
136 throw MediaException(_("libavcodec couldn't allocate context"));
139 int ret
= avcodec_open(_audioCodecCtx
, _audioCodec
);
141 av_free(_audioCodecCtx
);
143 boost::format err
= boost::format(
144 _("AudioDecoderFfmpeg: avcodec_open failed to initialize "
145 "FFmpeg codec %s (%d)")) % _audioCodec
->name
% (int)codec_id
;
146 throw MediaException(err
.str());
149 log_debug(_("AudioDecoder: initialized FFMPEG codec %s (%d)"),
150 _audioCodec
->name
, (int)codec_id
);
152 /// @todo do this only if !_needsParsing ?
153 switch (_audioCodecCtx
->codec
->id
)
158 case CODEC_ID_PCM_U16LE
:
159 _audioCodecCtx
->channels
= (info
.isStereo() ? 2 : 1);
160 _audioCodecCtx
->sample_rate
= info
.getSampleRate();
161 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
; // ?! arbitrary ?
162 _audioCodecCtx
->frame_size
= 1;
166 _audioCodecCtx
->channels
= (info
.isStereo() ? 2 : 1);
167 _audioCodecCtx
->sample_rate
= info
.getSampleRate();
168 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
; // ?! arbitrary ?
173 void AudioDecoderFfmpeg::setup(const AudioInfo
& info
)
175 // Init the avdecoder-decoder
177 avcodec_register_all();// change this to only register need codec?
179 enum CodecID codec_id
= CODEC_ID_NONE
;
181 if (info
.type
== CODEC_TYPE_CUSTOM
)
183 codec_id
= static_cast<CodecID
>(info
.codec
);
185 else if (info
.type
== CODEC_TYPE_FLASH
)
190 case AUDIO_CODEC_UNCOMPRESSED
:
191 case AUDIO_CODEC_RAW
:
192 if (info
.sampleSize
== 2) {
193 codec_id
= CODEC_ID_PCM_S16LE
;
195 codec_id
= CODEC_ID_PCM_S8
;
199 case AUDIO_CODEC_ADPCM
:
200 codec_id
= CODEC_ID_ADPCM_SWF
;
203 case AUDIO_CODEC_MP3
:
204 codec_id
= CODEC_ID_MP3
;
207 case AUDIO_CODEC_AAC
:
208 codec_id
= CODEC_ID_AAC
;
211 #ifdef FFMPEG_NELLYMOSER
212 // NOTE: bjacques found this failing in decodeFrame
213 // (but probably not Ffmpeg's fault, he said)
214 // I'd like to take a look at the testcase --strk
215 case AUDIO_CODEC_NELLYMOSER
:
216 codec_id
= CODEC_ID_NELLYMOSER
;
221 boost::format err
= boost::format(
222 _("AudioDecoderFfmpeg: unsupported flash audio "
224 info
.codec
% (audioCodecType
)info
.codec
;
225 throw MediaException(err
.str());
230 boost::format err
= boost::format(
231 _("AudioDecoderFfmpeg: unknown codec type %d "
232 "(should never happen)")) % info
.type
;
233 throw MediaException(err
.str());
236 _audioCodec
= avcodec_find_decoder(codec_id
);
239 if (info
.type
== CODEC_TYPE_FLASH
) {
240 boost::format err
= boost::format(
241 _("AudioDecoderFfmpeg: libavcodec could not find a decoder "
242 "for codec %d (%s)")) %
243 info
.codec
% static_cast<audioCodecType
>(info
.codec
);
244 throw MediaException(err
.str());
246 boost::format err
= boost::format(
247 _("AudioDecoderFfmpeg: libavcodec could not find a decoder "
248 "for ffmpeg codec id %s")) % codec_id
;
249 throw MediaException(err
.str());
253 _parser
= av_parser_init(codec_id
);
254 _needsParsing
= (_parser
!= NULL
);
256 // Create an audioCodecCtx from the ffmpeg parser if exists/possible
257 _audioCodecCtx
= avcodec_alloc_context();
258 if (!_audioCodecCtx
) {
259 throw MediaException(_("AudioDecoderFfmpeg: libavcodec couldn't "
260 "allocate context"));
263 if ( info
.extra
.get() )
265 if (dynamic_cast<ExtraAudioInfoFfmpeg
*>(info
.extra
.get())) {
266 const ExtraAudioInfoFfmpeg
& ei
=
267 static_cast<ExtraAudioInfoFfmpeg
&>(*info
.extra
);
268 _audioCodecCtx
->extradata
= ei
.data
;
269 _audioCodecCtx
->extradata_size
= ei
.dataSize
;
270 } else if (dynamic_cast<ExtraAudioInfoFlv
*>(info
.extra
.get())) {
271 ExtraAudioInfoFlv
* extra
=
272 static_cast<ExtraAudioInfoFlv
*>(info
.extra
.get());
273 _audioCodecCtx
->extradata
= extra
->data
.get();
274 _audioCodecCtx
->extradata_size
= extra
->size
;
278 // Setup known configurations for the audio codec context
279 // NOTE: this is done before calling avcodec_open, as that might update
280 // some of the variables
286 case CODEC_ID_PCM_S8
:
287 // Either FFMPEG or the parser are getting this wrong.
288 _audioCodecCtx
->sample_rate
= info
.sampleRate
/ 2;
289 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
291 case CODEC_ID_PCM_S16LE
:
292 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
293 _audioCodecCtx
->sample_rate
= info
.sampleRate
;
297 _audioCodecCtx
->channels
= (info
.stereo
? 2 : 1);
298 _audioCodecCtx
->sample_rate
= info
.sampleRate
;
299 // was commented out (why?):
300 _audioCodecCtx
->sample_fmt
= SAMPLE_FMT_S16
;
305 #ifdef GNASH_DEBUG_AUDIO_DECODING
306 log_debug(" Opening codec");
307 #endif // GNASH_DEBUG_AUDIO_DECODING
308 int ret
= avcodec_open(_audioCodecCtx
, _audioCodec
);
310 //avcodec_close(_audioCodecCtx);
311 av_free(_audioCodecCtx
);
314 boost::format err
= boost::format(
315 _("AudioDecoderFfmpeg: avcodec_open failed to initialize "
316 "FFmpeg codec %s (%d)")) % _audioCodec
->name
% (int)codec_id
;
317 throw MediaException(err
.str());
323 AudioDecoderFfmpeg::decode(const boost::uint8_t* input
,
324 boost::uint32_t inputSize
, boost::uint32_t&
325 outputSize
, boost::uint32_t& decodedBytes
)
327 //GNASH_REPORT_FUNCTION;
329 size_t retCapacity
= AVCODEC_MAX_AUDIO_FRAME_SIZE
;
330 boost::uint8_t* retBuf
= new boost::uint8_t[retCapacity
];
333 #ifdef GNASH_DEBUG_AUDIO_DECODING
334 log_debug(" Parsing loop starts, input is %d bytes, retCapacity is %d "
335 "bytes", inputSize
, retCapacity
);
337 decodedBytes
= 0; // nothing decoded yet
338 while (decodedBytes
< inputSize
)
340 const boost::uint8_t* frame
=0; // parsed frame (pointer into input)
341 int framesize
; // parsed frame size
343 int consumed
= parseInput(input
+decodedBytes
,
344 inputSize
-decodedBytes
,
348 log_error(_("av_parser_parse returned %d. "
349 "Upgrading ffmpeg/libavcodec might fix this issue."),
351 // Setting data position to data size will get the sound removed
352 // from the active sound list later on.
353 decodedBytes
= inputSize
;
357 #ifdef GNASH_DEBUG_AUDIO_DECODING
358 log_debug(" parsed frame is %d bytes (consumed +%d = %d/%d)",
359 framesize
, consumed
, decodedBytes
+consumed
, inputSize
);
362 #if GNASH_PARANOIA_LEVEL > 1
365 // the returned frame pointer is inside the input buffer
366 assert(frame
== input
+decodedBytes
);
367 // the returned frame size is within the input size
368 assert(framesize
<= inputSize
);
372 // all good so far, keep going..
373 // (we might do this immediately, as we'll override decodedBytes
375 decodedBytes
+= consumed
;
379 // If nothing is consumed, this will fail. It can happen if a
380 // block is passed to the decoder when nothing can be
381 // parsed from the block. This is probably a malformed SWF.
382 //assert(decodedBytes == inputSize);
384 // NOTE: If this happens the caller sent us
385 // a block of data which is not composed
386 // by complete audio frames.
387 // Could be due to an error in the caller
388 // code, or to a malformed SWF...
389 // At time of writing this (2008-11-01)
390 // it is most likely an error in caller
391 // code (streaming sound/event sound)
392 // so we log an ERROR rather then a
393 // MALFORMED input. You can uncomment the
394 // abort below to check who is the caller
395 // with gdb. When callers are checked,
396 // we may turn this into a MALFORMED
397 // kind of error (DEFINESOUND, SOUNDSTREAMBLOCK
398 // or FLV AudioTag not containing full audio frames)
401 log_error(_("AudioDecoderFfmpeg: "
402 "could not find a complete frame in "
403 "the last %d bytes of input"
404 " (malformed SWF or FLV?)"),
411 // Now, decode the frame. We use the ::decodeFrame specialized function
412 // here so resampling is done appropriately
413 boost::uint32_t outSize
= 0;
414 boost::scoped_array
<boost::uint8_t> outBuf(
415 decodeFrame(frame
, framesize
, outSize
));
419 // Setting data position to data size will get the sound removed
420 // from the active sound list later on.
421 decodedBytes
= inputSize
;
425 #ifdef GNASH_DEBUG_AUDIO_DECODING
426 log_debug(" decoded frame is %d bytes, would grow return "
427 "buffer size to %d bytes", outSize
,
428 retBufSize
+static_cast<unsigned int>(outSize
));
432 // Now append this data to the buffer we're going to return
435 // if the new data doesn't fit, reallocate the output
436 // TODO: can use the Buffer class here.. if we return it too...
437 if ( retBufSize
+(unsigned)outSize
> retCapacity
)
439 #ifdef GNASH_DEBUG_AUDIO_DECODING
440 log_debug(" output buffer won't be able to hold %d bytes, "
441 "capacity is only %d bytes",
442 retBufSize
+(unsigned)outSize
, retCapacity
);
445 boost::uint8_t* tmp
= retBuf
;
446 retCapacity
= std::max(retBufSize
+static_cast<size_t>(outSize
),
449 #ifdef GNASH_DEBUG_AUDIO_DECODING
450 log_debug(" reallocating it to hold up to %d bytes",
452 #endif // GNASH_DEBUG_AUDIO_DECODING
454 retBuf
= new boost::uint8_t[retCapacity
];
455 if ( retBufSize
) std::copy(tmp
, tmp
+retBufSize
, retBuf
);
458 std::copy(outBuf
.get(), outBuf
.get()+outSize
, retBuf
+retBufSize
);
459 retBufSize
+= static_cast<unsigned int>(outSize
);
463 outputSize
= retBufSize
;
469 AudioDecoderFfmpeg::decode(const EncodedAudioFrame
& ef
,
470 boost::uint32_t& outputSize
)
472 return decodeFrame(ef
.data
.get(), ef
.dataSize
, outputSize
);
476 AudioDecoderFfmpeg::decodeFrame(const boost::uint8_t* input
,
477 boost::uint32_t inputSize
, boost::uint32_t& outputSize
)
479 //GNASH_REPORT_FUNCTION;
483 const size_t bufsize
= AVCODEC_MAX_AUDIO_FRAME_SIZE
;
485 // TODO: make this a private member, to reuse (see NetStreamFfmpeg in 0.8.3)
486 boost::uint8_t* output
;
488 output
= reinterpret_cast<boost::uint8_t*>(av_malloc(bufsize
));
490 log_error(_("failed to allocate audio buffer."));
495 boost::int16_t* outPtr
= reinterpret_cast<boost::int16_t*>(output
);
497 // We initialize output size to the full size
498 // then decoding will eventually reduce it
499 int outSize
= bufsize
;
501 #ifdef GNASH_DEBUG_AUDIO_DECODING
502 log_debug("AudioDecoderFfmpeg: about to decode %d bytes; "
503 "ctx->channels:%d, avctx->frame_size:%d",
504 inputSize
, _audioCodecCtx
->channels
, _audioCodecCtx
->frame_size
);
507 // older ffmpeg versions didn't accept a const input..
508 #if LIBAVCODEC_VERSION_MAJOR >= 53
510 av_init_packet(&pkt
);
511 pkt
.data
= (uint8_t*) input
;
512 pkt
.size
= inputSize
;
514 int tmp
= AVCODEC_DECODE_AUDIO(_audioCodecCtx
, outPtr
, &outSize
,
515 #if LIBAVCODEC_VERSION_MAJOR >= 53
521 #ifdef GNASH_DEBUG_AUDIO_DECODING
522 log_debug(" avcodec_decode_audio[2](ctx, bufptr, %d, input, %d) "
523 "returned %d; set frame_size=%d",
524 bufsize
, inputSize
, tmp
, outSize
);
528 log_error(_("avcodec_decode_audio returned %d. Upgrading "
529 "ffmpeg/libavcodec might fix this issue."), tmp
);
537 log_error(_("outputSize:%d after decoding %d bytes of input audio "
538 "data. Upgrading ffmpeg/libavcodec might fix this issue."),
539 outputSize
, inputSize
);
546 // Resampling is needed.
547 if (_resampler
.init(_audioCodecCtx
)) {
548 // Resampling is needed.
550 // Compute new size based on frame_size and
551 // resampling configuration
552 double resampleFactor
= (44100.0/_audioCodecCtx
->sample_rate
) * (2.0/_audioCodecCtx
->channels
);
553 bool stereo
= _audioCodecCtx
->channels
> 1 ? true : false;
554 int inSamples
= stereo
? outSize
>> 2 : outSize
>> 1;
556 int expectedMaxOutSamples
= std::ceil(inSamples
*resampleFactor
);
558 // *channels *sampleSize
559 int resampledFrameSize
= expectedMaxOutSamples
*2*2;
561 // Allocate just the required amount of bytes
562 boost::uint8_t* resampledOutput
= new boost::uint8_t[resampledFrameSize
];
564 #ifdef GNASH_DEBUG_AUDIO_DECODING
565 log_debug("Calling the resampler; resampleFactor:%d; "
566 "ouput to 44100hz, 2channels, %dbytes; "
567 "input is %dhz, %dchannels, %dbytes, %dsamples",
569 resampledFrameSize
, _audioCodecCtx
->sample_rate
,
570 _audioCodecCtx
->channels
, outSize
, inSamples
);
573 int outSamples
= _resampler
.resample(outPtr
, // input
574 reinterpret_cast<boost::int16_t*>(resampledOutput
), // output
575 inSamples
); // input..
577 #ifdef GNASH_DEBUG_AUDIO_DECODING
578 log_debug("resampler returned %d samples ", outSamples
);
581 // make sure to set outPtr *after* we use it as input to the resampler
582 outPtr
= reinterpret_cast<boost::int16_t*>(resampledOutput
);
586 if (expectedMaxOutSamples
< outSamples
) {
587 log_error(_(" --- Computation of resampled samples (%d) < then the actual returned samples (%d)"),
588 expectedMaxOutSamples
, outSamples
);
590 log_debug(" input frame size: %d", outSize
);
591 log_debug(" input sample rate: %d", _audioCodecCtx
->sample_rate
);
592 log_debug(" input channels: %d", _audioCodecCtx
->channels
);
593 log_debug(" input samples: %d", inSamples
);
595 log_debug(" output sample rate (assuming): %d", 44100);
596 log_debug(" output channels (assuming): %d", 2);
597 log_debug(" output samples: %d", outSamples
);
603 // Use the actual number of samples returned, multiplied
604 // to get size in bytes (not two-byte samples) and for
606 outSize
= outSamples
* 2 * 2;
610 boost::uint8_t* newOutput
= new boost::uint8_t[outSize
];
611 std::memcpy(newOutput
, output
, outSize
);
612 outPtr
= reinterpret_cast<boost::int16_t*>(newOutput
);
616 outputSize
= outSize
;
617 return reinterpret_cast<uint8_t*>(outPtr
);
621 AudioDecoderFfmpeg::parseInput(const boost::uint8_t* input
,
622 boost::uint32_t inputSize
,
623 boost::uint8_t const ** outFrame
, int* outFrameSize
)
627 #if LIBAVCODEC_VERSION_MAJOR >= 53
628 return av_parser_parse2(_parser
, _audioCodecCtx
,
630 return av_parser_parse(_parser
, _audioCodecCtx
,
632 // as of 2008-10-28 SVN, ffmpeg doesn't
633 // accept a pointer to pointer to const..
634 const_cast<boost::uint8_t**>(outFrame
),
637 #if LIBAVCODEC_VERSION_MAJOR >= 53
638 0, 0, AV_NOPTS_VALUE
); // pts, dts, pos
645 // democratic value for a chunk to decode...
646 // @todo this might be constrained by codec id, check that !
648 // NOTE: AVCODEC_MAX_AUDIO_FRAME_SIZE resulted bigger
649 // than avcodec_decode_audio could handle, resulting
650 // in eventSoundTest1.swf regression.
651 //static const unsigned int maxFrameSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;
653 // NOTE: 1024 resulted too few
654 // to properly decode (or resample?) raw audio
655 // thus resulting noisy (bugs #21177 and #22284)
656 //static const unsigned int maxFrameSize = 1024;
658 // NOTE: 96000 was found to be the max returned
659 // by avcodec_decode_audio when passed anything
660 // bigger than that. Works fine with all of
661 // eventSoundTest1.swf, bug #21177 and bug #22284
663 static const unsigned int maxFrameSize
= 96000;
665 int frameSize
= inputSize
< maxFrameSize
? inputSize
: maxFrameSize
;
667 // we assume the input is just a set of frames
668 // and we'll consume all
669 *outFrame
= input
; // frame always start on input
670 *outFrameSize
= frameSize
;
671 int parsed
= frameSize
;
677 } // gnash.media.ffmpeg namespace
678 } // gnash.media namespace