demuxer.c: Add support for parsing LATM
[mplayer.git] / libmpdemux / demux_rtp_codec.cpp
blob86d41ddae1b3ebea7a6e36f6a060c555d0ac6755
1 /*
2 * codec-specific routines used to interface between MPlayer
3 * and the "LIVE555 Streaming Media" libraries
5 * This file is part of MPlayer.
7 * MPlayer is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * MPlayer is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #include "demux_rtp_internal.h"
23 extern "C" {
24 #include <limits.h>
25 #include <math.h>
26 #include "stheader.h"
27 #include "libavutil/base64.h"
#ifdef CONFIG_FFMPEG
// FFmpeg H.264 parser state. Both pointers are assigned by
// rtpCodecInitialize_video() when an "H264" subsession is initialized.
AVCodecParserContext *h264parserctx;
AVCodecContext       *avcctx;
#endif
35 // Copied from vlc
36 static unsigned char* parseH264ConfigStr( char const* configStr,
37 unsigned int& configSize )
40 char *dup, *psz;
41 int i, i_records = 1;
43 if( configSize )
44 configSize = 0;
45 if( configStr == NULL || *configStr == '\0' )
46 return NULL;
47 psz = dup = strdup( configStr );
49 /* Count the number of comma's */
50 for( psz = dup; *psz != '\0'; ++psz )
52 if( *psz == ',')
54 ++i_records;
55 *psz = '\0';
59 unsigned char *cfg = new unsigned char[5 * strlen(dup)];
60 psz = dup;
61 for( i = 0; i < i_records; i++ )
64 cfg[configSize++] = 0x00;
65 cfg[configSize++] = 0x00;
66 cfg[configSize++] = 0x01;
67 configSize += av_base64_decode( (uint8_t*)&cfg[configSize],
68 psz,
69 5 * strlen(dup) - 3 );
71 psz += strlen(psz)+1;
73 if( dup ) free( dup );
75 return cfg;
78 static void
79 needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
80 static Boolean
81 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
82 unsigned& fourcc); // forward
83 static Boolean
84 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
85 unsigned& fourcc, unsigned& numChannels); // forward
87 static BITMAPINFOHEADER * insertVideoExtradata(BITMAPINFOHEADER *bih,
88 unsigned char * extraData,
89 unsigned size)
91 BITMAPINFOHEADER * original = bih;
92 if (!size || size > INT_MAX - sizeof(BITMAPINFOHEADER))
93 return bih;
94 bih = (BITMAPINFOHEADER*)realloc(bih, sizeof(BITMAPINFOHEADER) + size);
95 if (!bih)
96 return original;
97 bih->biSize = sizeof(BITMAPINFOHEADER) + size;
98 memcpy(bih+1, extraData, size);
99 return bih;
102 void rtpCodecInitialize_video(demuxer_t* demuxer,
103 MediaSubsession* subsession,
104 unsigned& flags) {
105 flags = 0;
106 // Create a dummy video stream header
107 // to make the main MPlayer code happy:
108 sh_video_t* sh_video = new_sh_video(demuxer,0);
109 BITMAPINFOHEADER* bih
110 = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
111 bih->biSize = sizeof(BITMAPINFOHEADER);
112 sh_video->bih = bih;
113 demux_stream_t* d_video = demuxer->video;
114 d_video->sh = sh_video; sh_video->ds = d_video;
116 // Map known video MIME types to the BITMAPINFOHEADER parameters
117 // that this program uses. (Note that not all types need all
118 // of the parameters to be set.)
119 if (strcmp(subsession->codecName(), "MPV") == 0) {
120 flags |= RTPSTATE_IS_MPEG12_VIDEO;
121 } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
122 strcmp(subsession->codecName(), "MP2T") == 0) {
123 flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
124 } else if (strcmp(subsession->codecName(), "H263") == 0 ||
125 strcmp(subsession->codecName(), "H263-2000") == 0 ||
126 strcmp(subsession->codecName(), "H263-1998") == 0) {
127 bih->biCompression = sh_video->format
128 = mmioFOURCC('H','2','6','3');
129 needVideoFrameRate(demuxer, subsession);
130 } else if (strcmp(subsession->codecName(), "H264") == 0) {
131 bih->biCompression = sh_video->format
132 = mmioFOURCC('H','2','6','4');
133 unsigned int configLen = 0;
134 unsigned char* configData
135 = parseH264ConfigStr(subsession->fmtp_spropparametersets(), configLen);
136 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
137 #ifdef CONFIG_FFMPEG
138 int fooLen;
139 const uint8_t* fooData;
140 avcodec_register_all();
141 h264parserctx = av_parser_init(CODEC_ID_H264);
142 avcctx = avcodec_alloc_context();
143 // Pass the config to the parser
144 h264parserctx->parser->parser_parse(h264parserctx, avcctx,
145 &fooData, &fooLen, configData, configLen);
146 #endif
147 delete[] configData;
148 needVideoFrameRate(demuxer, subsession);
149 } else if (strcmp(subsession->codecName(), "H261") == 0) {
150 bih->biCompression = sh_video->format
151 = mmioFOURCC('H','2','6','1');
152 needVideoFrameRate(demuxer, subsession);
153 } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
154 bih->biCompression = sh_video->format
155 = mmioFOURCC('M','J','P','G');
156 needVideoFrameRate(demuxer, subsession);
157 } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
158 bih->biCompression = sh_video->format
159 = mmioFOURCC('m','p','4','v');
160 // For the codec to work correctly, it may need a 'VOL Header' to be
161 // inserted at the front of the data stream. Construct this from the
162 // "config" MIME parameter, which was present (hopefully) in the
163 // session's SDP description:
164 unsigned configLen;
165 unsigned char* configData
166 = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
167 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
168 needVideoFrameRate(demuxer, subsession);
169 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
170 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
171 // QuickTime generic RTP format, as described in
172 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
174 // We can't initialize this stream until we've received the first packet
175 // that has QuickTime "sdAtom" information in the header. So, keep
176 // reading packets until we get one:
177 unsigned char* packetData; unsigned packetDataLen; float pts;
178 QuickTimeGenericRTPSource* qtRTPSource
179 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
180 unsigned fourcc;
181 do {
182 if (!awaitRTPPacket(demuxer, demuxer->video,
183 packetData, packetDataLen, pts)) {
184 return;
186 } while (!parseQTState_video(qtRTPSource->qtState, fourcc));
188 bih->biCompression = sh_video->format = fourcc;
189 bih->biWidth = qtRTPSource->qtState.width;
190 bih->biHeight = qtRTPSource->qtState.height;
191 if (qtRTPSource->qtState.sdAtomSize > 83)
192 bih->biBitCount = qtRTPSource->qtState.sdAtom[83];
193 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 86;
194 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
195 + qtRTPSource->qtState.sdAtomSize;
196 while (pos+8 < endpos) {
197 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
198 if (atomLength == 0 || atomLength > endpos-pos) break;
199 if (((!memcmp(pos+4, "avcC", 4) && fourcc==mmioFOURCC('a','v','c','1')) ||
200 !memcmp(pos+4, "esds", 4) ||
201 (!memcmp(pos+4, "SMI ", 4) && fourcc==mmioFOURCC('S','V','Q','3'))) &&
202 atomLength > 8) {
203 sh_video->bih = bih =
204 insertVideoExtradata(bih, pos+8, atomLength-8);
205 break;
207 pos += atomLength;
209 needVideoFrameRate(demuxer, subsession);
210 } else {
211 fprintf(stderr,
212 "Unknown MPlayer format code for MIME type \"video/%s\"\n",
213 subsession->codecName());
217 void rtpCodecInitialize_audio(demuxer_t* demuxer,
218 MediaSubsession* subsession,
219 unsigned& flags) {
220 flags = 0;
221 // Create a dummy audio stream header
222 // to make the main MPlayer code happy:
223 sh_audio_t* sh_audio = new_sh_audio(demuxer,0);
224 WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
225 sh_audio->wf = wf;
226 demux_stream_t* d_audio = demuxer->audio;
227 d_audio->sh = sh_audio; sh_audio->ds = d_audio;
228 d_audio->id = sh_audio->aid;
230 wf->nChannels = subsession->numChannels();
232 // Map known audio MIME types to the WAVEFORMATEX parameters
233 // that this program uses. (Note that not all types need all
234 // of the parameters to be set.)
235 wf->nSamplesPerSec
236 = subsession->rtpSource()->timestampFrequency(); // by default
237 if (strcmp(subsession->codecName(), "MPA") == 0 ||
238 strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
239 strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
240 wf->wFormatTag = sh_audio->format = 0x55;
241 // Note: 0x55 is for layer III, but should work for I,II also
242 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
243 } else if (strcmp(subsession->codecName(), "AC3") == 0) {
244 wf->wFormatTag = sh_audio->format = 0x2000;
245 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
246 } else if (strcmp(subsession->codecName(), "L16") == 0) {
247 wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
248 wf->nBlockAlign = 1;
249 wf->wBitsPerSample = 16;
250 wf->cbSize = 0;
251 } else if (strcmp(subsession->codecName(), "L8") == 0) {
252 wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
253 wf->nBlockAlign = 1;
254 wf->wBitsPerSample = 8;
255 wf->cbSize = 0;
256 } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
257 wf->wFormatTag = sh_audio->format = 0x7;
258 wf->nAvgBytesPerSec = 8000;
259 wf->nBlockAlign = 1;
260 wf->wBitsPerSample = 8;
261 wf->cbSize = 0;
262 } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
263 wf->wFormatTag = sh_audio->format = 0x6;
264 wf->nAvgBytesPerSec = 8000;
265 wf->nBlockAlign = 1;
266 wf->wBitsPerSample = 8;
267 wf->cbSize = 0;
268 } else if (strcmp(subsession->codecName(), "AMR") == 0) {
269 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','m','r');
270 } else if (strcmp(subsession->codecName(), "AMR-WB") == 0) {
271 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','w','b');
272 } else if (strcmp(subsession->codecName(), "GSM") == 0) {
273 wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
274 wf->nAvgBytesPerSec = 1650;
275 wf->nBlockAlign = 33;
276 wf->wBitsPerSample = 16;
277 wf->cbSize = 0;
278 } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
279 wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
280 wf->nAvgBytesPerSec = 1750;
281 wf->nBlockAlign = 35;
282 wf->wBitsPerSample = 16;
283 wf->cbSize = 0;
284 } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
285 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
286 // For the codec to work correctly, it needs "AudioSpecificConfig"
287 // data, which is parsed from the "StreamMuxConfig" string that
288 // was present (hopefully) in the SDP description:
289 unsigned codecdata_len;
290 sh_audio->codecdata
291 = parseStreamMuxConfigStr(subsession->fmtp_config(),
292 codecdata_len);
293 sh_audio->codecdata_len = codecdata_len;
294 //faad doesn't understand LATM's data length field, so omit it
295 ((MPEG4LATMAudioRTPSource*)subsession->rtpSource())->omitLATMDataLengthField();
296 } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
297 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
298 // For the codec to work correctly, it needs "AudioSpecificConfig"
299 // data, which was present (hopefully) in the SDP description:
300 unsigned codecdata_len;
301 sh_audio->codecdata
302 = parseGeneralConfigStr(subsession->fmtp_config(),
303 codecdata_len);
304 sh_audio->codecdata_len = codecdata_len;
305 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
306 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
307 // QuickTime generic RTP format, as described in
308 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
310 // We can't initialize this stream until we've received the first packet
311 // that has QuickTime "sdAtom" information in the header. So, keep
312 // reading packets until we get one:
313 unsigned char* packetData; unsigned packetDataLen; float pts;
314 QuickTimeGenericRTPSource* qtRTPSource
315 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
316 unsigned fourcc, numChannels;
317 do {
318 if (!awaitRTPPacket(demuxer, demuxer->audio,
319 packetData, packetDataLen, pts)) {
320 return;
322 } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));
324 wf->wFormatTag = sh_audio->format = fourcc;
325 wf->nChannels = numChannels;
327 if (qtRTPSource->qtState.sdAtomSize > 33) {
328 wf->wBitsPerSample = qtRTPSource->qtState.sdAtom[27];
329 wf->nSamplesPerSec = qtRTPSource->qtState.sdAtom[32]<<8|qtRTPSource->qtState.sdAtom[33];
331 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 52;
332 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
333 + qtRTPSource->qtState.sdAtomSize;
334 while (pos+8 < endpos) {
335 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
336 if (atomLength == 0 || atomLength > endpos-pos) break;
337 if (!memcmp(pos+4, "wave", 4) && fourcc==mmioFOURCC('Q','D','M','2') &&
338 atomLength > 8 &&
339 atomLength <= INT_MAX) {
340 sh_audio->codecdata = (unsigned char*) malloc(atomLength-8);
341 if (sh_audio->codecdata) {
342 memcpy(sh_audio->codecdata, pos+8, atomLength-8);
343 sh_audio->codecdata_len = atomLength-8;
345 break;
347 pos += atomLength;
349 } else {
350 fprintf(stderr,
351 "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
352 subsession->codecName());
356 static void needVideoFrameRate(demuxer_t* demuxer,
357 MediaSubsession* subsession) {
358 // For some codecs, MPlayer's decoding software can't (or refuses to :-)
359 // figure out the frame rate by itself, so (unless the user specifies
360 // it manually, using "-fps") we figure it out ourselves here, using the
361 // presentation timestamps in successive packets,
362 extern double force_fps; if (force_fps != 0.0) return; // user used "-fps"
364 demux_stream_t* d_video = demuxer->video;
365 sh_video_t* sh_video = (sh_video_t*)(d_video->sh);
367 // If we already know the subsession's video frame rate, use it:
368 int fps = (int)(subsession->videoFPS());
369 if (fps != 0) {
370 sh_video->fps = fps;
371 sh_video->frametime = 1.0f/fps;
372 return;
375 // Keep looking at incoming frames until we see two with different,
376 // non-zero "pts" timestamps:
377 unsigned char* packetData; unsigned packetDataLen;
378 float lastPTS = 0.0, curPTS;
379 unsigned const maxNumFramesToWaitFor = 300;
380 int lastfps = 0;
381 for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
382 if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
383 break;
386 if (curPTS != lastPTS && lastPTS != 0.0) {
387 // Use the difference between these two "pts"s to guess the frame rate.
388 // (should really check that there were no missing frames inbetween)#####
389 // Guess the frame rate as an integer. If it's not, use "-fps" instead.
390 fps = (int)(1/fabs(curPTS-lastPTS) + 0.5); // rounding
391 if (fps == lastfps) {
392 fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
393 sh_video->fps = fps;
394 sh_video->frametime=1.0f/fps;
395 return;
397 if (fps>lastfps) lastfps = fps;
399 lastPTS = curPTS;
401 fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
404 static Boolean
405 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
406 unsigned& fourcc) {
407 // qtState's "sdAtom" field is supposed to contain a QuickTime video
408 // 'sample description' atom. This atom's name is the 'fourcc' that we want:
409 char const* sdAtom = qtState.sdAtom;
410 if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;
412 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
413 return True;
416 static Boolean
417 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
418 unsigned& fourcc, unsigned& numChannels) {
419 // qtState's "sdAtom" field is supposed to contain a QuickTime audio
420 // 'sample description' atom. This atom's name is the 'fourcc' that we want.
421 // Also, the top half of the 5th word following the atom name should
422 // contain the number of channels ("numChannels") that we want:
423 char const* sdAtom = qtState.sdAtom;
424 if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;
426 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
428 char const* word7Ptr = &sdAtom[6*4];
429 numChannels = (word7Ptr[0]<<8)|(word7Ptr[1]);
430 return True;