options: change --input=keylist, cmdlist implementation
[mplayer.git] / libmpdemux / demux_rtp_codec.cpp
blobcb21e6b633661d0b55051671fe4931932e0df53b
1 /*
2 * codec-specific routines used to interface between MPlayer
3 * and the "LIVE555 Streaming Media" libraries
5 * This file is part of MPlayer.
7 * MPlayer is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * MPlayer is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #include "demux_rtp_internal.h"
23 extern "C" {
24 #include <limits.h>
25 #include <math.h>
26 #include "stheader.h"
27 #include "libavutil/base64.h"
30 AVCodecParserContext * h264parserctx;
31 AVCodecContext *avcctx;
33 // Copied from vlc
34 static unsigned char* parseH264ConfigStr( char const* configStr,
35 unsigned int& configSize )
38 char *dup, *psz;
39 int i, i_records = 1;
41 if( configSize )
42 configSize = 0;
43 if( configStr == NULL || *configStr == '\0' )
44 return NULL;
45 psz = dup = strdup( configStr );
47 /* Count the number of comma's */
48 for( psz = dup; *psz != '\0'; ++psz )
50 if( *psz == ',')
52 ++i_records;
53 *psz = '\0';
57 unsigned char *cfg = new unsigned char[5 * strlen(dup)];
58 psz = dup;
59 for( i = 0; i < i_records; i++ )
62 cfg[configSize++] = 0x00;
63 cfg[configSize++] = 0x00;
64 cfg[configSize++] = 0x01;
65 configSize += av_base64_decode( (uint8_t*)&cfg[configSize],
66 psz,
67 5 * strlen(dup) - 3 );
69 psz += strlen(psz)+1;
71 free( dup );
73 return cfg;
76 static void
77 needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
78 static Boolean
79 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
80 unsigned& fourcc); // forward
81 static Boolean
82 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
83 unsigned& fourcc, unsigned& numChannels); // forward
85 static BITMAPINFOHEADER * insertVideoExtradata(BITMAPINFOHEADER *bih,
86 unsigned char * extraData,
87 unsigned size)
89 BITMAPINFOHEADER * original = bih;
90 if (!size || size > INT_MAX - sizeof(BITMAPINFOHEADER))
91 return bih;
92 bih = (BITMAPINFOHEADER*)realloc(bih, sizeof(BITMAPINFOHEADER) + size);
93 if (!bih)
94 return original;
95 bih->biSize = sizeof(BITMAPINFOHEADER) + size;
96 memcpy(bih+1, extraData, size);
97 return bih;
100 void rtpCodecInitialize_video(demuxer_t* demuxer,
101 MediaSubsession* subsession,
102 unsigned& flags) {
103 flags = 0;
104 // Create a dummy video stream header
105 // to make the main MPlayer code happy:
106 sh_video_t* sh_video = new_sh_video(demuxer,0);
107 BITMAPINFOHEADER* bih
108 = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
109 bih->biSize = sizeof(BITMAPINFOHEADER);
110 sh_video->bih = bih;
111 demux_stream_t* d_video = demuxer->video;
112 d_video->sh = sh_video; sh_video->ds = d_video;
114 // Map known video MIME types to the BITMAPINFOHEADER parameters
115 // that this program uses. (Note that not all types need all
116 // of the parameters to be set.)
117 if (strcmp(subsession->codecName(), "MPV") == 0) {
118 flags |= RTPSTATE_IS_MPEG12_VIDEO;
119 } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
120 strcmp(subsession->codecName(), "MP2T") == 0) {
121 flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
122 } else if (strcmp(subsession->codecName(), "H263") == 0 ||
123 strcmp(subsession->codecName(), "H263-2000") == 0 ||
124 strcmp(subsession->codecName(), "H263-1998") == 0) {
125 bih->biCompression = sh_video->format
126 = mmioFOURCC('H','2','6','3');
127 needVideoFrameRate(demuxer, subsession);
128 } else if (strcmp(subsession->codecName(), "H264") == 0) {
129 bih->biCompression = sh_video->format
130 = mmioFOURCC('H','2','6','4');
131 unsigned int configLen = 0;
132 unsigned char* configData
133 = parseH264ConfigStr(subsession->fmtp_spropparametersets(), configLen);
134 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
135 int fooLen;
136 const uint8_t* fooData;
137 h264parserctx = av_parser_init(CODEC_ID_H264);
138 avcctx = avcodec_alloc_context3(NULL);
139 // Pass the config to the parser
140 h264parserctx->parser->parser_parse(h264parserctx, avcctx,
141 &fooData, &fooLen, configData, configLen);
142 delete[] configData;
143 needVideoFrameRate(demuxer, subsession);
144 } else if (strcmp(subsession->codecName(), "H261") == 0) {
145 bih->biCompression = sh_video->format
146 = mmioFOURCC('H','2','6','1');
147 needVideoFrameRate(demuxer, subsession);
148 } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
149 bih->biCompression = sh_video->format
150 = mmioFOURCC('M','J','P','G');
151 needVideoFrameRate(demuxer, subsession);
152 } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
153 bih->biCompression = sh_video->format
154 = mmioFOURCC('m','p','4','v');
155 // For the codec to work correctly, it may need a 'VOL Header' to be
156 // inserted at the front of the data stream. Construct this from the
157 // "config" MIME parameter, which was present (hopefully) in the
158 // session's SDP description:
159 unsigned configLen;
160 unsigned char* configData
161 = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
162 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
163 needVideoFrameRate(demuxer, subsession);
164 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
165 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
166 // QuickTime generic RTP format, as described in
167 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
169 // We can't initialize this stream until we've received the first packet
170 // that has QuickTime "sdAtom" information in the header. So, keep
171 // reading packets until we get one:
172 unsigned char* packetData; unsigned packetDataLen; float pts;
173 QuickTimeGenericRTPSource* qtRTPSource
174 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
175 unsigned fourcc;
176 do {
177 if (!awaitRTPPacket(demuxer, demuxer->video,
178 packetData, packetDataLen, pts)) {
179 return;
181 } while (!parseQTState_video(qtRTPSource->qtState, fourcc));
183 bih->biCompression = sh_video->format = fourcc;
184 bih->biWidth = qtRTPSource->qtState.width;
185 bih->biHeight = qtRTPSource->qtState.height;
186 if (qtRTPSource->qtState.sdAtomSize > 83)
187 bih->biBitCount = qtRTPSource->qtState.sdAtom[83];
188 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 86;
189 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
190 + qtRTPSource->qtState.sdAtomSize;
191 while (pos+8 < endpos) {
192 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
193 if (atomLength == 0 || atomLength > endpos-pos) break;
194 if (((!memcmp(pos+4, "avcC", 4) && fourcc==mmioFOURCC('a','v','c','1')) ||
195 !memcmp(pos+4, "esds", 4) ||
196 (!memcmp(pos+4, "SMI ", 4) && fourcc==mmioFOURCC('S','V','Q','3'))) &&
197 atomLength > 8) {
198 sh_video->bih = bih =
199 insertVideoExtradata(bih, pos+8, atomLength-8);
200 break;
202 pos += atomLength;
204 needVideoFrameRate(demuxer, subsession);
205 } else {
206 fprintf(stderr,
207 "Unknown MPlayer format code for MIME type \"video/%s\"\n",
208 subsession->codecName());
212 void rtpCodecInitialize_audio(demuxer_t* demuxer,
213 MediaSubsession* subsession,
214 unsigned& flags) {
215 flags = 0;
216 // Create a dummy audio stream header
217 // to make the main MPlayer code happy:
218 sh_audio_t* sh_audio = new_sh_audio(demuxer,0);
219 WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
220 sh_audio->wf = wf;
221 demux_stream_t* d_audio = demuxer->audio;
222 d_audio->sh = sh_audio; sh_audio->ds = d_audio;
223 d_audio->id = sh_audio->aid;
225 wf->nChannels = subsession->numChannels();
227 // Map known audio MIME types to the WAVEFORMATEX parameters
228 // that this program uses. (Note that not all types need all
229 // of the parameters to be set.)
230 wf->nSamplesPerSec
231 = subsession->rtpSource()->timestampFrequency(); // by default
232 if (strcmp(subsession->codecName(), "MPA") == 0 ||
233 strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
234 strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
235 wf->wFormatTag = sh_audio->format = 0x55;
236 // Note: 0x55 is for layer III, but should work for I,II also
237 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
238 } else if (strcmp(subsession->codecName(), "AC3") == 0) {
239 wf->wFormatTag = sh_audio->format = 0x2000;
240 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
241 } else if (strcmp(subsession->codecName(), "L16") == 0) {
242 wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
243 wf->nBlockAlign = 1;
244 wf->wBitsPerSample = 16;
245 wf->cbSize = 0;
246 } else if (strcmp(subsession->codecName(), "L8") == 0) {
247 wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
248 wf->nBlockAlign = 1;
249 wf->wBitsPerSample = 8;
250 wf->cbSize = 0;
251 } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
252 wf->wFormatTag = sh_audio->format = 0x7;
253 wf->nAvgBytesPerSec = 8000;
254 wf->nBlockAlign = 1;
255 wf->wBitsPerSample = 8;
256 wf->cbSize = 0;
257 } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
258 wf->wFormatTag = sh_audio->format = 0x6;
259 wf->nAvgBytesPerSec = 8000;
260 wf->nBlockAlign = 1;
261 wf->wBitsPerSample = 8;
262 wf->cbSize = 0;
263 } else if (strcmp(subsession->codecName(), "AMR") == 0) {
264 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','m','r');
265 } else if (strcmp(subsession->codecName(), "AMR-WB") == 0) {
266 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','w','b');
267 } else if (strcmp(subsession->codecName(), "GSM") == 0) {
268 wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
269 wf->nAvgBytesPerSec = 1650;
270 wf->nBlockAlign = 33;
271 wf->wBitsPerSample = 16;
272 wf->cbSize = 0;
273 } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
274 wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
275 wf->nAvgBytesPerSec = 1750;
276 wf->nBlockAlign = 35;
277 wf->wBitsPerSample = 16;
278 wf->cbSize = 0;
279 } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
280 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
281 // For the codec to work correctly, it needs "AudioSpecificConfig"
282 // data, which is parsed from the "StreamMuxConfig" string that
283 // was present (hopefully) in the SDP description:
284 unsigned codecdata_len;
285 sh_audio->codecdata
286 = parseStreamMuxConfigStr(subsession->fmtp_config(),
287 codecdata_len);
288 sh_audio->codecdata_len = codecdata_len;
289 //faad doesn't understand LATM's data length field, so omit it
290 ((MPEG4LATMAudioRTPSource*)subsession->rtpSource())->omitLATMDataLengthField();
291 } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
292 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
293 // For the codec to work correctly, it needs "AudioSpecificConfig"
294 // data, which was present (hopefully) in the SDP description:
295 unsigned codecdata_len;
296 sh_audio->codecdata
297 = parseGeneralConfigStr(subsession->fmtp_config(),
298 codecdata_len);
299 sh_audio->codecdata_len = codecdata_len;
300 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
301 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
302 // QuickTime generic RTP format, as described in
303 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
305 // We can't initialize this stream until we've received the first packet
306 // that has QuickTime "sdAtom" information in the header. So, keep
307 // reading packets until we get one:
308 unsigned char* packetData; unsigned packetDataLen; float pts;
309 QuickTimeGenericRTPSource* qtRTPSource
310 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
311 unsigned fourcc, numChannels;
312 do {
313 if (!awaitRTPPacket(demuxer, demuxer->audio,
314 packetData, packetDataLen, pts)) {
315 return;
317 } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));
319 wf->wFormatTag = sh_audio->format = fourcc;
320 wf->nChannels = numChannels;
322 if (qtRTPSource->qtState.sdAtomSize > 33) {
323 wf->wBitsPerSample = qtRTPSource->qtState.sdAtom[27];
324 wf->nSamplesPerSec = qtRTPSource->qtState.sdAtom[32]<<8|qtRTPSource->qtState.sdAtom[33];
326 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 52;
327 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
328 + qtRTPSource->qtState.sdAtomSize;
329 while (pos+8 < endpos) {
330 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
331 if (atomLength == 0 || atomLength > endpos-pos) break;
332 if (!memcmp(pos+4, "wave", 4) && fourcc==mmioFOURCC('Q','D','M','2') &&
333 atomLength > 8 &&
334 atomLength <= INT_MAX) {
335 sh_audio->codecdata = (unsigned char*) malloc(atomLength-8);
336 if (sh_audio->codecdata) {
337 memcpy(sh_audio->codecdata, pos+8, atomLength-8);
338 sh_audio->codecdata_len = atomLength-8;
340 break;
342 pos += atomLength;
344 } else {
345 fprintf(stderr,
346 "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
347 subsession->codecName());
351 static void needVideoFrameRate(demuxer_t* demuxer,
352 MediaSubsession* subsession) {
353 // For some codecs, MPlayer's decoding software can't (or refuses to :-)
354 // figure out the frame rate by itself, so (unless the user specifies
355 // it manually, using "-fps") we figure it out ourselves here, using the
356 // presentation timestamps in successive packets,
357 extern double force_fps; if (force_fps != 0.0) return; // user used "-fps"
359 demux_stream_t* d_video = demuxer->video;
360 sh_video_t* sh_video = (sh_video_t*)(d_video->sh);
362 // If we already know the subsession's video frame rate, use it:
363 int fps = (int)(subsession->videoFPS());
364 if (fps != 0) {
365 sh_video->fps = fps;
366 sh_video->frametime = 1.0f/fps;
367 return;
370 // Keep looking at incoming frames until we see two with different,
371 // non-zero "pts" timestamps:
372 unsigned char* packetData; unsigned packetDataLen;
373 float lastPTS = 0.0, curPTS;
374 unsigned const maxNumFramesToWaitFor = 300;
375 int lastfps = 0;
376 for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
377 if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
378 break;
381 if (curPTS != lastPTS && lastPTS != 0.0) {
382 // Use the difference between these two "pts"s to guess the frame rate.
383 // (should really check that there were no missing frames inbetween)#####
384 // Guess the frame rate as an integer. If it's not, use "-fps" instead.
385 fps = (int)(1/fabs(curPTS-lastPTS) + 0.5); // rounding
386 if (fps == lastfps) {
387 fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
388 sh_video->fps = fps;
389 sh_video->frametime=1.0f/fps;
390 return;
392 if (fps>lastfps) lastfps = fps;
394 lastPTS = curPTS;
396 fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
399 static Boolean
400 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
401 unsigned& fourcc) {
402 // qtState's "sdAtom" field is supposed to contain a QuickTime video
403 // 'sample description' atom. This atom's name is the 'fourcc' that we want:
404 char const* sdAtom = qtState.sdAtom;
405 if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;
407 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
408 return True;
411 static Boolean
412 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
413 unsigned& fourcc, unsigned& numChannels) {
414 // qtState's "sdAtom" field is supposed to contain a QuickTime audio
415 // 'sample description' atom. This atom's name is the 'fourcc' that we want.
416 // Also, the top half of the 5th word following the atom name should
417 // contain the number of channels ("numChannels") that we want:
418 char const* sdAtom = qtState.sdAtom;
419 if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;
421 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
423 char const* word7Ptr = &sdAtom[6*4];
424 numChannels = (word7Ptr[0]<<8)|(word7Ptr[1]);
425 return True;