At least direct3d vo supports -xineramascreen, too
[mplayer/glamo.git] / libmpdemux / demux_rtp_codec.cpp
blob15d9fd0be400fad7862d48a47e85cd533ee2f37c
1 ////////// Codec-specific routines used to interface between "MPlayer"
2 ////////// and the "LIVE555 Streaming Media" libraries:
4 #include "demux_rtp_internal.h"
5 extern "C" {
6 #include <limits.h>
7 #include <math.h>
8 #include "stheader.h"
9 #include "libavutil/base64.h"
#if defined(CONFIG_LIBAVCODEC)
// H.264 parser context. rtpCodecInitialize_video() assigns it (via
// av_parser_init) when an "H264" subsession is initialised.
AVCodecParserContext *h264parserctx;
#endif
16 // Copied from vlc
17 static unsigned char* parseH264ConfigStr( char const* configStr,
18 unsigned int& configSize )
21 char *dup, *psz;
22 int i, i_records = 1;
24 if( configSize )
25 configSize = 0;
26 if( configStr == NULL || *configStr == '\0' )
27 return NULL;
28 psz = dup = strdup( configStr );
30 /* Count the number of comma's */
31 for( psz = dup; *psz != '\0'; ++psz )
33 if( *psz == ',')
35 ++i_records;
36 *psz = '\0';
40 unsigned char *cfg = new unsigned char[5 * strlen(dup)];
41 psz = dup;
42 for( i = 0; i < i_records; i++ )
45 cfg[configSize++] = 0x00;
46 cfg[configSize++] = 0x00;
47 cfg[configSize++] = 0x01;
48 configSize += av_base64_decode( (uint8_t*)&cfg[configSize],
49 psz,
50 5 * strlen(dup) - 3 );
52 psz += strlen(psz)+1;
54 if( dup ) free( dup );
56 return cfg;
59 static void
60 needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
61 static Boolean
62 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
63 unsigned& fourcc); // forward
64 static Boolean
65 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
66 unsigned& fourcc, unsigned& numChannels); // forward
68 static BITMAPINFOHEADER * insertVideoExtradata(BITMAPINFOHEADER *bih,
69 unsigned char * extraData,
70 unsigned size)
72 BITMAPINFOHEADER * original = bih;
73 if (!size || size > INT_MAX - sizeof(BITMAPINFOHEADER))
74 return bih;
75 bih = (BITMAPINFOHEADER*)realloc(bih, sizeof(BITMAPINFOHEADER) + size);
76 if (!bih)
77 return original;
78 bih->biSize = sizeof(BITMAPINFOHEADER) + size;
79 memcpy(bih+1, extraData, size);
80 return bih;
83 void rtpCodecInitialize_video(demuxer_t* demuxer,
84 MediaSubsession* subsession,
85 unsigned& flags) {
86 flags = 0;
87 // Create a dummy video stream header
88 // to make the main MPlayer code happy:
89 sh_video_t* sh_video = new_sh_video(demuxer,0);
90 BITMAPINFOHEADER* bih
91 = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
92 bih->biSize = sizeof(BITMAPINFOHEADER);
93 sh_video->bih = bih;
94 demux_stream_t* d_video = demuxer->video;
95 d_video->sh = sh_video; sh_video->ds = d_video;
97 // Map known video MIME types to the BITMAPINFOHEADER parameters
98 // that this program uses. (Note that not all types need all
99 // of the parameters to be set.)
100 if (strcmp(subsession->codecName(), "MPV") == 0) {
101 flags |= RTPSTATE_IS_MPEG12_VIDEO;
102 } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
103 strcmp(subsession->codecName(), "MP2T") == 0) {
104 flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
105 } else if (strcmp(subsession->codecName(), "H263") == 0 ||
106 strcmp(subsession->codecName(), "H263-2000") == 0 ||
107 strcmp(subsession->codecName(), "H263-1998") == 0) {
108 bih->biCompression = sh_video->format
109 = mmioFOURCC('H','2','6','3');
110 needVideoFrameRate(demuxer, subsession);
111 } else if (strcmp(subsession->codecName(), "H264") == 0) {
112 bih->biCompression = sh_video->format
113 = mmioFOURCC('H','2','6','4');
114 unsigned int configLen = 0;
115 unsigned char* configData
116 = parseH264ConfigStr(subsession->fmtp_spropparametersets(), configLen);
117 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
118 delete[] configData;
119 #ifdef CONFIG_LIBAVCODEC
120 avcodec_register_all();
121 h264parserctx = av_parser_init(CODEC_ID_H264);
122 #endif
123 needVideoFrameRate(demuxer, subsession);
124 } else if (strcmp(subsession->codecName(), "H261") == 0) {
125 bih->biCompression = sh_video->format
126 = mmioFOURCC('H','2','6','1');
127 needVideoFrameRate(demuxer, subsession);
128 } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
129 bih->biCompression = sh_video->format
130 = mmioFOURCC('M','J','P','G');
131 needVideoFrameRate(demuxer, subsession);
132 } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
133 bih->biCompression = sh_video->format
134 = mmioFOURCC('m','p','4','v');
135 // For the codec to work correctly, it may need a 'VOL Header' to be
136 // inserted at the front of the data stream. Construct this from the
137 // "config" MIME parameter, which was present (hopefully) in the
138 // session's SDP description:
139 unsigned configLen;
140 unsigned char* configData
141 = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
142 sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
143 needVideoFrameRate(demuxer, subsession);
144 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
145 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
146 // QuickTime generic RTP format, as described in
147 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
149 // We can't initialize this stream until we've received the first packet
150 // that has QuickTime "sdAtom" information in the header. So, keep
151 // reading packets until we get one:
152 unsigned char* packetData; unsigned packetDataLen; float pts;
153 QuickTimeGenericRTPSource* qtRTPSource
154 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
155 unsigned fourcc;
156 do {
157 if (!awaitRTPPacket(demuxer, demuxer->video,
158 packetData, packetDataLen, pts)) {
159 return;
161 } while (!parseQTState_video(qtRTPSource->qtState, fourcc));
163 bih->biCompression = sh_video->format = fourcc;
164 bih->biWidth = qtRTPSource->qtState.width;
165 bih->biHeight = qtRTPSource->qtState.height;
166 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 86;
167 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
168 + qtRTPSource->qtState.sdAtomSize;
169 while (pos+8 < endpos) {
170 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
171 if (atomLength == 0 || atomLength > endpos-pos) break;
172 if ((!memcmp(pos+4, "avcC", 4) && fourcc==mmioFOURCC('a','v','c','1') ||
173 !memcmp(pos+4, "esds", 4) ||
174 !memcmp(pos+4, "SMI ", 4) && fourcc==mmioFOURCC('S','V','Q','3')) &&
175 atomLength > 8) {
176 sh_video->bih = bih =
177 insertVideoExtradata(bih, pos+8, atomLength-8);
178 break;
180 pos += atomLength;
182 needVideoFrameRate(demuxer, subsession);
183 } else {
184 fprintf(stderr,
185 "Unknown MPlayer format code for MIME type \"video/%s\"\n",
186 subsession->codecName());
190 void rtpCodecInitialize_audio(demuxer_t* demuxer,
191 MediaSubsession* subsession,
192 unsigned& flags) {
193 flags = 0;
194 // Create a dummy audio stream header
195 // to make the main MPlayer code happy:
196 sh_audio_t* sh_audio = new_sh_audio(demuxer,0);
197 WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
198 sh_audio->wf = wf;
199 demux_stream_t* d_audio = demuxer->audio;
200 d_audio->sh = sh_audio; sh_audio->ds = d_audio;
201 d_audio->id = sh_audio->aid;
203 wf->nChannels = subsession->numChannels();
205 // Map known audio MIME types to the WAVEFORMATEX parameters
206 // that this program uses. (Note that not all types need all
207 // of the parameters to be set.)
208 wf->nSamplesPerSec
209 = subsession->rtpSource()->timestampFrequency(); // by default
210 if (strcmp(subsession->codecName(), "MPA") == 0 ||
211 strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
212 strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
213 wf->wFormatTag = sh_audio->format = 0x55;
214 // Note: 0x55 is for layer III, but should work for I,II also
215 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
216 } else if (strcmp(subsession->codecName(), "AC3") == 0) {
217 wf->wFormatTag = sh_audio->format = 0x2000;
218 wf->nSamplesPerSec = 0; // sample rate is deduced from the data
219 } else if (strcmp(subsession->codecName(), "L16") == 0) {
220 wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
221 wf->nBlockAlign = 1;
222 wf->wBitsPerSample = 16;
223 wf->cbSize = 0;
224 } else if (strcmp(subsession->codecName(), "L8") == 0) {
225 wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
226 wf->nBlockAlign = 1;
227 wf->wBitsPerSample = 8;
228 wf->cbSize = 0;
229 } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
230 wf->wFormatTag = sh_audio->format = 0x7;
231 wf->nAvgBytesPerSec = 8000;
232 wf->nBlockAlign = 1;
233 wf->wBitsPerSample = 8;
234 wf->cbSize = 0;
235 } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
236 wf->wFormatTag = sh_audio->format = 0x6;
237 wf->nAvgBytesPerSec = 8000;
238 wf->nBlockAlign = 1;
239 wf->wBitsPerSample = 8;
240 wf->cbSize = 0;
241 } else if (strcmp(subsession->codecName(), "AMR") == 0) {
242 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','m','r');
243 } else if (strcmp(subsession->codecName(), "AMR-WB") == 0) {
244 wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','w','b');
245 } else if (strcmp(subsession->codecName(), "GSM") == 0) {
246 wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
247 wf->nAvgBytesPerSec = 1650;
248 wf->nBlockAlign = 33;
249 wf->wBitsPerSample = 16;
250 wf->cbSize = 0;
251 } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
252 wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
253 wf->nAvgBytesPerSec = 1750;
254 wf->nBlockAlign = 35;
255 wf->wBitsPerSample = 16;
256 wf->cbSize = 0;
257 } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
258 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
259 // For the codec to work correctly, it needs "AudioSpecificConfig"
260 // data, which is parsed from the "StreamMuxConfig" string that
261 // was present (hopefully) in the SDP description:
262 unsigned codecdata_len;
263 sh_audio->codecdata
264 = parseStreamMuxConfigStr(subsession->fmtp_config(),
265 codecdata_len);
266 sh_audio->codecdata_len = codecdata_len;
267 //faad doesn't understand LATM's data length field, so omit it
268 ((MPEG4LATMAudioRTPSource*)subsession->rtpSource())->omitLATMDataLengthField();
269 } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
270 wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
271 // For the codec to work correctly, it needs "AudioSpecificConfig"
272 // data, which was present (hopefully) in the SDP description:
273 unsigned codecdata_len;
274 sh_audio->codecdata
275 = parseGeneralConfigStr(subsession->fmtp_config(),
276 codecdata_len);
277 sh_audio->codecdata_len = codecdata_len;
278 } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
279 strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
280 // QuickTime generic RTP format, as described in
281 // http://developer.apple.com/quicktime/icefloe/dispatch026.html
283 // We can't initialize this stream until we've received the first packet
284 // that has QuickTime "sdAtom" information in the header. So, keep
285 // reading packets until we get one:
286 unsigned char* packetData; unsigned packetDataLen; float pts;
287 QuickTimeGenericRTPSource* qtRTPSource
288 = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
289 unsigned fourcc, numChannels;
290 do {
291 if (!awaitRTPPacket(demuxer, demuxer->audio,
292 packetData, packetDataLen, pts)) {
293 return;
295 } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));
297 wf->wFormatTag = sh_audio->format = fourcc;
298 wf->nChannels = numChannels;
300 uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 52;
301 uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
302 + qtRTPSource->qtState.sdAtomSize;
303 while (pos+8 < endpos) {
304 unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
305 if (atomLength == 0 || atomLength > endpos-pos) break;
306 if (!memcmp(pos+4, "wave", 4) && fourcc==mmioFOURCC('Q','D','M','2') &&
307 atomLength > 8 &&
308 atomLength <= INT_MAX) {
309 sh_audio->codecdata = (unsigned char*) malloc(atomLength-8);
310 if (sh_audio->codecdata) {
311 memcpy(sh_audio->codecdata, pos+8, atomLength-8);
312 sh_audio->codecdata_len = atomLength-8;
314 break;
316 pos += atomLength;
318 } else {
319 fprintf(stderr,
320 "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
321 subsession->codecName());
325 static void needVideoFrameRate(demuxer_t* demuxer,
326 MediaSubsession* subsession) {
327 // For some codecs, MPlayer's decoding software can't (or refuses to :-)
328 // figure out the frame rate by itself, so (unless the user specifies
329 // it manually, using "-fps") we figure it out ourselves here, using the
330 // presentation timestamps in successive packets,
331 extern double force_fps; if (force_fps != 0.0) return; // user used "-fps"
333 demux_stream_t* d_video = demuxer->video;
334 sh_video_t* sh_video = (sh_video_t*)(d_video->sh);
336 // If we already know the subsession's video frame rate, use it:
337 int fps = (int)(subsession->videoFPS());
338 if (fps != 0) {
339 sh_video->fps = fps;
340 sh_video->frametime = 1.0f/fps;
341 return;
344 // Keep looking at incoming frames until we see two with different,
345 // non-zero "pts" timestamps:
346 unsigned char* packetData; unsigned packetDataLen;
347 float lastPTS = 0.0, curPTS;
348 unsigned const maxNumFramesToWaitFor = 300;
349 int lastfps = 0;
350 for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
351 if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
352 break;
355 if (curPTS != lastPTS && lastPTS != 0.0) {
356 // Use the difference between these two "pts"s to guess the frame rate.
357 // (should really check that there were no missing frames inbetween)#####
358 // Guess the frame rate as an integer. If it's not, use "-fps" instead.
359 fps = (int)(1/fabs(curPTS-lastPTS) + 0.5); // rounding
360 if (fps == lastfps) {
361 fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
362 sh_video->fps = fps;
363 sh_video->frametime=1.0f/fps;
364 return;
366 if (fps>lastfps) lastfps = fps;
368 lastPTS = curPTS;
370 fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
373 static Boolean
374 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
375 unsigned& fourcc) {
376 // qtState's "sdAtom" field is supposed to contain a QuickTime video
377 // 'sample description' atom. This atom's name is the 'fourcc' that we want:
378 char const* sdAtom = qtState.sdAtom;
379 if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;
381 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
382 return True;
385 static Boolean
386 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
387 unsigned& fourcc, unsigned& numChannels) {
388 // qtState's "sdAtom" field is supposed to contain a QuickTime audio
389 // 'sample description' atom. This atom's name is the 'fourcc' that we want.
390 // Also, the top half of the 5th word following the atom name should
391 // contain the number of channels ("numChannels") that we want:
392 char const* sdAtom = qtState.sdAtom;
393 if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;
395 fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
397 char const* word7Ptr = &sdAtom[6*4];
398 numChannels = (word7Ptr[0]<<8)|(word7Ptr[1]);
399 return True;