libmpdemux/demux_rtp_codec.cpp

   1 ////////// Codec-specific routines used to interface between "MPlayer"
   2 ////////// and the "LIVE555 Streaming Media" libraries:
   3
   4 #include "demux_rtp_internal.h"
   5 extern "C" {
   6 #include <limits.h>
   7 #include <math.h>
   8 #include "stheader.h"
   9 #include "libavutil/base64.h"
  10 }
  11
  12 #ifdef CONFIG_LIBAVCODEC
  13 AVCodecParserContext * h264parserctx;
  14 #endif
  15
  16 // Copied from vlc
  17 static unsigned char* parseH264ConfigStr( char const* configStr,
  18                                           unsigned int& configSize )
  19 {
  20
  21     char *dup, *psz;
  22     int i, i_records = 1;
  23
  24     if( configSize )
  25     configSize = 0;
  26     if( configStr == NULL || *configStr == '\0' )
  27         return NULL;
  28     psz = dup = strdup( configStr );
  29
  30  /* Count the number of comma's */
  31     for( psz = dup; *psz != '\0'; ++psz )
  32     {
  33         if( *psz == ',')
  34         {
  35             ++i_records;
  36             *psz = '\0';
  37         }
  38     }
  39
  40     unsigned char *cfg = new unsigned char[5 * strlen(dup)];
  41     psz = dup;
  42     for( i = 0; i < i_records; i++ )
  43     {
  44
  45         cfg[configSize++] = 0x00;
  46         cfg[configSize++] = 0x00;
  47         cfg[configSize++] = 0x01;
  48         configSize += av_base64_decode( (uint8_t*)&cfg[configSize],
  49                                         psz,
  50                                         5 * strlen(dup) - 3 );
  51
  52     psz += strlen(psz)+1;
  53     }
  54     if( dup ) free( dup );
  55
  56     return cfg;
  57 }
  58
  59 static void
  60 needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
  61 static Boolean
  62 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
  63                    unsigned& fourcc); // forward
  64 static Boolean
  65 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
  66                    unsigned& fourcc, unsigned& numChannels); // forward
  67
  68 static BITMAPINFOHEADER * insertVideoExtradata(BITMAPINFOHEADER *bih,
  69                                                unsigned char * extraData,
  70                                                unsigned size)
  71 {
  72     BITMAPINFOHEADER * original = bih;
  73     if (!size || size > INT_MAX - sizeof(BITMAPINFOHEADER))
  74         return bih;
  75     bih = (BITMAPINFOHEADER*)realloc(bih, sizeof(BITMAPINFOHEADER) + size);
  76     if (!bih)
  77         return original;
  78     bih->biSize = sizeof(BITMAPINFOHEADER) + size;
  79     memcpy(bih+1, extraData, size);
  80     return bih;
  81 }
  82
  83 void rtpCodecInitialize_video(demuxer_t* demuxer,
  84                               MediaSubsession* subsession,
  85                               unsigned& flags) {
  86   flags = 0;
  87   // Create a dummy video stream header
  88   // to make the main MPlayer code happy:
  89   sh_video_t* sh_video = new_sh_video(demuxer,0);
  90   BITMAPINFOHEADER* bih
  91     = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
  92   bih->biSize = sizeof(BITMAPINFOHEADER);
  93   sh_video->bih = bih;
  94   demux_stream_t* d_video = demuxer->video;
  95   d_video->sh = sh_video; sh_video->ds = d_video;
  96
  97   // Map known video MIME types to the BITMAPINFOHEADER parameters
  98   // that this program uses.  (Note that not all types need all
  99   // of the parameters to be set.)
 100   if (strcmp(subsession->codecName(), "MPV") == 0) {
 101     flags |= RTPSTATE_IS_MPEG12_VIDEO;
 102   } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
 103              strcmp(subsession->codecName(), "MP2T") == 0) {
 104     flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
 105   } else if (strcmp(subsession->codecName(), "H263") == 0 ||
 106              strcmp(subsession->codecName(), "H263-2000") == 0 ||
 107              strcmp(subsession->codecName(), "H263-1998") == 0) {
 108     bih->biCompression = sh_video->format
 109       = mmioFOURCC('H','2','6','3');
 110     needVideoFrameRate(demuxer, subsession);
 111   } else if (strcmp(subsession->codecName(), "H264") == 0) {
 112     bih->biCompression = sh_video->format
 113       = mmioFOURCC('H','2','6','4');
 114     unsigned int configLen = 0;
 115     unsigned char* configData
 116       = parseH264ConfigStr(subsession->fmtp_spropparametersets(), configLen);
 117     sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
 118     delete[] configData;
 119 #ifdef CONFIG_LIBAVCODEC
 120     avcodec_register_all();
 121     h264parserctx = av_parser_init(CODEC_ID_H264);
 122 #endif
 123     needVideoFrameRate(demuxer, subsession);
 124   } else if (strcmp(subsession->codecName(), "H261") == 0) {
 125     bih->biCompression = sh_video->format
 126       = mmioFOURCC('H','2','6','1');
 127     needVideoFrameRate(demuxer, subsession);
 128   } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
 129     bih->biCompression = sh_video->format
 130       = mmioFOURCC('M','J','P','G');
 131     needVideoFrameRate(demuxer, subsession);
 132   } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
 133     bih->biCompression = sh_video->format
 134       = mmioFOURCC('m','p','4','v');
 135     // For the codec to work correctly, it may need a 'VOL Header' to be
 136     // inserted at the front of the data stream.  Construct this from the
 137     // "config" MIME parameter, which was present (hopefully) in the
 138     // session's SDP description:
 139     unsigned configLen;
 140     unsigned char* configData
 141       = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
 142     sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
 143     needVideoFrameRate(demuxer, subsession);
 144   } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
 145              strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
 146     // QuickTime generic RTP format, as described in
 147     // http://developer.apple.com/quicktime/icefloe/dispatch026.html
 148
 149     // We can't initialize this stream until we've received the first packet
 150     // that has QuickTime "sdAtom" information in the header.  So, keep
 151     // reading packets until we get one:
 152     unsigned char* packetData; unsigned packetDataLen; float pts;
 153     QuickTimeGenericRTPSource* qtRTPSource
 154       = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
 155     unsigned fourcc;
 156     do {
 157       if (!awaitRTPPacket(demuxer, demuxer->video,
 158                           packetData, packetDataLen, pts)) {
 159         return;
 160       }
 161     } while (!parseQTState_video(qtRTPSource->qtState, fourcc));
 162
 163     bih->biCompression = sh_video->format = fourcc;
 164     bih->biWidth = qtRTPSource->qtState.width;
 165     bih->biHeight = qtRTPSource->qtState.height;
 166       uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 86;
 167       uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
 168                         + qtRTPSource->qtState.sdAtomSize;
 169       while (pos+8 < endpos) {
 170         unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
 171         if (atomLength == 0 || atomLength > endpos-pos) break;
 172         if ((!memcmp(pos+4, "avcC", 4) && fourcc==mmioFOURCC('a','v','c','1') ||
 173              !memcmp(pos+4, "esds", 4) ||
 174              !memcmp(pos+4, "SMI ", 4) && fourcc==mmioFOURCC('S','V','Q','3')) &&
 175             atomLength > 8) {
 176           sh_video->bih = bih =
 177               insertVideoExtradata(bih, pos+8, atomLength-8);
 178           break;
 179         }
 180         pos += atomLength;
 181       }
 182     needVideoFrameRate(demuxer, subsession);
 183   } else {
 184     fprintf(stderr,
 185             "Unknown MPlayer format code for MIME type \"video/%s\"\n",
 186             subsession->codecName());
 187   }
 188 }
 189
 190 void rtpCodecInitialize_audio(demuxer_t* demuxer,
 191                               MediaSubsession* subsession,
 192                               unsigned& flags) {
 193   flags = 0;
 194   // Create a dummy audio stream header
 195   // to make the main MPlayer code happy:
 196   sh_audio_t* sh_audio = new_sh_audio(demuxer,0);
 197   WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
 198   sh_audio->wf = wf;
 199   demux_stream_t* d_audio = demuxer->audio;
 200   d_audio->sh = sh_audio; sh_audio->ds = d_audio;
 201   d_audio->id = sh_audio->aid;
 202
 203   wf->nChannels = subsession->numChannels();
 204
 205   // Map known audio MIME types to the WAVEFORMATEX parameters
 206   // that this program uses.  (Note that not all types need all
 207   // of the parameters to be set.)
 208   wf->nSamplesPerSec
 209     = subsession->rtpSource()->timestampFrequency(); // by default
 210   if (strcmp(subsession->codecName(), "MPA") == 0 ||
 211       strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
 212       strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
 213     wf->wFormatTag = sh_audio->format = 0x55;
 214     // Note: 0x55 is for layer III, but should work for I,II also
 215     wf->nSamplesPerSec = 0; // sample rate is deduced from the data
 216   } else if (strcmp(subsession->codecName(), "AC3") == 0) {
 217     wf->wFormatTag = sh_audio->format = 0x2000;
 218     wf->nSamplesPerSec = 0; // sample rate is deduced from the data
 219   } else if (strcmp(subsession->codecName(), "L16") == 0) {
 220     wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
 221     wf->nBlockAlign = 1;
 222     wf->wBitsPerSample = 16;
 223     wf->cbSize = 0;
 224   } else if (strcmp(subsession->codecName(), "L8") == 0) {
 225     wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
 226     wf->nBlockAlign = 1;
 227     wf->wBitsPerSample = 8;
 228     wf->cbSize = 0;
 229   } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
 230     wf->wFormatTag = sh_audio->format = 0x7;
 231     wf->nAvgBytesPerSec = 8000;
 232     wf->nBlockAlign = 1;
 233     wf->wBitsPerSample = 8;
 234     wf->cbSize = 0;
 235   } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
 236     wf->wFormatTag = sh_audio->format = 0x6;
 237     wf->nAvgBytesPerSec = 8000;
 238     wf->nBlockAlign = 1;
 239     wf->wBitsPerSample = 8;
 240     wf->cbSize = 0;
 241   } else if (strcmp(subsession->codecName(), "AMR") == 0) {
 242     wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','m','r');
 243   } else if (strcmp(subsession->codecName(), "AMR-WB") == 0) {
 244     wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','w','b');
 245   } else if (strcmp(subsession->codecName(), "GSM") == 0) {
 246     wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
 247     wf->nAvgBytesPerSec = 1650;
 248     wf->nBlockAlign = 33;
 249     wf->wBitsPerSample = 16;
 250     wf->cbSize = 0;
 251   } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
 252     wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
 253     wf->nAvgBytesPerSec = 1750;
 254     wf->nBlockAlign = 35;
 255     wf->wBitsPerSample = 16;
 256     wf->cbSize = 0;
 257   } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
 258     wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
 259     // For the codec to work correctly, it needs "AudioSpecificConfig"
 260     // data, which is parsed from the "StreamMuxConfig" string that
 261     // was present (hopefully) in the SDP description:
 262     unsigned codecdata_len;
 263     sh_audio->codecdata
 264       = parseStreamMuxConfigStr(subsession->fmtp_config(),
 265                                 codecdata_len);
 266     sh_audio->codecdata_len = codecdata_len;
 267     //faad doesn't understand LATM's data length field, so omit it
 268     ((MPEG4LATMAudioRTPSource*)subsession->rtpSource())->omitLATMDataLengthField();
 269   } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
 270     wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
 271     // For the codec to work correctly, it needs "AudioSpecificConfig"
 272     // data, which was present (hopefully) in the SDP description:
 273     unsigned codecdata_len;
 274     sh_audio->codecdata
 275       = parseGeneralConfigStr(subsession->fmtp_config(),
 276                               codecdata_len);
 277     sh_audio->codecdata_len = codecdata_len;
 278   } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
 279              strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
 280     // QuickTime generic RTP format, as described in
 281     // http://developer.apple.com/quicktime/icefloe/dispatch026.html
 282
 283     // We can't initialize this stream until we've received the first packet
 284     // that has QuickTime "sdAtom" information in the header.  So, keep
 285     // reading packets until we get one:
 286     unsigned char* packetData; unsigned packetDataLen; float pts;
 287     QuickTimeGenericRTPSource* qtRTPSource
 288       = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
 289     unsigned fourcc, numChannels;
 290     do {
 291       if (!awaitRTPPacket(demuxer, demuxer->audio,
 292                           packetData, packetDataLen, pts)) {
 293         return;
 294       }
 295     } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));
 296
 297     wf->wFormatTag = sh_audio->format = fourcc;
 298     wf->nChannels = numChannels;
 299
 300     uint8_t *pos = (uint8_t*)qtRTPSource->qtState.sdAtom + 52;
 301     uint8_t *endpos = (uint8_t*)qtRTPSource->qtState.sdAtom
 302                       + qtRTPSource->qtState.sdAtomSize;
 303     while (pos+8 < endpos) {
 304       unsigned atomLength = pos[0]<<24 | pos[1]<<16 | pos[2]<<8 | pos[3];
 305       if (atomLength == 0 || atomLength > endpos-pos) break;
 306       if (!memcmp(pos+4, "wave", 4) && fourcc==mmioFOURCC('Q','D','M','2') &&
 307           atomLength > 8 &&
 308           atomLength <= INT_MAX) {
 309         sh_audio->codecdata = (unsigned char*) malloc(atomLength-8);
 310         if (sh_audio->codecdata) {
 311           memcpy(sh_audio->codecdata, pos+8, atomLength-8);
 312           sh_audio->codecdata_len = atomLength-8;
 313         }
 314         break;
 315       }
 316       pos += atomLength;
 317     }
 318   } else {
 319     fprintf(stderr,
 320             "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
 321             subsession->codecName());
 322   }
 323 }
 324
 325 static void needVideoFrameRate(demuxer_t* demuxer,
 326                                MediaSubsession* subsession) {
 327   // For some codecs, MPlayer's decoding software can't (or refuses to :-)
 328   // figure out the frame rate by itself, so (unless the user specifies
 329   // it manually, using "-fps") we figure it out ourselves here, using the
 330   // presentation timestamps in successive packets,
 331   extern double force_fps; if (force_fps != 0.0) return; // user used "-fps"
 332
 333   demux_stream_t* d_video = demuxer->video;
 334   sh_video_t* sh_video = (sh_video_t*)(d_video->sh);
 335
 336   // If we already know the subsession's video frame rate, use it:
 337   int fps = (int)(subsession->videoFPS());
 338   if (fps != 0) {
 339     sh_video->fps = fps;
 340     sh_video->frametime = 1.0f/fps;
 341     return;
 342   }
 343
 344   // Keep looking at incoming frames until we see two with different,
 345   // non-zero "pts" timestamps:
 346   unsigned char* packetData; unsigned packetDataLen;
 347   float lastPTS = 0.0, curPTS;
 348   unsigned const maxNumFramesToWaitFor = 300;
 349   int lastfps = 0;
 350   for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
 351     if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
 352       break;
 353     }
 354
 355     if (curPTS != lastPTS && lastPTS != 0.0) {
 356       // Use the difference between these two "pts"s to guess the frame rate.
 357       // (should really check that there were no missing frames inbetween)#####
 358       // Guess the frame rate as an integer.  If it's not, use "-fps" instead.
 359       fps = (int)(1/fabs(curPTS-lastPTS) + 0.5); // rounding
 360         if (fps == lastfps) {
 361       fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
 362       sh_video->fps = fps;
 363       sh_video->frametime=1.0f/fps;
 364       return;
 365         }
 366       if (fps>lastfps) lastfps = fps;
 367     }
 368     lastPTS = curPTS;
 369   }
 370   fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
 371 }
 372
 373 static Boolean
 374 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
 375                    unsigned& fourcc) {
 376   // qtState's "sdAtom" field is supposed to contain a QuickTime video
 377   // 'sample description' atom.  This atom's name is the 'fourcc' that we want:
 378   char const* sdAtom = qtState.sdAtom;
 379   if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;
 380
 381   fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
 382   return True;
 383 }
 384
 385 static Boolean
 386 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
 387                    unsigned& fourcc, unsigned& numChannels) {
 388   // qtState's "sdAtom" field is supposed to contain a QuickTime audio
 389   // 'sample description' atom.  This atom's name is the 'fourcc' that we want.
 390   // Also, the top half of the 5th word following the atom name should
 391   // contain the number of channels ("numChannels") that we want:
 392   char const* sdAtom = qtState.sdAtom;
 393   if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;
 394
 395   fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
 396
 397   char const* word7Ptr = &sdAtom[6*4];
 398   numChannels = (word7Ptr[0]<<8)|(word7Ptr[1]);
 399   return True;
 400 }