libmpdemux/demux_rtp_codec.cpp

   1 ////////// Codec-specific routines used to interface between "MPlayer"
   2 ////////// and the "LIVE555 Streaming Media" libraries:
   3
   4 #include "demux_rtp_internal.h"
   5 extern "C" {
   6 #include "stheader.h"
   7 }
   8
   9 static void
  10 needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
  11 static Boolean
  12 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
  13                    unsigned& fourcc); // forward
  14 static Boolean
  15 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
  16                    unsigned& fourcc, unsigned& numChannels); // forward
  17
  18 void rtpCodecInitialize_video(demuxer_t* demuxer,
  19                               MediaSubsession* subsession,
  20                               unsigned& flags) {
  21   flags = 0;
  22   // Create a dummy video stream header
  23   // to make the main MPlayer code happy:
  24   sh_video_t* sh_video = new_sh_video(demuxer,0);
  25   BITMAPINFOHEADER* bih
  26     = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
  27   bih->biSize = sizeof(BITMAPINFOHEADER);
  28   sh_video->bih = bih;
  29   demux_stream_t* d_video = demuxer->video;
  30   d_video->sh = sh_video; sh_video->ds = d_video;
  31
  32   // Map known video MIME types to the BITMAPINFOHEADER parameters
  33   // that this program uses.  (Note that not all types need all
  34   // of the parameters to be set.)
  35   if (strcmp(subsession->codecName(), "MPV") == 0) {
  36     flags |= RTPSTATE_IS_MPEG12_VIDEO;
  37   } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
  38              strcmp(subsession->codecName(), "MP2T") == 0) {
  39     flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
  40   } else if (strcmp(subsession->codecName(), "H263") == 0 ||
  41              strcmp(subsession->codecName(), "H263-1998") == 0) {
  42     bih->biCompression = sh_video->format
  43       = mmioFOURCC('H','2','6','3');
  44     needVideoFrameRate(demuxer, subsession);
  45   } else if (strcmp(subsession->codecName(), "H264") == 0) {
  46     bih->biCompression = sh_video->format
  47       = mmioFOURCC('H','2','6','4');
  48     needVideoFrameRate(demuxer, subsession);
  49   } else if (strcmp(subsession->codecName(), "H261") == 0) {
  50     bih->biCompression = sh_video->format
  51       = mmioFOURCC('H','2','6','1');
  52     needVideoFrameRate(demuxer, subsession);
  53   } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
  54     bih->biCompression = sh_video->format
  55       = mmioFOURCC('M','J','P','G');
  56     needVideoFrameRate(demuxer, subsession);
  57   } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
  58     bih->biCompression = sh_video->format
  59       = mmioFOURCC('m','p','4','v');
  60     // For the codec to work correctly, it may need a 'VOL Header' to be
  61     // inserted at the front of the data stream.  Construct this from the
  62     // "config" MIME parameter, which was present (hopefully) in the
  63     // session's SDP description:
  64     unsigned configLen;
  65     unsigned char* configData
  66       = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
  67     insertRTPData(demuxer, demuxer->video, configData, configLen);
  68     needVideoFrameRate(demuxer, subsession);
  69   } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
  70              strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
  71     // QuickTime generic RTP format, as described in
  72     // http://developer.apple.com/quicktime/icefloe/dispatch026.html
  73
  74     // We can't initialize this stream until we've received the first packet
  75     // that has QuickTime "sdAtom" information in the header.  So, keep
  76     // reading packets until we get one:
  77     unsigned char* packetData; unsigned packetDataLen; float pts;
  78     QuickTimeGenericRTPSource* qtRTPSource
  79       = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
  80     unsigned fourcc;
  81     do {
  82       if (!awaitRTPPacket(demuxer, demuxer->video,
  83                           packetData, packetDataLen, pts)) {
  84         return;
  85       }
  86     } while (!parseQTState_video(qtRTPSource->qtState, fourcc));
  87
  88     bih->biCompression = sh_video->format = fourcc;
  89   } else {
  90     fprintf(stderr,
  91             "Unknown MPlayer format code for MIME type \"video/%s\"\n",
  92             subsession->codecName());
  93   }
  94 }
  95
  96 void rtpCodecInitialize_audio(demuxer_t* demuxer,
  97                               MediaSubsession* subsession,
  98                               unsigned& flags) {
  99   flags = 0;
 100   // Create a dummy audio stream header
 101   // to make the main MPlayer code happy:
 102   sh_audio_t* sh_audio = new_sh_audio(demuxer,0);
 103   WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
 104   sh_audio->wf = wf;
 105   demux_stream_t* d_audio = demuxer->audio;
 106   d_audio->sh = sh_audio; sh_audio->ds = d_audio;
 107
 108   wf->nChannels = subsession->numChannels();
 109
 110   // Map known audio MIME types to the WAVEFORMATEX parameters
 111   // that this program uses.  (Note that not all types need all
 112   // of the parameters to be set.)
 113   wf->nSamplesPerSec
 114     = subsession->rtpSource()->timestampFrequency(); // by default
 115   if (strcmp(subsession->codecName(), "MPA") == 0 ||
 116       strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
 117       strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
 118     wf->wFormatTag = sh_audio->format = 0x55;
 119     // Note: 0x55 is for layer III, but should work for I,II also
 120     wf->nSamplesPerSec = 0; // sample rate is deduced from the data
 121   } else if (strcmp(subsession->codecName(), "AC3") == 0) {
 122     wf->wFormatTag = sh_audio->format = 0x2000;
 123     wf->nSamplesPerSec = 0; // sample rate is deduced from the data
 124   } else if (strcmp(subsession->codecName(), "L16") == 0) {
 125     wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
 126     wf->nBlockAlign = 1;
 127     wf->wBitsPerSample = 16;
 128     wf->cbSize = 0;
 129   } else if (strcmp(subsession->codecName(), "L8") == 0) {
 130     wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
 131     wf->nBlockAlign = 1;
 132     wf->wBitsPerSample = 8;
 133     wf->cbSize = 0;
 134   } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
 135     wf->wFormatTag = sh_audio->format = 0x7;
 136     wf->nAvgBytesPerSec = 8000;
 137     wf->nBlockAlign = 1;
 138     wf->wBitsPerSample = 8;
 139     wf->cbSize = 0;
 140   } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
 141     wf->wFormatTag = sh_audio->format = 0x6;
 142     wf->nAvgBytesPerSec = 8000;
 143     wf->nBlockAlign = 1;
 144     wf->wBitsPerSample = 8;
 145     wf->cbSize = 0;
 146   } else if (strcmp(subsession->codecName(), "GSM") == 0) {
 147     wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
 148     wf->nAvgBytesPerSec = 1650;
 149     wf->nBlockAlign = 33;
 150     wf->wBitsPerSample = 16;
 151     wf->cbSize = 0;
 152   } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
 153     wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
 154     wf->nAvgBytesPerSec = 1750;
 155     wf->nBlockAlign = 35;
 156     wf->wBitsPerSample = 16;
 157     wf->cbSize = 0;
 158   } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
 159     wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
 160     // For the codec to work correctly, it needs "AudioSpecificConfig"
 161     // data, which is parsed from the "StreamMuxConfig" string that
 162     // was present (hopefully) in the SDP description:
 163     unsigned codecdata_len;
 164     sh_audio->codecdata
 165       = parseStreamMuxConfigStr(subsession->fmtp_config(),
 166                                 codecdata_len);
 167     sh_audio->codecdata_len = codecdata_len;
 168   } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
 169     wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
 170     // For the codec to work correctly, it needs "AudioSpecificConfig"
 171     // data, which was present (hopefully) in the SDP description:
 172     unsigned codecdata_len;
 173     sh_audio->codecdata
 174       = parseGeneralConfigStr(subsession->fmtp_config(),
 175                               codecdata_len);
 176     sh_audio->codecdata_len = codecdata_len;
 177   } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
 178              strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
 179     // QuickTime generic RTP format, as described in
 180     // http://developer.apple.com/quicktime/icefloe/dispatch026.html
 181
 182     // We can't initialize this stream until we've received the first packet
 183     // that has QuickTime "sdAtom" information in the header.  So, keep
 184     // reading packets until we get one:
 185     unsigned char* packetData; unsigned packetDataLen; float pts;
 186     QuickTimeGenericRTPSource* qtRTPSource
 187       = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
 188     unsigned fourcc, numChannels;
 189     do {
 190       if (!awaitRTPPacket(demuxer, demuxer->audio,
 191                           packetData, packetDataLen, pts)) {
 192         return;
 193       }
 194     } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));
 195
 196     wf->wFormatTag = sh_audio->format = fourcc;
 197     wf->nChannels = numChannels;
 198   } else {
 199     fprintf(stderr,
 200             "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
 201             subsession->codecName());
 202   }
 203 }
 204
 205 static void needVideoFrameRate(demuxer_t* demuxer,
 206                                MediaSubsession* subsession) {
 207   // For some codecs, MPlayer's decoding software can't (or refuses to :-)
 208   // figure out the frame rate by itself, so (unless the user specifies
 209   // it manually, using "-fps") we figure it out ourselves here, using the
 210   // presentation timestamps in successive packets,
 211   extern float force_fps; if (force_fps != 0.0) return; // user used "-fps"
 212
 213   demux_stream_t* d_video = demuxer->video;
 214   sh_video_t* sh_video = (sh_video_t*)(d_video->sh);
 215
 216   // If we already know the subsession's video frame rate, use it:
 217   int fps = (int)(subsession->videoFPS());
 218   if (fps != 0) {
 219     sh_video->fps = fps;
 220     return;
 221   }
 222
 223   // Keep looking at incoming frames until we see two with different,
 224   // non-zero "pts" timestamps:
 225   unsigned char* packetData; unsigned packetDataLen;
 226   float lastPTS = 0.0, curPTS;
 227   unsigned const maxNumFramesToWaitFor = 300;
 228   for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
 229     if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
 230       break;
 231     }
 232
 233     if (curPTS > lastPTS && lastPTS != 0.0) {
 234       // Use the difference between these two "pts"s to guess the frame rate.
 235       // (should really check that there were no missing frames inbetween)#####
 236       // Guess the frame rate as an integer.  If it's not, use "-fps" instead.
 237       fps = (int)(1/(curPTS-lastPTS) + 0.5); // rounding
 238       fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
 239       sh_video->fps = fps;
 240       return;
 241     }
 242     lastPTS = curPTS;
 243   }
 244   fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
 245 }
 246
 247 static Boolean
 248 parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
 249                    unsigned& fourcc) {
 250   // qtState's "sdAtom" field is supposed to contain a QuickTime video
 251   // 'sample description' atom.  This atom's name is the 'fourcc' that we want:
 252   char const* sdAtom = qtState.sdAtom;
 253   if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;
 254
 255   fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
 256   return True;
 257 }
 258
 259 static Boolean
 260 parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
 261                    unsigned& fourcc, unsigned& numChannels) {
 262   // qtState's "sdAtom" field is supposed to contain a QuickTime audio
 263   // 'sample description' atom.  This atom's name is the 'fourcc' that we want.
 264   // Also, the top half of the 5th word following the atom name should
 265   // contain the number of channels ("numChannels") that we want:
 266   char const* sdAtom = qtState.sdAtom;
 267   if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;
 268
 269   fourcc = *(unsigned*)(&sdAtom[4]); // put in host order
 270
 271   char const* word7Ptr = &sdAtom[6*4];
 272   numChannels = (word7Ptr[0]<<8)|(word7Ptr[1]);
 273   return True;
 274 }