theora/examples/encoder_example.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
   5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
   6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
   7  *                                                                  *
   8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
   9  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  10  *                                                                  *
  11  ********************************************************************
  12
  13   function: example encoder application; makes an Ogg Theora/Vorbis
  14             file from YUV4MPEG2 and WAV input
  15   last mod: $Id$
  16
  17  ********************************************************************/
  18
  19 #if !defined(_GNU_SOURCE)
  20 #define _GNU_SOURCE
  21 #endif
  22 #if !defined(_LARGEFILE_SOURCE)
  23 #define _LARGEFILE_SOURCE
  24 #endif
  25 #if !defined(_LARGEFILE64_SOURCE)
  26 #define _LARGEFILE64_SOURCE
  27 #endif
  28 #if !defined(_FILE_OFFSET_BITS)
  29 #define _FILE_OFFSET_BITS 64
  30 #endif
  31
  32 /* Define to give performance data win32 only*/
  33 //#define THEORA_PERF_DATA
  34 #ifdef THEORA_PERF_DATA
  35 #include <windows.h>
  36 #endif
  37
  38 #ifdef HAVE_CONFIG_H
  39 # include <config.h>
  40
  41 #endif
  42
  43 #ifndef _REENTRANT
  44 # define _REENTRANT
  45 #endif
  46
  47 #include <stdio.h>
  48 #ifndef _WIN32
  49 #include <getopt.h>
  50 #include <unistd.h>
  51 #else
  52 #include "getopt.h"
  53 #endif
  54 #ifndef _WIN32
  55 #endif
  56 #include <stdlib.h>
  57 #include <string.h>
  58 #include <time.h>
  59 #include <math.h>
  60 #include "theora/theora.h"
  61 #include "vorbis/codec.h"
  62 #include "vorbis/vorbisenc.h"
  63
  64 #ifdef _WIN32
  65 /* supply missing headers and functions to Win32 */
  66
  67 #include <fcntl.h>
  68
  69 static double rint(double x)
  70 {
  71   if (x < 0.0)
  72     return (double)(int)(x - 0.5);
  73   else
  74     return (double)(int)(x + 0.5);
  75 }
  76 #endif
  77
  78 const char *optstring = "b:e:o:a:A:v:V:s:S:f:F:n:m:k:";
  79 struct option options [] = {
  80   {"begin-time",required_argument,NULL,'b'},
  81   {"end-time",required_argument,NULL,'e'},
  82   {"output",required_argument,NULL,'o'},
  83   {"audio-rate-target",required_argument,NULL,'A'},
  84   {"video-rate-target",required_argument,NULL,'V'},
  85   {"audio-quality",required_argument,NULL,'a'},
  86   {"video-quality",required_argument,NULL,'v'},
  87   {"aspect-numerator",optional_argument,NULL,'s'},
  88   {"aspect-denominator",optional_argument,NULL,'S'},
  89   {"framerate-numerator",optional_argument,NULL,'f'},
  90   {"framerate-denominator",optional_argument,NULL,'F'},
  91   {"noise-sensitivity",required_argument,NULL,'n'},
  92   {"sharpness",required_argument,NULL,'m'},
  93   {"keyframe-freq",required_argument,NULL,'k'},
  94   {NULL,0,NULL,0}
  95 };
  96
  97 /* You'll go to Hell for using globals. */
  98
  99 FILE *audio=NULL;
 100 FILE *video=NULL;
 101
 102 int audio_ch=0;
 103 int audio_hz=0;
 104
 105 float audio_q=.1;
 106 int audio_r=-1;
 107
 108 int video_x=0;
 109 int video_y=0;
 110 int frame_x=0;
 111 int frame_y=0;
 112 int frame_x_offset=0;
 113 int frame_y_offset=0;
 114 int video_hzn=-1;
 115 int video_hzd=-1;
 116 int video_an=-1;
 117 int video_ad=-1;
 118
 119 int video_r=-1;
 120 int video_q=16;
 121 int noise_sensitivity=1;
 122 int sharpness=0;
 123 int keyframe_frequency=64;
 124
 125 long begin_sec=-1;
 126 long begin_usec=0;
 127 long end_sec=-1;
 128 long end_usec=0;
 129
 130 static void usage(void){
 131   fprintf(stderr,
 132           "Usage: encoder_example [options] [audio_file] video_file\n\n"
 133           "Options: \n\n"
 134           "  -o --output <filename.ogg>     file name for encoded output;\n"
 135           "                                 If this option is not given, the\n"
 136           "                                 compressed data is sent to stdout.\n\n"
 137           "  -A --audio-rate-target <n>     bitrate target for Vorbis audio;\n"
 138           "                                 use -a and not -A if at all possible,\n"
 139           "                                 as -a gives higher quality for a given\n"
 140           "                                 bitrate.\n\n"
 141           "  -V --video-rate-target <n>     bitrate target for Theora video\n\n"
 142           "  -a --audio-quality <n>         Vorbis quality selector from -1 to 10\n"
 143           "                                 (-1 yields smallest files but lowest\n"
 144           "                                 fidelity; 10 yields highest fidelity\n"
 145           "                                 but large files. '2' is a reasonable\n"
 146           "                                 default).\n\n"
 147           "   -v --video-quality <n>        Theora quality selector fro 0 to 10\n"
 148           "                                 (0 yields smallest files but lowest\n"
 149           "                                 video quality. 10 yields highest\n"
 150           "                                 fidelity but large files).\n\n"
 151           "   -s --aspect-numerator <n>     Aspect ratio numerator, default is 0\n"
 152           "                                 or extracted from YUV input file\n"
 153           "   -S --aspect-denominator <n>   Aspect ratio denominator, default is 0\n"
 154           "                                 or extracted from YUV input file\n"
 155           "   -f --framerate-numerator <n>  Frame rate numerator, can be extracted\n"
 156           "                                 from YUV input file. ex: 30000000\n"
 157           "   -F --framerate-denominator <n>Frame rate denominator, can be extracted\n"
 158           "                                 from YUV input file. ex: 1000000\n"
 159           "                                 The frame rate nominator divided by this\n"
 160           "                                 determinates the frame rate in units per tick\n"
 161           "   -n --noise-sensitivity <n>    Theora noise sensitivity selector from 0\n"
 162           "                                 to 6 (0 yields best quality but larger\n"
 163           "                                 files, defaults to 1)\n"
 164           "   -m --sharpness <n>            Theora sharpness selector from 0 to 2\n"
 165           "                                 (0 yields crispest video at the cost of\n"
 166           "                                 larger files, selecting 2 can greatly\n"
 167           "                                 reduce file size but resulting video\n"
 168           "                                 is blurrier, defaults to 0)\n"
 169           "   -k --keyframe-freq <n>        Keyframe frequency from 8 to 1000\n"
 170           "   -b --begin-time <h:m:s.f>     Begin encoding at offset into input\n"
 171           "   -e --end-time <h:m:s.f>       End encoding at offset into input\n"
 172           "encoder_example accepts only uncompressed RIFF WAV format audio and\n"
 173           "YUV4MPEG2 uncompressed video.\n\n");
 174   exit(1);
 175 }
 176
 177 static void id_file(char *f){
 178   FILE *test;
 179   unsigned char buffer[80];
 180   int ret;
 181   int tmp_video_hzn = -1,
 182       tmp_video_hzd = -1,
 183       tmp_video_an = -1,
 184       tmp_video_ad = -1;
 185   int extra_hdr_bytes;
 186
 187   /* open it, look for magic */
 188
 189   if(!strcmp(f,"-")){
 190     /* stdin */
 191     test=stdin;
 192   }else{
 193     test=fopen(f,"rb");
 194     if(!test){
 195       fprintf(stderr,"Unable to open file %s.\n",f);
 196       exit(1);
 197     }
 198   }
 199
 200   ret=fread(buffer,1,4,test);
 201   if(ret<4){
 202     fprintf(stderr,"EOF determining file type of file %s.\n",f);
 203     exit(1);
 204   }
 205
 206   if(!memcmp(buffer,"RIFF",4)){
 207     /* possible WAV file */
 208
 209     if(audio){
 210       /* umm, we already have one */
 211       fprintf(stderr,"Multiple RIFF WAVE files specified on command line.\n");
 212       exit(1);
 213     }
 214
 215     /* Parse the rest of the header */
 216
 217     ret=fread(buffer,1,4,test);
 218     ret=fread(buffer,1,4,test);
 219     if(ret<4)goto riff_err;
 220     if(!memcmp(buffer,"WAVE",4)){
 221
 222       while(!feof(test)){
 223         ret=fread(buffer,1,4,test);
 224         if(ret<4)goto riff_err;
 225         if(!memcmp("fmt",buffer,3)){
 226
 227           /* OK, this is our audio specs chunk.  Slurp it up. */
 228
 229           ret=fread(buffer,1,20,test);
 230           if(ret<20)goto riff_err;
 231
 232           extra_hdr_bytes = (buffer[0]  + (buffer[1] << 8) +
 233                             (buffer[2] << 16) + (buffer[3] << 24)) - 16;
 234
 235           if(memcmp(buffer+4,"\001\000",2)){
 236             fprintf(stderr,"The WAV file %s is in a compressed format; "
 237                     "can't read it.\n",f);
 238             exit(1);
 239           }
 240
 241           audio=test;
 242           audio_ch=buffer[6]+(buffer[7]<<8);
 243           audio_hz=buffer[8]+(buffer[9]<<8)+
 244             (buffer[10]<<16)+(buffer[11]<<24);
 245
 246           if(buffer[18]+(buffer[19]<<8)!=16){
 247             fprintf(stderr,"Can only read 16 bit WAV files for now.\n");
 248             exit(1);
 249           }
 250
 251           /* read past extra header bytes */
 252           while(extra_hdr_bytes){
 253             int read_size = (extra_hdr_bytes > sizeof(buffer)) ?
 254              sizeof(buffer) : extra_hdr_bytes;
 255             ret = fread(buffer, 1, read_size, test);
 256
 257             if (ret < read_size)
 258               goto riff_err;
 259             else
 260               extra_hdr_bytes -= read_size;
 261           }
 262
 263           /* Now, align things to the beginning of the data */
 264           /* Look for 'dataxxxx' */
 265           while(!feof(test)){
 266             ret=fread(buffer,1,4,test);
 267             if(ret<4)goto riff_err;
 268             if(!memcmp("data",buffer,4)){
 269               /* We're there.  Ignore the declared size for now. */
 270               ret=fread(buffer,1,4,test);
 271               if(ret<4)goto riff_err;
 272
 273               fprintf(stderr,"File %s is 16 bit %d channel %d Hz RIFF WAV audio.\n",
 274                       f,audio_ch,audio_hz);
 275
 276               return;
 277             }
 278           }
 279         }
 280       }
 281     }
 282
 283     fprintf(stderr,"Couldn't find WAVE data in RIFF file %s.\n",f);
 284     exit(1);
 285
 286   }
 287   if(!memcmp(buffer,"YUV4",4)){
 288     /* possible YUV2MPEG2 format file */
 289     /* read until newline, or 80 cols, whichever happens first */
 290     /* NB the mjpegtools spec doesn't define a length limit */
 291     int i,j;
 292     for(i=0;i<79;i++){
 293       ret=fread(buffer+i,1,1,test);
 294       if(ret<1)goto yuv_err;
 295       if(buffer[i]=='\n')break;
 296     }
 297     if(i==79){
 298       fprintf(stderr,"Error parsing %s header; not a YUV2MPEG2 file?\n",f);
 299     }
 300     buffer[i]='\0';
 301
 302     if(!memcmp(buffer,"MPEG",4)){
 303       char interlace = '?';
 304
 305       if(video){
 306         /* umm, we already have one */
 307         fprintf(stderr,"Multiple video files specified on command line.\n");
 308         exit(1);
 309       }
 310
 311       if(buffer[4]!='2'){
 312         fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
 313       }
 314
 315       /* parse the frame header */
 316       j = 5;
 317       while (j < i) {
 318         if ((buffer[j] != ' ') && (buffer[j-1] == ' '))
 319           switch (buffer[j]) {
 320             case 'W': frame_x = atoi((char*)&buffer[j+1]); break;
 321             case 'H': frame_y = atoi((char*)&buffer[j+1]); break;
 322             case 'C': /* chroma subsampling */ break;
 323             case 'I': interlace = buffer[j+1]; break;
 324             case 'F': /* frame rate ratio */
 325               tmp_video_hzn = atoi((char*)&buffer[j+1]);
 326               while ((buffer[j] != ':') && (j < i)) j++;
 327               tmp_video_hzd = atoi((char*)&buffer[j+1]);
 328               break;
 329             case 'A': /* sample aspect ratio */
 330               tmp_video_an = atoi((char*)&buffer[j+1]);
 331               while ((buffer[j] != ':') && (j < i)) j++;
 332               tmp_video_ad = atoi((char*)&buffer[j+1]);
 333               break;
 334             case 'X': /* metadata */ break;
 335             default:
 336               fprintf(stderr, "unrecognized stream header tag '%c'\n", buffer[j]);
 337               break;
 338           }
 339         j++;
 340       }
 341       /* verify data from the stream header */
 342       if (frame_x <= 0) {
 343         fprintf(stderr,"Error parsing YUV4MPEG2 header:"
 344                 " missing width tag in file %s.\n", f);
 345         exit(1);
 346       }
 347       if (frame_y <= 0) {
 348         fprintf(stderr,"Error parsing YUV4MPEG2 header:"
 349                 " missing height tag in file %s.\n", f);
 350         exit(1);
 351       }
 352       if (tmp_video_hzn < 0 || tmp_video_hzd < 0) {
 353         /* default to 30 fps */
 354         tmp_video_hzn = 30; tmp_video_hzd = 1;
 355         fprintf(stderr,"Warning: no framerate defined in file %s.\n", f);
 356       }
 357       if (tmp_video_an < 0 || tmp_video_ad < 0) {
 358         /* default to unknown */
 359         tmp_video_an = 0; tmp_video_ad = 0;
 360       }
 361
 362       /* update fps and aspect ratio globals if not specified in the command line */
 363       if (video_hzn==-1) video_hzn = tmp_video_hzn;
 364       if (video_hzd==-1) video_hzd = tmp_video_hzd;
 365       if (video_an==-1) video_an = tmp_video_an;
 366       if (video_ad==-1) video_ad = tmp_video_ad;
 367
 368       if(interlace=='?'){
 369         fprintf(stderr,"Warning: input video isn't marked for interlacing;"
 370           " treating this\nas progressive scan video."
 371           " Deinterlace first if you get poor results.\n");
 372       }else if(interlace!='p'){
 373         fprintf(stderr,"Input video is interlaced; Theora handles only progressive scan\n");
 374         exit(1);
 375       }
 376
 377       video=test;
 378
 379       fprintf(stderr,"File %s is %dx%d %.02f fps YUV12 video.\n",
 380               f,frame_x,frame_y,(double)video_hzn/video_hzd);
 381
 382       return;
 383     }
 384   }
 385   fprintf(stderr,"Input file %s is neither a WAV nor YUV4MPEG2 file.\n",f);
 386   exit(1);
 387
 388  riff_err:
 389   fprintf(stderr,"EOF parsing RIFF file %s.\n",f);
 390   exit(1);
 391  yuv_err:
 392   fprintf(stderr,"EOF parsing YUV4MPEG2 file %s.\n",f);
 393   exit(1);
 394
 395 }
 396
 397 int spinner=0;
 398 char *spinascii="|/-\\";
 399 void spinnit(void){
 400   spinner++;
 401   if(spinner==4)spinner=0;
 402   fprintf(stderr,"\r%c",spinascii[spinner]);
 403 }
 404
 405 int fetch_and_process_audio(FILE *audio,ogg_page *audiopage,
 406                             ogg_stream_state *vo,
 407                             vorbis_dsp_state *vd,
 408                             vorbis_block *vb,
 409                             int audioflag){
 410   static ogg_int64_t samples_sofar=0;
 411   ogg_packet op;
 412   int i,j;
 413   ogg_int64_t beginsample = audio_hz*begin_sec + audio_hz*begin_usec/1000000;
 414   ogg_int64_t endsample = audio_hz*end_sec + audio_hz*end_usec/1000000;
 415
 416   while(audio && !audioflag){
 417     /* process any audio already buffered */
 418     spinnit();
 419     if(ogg_stream_pageout(vo,audiopage)>0) return 1;
 420     if(ogg_stream_eos(vo))return 0;
 421
 422     {
 423       /* read and process more audio */
 424       signed char readbuffer[4096];
 425       signed char *readptr=readbuffer;
 426       int toread=4096/2/audio_ch;
 427       int bytesread=fread(readbuffer,1,toread*2*audio_ch,audio);
 428       int sampread=bytesread/2/audio_ch;
 429       float **vorbis_buffer;
 430       int count=0;
 431
 432       if(bytesread<=0 ||
 433          (samples_sofar>=endsample && endsample>0)){
 434         /* end of file.  this can be done implicitly, but it's
 435            easier to see here in non-clever fashion.  Tell the
 436            library we're at end of stream so that it can handle the
 437            last frame and mark end of stream in the output properly */
 438         vorbis_analysis_wrote(vd,0);
 439       }else{
 440         if(samples_sofar < beginsample){
 441           if(samples_sofar+sampread > beginsample){
 442             readptr += (beginsample-samples_sofar)*2*audio_ch;
 443             sampread += samples_sofar-beginsample;
 444             samples_sofar = sampread+beginsample;
 445           }else{
 446             samples_sofar += sampread;
 447             sampread = 0;
 448           }
 449         }else{
 450           samples_sofar += sampread;
 451         }
 452
 453         if(samples_sofar > endsample && endsample > 0)
 454           sampread-= (samples_sofar - endsample);
 455
 456         if(sampread>0){
 457
 458           vorbis_buffer=vorbis_analysis_buffer(vd,sampread);
 459           /* uninterleave samples */
 460           for(i=0;i<sampread;i++){
 461             for(j=0;j<audio_ch;j++){
 462               vorbis_buffer[j][i]=((readptr[count+1]<<8)|
 463                                    (0x00ff&(int)readptr[count]))/32768.f;
 464               count+=2;
 465             }
 466           }
 467
 468           vorbis_analysis_wrote(vd,sampread);
 469         }
 470       }
 471
 472       while(vorbis_analysis_blockout(vd,vb)==1){
 473
 474         /* analysis, assume we want to use bitrate management */
 475         vorbis_analysis(vb,NULL);
 476         vorbis_bitrate_addblock(vb);
 477
 478         /* weld packets into the bitstream */
 479         while(vorbis_bitrate_flushpacket(vd,&op))
 480           ogg_stream_packetin(vo,&op);
 481
 482       }
 483     }
 484   }
 485
 486   return audioflag;
 487 }
 488
 489 int fetch_and_process_video(FILE *video,ogg_page *videopage,
 490                             ogg_stream_state *to,
 491                             theora_state *td,
 492                             int videoflag){
 493   /* You'll go to Hell for using static variables */
 494   static ogg_int64_t frames=0;
 495   static int          state=-1;
 496   static unsigned char *yuvframe[2];
 497   unsigned char        *line;
 498   yuv_buffer          yuv;
 499   ogg_packet          op;
 500   int e;
 501   ogg_int64_t beginframe = (video_hzn*begin_sec + video_hzn*begin_usec/1000000)/video_hzd;
 502   ogg_int64_t endframe = (video_hzn*end_sec + video_hzn*end_usec/1000000)/video_hzd;
 503
 504   if(state==-1){
 505         /* initialize the double frame buffer */
 506     yuvframe[0]=malloc(video_x*video_y*3/2);
 507     yuvframe[1]=malloc(video_x*video_y*3/2);
 508
 509         /* clear initial frame as it may be larger than actual video data */
 510         /* fill Y plane with 0x10 and UV planes with 0X80, for black data */
 511     memset(yuvframe[0],0x10,video_x*video_y);
 512     memset(yuvframe[0]+video_x*video_y,0x80,video_x*video_y/2);
 513     memset(yuvframe[1],0x10,video_x*video_y);
 514     memset(yuvframe[1]+video_x*video_y,0x80,video_x*video_y/2);
 515
 516     state=0;
 517   }
 518
 519   /* is there a video page flushed?  If not, work until there is. */
 520   while(!videoflag){
 521     spinnit();
 522
 523     if(ogg_stream_pageout(to,videopage)>0) return 1;
 524     if(ogg_stream_eos(to)) return 0;
 525
 526     {
 527       /* read and process more video */
 528       /* video strategy reads one frame ahead so we know when we're
 529          at end of stream and can mark last video frame as such
 530          (vorbis audio has to flush one frame past last video frame
 531          due to overlap and thus doesn't need this extra work */
 532
 533       /* have two frame buffers full (if possible) before
 534          proceeding.  after first pass and until eos, one will
 535          always be full when we get here */
 536
 537       for(;state<2 && (frames<endframe || endframe<0);){
 538         char c,frame[6];
 539         int ret=fread(frame,1,6,video);
 540
 541         /* match and skip the frame header */
 542         if(ret<6)break;
 543         if(memcmp(frame,"FRAME",5)){
 544           fprintf(stderr,"Loss of framing in YUV input data\n");
 545           exit(1);
 546         }
 547         if(frame[5]!='\n'){
 548           int j;
 549           for(j=0;j<79;j++)
 550             if(fread(&c,1,1,video)&&c=='\n')break;
 551           if(j==79){
 552             fprintf(stderr,"Error parsing YUV frame header\n");
 553             exit(1);
 554           }
 555         }
 556
 557         /* read the Y plane into our frame buffer with centering */
 558         line=yuvframe[state]+video_x*frame_y_offset+frame_x_offset;
 559         for(e=0;e<frame_y;e++){
 560           ret=fread(line,1,frame_x,video);
 561             if(ret!=frame_x) break;
 562           line+=video_x;
 563         }
 564         /* now get U plane*/
 565         line=yuvframe[state]+(video_x*video_y)
 566           +(video_x/2)*(frame_y_offset/2)+frame_x_offset/2;
 567         for(e=0;e<frame_y/2;e++){
 568           ret=fread(line,1,frame_x/2,video);
 569             if(ret!=frame_x/2) break;
 570           line+=video_x/2;
 571         }
 572         /* and the V plane*/
 573         line=yuvframe[state]+(video_x*video_y*5/4)
 574                   +(video_x/2)*(frame_y_offset/2)+frame_x_offset/2;
 575         for(e=0;e<frame_y/2;e++){
 576           ret=fread(line,1,frame_x/2,video);
 577             if(ret!=frame_x/2) break;
 578           line+=video_x/2;
 579         }
 580
 581         frames++;
 582         if(frames>=beginframe)
 583           state++;
 584
 585       }
 586
 587       if(state<1){
 588         /* can't get here unless YUV4MPEG stream has no video */
 589         fprintf(stderr,"Video input contains no frames.\n");
 590         exit(1);
 591       }
 592
 593       /* Theora is a one-frame-in,one-frame-out system; submit a frame
 594          for compression and pull out the packet */
 595
 596       {
 597         yuv.y_width=video_x;
 598         yuv.y_height=video_y;
 599         yuv.y_stride=video_x;
 600
 601         yuv.uv_width=video_x/2;
 602         yuv.uv_height=video_y/2;
 603         yuv.uv_stride=video_x/2;
 604
 605         yuv.y= yuvframe[0];
 606         yuv.u= yuvframe[0]+ video_x*video_y;
 607         yuv.v= yuvframe[0]+ video_x*video_y*5/4 ;
 608       }
 609
 610       theora_encode_YUVin(td,&yuv);
 611
 612       /* if there's only one frame, it's the last in the stream */
 613       if(state<2)
 614         theora_encode_packetout(td,1,&op);
 615       else
 616         theora_encode_packetout(td,0,&op);
 617
 618       ogg_stream_packetin(to,&op);
 619
 620       {
 621         unsigned char *temp=yuvframe[0];
 622         yuvframe[0]=yuvframe[1];
 623         yuvframe[1]=temp;
 624         state--;
 625       }
 626
 627     }
 628   }
 629   return videoflag;
 630 }
 631
 632 int main(int argc,char *const *argv){
 633   int c,long_option_index,ret;
 634
 635   ogg_stream_state to; /* take physical pages, weld into a logical
 636                            stream of packets */
 637   ogg_stream_state vo; /* take physical pages, weld into a logical
 638                            stream of packets */
 639   ogg_page         og; /* one Ogg bitstream page.  Vorbis packets are inside */
 640   ogg_packet       op; /* one raw packet of data for decode */
 641
 642   theora_state     td;
 643   theora_info      ti;
 644   theora_comment   tc;
 645
 646   vorbis_info      vi; /* struct that stores all the static vorbis bitstream
 647                           settings */
 648   vorbis_comment   vc; /* struct that stores all the user comments */
 649
 650   vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */
 651   vorbis_block     vb; /* local working space for packet->PCM decode */
 652
 653   int audioflag=0;
 654   int videoflag=0;
 655   int akbps=0;
 656   int vkbps=0;
 657
 658   ogg_int64_t audio_bytesout=0;
 659   ogg_int64_t video_bytesout=0;
 660   double timebase;
 661
 662
 663   FILE* outfile = stdout;
 664
 665 #ifdef _WIN32
 666 # ifdef THEORA_PERF_DATA
 667     LARGE_INTEGER start_time;
 668     LARGE_INTEGER final_time;
 669
 670     LONGLONG elapsed_ticks;
 671     LARGE_INTEGER ticks_per_second;
 672
 673     LONGLONG elapsed_secs;
 674     LONGLONG elapsed_sec_mod;
 675     double elapsed_secs_dbl ;
 676 # endif
 677   /* We need to set stdin/stdout to binary mode. Damn windows. */
 678   /* if we were reading/writing a file, it would also need to in
 679      binary mode, eg, fopen("file.wav","wb"); */
 680   /* Beware the evil ifdef. We avoid these where we can, but this one we
 681      cannot. Don't add any more, you'll probably go to hell if you do. */
 682   _setmode( _fileno( stdin ), _O_BINARY );
 683   _setmode( _fileno( stdout ), _O_BINARY );
 684
 685
 686 #endif
 687
 688   while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
 689     switch(c){
 690     case 'o':
 691       outfile=fopen(optarg,"wb");
 692       if(outfile==NULL){
 693         fprintf(stderr,"Unable to open output file '%s'\n", optarg);
 694         exit(1);
 695       }
 696       break;;
 697
 698     case 'a':
 699       audio_q=atof(optarg)*.099;
 700       if(audio_q<-.1 || audio_q>1){
 701         fprintf(stderr,"Illegal audio quality (choose -1 through 10)\n");
 702         exit(1);
 703       }
 704       audio_r=-1;
 705       break;
 706
 707     case 'v':
 708       video_q=rint(atof(optarg)*6.3);
 709       if(video_q<0 || video_q>63){
 710         fprintf(stderr,"Illegal video quality (choose 0 through 10)\n");
 711         exit(1);
 712       }
 713       video_r=0;
 714       break;
 715
 716     case 'A':
 717       audio_r=atof(optarg)*1000;
 718       if(audio_q<0){
 719         fprintf(stderr,"Illegal audio quality (choose > 0 please)\n");
 720         exit(1);
 721       }
 722       audio_q=-99;
 723       break;
 724
 725     case 'V':
 726       video_r=rint(atof(optarg)*1000);
 727       if(video_r<45000 || video_r>2000000){
 728         fprintf(stderr,"Illegal video bitrate (choose 45kbps through 2000kbps)\n");
 729         exit(1);
 730       }
 731       video_q=0;
 732      break;
 733
 734     case 's':
 735       video_an=rint(atof(optarg));
 736       break;
 737
 738     case 'S':
 739       video_ad=rint(atof(optarg));
 740       break;
 741
 742     case 'f':
 743       video_hzn=rint(atof(optarg));
 744       break;
 745
 746     case 'F':
 747       video_hzd=rint(atof(optarg));
 748       break;
 749
 750     case 'n':
 751       noise_sensitivity=rint(atof(optarg));
 752       if(noise_sensitivity<0 || noise_sensitivity>6){
 753         fprintf(stderr,"Illegal noise sensitivity (choose 0 through 6)\n");
 754         exit(1);
 755       }
 756       break;
 757
 758     case 'm':
 759       sharpness=rint(atof(optarg));
 760       if(sharpness<0 || sharpness>2){
 761         fprintf(stderr,"Illegal sharpness (choose 0 through 2)\n");
 762         exit(1);
 763       }
 764       break;
 765
 766     case 'k':
 767       keyframe_frequency=rint(atof(optarg));
 768       if(keyframe_frequency<8 || keyframe_frequency>1000){
 769         fprintf(stderr,"Illegal keyframe frequency (choose 8 through 1000)\n");
 770         exit(1);
 771       }
 772       break;
 773
 774     case 'b':
 775       {
 776         char *pos=strchr(optarg,':');
 777         begin_sec=atol(optarg);
 778         if(pos){
 779           char *pos2=strchr(++pos,':');
 780           begin_sec*=60;
 781           begin_sec+=atol(pos);
 782           if(pos2){
 783             pos2++;
 784             begin_sec*=60;
 785             begin_sec+=atol(pos2);
 786           }else{
 787             pos2=pos;
 788           }
 789           pos2=strchr(pos2,'.');
 790           if(pos2){
 791             pos2++;
 792             begin_usec=atol(pos2);
 793           }
 794         }
 795       }
 796       break;
 797     case 'e':
 798       {
 799         char *pos=strchr(optarg,':');
 800         end_sec=atol(optarg);
 801         if(pos){
 802           char *pos2=strchr(++pos,':');
 803           end_sec*=60;
 804           end_sec+=atol(pos);
 805           if(pos2){
 806             pos2++;
 807             end_sec*=60;
 808             end_sec+=atol(pos2);
 809           }else{
 810             pos2=pos;
 811           }
 812           pos2=strchr(pos2,'.');
 813           if(pos2){
 814             pos2++;
 815             end_usec=atol(pos2);
 816           }
 817         }
 818       }
 819       break;
 820     default:
 821       usage();
 822     }
 823   }
 824
 825   while(optind<argc){
 826     /* assume that anything following the options must be a filename */
 827     id_file(argv[optind]);
 828     optind++;
 829   }
 830
 831
 832
 833 #ifdef THEORA_PERF_DATA
 834 # ifdef WIN32
 835     QueryPerformanceCounter(&start_time);
 836 # endif
 837 #endif
 838
 839
 840   /* yayness.  Set up Ogg output stream */
 841   srand(time(NULL));
 842   {
 843     /* need two inequal serial numbers */
 844     int serial1, serial2;
 845     serial1 = rand();
 846     serial2 = rand();
 847     if (serial1 == serial2) serial2++;
 848     ogg_stream_init(&to,serial1);
 849     ogg_stream_init(&vo,serial2);
 850   }
 851
 852   /* Set up Theora encoder */
 853   if(!video){
 854     fprintf(stderr,"No video files submitted for compression?\n");
 855     exit(1);
 856   }
 857   /* Theora has a divisible-by-sixteen restriction for the encoded video size */
 858   /* scale the frame size up to the nearest /16 and calculate offsets */
 859   video_x=((frame_x + 15) >>4)<<4;
 860   video_y=((frame_y + 15) >>4)<<4;
 861   /* We force the offset to be even.
 862      This ensures that the chroma samples align properly with the luma
 863       samples. */
 864   frame_x_offset=((video_x-frame_x)/2)&~1;
 865   frame_y_offset=((video_y-frame_y)/2)&~1;
 866
 867   theora_info_init(&ti);
 868   ti.width=video_x;
 869   ti.height=video_y;
 870   ti.frame_width=frame_x;
 871   ti.frame_height=frame_y;
 872   ti.offset_x=frame_x_offset;
 873   ti.offset_y=frame_y_offset;
 874   ti.fps_numerator=video_hzn;
 875   ti.fps_denominator=video_hzd;
 876   ti.aspect_numerator=video_an;
 877   ti.aspect_denominator=video_ad;
 878   ti.colorspace=OC_CS_UNSPECIFIED;
 879   ti.pixelformat=OC_PF_420;
 880   ti.target_bitrate=video_r;
 881   ti.quality=video_q;
 882
 883   ti.dropframes_p=0;
 884   ti.quick_p=1;
 885   ti.keyframe_auto_p=1;
 886   ti.keyframe_frequency=keyframe_frequency;
 887   ti.keyframe_frequency_force=keyframe_frequency;
 888   ti.keyframe_data_target_bitrate=video_r*1.5;
 889   ti.keyframe_auto_threshold=80;
 890   ti.keyframe_mindistance=8;
 891   ti.noise_sensitivity=noise_sensitivity;
 892   ti.sharpness=sharpness;
 893
 894   theora_encode_init(&td,&ti);
 895   theora_info_clear(&ti);
 896
 897   /* initialize Vorbis too, assuming we have audio to compress. */
 898   if(audio){
 899     vorbis_info_init(&vi);
 900     if(audio_q>-99)
 901       ret = vorbis_encode_init_vbr(&vi,audio_ch,audio_hz,audio_q);
 902     else
 903       ret = vorbis_encode_init(&vi,audio_ch,audio_hz,-1,audio_r,-1);
 904     if(ret){
 905       fprintf(stderr,"The Vorbis encoder could not set up a mode according to\n"
 906               "the requested quality or bitrate.\n\n");
 907       exit(1);
 908     }
 909
 910     vorbis_comment_init(&vc);
 911     vorbis_analysis_init(&vd,&vi);
 912     vorbis_block_init(&vd,&vb);
 913   }
 914
 915   /* write the bitstream header packets with proper page interleave */
 916
 917   /* first packet will get its own page automatically */
 918   theora_encode_header(&td,&op);
 919   ogg_stream_packetin(&to,&op);
 920   if(ogg_stream_pageout(&to,&og)!=1){
 921     fprintf(stderr,"Internal Ogg library error.\n");
 922     exit(1);
 923   }
 924   fwrite(og.header,1,og.header_len,outfile);
 925   fwrite(og.body,1,og.body_len,outfile);
 926
 927   /* create the remaining theora headers */
 928   theora_comment_init(&tc);
 929   theora_encode_comment(&tc,&op);
 930   ogg_stream_packetin(&to,&op);
 931   /*theora_encode_comment() doesn't take a theora_state parameter, so it has to
 932      allocate its own buffer to pass back the packet data.
 933     If we don't free it here, we'll leak.
 934     libogg2 makes this much cleaner: the stream owns the buffer after you call
 935      packetin in libogg2, but this is not true in libogg1.*/
 936   free(op.packet);
 937   theora_encode_tables(&td,&op);
 938   ogg_stream_packetin(&to,&op);
 939
 940   if(audio){
 941     ogg_packet header;
 942     ogg_packet header_comm;
 943     ogg_packet header_code;
 944
 945     vorbis_analysis_headerout(&vd,&vc,&header,&header_comm,&header_code);
 946     ogg_stream_packetin(&vo,&header); /* automatically placed in its own
 947                                          page */
 948     if(ogg_stream_pageout(&vo,&og)!=1){
 949       fprintf(stderr,"Internal Ogg library error.\n");
 950       exit(1);
 951     }
 952     fwrite(og.header,1,og.header_len,outfile);
 953     fwrite(og.body,1,og.body_len,outfile);
 954
 955     /* remaining vorbis header packets */
 956     ogg_stream_packetin(&vo,&header_comm);
 957     ogg_stream_packetin(&vo,&header_code);
 958   }
 959
 960   /* Flush the rest of our headers. This ensures
 961      the actual data in each stream will start
 962      on a new page, as per spec. */
 963   while(1){
 964     int result = ogg_stream_flush(&to,&og);
 965       if(result<0){
 966         /* can't get here */
 967         fprintf(stderr,"Internal Ogg library error.\n");
 968         exit(1);
 969       }
 970     if(result==0)break;
 971     fwrite(og.header,1,og.header_len,outfile);
 972     fwrite(og.body,1,og.body_len,outfile);
 973   }
 974   if(audio){
 975     while(1){
 976       int result=ogg_stream_flush(&vo,&og);
 977       if(result<0){
 978         /* can't get here */
 979         fprintf(stderr,"Internal Ogg library error.\n");
 980         exit(1);
 981       }
 982       if(result==0)break;
 983       fwrite(og.header,1,og.header_len,outfile);
 984       fwrite(og.body,1,og.body_len,outfile);
 985     }
 986   }
 987
 988   /* setup complete.  Raw processing loop */
 989   fprintf(stderr,"Compressing....\n");
 990   while(1){
 991     ogg_page audiopage;
 992     ogg_page videopage;
 993
 994     /* is there an audio page flushed?  If not, fetch one if possible */
 995     audioflag=fetch_and_process_audio(audio,&audiopage,&vo,&vd,&vb,audioflag);
 996
 997     /* is there a video page flushed?  If not, fetch one if possible */
 998     videoflag=fetch_and_process_video(video,&videopage,&to,&td,videoflag);
 999
1000     /* no pages of either?  Must be end of stream. */
1001     if(!audioflag && !videoflag)break;
1002
1003     /* which is earlier; the end of the audio page or the end of the
1004        video page? Flush the earlier to stream */
1005     {
1006       int audio_or_video=-1;
1007       double audiotime=
1008         audioflag?vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
1009       double videotime=
1010         videoflag?theora_granule_time(&td,ogg_page_granulepos(&videopage)):-1;
1011
1012       if(!audioflag){
1013         audio_or_video=1;
1014       } else if(!videoflag) {
1015         audio_or_video=0;
1016       } else {
1017         if(audiotime<videotime)
1018           audio_or_video=0;
1019         else
1020           audio_or_video=1;
1021       }
1022
1023       if(audio_or_video==1){
1024         /* flush a video page */
1025         video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
1026         video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
1027         videoflag=0;
1028         timebase=videotime;
1029
1030       }else{
1031         /* flush an audio page */
1032         audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
1033         audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
1034         audioflag=0;
1035         timebase=audiotime;
1036       }
1037
1038       if(timebase!=-1.){
1039         int hundredths=timebase*100-(long)timebase*100;
1040         int seconds=(long)timebase%60;
1041         int minutes=((long)timebase/60)%60;
1042         int hours=(long)timebase/3600;
1043
1044         if(audio_or_video)
1045           vkbps=rint(video_bytesout*8./timebase*.001);
1046         else
1047           akbps=rint(audio_bytesout*8./timebase*.001);
1048
1049         fprintf(stderr,
1050                 "\r      %d:%02d:%02d.%02d audio: %dkbps video: %dkbps                 ",
1051                 hours,minutes,seconds,hundredths,akbps,vkbps);
1052       }
1053     }
1054
1055   }
1056
1057   /* clear out state */
1058
1059   if(audio){
1060     ogg_stream_clear(&vo);
1061     vorbis_block_clear(&vb);
1062     vorbis_dsp_clear(&vd);
1063     vorbis_comment_clear(&vc);
1064     vorbis_info_clear(&vi);
1065   }
1066   if(video){
1067     ogg_stream_clear(&to);
1068     theora_clear(&td);
1069   }
1070
1071   if(outfile && outfile!=stdout)fclose(outfile);
1072
1073   fprintf(stderr,"\r   \ndone.\n\n");
1074
1075 #ifdef THEORA_PERF_DATA
1076 # ifdef WIN32
1077     QueryPerformanceCounter(&final_time);
1078     elapsed_ticks = final_time.QuadPart - start_time.QuadPart;
1079     ticks_per_second;
1080     QueryPerformanceFrequency(&ticks_per_second);
1081     elapsed_secs = elapsed_ticks / ticks_per_second.QuadPart;
1082     elapsed_sec_mod = elapsed_ticks % ticks_per_second.QuadPart;
1083     elapsed_secs_dbl = elapsed_secs;
1084     elapsed_secs_dbl += ((double)elapsed_sec_mod / (double)ticks_per_second.QuadPart);
1085     printf("Encode time = %lld ticks\n", elapsed_ticks);
1086     printf("~%lld and %lld / %lld seconds\n", elapsed_secs, elapsed_sec_mod, ticks_per_second.QuadPart);
1087     printf("~%Lf seconds\n", elapsed_secs_dbl);
1088 # endif
1089
1090 #endif
1091
1092   return(0);
1093
1094 }