y4minput.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  *
  10  *  Based on code from the OggTheora software codec source code,
  11  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
  12  */
  13 #include <errno.h>
  14 #include <stdlib.h>
  15 #include <string.h>
  16
  17 #include "vpx/vpx_integer.h"
  18 #include "y4minput.h"
  19
  20 // Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
  21 // Returns true on success.
  22 static int file_read(void *buf, size_t size, FILE *file) {
  23   const int kMaxRetries = 5;
  24   int retry_count = 0;
  25   int file_error;
  26   size_t len = 0;
  27   do {
  28     const size_t n = fread((uint8_t*)buf + len, 1, size - len, file);
  29     len += n;
  30     file_error = ferror(file);
  31     if (file_error) {
  32       if (errno == EINTR || errno == EAGAIN) {
  33         clearerr(file);
  34         continue;
  35       } else {
  36         fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n",
  37                 (uint32_t)len, (uint32_t)size, errno, strerror(errno));
  38         return 0;
  39       }
  40     }
  41   } while (!feof(file) && len < size && ++retry_count < kMaxRetries);
  42
  43   if (!feof(file) && len != size) {
  44     fprintf(stderr, "Error reading file: %u of %u bytes read,"
  45                     " error: %d, retries: %d, %d: %s\n",
  46             (uint32_t)len, (uint32_t)size, file_error, retry_count,
  47             errno, strerror(errno));
  48   }
  49   return len == size;
  50 }
  51
  52 static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
  53   int   got_w;
  54   int   got_h;
  55   int   got_fps;
  56   int   got_interlace;
  57   int   got_par;
  58   int   got_chroma;
  59   char *p;
  60   char *q;
  61   got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
  62   for (p = _tags;; p = q) {
  63     /*Skip any leading spaces.*/
  64     while (*p == ' ')p++;
  65     /*If that's all we have, stop.*/
  66     if (p[0] == '\0')break;
  67     /*Find the end of this tag.*/
  68     for (q = p + 1; *q != '\0' && *q != ' '; q++);
  69     /*Process the tag.*/
  70     switch (p[0]) {
  71       case 'W': {
  72         if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1)return -1;
  73         got_w = 1;
  74       }
  75       break;
  76       case 'H': {
  77         if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1)return -1;
  78         got_h = 1;
  79       }
  80       break;
  81       case 'F': {
  82         if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
  83           return -1;
  84         }
  85         got_fps = 1;
  86       }
  87       break;
  88       case 'I': {
  89         _y4m->interlace = p[1];
  90         got_interlace = 1;
  91       }
  92       break;
  93       case 'A': {
  94         if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
  95           return -1;
  96         }
  97         got_par = 1;
  98       }
  99       break;
 100       case 'C': {
 101         if (q - p > 16)return -1;
 102         memcpy(_y4m->chroma_type, p + 1, q - p - 1);
 103         _y4m->chroma_type[q - p - 1] = '\0';
 104         got_chroma = 1;
 105       }
 106       break;
 107       /*Ignore unknown tags.*/
 108     }
 109   }
 110   if (!got_w || !got_h || !got_fps)return -1;
 111   if (!got_interlace)_y4m->interlace = '?';
 112   if (!got_par)_y4m->par_n = _y4m->par_d = 0;
 113   /*Chroma-type is not specified in older files, e.g., those generated by
 114      mplayer.*/
 115   if (!got_chroma)strcpy(_y4m->chroma_type, "420");
 116   return 0;
 117 }
 118
 119
 120
 121 /*All anti-aliasing filters in the following conversion functions are based on
 122    one of two window functions:
 123   The 6-tap Lanczos window (for down-sampling and shifts):
 124    sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
 125    0,                         |t|>=3
 126   The 4-tap Mitchell window (for up-sampling):
 127    7|t|^3-12|t|^2+16/3,             |t|<1
  128    -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
 129    0,                               |t|>=2
 130   The number of taps is intentionally kept small to reduce computational
 131    overhead and limit ringing.
 132
 133   The taps from these filters are scaled so that their sum is 1, and the result
 134    is scaled by 128 and rounded to integers to create a filter whose
 135    intermediate values fit inside 16 bits.
 136   Coefficients are rounded in such a way as to ensure their sum is still 128,
 137    which is usually equivalent to normal rounding.
 138
 139   Conversions which require both horizontal and vertical filtering could
 140    have these steps pipelined, for less memory consumption and better cache
 141    performance, but we do them separately for simplicity.*/
 142
 143 #define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
 144 #define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
 145 #define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
 146
 147 /*420jpeg chroma samples are sited like:
 148   Y-------Y-------Y-------Y-------
 149   |       |       |       |
 150   |   BR  |       |   BR  |
 151   |       |       |       |
 152   Y-------Y-------Y-------Y-------
 153   |       |       |       |
 154   |       |       |       |
 155   |       |       |       |
 156   Y-------Y-------Y-------Y-------
 157   |       |       |       |
 158   |   BR  |       |   BR  |
 159   |       |       |       |
 160   Y-------Y-------Y-------Y-------
 161   |       |       |       |
 162   |       |       |       |
 163   |       |       |       |
 164
 165   420mpeg2 chroma samples are sited like:
 166   Y-------Y-------Y-------Y-------
 167   |       |       |       |
 168   BR      |       BR      |
 169   |       |       |       |
 170   Y-------Y-------Y-------Y-------
 171   |       |       |       |
 172   |       |       |       |
 173   |       |       |       |
 174   Y-------Y-------Y-------Y-------
 175   |       |       |       |
 176   BR      |       BR      |
 177   |       |       |       |
 178   Y-------Y-------Y-------Y-------
 179   |       |       |       |
 180   |       |       |       |
 181   |       |       |       |
 182
 183   We use a resampling filter to shift the site locations one quarter pixel (at
 184    the chroma plane's resolution) to the right.
 185   The 4:2:2 modes look exactly the same, except there are twice as many chroma
 186    lines, and they are vertically co-sited with the luma samples in both the
 187    mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
 188 static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 189                                         const unsigned char *_src, int _c_w, int _c_h) {
 190   int y;
 191   int x;
 192   for (y = 0; y < _c_h; y++) {
 193     /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
 194        window.*/
 195     for (x = 0; x < OC_MINI(_c_w, 2); x++) {
 196       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] +
 197                                              114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
 198                                              _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
 199     }
 200     for (; x < _c_w - 3; x++) {
 201       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
 202                                              114 * _src[x] + 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
 203     }
 204     for (; x < _c_w; x++) {
 205       _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
 206                                              114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
 207                                              _src[_c_w - 1] + 64) >> 7, 255);
 208     }
 209     _dst += _c_w;
 210     _src += _c_w;
 211   }
 212 }
 213
 214 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
 215 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
 216                                          unsigned char *_aux) {
 217   int c_w;
 218   int c_h;
 219   int c_sz;
 220   int pli;
 221   /*Skip past the luma data.*/
 222   _dst += _y4m->pic_w * _y4m->pic_h;
 223   /*Compute the size of each chroma plane.*/
 224   c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 225   c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 226   c_sz = c_w * c_h;
 227   for (pli = 1; pli < 3; pli++) {
 228     y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
 229     _dst += c_sz;
 230     _aux += c_sz;
 231   }
 232 }
 233
 234 /*This format is only used for interlaced content, but is included for
 235    completeness.
 236
 237   420jpeg chroma samples are sited like:
 238   Y-------Y-------Y-------Y-------
 239   |       |       |       |
 240   |   BR  |       |   BR  |
 241   |       |       |       |
 242   Y-------Y-------Y-------Y-------
 243   |       |       |       |
 244   |       |       |       |
 245   |       |       |       |
 246   Y-------Y-------Y-------Y-------
 247   |       |       |       |
 248   |   BR  |       |   BR  |
 249   |       |       |       |
 250   Y-------Y-------Y-------Y-------
 251   |       |       |       |
 252   |       |       |       |
 253   |       |       |       |
 254
 255   420paldv chroma samples are sited like:
 256   YR------Y-------YR------Y-------
 257   |       |       |       |
 258   |       |       |       |
 259   |       |       |       |
 260   YB------Y-------YB------Y-------
 261   |       |       |       |
 262   |       |       |       |
 263   |       |       |       |
 264   YR------Y-------YR------Y-------
 265   |       |       |       |
 266   |       |       |       |
 267   |       |       |       |
 268   YB------Y-------YB------Y-------
 269   |       |       |       |
 270   |       |       |       |
 271   |       |       |       |
 272
 273   We use a resampling filter to shift the site locations one quarter pixel (at
 274    the chroma plane's resolution) to the right.
 275   Then we use another filter to move the C_r location down one quarter pixel,
 276    and the C_b location up one quarter pixel.*/
 277 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
 278                                          unsigned char *_aux) {
 279   unsigned char *tmp;
 280   int            c_w;
 281   int            c_h;
 282   int            c_sz;
 283   int            pli;
 284   int            y;
 285   int            x;
 286   /*Skip past the luma data.*/
 287   _dst += _y4m->pic_w * _y4m->pic_h;
 288   /*Compute the size of each chroma plane.*/
 289   c_w = (_y4m->pic_w + 1) / 2;
 290   c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 291   c_sz = c_w * c_h;
 292   tmp = _aux + 2 * c_sz;
 293   for (pli = 1; pli < 3; pli++) {
 294     /*First do the horizontal re-sampling.
 295       This is the same as the mpeg2 case, except that after the horizontal
 296        case, we need to apply a second vertical filter.*/
 297     y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
 298     _aux += c_sz;
 299     switch (pli) {
 300       case 1: {
 301         /*Slide C_b up a quarter-pel.
 302           This is the same filter used above, but in the other order.*/
 303         for (x = 0; x < c_w; x++) {
 304           for (y = 0; y < OC_MINI(c_h, 3); y++) {
 305             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
 306                                                          - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
 307                                                          + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
 308                                                          + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
 309           }
 310           for (; y < c_h - 2; y++) {
 311             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
 312                                                          - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
 313                                                          - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
 314           }
 315           for (; y < c_h; y++) {
 316             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
 317                                                          - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
 318                                                          - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
 319           }
 320           _dst++;
 321           tmp++;
 322         }
 323         _dst += c_sz - c_w;
 324         tmp -= c_w;
 325       }
 326       break;
 327       case 2: {
 328         /*Slide C_r down a quarter-pel.
 329           This is the same as the horizontal filter.*/
 330         for (x = 0; x < c_w; x++) {
 331           for (y = 0; y < OC_MINI(c_h, 2); y++) {
 332             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
 333                                                          - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
 334                                                          + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
 335                                                          + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
 336           }
 337           for (; y < c_h - 3; y++) {
 338             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
 339                                                          - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
 340                                                          - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
 341           }
 342           for (; y < c_h; y++) {
 343             _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
 344                                                          - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
 345                                                          - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
 346           }
 347           _dst++;
 348           tmp++;
 349         }
 350       }
 351       break;
 352     }
 353     /*For actual interlaced material, this would have to be done separately on
 354        each field, and the shift amounts would be different.
 355       C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
 356        C_b up 1/8 in the bottom field.
 357       The corresponding filters would be:
 358        Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
 359        Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
 360   }
 361 }
 362
 363 /*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
 364   This is used as a helper by several converation routines.*/
 365 static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
 366                                        const unsigned char *_src, int _c_w, int _c_h) {
 367   int y;
 368   int x;
 369   /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
 370   for (x = 0; x < _c_w; x++) {
 371     for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
 372       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (64 * _src[0]
 373                                           + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
 374                                           - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
 375                                           + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
 376     }
 377     for (; y < _c_h - 3; y += 2) {
 378       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
 379                                           - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
 380                                           + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
 381     }
 382     for (; y < _c_h; y += 2) {
 383       _dst[(y >> 1)*_c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w]
 384                                                + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w]
 385                                                                                   + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
 386                                           + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255);
 387     }
 388     _src++;
 389     _dst++;
 390   }
 391 }
 392
 393 /*420jpeg chroma samples are sited like:
 394   Y-------Y-------Y-------Y-------
 395   |       |       |       |
 396   |   BR  |       |   BR  |
 397   |       |       |       |
 398   Y-------Y-------Y-------Y-------
 399   |       |       |       |
 400   |       |       |       |
 401   |       |       |       |
 402   Y-------Y-------Y-------Y-------
 403   |       |       |       |
 404   |   BR  |       |   BR  |
 405   |       |       |       |
 406   Y-------Y-------Y-------Y-------
 407   |       |       |       |
 408   |       |       |       |
 409   |       |       |       |
 410
 411   422jpeg chroma samples are sited like:
 412   Y---BR--Y-------Y---BR--Y-------
 413   |       |       |       |
 414   |       |       |       |
 415   |       |       |       |
 416   Y---BR--Y-------Y---BR--Y-------
 417   |       |       |       |
 418   |       |       |       |
 419   |       |       |       |
 420   Y---BR--Y-------Y---BR--Y-------
 421   |       |       |       |
 422   |       |       |       |
 423   |       |       |       |
 424   Y---BR--Y-------Y---BR--Y-------
 425   |       |       |       |
 426   |       |       |       |
 427   |       |       |       |
 428
 429   We use a resampling filter to decimate the chroma planes by two in the
 430    vertical direction.*/
 431 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 432                                         unsigned char *_aux) {
 433   int c_w;
 434   int c_h;
 435   int c_sz;
 436   int dst_c_w;
 437   int dst_c_h;
 438   int dst_c_sz;
 439   int pli;
 440   /*Skip past the luma data.*/
 441   _dst += _y4m->pic_w * _y4m->pic_h;
 442   /*Compute the size of each chroma plane.*/
 443   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 444   c_h = _y4m->pic_h;
 445   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 446   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 447   c_sz = c_w * c_h;
 448   dst_c_sz = dst_c_w * dst_c_h;
 449   for (pli = 1; pli < 3; pli++) {
 450     y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
 451     _aux += c_sz;
 452     _dst += dst_c_sz;
 453   }
 454 }
 455
 456 /*420jpeg chroma samples are sited like:
 457   Y-------Y-------Y-------Y-------
 458   |       |       |       |
 459   |   BR  |       |   BR  |
 460   |       |       |       |
 461   Y-------Y-------Y-------Y-------
 462   |       |       |       |
 463   |       |       |       |
 464   |       |       |       |
 465   Y-------Y-------Y-------Y-------
 466   |       |       |       |
 467   |   BR  |       |   BR  |
 468   |       |       |       |
 469   Y-------Y-------Y-------Y-------
 470   |       |       |       |
 471   |       |       |       |
 472   |       |       |       |
 473
 474   422 chroma samples are sited like:
 475   YBR-----Y-------YBR-----Y-------
 476   |       |       |       |
 477   |       |       |       |
 478   |       |       |       |
 479   YBR-----Y-------YBR-----Y-------
 480   |       |       |       |
 481   |       |       |       |
 482   |       |       |       |
 483   YBR-----Y-------YBR-----Y-------
 484   |       |       |       |
 485   |       |       |       |
 486   |       |       |       |
 487   YBR-----Y-------YBR-----Y-------
 488   |       |       |       |
 489   |       |       |       |
 490   |       |       |       |
 491
 492   We use a resampling filter to shift the original site locations one quarter
 493    pixel (at the original chroma resolution) to the right.
 494   Then we use a second resampling filter to decimate the chroma planes by two
 495    in the vertical direction.*/
 496 static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 497                                     unsigned char *_aux) {
 498   unsigned char *tmp;
 499   int            c_w;
 500   int            c_h;
 501   int            c_sz;
 502   int            dst_c_h;
 503   int            dst_c_sz;
 504   int            pli;
 505   /*Skip past the luma data.*/
 506   _dst += _y4m->pic_w * _y4m->pic_h;
 507   /*Compute the size of each chroma plane.*/
 508   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 509   c_h = _y4m->pic_h;
 510   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 511   c_sz = c_w * c_h;
 512   dst_c_sz = c_w * dst_c_h;
 513   tmp = _aux + 2 * c_sz;
 514   for (pli = 1; pli < 3; pli++) {
 515     /*In reality, the horizontal and vertical steps could be pipelined, for
 516        less memory consumption and better cache performance, but we do them
 517        separately for simplicity.*/
 518     /*First do horizontal filtering (convert to 422jpeg)*/
 519     y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
 520     /*Now do the vertical filtering.*/
 521     y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
 522     _aux += c_sz;
 523     _dst += dst_c_sz;
 524   }
 525 }
 526
 527 /*420jpeg chroma samples are sited like:
 528   Y-------Y-------Y-------Y-------
 529   |       |       |       |
 530   |   BR  |       |   BR  |
 531   |       |       |       |
 532   Y-------Y-------Y-------Y-------
 533   |       |       |       |
 534   |       |       |       |
 535   |       |       |       |
 536   Y-------Y-------Y-------Y-------
 537   |       |       |       |
 538   |   BR  |       |   BR  |
 539   |       |       |       |
 540   Y-------Y-------Y-------Y-------
 541   |       |       |       |
 542   |       |       |       |
 543   |       |       |       |
 544
 545   411 chroma samples are sited like:
 546   YBR-----Y-------Y-------Y-------
 547   |       |       |       |
 548   |       |       |       |
 549   |       |       |       |
 550   YBR-----Y-------Y-------Y-------
 551   |       |       |       |
 552   |       |       |       |
 553   |       |       |       |
 554   YBR-----Y-------Y-------Y-------
 555   |       |       |       |
 556   |       |       |       |
 557   |       |       |       |
 558   YBR-----Y-------Y-------Y-------
 559   |       |       |       |
 560   |       |       |       |
 561   |       |       |       |
 562
 563   We use a filter to resample at site locations one eighth pixel (at the source
 564    chroma plane's horizontal resolution) and five eighths of a pixel to the
 565    right.
 566   Then we use another filter to decimate the planes by 2 in the vertical
 567    direction.*/
 568 static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 569                                     unsigned char *_aux) {
 570   unsigned char *tmp;
 571   int            c_w;
 572   int            c_h;
 573   int            c_sz;
 574   int            dst_c_w;
 575   int            dst_c_h;
 576   int            dst_c_sz;
 577   int            tmp_sz;
 578   int            pli;
 579   int            y;
 580   int            x;
 581   /*Skip past the luma data.*/
 582   _dst += _y4m->pic_w * _y4m->pic_h;
 583   /*Compute the size of each chroma plane.*/
 584   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 585   c_h = _y4m->pic_h;
 586   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 587   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 588   c_sz = c_w * c_h;
 589   dst_c_sz = dst_c_w * dst_c_h;
 590   tmp_sz = dst_c_w * c_h;
 591   tmp = _aux + 2 * c_sz;
 592   for (pli = 1; pli < 3; pli++) {
 593     /*In reality, the horizontal and vertical steps could be pipelined, for
 594        less memory consumption and better cache performance, but we do them
 595        separately for simplicity.*/
 596     /*First do horizontal filtering (convert to 422jpeg)*/
 597     for (y = 0; y < c_h; y++) {
 598       /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
 599          4-tap Mitchell window.*/
 600       for (x = 0; x < OC_MINI(c_w, 1); x++) {
 601         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
 602                                                    + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
 603         tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
 604                                                        + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
 605       }
 606       for (; x < c_w - 2; x++) {
 607         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
 608                                                    + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
 609         tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
 610                                                        + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
 611       }
 612       for (; x < c_w; x++) {
 613         tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
 614                                                    + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
 615         if ((x << 1 | 1) < dst_c_w) {
 616           tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
 617                                                          + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
 618         }
 619       }
 620       tmp += dst_c_w;
 621       _aux += c_w;
 622     }
 623     tmp -= tmp_sz;
 624     /*Now do the vertical filtering.*/
 625     y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
 626     _dst += dst_c_sz;
 627   }
 628 }
 629
 630 /*Convert 444 to 420jpeg.*/
 631 static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 632                                     unsigned char *_aux) {
 633   unsigned char *tmp;
 634   int            c_w;
 635   int            c_h;
 636   int            c_sz;
 637   int            dst_c_w;
 638   int            dst_c_h;
 639   int            dst_c_sz;
 640   int            tmp_sz;
 641   int            pli;
 642   int            y;
 643   int            x;
 644   /*Skip past the luma data.*/
 645   _dst += _y4m->pic_w * _y4m->pic_h;
 646   /*Compute the size of each chroma plane.*/
 647   c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
 648   c_h = _y4m->pic_h;
 649   dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 650   dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 651   c_sz = c_w * c_h;
 652   dst_c_sz = dst_c_w * dst_c_h;
 653   tmp_sz = dst_c_w * c_h;
 654   tmp = _aux + 2 * c_sz;
 655   for (pli = 1; pli < 3; pli++) {
 656     /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
 657     for (y = 0; y < c_h; y++) {
 658       for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
 659         tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
 660                                     - 17 * _aux[OC_MINI(2, c_w - 1)]
 661                                     + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
 662       }
 663       for (; x < c_w - 3; x += 2) {
 664         tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
 665                                     - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
 666       }
 667       for (; x < c_w; x += 2) {
 668         tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
 669                                     17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
 670                                     78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
 671       }
 672       tmp += dst_c_w;
 673       _aux += c_w;
 674     }
 675     tmp -= tmp_sz;
 676     /*Now do the vertical filtering.*/
 677     y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
 678     _dst += dst_c_sz;
 679   }
 680 }
 681
 682 /*The image is padded with empty chroma components at 4:2:0.*/
 683 static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
 684                                      unsigned char *_aux) {
 685   int c_sz;
 686   _dst += _y4m->pic_w * _y4m->pic_h;
 687   c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
 688          ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
 689   memset(_dst, 128, c_sz * 2);
 690 }
 691
 692 /*No conversion function needed.*/
 693 static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
 694                              unsigned char *_aux) {
 695 }
 696
 697 int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
 698                    int only_420) {
 699   char buffer[80];
 700   int  ret;
 701   int  i;
 702   /*Read until newline, or 80 cols, whichever happens first.*/
 703   for (i = 0; i < 79; i++) {
 704     if (_nskip > 0) {
 705       buffer[i] = *_skip++;
 706       _nskip--;
 707     } else {
 708       if (!file_read(buffer + i, 1, _fin)) return -1;
 709     }
 710     if (buffer[i] == '\n')break;
 711   }
 712   /*We skipped too much header data.*/
 713   if (_nskip > 0)return -1;
 714   if (i == 79) {
 715     fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
 716     return -1;
 717   }
 718   buffer[i] = '\0';
 719   if (memcmp(buffer, "YUV4MPEG", 8)) {
 720     fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
 721     return -1;
 722   }
 723   if (buffer[8] != '2') {
 724     fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
 725   }
 726   ret = y4m_parse_tags(_y4m, buffer + 5);
 727   if (ret < 0) {
 728     fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
 729     return ret;
 730   }
 731   if (_y4m->interlace == '?') {
 732     fprintf(stderr, "Warning: Input video interlacing format unknown; "
 733             "assuming progressive scan.\n");
 734   } else if (_y4m->interlace != 'p') {
 735     fprintf(stderr, "Input video is interlaced; "
 736             "Only progressive scan handled.\n");
 737     return -1;
 738   }
 739   _y4m->vpx_fmt = VPX_IMG_FMT_I420;
 740   _y4m->vpx_bps = 12;
 741   if (strcmp(_y4m->chroma_type, "420") == 0 ||
 742       strcmp(_y4m->chroma_type, "420jpeg") == 0) {
 743     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 744     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
 745                             + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 746     /*Natively supported: no conversion required.*/
 747     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 748     _y4m->convert = y4m_convert_null;
 749   } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
 750     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 751     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 752     /*Chroma filter required: read into the aux buf first.*/
 753     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
 754                          2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 755     _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
 756   } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
 757     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
 758     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 759     /*Chroma filter required: read into the aux buf first.
 760       We need to make two filter passes, so we need some extra space in the
 761        aux buffer.*/
 762     _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 763     _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
 764     _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
 765   } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
 766     _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
 767     _y4m->src_c_dec_v = 1;
 768     _y4m->dst_c_dec_v = 2;
 769     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 770     /*Chroma filter required: read into the aux buf first.*/
 771     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 772     _y4m->convert = y4m_convert_422jpeg_420jpeg;
 773   } else if (strcmp(_y4m->chroma_type, "422") == 0) {
 774     _y4m->src_c_dec_h = 2;
 775     _y4m->src_c_dec_v = 1;
 776     if (only_420) {
 777       _y4m->dst_c_dec_h = 2;
 778       _y4m->dst_c_dec_v = 2;
 779       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 780       /*Chroma filter required: read into the aux buf first.
 781         We need to make two filter passes, so we need some extra space in the
 782          aux buffer.*/
 783       _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 784       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
 785           ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 786       _y4m->convert = y4m_convert_422_420jpeg;
 787     } else {
 788       _y4m->vpx_fmt = VPX_IMG_FMT_I422;
 789       _y4m->vpx_bps = 16;
 790       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 791       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 792       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
 793                               + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 794       /*Natively supported: no conversion required.*/
 795       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 796       _y4m->convert = y4m_convert_null;
 797       }
 798   } else if (strcmp(_y4m->chroma_type, "411") == 0) {
 799     _y4m->src_c_dec_h = 4;
 800     _y4m->dst_c_dec_h = 2;
 801     _y4m->src_c_dec_v = 1;
 802     _y4m->dst_c_dec_v = 2;
 803     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 804     /*Chroma filter required: read into the aux buf first.
 805       We need to make two filter passes, so we need some extra space in the
 806        aux buffer.*/
 807     _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
 808     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 809     _y4m->convert = y4m_convert_411_420jpeg;
 810   } else if (strcmp(_y4m->chroma_type, "444") == 0) {
 811     _y4m->src_c_dec_h = 1;
 812     _y4m->src_c_dec_v = 1;
 813     if (only_420) {
 814       _y4m->dst_c_dec_h = 2;
 815       _y4m->dst_c_dec_v = 2;
 816       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 817       /*Chroma filter required: read into the aux buf first.
 818         We need to make two filter passes, so we need some extra space in the
 819          aux buffer.*/
 820       _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
 821       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
 822           ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
 823       _y4m->convert = y4m_convert_444_420jpeg;
 824     } else {
 825       _y4m->vpx_fmt = VPX_IMG_FMT_I444;
 826       _y4m->vpx_bps = 24;
 827       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 828       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 829       _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
 830       /*Natively supported: no conversion required.*/
 831       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 832       _y4m->convert = y4m_convert_null;
 833     }
 834   } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
 835     _y4m->src_c_dec_h = 1;
 836     _y4m->src_c_dec_v = 1;
 837     if (only_420) {
 838       _y4m->dst_c_dec_h = 2;
 839       _y4m->dst_c_dec_v = 2;
 840       _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 841       /*Chroma filter required: read into the aux buf first.
 842         We need to make two filter passes, so we need some extra space in the
 843          aux buffer.
 844         The extra plane also gets read into the aux buf.
 845         It will be discarded.*/
 846       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
 847       _y4m->convert = y4m_convert_444_420jpeg;
 848     } else {
 849       _y4m->vpx_fmt = VPX_IMG_FMT_444A;
 850       _y4m->vpx_bps = 32;
 851       _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
 852       _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
 853       _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
 854       /*Natively supported: no conversion required.*/
 855       _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 856       _y4m->convert = y4m_convert_null;
 857     }
 858   } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
 859     _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
 860     _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
 861     _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
 862     /*No extra space required, but we need to clear the chroma planes.*/
 863     _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
 864     _y4m->convert = y4m_convert_mono_420jpeg;
 865   } else {
 866     fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
 867     return -1;
 868   }
 869   /*The size of the final frame buffers is always computed from the
 870      destination chroma decimation type.*/
 871   _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
 872                      + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
 873                      ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
 874   _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
 875   _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
 876   return 0;
 877 }
 878
 879 void y4m_input_close(y4m_input *_y4m) {
 880   free(_y4m->dst_buf);
 881   free(_y4m->aux_buf);
 882 }
 883
 884 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
 885   char frame[6];
 886   int  pic_sz;
 887   int  c_w;
 888   int  c_h;
 889   int  c_sz;
 890   /*Read and skip the frame header.*/
 891   if (!file_read(frame, 6, _fin)) return 0;
 892   if (memcmp(frame, "FRAME", 5)) {
 893     fprintf(stderr, "Loss of framing in Y4M input data\n");
 894     return -1;
 895   }
 896   if (frame[5] != '\n') {
 897     char c;
 898     int  j;
 899     for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {}
 900     if (j == 79) {
 901       fprintf(stderr, "Error parsing Y4M frame header\n");
 902       return -1;
 903     }
 904   }
 905   /*Read the frame data that needs no conversion.*/
 906   if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) {
 907     fprintf(stderr, "Error reading Y4M frame data.\n");
 908     return -1;
 909   }
 910   /*Read the frame data that does need conversion.*/
 911   if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) {
 912     fprintf(stderr, "Error reading Y4M frame data.\n");
 913     return -1;
 914   }
 915   /*Now convert the just read frame.*/
 916   (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
 917   /*Fill in the frame buffer pointers.
 918     We don't use vpx_img_wrap() because it forces padding for odd picture
 919      sizes, which would require a separate fread call for every row.*/
 920   memset(_img, 0, sizeof(*_img));
 921   /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
 922   _img->fmt = _y4m->vpx_fmt;
 923   _img->w = _img->d_w = _y4m->pic_w;
 924   _img->h = _img->d_h = _y4m->pic_h;
 925   _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
 926   _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
 927   _img->bps = _y4m->vpx_bps;
 928
 929   /*Set up the buffer pointers.*/
 930   pic_sz = _y4m->pic_w * _y4m->pic_h;
 931   c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
 932   c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
 933   c_sz = c_w * c_h;
 934   _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
 935   _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
 936   _img->planes[PLANE_Y] = _y4m->dst_buf;
 937   _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
 938   _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
 939   _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
 940   return 1;
 941 }