Merge "SSE2 inverse 4x4 2D-DCT with DC only"
[aom.git] / y4minput.c
blob47f005a950be021325c1a3180033570e74c95f03
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  Based on code from the OggTheora software codec source code,
 *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
 */
#include <stdlib.h>
#include <string.h>
#include "y4minput.h"

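/*A Y4M stream begins with a header line such as (illustrative values):
    YUV4MPEG2 W352 H288 F30000:1001 Ip A128:117 C420jpeg
  y4m_parse_tags() walks the space-separated tags from that line, dispatching
  on each tag's first letter; unknown tags are ignored.*/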
static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
  int got_w;
  int got_h;
  int got_fps;
  int got_interlace;
  int got_par;
  int got_chroma;
  char *p;
  char *q;
  got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
  for (p = _tags;; p = q) {
    /*Skip any leading spaces.*/
    while (*p == ' ') p++;
    /*If that's all we have, stop.*/
    if (p[0] == '\0') break;
    /*Find the end of this tag.*/
    for (q = p + 1; *q != '\0' && *q != ' '; q++);
    /*Process the tag.*/
    switch (p[0]) {
      case 'W': {
        if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;
        got_w = 1;
      }
      break;
      case 'H': {
        if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;
        got_h = 1;
      }
      break;
      case 'F': {
        if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
          return -1;
        }
        got_fps = 1;
      }
      break;
      case 'I': {
        _y4m->interlace = p[1];
        got_interlace = 1;
      }
      break;
      case 'A': {
        if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
          return -1;
        }
        got_par = 1;
      }
      break;
      case 'C': {
        if (q - p > 16) return -1;
        memcpy(_y4m->chroma_type, p + 1, q - p - 1);
        _y4m->chroma_type[q - p - 1] = '\0';
        got_chroma = 1;
      }
      break;
      /*Ignore unknown tags.*/
    }
  }
  if (!got_w || !got_h || !got_fps) return -1;
  if (!got_interlace) _y4m->interlace = '?';
  if (!got_par) _y4m->par_n = _y4m->par_d = 0;
  /*Chroma-type is not specified in older files, e.g., those generated by
    mplayer.*/
  if (!got_chroma) strcpy(_y4m->chroma_type, "420");
  return 0;
}

/*All anti-aliasing filters in the following conversion functions are based on
  one of two window functions:
  The 6-tap Lanczos window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
   0,                         |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  1<=|t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
  overhead and limit ringing.

  The taps from these filters are scaled so that their sum is 1, and the
  result is scaled by 128 and rounded to integers to create a filter whose
  intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
  which is usually equivalent to normal rounding.

  Conversions which require both horizontal and vertical filtering could
  have these steps pipelined, for less memory consumption and better cache
  performance, but we do them separately for simplicity.*/
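/*For example, the quarter-pel shift filter [4 -17 114 35 -9 1]/128 used below
  comes (approximately) from sampling the 6-tap Lanczos window at
  t = -2.25, -1.25, -0.25, 0.75, 1.75, 2.75:
   128*{0.030, -0.133, 0.890, 0.270, -0.068, 0.007} ~ {4, -17, 114, 35, -9, 1},
  with the rounding adjusted so that the integer taps still sum to 128.*/
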
#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
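/*OC_CLAMPI(_a,_b,_c) clamps _b to the range [_a,_c]; the conversion code
  below always uses it as OC_CLAMPI(0,v,255) to keep filtered pixel values in
  the valid 8-bit range.*/
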
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
  the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
  lines, and they are vertically co-sited with the luma samples in both the
  mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
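/*The helper below applies the shift filter along each row in three pieces: a
  left-edge loop that clamps tap indices which would fall before the start of
  the row, an interior loop with direct indexing, and a right-edge loop that
  clamps tap indices past the end of the row.*/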
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
                                        const unsigned char *_src, int _c_w, int _c_h) {
  int y;
  int x;
  for (y = 0; y < _c_h; y++) {
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
      window.*/
    for (x = 0; x < OC_MINI(_c_w, 2); x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] +
        114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
        _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
    }
    for (; x < _c_w - 3; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
        114 * _src[x] + 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
    }
    for (; x < _c_w; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
        114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
        _src[_c_w - 1] + 64) >> 7, 255);
    }
    _dst += _c_w;
    _src += _c_w;
  }
}

/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
                                         unsigned char *_aux) {
  int c_w;
  int c_h;
  int c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  for (pli = 1; pli < 3; pli++) {
    y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
    _dst += c_sz;
    _aux += c_sz;
  }
}

/*This format is only used for interlaced content, but is included for
  completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
  the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
  and the C_b location up one quarter pixel.*/
static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
                                         unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + 1) / 2;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_sz = c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*First do the horizontal re-sampling.
      This is the same as the mpeg2 case, except that after the horizontal
      case, we need to apply a second vertical filter.*/
    y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
    _aux += c_sz;
    switch (pli) {
      case 1: {
        /*Slide C_b up a quarter-pel.
          This is the same filter used above, but in the other order.*/
        for (x = 0; x < c_w; x++) {
          for (y = 0; y < OC_MINI(c_h, 3); y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
              - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
              + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
              + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h - 2; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
              - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
              - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
              - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
              - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
          }
          _dst++;
          tmp++;
        }
        _dst += c_sz - c_w;
        tmp -= c_w;
      }
      break;
      case 2: {
        /*Slide C_r down a quarter-pel.
          This is the same as the horizontal filter.*/
        for (x = 0; x < c_w; x++) {
          for (y = 0; y < OC_MINI(c_h, 2); y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
              - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
              + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
              + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h - 3; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
              - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
              - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
              - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
              - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
          }
          _dst++;
          tmp++;
        }
      }
      break;
    }
    /*For actual interlaced material, this would have to be done separately on
      each field, and the shift amounts would be different.
      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
      C_b up 1/8 in the bottom field.
      The corresponding filters would be:
      Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
      Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
  }
}

/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.*/
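/*The symmetric taps [3 -17 78 78 -17 3]/128 used here come (approximately)
  from sampling the same 6-tap Lanczos window halfway between two source rows,
  at t = -2.5, -1.5, -0.5, 0.5, 1.5, 2.5:
   128*{0.024, -0.135, 0.608, 0.608, -0.135, 0.024} ~ {3, -17, 78, 78, -17, 3},
  again rounded so that the integer taps sum to 128.*/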
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
                                       const unsigned char *_src, int _c_w, int _c_h) {
  int y;
  int x;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for (x = 0; x < _c_w; x++) {
    for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (64 * _src[0]
        + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
        - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
        + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
    }
    for (; y < _c_h - 3; y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
        - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
        + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
    }
    for (; y < _c_h; y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w]
        + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w]
        + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
        + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255);
    }
    _src++;
    _dst++;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422jpeg chroma samples are sited like:
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to decimate the chroma planes by two in the
  vertical direction.*/
static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                        unsigned char *_aux) {
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  for (pli = 1; pli < 3; pli++) {
    y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
    _aux += c_sz;
    _dst += dst_c_sz;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422 chroma samples are sited like:
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the original site locations one quarter
  pixel (at the original chroma resolution) to the right.
  Then we use a second resampling filter to decimate the chroma planes by two
  in the vertical direction.*/
static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_h;
  int dst_c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = c_w * dst_c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*In reality, the horizontal and vertical steps could be pipelined, for
      less memory consumption and better cache performance, but we do them
      separately for simplicity.*/
    /*First do horizontal filtering (convert to 422jpeg).*/
    y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
    _aux += c_sz;
    _dst += dst_c_sz;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the
  source chroma plane's horizontal resolution) and five eighths of a pixel to
  the right.
  Then we use another filter to decimate the planes by 2 in the vertical
  direction.*/
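/*The Mitchell-window taps used below can be reproduced (approximately) by
  evaluating the window at the distances from each new site (1/8 and 5/8 of a
  source pixel to the right of a source sample) to the four nearest source
  samples, then scaling each set of weights so it sums to 128:
   1/8 shift: 128*{0.005, 0.860, 0.139, -0.004} ~ {1, 110, 18, -1}
   5/8 shift: 128*{-0.027, 0.392, 0.669, -0.035} ~ {-3, 50, 86, -5}*/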
static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int tmp_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  tmp_sz = dst_c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*In reality, the horizontal and vertical steps could be pipelined, for
      less memory consumption and better cache performance, but we do them
      separately for simplicity.*/
    /*First do horizontal filtering (convert to 422jpeg).*/
    for (y = 0; y < c_h; y++) {
      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
        4-tap Mitchell window.*/
      for (x = 0; x < OC_MINI(c_w, 1); x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
          + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
        tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
          + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
      }
      for (; x < c_w - 2; x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
          + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
        tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
          + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
      }
      for (; x < c_w; x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
          + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
        if ((x << 1 | 1) < dst_c_w) {
          tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
            + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
        }
      }
      tmp += dst_c_w;
      _aux += c_w;
    }
    tmp -= tmp_sz;
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
    _dst += dst_c_sz;
  }
}

/*Convert 444 to 420jpeg.*/
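/*Horizontally, each pair of 4:4:4 chroma samples is decimated with the same
  symmetric [3 -17 78 78 -17 3]/128 kernel used for vertical decimation; the
  shared 422jpeg-to-420jpeg helper then decimates the vertical direction.*/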
static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int tmp_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  tmp_sz = dst_c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
    for (y = 0; y < c_h; y++) {
      for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
          - 17 * _aux[OC_MINI(2, c_w - 1)]
          + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
      }
      for (; x < c_w - 3; x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
          - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
      }
      for (; x < c_w; x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
          17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
          78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
      }
      tmp += dst_c_w;
      _aux += c_w;
    }
    tmp -= tmp_sz;
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
    _dst += dst_c_sz;
  }
}

/*The image is padded with empty chroma components at 4:2:0.*/
static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                     unsigned char *_aux) {
  int c_sz;
  _dst += _y4m->pic_w * _y4m->pic_h;
  c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
         ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
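  /*128 is the neutral value for 8-bit Cb and Cr, so this fills both chroma
    planes with gray.*/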
  memset(_dst, 128, c_sz * 2);
}

/*No conversion function needed.*/
static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
                             unsigned char *_aux) {
}

int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
                   int only_420) {
  char buffer[80];
  int ret;
  int i;
  /*Read until newline, or 80 cols, whichever happens first.*/
  for (i = 0; i < 79; i++) {
    if (_nskip > 0) {
      buffer[i] = *_skip++;
      _nskip--;
    } else {
      ret = (int)fread(buffer + i, 1, 1, _fin);
      if (ret < 1) return -1;
    }
    if (buffer[i] == '\n') break;
  }
  /*We skipped too much header data.*/
  if (_nskip > 0) return -1;
  if (i == 79) {
    fprintf(stderr, "Error parsing header; not a YUV4MPEG2 file?\n");
    return -1;
  }
  buffer[i] = '\0';
  if (memcmp(buffer, "YUV4MPEG", 8)) {
    fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
    return -1;
  }
  if (buffer[8] != '2') {
    fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
  }
  ret = y4m_parse_tags(_y4m, buffer + 5);
  if (ret < 0) {
    fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
    return ret;
  }
  if (_y4m->interlace == '?') {
    fprintf(stderr, "Warning: Input video interlacing format unknown; "
            "assuming progressive scan.\n");
  } else if (_y4m->interlace != 'p') {
    fprintf(stderr, "Input video is interlaced; "
            "only progressive scan is handled.\n");
    return -1;
  }
  _y4m->vpx_fmt = VPX_IMG_FMT_I420;
  _y4m->vpx_bps = 12;
  if (strcmp(_y4m->chroma_type, "420") == 0 ||
      strcmp(_y4m->chroma_type, "420jpeg") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
                            + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    /*Natively supported: no conversion required.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
    _y4m->convert = y4m_convert_null;
  } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
                         2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
  } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.
      We need to make two filter passes, so we need some extra space in the
      aux buffer.*/
    _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
  } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
    _y4m->convert = y4m_convert_422jpeg_420jpeg;
  } else if (strcmp(_y4m->chroma_type, "422") == 0) {
    _y4m->src_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.*/
      _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
                         ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->convert = y4m_convert_422_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_I422;
      _y4m->vpx_bps = 16;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
                              + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "411") == 0) {
    _y4m->src_c_dec_h = 4;
    _y4m->dst_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.
      We need to make two filter passes, so we need some extra space in the
      aux buffer.*/
    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
    _y4m->convert = y4m_convert_411_420jpeg;
  } else if (strcmp(_y4m->chroma_type, "444") == 0) {
    _y4m->src_c_dec_h = 1;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.*/
      _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
                         ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->convert = y4m_convert_444_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_I444;
      _y4m->vpx_bps = 24;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
    _y4m->src_c_dec_h = 1;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.
        The extra plane also gets read into the aux buf.
        It will be discarded.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
      _y4m->convert = y4m_convert_444_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_444A;
      _y4m->vpx_bps = 32;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
    _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
    _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*No extra space required, but we need to clear the chroma planes.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
    _y4m->convert = y4m_convert_mono_420jpeg;
  } else {
    fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
    return -1;
  }
  /*The size of the final frame buffers is always computed from the
    destination chroma decimation type.*/
  _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
                     + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
                     ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
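  /*For example, a 352x288 4:2:0 frame needs 352*288 + 2*(176*144) = 152064
    bytes here.*/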
  _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
  _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
  return 0;
}

void y4m_input_close(y4m_input *_y4m) {
  free(_y4m->dst_buf);
  free(_y4m->aux_buf);
}

int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
  char frame[6];
  int pic_sz;
  int c_w;
  int c_h;
  int c_sz;
  int ret;
  /*Read and skip the frame header.*/
  ret = (int)fread(frame, 1, 6, _fin);
  if (ret < 6) return 0;
  if (memcmp(frame, "FRAME", 5)) {
    fprintf(stderr, "Loss of framing in Y4M input data\n");
    return -1;
  }
  if (frame[5] != '\n') {
    char c;
    int j;
    for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++);
    if (j == 79) {
      fprintf(stderr, "Error parsing Y4M frame header\n");
      return -1;
    }
  }
  /*Read the frame data that needs no conversion.*/
  if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) {
    fprintf(stderr, "Error reading Y4M frame data.\n");
    return -1;
  }
  /*Read the frame data that does need conversion.*/
  if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) {
    fprintf(stderr, "Error reading Y4M frame data.\n");
    return -1;
  }
  /*Now convert the just read frame.*/
  (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
  /*Fill in the frame buffer pointers.
    We don't use vpx_img_wrap() because it forces padding for odd picture
    sizes, which would require a separate fread call for every row.*/
  memset(_img, 0, sizeof(*_img));
  /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
  _img->fmt = _y4m->vpx_fmt;
  _img->w = _img->d_w = _y4m->pic_w;
  _img->h = _img->d_h = _y4m->pic_h;
  _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
  _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
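  /*dst_c_dec_h and dst_c_dec_v are 1 or 2 here, so the shift is 0 for
    full-resolution chroma and 1 for 2:1 decimation.*/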
  _img->bps = _y4m->vpx_bps;

  /*Set up the buffer pointers.*/
  pic_sz = _y4m->pic_w * _y4m->pic_h;
  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
  _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
  _img->planes[PLANE_Y] = _y4m->dst_buf;
  _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
  _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
  _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
  return 1;
}