Merge "Add an MMX fwht4x4"
[aom.git] / y4minput.c
blob90c5310a1ec83894e8f154503ba52b8ee42b5d64
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  Based on code from the OggTheora software codec source code,
 *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vpx_integer.h"
#include "y4minput.h"
// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance:
// a short read is retried (at most kMaxRetries passes through the loop), and
// a stream error whose errno is EINTR or EAGAIN is cleared and retried rather
// than treated as fatal.
// Returns 1 if exactly 'size' bytes were read and 0 otherwise. Hitting
// end-of-file early returns 0 without printing a diagnostic; every other
// failure is reported on stderr.
static int file_read(void *buf, size_t size, FILE *file) {
  const int kMaxRetries = 5;
  int retry_count = 0;
  int file_error = 0;
  size_t len = 0;
  do {
    const size_t n = fread((uint8_t *)buf + len, 1, size - len, file);
    len += n;
    file_error = ferror(file);
    if (file_error) {
      if (errno == EINTR || errno == EAGAIN) {
        /* Transient condition: reset the error flag and try again.  The
           'continue' still evaluates the loop condition, so this counts
           against the retry budget. */
        clearerr(file);
        continue;
      } else {
        fprintf(stderr, "Error reading file: %lu of %lu bytes read, %d: %s\n",
                (unsigned long)len, (unsigned long)size, errno,
                strerror(errno));
        return 0;
      }
    }
  } while (!feof(file) && len < size && ++retry_count < kMaxRetries);

  /* Ran out of retries without reaching EOF or filling the buffer. */
  if (!feof(file) && len != size) {
    fprintf(stderr, "Error reading file: %lu of %lu bytes read,"
            " error: %d, retries: %d, %d: %s\n",
            (unsigned long)len, (unsigned long)size, file_error, retry_count,
            errno, strerror(errno));
  }
  return len == size;
}
52 static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
53 int got_w;
54 int got_h;
55 int got_fps;
56 int got_interlace;
57 int got_par;
58 int got_chroma;
59 char *p;
60 char *q;
61 got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
62 for (p = _tags;; p = q) {
63 /*Skip any leading spaces.*/
64 while (*p == ' ')p++;
65 /*If that's all we have, stop.*/
66 if (p[0] == '\0')break;
67 /*Find the end of this tag.*/
68 for (q = p + 1; *q != '\0' && *q != ' '; q++);
69 /*Process the tag.*/
70 switch (p[0]) {
71 case 'W': {
72 if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1)return -1;
73 got_w = 1;
75 break;
76 case 'H': {
77 if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1)return -1;
78 got_h = 1;
80 break;
81 case 'F': {
82 if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
83 return -1;
85 got_fps = 1;
87 break;
88 case 'I': {
89 _y4m->interlace = p[1];
90 got_interlace = 1;
92 break;
93 case 'A': {
94 if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
95 return -1;
97 got_par = 1;
99 break;
100 case 'C': {
101 if (q - p > 16)return -1;
102 memcpy(_y4m->chroma_type, p + 1, q - p - 1);
103 _y4m->chroma_type[q - p - 1] = '\0';
104 got_chroma = 1;
106 break;
107 /*Ignore unknown tags.*/
110 if (!got_w || !got_h || !got_fps)return -1;
111 if (!got_interlace)_y4m->interlace = '?';
112 if (!got_par)_y4m->par_n = _y4m->par_d = 0;
113 /*Chroma-type is not specified in older files, e.g., those generated by
114 mplayer.*/
115 if (!got_chroma)strcpy(_y4m->chroma_type, "420");
116 return 0;
/*All anti-aliasing filters in the following conversion functions are based on
   one of two window functions:
  The 6-tap Lanczos window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
   0,                         |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
   overhead and limit ringing.

  The taps from these filters are scaled so that their sum is 1, and the result
   is scaled by 128 and rounded to integers to create a filter whose
   intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
   which is usually equivalent to normal rounding.

  Conversions which require both horizontal and vertical filtering could
   have these steps pipelined, for less memory consumption and better cache
   performance, but we do them separately for simplicity.*/
/*Integer min/max/clamp helpers.
  These are macros, so each argument may be evaluated more than once: do not
   pass expressions with side effects.
  OC_CLAMPI(_a,_b,_c) limits _b to the range [_a,_c].*/
#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
/*Horizontally filters one chroma plane of _c_w x _c_h samples from _src into
   _dst, row by row, using the 6-tap filter [4 -17 114 35 -9 1]/128.
  Taps that would fall outside a row reuse the row's first or last sample.
  Both buffers are addressed as tightly packed rows of _c_w bytes.*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
                                        const unsigned char *_src, int _c_w,
                                        int _c_h) {
  int y;
  int x;
  for (y = 0; y < _c_h; y++) {
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    /*Left edge: the x-2 tap always reads sample 0 and every other tap is
       clamped into the row.*/
    for (x = 0; x < OC_MINI(_c_w, 2); x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
          (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] + 114 * _src[x] +
           35 * _src[OC_MINI(x + 1, _c_w - 1)] -
           9 * _src[OC_MINI(x + 2, _c_w - 1)] +
           _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
    }
    /*Interior: all six taps are inside the row.*/
    for (; x < _c_w - 3; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
          (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
           35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
    }
    /*Right edge: the x+3 tap always reads the last sample and the x+1 and
       x+2 taps are clamped into the row.*/
    for (; x < _c_w; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
          (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
           35 * _src[OC_MINI(x + 1, _c_w - 1)] -
           9 * _src[OC_MINI(x + 2, _c_w - 1)] +
           _src[_c_w - 1] + 64) >> 7, 255);
    }
    _dst += _c_w;
    _src += _c_w;
  }
}
214 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
215 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
216 unsigned char *_aux) {
217 int c_w;
218 int c_h;
219 int c_sz;
220 int pli;
221 /*Skip past the luma data.*/
222 _dst += _y4m->pic_w * _y4m->pic_h;
223 /*Compute the size of each chroma plane.*/
224 c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
225 c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
226 c_sz = c_w * c_h;
227 for (pli = 1; pli < 3; pli++) {
228 y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
229 _dst += c_sz;
230 _aux += c_sz;
/*This format is only used for interlaced content, but is included for
   completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
   and the C_b location up one quarter pixel.*/
277 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
278 unsigned char *_aux) {
279 unsigned char *tmp;
280 int c_w;
281 int c_h;
282 int c_sz;
283 int pli;
284 int y;
285 int x;
286 /*Skip past the luma data.*/
287 _dst += _y4m->pic_w * _y4m->pic_h;
288 /*Compute the size of each chroma plane.*/
289 c_w = (_y4m->pic_w + 1) / 2;
290 c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
291 c_sz = c_w * c_h;
292 tmp = _aux + 2 * c_sz;
293 for (pli = 1; pli < 3; pli++) {
294 /*First do the horizontal re-sampling.
295 This is the same as the mpeg2 case, except that after the horizontal
296 case, we need to apply a second vertical filter.*/
297 y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
298 _aux += c_sz;
299 switch (pli) {
300 case 1: {
301 /*Slide C_b up a quarter-pel.
302 This is the same filter used above, but in the other order.*/
303 for (x = 0; x < c_w; x++) {
304 for (y = 0; y < OC_MINI(c_h, 3); y++) {
305 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
306 - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
307 + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
308 + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
310 for (; y < c_h - 2; y++) {
311 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
312 - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
313 - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
315 for (; y < c_h; y++) {
316 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
317 - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
318 - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
320 _dst++;
321 tmp++;
323 _dst += c_sz - c_w;
324 tmp -= c_w;
326 break;
327 case 2: {
328 /*Slide C_r down a quarter-pel.
329 This is the same as the horizontal filter.*/
330 for (x = 0; x < c_w; x++) {
331 for (y = 0; y < OC_MINI(c_h, 2); y++) {
332 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
333 - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
334 + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
335 + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
337 for (; y < c_h - 3; y++) {
338 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
339 - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
340 - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
342 for (; y < c_h; y++) {
343 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
344 - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
345 - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
347 _dst++;
348 tmp++;
351 break;
353 /*For actual interlaced material, this would have to be done separately on
354 each field, and the shift amounts would be different.
355 C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
356 C_b up 1/8 in the bottom field.
357 The corresponding filters would be:
358 Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
359 Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.
  _src is a plane of _c_w x _c_h samples; one output row is written to _dst
   for every two source rows (rows 0, 2, 4, ...), each _c_w bytes wide.
  Taps that would fall outside the plane reuse its first or last row.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
                                       const unsigned char *_src, int _c_w,
                                       int _c_h) {
  int y;
  int x;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for (x = 0; x < _c_w; x++) {
    /*Top edge: the three taps above row 1 all read row 0, so their weights
       (3 - 17 + 78) collapse to 64.*/
    for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
      _dst[(y >> 1) * _c_w] = (unsigned char)OC_CLAMPI(0,
          (64 * _src[0] + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w] -
           17 * _src[OC_MINI(2, _c_h - 1) * _c_w] +
           3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
    }
    /*Interior: all six taps are inside the plane.*/
    for (; y < _c_h - 3; y += 2) {
      _dst[(y >> 1) * _c_w] = (unsigned char)OC_CLAMPI(0,
          (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w]) -
           17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w]) +
           78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
    }
    /*Bottom edge: taps below the plane read the last row.*/
    for (; y < _c_h; y += 2) {
      _dst[(y >> 1) * _c_w] = (unsigned char)OC_CLAMPI(0,
          (3 * (_src[(y - 2) * _c_w] + _src[(_c_h - 1) * _c_w]) -
           17 * (_src[(y - 1) * _c_w] +
                 _src[OC_MINI(y + 2, _c_h - 1) * _c_w]) +
           78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) +
           64) >> 7, 255);
    }
    /*Advance to the next column.*/
    _src++;
    _dst++;
  }
}
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422jpeg chroma samples are sited like:
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to decimate the chroma planes by two in the
   vertical direction.*/
431 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
432 unsigned char *_aux) {
433 int c_w;
434 int c_h;
435 int c_sz;
436 int dst_c_w;
437 int dst_c_h;
438 int dst_c_sz;
439 int pli;
440 /*Skip past the luma data.*/
441 _dst += _y4m->pic_w * _y4m->pic_h;
442 /*Compute the size of each chroma plane.*/
443 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
444 c_h = _y4m->pic_h;
445 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
446 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
447 c_sz = c_w * c_h;
448 dst_c_sz = dst_c_w * dst_c_h;
449 for (pli = 1; pli < 3; pli++) {
450 y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
451 _aux += c_sz;
452 _dst += dst_c_sz;
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422 chroma samples are sited like:
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the original site locations one quarter
   pixel (at the original chroma resolution) to the right.
  Then we use a second resampling filter to decimate the chroma planes by two
   in the vertical direction.*/
496 static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
497 unsigned char *_aux) {
498 unsigned char *tmp;
499 int c_w;
500 int c_h;
501 int c_sz;
502 int dst_c_h;
503 int dst_c_sz;
504 int pli;
505 /*Skip past the luma data.*/
506 _dst += _y4m->pic_w * _y4m->pic_h;
507 /*Compute the size of each chroma plane.*/
508 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
509 c_h = _y4m->pic_h;
510 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
511 c_sz = c_w * c_h;
512 dst_c_sz = c_w * dst_c_h;
513 tmp = _aux + 2 * c_sz;
514 for (pli = 1; pli < 3; pli++) {
515 /*In reality, the horizontal and vertical steps could be pipelined, for
516 less memory consumption and better cache performance, but we do them
517 separately for simplicity.*/
518 /*First do horizontal filtering (convert to 422jpeg)*/
519 y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
520 /*Now do the vertical filtering.*/
521 y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
522 _aux += c_sz;
523 _dst += dst_c_sz;
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the source
   chroma plane's horizontal resolution) and five eighths of a pixel to the
   right.
  Then we use another filter to decimate the planes by 2 in the vertical
   direction.*/
568 static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
569 unsigned char *_aux) {
570 unsigned char *tmp;
571 int c_w;
572 int c_h;
573 int c_sz;
574 int dst_c_w;
575 int dst_c_h;
576 int dst_c_sz;
577 int tmp_sz;
578 int pli;
579 int y;
580 int x;
581 /*Skip past the luma data.*/
582 _dst += _y4m->pic_w * _y4m->pic_h;
583 /*Compute the size of each chroma plane.*/
584 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
585 c_h = _y4m->pic_h;
586 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
587 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
588 c_sz = c_w * c_h;
589 dst_c_sz = dst_c_w * dst_c_h;
590 tmp_sz = dst_c_w * c_h;
591 tmp = _aux + 2 * c_sz;
592 for (pli = 1; pli < 3; pli++) {
593 /*In reality, the horizontal and vertical steps could be pipelined, for
594 less memory consumption and better cache performance, but we do them
595 separately for simplicity.*/
596 /*First do horizontal filtering (convert to 422jpeg)*/
597 for (y = 0; y < c_h; y++) {
598 /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
599 4-tap Mitchell window.*/
600 for (x = 0; x < OC_MINI(c_w, 1); x++) {
601 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
602 + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
603 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
604 + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
606 for (; x < c_w - 2; x++) {
607 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
608 + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
609 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
610 + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
612 for (; x < c_w; x++) {
613 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
614 + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
615 if ((x << 1 | 1) < dst_c_w) {
616 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
617 + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
620 tmp += dst_c_w;
621 _aux += c_w;
623 tmp -= tmp_sz;
624 /*Now do the vertical filtering.*/
625 y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
626 _dst += dst_c_sz;
630 /*Convert 444 to 420jpeg.*/
631 static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
632 unsigned char *_aux) {
633 unsigned char *tmp;
634 int c_w;
635 int c_h;
636 int c_sz;
637 int dst_c_w;
638 int dst_c_h;
639 int dst_c_sz;
640 int tmp_sz;
641 int pli;
642 int y;
643 int x;
644 /*Skip past the luma data.*/
645 _dst += _y4m->pic_w * _y4m->pic_h;
646 /*Compute the size of each chroma plane.*/
647 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
648 c_h = _y4m->pic_h;
649 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
650 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
651 c_sz = c_w * c_h;
652 dst_c_sz = dst_c_w * dst_c_h;
653 tmp_sz = dst_c_w * c_h;
654 tmp = _aux + 2 * c_sz;
655 for (pli = 1; pli < 3; pli++) {
656 /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
657 for (y = 0; y < c_h; y++) {
658 for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
659 tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
660 - 17 * _aux[OC_MINI(2, c_w - 1)]
661 + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
663 for (; x < c_w - 3; x += 2) {
664 tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
665 - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
667 for (; x < c_w; x += 2) {
668 tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
669 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
670 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
672 tmp += dst_c_w;
673 _aux += c_w;
675 tmp -= tmp_sz;
676 /*Now do the vertical filtering.*/
677 y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
678 _dst += dst_c_sz;
682 /*The image is padded with empty chroma components at 4:2:0.*/
683 static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
684 unsigned char *_aux) {
685 int c_sz;
686 _dst += _y4m->pic_w * _y4m->pic_h;
687 c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
688 ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
689 memset(_dst, 128, c_sz * 2);
692 /*No conversion function needed.*/
693 static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
694 unsigned char *_aux) {
697 int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
698 int only_420) {
699 char buffer[80];
700 int ret;
701 int i;
702 /*Read until newline, or 80 cols, whichever happens first.*/
703 for (i = 0; i < 79; i++) {
704 if (_nskip > 0) {
705 buffer[i] = *_skip++;
706 _nskip--;
707 } else {
708 if (!file_read(buffer + i, 1, _fin)) return -1;
710 if (buffer[i] == '\n')break;
712 /*We skipped too much header data.*/
713 if (_nskip > 0)return -1;
714 if (i == 79) {
715 fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
716 return -1;
718 buffer[i] = '\0';
719 if (memcmp(buffer, "YUV4MPEG", 8)) {
720 fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
721 return -1;
723 if (buffer[8] != '2') {
724 fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
726 ret = y4m_parse_tags(_y4m, buffer + 5);
727 if (ret < 0) {
728 fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
729 return ret;
731 if (_y4m->interlace == '?') {
732 fprintf(stderr, "Warning: Input video interlacing format unknown; "
733 "assuming progressive scan.\n");
734 } else if (_y4m->interlace != 'p') {
735 fprintf(stderr, "Input video is interlaced; "
736 "Only progressive scan handled.\n");
737 return -1;
739 _y4m->vpx_fmt = VPX_IMG_FMT_I420;
740 _y4m->vpx_bps = 12;
741 if (strcmp(_y4m->chroma_type, "420") == 0 ||
742 strcmp(_y4m->chroma_type, "420jpeg") == 0) {
743 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
744 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
745 + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
746 /*Natively supported: no conversion required.*/
747 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
748 _y4m->convert = y4m_convert_null;
749 } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
750 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
751 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
752 /*Chroma filter required: read into the aux buf first.*/
753 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
754 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
755 _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
756 } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
757 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
758 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
759 /*Chroma filter required: read into the aux buf first.
760 We need to make two filter passes, so we need some extra space in the
761 aux buffer.*/
762 _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
763 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
764 _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
765 } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
766 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
767 _y4m->src_c_dec_v = 1;
768 _y4m->dst_c_dec_v = 2;
769 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
770 /*Chroma filter required: read into the aux buf first.*/
771 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
772 _y4m->convert = y4m_convert_422jpeg_420jpeg;
773 } else if (strcmp(_y4m->chroma_type, "422") == 0) {
774 _y4m->src_c_dec_h = 2;
775 _y4m->src_c_dec_v = 1;
776 if (only_420) {
777 _y4m->dst_c_dec_h = 2;
778 _y4m->dst_c_dec_v = 2;
779 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
780 /*Chroma filter required: read into the aux buf first.
781 We need to make two filter passes, so we need some extra space in the
782 aux buffer.*/
783 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
784 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
785 ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
786 _y4m->convert = y4m_convert_422_420jpeg;
787 } else {
788 _y4m->vpx_fmt = VPX_IMG_FMT_I422;
789 _y4m->vpx_bps = 16;
790 _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
791 _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
792 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
793 + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
794 /*Natively supported: no conversion required.*/
795 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
796 _y4m->convert = y4m_convert_null;
798 } else if (strcmp(_y4m->chroma_type, "411") == 0) {
799 _y4m->src_c_dec_h = 4;
800 _y4m->dst_c_dec_h = 2;
801 _y4m->src_c_dec_v = 1;
802 _y4m->dst_c_dec_v = 2;
803 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
804 /*Chroma filter required: read into the aux buf first.
805 We need to make two filter passes, so we need some extra space in the
806 aux buffer.*/
807 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
808 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
809 _y4m->convert = y4m_convert_411_420jpeg;
810 } else if (strcmp(_y4m->chroma_type, "444") == 0) {
811 _y4m->src_c_dec_h = 1;
812 _y4m->src_c_dec_v = 1;
813 if (only_420) {
814 _y4m->dst_c_dec_h = 2;
815 _y4m->dst_c_dec_v = 2;
816 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
817 /*Chroma filter required: read into the aux buf first.
818 We need to make two filter passes, so we need some extra space in the
819 aux buffer.*/
820 _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
821 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
822 ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
823 _y4m->convert = y4m_convert_444_420jpeg;
824 } else {
825 _y4m->vpx_fmt = VPX_IMG_FMT_I444;
826 _y4m->vpx_bps = 24;
827 _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
828 _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
829 _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
830 /*Natively supported: no conversion required.*/
831 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
832 _y4m->convert = y4m_convert_null;
834 } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
835 _y4m->src_c_dec_h = 1;
836 _y4m->src_c_dec_v = 1;
837 if (only_420) {
838 _y4m->dst_c_dec_h = 2;
839 _y4m->dst_c_dec_v = 2;
840 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
841 /*Chroma filter required: read into the aux buf first.
842 We need to make two filter passes, so we need some extra space in the
843 aux buffer.
844 The extra plane also gets read into the aux buf.
845 It will be discarded.*/
846 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
847 _y4m->convert = y4m_convert_444_420jpeg;
848 } else {
849 _y4m->vpx_fmt = VPX_IMG_FMT_444A;
850 _y4m->vpx_bps = 32;
851 _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
852 _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
853 _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
854 /*Natively supported: no conversion required.*/
855 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
856 _y4m->convert = y4m_convert_null;
858 } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
859 _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
860 _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
861 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
862 /*No extra space required, but we need to clear the chroma planes.*/
863 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
864 _y4m->convert = y4m_convert_mono_420jpeg;
865 } else {
866 fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
867 return -1;
869 /*The size of the final frame buffers is always computed from the
870 destination chroma decimation type.*/
871 _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
872 + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
873 ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
874 _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
875 _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
876 return 0;
879 void y4m_input_close(y4m_input *_y4m) {
880 free(_y4m->dst_buf);
881 free(_y4m->aux_buf);
884 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
885 char frame[6];
886 int pic_sz;
887 int c_w;
888 int c_h;
889 int c_sz;
890 /*Read and skip the frame header.*/
891 if (!file_read(frame, 6, _fin)) return 0;
892 if (memcmp(frame, "FRAME", 5)) {
893 fprintf(stderr, "Loss of framing in Y4M input data\n");
894 return -1;
896 if (frame[5] != '\n') {
897 char c;
898 int j;
899 for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {}
900 if (j == 79) {
901 fprintf(stderr, "Error parsing Y4M frame header\n");
902 return -1;
905 /*Read the frame data that needs no conversion.*/
906 if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) {
907 fprintf(stderr, "Error reading Y4M frame data.\n");
908 return -1;
910 /*Read the frame data that does need conversion.*/
911 if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) {
912 fprintf(stderr, "Error reading Y4M frame data.\n");
913 return -1;
915 /*Now convert the just read frame.*/
916 (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
917 /*Fill in the frame buffer pointers.
918 We don't use vpx_img_wrap() because it forces padding for odd picture
919 sizes, which would require a separate fread call for every row.*/
920 memset(_img, 0, sizeof(*_img));
921 /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
922 _img->fmt = _y4m->vpx_fmt;
923 _img->w = _img->d_w = _y4m->pic_w;
924 _img->h = _img->d_h = _y4m->pic_h;
925 _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
926 _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
927 _img->bps = _y4m->vpx_bps;
929 /*Set up the buffer pointers.*/
930 pic_sz = _y4m->pic_w * _y4m->pic_h;
931 c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
932 c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
933 c_sz = c_w * c_h;
934 _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
935 _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
936 _img->planes[PLANE_Y] = _y4m->dst_buf;
937 _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
938 _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
939 _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
940 return 1;