Merge "sse2 intrinsic version of vp8_mbloop_filter_horizontal_edge()" into experimental
[aom.git] / y4minput.c
blob6be5b2bad2d63b76c132ad46d470c8b6bbcdbcbf
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  Based on code from the OggTheora software codec source code,
 *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
 */
13 #include <stdlib.h>
14 #include <string.h>
15 #include "y4minput.h"
17 static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
18 int got_w;
19 int got_h;
20 int got_fps;
21 int got_interlace;
22 int got_par;
23 int got_chroma;
24 char *p;
25 char *q;
26 got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
27 for (p = _tags;; p = q) {
28 /*Skip any leading spaces.*/
29 while (*p == ' ')p++;
30 /*If that's all we have, stop.*/
31 if (p[0] == '\0')break;
32 /*Find the end of this tag.*/
33 for (q = p + 1; *q != '\0' && *q != ' '; q++);
34 /*Process the tag.*/
35 switch (p[0]) {
36 case 'W': {
37 if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1)return -1;
38 got_w = 1;
40 break;
41 case 'H': {
42 if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1)return -1;
43 got_h = 1;
45 break;
46 case 'F': {
47 if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
48 return -1;
50 got_fps = 1;
52 break;
53 case 'I': {
54 _y4m->interlace = p[1];
55 got_interlace = 1;
57 break;
58 case 'A': {
59 if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
60 return -1;
62 got_par = 1;
64 break;
65 case 'C': {
66 if (q - p > 16)return -1;
67 memcpy(_y4m->chroma_type, p + 1, q - p - 1);
68 _y4m->chroma_type[q - p - 1] = '\0';
69 got_chroma = 1;
71 break;
72 /*Ignore unknown tags.*/
75 if (!got_w || !got_h || !got_fps)return -1;
76 if (!got_interlace)_y4m->interlace = '?';
77 if (!got_par)_y4m->par_n = _y4m->par_d = 0;
78 /*Chroma-type is not specified in older files, e.g., those generated by
79 mplayer.*/
80 if (!got_chroma)strcpy(_y4m->chroma_type, "420");
81 return 0;
/*All anti-aliasing filters in the following conversion functions are based on
   one of two window functions:
  The 6-tap Lanczos window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
   0,                         |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  1<=|t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
   overhead and limit ringing.

  The taps from these filters are scaled so that their sum is 1, and the result
   is scaled by 128 and rounded to integers to create a filter whose
   intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
   which is usually equivalent to normal rounding.

  Conversions which require both horizontal and vertical filtering could
   have these steps pipelined, for less memory consumption and better cache
   performance, but we do them separately for simplicity.*/
/*Integer minimum, maximum and clamp helpers.
  The arguments may be evaluated more than once, so they must be free of side
   effects.
  OC_CLAMPI(_a,_b,_c) limits _b to the range [_a,_c].*/
#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).

  Applies that horizontal filter to one plane.
  _dst: Output plane, _c_w*_c_h bytes, stored row by row with no padding.
  _src: Input plane with the same layout; it must not overlap _dst.
  _c_w: Plane width in samples.
  _c_h: Plane height in rows.
  Taps that would fall outside a row reuse the nearest edge sample.*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
                                        const unsigned char *_src,
                                        int _c_w, int _c_h) {
  int y;
  int x;
  for (y = 0; y < _c_h; y++) {
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    /*Left edge: the taps at x-2 and x-1 are clamped to column 0.*/
    for (x = 0; x < OC_MINI(_c_w, 2); x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
       (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)]
       + 114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)]
       - 9 * _src[OC_MINI(x + 2, _c_w - 1)]
       + _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
    }
    /*Interior: all six taps lie inside the row.*/
    for (; x < _c_w - 3; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
       (4 * _src[x - 2] - 17 * _src[x - 1]
       + 114 * _src[x] + 35 * _src[x + 1]
       - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
    }
    /*Right edge: the taps at x+1..x+3 are clamped to the last column.*/
    for (; x < _c_w; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0,
       (4 * _src[x - 2] - 17 * _src[x - 1]
       + 114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)]
       - 9 * _src[OC_MINI(x + 2, _c_w - 1)]
       + _src[_c_w - 1] + 64) >> 7, 255);
    }
    _dst += _c_w;
    _src += _c_w;
  }
}
179 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
180 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
181 unsigned char *_aux) {
182 int c_w;
183 int c_h;
184 int c_sz;
185 int pli;
186 /*Skip past the luma data.*/
187 _dst += _y4m->pic_w * _y4m->pic_h;
188 /*Compute the size of each chroma plane.*/
189 c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
190 c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
191 c_sz = c_w * c_h;
192 for (pli = 1; pli < 3; pli++) {
193 y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
194 _dst += c_sz;
195 _aux += c_sz;
199 /*This format is only used for interlaced content, but is included for
200 completeness.
202 420jpeg chroma samples are sited like:
203 Y-------Y-------Y-------Y-------
204 | | | |
205 | BR | | BR |
206 | | | |
207 Y-------Y-------Y-------Y-------
208 | | | |
209 | | | |
210 | | | |
211 Y-------Y-------Y-------Y-------
212 | | | |
213 | BR | | BR |
214 | | | |
215 Y-------Y-------Y-------Y-------
216 | | | |
217 | | | |
218 | | | |
220 420paldv chroma samples are sited like:
221 YR------Y-------YR------Y-------
222 | | | |
223 | | | |
224 | | | |
225 YB------Y-------YB------Y-------
226 | | | |
227 | | | |
228 | | | |
229 YR------Y-------YR------Y-------
230 | | | |
231 | | | |
232 | | | |
233 YB------Y-------YB------Y-------
234 | | | |
235 | | | |
236 | | | |
238 We use a resampling filter to shift the site locations one quarter pixel (at
239 the chroma plane's resolution) to the right.
240 Then we use another filter to move the C_r location down one quarter pixel,
241 and the C_b location up one quarter pixel.*/
242 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
243 unsigned char *_aux) {
244 unsigned char *tmp;
245 int c_w;
246 int c_h;
247 int c_sz;
248 int pli;
249 int y;
250 int x;
251 /*Skip past the luma data.*/
252 _dst += _y4m->pic_w * _y4m->pic_h;
253 /*Compute the size of each chroma plane.*/
254 c_w = (_y4m->pic_w + 1) / 2;
255 c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
256 c_sz = c_w * c_h;
257 tmp = _aux + 2 * c_sz;
258 for (pli = 1; pli < 3; pli++) {
259 /*First do the horizontal re-sampling.
260 This is the same as the mpeg2 case, except that after the horizontal
261 case, we need to apply a second vertical filter.*/
262 y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
263 _aux += c_sz;
264 switch (pli) {
265 case 1: {
266 /*Slide C_b up a quarter-pel.
267 This is the same filter used above, but in the other order.*/
268 for (x = 0; x < c_w; x++) {
269 for (y = 0; y < OC_MINI(c_h, 3); y++) {
270 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
271 - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
272 + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
273 + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
275 for (; y < c_h - 2; y++) {
276 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
277 - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
278 - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
280 for (; y < c_h; y++) {
281 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
282 - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
283 - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
285 _dst++;
286 tmp++;
288 _dst += c_sz - c_w;
289 tmp -= c_w;
291 break;
292 case 2: {
293 /*Slide C_r down a quarter-pel.
294 This is the same as the horizontal filter.*/
295 for (x = 0; x < c_w; x++) {
296 for (y = 0; y < OC_MINI(c_h, 2); y++) {
297 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
298 - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
299 + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
300 + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
302 for (; y < c_h - 3; y++) {
303 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
304 - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
305 - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
307 for (; y < c_h; y++) {
308 _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
309 - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
310 - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
312 _dst++;
313 tmp++;
316 break;
318 /*For actual interlaced material, this would have to be done separately on
319 each field, and the shift amounts would be different.
320 C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
321 C_b up 1/8 in the bottom field.
322 The corresponding filters would be:
323 Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
324 Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.
  _dst: Output plane, _c_w samples wide; one row is written for every two
         input rows, rounding up for an odd _c_h.
  _src: Input plane, _c_w samples wide and _c_h rows tall.
  _c_w: Width shared by both planes.
  _c_h: Height of the input plane.
  Taps that would fall outside the plane reuse the nearest edge row.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
                                       const unsigned char *_src,
                                       int _c_w, int _c_h) {
  int y;
  int x;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for (x = 0; x < _c_w; x++) {
    /*Top edge: the taps at rows -2 and -1 fold onto row 0, so its weight
       becomes 3-17+78=64.*/
    for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
      _dst[(y >> 1)*_c_w] = (unsigned char)OC_CLAMPI(0,
       (64 * _src[0]
       + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
       - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
       + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
    }
    /*Interior: all six taps lie inside the plane.*/
    for (; y < _c_h - 3; y += 2) {
      _dst[(y >> 1)*_c_w] = (unsigned char)OC_CLAMPI(0,
       (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
       - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
       + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
    }
    /*Bottom edge: rows past the end are clamped to the last row.*/
    for (; y < _c_h; y += 2) {
      _dst[(y >> 1)*_c_w] = (unsigned char)OC_CLAMPI(0,
       (3 * (_src[(y - 2) * _c_w] + _src[(_c_h - 1) * _c_w])
       - 17 * (_src[(y - 1) * _c_w] + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
       + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w])
       + 64) >> 7, 255);
    }
    /*Step to the next column of both planes.*/
    _src++;
    _dst++;
  }
}
358 /*420jpeg chroma samples are sited like:
359 Y-------Y-------Y-------Y-------
360 | | | |
361 | BR | | BR |
362 | | | |
363 Y-------Y-------Y-------Y-------
364 | | | |
365 | | | |
366 | | | |
367 Y-------Y-------Y-------Y-------
368 | | | |
369 | BR | | BR |
370 | | | |
371 Y-------Y-------Y-------Y-------
372 | | | |
373 | | | |
374 | | | |
376 422jpeg chroma samples are sited like:
377 Y---BR--Y-------Y---BR--Y-------
378 | | | |
379 | | | |
380 | | | |
381 Y---BR--Y-------Y---BR--Y-------
382 | | | |
383 | | | |
384 | | | |
385 Y---BR--Y-------Y---BR--Y-------
386 | | | |
387 | | | |
388 | | | |
389 Y---BR--Y-------Y---BR--Y-------
390 | | | |
391 | | | |
392 | | | |
394 We use a resampling filter to decimate the chroma planes by two in the
395 vertical direction.*/
396 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
397 unsigned char *_aux) {
398 int c_w;
399 int c_h;
400 int c_sz;
401 int dst_c_w;
402 int dst_c_h;
403 int dst_c_sz;
404 int pli;
405 /*Skip past the luma data.*/
406 _dst += _y4m->pic_w * _y4m->pic_h;
407 /*Compute the size of each chroma plane.*/
408 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
409 c_h = _y4m->pic_h;
410 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
411 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
412 c_sz = c_w * c_h;
413 dst_c_sz = dst_c_w * dst_c_h;
414 for (pli = 1; pli < 3; pli++) {
415 y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
416 _aux += c_sz;
417 _dst += dst_c_sz;
421 /*420jpeg chroma samples are sited like:
422 Y-------Y-------Y-------Y-------
423 | | | |
424 | BR | | BR |
425 | | | |
426 Y-------Y-------Y-------Y-------
427 | | | |
428 | | | |
429 | | | |
430 Y-------Y-------Y-------Y-------
431 | | | |
432 | BR | | BR |
433 | | | |
434 Y-------Y-------Y-------Y-------
435 | | | |
436 | | | |
437 | | | |
439 422 chroma samples are sited like:
440 YBR-----Y-------YBR-----Y-------
441 | | | |
442 | | | |
443 | | | |
444 YBR-----Y-------YBR-----Y-------
445 | | | |
446 | | | |
447 | | | |
448 YBR-----Y-------YBR-----Y-------
449 | | | |
450 | | | |
451 | | | |
452 YBR-----Y-------YBR-----Y-------
453 | | | |
454 | | | |
455 | | | |
457 We use a resampling filter to shift the original site locations one quarter
458 pixel (at the original chroma resolution) to the right.
459 Then we use a second resampling filter to decimate the chroma planes by two
460 in the vertical direction.*/
461 static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
462 unsigned char *_aux) {
463 unsigned char *tmp;
464 int c_w;
465 int c_h;
466 int c_sz;
467 int dst_c_h;
468 int dst_c_sz;
469 int pli;
470 /*Skip past the luma data.*/
471 _dst += _y4m->pic_w * _y4m->pic_h;
472 /*Compute the size of each chroma plane.*/
473 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
474 c_h = _y4m->pic_h;
475 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
476 c_sz = c_w * c_h;
477 dst_c_sz = c_w * dst_c_h;
478 tmp = _aux + 2 * c_sz;
479 for (pli = 1; pli < 3; pli++) {
480 /*In reality, the horizontal and vertical steps could be pipelined, for
481 less memory consumption and better cache performance, but we do them
482 separately for simplicity.*/
483 /*First do horizontal filtering (convert to 422jpeg)*/
484 y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
485 /*Now do the vertical filtering.*/
486 y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
487 _aux += c_sz;
488 _dst += dst_c_sz;
492 /*420jpeg chroma samples are sited like:
493 Y-------Y-------Y-------Y-------
494 | | | |
495 | BR | | BR |
496 | | | |
497 Y-------Y-------Y-------Y-------
498 | | | |
499 | | | |
500 | | | |
501 Y-------Y-------Y-------Y-------
502 | | | |
503 | BR | | BR |
504 | | | |
505 Y-------Y-------Y-------Y-------
506 | | | |
507 | | | |
508 | | | |
510 411 chroma samples are sited like:
511 YBR-----Y-------Y-------Y-------
512 | | | |
513 | | | |
514 | | | |
515 YBR-----Y-------Y-------Y-------
516 | | | |
517 | | | |
518 | | | |
519 YBR-----Y-------Y-------Y-------
520 | | | |
521 | | | |
522 | | | |
523 YBR-----Y-------Y-------Y-------
524 | | | |
525 | | | |
526 | | | |
528 We use a filter to resample at site locations one eighth pixel (at the source
529 chroma plane's horizontal resolution) and five eighths of a pixel to the
530 right.
531 Then we use another filter to decimate the planes by 2 in the vertical
532 direction.*/
533 static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
534 unsigned char *_aux) {
535 unsigned char *tmp;
536 int c_w;
537 int c_h;
538 int c_sz;
539 int dst_c_w;
540 int dst_c_h;
541 int dst_c_sz;
542 int tmp_sz;
543 int pli;
544 int y;
545 int x;
546 /*Skip past the luma data.*/
547 _dst += _y4m->pic_w * _y4m->pic_h;
548 /*Compute the size of each chroma plane.*/
549 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
550 c_h = _y4m->pic_h;
551 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
552 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
553 c_sz = c_w * c_h;
554 dst_c_sz = dst_c_w * dst_c_h;
555 tmp_sz = dst_c_w * c_h;
556 tmp = _aux + 2 * c_sz;
557 for (pli = 1; pli < 3; pli++) {
558 /*In reality, the horizontal and vertical steps could be pipelined, for
559 less memory consumption and better cache performance, but we do them
560 separately for simplicity.*/
561 /*First do horizontal filtering (convert to 422jpeg)*/
562 for (y = 0; y < c_h; y++) {
563 /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
564 4-tap Mitchell window.*/
565 for (x = 0; x < OC_MINI(c_w, 1); x++) {
566 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
567 + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
568 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
569 + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
571 for (; x < c_w - 2; x++) {
572 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
573 + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
574 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
575 + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
577 for (; x < c_w; x++) {
578 tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
579 + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
580 if ((x << 1 | 1) < dst_c_w) {
581 tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
582 + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
585 tmp += dst_c_w;
586 _aux += c_w;
588 tmp -= tmp_sz;
589 /*Now do the vertical filtering.*/
590 y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
591 _dst += dst_c_sz;
595 /*Convert 444 to 420jpeg.*/
596 static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
597 unsigned char *_aux) {
598 unsigned char *tmp;
599 int c_w;
600 int c_h;
601 int c_sz;
602 int dst_c_w;
603 int dst_c_h;
604 int dst_c_sz;
605 int tmp_sz;
606 int pli;
607 int y;
608 int x;
609 /*Skip past the luma data.*/
610 _dst += _y4m->pic_w * _y4m->pic_h;
611 /*Compute the size of each chroma plane.*/
612 c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
613 c_h = _y4m->pic_h;
614 dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
615 dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
616 c_sz = c_w * c_h;
617 dst_c_sz = dst_c_w * dst_c_h;
618 tmp_sz = dst_c_w * c_h;
619 tmp = _aux + 2 * c_sz;
620 for (pli = 1; pli < 3; pli++) {
621 /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
622 for (y = 0; y < c_h; y++) {
623 for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
624 tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
625 - 17 * _aux[OC_MINI(2, c_w - 1)]
626 + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
628 for (; x < c_w - 3; x += 2) {
629 tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
630 - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
632 for (; x < c_w; x += 2) {
633 tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
634 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
635 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
637 tmp += dst_c_w;
638 _aux += c_w;
640 tmp -= tmp_sz;
641 /*Now do the vertical filtering.*/
642 y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
643 _dst += dst_c_sz;
647 /*The image is padded with empty chroma components at 4:2:0.*/
648 static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
649 unsigned char *_aux) {
650 int c_sz;
651 _dst += _y4m->pic_w * _y4m->pic_h;
652 c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
653 ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
654 memset(_dst, 128, c_sz * 2);
657 /*No conversion function needed.*/
658 static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
659 unsigned char *_aux) {
662 int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip) {
663 char buffer[80];
664 int ret;
665 int i;
666 /*Read until newline, or 80 cols, whichever happens first.*/
667 for (i = 0; i < 79; i++) {
668 if (_nskip > 0) {
669 buffer[i] = *_skip++;
670 _nskip--;
671 } else {
672 ret = fread(buffer + i, 1, 1, _fin);
673 if (ret < 1)return -1;
675 if (buffer[i] == '\n')break;
677 /*We skipped too much header data.*/
678 if (_nskip > 0)return -1;
679 if (i == 79) {
680 fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
681 return -1;
683 buffer[i] = '\0';
684 if (memcmp(buffer, "YUV4MPEG", 8)) {
685 fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
686 return -1;
688 if (buffer[8] != '2') {
689 fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
691 ret = y4m_parse_tags(_y4m, buffer + 5);
692 if (ret < 0) {
693 fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
694 return ret;
696 if (_y4m->interlace == '?') {
697 fprintf(stderr, "Warning: Input video interlacing format unknown; "
698 "assuming progressive scan.\n");
699 } else if (_y4m->interlace != 'p') {
700 fprintf(stderr, "Input video is interlaced; "
701 "Only progressive scan handled.\n");
702 return -1;
704 if (strcmp(_y4m->chroma_type, "420") == 0 ||
705 strcmp(_y4m->chroma_type, "420jpeg") == 0) {
706 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
707 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
708 + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
709 /*Natively supported: no conversion required.*/
710 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
711 _y4m->convert = y4m_convert_null;
712 } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
713 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
714 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
715 /*Chroma filter required: read into the aux buf first.*/
716 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
717 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
718 _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
719 } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
720 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
721 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
722 /*Chroma filter required: read into the aux buf first.
723 We need to make two filter passes, so we need some extra space in the
724 aux buffer.*/
725 _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
726 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
727 _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
728 } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
729 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
730 _y4m->src_c_dec_v = 1;
731 _y4m->dst_c_dec_v = 2;
732 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
733 /*Chroma filter required: read into the aux buf first.*/
734 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
735 _y4m->convert = y4m_convert_422jpeg_420jpeg;
736 } else if (strcmp(_y4m->chroma_type, "422") == 0) {
737 _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
738 _y4m->src_c_dec_v = 1;
739 _y4m->dst_c_dec_v = 2;
740 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
741 /*Chroma filter required: read into the aux buf first.
742 We need to make two filter passes, so we need some extra space in the
743 aux buffer.*/
744 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
745 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
746 _y4m->convert = y4m_convert_422_420jpeg;
747 } else if (strcmp(_y4m->chroma_type, "411") == 0) {
748 _y4m->src_c_dec_h = 4;
749 _y4m->dst_c_dec_h = 2;
750 _y4m->src_c_dec_v = 1;
751 _y4m->dst_c_dec_v = 2;
752 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
753 /*Chroma filter required: read into the aux buf first.
754 We need to make two filter passes, so we need some extra space in the
755 aux buffer.*/
756 _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
757 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
758 _y4m->convert = y4m_convert_411_420jpeg;
759 } else if (strcmp(_y4m->chroma_type, "444") == 0) {
760 _y4m->src_c_dec_h = 1;
761 _y4m->dst_c_dec_h = 2;
762 _y4m->src_c_dec_v = 1;
763 _y4m->dst_c_dec_v = 2;
764 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
765 /*Chroma filter required: read into the aux buf first.
766 We need to make two filter passes, so we need some extra space in the
767 aux buffer.*/
768 _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
769 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
770 _y4m->convert = y4m_convert_444_420jpeg;
771 } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
772 _y4m->src_c_dec_h = 1;
773 _y4m->dst_c_dec_h = 2;
774 _y4m->src_c_dec_v = 1;
775 _y4m->dst_c_dec_v = 2;
776 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
777 /*Chroma filter required: read into the aux buf first.
778 We need to make two filter passes, so we need some extra space in the
779 aux buffer.
780 The extra plane also gets read into the aux buf.
781 It will be discarded.*/
782 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
783 _y4m->convert = y4m_convert_444_420jpeg;
784 } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
785 _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
786 _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
787 _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
788 /*No extra space required, but we need to clear the chroma planes.*/
789 _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
790 _y4m->convert = y4m_convert_mono_420jpeg;
791 } else {
792 fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
793 return -1;
795 /*The size of the final frame buffers is always computed from the
796 destination chroma decimation type.*/
797 _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
798 + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
799 ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
800 _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
801 _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
802 return 0;
805 void y4m_input_close(y4m_input *_y4m) {
806 free(_y4m->dst_buf);
807 free(_y4m->aux_buf);
810 int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
811 char frame[6];
812 int pic_sz;
813 int c_w;
814 int c_h;
815 int c_sz;
816 int ret;
817 /*Read and skip the frame header.*/
818 ret = fread(frame, 1, 6, _fin);
819 if (ret < 6)return 0;
820 if (memcmp(frame, "FRAME", 5)) {
821 fprintf(stderr, "Loss of framing in Y4M input data\n");
822 return -1;
824 if (frame[5] != '\n') {
825 char c;
826 int j;
827 for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++);
828 if (j == 79) {
829 fprintf(stderr, "Error parsing Y4M frame header\n");
830 return -1;
833 /*Read the frame data that needs no conversion.*/
834 if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) {
835 fprintf(stderr, "Error reading Y4M frame data.\n");
836 return -1;
838 /*Read the frame data that does need conversion.*/
839 if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) {
840 fprintf(stderr, "Error reading Y4M frame data.\n");
841 return -1;
843 /*Now convert the just read frame.*/
844 (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
845 /*Fill in the frame buffer pointers.
846 We don't use vpx_img_wrap() because it forces padding for odd picture
847 sizes, which would require a separate fread call for every row.*/
848 memset(_img, 0, sizeof(*_img));
849 /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
850 _img->fmt = IMG_FMT_I420;
851 _img->w = _img->d_w = _y4m->pic_w;
852 _img->h = _img->d_h = _y4m->pic_h;
853 /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
854 _img->x_chroma_shift = 1;
855 _img->y_chroma_shift = 1;
856 _img->bps = 12;
857 /*Set up the buffer pointers.*/
858 pic_sz = _y4m->pic_w * _y4m->pic_h;
859 c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
860 c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
861 c_sz = c_w * c_h;
862 _img->stride[PLANE_Y] = _y4m->pic_w;
863 _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
864 _img->planes[PLANE_Y] = _y4m->dst_buf;
865 _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
866 _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
867 return 1;