Merge "SSE2 inverse 4x4 2D-DCT with DC only"
[aom.git] / y4minput.c
blob47f005a950be021325c1a3180033570e74c95f03
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  Based on code from the OggTheora software codec source code,
 *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
 */
#include <stdlib.h>
#include <string.h>
#include "y4minput.h"

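/*A Y4M stream begins with a header line such as (illustrative values):
    YUV4MPEG2 W352 H288 F30000:1001 Ip A128:117 C420jpeg
  y4m_parse_tags() walks the space-separated tags from that line, dispatching
  on each tag's first letter; unknown tags are ignored.*/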
static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
  int got_w;
  int got_h;
  int got_fps;
  int got_interlace;
  int got_par;
  int got_chroma;
  char *p;
  char *q;
  got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
  for (p = _tags;; p = q) {
    /*Skip any leading spaces.*/
    while (*p == ' ') p++;
    /*If that's all we have, stop.*/
    if (p[0] == '\0') break;
    /*Find the end of this tag.*/
    for (q = p + 1; *q != '\0' && *q != ' '; q++);
    /*Process the tag.*/
    switch (p[0]) {
      case 'W': {
        if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;
        got_w = 1;
      }
      break;
      case 'H': {
        if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;
        got_h = 1;
      }
      break;
      case 'F': {
        if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
          return -1;
        }
        got_fps = 1;
      }
      break;
      case 'I': {
        _y4m->interlace = p[1];
        got_interlace = 1;
      }
      break;
      case 'A': {
        if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
          return -1;
        }
        got_par = 1;
      }
      break;
      case 'C': {
        if (q - p > 16) return -1;
        memcpy(_y4m->chroma_type, p + 1, q - p - 1);
        _y4m->chroma_type[q - p - 1] = '\0';
        got_chroma = 1;
      }
      break;
      /*Ignore unknown tags.*/
    }
  }
  if (!got_w || !got_h || !got_fps) return -1;
  if (!got_interlace) _y4m->interlace = '?';
  if (!got_par) _y4m->par_n = _y4m->par_d = 0;
  /*Chroma-type is not specified in older files, e.g., those generated by
    mplayer.*/
  if (!got_chroma) strcpy(_y4m->chroma_type, "420");
  return 0;
}

/*All anti-aliasing filters in the following conversion functions are based on
  one of two window functions:
  The 6-tap Lanczos window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
   0,                         |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  1<=|t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
  overhead and limit ringing.

  The taps from these filters are scaled so that their sum is 1, and the
  result is scaled by 128 and rounded to integers to create a filter whose
  intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
  which is usually equivalent to normal rounding.

  Conversions which require both horizontal and vertical filtering could
  have these steps pipelined, for less memory consumption and better cache
  performance, but we do them separately for simplicity.*/
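/*For example, the quarter-pel shift filter [4 -17 114 35 -9 1]/128 used below
  comes (approximately) from sampling the 6-tap Lanczos window at
  t = -2.25, -1.25, -0.25, 0.75, 1.75, 2.75:
   128*{0.030, -0.133, 0.890, 0.270, -0.068, 0.007} ~ {4, -17, 114, 35, -9, 1},
  with the rounding adjusted so that the integer taps still sum to 128.*/
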
#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
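/*OC_CLAMPI(_a,_b,_c) clamps _b to the range [_a,_c]; the conversion code
  below always uses it as OC_CLAMPI(0,v,255) to keep filtered pixel values in
  the valid 8-bit range.*/
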
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
  the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
  lines, and they are vertically co-sited with the luma samples in both the
  mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
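/*The helper below applies the shift filter along each row in three pieces: a
  left-edge loop that clamps tap indices which would fall before the start of
  the row, an interior loop with direct indexing, and a right-edge loop that
  clamps tap indices past the end of the row.*/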
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
                                        const unsigned char *_src, int _c_w, int _c_h) {
  int y;
  int x;
  for (y = 0; y < _c_h; y++) {
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
      window.*/
    for (x = 0; x < OC_MINI(_c_w, 2); x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] +
        114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
        _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7, 255);
    }
    for (; x < _c_w - 3; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
        114 * _src[x] + 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7, 255);
    }
    for (; x < _c_w; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(0, (4 * _src[x - 2] - 17 * _src[x - 1] +
        114 * _src[x] + 35 * _src[OC_MINI(x + 1, _c_w - 1)] - 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
        _src[_c_w - 1] + 64) >> 7, 255);
    }
    _dst += _c_w;
    _src += _c_w;
  }
}

/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
                                         unsigned char *_aux) {
  int c_w;
  int c_h;
  int c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  for (pli = 1; pli < 3; pli++) {
    y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
    _dst += c_sz;
    _aux += c_sz;
  }
}

/*This format is only used for interlaced content, but is included for
  completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
  the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
  and the C_b location up one quarter pixel.*/
static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
                                         unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + 1) / 2;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_sz = c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*First do the horizontal re-sampling.
      This is the same as the mpeg2 case, except that after the horizontal
      case, we need to apply a second vertical filter.*/
    y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
    _aux += c_sz;
    switch (pli) {
      case 1: {
        /*Slide C_b up a quarter-pel.
          This is the same filter used above, but in the other order.*/
        for (x = 0; x < c_w; x++) {
          for (y = 0; y < OC_MINI(c_h, 3); y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[0]
              - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w]
              + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
              + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h - 2; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
              - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
              - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (tmp[(y - 3) * c_w]
              - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w]
              - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
          }
          _dst++;
          tmp++;
        }
        _dst += c_sz - c_w;
        tmp -= c_w;
      }
      break;
      case 2: {
        /*Slide C_r down a quarter-pel.
          This is the same as the horizontal filter.*/
        for (x = 0; x < c_w; x++) {
          for (y = 0; y < OC_MINI(c_h, 2); y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[0]
              - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w]
              + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w]
              + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h - 3; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
              - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w]
              - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255);
          }
          for (; y < c_h; y++) {
            _dst[y * c_w] = (unsigned char)OC_CLAMPI(0, (4 * tmp[(y - 2) * c_w]
              - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w]
              - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255);
          }
          _dst++;
          tmp++;
        }
      }
      break;
    }
    /*For actual interlaced material, this would have to be done separately on
      each field, and the shift amounts would be different.
      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
      C_b up 1/8 in the bottom field.
      The corresponding filters would be:
      Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
      Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
  }
}

/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.*/
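/*The symmetric taps [3 -17 78 78 -17 3]/128 used here come (approximately)
  from sampling the same 6-tap Lanczos window halfway between two source rows,
  at t = -2.5, -1.5, -0.5, 0.5, 1.5, 2.5:
   128*{0.024, -0.135, 0.608, 0.608, -0.135, 0.024} ~ {3, -17, 78, 78, -17, 3},
  again rounded so that the integer taps sum to 128.*/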
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
                                       const unsigned char *_src, int _c_w, int _c_h) {
  int y;
  int x;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for (x = 0; x < _c_w; x++) {
    for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (64 * _src[0]
        + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w]
        - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w]
        + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255);
    }
    for (; y < _c_h - 3; y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w])
        - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w])
        + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255);
    }
    for (; y < _c_h; y += 2) {
      _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w]
        + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w]
        + _src[OC_MINI(y + 2, _c_h - 1) * _c_w])
        + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255);
    }
    _src++;
    _dst++;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422jpeg chroma samples are sited like:
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to decimate the chroma planes by two in the
  vertical direction.*/
static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                        unsigned char *_aux) {
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  for (pli = 1; pli < 3; pli++) {
    y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
    _aux += c_sz;
    _dst += dst_c_sz;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422 chroma samples are sited like:
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the original site locations one quarter
  pixel (at the original chroma resolution) to the right.
  Then we use a second resampling filter to decimate the chroma planes by two
  in the vertical direction.*/
static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_h;
  int dst_c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = c_w * dst_c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*In reality, the horizontal and vertical steps could be pipelined, for
      less memory consumption and better cache performance, but we do them
      separately for simplicity.*/
    /*First do horizontal filtering (convert to 422jpeg).*/
    y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
    _aux += c_sz;
    _dst += dst_c_sz;
  }
}

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the
  source chroma plane's horizontal resolution) and five eighths of a pixel to
  the right.
  Then we use another filter to decimate the planes by 2 in the vertical
  direction.*/
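/*The Mitchell-window taps used below can be reproduced (approximately) by
  evaluating the window at the distances from each new site (1/8 and 5/8 of a
  source pixel to the right of a source sample) to the four nearest source
  samples, then scaling each set of weights so it sums to 128:
   1/8 shift: 128*{0.005, 0.860, 0.139, -0.004} ~ {1, 110, 18, -1}
   5/8 shift: 128*{-0.027, 0.392, 0.669, -0.035} ~ {-3, 50, 86, -5}*/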
static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int tmp_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  tmp_sz = dst_c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*In reality, the horizontal and vertical steps could be pipelined, for
      less memory consumption and better cache performance, but we do them
      separately for simplicity.*/
    /*First do horizontal filtering (convert to 422jpeg).*/
    for (y = 0; y < c_h; y++) {
      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
        4-tap Mitchell window.*/
      for (x = 0; x < OC_MINI(c_w, 1); x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (111 * _aux[0]
          + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
        tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (47 * _aux[0]
          + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255);
      }
      for (; x < c_w - 2; x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
          + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255);
        tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
          + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255);
      }
      for (; x < c_w; x++) {
        tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x]
          + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255);
        if ((x << 1 | 1) < dst_c_w) {
          tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(0, (-3 * _aux[x - 1] + 50 * _aux[x]
            + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255);
        }
      }
      tmp += dst_c_w;
      _aux += c_w;
    }
    tmp -= tmp_sz;
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
    _dst += dst_c_sz;
  }
}

/*Convert 444 to 420jpeg.*/
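/*Horizontally, each pair of 4:4:4 chroma samples is decimated with the same
  symmetric [3 -17 78 78 -17 3]/128 kernel used for vertical decimation; the
  shared 422jpeg-to-420jpeg helper then decimates the vertical direction.*/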
static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                    unsigned char *_aux) {
  unsigned char *tmp;
  int c_w;
  int c_h;
  int c_sz;
  int dst_c_w;
  int dst_c_h;
  int dst_c_sz;
  int tmp_sz;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
  c_h = _y4m->pic_h;
  dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  dst_c_sz = dst_c_w * dst_c_h;
  tmp_sz = dst_c_w * c_h;
  tmp = _aux + 2 * c_sz;
  for (pli = 1; pli < 3; pli++) {
    /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
    for (y = 0; y < c_h; y++) {
      for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)]
          - 17 * _aux[OC_MINI(2, c_w - 1)]
          + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255);
      }
      for (; x < c_w - 3; x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3])
          - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255);
      }
      for (; x < c_w; x += 2) {
        tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
          17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
          78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255);
      }
      tmp += dst_c_w;
      _aux += c_w;
    }
    tmp -= tmp_sz;
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
    _dst += dst_c_sz;
  }
}

/*The image is padded with empty chroma components at 4:2:0.*/
static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
                                     unsigned char *_aux) {
  int c_sz;
  _dst += _y4m->pic_w * _y4m->pic_h;
  c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
         ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
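  /*128 is the neutral value for 8-bit Cb and Cr, so this fills both chroma
    planes with gray.*/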
  memset(_dst, 128, c_sz * 2);
}

/*No conversion function needed.*/
static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
                             unsigned char *_aux) {
}

int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
                   int only_420) {
  char buffer[80];
  int ret;
  int i;
  /*Read until newline, or 80 cols, whichever happens first.*/
  for (i = 0; i < 79; i++) {
    if (_nskip > 0) {
      buffer[i] = *_skip++;
      _nskip--;
    } else {
      ret = (int)fread(buffer + i, 1, 1, _fin);
      if (ret < 1) return -1;
    }
    if (buffer[i] == '\n') break;
  }
  /*We skipped too much header data.*/
  if (_nskip > 0) return -1;
  if (i == 79) {
    fprintf(stderr, "Error parsing header; not a YUV4MPEG2 file?\n");
    return -1;
  }
  buffer[i] = '\0';
  if (memcmp(buffer, "YUV4MPEG", 8)) {
    fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
    return -1;
  }
  if (buffer[8] != '2') {
    fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
  }
  ret = y4m_parse_tags(_y4m, buffer + 5);
  if (ret < 0) {
    fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
    return ret;
  }
  if (_y4m->interlace == '?') {
    fprintf(stderr, "Warning: Input video interlacing format unknown; "
            "assuming progressive scan.\n");
  } else if (_y4m->interlace != 'p') {
    fprintf(stderr, "Input video is interlaced; "
            "only progressive scan is handled.\n");
    return -1;
  }
  _y4m->vpx_fmt = VPX_IMG_FMT_I420;
  _y4m->vpx_bps = 12;
  if (strcmp(_y4m->chroma_type, "420") == 0 ||
      strcmp(_y4m->chroma_type, "420jpeg") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
                            + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    /*Natively supported: no conversion required.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
    _y4m->convert = y4m_convert_null;
  } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
                         2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
  } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.
      We need to make two filter passes, so we need some extra space in the
      aux buffer.*/
    _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
    _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
  } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
    _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
    _y4m->convert = y4m_convert_422jpeg_420jpeg;
  } else if (strcmp(_y4m->chroma_type, "422") == 0) {
    _y4m->src_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.*/
      _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
                         ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->convert = y4m_convert_422_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_I422;
      _y4m->vpx_bps = 16;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
                              + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "411") == 0) {
    _y4m->src_c_dec_h = 4;
    _y4m->dst_c_dec_h = 2;
    _y4m->src_c_dec_v = 1;
    _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*Chroma filter required: read into the aux buf first.
      We need to make two filter passes, so we need some extra space in the
      aux buffer.*/
    _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
    _y4m->convert = y4m_convert_411_420jpeg;
  } else if (strcmp(_y4m->chroma_type, "444") == 0) {
    _y4m->src_c_dec_h = 1;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.*/
      _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz +
                         ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
      _y4m->convert = y4m_convert_444_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_I444;
      _y4m->vpx_bps = 24;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
    _y4m->src_c_dec_h = 1;
    _y4m->src_c_dec_v = 1;
    if (only_420) {
      _y4m->dst_c_dec_h = 2;
      _y4m->dst_c_dec_v = 2;
      _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
      /*Chroma filter required: read into the aux buf first.
        We need to make two filter passes, so we need some extra space in the
        aux buffer.
        The extra plane also gets read into the aux buf.
        It will be discarded.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
      _y4m->convert = y4m_convert_444_420jpeg;
    } else {
      _y4m->vpx_fmt = VPX_IMG_FMT_444A;
      _y4m->vpx_bps = 32;
      _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
      _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
      _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
      /*Natively supported: no conversion required.*/
      _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
      _y4m->convert = y4m_convert_null;
    }
  } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
    _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
    _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
    _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
    /*No extra space required, but we need to clear the chroma planes.*/
    _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
    _y4m->convert = y4m_convert_mono_420jpeg;
  } else {
    fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
    return -1;
  }
  /*The size of the final frame buffers is always computed from the
    destination chroma decimation type.*/
  _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
                     + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
                     ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
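  /*For example, a 352x288 4:2:0 frame needs 352*288 + 2*(176*144) = 152064
    bytes here.*/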
  _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
  _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
  return 0;
}

void y4m_input_close(y4m_input *_y4m) {
  free(_y4m->dst_buf);
  free(_y4m->aux_buf);
}

int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) {
  char frame[6];
  int pic_sz;
  int c_w;
  int c_h;
  int c_sz;
  int ret;
  /*Read and skip the frame header.*/
  ret = (int)fread(frame, 1, 6, _fin);
  if (ret < 6) return 0;
  if (memcmp(frame, "FRAME", 5)) {
    fprintf(stderr, "Loss of framing in Y4M input data\n");
    return -1;
  }
  if (frame[5] != '\n') {
    char c;
    int j;
    for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++);
    if (j == 79) {
      fprintf(stderr, "Error parsing Y4M frame header\n");
      return -1;
    }
  }
  /*Read the frame data that needs no conversion.*/
  if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) {
    fprintf(stderr, "Error reading Y4M frame data.\n");
    return -1;
  }
  /*Read the frame data that does need conversion.*/
  if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) {
    fprintf(stderr, "Error reading Y4M frame data.\n");
    return -1;
  }
  /*Now convert the just read frame.*/
  (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
  /*Fill in the frame buffer pointers.
    We don't use vpx_img_wrap() because it forces padding for odd picture
    sizes, which would require a separate fread call for every row.*/
  memset(_img, 0, sizeof(*_img));
  /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
  _img->fmt = _y4m->vpx_fmt;
  _img->w = _img->d_w = _y4m->pic_w;
  _img->h = _img->d_h = _y4m->pic_h;
  _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
  _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
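  /*dst_c_dec_h and dst_c_dec_v are 1 or 2 here, so the shift is 0 for
    full-resolution chroma and 1 for 2:1 decimation.*/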
  _img->bps = _y4m->vpx_bps;

  /*Set up the buffer pointers.*/
  pic_sz = _y4m->pic_w * _y4m->pic_h;
  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
  _img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
  _img->planes[PLANE_Y] = _y4m->dst_buf;
  _img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
  _img->planes[PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
  _img->planes[PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
  return 1;
}