/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "swscale.h"
#include "dsputil.h"

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct SwsContext {
    AVClass *av_class;
    struct ImgReSampleContext *resampling_ctx;
    enum PixelFormat src_pix_fmt, dst_pix_fmt;
};

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
    uint8_t *line_buf;
};

void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);

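/*
 * Source positions are kept in 16.16 fixed point (POS_FRAC_BITS fractional
 * bits). The integer part (pos >> POS_FRAC_BITS) selects the first source
 * sample of the filter window, and the top PHASE_BITS of the fraction select
 * one of the NB_PHASES precomputed filter phases. For example, pos = 0x2A800
 * means source sample 2 with phase (0x2A800 >> 12) & 0xF = 0xA.
 */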
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

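/*
 * FILTER4(reg): compute one horizontally filtered output pixel into `reg'.
 * It loads 4 source bytes at the current integer position, zero-extends them
 * to 16 bits, multiplies them with the 4 taps of the current phase (pmaddwd
 * yields two 32-bit partial sums), adds the two halves together and shifts
 * the result right by FILTER_BITS. src_pos is advanced by src_incr, so each
 * expansion produces the next output pixel. mm6 and mm7 (zero) are scratch.
 */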
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif /* HAVE_MMX */

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);

    /*
       When we're resampling, we'd ideally like both our input buffers
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif /* HAVE_ALTIVEC */

/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

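/*
 * h_resample() splits an output line into up to three parts: pixels whose
 * filter window starts before the first source sample are handled by
 * h_resample_slow(), the central part where all NB_TAPS source samples are
 * in range goes through the fast (plain C or MMX) path, and pixels whose
 * window would read past the end of the source line fall back to the slow
 * version again.
 */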
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

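/*
 * component_resample() resamples one image plane. Source lines are first
 * filtered horizontally into a ring buffer of LINE_BUF_HEIGHT lines
 * (s->line_buf); whenever a line lands in the last NB_TAPS positions of the
 * ring, it is also mirrored into the first NB_TAPS slots, so the vertical
 * filter can always read NB_TAPS consecutive lines starting at
 * (ring_y - NB_TAPS + 1) without wrapping.
 */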
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand,
                                           int padtop, int padbottom,
                                           int padleft, int padright)
{
    ImgReSampleContext *s;

    if (!owidth || !oheight || !iwidth || !iheight)
        return NULL;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    if ((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
        goto fail;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;

    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;

    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
                    (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
                    (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);

    return s;
fail:
    av_free(s);
    return NULL;
}

void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;

        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                s->pad_owidth >> shift, s->pad_oheight >> shift,
                input->data[i] + (input->linesize[i] *
                    (s->topBand >> shift)) + (s->leftBand >> shift),
                input->linesize[i], ((s->iwidth - s->leftBand -
                        s->rightBand) >> shift),
                (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

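/*
 * Minimal usage sketch of the ImgReSampleContext API above. `in_pic' and
 * `out_pic' are assumed to be caller-allocated planar YUV 4:2:0 AVPictures
 * of the input and output sizes:
 *
 *     ImgReSampleContext *rs = img_resample_init(out_w, out_h, in_w, in_h);
 *     if (rs) {
 *         img_resample(rs, &out_pic, &in_pic);
 *         img_resample_close(rs);
 *     }
 */
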
struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
                                  int dstW, int dstH, int dstFormat,
                                  int flags, SwsFilter *srcFilter,
                                  SwsFilter *dstFilter, double *param)
{
    struct SwsContext *ctx;

    ctx = av_malloc(sizeof(struct SwsContext));
    if (ctx)
        ctx->av_class = av_mallocz(sizeof(AVClass));
    if (!ctx || !ctx->av_class) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
        av_free(ctx);
        return NULL;
    }

    if ((srcH != dstH) || (srcW != dstW)) {
        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
        }
        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
    } else {
        ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
        ctx->resampling_ctx->iheight = srcH;
        ctx->resampling_ctx->iwidth = srcW;
        ctx->resampling_ctx->oheight = dstH;
        ctx->resampling_ctx->owidth = dstW;
    }
    ctx->src_pix_fmt = srcFormat;
    ctx->dst_pix_fmt = dstFormat;

    return ctx;
}

void sws_freeContext(struct SwsContext *ctx)
{
    if (!ctx)
        return;
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        img_resample_close(ctx->resampling_ctx);
    } else {
        av_free(ctx->resampling_ctx);
    }
    av_free(ctx->av_class);
    av_free(ctx);
}

/**
 * Checks if the context is valid or reallocates a new one instead.
 * If context is NULL, just calls sws_getContext() to get a new one.
 * Otherwise, checks whether the parameters are the ones already saved in the
 * context. If that is the case, returns the current context.
 * Otherwise, frees the context and gets a new one.
 *
 * Be warned that srcFilter and dstFilter are not checked; they are
 * assumed to remain valid.
 */
struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
                                        int srcW, int srcH, int srcFormat,
                                        int dstW, int dstH, int dstFormat, int flags,
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
{
    if (ctx != NULL) {
        if ((ctx->resampling_ctx->iwidth != srcW) ||
            (ctx->resampling_ctx->iheight != srcH) ||
            (ctx->src_pix_fmt != srcFormat) ||
            (ctx->resampling_ctx->owidth != dstW) ||
            (ctx->resampling_ctx->oheight != dstH) ||
            (ctx->dst_pix_fmt != dstFormat))
        {
            sws_freeContext(ctx);
            ctx = NULL;
        }
    }
    if (ctx == NULL) {
        return sws_getContext(srcW, srcH, srcFormat,
                              dstW, dstH, dstFormat, flags,
                              srcFilter, dstFilter, param);
    }
    return ctx;
}

int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    for (i = 0; i < 4; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }

    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input */
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill((AVPicture*)formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output */
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill((AVPicture*)resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    } else if (resampled_picture != &dst_pict) {
        av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
    }

the_end:
    av_free(buf1);
    av_free(buf2);
    return res;
}

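/*
 * Minimal usage sketch of the SwsContext wrapper above. src_data/src_linesize
 * and dst_data/dst_linesize are assumed to be caller-managed plane pointer and
 * stride arrays (e.g. filled in with avpicture_fill()):
 *
 *     struct SwsContext *sc = sws_getContext(in_w, in_h, PIX_FMT_YUV420P,
 *                                            out_w, out_h, PIX_FMT_YUV420P,
 *                                            0, NULL, NULL, NULL);
 *     if (sc) {
 *         sws_scale(sc, src_data, src_linesize, 0, in_h,
 *                   dst_data, dst_linesize);
 *         sws_freeContext(sc);
 *     }
 */
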
#ifdef TEST
#include <stdio.h>
#undef exit

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    f = fopen(filename, "w");
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
#define fprintf please_use_av_log
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        av_log(NULL, AV_LOG_INFO, "\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0, 0, 0, 0, 0);
        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    av_log(NULL, AV_LOG_INFO, "MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
        exit(1);
    }
    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
#endif /* HAVE_MMX */
    return 0;
}

#endif /* TEST */