2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * the C code (not assembly, mmx, ...) of this file can be used
21 * under the LGPL license too
24 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
33 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
34 #define MAP_ANONYMOUS MAP_ANON
38 #define WIN32_LEAN_AND_MEAN
42 #include "swscale_internal.h"
44 #include "libavutil/intreadwrite.h"
45 #include "libavutil/x86_cpu.h"
46 #include "libavutil/avutil.h"
47 #include "libavutil/bswap.h"
48 #include "libavutil/pixdesc.h"
50 unsigned swscale_version(void)
52 return LIBSWSCALE_VERSION_INT
;
55 const char *swscale_configuration(void)
57 return FFMPEG_CONFIGURATION
;
60 const char *swscale_license(void)
62 #define LICENSE_PREFIX "libswscale license: "
63 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
// Byte that initMMX2HScaler stores right after the last code fragment it copies into filterCode.
66 #define RET 0xC3 //near return opcode for x86
/**
 * Nonzero if pixel format x is accepted as input of the scaler.
 *
 * The argument is expanded once per listed format, so it must be an
 * expression without side effects. Each format is listed exactly once.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420P16LE \
        || (x)==PIX_FMT_YUV422P16LE \
        || (x)==PIX_FMT_YUV444P16LE \
        || (x)==PIX_FMT_YUV420P16BE \
        || (x)==PIX_FMT_YUV422P16BE \
        || (x)==PIX_FMT_YUV444P16BE \
    )
111 int sws_isSupportedInput(enum PixelFormat pix_fmt
)
113 return isSupportedIn(pix_fmt
);
// Nonzero if pixel format x is accepted as output of the scaler.
// The argument is expanded once per listed format, so it must not have side effects.
// NOTE(review): the numbering of this listing jumps from 123 to 126 and stops before the
// closing parenthesis, so some terms of the expression are not visible here - confirm
// against the upstream file before editing the list.
116 #define isSupportedOut(x) ( \
117 (x)==PIX_FMT_YUV420P \
118 || (x)==PIX_FMT_YUVA420P \
119 || (x)==PIX_FMT_YUYV422 \
120 || (x)==PIX_FMT_UYVY422 \
121 || (x)==PIX_FMT_YUV444P \
122 || (x)==PIX_FMT_YUV422P \
123 || (x)==PIX_FMT_YUV411P \
126 || (x)==PIX_FMT_NV12 \
127 || (x)==PIX_FMT_NV21 \
128 || (x)==PIX_FMT_GRAY16BE \
129 || (x)==PIX_FMT_GRAY16LE \
130 || (x)==PIX_FMT_GRAY8 \
131 || (x)==PIX_FMT_YUV410P \
132 || (x)==PIX_FMT_YUV440P \
133 || (x)==PIX_FMT_YUV420P16LE \
134 || (x)==PIX_FMT_YUV422P16LE \
135 || (x)==PIX_FMT_YUV444P16LE \
136 || (x)==PIX_FMT_YUV420P16BE \
137 || (x)==PIX_FMT_YUV422P16BE \
138 || (x)==PIX_FMT_YUV444P16BE \
141 int sws_isSupportedOutput(enum PixelFormat pix_fmt
)
143 return isSupportedOut(pix_fmt
);
// Nonzero when the descriptor of pixel format x has the PIX_FMT_PAL flag set.
146 #define usePal(x) (av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL)
// Coefficient table defined outside this file; sws_getContext passes the row
// ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] to sws_setColorspaceDetails.
148 extern const int32_t ff_yuv2rgb_coeffs
[8][4];
150 const char *sws_format_name(enum PixelFormat format
)
152 if ((unsigned)format
< PIX_FMT_NB
&& av_pix_fmt_descriptors
[format
].name
)
153 return av_pix_fmt_descriptors
[format
].name
;
155 return "Unknown format";
/**
 * Evaluate a piecewise cubic at distance dist from its origin.
 *
 * The first piece is a + b*t + c*t^2 + d*t^3 on [0,1]. For larger
 * distances the polynomial is re-expressed for the following unit
 * interval: the new piece starts at value 0, inherits the slope and half
 * the second derivative of the previous piece at t=1, and gets the cubic
 * term that makes it reach 0 again at the end of its own interval.
 *
 * @param a,b,c,d coefficients of the first piece
 * @param dist    non-negative distance at which to evaluate
 * @return the value of the piece that contains dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0) {
        const double slope = b + 2.0*c + 3.0*d;
        const double curve = c + 3.0*d;
        const double cubic = -b - 3.0*c - 6.0*d;

        a = 0.0;
        b = slope;
        c = curve;
        d = cubic;
        dist -= 1.0;
    }

    /* Horner evaluation of the current piece */
    return ((d*dist + c)*dist + b)*dist + a;
}
// Build the coefficient table used to scale srcW input samples to dstW output samples.
//
// outFilter      receives a newly allocated int16_t table with *outFilterSize coefficients
//                per output sample (plus one extra row that duplicates the last one)
// filterPos      receives a newly allocated table of dstW+1 source start positions
// outFilterSize  receives the number of coefficients per output sample
// xInc           source step per output sample; values within 10 of 0x10000 are treated
//                as unscaled
// filterAlign    the final filter size is rounded up to a multiple of this value with a
//                bit mask, so it has to be a power of two
// one            value the coefficients of one output sample should add up to
// flags          SWS_* flags selecting the scaling algorithm plus SWS_CPU_CAPS_*,
//                SWS_ACCURATE_RND, SWS_BITEXACT and SWS_PRINT_INFO
// srcFilter,
// dstFilter      optional extra vectors; their length widens the intermediate filter
// param          per-algorithm tuning values; SWS_PARAM_DEFAULT selects the built-in value
//
// Intermediate coefficients are int64_t with fone standing for 1.0.
// Allocation failures jump to the fail label.
// NOTE(review): the return statements are not visible in this listing; the callers in
// sws_getContext treat a negative result as failure - confirm.
169 static int initFilter(int16_t **outFilter
, int16_t **filterPos
, int *outFilterSize
, int xInc
,
170 int srcW
, int dstW
, int filterAlign
, int one
, int flags
,
171 SwsVector
*srcFilter
, SwsVector
*dstFilter
, double param
[2])
177 int64_t *filter
=NULL
;
178 int64_t *filter2
=NULL
;
// fixed-point value that stands for 1.0 in the 64-bit intermediate coefficients
179 const int64_t fone
= 1LL<<54;
182 if (flags
& SWS_CPU_CAPS_MMX
)
183 __asm__
volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
186 // NOTE: the +1 is for the MMX scaler which reads over the end
187 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+1)*sizeof(int16_t), fail
);
189 if (FFABS(xInc
- 0x10000) <10) { // unscaled
192 FF_ALLOCZ_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
194 for (i
=0; i
<dstW
; i
++) {
195 filter
[i
*filterSize
]= fone
;
199 } else if (flags
&SWS_POINT
) { // lame looking point sampling mode
203 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
205 xDstInSrc
= xInc
/2 - 0x8000;
206 for (i
=0; i
<dstW
; i
++) {
207 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
213 } else if ((xInc
<= (1<<16) && (flags
&SWS_AREA
)) || (flags
&SWS_FAST_BILINEAR
)) { // bilinear upscale
217 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
219 xDstInSrc
= xInc
/2 - 0x8000;
220 for (i
=0; i
<dstW
; i
++) {
221 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
225 //bilinear upscale / linear interpolate / area averaging
226 for (j
=0; j
<filterSize
; j
++) {
227 int64_t coeff
= fone
- FFABS((xx
<<16) - xDstInSrc
)*(fone
>>16);
228 if (coeff
<0) coeff
=0;
229 filter
[i
*filterSize
+ j
]= coeff
;
// general case: sizeFactor depends on the selected algorithm and feeds the filterSize computation below
238 if (flags
&SWS_BICUBIC
) sizeFactor
= 4;
239 else if (flags
&SWS_X
) sizeFactor
= 8;
240 else if (flags
&SWS_AREA
) sizeFactor
= 1; //downscale only, for upscale it is bilinear
241 else if (flags
&SWS_GAUSS
) sizeFactor
= 8; // infinite ;)
242 else if (flags
&SWS_LANCZOS
) sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2*param
[0]) : 6;
243 else if (flags
&SWS_SINC
) sizeFactor
= 20; // infinite ;)
244 else if (flags
&SWS_SPLINE
) sizeFactor
= 20; // infinite ;)
245 else if (flags
&SWS_BILINEAR
) sizeFactor
= 2;
247 sizeFactor
= 0; //GCC warning killer
// upscaling uses sizeFactor+1 taps; downscaling widens the filter in proportion to srcW/dstW
251 if (xInc
<= 1<<16) filterSize
= 1 + sizeFactor
; // upscale
252 else filterSize
= 1 + (sizeFactor
*srcW
+ dstW
- 1)/ dstW
;
// never use more than srcW-2 taps
254 if (filterSize
> srcW
-2) filterSize
=srcW
-2;
256 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
258 xDstInSrc
= xInc
- 0x10000;
259 for (i
=0; i
<dstW
; i
++) {
260 int xx
= (xDstInSrc
- ((filterSize
-2)<<16)) / (1<<17);
263 for (j
=0; j
<filterSize
; j
++) {
264 int64_t d
= ((int64_t)FFABS((xx
<<17) - xDstInSrc
))<<13;
270 floatd
= d
* (1.0/(1<<30));
272 if (flags
& SWS_BICUBIC
) {
273 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1<<24);
274 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1<<24);
275 int64_t dd
= ( d
*d
)>>30;
276 int64_t ddd
= (dd
*d
)>>30;
279 coeff
= (12*(1<<24)-9*B
-6*C
)*ddd
+ (-18*(1<<24)+12*B
+6*C
)*dd
+ (6*(1<<24)-2*B
)*(1<<30);
280 else if (d
< 1LL<<31)
281 coeff
= (-B
-6*C
)*ddd
+ (6*B
+30*C
)*dd
+ (-12*B
-48*C
)*d
+ (8*B
+24*C
)*(1<<30);
284 coeff
*= fone
>>(30+24);
286 /* else if (flags & SWS_X) {
287 double p= param ? param*0.01 : 0.3;
288 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
289 coeff*= pow(2.0, - p*d*d);
291 else if (flags
& SWS_X
) {
292 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
296 c
= cos(floatd
*M_PI
);
299 if (c
<0.0) c
= -pow(-c
, A
);
301 coeff
= (c
*0.5 + 0.5)*fone
;
302 } else if (flags
& SWS_AREA
) {
303 int64_t d2
= d
- (1<<29);
304 if (d2
*xInc
< -(1LL<<(29+16))) coeff
= 1.0 * (1LL<<(30+16));
305 else if (d2
*xInc
< (1LL<<(29+16))) coeff
= -d2
*xInc
+ (1LL<<(29+16));
307 coeff
*= fone
>>(30+16);
308 } else if (flags
& SWS_GAUSS
) {
309 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
310 coeff
= (pow(2.0, - p
*floatd
*floatd
))*fone
;
311 } else if (flags
& SWS_SINC
) {
312 coeff
= (d
? sin(floatd
*M_PI
)/(floatd
*M_PI
) : 1.0)*fone
;
313 } else if (flags
& SWS_LANCZOS
) {
314 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
315 coeff
= (d
? sin(floatd
*M_PI
)*sin(floatd
*M_PI
/p
)/(floatd
*floatd
*M_PI
*M_PI
/p
) : 1.0)*fone
;
316 if (floatd
>p
) coeff
=0;
317 } else if (flags
& SWS_BILINEAR
) {
319 if (coeff
<0) coeff
=0;
321 } else if (flags
& SWS_SPLINE
) {
322 double p
=-2.196152422706632;
323 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
-1.0, floatd
) * fone
;
325 coeff
= 0.0; //GCC warning killer
329 filter
[i
*filterSize
+ j
]= coeff
;
336 /* apply src & dst Filter to filter -> filter2
339 assert(filterSize
>0);
340 filter2Size
= filterSize
;
341 if (srcFilter
) filter2Size
+= srcFilter
->length
- 1;
342 if (dstFilter
) filter2Size
+= dstFilter
->length
- 1;
343 assert(filter2Size
>0);
344 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
*dstW
*sizeof(*filter2
), fail
);
346 for (i
=0; i
<dstW
; i
++) {
350 for (k
=0; k
<srcFilter
->length
; k
++) {
351 for (j
=0; j
<filterSize
; j
++)
352 filter2
[i
*filter2Size
+ k
+ j
] += srcFilter
->coeff
[k
]*filter
[i
*filterSize
+ j
];
355 for (j
=0; j
<filterSize
; j
++)
356 filter2
[i
*filter2Size
+ j
]= filter
[i
*filterSize
+ j
];
360 (*filterPos
)[i
]+= (filterSize
-1)/2 - (filter2Size
-1)/2;
364 /* try to reduce the filter-size (step1 find size and shift left) */
365 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
367 for (i
=dstW
-1; i
>=0; i
--) {
368 int min
= filter2Size
;
372 /* get rid of near zero elements on the left by shifting left */
373 for (j
=0; j
<filter2Size
; j
++) {
375 cutOff
+= FFABS(filter2
[i
*filter2Size
]);
377 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
379 /* preserve monotonicity because the core can't handle the filter otherwise */
380 if (i
<dstW
-1 && (*filterPos
)[i
] >= (*filterPos
)[i
+1]) break;
382 // move filter coefficients left
383 for (k
=1; k
<filter2Size
; k
++)
384 filter2
[i
*filter2Size
+ k
- 1]= filter2
[i
*filter2Size
+ k
];
385 filter2
[i
*filter2Size
+ k
- 1]= 0;
390 /* count near zeros on the right */
391 for (j
=filter2Size
-1; j
>0; j
--) {
392 cutOff
+= FFABS(filter2
[i
*filter2Size
+ j
]);
394 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
398 if (min
>minFilterSize
) minFilterSize
= min
;
401 if (flags
& SWS_CPU_CAPS_ALTIVEC
) {
402 // we can handle the special case 4,
403 // so we don't want to go to the full 8
404 if (minFilterSize
< 5)
407 // We really don't want to waste our time
408 // doing useless computation, so fall back on
409 // the scalar C code for very small filters.
410 // Vectorizing is worth it only if you have a
411 // decent-sized vector.
412 if (minFilterSize
< 3)
416 if (flags
& SWS_CPU_CAPS_MMX
) {
417 // special case for unscaled vertical filtering
418 if (minFilterSize
== 1 && filterAlign
== 2)
422 assert(minFilterSize
> 0);
423 filterSize
= (minFilterSize
+(filterAlign
-1)) & (~(filterAlign
-1));
424 assert(filterSize
> 0);
425 filter
= av_malloc(filterSize
*dstW
*sizeof(*filter
));
426 if (filterSize
>= MAX_FILTER_SIZE
*16/((flags
&SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
428 *outFilterSize
= filterSize
;
430 if (flags
&SWS_PRINT_INFO
)
431 av_log(NULL
, AV_LOG_VERBOSE
, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size
, filterSize
);
432 /* try to reduce the filter-size (step2 reduce it) */
433 for (i
=0; i
<dstW
; i
++) {
436 for (j
=0; j
<filterSize
; j
++) {
437 if (j
>=filter2Size
) filter
[i
*filterSize
+ j
]= 0;
438 else filter
[i
*filterSize
+ j
]= filter2
[i
*filter2Size
+ j
];
439 if((flags
& SWS_BITEXACT
) && j
>=minFilterSize
)
440 filter
[i
*filterSize
+ j
]= 0;
444 //FIXME try to align filterPos if possible
447 for (i
=0; i
<dstW
; i
++) {
449 if ((*filterPos
)[i
] < 0) {
450 // move filter coefficients left to compensate for filterPos
451 for (j
=1; j
<filterSize
; j
++) {
452 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
453 filter
[i
*filterSize
+ left
] += filter
[i
*filterSize
+ j
];
454 filter
[i
*filterSize
+ j
]=0;
459 if ((*filterPos
)[i
] + filterSize
> srcW
) {
460 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
461 // move filter coefficients right to compensate for filterPos
462 for (j
=filterSize
-2; j
>=0; j
--) {
463 int right
= FFMIN(j
+ shift
, filterSize
-1);
464 filter
[i
*filterSize
+right
] += filter
[i
*filterSize
+j
];
465 filter
[i
*filterSize
+j
]=0;
467 (*filterPos
)[i
]= srcW
- filterSize
;
471 // Note the +1 is for the MMX scaler which reads over the end
472 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
473 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
, *outFilterSize
*(dstW
+1)*sizeof(int16_t), fail
);
475 /* normalize & store in outFilter */
476 for (i
=0; i
<dstW
; i
++) {
481 for (j
=0; j
<filterSize
; j
++) {
482 sum
+= filter
[i
*filterSize
+ j
];
// turn the row total into the divisor that scales the row to about the value of one
484 sum
= (sum
+ one
/2)/ one
;
485 for (j
=0; j
<*outFilterSize
; j
++) {
486 int64_t v
= filter
[i
*filterSize
+ j
] + error
;
487 int intV
= ROUNDED_DIV(v
, sum
);
488 (*outFilter
)[i
*(*outFilterSize
) + j
]= intV
;
493 (*filterPos
)[dstW
]= (*filterPos
)[dstW
-1]; // the MMX scaler will read over the end
494 for (i
=0; i
<*outFilterSize
; i
++) {
495 int j
= dstW
*(*outFilterSize
);
496 (*outFilter
)[j
+ i
]= (*outFilter
)[j
+ i
- (*outFilterSize
)];
// Generate the machine code of the fast bilinear MMX2 horizontal scaler.
//
// dstW        number of output samples
// xInc        source step per output sample in 16.16 fixed point
// filterCode  buffer that receives the generated code
// filter      receives one weight per output sample, taken from the fractional part of the
//             running source position
// filterPos   receives source positions used by the generated code
// numSplits   the main loop runs over dstW/numSplits output samples
//
// Two template fragments (A and B) are assembled inline; the asm statements only report
// their address, their length and the offsets of the two pshufw immediate bytes. For each
// group of four output samples one fragment is copied into filterCode and its two
// immediates are patched. A RET byte is stored after the last fragment.
//
// Returns fragmentPos + 1, i.e. the size of the generated code including the RET byte.
// NOTE(review): sws_getContext first calls this with NULL for filterCode, filter and
// filterPos to learn the code size, so the stores below are presumably guarded by a check
// that is not visible in this listing - confirm.
506 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
507 static int initMMX2HScaler(int dstW
, int xInc
, uint8_t *filterCode
, int16_t *filter
, int32_t *filterPos
, int numSplits
)
510 x86_reg imm8OfPShufW1A
;
511 x86_reg imm8OfPShufW2A
;
512 x86_reg fragmentLengthA
;
514 x86_reg imm8OfPShufW1B
;
515 x86_reg imm8OfPShufW2B
;
516 x86_reg fragmentLengthB
;
521 // create an optimized horizontal scaling routine
522 /* This scaler is made of runtime-generated MMX2 code using specially
523 * tuned pshufw instructions. For every four output pixels, if four
524 * input pixels are enough for the fast bilinear scaling, then a chunk
525 * of fragmentB is used. If five input pixels are needed, then a chunk
526 * of fragmentA is used.
535 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
536 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
537 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
538 "punpcklbw %%mm7, %%mm1 \n\t"
539 "punpcklbw %%mm7, %%mm0 \n\t"
540 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
542 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
544 "psubw %%mm1, %%mm0 \n\t"
545 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
546 "pmullw %%mm3, %%mm0 \n\t"
547 "psllw $7, %%mm1 \n\t"
548 "paddw %%mm1, %%mm0 \n\t"
550 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
552 "add $8, %%"REG_a
" \n\t"
556 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
557 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
558 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
563 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
567 :"=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
568 "=r" (fragmentLengthA
)
575 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
576 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
577 "punpcklbw %%mm7, %%mm0 \n\t"
578 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
580 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
582 "psubw %%mm1, %%mm0 \n\t"
583 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
584 "pmullw %%mm3, %%mm0 \n\t"
585 "psllw $7, %%mm1 \n\t"
586 "paddw %%mm1, %%mm0 \n\t"
588 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
590 "add $8, %%"REG_a
" \n\t"
594 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
595 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
596 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
601 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
605 :"=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
606 "=r" (fragmentLengthB
)
609 xpos
= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
612 for (i
=0; i
<dstW
/numSplits
; i
++) {
617 int b
=((xpos
+xInc
)>>16) - xx
;
618 int c
=((xpos
+xInc
*2)>>16) - xx
;
619 int d
=((xpos
+xInc
*3)>>16) - xx
;
621 uint8_t *fragment
= (d
+1<4) ? fragmentB
: fragmentA
;
622 x86_reg imm8OfPShufW1
= (d
+1<4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
623 x86_reg imm8OfPShufW2
= (d
+1<4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
624 x86_reg fragmentLength
= (d
+1<4) ? fragmentLengthB
: fragmentLengthA
;
625 int maxShift
= 3-(d
+inc
);
629 filter
[i
] = (( xpos
& 0xFFFF) ^ 0xFFFF)>>9;
630 filter
[i
+1] = (((xpos
+xInc
) & 0xFFFF) ^ 0xFFFF)>>9;
631 filter
[i
+2] = (((xpos
+xInc
*2) & 0xFFFF) ^ 0xFFFF)>>9;
632 filter
[i
+3] = (((xpos
+xInc
*3) & 0xFFFF) ^ 0xFFFF)>>9;
635 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
637 filterCode
[fragmentPos
+ imm8OfPShufW1
]=
638 (a
+inc
) | ((b
+inc
)<<2) | ((c
+inc
)<<4) | ((d
+inc
)<<6);
639 filterCode
[fragmentPos
+ imm8OfPShufW2
]=
640 a
| (b
<<2) | (c
<<4) | (d
<<6);
642 if (i
+4-inc
>=dstW
) shift
=maxShift
; //avoid overread
643 else if ((filterPos
[i
/2]&3) <= maxShift
) shift
=filterPos
[i
/2]&3; //Align
645 if (shift
&& i
>=shift
) {
646 filterCode
[fragmentPos
+ imm8OfPShufW1
]+= 0x55*shift
;
647 filterCode
[fragmentPos
+ imm8OfPShufW2
]+= 0x55*shift
;
648 filterPos
[i
/2]-=shift
;
652 fragmentPos
+= fragmentLength
;
// terminate the generated routine
655 filterCode
[fragmentPos
]= RET
;
660 filterPos
[((i
/2)+1)&(~1)]= xpos
>>16; // needed to jump to the next part
// the +1 accounts for the RET byte stored after the last fragment
662 return fragmentPos
+ 1;
664 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL */
666 static void getSubSampleFactors(int *h
, int *v
, enum PixelFormat format
)
668 *h
= av_pix_fmt_descriptors
[format
].log2_chroma_w
;
669 *v
= av_pix_fmt_descriptors
[format
].log2_chroma_h
;
/**
 * Round a 16.16 fixed-point value to the nearest integer and saturate it
 * to the 16-bit signed range, returned as its uint16_t bit pattern.
 *
 * The rounded value is kept in 64 bits until it has been clamped, so
 * inputs whose integer part does not fit in an int still saturate instead
 * of being truncated first.
 *
 * @param f value in 16.16 fixed point
 * @return 0x8000 if the rounded value is below -0x7FFF, 0x7FFF if it is
 *         above 0x7FFF, otherwise the rounded value converted to uint16_t
 */
static uint16_t roundToInt16(int64_t f)
{
    int64_t r;

    /* adding the rounding bias would overflow; the result saturates anyway */
    if (f > INT64_MAX - (1<<15))
        return 0x7FFF;

    r = (f + (1<<15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;

    return (uint16_t)r;
}
// Store the colorspace settings in the context and derive the YUV to RGB coefficients.
//
// c           context to update
// inv_table   four coefficients describing the source colorspace
// srcRange    source range flag
// table       four coefficients describing the destination colorspace
// dstRange    destination range flag
// brightness  subtracted from the luma offset, scaled by 256
// contrast,
// saturation  multipliers in 16.16 fixed point (sws_getContext passes 1<<16 for both)
//
// Returns -1 when the destination format is YUV or gray; the context fields set before
// that check have already been updated at that point.
// NOTE(review): the declarations of cy and oy, the condition around the 224/255 scaling
// and the final return are not visible in this listing - confirm against upstream.
680 int sws_setColorspaceDetails(SwsContext
*c
, const int inv_table
[4], int srcRange
, const int table
[4], int dstRange
, int brightness
, int contrast
, int saturation
)
682 int64_t crv
= inv_table
[0];
683 int64_t cbu
= inv_table
[1];
684 int64_t cgu
= -inv_table
[2];
685 int64_t cgv
= -inv_table
[3];
// keep copies of both tables and of all settings; sws_getColorspaceDetails reads them back
689 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int)*4);
690 memcpy(c
->dstColorspaceTable
, table
, sizeof(int)*4);
692 c
->brightness
= brightness
;
693 c
->contrast
= contrast
;
694 c
->saturation
= saturation
;
695 c
->srcRange
= srcRange
;
696 c
->dstRange
= dstRange
;
// no coefficients are computed for YUV or gray destinations
697 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
699 c
->uOffset
= 0x0400040004000400LL
;
700 c
->vOffset
= 0x0400040004000400LL
;
706 crv
= (crv
*224) / 255;
707 cbu
= (cbu
*224) / 255;
708 cgu
= (cgu
*224) / 255;
709 cgv
= (cgv
*224) / 255;
// contrast scales luma; contrast and saturation together scale the chroma coefficients
712 cy
= (cy
*contrast
)>>16;
713 crv
= (crv
*contrast
* saturation
)>>32;
714 cbu
= (cbu
*contrast
* saturation
)>>32;
715 cgu
= (cgu
*contrast
* saturation
)>>32;
716 cgv
= (cgv
*contrast
* saturation
)>>32;
718 oy
-= 256*brightness
;
// multiplying by 0x0001000100010001 copies the 16-bit value into each 16-bit lane of the 64-bit field
720 c
->yCoeff
= roundToInt16(cy
*8192) * 0x0001000100010001ULL
;
721 c
->vrCoeff
= roundToInt16(crv
*8192) * 0x0001000100010001ULL
;
722 c
->ubCoeff
= roundToInt16(cbu
*8192) * 0x0001000100010001ULL
;
723 c
->vgCoeff
= roundToInt16(cgv
*8192) * 0x0001000100010001ULL
;
724 c
->ugCoeff
= roundToInt16(cgu
*8192) * 0x0001000100010001ULL
;
725 c
->yOffset
= roundToInt16(oy
* 8) * 0x0001000100010001ULL
;
727 c
->yuv2rgb_y_coeff
= (int16_t)roundToInt16(cy
<<13);
728 c
->yuv2rgb_y_offset
= (int16_t)roundToInt16(oy
<< 9);
729 c
->yuv2rgb_v2r_coeff
= (int16_t)roundToInt16(crv
<<13);
730 c
->yuv2rgb_v2g_coeff
= (int16_t)roundToInt16(cgv
<<13);
731 c
->yuv2rgb_u2g_coeff
= (int16_t)roundToInt16(cgu
<<13);
732 c
->yuv2rgb_u2b_coeff
= (int16_t)roundToInt16(cbu
<<13);
734 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
, contrast
, saturation
);
737 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
738 if (c
->flags
& SWS_CPU_CAPS_ALTIVEC
)
739 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
, contrast
, saturation
);
744 int sws_getColorspaceDetails(SwsContext
*c
, int **inv_table
, int *srcRange
, int **table
, int *dstRange
, int *brightness
, int *contrast
, int *saturation
)
746 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
748 *inv_table
= c
->srcColorspaceTable
;
749 *table
= c
->dstColorspaceTable
;
750 *srcRange
= c
->srcRange
;
751 *dstRange
= c
->dstRange
;
752 *brightness
= c
->brightness
;
753 *contrast
= c
->contrast
;
754 *saturation
= c
->saturation
;
// Replace a YUVJ* pixel format by the YUV format with the same subsampling, in place.
// NOTE(review): the switch header, the return statements and the default branch are not
// visible in this listing. sws_getContext stores the result as srcRange / dstRange, so
// this presumably returns nonzero exactly when a YUVJ* format was replaced - confirm.
759 static int handle_jpeg(enum PixelFormat
*format
)
762 case PIX_FMT_YUVJ420P
:
763 *format
= PIX_FMT_YUV420P
;
765 case PIX_FMT_YUVJ422P
:
766 *format
= PIX_FMT_YUV422P
;
768 case PIX_FMT_YUVJ444P
:
769 *format
= PIX_FMT_YUV444P
;
771 case PIX_FMT_YUVJ440P
:
772 *format
= PIX_FMT_YUV440P
;
779 SwsContext
*sws_getContext(int srcW
, int srcH
, enum PixelFormat srcFormat
,
780 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
781 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
786 int usesVFilter
, usesHFilter
;
788 int srcRange
, dstRange
;
789 SwsFilter dummyFilter
= {NULL
, NULL
, NULL
, NULL
};
791 if (flags
& SWS_CPU_CAPS_MMX
)
792 __asm__
volatile("emms\n\t"::: "memory");
795 #if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
796 flags
&= ~(SWS_CPU_CAPS_MMX
|SWS_CPU_CAPS_MMX2
|SWS_CPU_CAPS_3DNOW
|SWS_CPU_CAPS_ALTIVEC
|SWS_CPU_CAPS_BFIN
);
797 flags
|= ff_hardcodedcpuflags();
798 #endif /* CONFIG_RUNTIME_CPUDETECT */
799 if (!rgb15to16
) sws_rgb2rgb_init(flags
);
801 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
803 srcRange
= handle_jpeg(&srcFormat
);
804 dstRange
= handle_jpeg(&dstFormat
);
806 if (!isSupportedIn(srcFormat
)) {
807 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat
));
810 if (!isSupportedOut(dstFormat
)) {
811 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat
));
815 i
= flags
& ( SWS_POINT
826 if(!i
|| (i
& (i
-1))) {
827 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Exactly one scaler algorithm must be chosen\n");
832 if (srcW
<4 || srcH
<1 || dstW
<8 || dstH
<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
833 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
834 srcW
, srcH
, dstW
, dstH
);
837 if(srcW
> VOFW
|| dstW
> VOFW
) {
838 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW
)" change VOF/VOFW and recompile\n");
842 if (!dstFilter
) dstFilter
= &dummyFilter
;
843 if (!srcFilter
) srcFilter
= &dummyFilter
;
845 FF_ALLOCZ_OR_GOTO(NULL
, c
, sizeof(SwsContext
), fail
);
847 c
->av_class
= &sws_context_class
;
852 c
->lumXInc
= ((srcW
<<16) + (dstW
>>1))/dstW
;
853 c
->lumYInc
= ((srcH
<<16) + (dstH
>>1))/dstH
;
855 c
->dstFormat
= dstFormat
;
856 c
->srcFormat
= srcFormat
;
857 c
->vRounder
= 4* 0x0001000100010001ULL
;
859 usesHFilter
= usesVFilter
= 0;
860 if (dstFilter
->lumV
&& dstFilter
->lumV
->length
>1) usesVFilter
=1;
861 if (dstFilter
->lumH
&& dstFilter
->lumH
->length
>1) usesHFilter
=1;
862 if (dstFilter
->chrV
&& dstFilter
->chrV
->length
>1) usesVFilter
=1;
863 if (dstFilter
->chrH
&& dstFilter
->chrH
->length
>1) usesHFilter
=1;
864 if (srcFilter
->lumV
&& srcFilter
->lumV
->length
>1) usesVFilter
=1;
865 if (srcFilter
->lumH
&& srcFilter
->lumH
->length
>1) usesHFilter
=1;
866 if (srcFilter
->chrV
&& srcFilter
->chrV
->length
>1) usesVFilter
=1;
867 if (srcFilter
->chrH
&& srcFilter
->chrH
->length
>1) usesHFilter
=1;
869 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
870 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
872 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
873 if ((isBGR(dstFormat
) || isRGB(dstFormat
)) && !(flags
&SWS_FULL_CHR_H_INT
)) c
->chrDstHSubSample
=1;
875 // drop some chroma lines if the user wants it
876 c
->vChrDrop
= (flags
&SWS_SRC_V_CHR_DROP_MASK
)>>SWS_SRC_V_CHR_DROP_SHIFT
;
877 c
->chrSrcVSubSample
+= c
->vChrDrop
;
879 // drop every other pixel for chroma calculation unless user wants full chroma
880 if ((isBGR(srcFormat
) || isRGB(srcFormat
)) && !(flags
&SWS_FULL_CHR_H_INP
)
881 && srcFormat
!=PIX_FMT_RGB8
&& srcFormat
!=PIX_FMT_BGR8
882 && srcFormat
!=PIX_FMT_RGB4
&& srcFormat
!=PIX_FMT_BGR4
883 && srcFormat
!=PIX_FMT_RGB4_BYTE
&& srcFormat
!=PIX_FMT_BGR4_BYTE
884 && ((dstW
>>c
->chrDstHSubSample
) <= (srcW
>>1) || (flags
&(SWS_FAST_BILINEAR
|SWS_POINT
))))
885 c
->chrSrcHSubSample
=1;
888 c
->param
[0] = param
[0];
889 c
->param
[1] = param
[1];
892 c
->param
[1] = SWS_PARAM_DEFAULT
;
895 // Note the -((-x)>>y) is so that we always round toward +inf.
896 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
897 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
898 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
899 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
901 sws_setColorspaceDetails(c
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], srcRange
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
] /* FIXME*/, dstRange
, 0, 1<<16, 1<<16);
903 /* unscaled special cases */
904 if (unscaled
&& !usesHFilter
&& !usesVFilter
&& (srcRange
== dstRange
|| isBGR(dstFormat
) || isRGB(dstFormat
))) {
905 ff_get_unscaled_swscale(c
);
908 if (flags
&SWS_PRINT_INFO
)
909 av_log(c
, AV_LOG_INFO
, "using unscaled %s -> %s special converter\n",
910 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
915 if (flags
& SWS_CPU_CAPS_MMX2
) {
916 c
->canMMX2BeUsed
= (dstW
>=srcW
&& (dstW
&31)==0 && (srcW
&15)==0) ? 1 : 0;
917 if (!c
->canMMX2BeUsed
&& dstW
>=srcW
&& (srcW
&15)==0 && (flags
&SWS_FAST_BILINEAR
)) {
918 if (flags
&SWS_PRINT_INFO
)
919 av_log(c
, AV_LOG_INFO
, "output width is not a multiple of 32 -> no MMX2 scaler\n");
921 if (usesHFilter
) c
->canMMX2BeUsed
=0;
926 c
->chrXInc
= ((c
->chrSrcW
<<16) + (c
->chrDstW
>>1))/c
->chrDstW
;
927 c
->chrYInc
= ((c
->chrSrcH
<<16) + (c
->chrDstH
>>1))/c
->chrDstH
;
929 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
930 // but only for the FAST_BILINEAR mode otherwise do correct scaling
931 // n-2 is the last chrominance sample available
932 // this is not perfect, but no one should notice the difference, the more correct variant
933 // would be like the vertical one, but that would require some special code for the
934 // first and last pixel
935 if (flags
&SWS_FAST_BILINEAR
) {
936 if (c
->canMMX2BeUsed
) {
940 //we don't use the x86 asm scaler if MMX is available
941 else if (flags
& SWS_CPU_CAPS_MMX
) {
942 c
->lumXInc
= ((srcW
-2)<<16)/(dstW
-2) - 20;
943 c
->chrXInc
= ((c
->chrSrcW
-2)<<16)/(c
->chrDstW
-2) - 20;
947 /* precalculate horizontal scaler filter coefficients */
949 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
950 // can't downscale !!!
951 if (c
->canMMX2BeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
952 c
->lumMmx2FilterCodeSize
= initMMX2HScaler( dstW
, c
->lumXInc
, NULL
, NULL
, NULL
, 8);
953 c
->chrMmx2FilterCodeSize
= initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, NULL
, NULL
, NULL
, 4);
956 c
->lumMmx2FilterCode
= mmap(NULL
, c
->lumMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
957 c
->chrMmx2FilterCode
= mmap(NULL
, c
->chrMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
958 #elif HAVE_VIRTUALALLOC
959 c
->lumMmx2FilterCode
= VirtualAlloc(NULL
, c
->lumMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
960 c
->chrMmx2FilterCode
= VirtualAlloc(NULL
, c
->chrMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
962 c
->lumMmx2FilterCode
= av_malloc(c
->lumMmx2FilterCodeSize
);
963 c
->chrMmx2FilterCode
= av_malloc(c
->chrMmx2FilterCodeSize
);
966 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/8+8)*sizeof(int16_t), fail
);
967 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/4+8)*sizeof(int16_t), fail
);
968 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/2/8+8)*sizeof(int32_t), fail
);
969 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/2/4+8)*sizeof(int32_t), fail
);
971 initMMX2HScaler( dstW
, c
->lumXInc
, c
->lumMmx2FilterCode
, c
->hLumFilter
, c
->hLumFilterPos
, 8);
972 initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, c
->chrMmx2FilterCode
, c
->hChrFilter
, c
->hChrFilterPos
, 4);
975 mprotect(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
976 mprotect(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
979 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL */
981 const int filterAlign
=
982 (flags
& SWS_CPU_CAPS_MMX
) ? 4 :
983 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
986 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
, &c
->hLumFilterSize
, c
->lumXInc
,
987 srcW
, dstW
, filterAlign
, 1<<14,
988 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
989 srcFilter
->lumH
, dstFilter
->lumH
, c
->param
) < 0)
991 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
, &c
->hChrFilterSize
, c
->chrXInc
,
992 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1<<14,
993 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
994 srcFilter
->chrH
, dstFilter
->chrH
, c
->param
) < 0)
997 } // initialize horizontal stuff
999 /* precalculate vertical scaler filter coefficients */
1001 const int filterAlign
=
1002 (flags
& SWS_CPU_CAPS_MMX
) && (flags
& SWS_ACCURATE_RND
) ? 2 :
1003 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
1006 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
, c
->lumYInc
,
1007 srcH
, dstH
, filterAlign
, (1<<12),
1008 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
1009 srcFilter
->lumV
, dstFilter
->lumV
, c
->param
) < 0)
1011 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
, c
->chrYInc
,
1012 c
->chrSrcH
, c
->chrDstH
, filterAlign
, (1<<12),
1013 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
1014 srcFilter
->chrV
, dstFilter
->chrV
, c
->param
) < 0)
1017 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
1018 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof (vector
signed short)*c
->vLumFilterSize
*c
->dstH
, fail
);
1019 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof (vector
signed short)*c
->vChrFilterSize
*c
->chrDstH
, fail
);
1021 for (i
=0;i
<c
->vLumFilterSize
*c
->dstH
;i
++) {
1023 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1025 p
[j
] = c
->vLumFilter
[i
];
1028 for (i
=0;i
<c
->vChrFilterSize
*c
->chrDstH
;i
++) {
1030 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1032 p
[j
] = c
->vChrFilter
[i
];
1037 // calculate buffer sizes so that they won't run out while handling these damn slices
1038 c
->vLumBufSize
= c
->vLumFilterSize
;
1039 c
->vChrBufSize
= c
->vChrFilterSize
;
1040 for (i
=0; i
<dstH
; i
++) {
1041 int chrI
= i
*c
->chrDstH
/ dstH
;
1042 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1043 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)<<c
->chrSrcVSubSample
));
1045 nextSlice
>>= c
->chrSrcVSubSample
;
1046 nextSlice
<<= c
->chrSrcVSubSample
;
1047 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1048 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1049 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
< (nextSlice
>>c
->chrSrcVSubSample
))
1050 c
->vChrBufSize
= (nextSlice
>>c
->chrSrcVSubSample
) - c
->vChrFilterPos
[chrI
];
1053 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
1054 // allocate several megabytes to handle all possible cases)
1055 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1056 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
, c
->vChrBufSize
*2*sizeof(int16_t*), fail
);
1057 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1058 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1059 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
1060 /* align at 16 bytes for AltiVec */
1061 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1062 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1063 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+c
->vLumBufSize
];
1065 for (i
=0; i
<c
->vChrBufSize
; i
++) {
1066 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
[i
+c
->vChrBufSize
], (VOF
+1)*2, fail
);
1067 c
->chrPixBuf
[i
] = c
->chrPixBuf
[i
+c
->vChrBufSize
];
1069 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1070 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1071 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1072 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+c
->vLumBufSize
];
1075 //try to avoid drawing green stuff between the right end and the stride end
1076 for (i
=0; i
<c
->vChrBufSize
; i
++) memset(c
->chrPixBuf
[i
], 64, (VOF
+1)*2);
1078 assert(2*VOFW
== VOF
);
1080 assert(c
->chrDstH
<= dstH
);
1082 if (flags
&SWS_PRINT_INFO
) {
1083 if (flags
&SWS_FAST_BILINEAR
)
1084 av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1085 else if (flags
&SWS_BILINEAR
)
1086 av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1087 else if (flags
&SWS_BICUBIC
)
1088 av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1089 else if (flags
&SWS_X
)
1090 av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1091 else if (flags
&SWS_POINT
)
1092 av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1093 else if (flags
&SWS_AREA
)
1094 av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1095 else if (flags
&SWS_BICUBLIN
)
1096 av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1097 else if (flags
&SWS_GAUSS
)
1098 av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1099 else if (flags
&SWS_SINC
)
1100 av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1101 else if (flags
&SWS_LANCZOS
)
1102 av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1103 else if (flags
&SWS_SPLINE
)
1104 av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1106 av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1108 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1109 sws_format_name(srcFormat
),
1111 dstFormat
== PIX_FMT_BGR555
|| dstFormat
== PIX_FMT_BGR565
? "dithered " : "",
1115 sws_format_name(dstFormat
));
1117 if (flags
& SWS_CPU_CAPS_MMX2
)
1118 av_log(c
, AV_LOG_INFO
, "using MMX2\n");
1119 else if (flags
& SWS_CPU_CAPS_3DNOW
)
1120 av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1121 else if (flags
& SWS_CPU_CAPS_MMX
)
1122 av_log(c
, AV_LOG_INFO
, "using MMX\n");
1123 else if (flags
& SWS_CPU_CAPS_ALTIVEC
)
1124 av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1126 av_log(c
, AV_LOG_INFO
, "using C\n");
1129 if (flags
& SWS_PRINT_INFO
) {
1130 if (flags
& SWS_CPU_CAPS_MMX
) {
1131 if (c
->canMMX2BeUsed
&& (flags
&SWS_FAST_BILINEAR
))
1132 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
1134 if (c
->hLumFilterSize
==4)
1135 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal luminance scaling\n");
1136 else if (c
->hLumFilterSize
==8)
1137 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal luminance scaling\n");
1139 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal luminance scaling\n");
1141 if (c
->hChrFilterSize
==4)
1142 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
1143 else if (c
->hChrFilterSize
==8)
1144 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
1146 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal chrominance scaling\n");
1150 av_log(c
, AV_LOG_VERBOSE
, "using x86 asm scaler for horizontal scaling\n");
1152 if (flags
& SWS_FAST_BILINEAR
)
1153 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR C scaler for horizontal scaling\n");
1155 av_log(c
, AV_LOG_VERBOSE
, "using C scaler for horizontal scaling\n");
1158 if (isPlanarYUV(dstFormat
)) {
1159 if (c
->vLumFilterSize
==1)
1160 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1162 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1164 if (c
->vLumFilterSize
==1 && c
->vChrFilterSize
==2)
1165 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
1166 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1167 else if (c
->vLumFilterSize
==2 && c
->vChrFilterSize
==2)
1168 av_log(c
, AV_LOG_VERBOSE
, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1170 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1173 if (dstFormat
==PIX_FMT_BGR24
)
1174 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR24 converter\n",
1175 (flags
& SWS_CPU_CAPS_MMX2
) ? "MMX2" : ((flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C"));
1176 else if (dstFormat
==PIX_FMT_RGB32
)
1177 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR32 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1178 else if (dstFormat
==PIX_FMT_BGR565
)
1179 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR16 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1180 else if (dstFormat
==PIX_FMT_BGR555
)
1181 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR15 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1183 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1185 if (flags
& SWS_PRINT_INFO
) {
1186 av_log(c
, AV_LOG_DEBUG
, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1187 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1188 av_log(c
, AV_LOG_DEBUG
, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1189 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
, c
->chrXInc
, c
->chrYInc
);
1192 c
->swScale
= ff_getSwsFunc(c
);
1200 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1201 float lumaSharpen
, float chromaSharpen
,
1202 float chromaHShift
, float chromaVShift
,
1205 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1209 if (lumaGBlur
!=0.0) {
1210 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1211 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1213 filter
->lumH
= sws_getIdentityVec();
1214 filter
->lumV
= sws_getIdentityVec();
1217 if (chromaGBlur
!=0.0) {
1218 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1219 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1221 filter
->chrH
= sws_getIdentityVec();
1222 filter
->chrV
= sws_getIdentityVec();
1225 if (chromaSharpen
!=0.0) {
1226 SwsVector
*id
= sws_getIdentityVec();
1227 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1228 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1229 sws_addVec(filter
->chrH
, id
);
1230 sws_addVec(filter
->chrV
, id
);
1234 if (lumaSharpen
!=0.0) {
1235 SwsVector
*id
= sws_getIdentityVec();
1236 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1237 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1238 sws_addVec(filter
->lumH
, id
);
1239 sws_addVec(filter
->lumV
, id
);
1243 if (chromaHShift
!= 0.0)
1244 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+0.5));
1246 if (chromaVShift
!= 0.0)
1247 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+0.5));
1249 sws_normalizeVec(filter
->chrH
, 1.0);
1250 sws_normalizeVec(filter
->chrV
, 1.0);
1251 sws_normalizeVec(filter
->lumH
, 1.0);
1252 sws_normalizeVec(filter
->lumV
, 1.0);
1254 if (verbose
) sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1255 if (verbose
) sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1260 SwsVector
*sws_allocVec(int length
)
1262 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1265 vec
->length
= length
;
1266 vec
->coeff
= av_malloc(sizeof(double) * length
);
1272 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1274 const int length
= (int)(variance
*quality
+ 0.5) | 1;
1276 double middle
= (length
-1)*0.5;
1277 SwsVector
*vec
= sws_allocVec(length
);
1282 for (i
=0; i
<length
; i
++) {
1283 double dist
= i
-middle
;
1284 vec
->coeff
[i
]= exp(-dist
*dist
/(2*variance
*variance
)) / sqrt(2*variance
*M_PI
);
1287 sws_normalizeVec(vec
, 1.0);
1292 SwsVector
*sws_getConstVec(double c
, int length
)
1295 SwsVector
*vec
= sws_allocVec(length
);
1300 for (i
=0; i
<length
; i
++)
1306 SwsVector
*sws_getIdentityVec(void)
1308 return sws_getConstVec(1.0, 1);
1311 double sws_dcVec(SwsVector
*a
)
1316 for (i
=0; i
<a
->length
; i
++)
1322 void sws_scaleVec(SwsVector
*a
, double scalar
)
1326 for (i
=0; i
<a
->length
; i
++)
1327 a
->coeff
[i
]*= scalar
;
1330 void sws_normalizeVec(SwsVector
*a
, double height
)
1332 sws_scaleVec(a
, height
/sws_dcVec(a
));
1335 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1337 int length
= a
->length
+ b
->length
- 1;
1339 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1344 for (i
=0; i
<a
->length
; i
++) {
1345 for (j
=0; j
<b
->length
; j
++) {
1346 vec
->coeff
[i
+j
]+= a
->coeff
[i
]*b
->coeff
[j
];
1353 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1355 int length
= FFMAX(a
->length
, b
->length
);
1357 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1362 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1363 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]+= b
->coeff
[i
];
1368 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1370 int length
= FFMAX(a
->length
, b
->length
);
1372 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1377 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1378 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]-= b
->coeff
[i
];
1383 /* shift left / or right if "shift" is negative */
1384 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1386 int length
= a
->length
+ FFABS(shift
)*2;
1388 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1393 for (i
=0; i
<a
->length
; i
++) {
1394 vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2 - shift
]= a
->coeff
[i
];
1400 void sws_shiftVec(SwsVector
*a
, int shift
)
1402 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1404 a
->coeff
= shifted
->coeff
;
1405 a
->length
= shifted
->length
;
1409 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1411 SwsVector
*sum
= sws_sumVec(a
, b
);
1413 a
->coeff
= sum
->coeff
;
1414 a
->length
= sum
->length
;
1418 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1420 SwsVector
*diff
= sws_diffVec(a
, b
);
1422 a
->coeff
= diff
->coeff
;
1423 a
->length
= diff
->length
;
1427 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1429 SwsVector
*conv
= sws_getConvVec(a
, b
);
1431 a
->coeff
= conv
->coeff
;
1432 a
->length
= conv
->length
;
1436 SwsVector
*sws_cloneVec(SwsVector
*a
)
1439 SwsVector
*vec
= sws_allocVec(a
->length
);
1444 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
]= a
->coeff
[i
];
1449 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1456 for (i
=0; i
<a
->length
; i
++)
1457 if (a
->coeff
[i
]>max
) max
= a
->coeff
[i
];
1459 for (i
=0; i
<a
->length
; i
++)
1460 if (a
->coeff
[i
]<min
) min
= a
->coeff
[i
];
1464 for (i
=0; i
<a
->length
; i
++) {
1465 int x
= (int)((a
->coeff
[i
]-min
)*60.0/range
+0.5);
1466 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1467 for (;x
>0; x
--) av_log(log_ctx
, log_level
, " ");
1468 av_log(log_ctx
, log_level
, "|\n");
1472 #if LIBSWSCALE_VERSION_MAJOR < 1
1473 void sws_printVec(SwsVector
*a
)
1475 sws_printVec2(a
, NULL
, AV_LOG_DEBUG
);
1479 void sws_freeVec(SwsVector
*a
)
1482 av_freep(&a
->coeff
);
1487 void sws_freeFilter(SwsFilter
*filter
)
1489 if (!filter
) return;
1491 if (filter
->lumH
) sws_freeVec(filter
->lumH
);
1492 if (filter
->lumV
) sws_freeVec(filter
->lumV
);
1493 if (filter
->chrH
) sws_freeVec(filter
->chrH
);
1494 if (filter
->chrV
) sws_freeVec(filter
->chrV
);
1498 void sws_freeContext(SwsContext
*c
)
1504 for (i
=0; i
<c
->vLumBufSize
; i
++)
1505 av_freep(&c
->lumPixBuf
[i
]);
1506 av_freep(&c
->lumPixBuf
);
1510 for (i
=0; i
<c
->vChrBufSize
; i
++)
1511 av_freep(&c
->chrPixBuf
[i
]);
1512 av_freep(&c
->chrPixBuf
);
1515 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1516 for (i
=0; i
<c
->vLumBufSize
; i
++)
1517 av_freep(&c
->alpPixBuf
[i
]);
1518 av_freep(&c
->alpPixBuf
);
1521 av_freep(&c
->vLumFilter
);
1522 av_freep(&c
->vChrFilter
);
1523 av_freep(&c
->hLumFilter
);
1524 av_freep(&c
->hChrFilter
);
1525 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
1526 av_freep(&c
->vYCoeffsBank
);
1527 av_freep(&c
->vCCoeffsBank
);
1530 av_freep(&c
->vLumFilterPos
);
1531 av_freep(&c
->vChrFilterPos
);
1532 av_freep(&c
->hLumFilterPos
);
1533 av_freep(&c
->hChrFilterPos
);
1535 #if ARCH_X86 && CONFIG_GPL
1536 #ifdef MAP_ANONYMOUS
1537 if (c
->lumMmx2FilterCode
) munmap(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
);
1538 if (c
->chrMmx2FilterCode
) munmap(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
);
1539 #elif HAVE_VIRTUALALLOC
1540 if (c
->lumMmx2FilterCode
) VirtualFree(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, MEM_RELEASE
);
1541 if (c
->chrMmx2FilterCode
) VirtualFree(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, MEM_RELEASE
);
1543 av_free(c
->lumMmx2FilterCode
);
1544 av_free(c
->chrMmx2FilterCode
);
1546 c
->lumMmx2FilterCode
=NULL
;
1547 c
->chrMmx2FilterCode
=NULL
;
1548 #endif /* ARCH_X86 && CONFIG_GPL */
1550 av_freep(&c
->yuvTable
);
1555 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
,
1556 int srcW
, int srcH
, enum PixelFormat srcFormat
,
1557 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
1558 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
1560 static const double default_param
[2] = {SWS_PARAM_DEFAULT
, SWS_PARAM_DEFAULT
};
1563 param
= default_param
;
1566 if (context
->srcW
!= srcW
|| context
->srcH
!= srcH
||
1567 context
->srcFormat
!= srcFormat
||
1568 context
->dstW
!= dstW
|| context
->dstH
!= dstH
||
1569 context
->dstFormat
!= dstFormat
|| context
->flags
!= flags
||
1570 context
->param
[0] != param
[0] || context
->param
[1] != param
[1])
1572 sws_freeContext(context
);
1577 return sws_getContext(srcW
, srcH
, srcFormat
,
1578 dstW
, dstH
, dstFormat
, flags
,
1579 srcFilter
, dstFilter
, param
);