2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
30 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
31 #define MAP_ANONYMOUS MAP_ANON
35 #define WIN32_LEAN_AND_MEAN
39 #include "swscale_internal.h"
41 #include "libavutil/intreadwrite.h"
42 #include "libavutil/x86_cpu.h"
43 #include "libavutil/avutil.h"
44 #include "libavutil/bswap.h"
45 #include "libavutil/pixdesc.h"
47 unsigned swscale_version(void)
49 return LIBSWSCALE_VERSION_INT
;
52 const char *swscale_configuration(void)
54 return FFMPEG_CONFIGURATION
;
57 const char *swscale_license(void)
59 #define LICENSE_PREFIX "libswscale license: "
60 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
/* x86 opcode byte of a near return; stored after the generated scaler code. */
#define RET 0xC3
/*
 * Non-zero when pixel format x is accepted as a scaler input.
 * The argument is evaluated once per comparison, so it must be free of
 * side effects.
 */
#define isSupportedIn(x)    (   \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_YUVJ420P    \
        || (x)==PIX_FMT_YUVJ422P    \
        || (x)==PIX_FMT_YUVJ440P    \
        || (x)==PIX_FMT_YUVJ444P    \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420P16LE \
        || (x)==PIX_FMT_YUV422P16LE \
        || (x)==PIX_FMT_YUV444P16LE \
        || (x)==PIX_FMT_YUV420P16BE \
        || (x)==PIX_FMT_YUV422P16BE \
        || (x)==PIX_FMT_YUV444P16BE \
    )
112 int sws_isSupportedInput(enum PixelFormat pix_fmt
)
114 return isSupportedIn(pix_fmt
);
// Non-zero when pixel format x equals one of the formats compared below; used to
// validate the destination format (see sws_isSupportedOutput and sws_getContext).
// x is evaluated once per comparison, so it must be free of side effects.
// NOTE(review): the embedded numbering jumps from 128 to 130 inside the expression and
// the closing parenthesis is absent from this listing -- a term may have been lost
// in extraction; confirm against the upstream file before relying on this list.
117 #define isSupportedOut(x) ( \
118 (x)==PIX_FMT_YUV420P \
119 || (x)==PIX_FMT_YUVA420P \
120 || (x)==PIX_FMT_YUYV422 \
121 || (x)==PIX_FMT_UYVY422 \
122 || (x)==PIX_FMT_YUV444P \
123 || (x)==PIX_FMT_YUV422P \
124 || (x)==PIX_FMT_YUV411P \
125 || (x)==PIX_FMT_YUVJ420P \
126 || (x)==PIX_FMT_YUVJ422P \
127 || (x)==PIX_FMT_YUVJ440P \
128 || (x)==PIX_FMT_YUVJ444P \
130 || (x)==PIX_FMT_NV12 \
131 || (x)==PIX_FMT_NV21 \
132 || (x)==PIX_FMT_GRAY16BE \
133 || (x)==PIX_FMT_GRAY16LE \
134 || (x)==PIX_FMT_GRAY8 \
135 || (x)==PIX_FMT_YUV410P \
136 || (x)==PIX_FMT_YUV440P \
137 || (x)==PIX_FMT_YUV420P16LE \
138 || (x)==PIX_FMT_YUV422P16LE \
139 || (x)==PIX_FMT_YUV444P16LE \
140 || (x)==PIX_FMT_YUV420P16BE \
141 || (x)==PIX_FMT_YUV422P16BE \
142 || (x)==PIX_FMT_YUV444P16BE \
145 int sws_isSupportedOutput(enum PixelFormat pix_fmt
)
147 return isSupportedOut(pix_fmt
);
150 extern const int32_t ff_yuv2rgb_coeffs
[8][4];
152 const char *sws_format_name(enum PixelFormat format
)
154 if ((unsigned)format
< PIX_FMT_NB
&& av_pix_fmt_descriptors
[format
].name
)
155 return av_pix_fmt_descriptors
[format
].name
;
157 return "Unknown format";
// Evaluates the cubic a + b*dist + c*dist^2 + d*dist^3 (written in Horner form)
// when dist <= 1.0.
// For dist > 1.0 the function calls itself again with 0.0 as the first coefficient.
// NOTE(review): the remaining arguments of that recursive call and the closing brace
// are not present in this listing, so the recursion step cannot be documented from here.
160 static double getSplineCoeff(double a
, double b
, double c
, double d
, double dist
)
162 // printf("%f %f %f %f %f\n", a,b,c,d,dist);
163 if (dist
<=1.0) return ((d
*dist
+ c
)*dist
+ b
)*dist
+a
;
164 else return getSplineCoeff( 0.0,
/**
 * Build the coefficient table and source start positions for one scaling
 * direction (srcW input samples -> dstW output samples).
 *
 * Visible stages: (1) compute 64-bit coefficients in "filter" for the kernel
 * selected by flags, (2) combine them with the optional srcFilter/dstFilter
 * vectors into "filter2", (3) trim near-zero taps and round the tap count up
 * to filterAlign, (4) fold taps that would fall outside [0, srcW) back inside,
 * (5) normalize and store 16-bit coefficients in *outFilter.
 *
 * @param outFilter     receives an allocated int16_t array of
 *                      *outFilterSize * (dstW+1) entries
 * @param filterPos     receives an allocated int16_t array of dstW+1 entries
 *                      (first source sample used by each output sample)
 * @param outFilterSize receives the final number of taps per output sample
 * @param xInc          source step per output sample; a value within 10 of
 *                      0x10000 is handled as "unscaled"
 * @param srcW          number of source samples
 * @param dstW          number of destination samples
 * @param filterAlign   the tap count is rounded up to a multiple of this
 * @param one           divisor used when normalizing: each row's coefficient
 *                      sum is divided by it before the per-tap division
 * @param flags         SWS_* flags selecting the kernel and CPU capabilities
 * @param srcFilter     optional vector; when set, its length enlarges filter2
 * @param dstFilter     optional vector; when set, its length enlarges filter2
 * @param param         two kernel tunables; SWS_PARAM_DEFAULT selects the
 *                      built-in default of the chosen kernel
 *
 * NOTE(review): this listing lacks the local declarations, the return
 * statements and the "fail" label; callers in this file treat a negative
 * return value as failure. Two block comments inside the body are also
 * unterminated in this listing.
 */
171 static int initFilter(int16_t **outFilter
, int16_t **filterPos
, int *outFilterSize
, int xInc
,
172 int srcW
, int dstW
, int filterAlign
, int one
, int flags
,
173 SwsVector
*srcFilter
, SwsVector
*dstFilter
, double param
[2])
179 int64_t *filter
=NULL
;
180 int64_t *filter2
=NULL
;
// fone is the unit weight (2^54) used for the intermediate 64-bit coefficients.
181 const int64_t fone
= 1LL<<54;
184 if (flags
& SWS_CPU_CAPS_MMX
)
185 __asm__
volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
188 // NOTE: the +1 is for the MMX scaler which reads over the end
189 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+1)*sizeof(int16_t), fail
);
// Kernel selection starts here: the 1:1 case stores one tap of weight fone per output sample.
191 if (FFABS(xInc
- 0x10000) <10) { // unscaled
194 FF_ALLOCZ_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
196 for (i
=0; i
<dstW
; i
++) {
197 filter
[i
*filterSize
]= fone
;
201 } else if (flags
&SWS_POINT
) { // lame looking point sampling mode
205 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
207 xDstInSrc
= xInc
/2 - 0x8000;
208 for (i
=0; i
<dstW
; i
++) {
209 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
215 } else if ((xInc
<= (1<<16) && (flags
&SWS_AREA
)) || (flags
&SWS_FAST_BILINEAR
)) { // bilinear upscale
219 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
221 xDstInSrc
= xInc
/2 - 0x8000;
222 for (i
=0; i
<dstW
; i
++) {
223 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
227 //bilinear upscale / linear interpolate / area averaging
228 for (j
=0; j
<filterSize
; j
++) {
229 int64_t coeff
= fone
- FFABS((xx
<<16) - xDstInSrc
)*(fone
>>16);
230 if (coeff
<0) coeff
=0;
231 filter
[i
*filterSize
+ j
]= coeff
;
// Remaining kernels: sizeFactor is the per-kernel tap budget used to derive filterSize below.
240 if (flags
&SWS_BICUBIC
) sizeFactor
= 4;
241 else if (flags
&SWS_X
) sizeFactor
= 8;
242 else if (flags
&SWS_AREA
) sizeFactor
= 1; //downscale only, for upscale it is bilinear
243 else if (flags
&SWS_GAUSS
) sizeFactor
= 8; // infinite ;)
244 else if (flags
&SWS_LANCZOS
) sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2*param
[0]) : 6;
245 else if (flags
&SWS_SINC
) sizeFactor
= 20; // infinite ;)
246 else if (flags
&SWS_SPLINE
) sizeFactor
= 20; // infinite ;)
247 else if (flags
&SWS_BILINEAR
) sizeFactor
= 2;
249 sizeFactor
= 0; //GCC warning killer
253 if (xInc
<= 1<<16) filterSize
= 1 + sizeFactor
; // upscale
254 else filterSize
= 1 + (sizeFactor
*srcW
+ dstW
- 1)/ dstW
;
256 if (filterSize
> srcW
-2) filterSize
=srcW
-2;
258 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
260 xDstInSrc
= xInc
- 0x10000;
261 for (i
=0; i
<dstW
; i
++) {
262 int xx
= (xDstInSrc
- ((filterSize
-2)<<16)) / (1<<17);
265 for (j
=0; j
<filterSize
; j
++) {
266 int64_t d
= ((int64_t)FFABS((xx
<<17) - xDstInSrc
))<<13;
272 floatd
= d
* (1.0/(1<<30));
274 if (flags
& SWS_BICUBIC
) {
275 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1<<24);
276 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1<<24);
277 int64_t dd
= ( d
*d
)>>30;
278 int64_t ddd
= (dd
*d
)>>30;
281 coeff
= (12*(1<<24)-9*B
-6*C
)*ddd
+ (-18*(1<<24)+12*B
+6*C
)*dd
+ (6*(1<<24)-2*B
)*(1<<30);
282 else if (d
< 1LL<<31)
283 coeff
= (-B
-6*C
)*ddd
+ (6*B
+30*C
)*dd
+ (-12*B
-48*C
)*d
+ (8*B
+24*C
)*(1<<30);
286 coeff
*= fone
>>(30+24);
288 /* else if (flags & SWS_X) {
289 double p= param ? param*0.01 : 0.3;
290 coeff = d ? sin(d*M_PI)/(d*M_PI) : 1.0;
291 coeff*= pow(2.0, - p*d*d);
293 else if (flags
& SWS_X
) {
294 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
298 c
= cos(floatd
*M_PI
);
301 if (c
<0.0) c
= -pow(-c
, A
);
303 coeff
= (c
*0.5 + 0.5)*fone
;
304 } else if (flags
& SWS_AREA
) {
305 int64_t d2
= d
- (1<<29);
306 if (d2
*xInc
< -(1LL<<(29+16))) coeff
= 1.0 * (1LL<<(30+16));
307 else if (d2
*xInc
< (1LL<<(29+16))) coeff
= -d2
*xInc
+ (1LL<<(29+16));
309 coeff
*= fone
>>(30+16);
310 } else if (flags
& SWS_GAUSS
) {
311 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
312 coeff
= (pow(2.0, - p
*floatd
*floatd
))*fone
;
313 } else if (flags
& SWS_SINC
) {
314 coeff
= (d
? sin(floatd
*M_PI
)/(floatd
*M_PI
) : 1.0)*fone
;
315 } else if (flags
& SWS_LANCZOS
) {
316 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
317 coeff
= (d
? sin(floatd
*M_PI
)*sin(floatd
*M_PI
/p
)/(floatd
*floatd
*M_PI
*M_PI
/p
) : 1.0)*fone
;
318 if (floatd
>p
) coeff
=0;
319 } else if (flags
& SWS_BILINEAR
) {
321 if (coeff
<0) coeff
=0;
323 } else if (flags
& SWS_SPLINE
) {
324 double p
=-2.196152422706632;
325 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
-1.0, floatd
) * fone
;
327 coeff
= 0.0; //GCC warning killer
331 filter
[i
*filterSize
+ j
]= coeff
;
338 /* apply src & dst Filter to filter -> filter2
341 assert(filterSize
>0);
342 filter2Size
= filterSize
;
343 if (srcFilter
) filter2Size
+= srcFilter
->length
- 1;
344 if (dstFilter
) filter2Size
+= dstFilter
->length
- 1;
345 assert(filter2Size
>0);
346 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
*dstW
*sizeof(*filter2
), fail
);
348 for (i
=0; i
<dstW
; i
++) {
352 for (k
=0; k
<srcFilter
->length
; k
++) {
353 for (j
=0; j
<filterSize
; j
++)
354 filter2
[i
*filter2Size
+ k
+ j
] += srcFilter
->coeff
[k
]*filter
[i
*filterSize
+ j
];
357 for (j
=0; j
<filterSize
; j
++)
358 filter2
[i
*filter2Size
+ j
]= filter
[i
*filterSize
+ j
];
362 (*filterPos
)[i
]+= (filterSize
-1)/2 - (filter2Size
-1)/2;
366 /* try to reduce the filter-size (step1 find size and shift left) */
367 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
369 for (i
=dstW
-1; i
>=0; i
--) {
370 int min
= filter2Size
;
374 /* get rid of near zero elements on the left by shifting left */
375 for (j
=0; j
<filter2Size
; j
++) {
377 cutOff
+= FFABS(filter2
[i
*filter2Size
]);
379 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
381 /* preserve monotonicity because the core can't handle the filter otherwise */
382 if (i
<dstW
-1 && (*filterPos
)[i
] >= (*filterPos
)[i
+1]) break;
384 // move filter coefficients left
385 for (k
=1; k
<filter2Size
; k
++)
386 filter2
[i
*filter2Size
+ k
- 1]= filter2
[i
*filter2Size
+ k
];
387 filter2
[i
*filter2Size
+ k
- 1]= 0;
392 /* count near zeros on the right */
393 for (j
=filter2Size
-1; j
>0; j
--) {
394 cutOff
+= FFABS(filter2
[i
*filter2Size
+ j
]);
396 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
400 if (min
>minFilterSize
) minFilterSize
= min
;
403 if (flags
& SWS_CPU_CAPS_ALTIVEC
) {
404 // we can handle the special case 4,
405 // so we don't want to go to the full 8
406 if (minFilterSize
< 5)
409 // We really don't want to waste our time
410 // doing useless computation, so fall back on
411 // the scalar C code for very small filters.
412 // Vectorizing is worth it only if you have a
413 // decent-sized vector.
414 if (minFilterSize
< 3)
418 if (flags
& SWS_CPU_CAPS_MMX
) {
419 // special case for unscaled vertical filtering
420 if (minFilterSize
== 1 && filterAlign
== 2)
424 assert(minFilterSize
> 0);
// Round the reduced tap count up to a multiple of filterAlign
// (the mask form assumes filterAlign is a power of two -- TODO confirm).
425 filterSize
= (minFilterSize
+(filterAlign
-1)) & (~(filterAlign
-1));
426 assert(filterSize
> 0);
427 filter
= av_malloc(filterSize
*dstW
*sizeof(*filter
));
428 if (filterSize
>= MAX_FILTER_SIZE
*16/((flags
&SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
430 *outFilterSize
= filterSize
;
432 if (flags
&SWS_PRINT_INFO
)
433 av_log(NULL
, AV_LOG_VERBOSE
, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size
, filterSize
);
434 /* try to reduce the filter-size (step2 reduce it) */
435 for (i
=0; i
<dstW
; i
++) {
438 for (j
=0; j
<filterSize
; j
++) {
439 if (j
>=filter2Size
) filter
[i
*filterSize
+ j
]= 0;
440 else filter
[i
*filterSize
+ j
]= filter2
[i
*filter2Size
+ j
];
441 if((flags
& SWS_BITEXACT
) && j
>=minFilterSize
)
442 filter
[i
*filterSize
+ j
]= 0;
446 //FIXME try to align filterPos if possible
449 for (i
=0; i
<dstW
; i
++) {
451 if ((*filterPos
)[i
] < 0) {
452 // move filter coefficients left to compensate for filterPos
453 for (j
=1; j
<filterSize
; j
++) {
454 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
455 filter
[i
*filterSize
+ left
] += filter
[i
*filterSize
+ j
];
456 filter
[i
*filterSize
+ j
]=0;
461 if ((*filterPos
)[i
] + filterSize
> srcW
) {
462 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
463 // move filter coefficients right to compensate for filterPos
464 for (j
=filterSize
-2; j
>=0; j
--) {
465 int right
= FFMIN(j
+ shift
, filterSize
-1);
466 filter
[i
*filterSize
+right
] += filter
[i
*filterSize
+j
];
467 filter
[i
*filterSize
+j
]=0;
469 (*filterPos
)[i
]= srcW
- filterSize
;
473 // Note the +1 is for the MMX scaler which reads over the end
474 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
475 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
, *outFilterSize
*(dstW
+1)*sizeof(int16_t), fail
);
477 /* normalize & store in outFilter */
478 for (i
=0; i
<dstW
; i
++) {
483 for (j
=0; j
<filterSize
; j
++) {
484 sum
+= filter
[i
*filterSize
+ j
];
// sum becomes (row total / one), rounded; each tap is then divided by it with rounding.
486 sum
= (sum
+ one
/2)/ one
;
487 for (j
=0; j
<*outFilterSize
; j
++) {
488 int64_t v
= filter
[i
*filterSize
+ j
] + error
;
489 int intV
= ROUNDED_DIV(v
, sum
);
490 (*outFilter
)[i
*(*outFilterSize
) + j
]= intV
;
495 (*filterPos
)[dstW
]= (*filterPos
)[dstW
-1]; // the MMX scaler will read over the end
496 for (i
=0; i
<*outFilterSize
; i
++) {
497 int j
= dstW
*(*outFilterSize
);
498 (*outFilter
)[j
+ i
]= (*outFilter
)[j
+ i
- (*outFilterSize
)];
/**
 * Emit the runtime-generated MMX2 horizontal scaler into filterCode.
 *
 * Two inline-asm templates (fragmentA, fragmentB) are located through the
 * addresses and offsets returned by the asm statements. For each group of
 * four output pixels one template is copied with memcpy() into filterCode at
 * fragmentPos, its two pshufw immediates are patched with the per-pixel source
 * offsets a..d, and four weights derived from the fractional part of the
 * source position are stored in filter[]. A RET opcode is stored after the
 * last fragment.
 *
 * @param dstW       destination width
 * @param xInc       source step per output pixel (fractional part = low 16 bits)
 * @param filterCode destination buffer for the generated code
 * @param filter     receives the per-pixel weights
 * @param filterPos  receives/adjusts source positions (indexed by i/2)
 * @param numSplits  the generation loop covers dstW/numSplits output pixels
 * @return fragmentPos + 1, i.e. the number of code bytes including RET
 *
 * NOTE(review): this listing is missing parts of the asm templates, the local
 * declarations and the loop increments; sws_getContext also calls this
 * function with NULL buffers to obtain the code size, but the code that
 * guards the stores in that case is not visible here. A block comment inside
 * the body is unterminated in this listing.
 */
508 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
509 static int initMMX2HScaler(int dstW
, int xInc
, uint8_t *filterCode
, int16_t *filter
, int32_t *filterPos
, int numSplits
)
512 x86_reg imm8OfPShufW1A
;
513 x86_reg imm8OfPShufW2A
;
514 x86_reg fragmentLengthA
;
516 x86_reg imm8OfPShufW1B
;
517 x86_reg imm8OfPShufW2B
;
518 x86_reg fragmentLengthB
;
523 // create an optimized horizontal scaling routine
524 /* This scaler is made of runtime-generated MMX2 code using specially
525 * tuned pshufw instructions. For every four output pixels, if four
526 * input pixels are enough for the fast bilinear scaling, then a chunk
527 * of fragmentB is used. If five input pixels are needed, then a chunk
528 * of fragmentA is used.
537 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
538 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
539 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
540 "punpcklbw %%mm7, %%mm1 \n\t"
541 "punpcklbw %%mm7, %%mm0 \n\t"
542 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
544 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
546 "psubw %%mm1, %%mm0 \n\t"
547 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
548 "pmullw %%mm3, %%mm0 \n\t"
549 "psllw $7, %%mm1 \n\t"
550 "paddw %%mm1, %%mm0 \n\t"
552 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
554 "add $8, %%"REG_a
" \n\t"
558 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
559 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
560 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
565 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
569 :"=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
570 "=r" (fragmentLengthA
)
577 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
578 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
579 "punpcklbw %%mm7, %%mm0 \n\t"
580 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
582 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
584 "psubw %%mm1, %%mm0 \n\t"
585 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
586 "pmullw %%mm3, %%mm0 \n\t"
587 "psllw $7, %%mm1 \n\t"
588 "paddw %%mm1, %%mm0 \n\t"
590 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
592 "add $8, %%"REG_a
" \n\t"
596 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
597 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
598 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
603 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
607 :"=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
608 "=r" (fragmentLengthB
)
611 xpos
= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
614 for (i
=0; i
<dstW
/numSplits
; i
++) {
619 int b
=((xpos
+xInc
)>>16) - xx
;
620 int c
=((xpos
+xInc
*2)>>16) - xx
;
621 int d
=((xpos
+xInc
*3)>>16) - xx
;
623 uint8_t *fragment
= (d
+1<4) ? fragmentB
: fragmentA
;
624 x86_reg imm8OfPShufW1
= (d
+1<4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
625 x86_reg imm8OfPShufW2
= (d
+1<4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
626 x86_reg fragmentLength
= (d
+1<4) ? fragmentLengthB
: fragmentLengthA
;
627 int maxShift
= 3-(d
+inc
);
631 filter
[i
] = (( xpos
& 0xFFFF) ^ 0xFFFF)>>9;
632 filter
[i
+1] = (((xpos
+xInc
) & 0xFFFF) ^ 0xFFFF)>>9;
633 filter
[i
+2] = (((xpos
+xInc
*2) & 0xFFFF) ^ 0xFFFF)>>9;
634 filter
[i
+3] = (((xpos
+xInc
*3) & 0xFFFF) ^ 0xFFFF)>>9;
637 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
639 filterCode
[fragmentPos
+ imm8OfPShufW1
]=
640 (a
+inc
) | ((b
+inc
)<<2) | ((c
+inc
)<<4) | ((d
+inc
)<<6);
641 filterCode
[fragmentPos
+ imm8OfPShufW2
]=
642 a
| (b
<<2) | (c
<<4) | (d
<<6);
644 if (i
+4-inc
>=dstW
) shift
=maxShift
; //avoid overread
645 else if ((filterPos
[i
/2]&3) <= maxShift
) shift
=filterPos
[i
/2]&3; //Align
647 if (shift
&& i
>=shift
) {
648 filterCode
[fragmentPos
+ imm8OfPShufW1
]+= 0x55*shift
;
649 filterCode
[fragmentPos
+ imm8OfPShufW2
]+= 0x55*shift
;
650 filterPos
[i
/2]-=shift
;
654 fragmentPos
+= fragmentLength
;
657 filterCode
[fragmentPos
]= RET
;
662 filterPos
[((i
/2)+1)&(~1)]= xpos
>>16; // needed to jump to the next part
664 return fragmentPos
+ 1;
666 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
668 static void getSubSampleFactors(int *h
, int *v
, enum PixelFormat format
)
670 *h
= av_pix_fmt_descriptors
[format
].log2_chroma_w
;
671 *v
= av_pix_fmt_descriptors
[format
].log2_chroma_h
;
/**
 * Convert a 16.16 fixed-point value to a saturated 16-bit integer.
 *
 * The value is rounded to nearest (half rounds up) by adding 0x8000 before
 * dropping the 16 fractional bits, then clamped.
 *
 * @param f 16.16 fixed-point input
 * @return the rounded value as a 16-bit two's-complement pattern:
 *         0x7FFF when the result exceeds 0x7FFF, 0x8000 when it is below
 *         -0x7FFF, otherwise the low 16 bits of the rounded value
 */
static uint16_t roundToInt16(int64_t f)
{
    /*
     * Keep the rounded value in 64 bits until after the clamp: narrowing to
     * int first would wrap for |f| >= 2^47 and bypass the saturation.
     * The right shift of a negative value relies on arithmetic shifting.
     */
    const int64_t r = (f + (1 << 15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
/**
 * Store colorspace conversion settings in the context and derive the
 * YUV->RGB coefficients from them.
 *
 * @param c          scaler context to update
 * @param inv_table  four source coefficients; copied to c->srcColorspaceTable
 *                   and used as crv, cbu, -cgu, -cgv
 * @param srcRange   stored in c->srcRange and passed to
 *                   ff_yuv2rgb_c_init_tables()
 * @param table      four destination coefficients; copied to
 *                   c->dstColorspaceTable
 * @param dstRange   stored in c->dstRange
 * @param brightness stored in the context; 256*brightness is subtracted from
 *                   the luma offset
 * @param contrast   multiplier, shifted back by 16 bits after use
 * @param saturation multiplier, shifted back (together with contrast) by 32 bits
 * @return -1 when the destination format is YUV or gray
 *
 * NOTE(review): the declarations of cy/oy, the branch that selects the
 * 224/255 chroma rescale, the preprocessor guard around the AltiVec call and
 * the final return are not present in this listing.
 */
682 int sws_setColorspaceDetails(SwsContext
*c
, const int inv_table
[4], int srcRange
, const int table
[4], int dstRange
, int brightness
, int contrast
, int saturation
)
684 int64_t crv
= inv_table
[0];
685 int64_t cbu
= inv_table
[1];
686 int64_t cgu
= -inv_table
[2];
687 int64_t cgv
= -inv_table
[3];
691 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int)*4);
692 memcpy(c
->dstColorspaceTable
, table
, sizeof(int)*4);
694 c
->brightness
= brightness
;
695 c
->contrast
= contrast
;
696 c
->saturation
= saturation
;
697 c
->srcRange
= srcRange
;
698 c
->dstRange
= dstRange
;
// The context fields above are already updated when this early return is taken.
699 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
701 c
->uOffset
= 0x0400040004000400LL
;
702 c
->vOffset
= 0x0400040004000400LL
;
708 crv
= (crv
*224) / 255;
709 cbu
= (cbu
*224) / 255;
710 cgu
= (cgu
*224) / 255;
711 cgv
= (cgv
*224) / 255;
// Apply contrast to luma and contrast*saturation to chroma; each factor carries 16 fractional bits.
714 cy
= (cy
*contrast
)>>16;
715 crv
= (crv
*contrast
* saturation
)>>32;
716 cbu
= (cbu
*contrast
* saturation
)>>32;
717 cgu
= (cgu
*contrast
* saturation
)>>32;
718 cgv
= (cgv
*contrast
* saturation
)>>32;
720 oy
-= 256*brightness
;
// Multiplying by 0x0001000100010001 replicates each 16-bit value into all four 16-bit lanes.
722 c
->yCoeff
= roundToInt16(cy
*8192) * 0x0001000100010001ULL
;
723 c
->vrCoeff
= roundToInt16(crv
*8192) * 0x0001000100010001ULL
;
724 c
->ubCoeff
= roundToInt16(cbu
*8192) * 0x0001000100010001ULL
;
725 c
->vgCoeff
= roundToInt16(cgv
*8192) * 0x0001000100010001ULL
;
726 c
->ugCoeff
= roundToInt16(cgu
*8192) * 0x0001000100010001ULL
;
727 c
->yOffset
= roundToInt16(oy
* 8) * 0x0001000100010001ULL
;
729 c
->yuv2rgb_y_coeff
= (int16_t)roundToInt16(cy
<<13);
730 c
->yuv2rgb_y_offset
= (int16_t)roundToInt16(oy
<< 9);
731 c
->yuv2rgb_v2r_coeff
= (int16_t)roundToInt16(crv
<<13);
732 c
->yuv2rgb_v2g_coeff
= (int16_t)roundToInt16(cgv
<<13);
733 c
->yuv2rgb_u2g_coeff
= (int16_t)roundToInt16(cgu
<<13);
734 c
->yuv2rgb_u2b_coeff
= (int16_t)roundToInt16(cbu
<<13);
736 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
, contrast
, saturation
);
740 if (c
->flags
& SWS_CPU_CAPS_ALTIVEC
)
741 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
, contrast
, saturation
);
746 int sws_getColorspaceDetails(SwsContext
*c
, int **inv_table
, int *srcRange
, int **table
, int *dstRange
, int *brightness
, int *contrast
, int *saturation
)
748 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
750 *inv_table
= c
->srcColorspaceTable
;
751 *table
= c
->dstColorspaceTable
;
752 *srcRange
= c
->srcRange
;
753 *dstRange
= c
->dstRange
;
754 *brightness
= c
->brightness
;
755 *contrast
= c
->contrast
;
756 *saturation
= c
->saturation
;
761 static int handle_jpeg(enum PixelFormat
*format
)
764 case PIX_FMT_YUVJ420P
: *format
= PIX_FMT_YUV420P
; return 1;
765 case PIX_FMT_YUVJ422P
: *format
= PIX_FMT_YUV422P
; return 1;
766 case PIX_FMT_YUVJ444P
: *format
= PIX_FMT_YUV444P
; return 1;
767 case PIX_FMT_YUVJ440P
: *format
= PIX_FMT_YUV440P
; return 1;
772 SwsContext
*sws_getContext(int srcW
, int srcH
, enum PixelFormat srcFormat
,
773 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
774 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
778 int usesVFilter
, usesHFilter
;
780 int srcRange
, dstRange
;
781 SwsFilter dummyFilter
= {NULL
, NULL
, NULL
, NULL
};
783 if (flags
& SWS_CPU_CAPS_MMX
)
784 __asm__
volatile("emms\n\t"::: "memory");
787 #if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
788 flags
&= ~(SWS_CPU_CAPS_MMX
|SWS_CPU_CAPS_MMX2
|SWS_CPU_CAPS_3DNOW
|SWS_CPU_CAPS_ALTIVEC
|SWS_CPU_CAPS_BFIN
);
789 flags
|= ff_hardcodedcpuflags();
790 #endif /* CONFIG_RUNTIME_CPUDETECT */
791 if (!rgb15to16
) sws_rgb2rgb_init(flags
);
793 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
795 srcRange
= handle_jpeg(&srcFormat
);
796 dstRange
= handle_jpeg(&dstFormat
);
798 if (!isSupportedIn(srcFormat
)) {
799 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat
));
802 if (!isSupportedOut(dstFormat
)) {
803 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat
));
807 i
= flags
& ( SWS_POINT
818 if(!i
|| (i
& (i
-1))) {
819 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Exactly one scaler algorithm must be chosen\n");
824 if (srcW
<4 || srcH
<1 || dstW
<8 || dstH
<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
825 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
826 srcW
, srcH
, dstW
, dstH
);
829 if(srcW
> VOFW
|| dstW
> VOFW
) {
830 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW
)" change VOF/VOFW and recompile\n");
834 if (!dstFilter
) dstFilter
= &dummyFilter
;
835 if (!srcFilter
) srcFilter
= &dummyFilter
;
837 FF_ALLOCZ_OR_GOTO(NULL
, c
, sizeof(SwsContext
), fail
);
839 c
->av_class
= &sws_context_class
;
844 c
->lumXInc
= ((srcW
<<16) + (dstW
>>1))/dstW
;
845 c
->lumYInc
= ((srcH
<<16) + (dstH
>>1))/dstH
;
847 c
->dstFormat
= dstFormat
;
848 c
->srcFormat
= srcFormat
;
849 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[dstFormat
]);
850 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[srcFormat
]);
851 c
->vRounder
= 4* 0x0001000100010001ULL
;
853 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
>1) ||
854 (srcFilter
->chrV
&& srcFilter
->chrV
->length
>1) ||
855 (dstFilter
->lumV
&& dstFilter
->lumV
->length
>1) ||
856 (dstFilter
->chrV
&& dstFilter
->chrV
->length
>1);
857 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
>1) ||
858 (srcFilter
->chrH
&& srcFilter
->chrH
->length
>1) ||
859 (dstFilter
->lumH
&& dstFilter
->lumH
->length
>1) ||
860 (dstFilter
->chrH
&& dstFilter
->chrH
->length
>1);
862 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
863 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
865 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
866 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) c
->chrDstHSubSample
=1;
868 // drop some chroma lines if the user wants it
869 c
->vChrDrop
= (flags
&SWS_SRC_V_CHR_DROP_MASK
)>>SWS_SRC_V_CHR_DROP_SHIFT
;
870 c
->chrSrcVSubSample
+= c
->vChrDrop
;
872 // drop every other pixel for chroma calculation unless user wants full chroma
873 if (isAnyRGB(srcFormat
) && !(flags
&SWS_FULL_CHR_H_INP
)
874 && srcFormat
!=PIX_FMT_RGB8
&& srcFormat
!=PIX_FMT_BGR8
875 && srcFormat
!=PIX_FMT_RGB4
&& srcFormat
!=PIX_FMT_BGR4
876 && srcFormat
!=PIX_FMT_RGB4_BYTE
&& srcFormat
!=PIX_FMT_BGR4_BYTE
877 && ((dstW
>>c
->chrDstHSubSample
) <= (srcW
>>1) || (flags
&(SWS_FAST_BILINEAR
|SWS_POINT
))))
878 c
->chrSrcHSubSample
=1;
881 c
->param
[0] = param
[0];
882 c
->param
[1] = param
[1];
885 c
->param
[1] = SWS_PARAM_DEFAULT
;
888 // Note the -((-x)>>y) is so that we always round toward +inf.
889 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
890 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
891 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
892 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
894 sws_setColorspaceDetails(c
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], srcRange
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
] /* FIXME*/, dstRange
, 0, 1<<16, 1<<16);
896 /* unscaled special cases */
897 if (unscaled
&& !usesHFilter
&& !usesVFilter
&& (srcRange
== dstRange
|| isAnyRGB(dstFormat
))) {
898 ff_get_unscaled_swscale(c
);
901 if (flags
&SWS_PRINT_INFO
)
902 av_log(c
, AV_LOG_INFO
, "using unscaled %s -> %s special converter\n",
903 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
908 if (flags
& SWS_CPU_CAPS_MMX2
) {
909 c
->canMMX2BeUsed
= (dstW
>=srcW
&& (dstW
&31)==0 && (srcW
&15)==0) ? 1 : 0;
910 if (!c
->canMMX2BeUsed
&& dstW
>=srcW
&& (srcW
&15)==0 && (flags
&SWS_FAST_BILINEAR
)) {
911 if (flags
&SWS_PRINT_INFO
)
912 av_log(c
, AV_LOG_INFO
, "output width is not a multiple of 32 -> no MMX2 scaler\n");
914 if (usesHFilter
) c
->canMMX2BeUsed
=0;
919 c
->chrXInc
= ((c
->chrSrcW
<<16) + (c
->chrDstW
>>1))/c
->chrDstW
;
920 c
->chrYInc
= ((c
->chrSrcH
<<16) + (c
->chrDstH
>>1))/c
->chrDstH
;
922 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
923 // but only for the FAST_BILINEAR mode otherwise do correct scaling
924 // n-2 is the last chrominance sample available
925 // this is not perfect, but no one should notice the difference, the more correct variant
926 // would be like the vertical one, but that would require some special code for the
927 // first and last pixel
928 if (flags
&SWS_FAST_BILINEAR
) {
929 if (c
->canMMX2BeUsed
) {
933 //we don't use the x86 asm scaler if MMX is available
934 else if (flags
& SWS_CPU_CAPS_MMX
) {
935 c
->lumXInc
= ((srcW
-2)<<16)/(dstW
-2) - 20;
936 c
->chrXInc
= ((c
->chrSrcW
-2)<<16)/(c
->chrDstW
-2) - 20;
940 /* precalculate horizontal scaler filter coefficients */
942 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
943 // can't downscale !!!
944 if (c
->canMMX2BeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
945 c
->lumMmx2FilterCodeSize
= initMMX2HScaler( dstW
, c
->lumXInc
, NULL
, NULL
, NULL
, 8);
946 c
->chrMmx2FilterCodeSize
= initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, NULL
, NULL
, NULL
, 4);
949 c
->lumMmx2FilterCode
= mmap(NULL
, c
->lumMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
950 c
->chrMmx2FilterCode
= mmap(NULL
, c
->chrMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
951 #elif HAVE_VIRTUALALLOC
952 c
->lumMmx2FilterCode
= VirtualAlloc(NULL
, c
->lumMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
953 c
->chrMmx2FilterCode
= VirtualAlloc(NULL
, c
->chrMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
955 c
->lumMmx2FilterCode
= av_malloc(c
->lumMmx2FilterCodeSize
);
956 c
->chrMmx2FilterCode
= av_malloc(c
->chrMmx2FilterCodeSize
);
959 if (!c
->lumMmx2FilterCode
|| !c
->chrMmx2FilterCode
)
961 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/8+8)*sizeof(int16_t), fail
);
962 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/4+8)*sizeof(int16_t), fail
);
963 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/2/8+8)*sizeof(int32_t), fail
);
964 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/2/4+8)*sizeof(int32_t), fail
);
966 initMMX2HScaler( dstW
, c
->lumXInc
, c
->lumMmx2FilterCode
, c
->hLumFilter
, c
->hLumFilterPos
, 8);
967 initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, c
->chrMmx2FilterCode
, c
->hChrFilter
, c
->hChrFilterPos
, 4);
970 mprotect(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
971 mprotect(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
974 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
976 const int filterAlign
=
977 (flags
& SWS_CPU_CAPS_MMX
) ? 4 :
978 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
981 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
, &c
->hLumFilterSize
, c
->lumXInc
,
982 srcW
, dstW
, filterAlign
, 1<<14,
983 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
984 srcFilter
->lumH
, dstFilter
->lumH
, c
->param
) < 0)
986 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
, &c
->hChrFilterSize
, c
->chrXInc
,
987 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1<<14,
988 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
989 srcFilter
->chrH
, dstFilter
->chrH
, c
->param
) < 0)
992 } // initialize horizontal stuff
994 /* precalculate vertical scaler filter coefficients */
996 const int filterAlign
=
997 (flags
& SWS_CPU_CAPS_MMX
) && (flags
& SWS_ACCURATE_RND
) ? 2 :
998 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
1001 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
, c
->lumYInc
,
1002 srcH
, dstH
, filterAlign
, (1<<12),
1003 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
1004 srcFilter
->lumV
, dstFilter
->lumV
, c
->param
) < 0)
1006 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
, c
->chrYInc
,
1007 c
->chrSrcH
, c
->chrDstH
, filterAlign
, (1<<12),
1008 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
1009 srcFilter
->chrV
, dstFilter
->chrV
, c
->param
) < 0)
1013 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof (vector
signed short)*c
->vLumFilterSize
*c
->dstH
, fail
);
1014 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof (vector
signed short)*c
->vChrFilterSize
*c
->chrDstH
, fail
);
1016 for (i
=0;i
<c
->vLumFilterSize
*c
->dstH
;i
++) {
1018 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1020 p
[j
] = c
->vLumFilter
[i
];
1023 for (i
=0;i
<c
->vChrFilterSize
*c
->chrDstH
;i
++) {
1025 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1027 p
[j
] = c
->vChrFilter
[i
];
1032 // calculate buffer sizes so that they won't run out while handling these damn slices
1033 c
->vLumBufSize
= c
->vLumFilterSize
;
1034 c
->vChrBufSize
= c
->vChrFilterSize
;
1035 for (i
=0; i
<dstH
; i
++) {
1036 int chrI
= i
*c
->chrDstH
/ dstH
;
1037 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1038 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)<<c
->chrSrcVSubSample
));
1040 nextSlice
>>= c
->chrSrcVSubSample
;
1041 nextSlice
<<= c
->chrSrcVSubSample
;
1042 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1043 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1044 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
< (nextSlice
>>c
->chrSrcVSubSample
))
1045 c
->vChrBufSize
= (nextSlice
>>c
->chrSrcVSubSample
) - c
->vChrFilterPos
[chrI
];
1048 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
1049 // allocate several megabytes to handle all possible cases)
1050 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1051 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
, c
->vChrBufSize
*2*sizeof(int16_t*), fail
);
1052 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1053 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1054 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
1055 /* align at 16 bytes for AltiVec */
1056 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1057 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1058 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+c
->vLumBufSize
];
1060 for (i
=0; i
<c
->vChrBufSize
; i
++) {
1061 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
[i
+c
->vChrBufSize
], (VOF
+1)*2, fail
);
1062 c
->chrPixBuf
[i
] = c
->chrPixBuf
[i
+c
->vChrBufSize
];
1064 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1065 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1066 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1067 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+c
->vLumBufSize
];
1070 //try to avoid drawing green stuff between the right end and the stride end
1071 for (i
=0; i
<c
->vChrBufSize
; i
++) memset(c
->chrPixBuf
[i
], 64, (VOF
+1)*2);
1073 assert(2*VOFW
== VOF
);
1075 assert(c
->chrDstH
<= dstH
);
1077 if (flags
&SWS_PRINT_INFO
) {
1078 if (flags
&SWS_FAST_BILINEAR
)
1079 av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1080 else if (flags
&SWS_BILINEAR
)
1081 av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1082 else if (flags
&SWS_BICUBIC
)
1083 av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1084 else if (flags
&SWS_X
)
1085 av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1086 else if (flags
&SWS_POINT
)
1087 av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1088 else if (flags
&SWS_AREA
)
1089 av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1090 else if (flags
&SWS_BICUBLIN
)
1091 av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1092 else if (flags
&SWS_GAUSS
)
1093 av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1094 else if (flags
&SWS_SINC
)
1095 av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1096 else if (flags
&SWS_LANCZOS
)
1097 av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1098 else if (flags
&SWS_SPLINE
)
1099 av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1101 av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1103 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1104 sws_format_name(srcFormat
),
1106 dstFormat
== PIX_FMT_BGR555
|| dstFormat
== PIX_FMT_BGR565
||
1107 dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1108 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
? "dithered " : "",
1112 sws_format_name(dstFormat
));
1114 if (flags
& SWS_CPU_CAPS_MMX2
)
1115 av_log(c
, AV_LOG_INFO
, "using MMX2\n");
1116 else if (flags
& SWS_CPU_CAPS_3DNOW
)
1117 av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1118 else if (flags
& SWS_CPU_CAPS_MMX
)
1119 av_log(c
, AV_LOG_INFO
, "using MMX\n");
1120 else if (flags
& SWS_CPU_CAPS_ALTIVEC
)
1121 av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1123 av_log(c
, AV_LOG_INFO
, "using C\n");
1125 if (flags
& SWS_CPU_CAPS_MMX
) {
1126 if (c
->canMMX2BeUsed
&& (flags
&SWS_FAST_BILINEAR
))
1127 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
1129 if (c
->hLumFilterSize
==4)
1130 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal luminance scaling\n");
1131 else if (c
->hLumFilterSize
==8)
1132 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal luminance scaling\n");
1134 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal luminance scaling\n");
1136 if (c
->hChrFilterSize
==4)
1137 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
1138 else if (c
->hChrFilterSize
==8)
1139 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
1141 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal chrominance scaling\n");
1145 av_log(c
, AV_LOG_VERBOSE
, "using x86 asm scaler for horizontal scaling\n");
1147 if (flags
& SWS_FAST_BILINEAR
)
1148 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR C scaler for horizontal scaling\n");
1150 av_log(c
, AV_LOG_VERBOSE
, "using C scaler for horizontal scaling\n");
1153 if (isPlanarYUV(dstFormat
)) {
1154 if (c
->vLumFilterSize
==1)
1155 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1157 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1159 if (c
->vLumFilterSize
==1 && c
->vChrFilterSize
==2)
1160 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
1161 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1162 else if (c
->vLumFilterSize
==2 && c
->vChrFilterSize
==2)
1163 av_log(c
, AV_LOG_VERBOSE
, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1165 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1168 if (dstFormat
==PIX_FMT_BGR24
)
1169 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR24 converter\n",
1170 (flags
& SWS_CPU_CAPS_MMX2
) ? "MMX2" : ((flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C"));
1171 else if (dstFormat
==PIX_FMT_RGB32
)
1172 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR32 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1173 else if (dstFormat
==PIX_FMT_BGR565
)
1174 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR16 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1175 else if (dstFormat
==PIX_FMT_BGR555
)
1176 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR15 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1177 else if (dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1178 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
)
1179 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR12 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1181 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1182 av_log(c
, AV_LOG_DEBUG
, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1183 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1184 av_log(c
, AV_LOG_DEBUG
, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1185 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
, c
->chrXInc
, c
->chrYInc
);
1188 c
->swScale
= ff_getSwsFunc(c
);
1196 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1197 float lumaSharpen
, float chromaSharpen
,
1198 float chromaHShift
, float chromaVShift
,
1201 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1205 if (lumaGBlur
!=0.0) {
1206 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1207 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1209 filter
->lumH
= sws_getIdentityVec();
1210 filter
->lumV
= sws_getIdentityVec();
1213 if (chromaGBlur
!=0.0) {
1214 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1215 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1217 filter
->chrH
= sws_getIdentityVec();
1218 filter
->chrV
= sws_getIdentityVec();
1221 if (chromaSharpen
!=0.0) {
1222 SwsVector
*id
= sws_getIdentityVec();
1223 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1224 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1225 sws_addVec(filter
->chrH
, id
);
1226 sws_addVec(filter
->chrV
, id
);
1230 if (lumaSharpen
!=0.0) {
1231 SwsVector
*id
= sws_getIdentityVec();
1232 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1233 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1234 sws_addVec(filter
->lumH
, id
);
1235 sws_addVec(filter
->lumV
, id
);
1239 if (chromaHShift
!= 0.0)
1240 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+0.5));
1242 if (chromaVShift
!= 0.0)
1243 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+0.5));
1245 sws_normalizeVec(filter
->chrH
, 1.0);
1246 sws_normalizeVec(filter
->chrV
, 1.0);
1247 sws_normalizeVec(filter
->lumH
, 1.0);
1248 sws_normalizeVec(filter
->lumV
, 1.0);
1250 if (verbose
) sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1251 if (verbose
) sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1256 SwsVector
*sws_allocVec(int length
)
1258 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1261 vec
->length
= length
;
1262 vec
->coeff
= av_malloc(sizeof(double) * length
);
1268 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1270 const int length
= (int)(variance
*quality
+ 0.5) | 1;
1272 double middle
= (length
-1)*0.5;
1273 SwsVector
*vec
= sws_allocVec(length
);
1278 for (i
=0; i
<length
; i
++) {
1279 double dist
= i
-middle
;
1280 vec
->coeff
[i
]= exp(-dist
*dist
/(2*variance
*variance
)) / sqrt(2*variance
*M_PI
);
1283 sws_normalizeVec(vec
, 1.0);
1288 SwsVector
*sws_getConstVec(double c
, int length
)
1291 SwsVector
*vec
= sws_allocVec(length
);
1296 for (i
=0; i
<length
; i
++)
1302 SwsVector
*sws_getIdentityVec(void)
1304 return sws_getConstVec(1.0, 1);
1307 static double sws_dcVec(SwsVector
*a
)
1312 for (i
=0; i
<a
->length
; i
++)
1318 void sws_scaleVec(SwsVector
*a
, double scalar
)
1322 for (i
=0; i
<a
->length
; i
++)
1323 a
->coeff
[i
]*= scalar
;
1326 void sws_normalizeVec(SwsVector
*a
, double height
)
1328 sws_scaleVec(a
, height
/sws_dcVec(a
));
1331 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1333 int length
= a
->length
+ b
->length
- 1;
1335 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1340 for (i
=0; i
<a
->length
; i
++) {
1341 for (j
=0; j
<b
->length
; j
++) {
1342 vec
->coeff
[i
+j
]+= a
->coeff
[i
]*b
->coeff
[j
];
1349 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1351 int length
= FFMAX(a
->length
, b
->length
);
1353 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1358 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1359 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]+= b
->coeff
[i
];
1364 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1366 int length
= FFMAX(a
->length
, b
->length
);
1368 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1373 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1374 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]-= b
->coeff
[i
];
1379 /* shift left / or right if "shift" is negative */
1380 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1382 int length
= a
->length
+ FFABS(shift
)*2;
1384 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1389 for (i
=0; i
<a
->length
; i
++) {
1390 vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2 - shift
]= a
->coeff
[i
];
1396 void sws_shiftVec(SwsVector
*a
, int shift
)
1398 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1400 a
->coeff
= shifted
->coeff
;
1401 a
->length
= shifted
->length
;
1405 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1407 SwsVector
*sum
= sws_sumVec(a
, b
);
1409 a
->coeff
= sum
->coeff
;
1410 a
->length
= sum
->length
;
1414 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1416 SwsVector
*diff
= sws_diffVec(a
, b
);
1418 a
->coeff
= diff
->coeff
;
1419 a
->length
= diff
->length
;
1423 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1425 SwsVector
*conv
= sws_getConvVec(a
, b
);
1427 a
->coeff
= conv
->coeff
;
1428 a
->length
= conv
->length
;
1432 SwsVector
*sws_cloneVec(SwsVector
*a
)
1435 SwsVector
*vec
= sws_allocVec(a
->length
);
1440 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
]= a
->coeff
[i
];
1445 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1452 for (i
=0; i
<a
->length
; i
++)
1453 if (a
->coeff
[i
]>max
) max
= a
->coeff
[i
];
1455 for (i
=0; i
<a
->length
; i
++)
1456 if (a
->coeff
[i
]<min
) min
= a
->coeff
[i
];
1460 for (i
=0; i
<a
->length
; i
++) {
1461 int x
= (int)((a
->coeff
[i
]-min
)*60.0/range
+0.5);
1462 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1463 for (;x
>0; x
--) av_log(log_ctx
, log_level
, " ");
1464 av_log(log_ctx
, log_level
, "|\n");
1468 #if LIBSWSCALE_VERSION_MAJOR < 1
1469 void sws_printVec(SwsVector
*a
)
1471 sws_printVec2(a
, NULL
, AV_LOG_DEBUG
);
1475 void sws_freeVec(SwsVector
*a
)
1478 av_freep(&a
->coeff
);
1483 void sws_freeFilter(SwsFilter
*filter
)
1485 if (!filter
) return;
1487 if (filter
->lumH
) sws_freeVec(filter
->lumH
);
1488 if (filter
->lumV
) sws_freeVec(filter
->lumV
);
1489 if (filter
->chrH
) sws_freeVec(filter
->chrH
);
1490 if (filter
->chrV
) sws_freeVec(filter
->chrV
);
1494 void sws_freeContext(SwsContext
*c
)
1500 for (i
=0; i
<c
->vLumBufSize
; i
++)
1501 av_freep(&c
->lumPixBuf
[i
]);
1502 av_freep(&c
->lumPixBuf
);
1506 for (i
=0; i
<c
->vChrBufSize
; i
++)
1507 av_freep(&c
->chrPixBuf
[i
]);
1508 av_freep(&c
->chrPixBuf
);
1511 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1512 for (i
=0; i
<c
->vLumBufSize
; i
++)
1513 av_freep(&c
->alpPixBuf
[i
]);
1514 av_freep(&c
->alpPixBuf
);
1517 av_freep(&c
->vLumFilter
);
1518 av_freep(&c
->vChrFilter
);
1519 av_freep(&c
->hLumFilter
);
1520 av_freep(&c
->hChrFilter
);
1522 av_freep(&c
->vYCoeffsBank
);
1523 av_freep(&c
->vCCoeffsBank
);
1526 av_freep(&c
->vLumFilterPos
);
1527 av_freep(&c
->vChrFilterPos
);
1528 av_freep(&c
->hLumFilterPos
);
1529 av_freep(&c
->hChrFilterPos
);
1532 #ifdef MAP_ANONYMOUS
1533 if (c
->lumMmx2FilterCode
) munmap(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
);
1534 if (c
->chrMmx2FilterCode
) munmap(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
);
1535 #elif HAVE_VIRTUALALLOC
1536 if (c
->lumMmx2FilterCode
) VirtualFree(c
->lumMmx2FilterCode
, 0, MEM_RELEASE
);
1537 if (c
->chrMmx2FilterCode
) VirtualFree(c
->chrMmx2FilterCode
, 0, MEM_RELEASE
);
1539 av_free(c
->lumMmx2FilterCode
);
1540 av_free(c
->chrMmx2FilterCode
);
1542 c
->lumMmx2FilterCode
=NULL
;
1543 c
->chrMmx2FilterCode
=NULL
;
1544 #endif /* ARCH_X86 */
1546 av_freep(&c
->yuvTable
);
1551 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
,
1552 int srcW
, int srcH
, enum PixelFormat srcFormat
,
1553 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
1554 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
1556 static const double default_param
[2] = {SWS_PARAM_DEFAULT
, SWS_PARAM_DEFAULT
};
1559 param
= default_param
;
1562 (context
->srcW
!= srcW
||
1563 context
->srcH
!= srcH
||
1564 context
->srcFormat
!= srcFormat
||
1565 context
->dstW
!= dstW
||
1566 context
->dstH
!= dstH
||
1567 context
->dstFormat
!= dstFormat
||
1568 context
->flags
!= flags
||
1569 context
->param
[0] != param
[0] ||
1570 context
->param
[1] != param
[1])) {
1571 sws_freeContext(context
);
1576 return sws_getContext(srcW
, srcH
, srcFormat
,
1577 dstW
, dstH
, dstFormat
, flags
,
1578 srcFilter
, dstFilter
, param
);