2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
22 #define _DARWIN_C_SOURCE // needed for MAP_ANON
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
40 #include "swscale_internal.h"
42 #include "libavutil/intreadwrite.h"
43 #include "libavutil/x86_cpu.h"
44 #include "libavutil/avutil.h"
45 #include "libavutil/bswap.h"
46 #include "libavutil/pixdesc.h"
48 unsigned swscale_version(void)
50 return LIBSWSCALE_VERSION_INT
;
53 const char *swscale_configuration(void)
55 return FFMPEG_CONFIGURATION
;
58 const char *swscale_license(void)
60 #define LICENSE_PREFIX "libswscale license: "
61 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
64 #define RET 0xC3 //near return opcode for x86
/*
 * Evaluates to non-zero when x equals one of the pixel formats listed below,
 * 0 otherwise.  The argument is expanded once per comparison, so it must be
 * an expression without side effects.
 */
#define isSupportedIn(x)    (                                       \
        (x)==PIX_FMT_YUV410P     || (x)==PIX_FMT_YUV411P            \
     || (x)==PIX_FMT_YUV420P     || (x)==PIX_FMT_YUVA420P           \
     || (x)==PIX_FMT_YUV422P     || (x)==PIX_FMT_YUV440P            \
     || (x)==PIX_FMT_YUV444P                                        \
     || (x)==PIX_FMT_YUVJ420P    || (x)==PIX_FMT_YUVJ422P           \
     || (x)==PIX_FMT_YUVJ440P    || (x)==PIX_FMT_YUVJ444P           \
     || (x)==PIX_FMT_YUV420P16LE || (x)==PIX_FMT_YUV420P16BE        \
     || (x)==PIX_FMT_YUV422P16LE || (x)==PIX_FMT_YUV422P16BE        \
     || (x)==PIX_FMT_YUV444P16LE || (x)==PIX_FMT_YUV444P16BE        \
     || (x)==PIX_FMT_YUYV422     || (x)==PIX_FMT_UYVY422            \
     || (x)==PIX_FMT_NV12        || (x)==PIX_FMT_NV21               \
     || (x)==PIX_FMT_RGB24       || (x)==PIX_FMT_BGR24              \
     || (x)==PIX_FMT_RGB32       || (x)==PIX_FMT_BGR32              \
     || (x)==PIX_FMT_RGB32_1     || (x)==PIX_FMT_BGR32_1            \
     || (x)==PIX_FMT_RGB48BE     || (x)==PIX_FMT_RGB48LE            \
     || (x)==PIX_FMT_RGB565      || (x)==PIX_FMT_BGR565             \
     || (x)==PIX_FMT_RGB555      || (x)==PIX_FMT_BGR555             \
     || (x)==PIX_FMT_RGB8        || (x)==PIX_FMT_BGR8               \
     || (x)==PIX_FMT_RGB4_BYTE   || (x)==PIX_FMT_BGR4_BYTE          \
     || (x)==PIX_FMT_PAL8                                           \
     || (x)==PIX_FMT_GRAY8       || (x)==PIX_FMT_Y400A              \
     || (x)==PIX_FMT_GRAY16BE    || (x)==PIX_FMT_GRAY16LE           \
     || (x)==PIX_FMT_MONOWHITE   || (x)==PIX_FMT_MONOBLACK          \
    )
114 int sws_isSupportedInput(enum PixelFormat pix_fmt
)
116 return isSupportedIn(pix_fmt
);
119 #define isSupportedOut(x) ( \
120 (x)==PIX_FMT_YUV420P \
121 || (x)==PIX_FMT_YUVA420P \
122 || (x)==PIX_FMT_YUYV422 \
123 || (x)==PIX_FMT_UYVY422 \
124 || (x)==PIX_FMT_YUV444P \
125 || (x)==PIX_FMT_YUV422P \
126 || (x)==PIX_FMT_YUV411P \
127 || (x)==PIX_FMT_YUVJ420P \
128 || (x)==PIX_FMT_YUVJ422P \
129 || (x)==PIX_FMT_YUVJ440P \
130 || (x)==PIX_FMT_YUVJ444P \
132 || (x)==PIX_FMT_NV12 \
133 || (x)==PIX_FMT_NV21 \
134 || (x)==PIX_FMT_GRAY16BE \
135 || (x)==PIX_FMT_GRAY16LE \
136 || (x)==PIX_FMT_GRAY8 \
137 || (x)==PIX_FMT_YUV410P \
138 || (x)==PIX_FMT_YUV440P \
139 || (x)==PIX_FMT_YUV420P16LE \
140 || (x)==PIX_FMT_YUV422P16LE \
141 || (x)==PIX_FMT_YUV444P16LE \
142 || (x)==PIX_FMT_YUV420P16BE \
143 || (x)==PIX_FMT_YUV422P16BE \
144 || (x)==PIX_FMT_YUV444P16BE \
147 int sws_isSupportedOutput(enum PixelFormat pix_fmt
)
149 return isSupportedOut(pix_fmt
);
152 extern const int32_t ff_yuv2rgb_coeffs
[8][4];
154 const char *sws_format_name(enum PixelFormat format
)
156 if ((unsigned)format
< PIX_FMT_NB
&& av_pix_fmt_descriptors
[format
].name
)
157 return av_pix_fmt_descriptors
[format
].name
;
159 return "Unknown format";
162 static double getSplineCoeff(double a
, double b
, double c
, double d
, double dist
)
164 // printf("%f %f %f %f %f\n", a,b,c,d,dist);
165 if (dist
<=1.0) return ((d
*dist
+ c
)*dist
+ b
)*dist
+a
;
166 else return getSplineCoeff( 0.0,
173 static int initFilter(int16_t **outFilter
, int16_t **filterPos
, int *outFilterSize
, int xInc
,
174 int srcW
, int dstW
, int filterAlign
, int one
, int flags
,
175 SwsVector
*srcFilter
, SwsVector
*dstFilter
, double param
[2])
181 int64_t *filter
=NULL
;
182 int64_t *filter2
=NULL
;
183 const int64_t fone
= 1LL<<54;
186 if (flags
& SWS_CPU_CAPS_MMX
)
187 __asm__
volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
190 // NOTE: the +1 is for the MMX scaler which reads over the end
191 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+1)*sizeof(int16_t), fail
);
193 if (FFABS(xInc
- 0x10000) <10) { // unscaled
196 FF_ALLOCZ_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
198 for (i
=0; i
<dstW
; i
++) {
199 filter
[i
*filterSize
]= fone
;
203 } else if (flags
&SWS_POINT
) { // lame looking point sampling mode
207 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
209 xDstInSrc
= xInc
/2 - 0x8000;
210 for (i
=0; i
<dstW
; i
++) {
211 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
217 } else if ((xInc
<= (1<<16) && (flags
&SWS_AREA
)) || (flags
&SWS_FAST_BILINEAR
)) { // bilinear upscale
221 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
223 xDstInSrc
= xInc
/2 - 0x8000;
224 for (i
=0; i
<dstW
; i
++) {
225 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
229 //bilinear upscale / linear interpolate / area averaging
230 for (j
=0; j
<filterSize
; j
++) {
231 int64_t coeff
= fone
- FFABS((xx
<<16) - xDstInSrc
)*(fone
>>16);
232 if (coeff
<0) coeff
=0;
233 filter
[i
*filterSize
+ j
]= coeff
;
242 if (flags
&SWS_BICUBIC
) sizeFactor
= 4;
243 else if (flags
&SWS_X
) sizeFactor
= 8;
244 else if (flags
&SWS_AREA
) sizeFactor
= 1; //downscale only, for upscale it is bilinear
245 else if (flags
&SWS_GAUSS
) sizeFactor
= 8; // infinite ;)
246 else if (flags
&SWS_LANCZOS
) sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2*param
[0]) : 6;
247 else if (flags
&SWS_SINC
) sizeFactor
= 20; // infinite ;)
248 else if (flags
&SWS_SPLINE
) sizeFactor
= 20; // infinite ;)
249 else if (flags
&SWS_BILINEAR
) sizeFactor
= 2;
251 sizeFactor
= 0; //GCC warning killer
255 if (xInc
<= 1<<16) filterSize
= 1 + sizeFactor
; // upscale
256 else filterSize
= 1 + (sizeFactor
*srcW
+ dstW
- 1)/ dstW
;
258 if (filterSize
> srcW
-2) filterSize
=srcW
-2;
260 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
262 xDstInSrc
= xInc
- 0x10000;
263 for (i
=0; i
<dstW
; i
++) {
264 int xx
= (xDstInSrc
- ((filterSize
-2)<<16)) / (1<<17);
267 for (j
=0; j
<filterSize
; j
++) {
268 int64_t d
= ((int64_t)FFABS((xx
<<17) - xDstInSrc
))<<13;
274 floatd
= d
* (1.0/(1<<30));
276 if (flags
& SWS_BICUBIC
) {
277 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1<<24);
278 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1<<24);
279 int64_t dd
= ( d
*d
)>>30;
280 int64_t ddd
= (dd
*d
)>>30;
283 coeff
= (12*(1<<24)-9*B
-6*C
)*ddd
+ (-18*(1<<24)+12*B
+6*C
)*dd
+ (6*(1<<24)-2*B
)*(1<<30);
284 else if (d
< 1LL<<31)
285 coeff
= (-B
-6*C
)*ddd
+ (6*B
+30*C
)*dd
+ (-12*B
-48*C
)*d
+ (8*B
+24*C
)*(1<<30);
288 coeff
*= fone
>>(30+24);
290 /* else if (flags & SWS_X) {
291 double p= param ? param*0.01 : 0.3;
292 coeff = d ? sin(d*M_PI)/(d*M_PI) : 1.0;
293 coeff*= pow(2.0, - p*d*d);
295 else if (flags
& SWS_X
) {
296 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
300 c
= cos(floatd
*M_PI
);
303 if (c
<0.0) c
= -pow(-c
, A
);
305 coeff
= (c
*0.5 + 0.5)*fone
;
306 } else if (flags
& SWS_AREA
) {
307 int64_t d2
= d
- (1<<29);
308 if (d2
*xInc
< -(1LL<<(29+16))) coeff
= 1.0 * (1LL<<(30+16));
309 else if (d2
*xInc
< (1LL<<(29+16))) coeff
= -d2
*xInc
+ (1LL<<(29+16));
311 coeff
*= fone
>>(30+16);
312 } else if (flags
& SWS_GAUSS
) {
313 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
314 coeff
= (pow(2.0, - p
*floatd
*floatd
))*fone
;
315 } else if (flags
& SWS_SINC
) {
316 coeff
= (d
? sin(floatd
*M_PI
)/(floatd
*M_PI
) : 1.0)*fone
;
317 } else if (flags
& SWS_LANCZOS
) {
318 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
319 coeff
= (d
? sin(floatd
*M_PI
)*sin(floatd
*M_PI
/p
)/(floatd
*floatd
*M_PI
*M_PI
/p
) : 1.0)*fone
;
320 if (floatd
>p
) coeff
=0;
321 } else if (flags
& SWS_BILINEAR
) {
323 if (coeff
<0) coeff
=0;
325 } else if (flags
& SWS_SPLINE
) {
326 double p
=-2.196152422706632;
327 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
-1.0, floatd
) * fone
;
329 coeff
= 0.0; //GCC warning killer
333 filter
[i
*filterSize
+ j
]= coeff
;
340 /* apply src & dst Filter to filter -> filter2
343 assert(filterSize
>0);
344 filter2Size
= filterSize
;
345 if (srcFilter
) filter2Size
+= srcFilter
->length
- 1;
346 if (dstFilter
) filter2Size
+= dstFilter
->length
- 1;
347 assert(filter2Size
>0);
348 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
*dstW
*sizeof(*filter2
), fail
);
350 for (i
=0; i
<dstW
; i
++) {
354 for (k
=0; k
<srcFilter
->length
; k
++) {
355 for (j
=0; j
<filterSize
; j
++)
356 filter2
[i
*filter2Size
+ k
+ j
] += srcFilter
->coeff
[k
]*filter
[i
*filterSize
+ j
];
359 for (j
=0; j
<filterSize
; j
++)
360 filter2
[i
*filter2Size
+ j
]= filter
[i
*filterSize
+ j
];
364 (*filterPos
)[i
]+= (filterSize
-1)/2 - (filter2Size
-1)/2;
368 /* try to reduce the filter-size (step1 find size and shift left) */
369 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
371 for (i
=dstW
-1; i
>=0; i
--) {
372 int min
= filter2Size
;
376 /* get rid of near zero elements on the left by shifting left */
377 for (j
=0; j
<filter2Size
; j
++) {
379 cutOff
+= FFABS(filter2
[i
*filter2Size
]);
381 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
383 /* preserve monotonicity because the core can't handle the filter otherwise */
384 if (i
<dstW
-1 && (*filterPos
)[i
] >= (*filterPos
)[i
+1]) break;
386 // move filter coefficients left
387 for (k
=1; k
<filter2Size
; k
++)
388 filter2
[i
*filter2Size
+ k
- 1]= filter2
[i
*filter2Size
+ k
];
389 filter2
[i
*filter2Size
+ k
- 1]= 0;
394 /* count near zeros on the right */
395 for (j
=filter2Size
-1; j
>0; j
--) {
396 cutOff
+= FFABS(filter2
[i
*filter2Size
+ j
]);
398 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
402 if (min
>minFilterSize
) minFilterSize
= min
;
405 if (flags
& SWS_CPU_CAPS_ALTIVEC
) {
406 // we can handle the special case 4,
407 // so we don't want to go to the full 8
408 if (minFilterSize
< 5)
411 // We really don't want to waste our time
412 // doing useless computation, so fall back on
413 // the scalar C code for very small filters.
414 // Vectorizing is worth it only if you have a
415 // decent-sized vector.
416 if (minFilterSize
< 3)
420 if (flags
& SWS_CPU_CAPS_MMX
) {
421 // special case for unscaled vertical filtering
422 if (minFilterSize
== 1 && filterAlign
== 2)
426 assert(minFilterSize
> 0);
427 filterSize
= (minFilterSize
+(filterAlign
-1)) & (~(filterAlign
-1));
428 assert(filterSize
> 0);
429 filter
= av_malloc(filterSize
*dstW
*sizeof(*filter
));
430 if (filterSize
>= MAX_FILTER_SIZE
*16/((flags
&SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
432 *outFilterSize
= filterSize
;
434 if (flags
&SWS_PRINT_INFO
)
435 av_log(NULL
, AV_LOG_VERBOSE
, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size
, filterSize
);
436 /* try to reduce the filter-size (step2 reduce it) */
437 for (i
=0; i
<dstW
; i
++) {
440 for (j
=0; j
<filterSize
; j
++) {
441 if (j
>=filter2Size
) filter
[i
*filterSize
+ j
]= 0;
442 else filter
[i
*filterSize
+ j
]= filter2
[i
*filter2Size
+ j
];
443 if((flags
& SWS_BITEXACT
) && j
>=minFilterSize
)
444 filter
[i
*filterSize
+ j
]= 0;
448 //FIXME try to align filterPos if possible
451 for (i
=0; i
<dstW
; i
++) {
453 if ((*filterPos
)[i
] < 0) {
454 // move filter coefficients left to compensate for filterPos
455 for (j
=1; j
<filterSize
; j
++) {
456 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
457 filter
[i
*filterSize
+ left
] += filter
[i
*filterSize
+ j
];
458 filter
[i
*filterSize
+ j
]=0;
463 if ((*filterPos
)[i
] + filterSize
> srcW
) {
464 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
465 // move filter coefficients right to compensate for filterPos
466 for (j
=filterSize
-2; j
>=0; j
--) {
467 int right
= FFMIN(j
+ shift
, filterSize
-1);
468 filter
[i
*filterSize
+right
] += filter
[i
*filterSize
+j
];
469 filter
[i
*filterSize
+j
]=0;
471 (*filterPos
)[i
]= srcW
- filterSize
;
475 // Note the +1 is for the MMX scaler which reads over the end
476 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
477 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
, *outFilterSize
*(dstW
+1)*sizeof(int16_t), fail
);
479 /* normalize & store in outFilter */
480 for (i
=0; i
<dstW
; i
++) {
485 for (j
=0; j
<filterSize
; j
++) {
486 sum
+= filter
[i
*filterSize
+ j
];
488 sum
= (sum
+ one
/2)/ one
;
489 for (j
=0; j
<*outFilterSize
; j
++) {
490 int64_t v
= filter
[i
*filterSize
+ j
] + error
;
491 int intV
= ROUNDED_DIV(v
, sum
);
492 (*outFilter
)[i
*(*outFilterSize
) + j
]= intV
;
497 (*filterPos
)[dstW
]= (*filterPos
)[dstW
-1]; // the MMX scaler will read over the end
498 for (i
=0; i
<*outFilterSize
; i
++) {
499 int j
= dstW
*(*outFilterSize
);
500 (*outFilter
)[j
+ i
]= (*outFilter
)[j
+ i
- (*outFilterSize
)];
510 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
511 static int initMMX2HScaler(int dstW
, int xInc
, uint8_t *filterCode
, int16_t *filter
, int32_t *filterPos
, int numSplits
)
514 x86_reg imm8OfPShufW1A
;
515 x86_reg imm8OfPShufW2A
;
516 x86_reg fragmentLengthA
;
518 x86_reg imm8OfPShufW1B
;
519 x86_reg imm8OfPShufW2B
;
520 x86_reg fragmentLengthB
;
525 // create an optimized horizontal scaling routine
526 /* This scaler is made of runtime-generated MMX2 code using specially
527 * tuned pshufw instructions. For every four output pixels, if four
528 * input pixels are enough for the fast bilinear scaling, then a chunk
529 * of fragmentB is used. If five input pixels are needed, then a chunk
530 * of fragmentA is used.
539 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
540 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
541 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
542 "punpcklbw %%mm7, %%mm1 \n\t"
543 "punpcklbw %%mm7, %%mm0 \n\t"
544 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
546 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
548 "psubw %%mm1, %%mm0 \n\t"
549 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
550 "pmullw %%mm3, %%mm0 \n\t"
551 "psllw $7, %%mm1 \n\t"
552 "paddw %%mm1, %%mm0 \n\t"
554 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
556 "add $8, %%"REG_a
" \n\t"
560 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
561 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
562 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
567 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
571 :"=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
572 "=r" (fragmentLengthA
)
579 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
580 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
581 "punpcklbw %%mm7, %%mm0 \n\t"
582 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
584 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
586 "psubw %%mm1, %%mm0 \n\t"
587 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
588 "pmullw %%mm3, %%mm0 \n\t"
589 "psllw $7, %%mm1 \n\t"
590 "paddw %%mm1, %%mm0 \n\t"
592 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
594 "add $8, %%"REG_a
" \n\t"
598 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
599 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
600 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
605 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
609 :"=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
610 "=r" (fragmentLengthB
)
613 xpos
= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
616 for (i
=0; i
<dstW
/numSplits
; i
++) {
621 int b
=((xpos
+xInc
)>>16) - xx
;
622 int c
=((xpos
+xInc
*2)>>16) - xx
;
623 int d
=((xpos
+xInc
*3)>>16) - xx
;
625 uint8_t *fragment
= (d
+1<4) ? fragmentB
: fragmentA
;
626 x86_reg imm8OfPShufW1
= (d
+1<4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
627 x86_reg imm8OfPShufW2
= (d
+1<4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
628 x86_reg fragmentLength
= (d
+1<4) ? fragmentLengthB
: fragmentLengthA
;
629 int maxShift
= 3-(d
+inc
);
633 filter
[i
] = (( xpos
& 0xFFFF) ^ 0xFFFF)>>9;
634 filter
[i
+1] = (((xpos
+xInc
) & 0xFFFF) ^ 0xFFFF)>>9;
635 filter
[i
+2] = (((xpos
+xInc
*2) & 0xFFFF) ^ 0xFFFF)>>9;
636 filter
[i
+3] = (((xpos
+xInc
*3) & 0xFFFF) ^ 0xFFFF)>>9;
639 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
641 filterCode
[fragmentPos
+ imm8OfPShufW1
]=
642 (a
+inc
) | ((b
+inc
)<<2) | ((c
+inc
)<<4) | ((d
+inc
)<<6);
643 filterCode
[fragmentPos
+ imm8OfPShufW2
]=
644 a
| (b
<<2) | (c
<<4) | (d
<<6);
646 if (i
+4-inc
>=dstW
) shift
=maxShift
; //avoid overread
647 else if ((filterPos
[i
/2]&3) <= maxShift
) shift
=filterPos
[i
/2]&3; //Align
649 if (shift
&& i
>=shift
) {
650 filterCode
[fragmentPos
+ imm8OfPShufW1
]+= 0x55*shift
;
651 filterCode
[fragmentPos
+ imm8OfPShufW2
]+= 0x55*shift
;
652 filterPos
[i
/2]-=shift
;
656 fragmentPos
+= fragmentLength
;
659 filterCode
[fragmentPos
]= RET
;
664 filterPos
[((i
/2)+1)&(~1)]= xpos
>>16; // needed to jump to the next part
666 return fragmentPos
+ 1;
668 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
670 static void getSubSampleFactors(int *h
, int *v
, enum PixelFormat format
)
672 *h
= av_pix_fmt_descriptors
[format
].log2_chroma_w
;
673 *v
= av_pix_fmt_descriptors
[format
].log2_chroma_h
;
676 static int update_flags_cpu(int flags
);
678 int sws_setColorspaceDetails(SwsContext
*c
, const int inv_table
[4], int srcRange
, const int table
[4], int dstRange
, int brightness
, int contrast
, int saturation
)
680 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int)*4);
681 memcpy(c
->dstColorspaceTable
, table
, sizeof(int)*4);
683 c
->brightness
= brightness
;
684 c
->contrast
= contrast
;
685 c
->saturation
= saturation
;
686 c
->srcRange
= srcRange
;
687 c
->dstRange
= dstRange
;
688 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
690 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[c
->dstFormat
]);
691 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[c
->srcFormat
]);
692 c
->flags
= update_flags_cpu(c
->flags
);
694 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
, contrast
, saturation
);
698 if (c
->flags
& SWS_CPU_CAPS_ALTIVEC
)
699 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
, contrast
, saturation
);
704 int sws_getColorspaceDetails(SwsContext
*c
, int **inv_table
, int *srcRange
, int **table
, int *dstRange
, int *brightness
, int *contrast
, int *saturation
)
706 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
708 *inv_table
= c
->srcColorspaceTable
;
709 *table
= c
->dstColorspaceTable
;
710 *srcRange
= c
->srcRange
;
711 *dstRange
= c
->dstRange
;
712 *brightness
= c
->brightness
;
713 *contrast
= c
->contrast
;
714 *saturation
= c
->saturation
;
719 static int handle_jpeg(enum PixelFormat
*format
)
722 case PIX_FMT_YUVJ420P
: *format
= PIX_FMT_YUV420P
; return 1;
723 case PIX_FMT_YUVJ422P
: *format
= PIX_FMT_YUV422P
; return 1;
724 case PIX_FMT_YUVJ444P
: *format
= PIX_FMT_YUV444P
; return 1;
725 case PIX_FMT_YUVJ440P
: *format
= PIX_FMT_YUV440P
; return 1;
730 static int update_flags_cpu(int flags
)
732 #if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
733 flags
&= ~( SWS_CPU_CAPS_MMX
737 |SWS_CPU_CAPS_ALTIVEC
739 flags
|= ff_hardcodedcpuflags();
740 #endif /* CONFIG_RUNTIME_CPUDETECT */
744 SwsContext
*sws_alloc_context(void)
746 SwsContext
*c
= av_mallocz(sizeof(SwsContext
));
748 c
->av_class
= &sws_context_class
;
753 int sws_init_context(SwsContext
*c
, SwsFilter
*srcFilter
, SwsFilter
*dstFilter
)
756 int usesVFilter
, usesHFilter
;
758 SwsFilter dummyFilter
= {NULL
, NULL
, NULL
, NULL
};
764 enum PixelFormat srcFormat
= c
->srcFormat
;
765 enum PixelFormat dstFormat
= c
->dstFormat
;
767 flags
= c
->flags
= update_flags_cpu(c
->flags
);
769 if (flags
& SWS_CPU_CAPS_MMX
)
770 __asm__
volatile("emms\n\t"::: "memory");
772 if (!rgb15to16
) sws_rgb2rgb_init(flags
);
774 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
776 if (!isSupportedIn(srcFormat
)) {
777 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat
));
778 return AVERROR(EINVAL
);
780 if (!isSupportedOut(dstFormat
)) {
781 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat
));
782 return AVERROR(EINVAL
);
785 i
= flags
& ( SWS_POINT
796 if(!i
|| (i
& (i
-1))) {
797 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Exactly one scaler algorithm must be chosen\n");
798 return AVERROR(EINVAL
);
801 if (srcW
<4 || srcH
<1 || dstW
<8 || dstH
<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
802 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
803 srcW
, srcH
, dstW
, dstH
);
804 return AVERROR(EINVAL
);
806 if(srcW
> VOFW
|| dstW
> VOFW
) {
807 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW
)" change VOF/VOFW and recompile\n");
808 return AVERROR(EINVAL
);
811 if (!dstFilter
) dstFilter
= &dummyFilter
;
812 if (!srcFilter
) srcFilter
= &dummyFilter
;
814 c
->lumXInc
= ((srcW
<<16) + (dstW
>>1))/dstW
;
815 c
->lumYInc
= ((srcH
<<16) + (dstH
>>1))/dstH
;
816 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[dstFormat
]);
817 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[srcFormat
]);
818 c
->vRounder
= 4* 0x0001000100010001ULL
;
820 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
>1) ||
821 (srcFilter
->chrV
&& srcFilter
->chrV
->length
>1) ||
822 (dstFilter
->lumV
&& dstFilter
->lumV
->length
>1) ||
823 (dstFilter
->chrV
&& dstFilter
->chrV
->length
>1);
824 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
>1) ||
825 (srcFilter
->chrH
&& srcFilter
->chrH
->length
>1) ||
826 (dstFilter
->lumH
&& dstFilter
->lumH
->length
>1) ||
827 (dstFilter
->chrH
&& dstFilter
->chrH
->length
>1);
829 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
830 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
832 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
833 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) c
->chrDstHSubSample
=1;
835 // drop some chroma lines if the user wants it
836 c
->vChrDrop
= (flags
&SWS_SRC_V_CHR_DROP_MASK
)>>SWS_SRC_V_CHR_DROP_SHIFT
;
837 c
->chrSrcVSubSample
+= c
->vChrDrop
;
839 // drop every other pixel for chroma calculation unless user wants full chroma
840 if (isAnyRGB(srcFormat
) && !(flags
&SWS_FULL_CHR_H_INP
)
841 && srcFormat
!=PIX_FMT_RGB8
&& srcFormat
!=PIX_FMT_BGR8
842 && srcFormat
!=PIX_FMT_RGB4
&& srcFormat
!=PIX_FMT_BGR4
843 && srcFormat
!=PIX_FMT_RGB4_BYTE
&& srcFormat
!=PIX_FMT_BGR4_BYTE
844 && ((dstW
>>c
->chrDstHSubSample
) <= (srcW
>>1) || (flags
&SWS_FAST_BILINEAR
)))
845 c
->chrSrcHSubSample
=1;
847 // Note the -((-x)>>y) is so that we always round toward +inf.
848 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
849 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
850 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
851 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
853 /* unscaled special cases */
854 if (unscaled
&& !usesHFilter
&& !usesVFilter
&& (c
->srcRange
== c
->dstRange
|| isAnyRGB(dstFormat
))) {
855 ff_get_unscaled_swscale(c
);
858 if (flags
&SWS_PRINT_INFO
)
859 av_log(c
, AV_LOG_INFO
, "using unscaled %s -> %s special converter\n",
860 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
865 if (flags
& SWS_CPU_CAPS_MMX2
) {
866 c
->canMMX2BeUsed
= (dstW
>=srcW
&& (dstW
&31)==0 && (srcW
&15)==0) ? 1 : 0;
867 if (!c
->canMMX2BeUsed
&& dstW
>=srcW
&& (srcW
&15)==0 && (flags
&SWS_FAST_BILINEAR
)) {
868 if (flags
&SWS_PRINT_INFO
)
869 av_log(c
, AV_LOG_INFO
, "output width is not a multiple of 32 -> no MMX2 scaler\n");
871 if (usesHFilter
) c
->canMMX2BeUsed
=0;
876 c
->chrXInc
= ((c
->chrSrcW
<<16) + (c
->chrDstW
>>1))/c
->chrDstW
;
877 c
->chrYInc
= ((c
->chrSrcH
<<16) + (c
->chrDstH
>>1))/c
->chrDstH
;
879 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
880 // but only for the FAST_BILINEAR mode otherwise do correct scaling
881 // n-2 is the last chrominance sample available
882 // this is not perfect, but no one should notice the difference, the more correct variant
883 // would be like the vertical one, but that would require some special code for the
884 // first and last pixel
885 if (flags
&SWS_FAST_BILINEAR
) {
886 if (c
->canMMX2BeUsed
) {
890 //we don't use the x86 asm scaler if MMX is available
891 else if (flags
& SWS_CPU_CAPS_MMX
) {
892 c
->lumXInc
= ((srcW
-2)<<16)/(dstW
-2) - 20;
893 c
->chrXInc
= ((c
->chrSrcW
-2)<<16)/(c
->chrDstW
-2) - 20;
897 /* precalculate horizontal scaler filter coefficients */
899 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
900 // can't downscale !!!
901 if (c
->canMMX2BeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
902 c
->lumMmx2FilterCodeSize
= initMMX2HScaler( dstW
, c
->lumXInc
, NULL
, NULL
, NULL
, 8);
903 c
->chrMmx2FilterCodeSize
= initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, NULL
, NULL
, NULL
, 4);
906 c
->lumMmx2FilterCode
= mmap(NULL
, c
->lumMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
907 c
->chrMmx2FilterCode
= mmap(NULL
, c
->chrMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
908 #elif HAVE_VIRTUALALLOC
909 c
->lumMmx2FilterCode
= VirtualAlloc(NULL
, c
->lumMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
910 c
->chrMmx2FilterCode
= VirtualAlloc(NULL
, c
->chrMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
912 c
->lumMmx2FilterCode
= av_malloc(c
->lumMmx2FilterCodeSize
);
913 c
->chrMmx2FilterCode
= av_malloc(c
->chrMmx2FilterCodeSize
);
916 if (!c
->lumMmx2FilterCode
|| !c
->chrMmx2FilterCode
)
917 return AVERROR(ENOMEM
);
918 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/8+8)*sizeof(int16_t), fail
);
919 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/4+8)*sizeof(int16_t), fail
);
920 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/2/8+8)*sizeof(int32_t), fail
);
921 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/2/4+8)*sizeof(int32_t), fail
);
923 initMMX2HScaler( dstW
, c
->lumXInc
, c
->lumMmx2FilterCode
, c
->hLumFilter
, c
->hLumFilterPos
, 8);
924 initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, c
->chrMmx2FilterCode
, c
->hChrFilter
, c
->hChrFilterPos
, 4);
927 mprotect(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
928 mprotect(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
931 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
933 const int filterAlign
=
934 (flags
& SWS_CPU_CAPS_MMX
) ? 4 :
935 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
938 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
, &c
->hLumFilterSize
, c
->lumXInc
,
939 srcW
, dstW
, filterAlign
, 1<<14,
940 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
941 srcFilter
->lumH
, dstFilter
->lumH
, c
->param
) < 0)
943 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
, &c
->hChrFilterSize
, c
->chrXInc
,
944 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1<<14,
945 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
946 srcFilter
->chrH
, dstFilter
->chrH
, c
->param
) < 0)
949 } // initialize horizontal stuff
951 /* precalculate vertical scaler filter coefficients */
953 const int filterAlign
=
954 (flags
& SWS_CPU_CAPS_MMX
) && (flags
& SWS_ACCURATE_RND
) ? 2 :
955 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
958 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
, c
->lumYInc
,
959 srcH
, dstH
, filterAlign
, (1<<12),
960 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
961 srcFilter
->lumV
, dstFilter
->lumV
, c
->param
) < 0)
963 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
, c
->chrYInc
,
964 c
->chrSrcH
, c
->chrDstH
, filterAlign
, (1<<12),
965 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
966 srcFilter
->chrV
, dstFilter
->chrV
, c
->param
) < 0)
970 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof (vector
signed short)*c
->vLumFilterSize
*c
->dstH
, fail
);
971 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof (vector
signed short)*c
->vChrFilterSize
*c
->chrDstH
, fail
);
973 for (i
=0;i
<c
->vLumFilterSize
*c
->dstH
;i
++) {
975 short *p
= (short *)&c
->vYCoeffsBank
[i
];
977 p
[j
] = c
->vLumFilter
[i
];
980 for (i
=0;i
<c
->vChrFilterSize
*c
->chrDstH
;i
++) {
982 short *p
= (short *)&c
->vCCoeffsBank
[i
];
984 p
[j
] = c
->vChrFilter
[i
];
989 // calculate buffer sizes so that they won't run out while handling these damn slices
990 c
->vLumBufSize
= c
->vLumFilterSize
;
991 c
->vChrBufSize
= c
->vChrFilterSize
;
992 for (i
=0; i
<dstH
; i
++) {
993 int chrI
= i
*c
->chrDstH
/ dstH
;
994 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
995 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)<<c
->chrSrcVSubSample
));
997 nextSlice
>>= c
->chrSrcVSubSample
;
998 nextSlice
<<= c
->chrSrcVSubSample
;
999 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1000 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1001 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
< (nextSlice
>>c
->chrSrcVSubSample
))
1002 c
->vChrBufSize
= (nextSlice
>>c
->chrSrcVSubSample
) - c
->vChrFilterPos
[chrI
];
1005 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
1006 // allocate several megabytes to handle all possible cases)
1007 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1008 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
, c
->vChrBufSize
*2*sizeof(int16_t*), fail
);
1009 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1010 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1011 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
1012 /* align at 16 bytes for AltiVec */
1013 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1014 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1015 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+c
->vLumBufSize
];
1017 for (i
=0; i
<c
->vChrBufSize
; i
++) {
1018 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
[i
+c
->vChrBufSize
], (VOF
+1)*2, fail
);
1019 c
->chrPixBuf
[i
] = c
->chrPixBuf
[i
+c
->vChrBufSize
];
1021 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1022 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1023 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1024 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+c
->vLumBufSize
];
1027 //try to avoid drawing green stuff between the right end and the stride end
1028 for (i
=0; i
<c
->vChrBufSize
; i
++) memset(c
->chrPixBuf
[i
], 64, (VOF
+1)*2);
1030 assert(2*VOFW
== VOF
);
1032 assert(c
->chrDstH
<= dstH
);
1034 if (flags
&SWS_PRINT_INFO
) {
1035 if (flags
&SWS_FAST_BILINEAR
)
1036 av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1037 else if (flags
&SWS_BILINEAR
)
1038 av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1039 else if (flags
&SWS_BICUBIC
)
1040 av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1041 else if (flags
&SWS_X
)
1042 av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1043 else if (flags
&SWS_POINT
)
1044 av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1045 else if (flags
&SWS_AREA
)
1046 av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1047 else if (flags
&SWS_BICUBLIN
)
1048 av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1049 else if (flags
&SWS_GAUSS
)
1050 av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1051 else if (flags
&SWS_SINC
)
1052 av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1053 else if (flags
&SWS_LANCZOS
)
1054 av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1055 else if (flags
&SWS_SPLINE
)
1056 av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1058 av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1060 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1061 sws_format_name(srcFormat
),
1063 dstFormat
== PIX_FMT_BGR555
|| dstFormat
== PIX_FMT_BGR565
||
1064 dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1065 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
? "dithered " : "",
1069 sws_format_name(dstFormat
));
1071 if (flags
& SWS_CPU_CAPS_MMX2
)
1072 av_log(c
, AV_LOG_INFO
, "using MMX2\n");
1073 else if (flags
& SWS_CPU_CAPS_3DNOW
)
1074 av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1075 else if (flags
& SWS_CPU_CAPS_MMX
)
1076 av_log(c
, AV_LOG_INFO
, "using MMX\n");
1077 else if (flags
& SWS_CPU_CAPS_ALTIVEC
)
1078 av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1080 av_log(c
, AV_LOG_INFO
, "using C\n");
1082 if (flags
& SWS_CPU_CAPS_MMX
) {
1083 if (c
->canMMX2BeUsed
&& (flags
&SWS_FAST_BILINEAR
))
1084 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
1086 if (c
->hLumFilterSize
==4)
1087 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal luminance scaling\n");
1088 else if (c
->hLumFilterSize
==8)
1089 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal luminance scaling\n");
1091 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal luminance scaling\n");
1093 if (c
->hChrFilterSize
==4)
1094 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
1095 else if (c
->hChrFilterSize
==8)
1096 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
1098 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal chrominance scaling\n");
1102 av_log(c
, AV_LOG_VERBOSE
, "using x86 asm scaler for horizontal scaling\n");
1104 if (flags
& SWS_FAST_BILINEAR
)
1105 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR C scaler for horizontal scaling\n");
1107 av_log(c
, AV_LOG_VERBOSE
, "using C scaler for horizontal scaling\n");
1110 if (isPlanarYUV(dstFormat
)) {
1111 if (c
->vLumFilterSize
==1)
1112 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1114 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1116 if (c
->vLumFilterSize
==1 && c
->vChrFilterSize
==2)
1117 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
1118 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1119 else if (c
->vLumFilterSize
==2 && c
->vChrFilterSize
==2)
1120 av_log(c
, AV_LOG_VERBOSE
, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1122 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1125 if (dstFormat
==PIX_FMT_BGR24
)
1126 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR24 converter\n",
1127 (flags
& SWS_CPU_CAPS_MMX2
) ? "MMX2" : ((flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C"));
1128 else if (dstFormat
==PIX_FMT_RGB32
)
1129 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR32 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1130 else if (dstFormat
==PIX_FMT_BGR565
)
1131 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR16 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1132 else if (dstFormat
==PIX_FMT_BGR555
)
1133 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR15 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1134 else if (dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1135 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
)
1136 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR12 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1138 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1139 av_log(c
, AV_LOG_DEBUG
, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1140 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1141 av_log(c
, AV_LOG_DEBUG
, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1142 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
, c
->chrXInc
, c
->chrYInc
);
1145 c
->swScale
= ff_getSwsFunc(c
);
1147 fail
: //FIXME replace things by appropriate error codes
#if FF_API_SWS_GETCONTEXT
/**
 * Allocate a scaler context, copy the caller's geometry, formats, flags and
 * tuning parameters into it, and initialize it.
 *
 * @param param optional pair of scaler tuning values; when NULL both
 *              entries are set to SWS_PARAM_DEFAULT
 * @return the initialized context, or NULL if allocation or
 *         initialization failed
 */
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
                           int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                           SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{
    SwsContext *c = sws_alloc_context();

    if (!c)
        return NULL;

    c->flags = flags;
    c->srcW  = srcW;
    c->srcH  = srcH;
    c->dstW  = dstW;
    c->dstH  = dstH;

    /* handle_jpeg() receives each format by address, so the formats are
     * stored in the context only after it has run. */
    c->srcRange  = handle_jpeg(&srcFormat);
    c->dstRange  = handle_jpeg(&dstFormat);
    c->srcFormat = srcFormat;
    c->dstFormat = dstFormat;

    c->param[0] = param ? param[0] : SWS_PARAM_DEFAULT;
    c->param[1] = param ? param[1] : SWS_PARAM_DEFAULT;

    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
                             ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, c->dstRange,
                             0, 1<<16, 1<<16);

    if (sws_init_context(c, srcFilter, dstFilter) >= 0)
        return c;

    sws_freeContext(c);
    return NULL;
}
#endif
1189 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1190 float lumaSharpen
, float chromaSharpen
,
1191 float chromaHShift
, float chromaVShift
,
1194 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1198 if (lumaGBlur
!=0.0) {
1199 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1200 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1202 filter
->lumH
= sws_getIdentityVec();
1203 filter
->lumV
= sws_getIdentityVec();
1206 if (chromaGBlur
!=0.0) {
1207 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1208 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1210 filter
->chrH
= sws_getIdentityVec();
1211 filter
->chrV
= sws_getIdentityVec();
1214 if (chromaSharpen
!=0.0) {
1215 SwsVector
*id
= sws_getIdentityVec();
1216 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1217 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1218 sws_addVec(filter
->chrH
, id
);
1219 sws_addVec(filter
->chrV
, id
);
1223 if (lumaSharpen
!=0.0) {
1224 SwsVector
*id
= sws_getIdentityVec();
1225 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1226 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1227 sws_addVec(filter
->lumH
, id
);
1228 sws_addVec(filter
->lumV
, id
);
1232 if (chromaHShift
!= 0.0)
1233 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+0.5));
1235 if (chromaVShift
!= 0.0)
1236 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+0.5));
1238 sws_normalizeVec(filter
->chrH
, 1.0);
1239 sws_normalizeVec(filter
->chrV
, 1.0);
1240 sws_normalizeVec(filter
->lumH
, 1.0);
1241 sws_normalizeVec(filter
->lumV
, 1.0);
1243 if (verbose
) sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1244 if (verbose
) sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1249 SwsVector
*sws_allocVec(int length
)
1251 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1254 vec
->length
= length
;
1255 vec
->coeff
= av_malloc(sizeof(double) * length
);
1261 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1263 const int length
= (int)(variance
*quality
+ 0.5) | 1;
1265 double middle
= (length
-1)*0.5;
1266 SwsVector
*vec
= sws_allocVec(length
);
1271 for (i
=0; i
<length
; i
++) {
1272 double dist
= i
-middle
;
1273 vec
->coeff
[i
]= exp(-dist
*dist
/(2*variance
*variance
)) / sqrt(2*variance
*M_PI
);
1276 sws_normalizeVec(vec
, 1.0);
1281 SwsVector
*sws_getConstVec(double c
, int length
)
1284 SwsVector
*vec
= sws_allocVec(length
);
1289 for (i
=0; i
<length
; i
++)
1295 SwsVector
*sws_getIdentityVec(void)
1297 return sws_getConstVec(1.0, 1);
1300 static double sws_dcVec(SwsVector
*a
)
1305 for (i
=0; i
<a
->length
; i
++)
1311 void sws_scaleVec(SwsVector
*a
, double scalar
)
1315 for (i
=0; i
<a
->length
; i
++)
1316 a
->coeff
[i
]*= scalar
;
1319 void sws_normalizeVec(SwsVector
*a
, double height
)
1321 sws_scaleVec(a
, height
/sws_dcVec(a
));
1324 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1326 int length
= a
->length
+ b
->length
- 1;
1328 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1333 for (i
=0; i
<a
->length
; i
++) {
1334 for (j
=0; j
<b
->length
; j
++) {
1335 vec
->coeff
[i
+j
]+= a
->coeff
[i
]*b
->coeff
[j
];
1342 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1344 int length
= FFMAX(a
->length
, b
->length
);
1346 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1351 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1352 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]+= b
->coeff
[i
];
1357 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1359 int length
= FFMAX(a
->length
, b
->length
);
1361 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1366 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1367 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]-= b
->coeff
[i
];
1372 /* shift left / or right if "shift" is negative */
1373 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1375 int length
= a
->length
+ FFABS(shift
)*2;
1377 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1382 for (i
=0; i
<a
->length
; i
++) {
1383 vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2 - shift
]= a
->coeff
[i
];
1389 void sws_shiftVec(SwsVector
*a
, int shift
)
1391 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1393 a
->coeff
= shifted
->coeff
;
1394 a
->length
= shifted
->length
;
1398 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1400 SwsVector
*sum
= sws_sumVec(a
, b
);
1402 a
->coeff
= sum
->coeff
;
1403 a
->length
= sum
->length
;
1407 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1409 SwsVector
*diff
= sws_diffVec(a
, b
);
1411 a
->coeff
= diff
->coeff
;
1412 a
->length
= diff
->length
;
1416 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1418 SwsVector
*conv
= sws_getConvVec(a
, b
);
1420 a
->coeff
= conv
->coeff
;
1421 a
->length
= conv
->length
;
1425 SwsVector
*sws_cloneVec(SwsVector
*a
)
1428 SwsVector
*vec
= sws_allocVec(a
->length
);
1433 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
]= a
->coeff
[i
];
1438 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1445 for (i
=0; i
<a
->length
; i
++)
1446 if (a
->coeff
[i
]>max
) max
= a
->coeff
[i
];
1448 for (i
=0; i
<a
->length
; i
++)
1449 if (a
->coeff
[i
]<min
) min
= a
->coeff
[i
];
1453 for (i
=0; i
<a
->length
; i
++) {
1454 int x
= (int)((a
->coeff
[i
]-min
)*60.0/range
+0.5);
1455 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1456 for (;x
>0; x
--) av_log(log_ctx
, log_level
, " ");
1457 av_log(log_ctx
, log_level
, "|\n");
1461 #if LIBSWSCALE_VERSION_MAJOR < 1
1462 void sws_printVec(SwsVector
*a
)
1464 sws_printVec2(a
, NULL
, AV_LOG_DEBUG
);
1468 void sws_freeVec(SwsVector
*a
)
1471 av_freep(&a
->coeff
);
1476 void sws_freeFilter(SwsFilter
*filter
)
1478 if (!filter
) return;
1480 if (filter
->lumH
) sws_freeVec(filter
->lumH
);
1481 if (filter
->lumV
) sws_freeVec(filter
->lumV
);
1482 if (filter
->chrH
) sws_freeVec(filter
->chrH
);
1483 if (filter
->chrV
) sws_freeVec(filter
->chrV
);
1487 void sws_freeContext(SwsContext
*c
)
1493 for (i
=0; i
<c
->vLumBufSize
; i
++)
1494 av_freep(&c
->lumPixBuf
[i
]);
1495 av_freep(&c
->lumPixBuf
);
1499 for (i
=0; i
<c
->vChrBufSize
; i
++)
1500 av_freep(&c
->chrPixBuf
[i
]);
1501 av_freep(&c
->chrPixBuf
);
1504 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1505 for (i
=0; i
<c
->vLumBufSize
; i
++)
1506 av_freep(&c
->alpPixBuf
[i
]);
1507 av_freep(&c
->alpPixBuf
);
1510 av_freep(&c
->vLumFilter
);
1511 av_freep(&c
->vChrFilter
);
1512 av_freep(&c
->hLumFilter
);
1513 av_freep(&c
->hChrFilter
);
1515 av_freep(&c
->vYCoeffsBank
);
1516 av_freep(&c
->vCCoeffsBank
);
1519 av_freep(&c
->vLumFilterPos
);
1520 av_freep(&c
->vChrFilterPos
);
1521 av_freep(&c
->hLumFilterPos
);
1522 av_freep(&c
->hChrFilterPos
);
1525 #ifdef MAP_ANONYMOUS
1526 if (c
->lumMmx2FilterCode
) munmap(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
);
1527 if (c
->chrMmx2FilterCode
) munmap(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
);
1528 #elif HAVE_VIRTUALALLOC
1529 if (c
->lumMmx2FilterCode
) VirtualFree(c
->lumMmx2FilterCode
, 0, MEM_RELEASE
);
1530 if (c
->chrMmx2FilterCode
) VirtualFree(c
->chrMmx2FilterCode
, 0, MEM_RELEASE
);
1532 av_free(c
->lumMmx2FilterCode
);
1533 av_free(c
->chrMmx2FilterCode
);
1535 c
->lumMmx2FilterCode
=NULL
;
1536 c
->chrMmx2FilterCode
=NULL
;
1537 #endif /* ARCH_X86 */
1539 av_freep(&c
->yuvTable
);
1544 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
,
1545 int srcW
, int srcH
, enum PixelFormat srcFormat
,
1546 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
1547 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
1549 static const double default_param
[2] = {SWS_PARAM_DEFAULT
, SWS_PARAM_DEFAULT
};
1552 param
= default_param
;
1554 flags
= update_flags_cpu(flags
);
1557 (context
->srcW
!= srcW
||
1558 context
->srcH
!= srcH
||
1559 context
->srcFormat
!= srcFormat
||
1560 context
->dstW
!= dstW
||
1561 context
->dstH
!= dstH
||
1562 context
->dstFormat
!= dstFormat
||
1563 context
->flags
!= flags
||
1564 context
->param
[0] != param
[0] ||
1565 context
->param
[1] != param
[1])) {
1566 sws_freeContext(context
);
1571 if (!(context
= sws_alloc_context()))
1573 context
->srcW
= srcW
;
1574 context
->srcH
= srcH
;
1575 context
->srcFormat
= srcFormat
;
1576 context
->dstFormat
= dstFormat
;
1577 context
->flags
= flags
;
1578 context
->param
[0] = param
[0];
1579 context
->param
[1] = param
[1];
1580 if (sws_init_context(context
, srcFilter
, dstFilter
) < 0) {
1581 sws_freeContext(context
);