/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * the C code (not assembly, mmx, ...) of this file can be used
 * under the LGPL license too
 */
24 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
33 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
34 #define MAP_ANONYMOUS MAP_ANON
38 #define WIN32_LEAN_AND_MEAN
42 #include "swscale_internal.h"
44 #include "libavutil/intreadwrite.h"
45 #include "libavutil/x86_cpu.h"
46 #include "libavutil/avutil.h"
47 #include "libavutil/bswap.h"
48 #include "libavutil/pixdesc.h"
50 unsigned swscale_version(void)
52 return LIBSWSCALE_VERSION_INT
;
55 const char *swscale_configuration(void)
57 return FFMPEG_CONFIGURATION
;
60 const char *swscale_license(void)
62 #define LICENSE_PREFIX "libswscale license: "
63 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
#define RET 0xC3 //near return opcode for x86

/*
 * Non-zero when pixel format x is one of the formats listed below, which
 * sws_isSupportedInput() and sws_getContext() accept as a source format.
 * x is evaluated once per comparison, so it must be free of side effects.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_YUVJ420P    \
        || (x)==PIX_FMT_YUVJ422P    \
        || (x)==PIX_FMT_YUVJ440P    \
        || (x)==PIX_FMT_YUVJ444P    \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420P16LE \
        || (x)==PIX_FMT_YUV422P16LE \
        || (x)==PIX_FMT_YUV444P16LE \
        || (x)==PIX_FMT_YUV420P16BE \
        || (x)==PIX_FMT_YUV422P16BE \
        || (x)==PIX_FMT_YUV444P16BE \
    )
115 int sws_isSupportedInput(enum PixelFormat pix_fmt
)
117 return isSupportedIn(pix_fmt
);
/*
 * Non-zero when pixel format x equals one of the formats compared below;
 * used by sws_isSupportedOutput() and sws_getContext() to validate the
 * destination format. x is evaluated once per comparison.
 * NOTE(review): this listing jumps from original line 131 to 133 and does not
 * show the closing parenthesis, so at least one alternative of the condition
 * is not visible here -- confirm against the upstream file before editing.
 */
120 #define isSupportedOut(x) ( \
121 (x)==PIX_FMT_YUV420P \
122 || (x)==PIX_FMT_YUVA420P \
123 || (x)==PIX_FMT_YUYV422 \
124 || (x)==PIX_FMT_UYVY422 \
125 || (x)==PIX_FMT_YUV444P \
126 || (x)==PIX_FMT_YUV422P \
127 || (x)==PIX_FMT_YUV411P \
128 || (x)==PIX_FMT_YUVJ420P \
129 || (x)==PIX_FMT_YUVJ422P \
130 || (x)==PIX_FMT_YUVJ440P \
131 || (x)==PIX_FMT_YUVJ444P \
133 || (x)==PIX_FMT_NV12 \
134 || (x)==PIX_FMT_NV21 \
135 || (x)==PIX_FMT_GRAY16BE \
136 || (x)==PIX_FMT_GRAY16LE \
137 || (x)==PIX_FMT_GRAY8 \
138 || (x)==PIX_FMT_YUV410P \
139 || (x)==PIX_FMT_YUV440P \
140 || (x)==PIX_FMT_YUV420P16LE \
141 || (x)==PIX_FMT_YUV422P16LE \
142 || (x)==PIX_FMT_YUV444P16LE \
143 || (x)==PIX_FMT_YUV420P16BE \
144 || (x)==PIX_FMT_YUV422P16BE \
145 || (x)==PIX_FMT_YUV444P16BE \
148 int sws_isSupportedOutput(enum PixelFormat pix_fmt
)
150 return isSupportedOut(pix_fmt
);
/* Non-zero when the descriptor of pixel format x has the PIX_FMT_PAL flag set. */
#define usePal(x) (av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL)

/* Coefficient rows defined outside this file; sws_getContext() passes
 * ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] to sws_setColorspaceDetails(). */
extern const int32_t ff_yuv2rgb_coeffs[8][4];
157 const char *sws_format_name(enum PixelFormat format
)
159 if ((unsigned)format
< PIX_FMT_NB
&& av_pix_fmt_descriptors
[format
].name
)
160 return av_pix_fmt_descriptors
[format
].name
;
162 return "Unknown format";
/*
 * Evaluate one spline segment at distance dist.
 * For dist <= 1.0 the result is the cubic a + b*dist + c*dist^2 + d*dist^3,
 * computed in Horner form. For larger dist the function calls itself with a
 * first coefficient of 0.0.
 * NOTE(review): the remaining arguments of the recursive call are cut off in
 * this listing -- consult the upstream file for the per-segment coefficients.
 */
165 static double getSplineCoeff(double a
, double b
, double c
, double d
, double dist
)
167 // printf("%f %f %f %f %f\n", a,b,c,d,dist);
168 if (dist
<=1.0) return ((d
*dist
+ c
)*dist
+ b
)*dist
+a
;
169 else return getSplineCoeff( 0.0,
/*
 * Build the coefficient table and source-position table for one scaling
 * direction.
 *
 * outFilter     receives the int16 coefficient table ((dstW+1) rows of
 *               *outFilterSize entries, zero-allocated)
 * filterPos     receives dstW+1 source start positions
 * outFilterSize receives the aligned number of taps per output sample
 * xInc          source step per output sample; 0x10000 means unscaled
 * srcW, dstW    source / destination length along this direction
 * filterAlign   tap count is rounded up to a multiple of this
 * one           target sum of each normalized coefficient row
 * flags         SWS_* scaler selection and CPU capability bits
 * srcFilter, dstFilter  optional extra vectors convolved into the filter
 * param         tuning values; SWS_PARAM_DEFAULT selects built-in defaults
 *
 * Steps visible in this listing: build 64-bit fixed-point coefficients
 * (unit value fone) for the selected scaler; combine them with
 * srcFilter/dstFilter into filter2; drop near-zero taps at both ends; round
 * the tap count up to filterAlign; fold taps that would fall outside the
 * source back inside; normalize each row against `one` and store it; finally
 * duplicate the last position and row because the MMX scaler reads one entry
 * past the end.
 *
 * NOTE(review): the listing has lost lines (braces, several declarations and
 * statements, the fail: label and the return statements), so the return value
 * and the cleanup path cannot be described from here.
 */
176 static int initFilter(int16_t **outFilter
, int16_t **filterPos
, int *outFilterSize
, int xInc
,
177 int srcW
, int dstW
, int filterAlign
, int one
, int flags
,
178 SwsVector
*srcFilter
, SwsVector
*dstFilter
, double param
[2])
184 int64_t *filter
=NULL
;
185 int64_t *filter2
=NULL
;
/* fixed-point value used as 1.0 for the intermediate 64-bit coefficients */
186 const int64_t fone
= 1LL<<54;
189 if (flags
& SWS_CPU_CAPS_MMX
)
190 __asm__
volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
193 // NOTE: the +1 is for the MMX scaler which reads over the end
194 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+1)*sizeof(int16_t), fail
);
196 if (FFABS(xInc
- 0x10000) <10) { // unscaled
199 FF_ALLOCZ_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
201 for (i
=0; i
<dstW
; i
++) {
202 filter
[i
*filterSize
]= fone
;
206 } else if (flags
&SWS_POINT
) { // lame looking point sampling mode
210 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
212 xDstInSrc
= xInc
/2 - 0x8000;
213 for (i
=0; i
<dstW
; i
++) {
214 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
220 } else if ((xInc
<= (1<<16) && (flags
&SWS_AREA
)) || (flags
&SWS_FAST_BILINEAR
)) { // bilinear upscale
224 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
226 xDstInSrc
= xInc
/2 - 0x8000;
227 for (i
=0; i
<dstW
; i
++) {
228 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
232 //bilinear upscale / linear interpolate / area averaging
233 for (j
=0; j
<filterSize
; j
++) {
234 int64_t coeff
= fone
- FFABS((xx
<<16) - xDstInSrc
)*(fone
>>16);
235 if (coeff
<0) coeff
=0;
236 filter
[i
*filterSize
+ j
]= coeff
;
245 if (flags
&SWS_BICUBIC
) sizeFactor
= 4;
246 else if (flags
&SWS_X
) sizeFactor
= 8;
247 else if (flags
&SWS_AREA
) sizeFactor
= 1; //downscale only, for upscale it is bilinear
248 else if (flags
&SWS_GAUSS
) sizeFactor
= 8; // infinite ;)
249 else if (flags
&SWS_LANCZOS
) sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2*param
[0]) : 6;
250 else if (flags
&SWS_SINC
) sizeFactor
= 20; // infinite ;)
251 else if (flags
&SWS_SPLINE
) sizeFactor
= 20; // infinite ;)
252 else if (flags
&SWS_BILINEAR
) sizeFactor
= 2;
254 sizeFactor
= 0; //GCC warning killer
258 if (xInc
<= 1<<16) filterSize
= 1 + sizeFactor
; // upscale
259 else filterSize
= 1 + (sizeFactor
*srcW
+ dstW
- 1)/ dstW
;
261 if (filterSize
> srcW
-2) filterSize
=srcW
-2;
263 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
265 xDstInSrc
= xInc
- 0x10000;
266 for (i
=0; i
<dstW
; i
++) {
267 int xx
= (xDstInSrc
- ((filterSize
-2)<<16)) / (1<<17);
270 for (j
=0; j
<filterSize
; j
++) {
271 int64_t d
= ((int64_t)FFABS((xx
<<17) - xDstInSrc
))<<13;
277 floatd
= d
* (1.0/(1<<30));
279 if (flags
& SWS_BICUBIC
) {
280 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1<<24);
281 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1<<24);
282 int64_t dd
= ( d
*d
)>>30;
283 int64_t ddd
= (dd
*d
)>>30;
286 coeff
= (12*(1<<24)-9*B
-6*C
)*ddd
+ (-18*(1<<24)+12*B
+6*C
)*dd
+ (6*(1<<24)-2*B
)*(1<<30);
287 else if (d
< 1LL<<31)
288 coeff
= (-B
-6*C
)*ddd
+ (6*B
+30*C
)*dd
+ (-12*B
-48*C
)*d
+ (8*B
+24*C
)*(1<<30);
291 coeff
*= fone
>>(30+24);
293 /* else if (flags & SWS_X) {
294 double p= param ? param*0.01 : 0.3;
295 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
296 coeff*= pow(2.0, - p*d*d);
298 else if (flags
& SWS_X
) {
299 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
303 c
= cos(floatd
*M_PI
);
306 if (c
<0.0) c
= -pow(-c
, A
);
308 coeff
= (c
*0.5 + 0.5)*fone
;
309 } else if (flags
& SWS_AREA
) {
310 int64_t d2
= d
- (1<<29);
311 if (d2
*xInc
< -(1LL<<(29+16))) coeff
= 1.0 * (1LL<<(30+16));
312 else if (d2
*xInc
< (1LL<<(29+16))) coeff
= -d2
*xInc
+ (1LL<<(29+16));
314 coeff
*= fone
>>(30+16);
315 } else if (flags
& SWS_GAUSS
) {
316 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
317 coeff
= (pow(2.0, - p
*floatd
*floatd
))*fone
;
318 } else if (flags
& SWS_SINC
) {
319 coeff
= (d
? sin(floatd
*M_PI
)/(floatd
*M_PI
) : 1.0)*fone
;
320 } else if (flags
& SWS_LANCZOS
) {
321 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
322 coeff
= (d
? sin(floatd
*M_PI
)*sin(floatd
*M_PI
/p
)/(floatd
*floatd
*M_PI
*M_PI
/p
) : 1.0)*fone
;
323 if (floatd
>p
) coeff
=0;
324 } else if (flags
& SWS_BILINEAR
) {
326 if (coeff
<0) coeff
=0;
328 } else if (flags
& SWS_SPLINE
) {
329 double p
=-2.196152422706632;
330 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
-1.0, floatd
) * fone
;
332 coeff
= 0.0; //GCC warning killer
336 filter
[i
*filterSize
+ j
]= coeff
;
343 /* apply src & dst Filter to filter -> filter2
346 assert(filterSize
>0);
347 filter2Size
= filterSize
;
348 if (srcFilter
) filter2Size
+= srcFilter
->length
- 1;
349 if (dstFilter
) filter2Size
+= dstFilter
->length
- 1;
350 assert(filter2Size
>0);
351 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
*dstW
*sizeof(*filter2
), fail
);
353 for (i
=0; i
<dstW
; i
++) {
357 for (k
=0; k
<srcFilter
->length
; k
++) {
358 for (j
=0; j
<filterSize
; j
++)
359 filter2
[i
*filter2Size
+ k
+ j
] += srcFilter
->coeff
[k
]*filter
[i
*filterSize
+ j
];
362 for (j
=0; j
<filterSize
; j
++)
363 filter2
[i
*filter2Size
+ j
]= filter
[i
*filterSize
+ j
];
367 (*filterPos
)[i
]+= (filterSize
-1)/2 - (filter2Size
-1)/2;
371 /* try to reduce the filter-size (step1 find size and shift left) */
372 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
374 for (i
=dstW
-1; i
>=0; i
--) {
375 int min
= filter2Size
;
379 /* get rid of near zero elements on the left by shifting left */
380 for (j
=0; j
<filter2Size
; j
++) {
382 cutOff
+= FFABS(filter2
[i
*filter2Size
]);
384 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
386 /* preserve monotonicity because the core can't handle the filter otherwise */
387 if (i
<dstW
-1 && (*filterPos
)[i
] >= (*filterPos
)[i
+1]) break;
389 // move filter coefficients left
390 for (k
=1; k
<filter2Size
; k
++)
391 filter2
[i
*filter2Size
+ k
- 1]= filter2
[i
*filter2Size
+ k
];
392 filter2
[i
*filter2Size
+ k
- 1]= 0;
397 /* count near zeros on the right */
398 for (j
=filter2Size
-1; j
>0; j
--) {
399 cutOff
+= FFABS(filter2
[i
*filter2Size
+ j
]);
401 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
405 if (min
>minFilterSize
) minFilterSize
= min
;
408 if (flags
& SWS_CPU_CAPS_ALTIVEC
) {
409 // we can handle the special case 4,
410 // so we don't want to go to the full 8
411 if (minFilterSize
< 5)
414 // We really don't want to waste our time
415 // doing useless computation, so fall back on
416 // the scalar C code for very small filters.
417 // Vectorizing is worth it only if you have a
418 // decent-sized vector.
419 if (minFilterSize
< 3)
423 if (flags
& SWS_CPU_CAPS_MMX
) {
424 // special case for unscaled vertical filtering
425 if (minFilterSize
== 1 && filterAlign
== 2)
429 assert(minFilterSize
> 0);
/* round the tap count up to a multiple of filterAlign; the mask arithmetic
 * assumes filterAlign is a power of two -- TODO confirm with callers */
430 filterSize
= (minFilterSize
+(filterAlign
-1)) & (~(filterAlign
-1));
431 assert(filterSize
> 0);
432 filter
= av_malloc(filterSize
*dstW
*sizeof(*filter
));
433 if (filterSize
>= MAX_FILTER_SIZE
*16/((flags
&SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
435 *outFilterSize
= filterSize
;
437 if (flags
&SWS_PRINT_INFO
)
438 av_log(NULL
, AV_LOG_VERBOSE
, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size
, filterSize
);
439 /* try to reduce the filter-size (step2 reduce it) */
440 for (i
=0; i
<dstW
; i
++) {
443 for (j
=0; j
<filterSize
; j
++) {
444 if (j
>=filter2Size
) filter
[i
*filterSize
+ j
]= 0;
445 else filter
[i
*filterSize
+ j
]= filter2
[i
*filter2Size
+ j
];
446 if((flags
& SWS_BITEXACT
) && j
>=minFilterSize
)
447 filter
[i
*filterSize
+ j
]= 0;
451 //FIXME try to align filterPos if possible
454 for (i
=0; i
<dstW
; i
++) {
456 if ((*filterPos
)[i
] < 0) {
457 // move filter coefficients left to compensate for filterPos
458 for (j
=1; j
<filterSize
; j
++) {
459 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
460 filter
[i
*filterSize
+ left
] += filter
[i
*filterSize
+ j
];
461 filter
[i
*filterSize
+ j
]=0;
466 if ((*filterPos
)[i
] + filterSize
> srcW
) {
467 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
468 // move filter coefficients right to compensate for filterPos
469 for (j
=filterSize
-2; j
>=0; j
--) {
470 int right
= FFMIN(j
+ shift
, filterSize
-1);
471 filter
[i
*filterSize
+right
] += filter
[i
*filterSize
+j
];
472 filter
[i
*filterSize
+j
]=0;
474 (*filterPos
)[i
]= srcW
- filterSize
;
478 // Note the +1 is for the MMX scaler which reads over the end
479 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
480 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
, *outFilterSize
*(dstW
+1)*sizeof(int16_t), fail
);
482 /* normalize & store in outFilter */
483 for (i
=0; i
<dstW
; i
++) {
488 for (j
=0; j
<filterSize
; j
++) {
489 sum
+= filter
[i
*filterSize
+ j
];
/* express the row total in units of `one`, rounded to nearest; each
 * coefficient is divided by it below */
491 sum
= (sum
+ one
/2)/ one
;
492 for (j
=0; j
<*outFilterSize
; j
++) {
493 int64_t v
= filter
[i
*filterSize
+ j
] + error
;
494 int intV
= ROUNDED_DIV(v
, sum
);
495 (*outFilter
)[i
*(*outFilterSize
) + j
]= intV
;
500 (*filterPos
)[dstW
]= (*filterPos
)[dstW
-1]; // the MMX scaler will read over the end
501 for (i
=0; i
<*outFilterSize
; i
++) {
502 int j
= dstW
*(*outFilterSize
);
503 (*outFilter
)[j
+ i
]= (*outFilter
)[j
+ i
- (*outFilterSize
)];
/*
 * initMMX2HScaler: assemble a horizontal scaling routine at run time.
 *
 * Two inline-asm templates (fragmentA and fragmentB) are located through the
 * lea instructions below, which yield each template's address, the offsets of
 * its two pshufw immediates and its length. For every group of four output
 * pixels one template is copied into filterCode at fragmentPos and its two
 * immediates are patched with the source offsets a, b, c, d of those pixels;
 * fragmentB is chosen when d+1 < 4, fragmentA otherwise. filter[] receives
 * the per-pixel weights derived from the low 16 bits of xpos, filterPos[]
 * the source positions, and a RET opcode is written after the last fragment.
 *
 * Returns fragmentPos + 1. sws_getContext() first calls this function with
 * NULL buffers and stores the result as the code size to allocate.
 * NOTE(review): many lines of this function (declarations, the guard used for
 * the NULL-buffer call, loop bookkeeping, braces) are missing from this
 * listing -- verify details against the upstream file.
 */
513 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
514 static int initMMX2HScaler(int dstW
, int xInc
, uint8_t *filterCode
, int16_t *filter
, int32_t *filterPos
, int numSplits
)
517 x86_reg imm8OfPShufW1A
;
518 x86_reg imm8OfPShufW2A
;
519 x86_reg fragmentLengthA
;
521 x86_reg imm8OfPShufW1B
;
522 x86_reg imm8OfPShufW2B
;
523 x86_reg fragmentLengthB
;
528 // create an optimized horizontal scaling routine
529 /* This scaler is made of runtime-generated MMX2 code using specially
530 * tuned pshufw instructions. For every four output pixels, if four
531 * input pixels are enough for the fast bilinear scaling, then a chunk
532 * of fragmentB is used. If five input pixels are needed, then a chunk
533 * of fragmentA is used.
542 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
543 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
544 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
545 "punpcklbw %%mm7, %%mm1 \n\t"
546 "punpcklbw %%mm7, %%mm0 \n\t"
547 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
549 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
551 "psubw %%mm1, %%mm0 \n\t"
552 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
553 "pmullw %%mm3, %%mm0 \n\t"
554 "psllw $7, %%mm1 \n\t"
555 "paddw %%mm1, %%mm0 \n\t"
557 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
559 "add $8, %%"REG_a
" \n\t"
563 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
564 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
565 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
570 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
574 :"=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
575 "=r" (fragmentLengthA
)
582 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
583 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
584 "punpcklbw %%mm7, %%mm0 \n\t"
585 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
587 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
589 "psubw %%mm1, %%mm0 \n\t"
590 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
591 "pmullw %%mm3, %%mm0 \n\t"
592 "psllw $7, %%mm1 \n\t"
593 "paddw %%mm1, %%mm0 \n\t"
595 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
597 "add $8, %%"REG_a
" \n\t"
601 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
602 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
603 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
608 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
612 :"=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
613 "=r" (fragmentLengthB
)
616 xpos
= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
619 for (i
=0; i
<dstW
/numSplits
; i
++) {
624 int b
=((xpos
+xInc
)>>16) - xx
;
625 int c
=((xpos
+xInc
*2)>>16) - xx
;
626 int d
=((xpos
+xInc
*3)>>16) - xx
;
628 uint8_t *fragment
= (d
+1<4) ? fragmentB
: fragmentA
;
629 x86_reg imm8OfPShufW1
= (d
+1<4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
630 x86_reg imm8OfPShufW2
= (d
+1<4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
631 x86_reg fragmentLength
= (d
+1<4) ? fragmentLengthB
: fragmentLengthA
;
632 int maxShift
= 3-(d
+inc
);
636 filter
[i
] = (( xpos
& 0xFFFF) ^ 0xFFFF)>>9;
637 filter
[i
+1] = (((xpos
+xInc
) & 0xFFFF) ^ 0xFFFF)>>9;
638 filter
[i
+2] = (((xpos
+xInc
*2) & 0xFFFF) ^ 0xFFFF)>>9;
639 filter
[i
+3] = (((xpos
+xInc
*3) & 0xFFFF) ^ 0xFFFF)>>9;
642 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
644 filterCode
[fragmentPos
+ imm8OfPShufW1
]=
645 (a
+inc
) | ((b
+inc
)<<2) | ((c
+inc
)<<4) | ((d
+inc
)<<6);
646 filterCode
[fragmentPos
+ imm8OfPShufW2
]=
647 a
| (b
<<2) | (c
<<4) | (d
<<6);
649 if (i
+4-inc
>=dstW
) shift
=maxShift
; //avoid overread
650 else if ((filterPos
[i
/2]&3) <= maxShift
) shift
=filterPos
[i
/2]&3; //Align
652 if (shift
&& i
>=shift
) {
653 filterCode
[fragmentPos
+ imm8OfPShufW1
]+= 0x55*shift
;
654 filterCode
[fragmentPos
+ imm8OfPShufW2
]+= 0x55*shift
;
655 filterPos
[i
/2]-=shift
;
659 fragmentPos
+= fragmentLength
;
662 filterCode
[fragmentPos
]= RET
;
667 filterPos
[((i
/2)+1)&(~1)]= xpos
>>16; // needed to jump to the next part
669 return fragmentPos
+ 1;
671 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL */
673 static void getSubSampleFactors(int *h
, int *v
, enum PixelFormat format
)
675 *h
= av_pix_fmt_descriptors
[format
].log2_chroma_w
;
676 *v
= av_pix_fmt_descriptors
[format
].log2_chroma_h
;
/**
 * Convert a 16.16 fixed-point value to a saturated signed 16-bit integer,
 * returned as its uint16_t bit pattern.
 *
 * The value is rounded by adding half a unit (1<<15) before the shift.
 * Results below -0x7FFF yield 0x8000 and results above 0x7FFF yield 0x7FFF.
 * The rounded value is kept in 64 bits until after the range test, so inputs
 * far outside the 16-bit range saturate instead of wrapping through a
 * narrowing conversion to int.
 *
 * @param f 16.16 fixed-point input
 * @return the clamped, rounded integer part as a 16-bit pattern
 */
static uint16_t roundToInt16(int64_t f)
{
    const int64_t r = (f + (1 << 15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    if (r > 0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
/*
 * Store the colorspace parameters in the context and derive the YUV->RGB
 * coefficients from them.
 *
 * inv_table            four coefficients for the source side; entries 2 and 3
 *                      are negated when loaded into cgu / cgv
 * table                four coefficients for the destination side (copied only)
 * srcRange, dstRange   range flags, stored as given
 * brightness, contrast, saturation
 *                      adjustments; contrast and saturation are applied as
 *                      16.16 fixed-point factors (see the >>16 / >>32 below)
 *
 * Returns -1 when the destination format is YUV or gray. The success return
 * statement is not visible in this listing.
 * NOTE(review): the declarations of cy and oy and the branch that handles
 * srcRange were lost from this listing -- check the upstream file.
 */
687 int sws_setColorspaceDetails(SwsContext
*c
, const int inv_table
[4], int srcRange
, const int table
[4], int dstRange
, int brightness
, int contrast
, int saturation
)
689 int64_t crv
= inv_table
[0];
690 int64_t cbu
= inv_table
[1];
691 int64_t cgu
= -inv_table
[2];
692 int64_t cgv
= -inv_table
[3];
696 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int)*4);
697 memcpy(c
->dstColorspaceTable
, table
, sizeof(int)*4);
699 c
->brightness
= brightness
;
700 c
->contrast
= contrast
;
701 c
->saturation
= saturation
;
702 c
->srcRange
= srcRange
;
703 c
->dstRange
= dstRange
;
/* the context fields above have already been updated when this early
 * return is taken */
704 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
706 c
->uOffset
= 0x0400040004000400LL
;
707 c
->vOffset
= 0x0400040004000400LL
;
/* NOTE(review): the statement that guards this 224/255 rescale is not
 * visible in this listing */
713 crv
= (crv
*224) / 255;
714 cbu
= (cbu
*224) / 255;
715 cgu
= (cgu
*224) / 255;
716 cgv
= (cgv
*224) / 255;
719 cy
= (cy
*contrast
)>>16;
720 crv
= (crv
*contrast
* saturation
)>>32;
721 cbu
= (cbu
*contrast
* saturation
)>>32;
722 cgu
= (cgu
*contrast
* saturation
)>>32;
723 cgv
= (cgv
*contrast
* saturation
)>>32;
725 oy
-= 256*brightness
;
/* multiplying by 0x0001000100010001 copies the rounded 16-bit value into
 * all four 16-bit lanes of the 64-bit field */
727 c
->yCoeff
= roundToInt16(cy
*8192) * 0x0001000100010001ULL
;
728 c
->vrCoeff
= roundToInt16(crv
*8192) * 0x0001000100010001ULL
;
729 c
->ubCoeff
= roundToInt16(cbu
*8192) * 0x0001000100010001ULL
;
730 c
->vgCoeff
= roundToInt16(cgv
*8192) * 0x0001000100010001ULL
;
731 c
->ugCoeff
= roundToInt16(cgu
*8192) * 0x0001000100010001ULL
;
732 c
->yOffset
= roundToInt16(oy
* 8) * 0x0001000100010001ULL
;
734 c
->yuv2rgb_y_coeff
= (int16_t)roundToInt16(cy
<<13);
735 c
->yuv2rgb_y_offset
= (int16_t)roundToInt16(oy
<< 9);
736 c
->yuv2rgb_v2r_coeff
= (int16_t)roundToInt16(crv
<<13);
737 c
->yuv2rgb_v2g_coeff
= (int16_t)roundToInt16(cgv
<<13);
738 c
->yuv2rgb_u2g_coeff
= (int16_t)roundToInt16(cgu
<<13);
739 c
->yuv2rgb_u2b_coeff
= (int16_t)roundToInt16(cbu
<<13);
741 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
, contrast
, saturation
);
744 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
745 if (c
->flags
& SWS_CPU_CAPS_ALTIVEC
)
746 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
, contrast
, saturation
);
751 int sws_getColorspaceDetails(SwsContext
*c
, int **inv_table
, int *srcRange
, int **table
, int *dstRange
, int *brightness
, int *contrast
, int *saturation
)
753 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
755 *inv_table
= c
->srcColorspaceTable
;
756 *table
= c
->dstColorspaceTable
;
757 *srcRange
= c
->srcRange
;
758 *dstRange
= c
->dstRange
;
759 *brightness
= c
->brightness
;
760 *contrast
= c
->contrast
;
761 *saturation
= c
->saturation
;
766 static int handle_jpeg(enum PixelFormat
*format
)
769 case PIX_FMT_YUVJ420P
:
770 *format
= PIX_FMT_YUV420P
;
772 case PIX_FMT_YUVJ422P
:
773 *format
= PIX_FMT_YUV422P
;
775 case PIX_FMT_YUVJ444P
:
776 *format
= PIX_FMT_YUV444P
;
778 case PIX_FMT_YUVJ440P
:
779 *format
= PIX_FMT_YUV440P
;
786 SwsContext
*sws_getContext(int srcW
, int srcH
, enum PixelFormat srcFormat
,
787 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
788 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
792 int usesVFilter
, usesHFilter
;
794 int srcRange
, dstRange
;
795 SwsFilter dummyFilter
= {NULL
, NULL
, NULL
, NULL
};
797 if (flags
& SWS_CPU_CAPS_MMX
)
798 __asm__
volatile("emms\n\t"::: "memory");
801 #if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
802 flags
&= ~(SWS_CPU_CAPS_MMX
|SWS_CPU_CAPS_MMX2
|SWS_CPU_CAPS_3DNOW
|SWS_CPU_CAPS_ALTIVEC
|SWS_CPU_CAPS_BFIN
);
803 flags
|= ff_hardcodedcpuflags();
804 #endif /* CONFIG_RUNTIME_CPUDETECT */
805 if (!rgb15to16
) sws_rgb2rgb_init(flags
);
807 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
809 srcRange
= handle_jpeg(&srcFormat
);
810 dstRange
= handle_jpeg(&dstFormat
);
812 if (!isSupportedIn(srcFormat
)) {
813 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat
));
816 if (!isSupportedOut(dstFormat
)) {
817 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat
));
821 i
= flags
& ( SWS_POINT
832 if(!i
|| (i
& (i
-1))) {
833 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Exactly one scaler algorithm must be chosen\n");
838 if (srcW
<4 || srcH
<1 || dstW
<8 || dstH
<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
839 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
840 srcW
, srcH
, dstW
, dstH
);
843 if(srcW
> VOFW
|| dstW
> VOFW
) {
844 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW
)" change VOF/VOFW and recompile\n");
848 if (!dstFilter
) dstFilter
= &dummyFilter
;
849 if (!srcFilter
) srcFilter
= &dummyFilter
;
851 FF_ALLOCZ_OR_GOTO(NULL
, c
, sizeof(SwsContext
), fail
);
853 c
->av_class
= &sws_context_class
;
858 c
->lumXInc
= ((srcW
<<16) + (dstW
>>1))/dstW
;
859 c
->lumYInc
= ((srcH
<<16) + (dstH
>>1))/dstH
;
861 c
->dstFormat
= dstFormat
;
862 c
->srcFormat
= srcFormat
;
863 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[dstFormat
]);
864 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[srcFormat
]);
865 c
->vRounder
= 4* 0x0001000100010001ULL
;
867 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
>1) ||
868 (srcFilter
->chrV
&& srcFilter
->chrV
->length
>1) ||
869 (dstFilter
->lumV
&& dstFilter
->lumV
->length
>1) ||
870 (dstFilter
->chrV
&& dstFilter
->chrV
->length
>1);
871 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
>1) ||
872 (srcFilter
->chrH
&& srcFilter
->chrH
->length
>1) ||
873 (dstFilter
->lumH
&& dstFilter
->lumH
->length
>1) ||
874 (dstFilter
->chrH
&& dstFilter
->chrH
->length
>1);
876 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
877 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
879 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
880 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) c
->chrDstHSubSample
=1;
882 // drop some chroma lines if the user wants it
883 c
->vChrDrop
= (flags
&SWS_SRC_V_CHR_DROP_MASK
)>>SWS_SRC_V_CHR_DROP_SHIFT
;
884 c
->chrSrcVSubSample
+= c
->vChrDrop
;
886 // drop every other pixel for chroma calculation unless user wants full chroma
887 if (isAnyRGB(srcFormat
) && !(flags
&SWS_FULL_CHR_H_INP
)
888 && srcFormat
!=PIX_FMT_RGB8
&& srcFormat
!=PIX_FMT_BGR8
889 && srcFormat
!=PIX_FMT_RGB4
&& srcFormat
!=PIX_FMT_BGR4
890 && srcFormat
!=PIX_FMT_RGB4_BYTE
&& srcFormat
!=PIX_FMT_BGR4_BYTE
891 && ((dstW
>>c
->chrDstHSubSample
) <= (srcW
>>1) || (flags
&(SWS_FAST_BILINEAR
|SWS_POINT
))))
892 c
->chrSrcHSubSample
=1;
895 c
->param
[0] = param
[0];
896 c
->param
[1] = param
[1];
899 c
->param
[1] = SWS_PARAM_DEFAULT
;
902 // Note the -((-x)>>y) is so that we always round toward +inf.
903 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
904 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
905 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
906 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
908 sws_setColorspaceDetails(c
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], srcRange
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
] /* FIXME*/, dstRange
, 0, 1<<16, 1<<16);
910 /* unscaled special cases */
911 if (unscaled
&& !usesHFilter
&& !usesVFilter
&& (srcRange
== dstRange
|| isAnyRGB(dstFormat
))) {
912 ff_get_unscaled_swscale(c
);
915 if (flags
&SWS_PRINT_INFO
)
916 av_log(c
, AV_LOG_INFO
, "using unscaled %s -> %s special converter\n",
917 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
922 if (flags
& SWS_CPU_CAPS_MMX2
) {
923 c
->canMMX2BeUsed
= (dstW
>=srcW
&& (dstW
&31)==0 && (srcW
&15)==0) ? 1 : 0;
924 if (!c
->canMMX2BeUsed
&& dstW
>=srcW
&& (srcW
&15)==0 && (flags
&SWS_FAST_BILINEAR
)) {
925 if (flags
&SWS_PRINT_INFO
)
926 av_log(c
, AV_LOG_INFO
, "output width is not a multiple of 32 -> no MMX2 scaler\n");
928 if (usesHFilter
) c
->canMMX2BeUsed
=0;
933 c
->chrXInc
= ((c
->chrSrcW
<<16) + (c
->chrDstW
>>1))/c
->chrDstW
;
934 c
->chrYInc
= ((c
->chrSrcH
<<16) + (c
->chrDstH
>>1))/c
->chrDstH
;
936 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
937 // but only for the FAST_BILINEAR mode otherwise do correct scaling
938 // n-2 is the last chrominance sample available
939 // this is not perfect, but no one should notice the difference, the more correct variant
940 // would be like the vertical one, but that would require some special code for the
941 // first and last pixel
942 if (flags
&SWS_FAST_BILINEAR
) {
943 if (c
->canMMX2BeUsed
) {
947 //we don't use the x86 asm scaler if MMX is available
948 else if (flags
& SWS_CPU_CAPS_MMX
) {
949 c
->lumXInc
= ((srcW
-2)<<16)/(dstW
-2) - 20;
950 c
->chrXInc
= ((c
->chrSrcW
-2)<<16)/(c
->chrDstW
-2) - 20;
954 /* precalculate horizontal scaler filter coefficients */
956 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
957 // can't downscale !!!
958 if (c
->canMMX2BeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
959 c
->lumMmx2FilterCodeSize
= initMMX2HScaler( dstW
, c
->lumXInc
, NULL
, NULL
, NULL
, 8);
960 c
->chrMmx2FilterCodeSize
= initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, NULL
, NULL
, NULL
, 4);
963 c
->lumMmx2FilterCode
= mmap(NULL
, c
->lumMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
964 c
->chrMmx2FilterCode
= mmap(NULL
, c
->chrMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, 0, 0);
965 #elif HAVE_VIRTUALALLOC
966 c
->lumMmx2FilterCode
= VirtualAlloc(NULL
, c
->lumMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
967 c
->chrMmx2FilterCode
= VirtualAlloc(NULL
, c
->chrMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
969 c
->lumMmx2FilterCode
= av_malloc(c
->lumMmx2FilterCodeSize
);
970 c
->chrMmx2FilterCode
= av_malloc(c
->chrMmx2FilterCodeSize
);
973 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/8+8)*sizeof(int16_t), fail
);
974 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/4+8)*sizeof(int16_t), fail
);
975 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/2/8+8)*sizeof(int32_t), fail
);
976 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/2/4+8)*sizeof(int32_t), fail
);
978 initMMX2HScaler( dstW
, c
->lumXInc
, c
->lumMmx2FilterCode
, c
->hLumFilter
, c
->hLumFilterPos
, 8);
979 initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, c
->chrMmx2FilterCode
, c
->hChrFilter
, c
->hChrFilterPos
, 4);
982 mprotect(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
983 mprotect(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
986 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL */
988 const int filterAlign
=
989 (flags
& SWS_CPU_CAPS_MMX
) ? 4 :
990 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
993 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
, &c
->hLumFilterSize
, c
->lumXInc
,
994 srcW
, dstW
, filterAlign
, 1<<14,
995 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
996 srcFilter
->lumH
, dstFilter
->lumH
, c
->param
) < 0)
998 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
, &c
->hChrFilterSize
, c
->chrXInc
,
999 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1<<14,
1000 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
1001 srcFilter
->chrH
, dstFilter
->chrH
, c
->param
) < 0)
1004 } // initialize horizontal stuff
1006 /* precalculate vertical scaler filter coefficients */
1008 const int filterAlign
=
1009 (flags
& SWS_CPU_CAPS_MMX
) && (flags
& SWS_ACCURATE_RND
) ? 2 :
1010 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
1013 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
, c
->lumYInc
,
1014 srcH
, dstH
, filterAlign
, (1<<12),
1015 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
1016 srcFilter
->lumV
, dstFilter
->lumV
, c
->param
) < 0)
1018 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
, c
->chrYInc
,
1019 c
->chrSrcH
, c
->chrDstH
, filterAlign
, (1<<12),
1020 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
1021 srcFilter
->chrV
, dstFilter
->chrV
, c
->param
) < 0)
1024 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
1025 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof (vector
signed short)*c
->vLumFilterSize
*c
->dstH
, fail
);
1026 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof (vector
signed short)*c
->vChrFilterSize
*c
->chrDstH
, fail
);
1028 for (i
=0;i
<c
->vLumFilterSize
*c
->dstH
;i
++) {
1030 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1032 p
[j
] = c
->vLumFilter
[i
];
1035 for (i
=0;i
<c
->vChrFilterSize
*c
->chrDstH
;i
++) {
1037 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1039 p
[j
] = c
->vChrFilter
[i
];
1044 // calculate buffer sizes so that they won't run out while handling these damn slices
1045 c
->vLumBufSize
= c
->vLumFilterSize
;
1046 c
->vChrBufSize
= c
->vChrFilterSize
;
1047 for (i
=0; i
<dstH
; i
++) {
1048 int chrI
= i
*c
->chrDstH
/ dstH
;
1049 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1050 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)<<c
->chrSrcVSubSample
));
1052 nextSlice
>>= c
->chrSrcVSubSample
;
1053 nextSlice
<<= c
->chrSrcVSubSample
;
1054 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1055 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1056 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
< (nextSlice
>>c
->chrSrcVSubSample
))
1057 c
->vChrBufSize
= (nextSlice
>>c
->chrSrcVSubSample
) - c
->vChrFilterPos
[chrI
];
1060 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
1061 // allocate several megabytes to handle all possible cases)
1062 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1063 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
, c
->vChrBufSize
*2*sizeof(int16_t*), fail
);
1064 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1065 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1066 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
1067 /* align at 16 bytes for AltiVec */
1068 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1069 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1070 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+c
->vLumBufSize
];
1072 for (i
=0; i
<c
->vChrBufSize
; i
++) {
1073 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
[i
+c
->vChrBufSize
], (VOF
+1)*2, fail
);
1074 c
->chrPixBuf
[i
] = c
->chrPixBuf
[i
+c
->vChrBufSize
];
1076 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1077 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1078 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1079 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+c
->vLumBufSize
];
1082 //try to avoid drawing green stuff between the right end and the stride end
1083 for (i
=0; i
<c
->vChrBufSize
; i
++) memset(c
->chrPixBuf
[i
], 64, (VOF
+1)*2);
1085 assert(2*VOFW
== VOF
);
1087 assert(c
->chrDstH
<= dstH
);
1089 if (flags
&SWS_PRINT_INFO
) {
1090 if (flags
&SWS_FAST_BILINEAR
)
1091 av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1092 else if (flags
&SWS_BILINEAR
)
1093 av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1094 else if (flags
&SWS_BICUBIC
)
1095 av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1096 else if (flags
&SWS_X
)
1097 av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1098 else if (flags
&SWS_POINT
)
1099 av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1100 else if (flags
&SWS_AREA
)
1101 av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1102 else if (flags
&SWS_BICUBLIN
)
1103 av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1104 else if (flags
&SWS_GAUSS
)
1105 av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1106 else if (flags
&SWS_SINC
)
1107 av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1108 else if (flags
&SWS_LANCZOS
)
1109 av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1110 else if (flags
&SWS_SPLINE
)
1111 av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1113 av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1115 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1116 sws_format_name(srcFormat
),
1118 dstFormat
== PIX_FMT_BGR555
|| dstFormat
== PIX_FMT_BGR565
? "dithered " : "",
1122 sws_format_name(dstFormat
));
1124 if (flags
& SWS_CPU_CAPS_MMX2
)
1125 av_log(c
, AV_LOG_INFO
, "using MMX2\n");
1126 else if (flags
& SWS_CPU_CAPS_3DNOW
)
1127 av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1128 else if (flags
& SWS_CPU_CAPS_MMX
)
1129 av_log(c
, AV_LOG_INFO
, "using MMX\n");
1130 else if (flags
& SWS_CPU_CAPS_ALTIVEC
)
1131 av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1133 av_log(c
, AV_LOG_INFO
, "using C\n");
1135 if (flags
& SWS_CPU_CAPS_MMX
) {
1136 if (c
->canMMX2BeUsed
&& (flags
&SWS_FAST_BILINEAR
))
1137 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
1139 if (c
->hLumFilterSize
==4)
1140 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal luminance scaling\n");
1141 else if (c
->hLumFilterSize
==8)
1142 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal luminance scaling\n");
1144 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal luminance scaling\n");
1146 if (c
->hChrFilterSize
==4)
1147 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
1148 else if (c
->hChrFilterSize
==8)
1149 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
1151 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal chrominance scaling\n");
1155 av_log(c
, AV_LOG_VERBOSE
, "using x86 asm scaler for horizontal scaling\n");
1157 if (flags
& SWS_FAST_BILINEAR
)
1158 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR C scaler for horizontal scaling\n");
1160 av_log(c
, AV_LOG_VERBOSE
, "using C scaler for horizontal scaling\n");
1163 if (isPlanarYUV(dstFormat
)) {
1164 if (c
->vLumFilterSize
==1)
1165 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1167 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1169 if (c
->vLumFilterSize
==1 && c
->vChrFilterSize
==2)
1170 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
1171 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1172 else if (c
->vLumFilterSize
==2 && c
->vChrFilterSize
==2)
1173 av_log(c
, AV_LOG_VERBOSE
, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1175 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1178 if (dstFormat
==PIX_FMT_BGR24
)
1179 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR24 converter\n",
1180 (flags
& SWS_CPU_CAPS_MMX2
) ? "MMX2" : ((flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C"));
1181 else if (dstFormat
==PIX_FMT_RGB32
)
1182 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR32 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1183 else if (dstFormat
==PIX_FMT_BGR565
)
1184 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR16 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1185 else if (dstFormat
==PIX_FMT_BGR555
)
1186 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR15 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1188 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1189 av_log(c
, AV_LOG_DEBUG
, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1190 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1191 av_log(c
, AV_LOG_DEBUG
, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1192 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
, c
->chrXInc
, c
->chrYInc
);
1195 c
->swScale
= ff_getSwsFunc(c
);
1203 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1204 float lumaSharpen
, float chromaSharpen
,
1205 float chromaHShift
, float chromaVShift
,
1208 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1212 if (lumaGBlur
!=0.0) {
1213 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1214 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1216 filter
->lumH
= sws_getIdentityVec();
1217 filter
->lumV
= sws_getIdentityVec();
1220 if (chromaGBlur
!=0.0) {
1221 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1222 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1224 filter
->chrH
= sws_getIdentityVec();
1225 filter
->chrV
= sws_getIdentityVec();
1228 if (chromaSharpen
!=0.0) {
1229 SwsVector
*id
= sws_getIdentityVec();
1230 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1231 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1232 sws_addVec(filter
->chrH
, id
);
1233 sws_addVec(filter
->chrV
, id
);
1237 if (lumaSharpen
!=0.0) {
1238 SwsVector
*id
= sws_getIdentityVec();
1239 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1240 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1241 sws_addVec(filter
->lumH
, id
);
1242 sws_addVec(filter
->lumV
, id
);
1246 if (chromaHShift
!= 0.0)
1247 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+0.5));
1249 if (chromaVShift
!= 0.0)
1250 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+0.5));
1252 sws_normalizeVec(filter
->chrH
, 1.0);
1253 sws_normalizeVec(filter
->chrV
, 1.0);
1254 sws_normalizeVec(filter
->lumH
, 1.0);
1255 sws_normalizeVec(filter
->lumV
, 1.0);
1257 if (verbose
) sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1258 if (verbose
) sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1263 SwsVector
*sws_allocVec(int length
)
1265 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1268 vec
->length
= length
;
1269 vec
->coeff
= av_malloc(sizeof(double) * length
);
1275 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1277 const int length
= (int)(variance
*quality
+ 0.5) | 1;
1279 double middle
= (length
-1)*0.5;
1280 SwsVector
*vec
= sws_allocVec(length
);
1285 for (i
=0; i
<length
; i
++) {
1286 double dist
= i
-middle
;
1287 vec
->coeff
[i
]= exp(-dist
*dist
/(2*variance
*variance
)) / sqrt(2*variance
*M_PI
);
1290 sws_normalizeVec(vec
, 1.0);
1295 SwsVector
*sws_getConstVec(double c
, int length
)
1298 SwsVector
*vec
= sws_allocVec(length
);
1303 for (i
=0; i
<length
; i
++)
1309 SwsVector
*sws_getIdentityVec(void)
1311 return sws_getConstVec(1.0, 1);
1314 static double sws_dcVec(SwsVector
*a
)
1319 for (i
=0; i
<a
->length
; i
++)
1325 void sws_scaleVec(SwsVector
*a
, double scalar
)
1329 for (i
=0; i
<a
->length
; i
++)
1330 a
->coeff
[i
]*= scalar
;
1333 void sws_normalizeVec(SwsVector
*a
, double height
)
1335 sws_scaleVec(a
, height
/sws_dcVec(a
));
1338 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1340 int length
= a
->length
+ b
->length
- 1;
1342 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1347 for (i
=0; i
<a
->length
; i
++) {
1348 for (j
=0; j
<b
->length
; j
++) {
1349 vec
->coeff
[i
+j
]+= a
->coeff
[i
]*b
->coeff
[j
];
1356 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1358 int length
= FFMAX(a
->length
, b
->length
);
1360 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1365 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1366 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]+= b
->coeff
[i
];
1371 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1373 int length
= FFMAX(a
->length
, b
->length
);
1375 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1380 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1381 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]-= b
->coeff
[i
];
1386 /* shift left / or right if "shift" is negative */
1387 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1389 int length
= a
->length
+ FFABS(shift
)*2;
1391 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1396 for (i
=0; i
<a
->length
; i
++) {
1397 vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2 - shift
]= a
->coeff
[i
];
1403 void sws_shiftVec(SwsVector
*a
, int shift
)
1405 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1407 a
->coeff
= shifted
->coeff
;
1408 a
->length
= shifted
->length
;
1412 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1414 SwsVector
*sum
= sws_sumVec(a
, b
);
1416 a
->coeff
= sum
->coeff
;
1417 a
->length
= sum
->length
;
1421 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1423 SwsVector
*diff
= sws_diffVec(a
, b
);
1425 a
->coeff
= diff
->coeff
;
1426 a
->length
= diff
->length
;
1430 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1432 SwsVector
*conv
= sws_getConvVec(a
, b
);
1434 a
->coeff
= conv
->coeff
;
1435 a
->length
= conv
->length
;
1439 SwsVector
*sws_cloneVec(SwsVector
*a
)
1442 SwsVector
*vec
= sws_allocVec(a
->length
);
1447 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
]= a
->coeff
[i
];
1452 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1459 for (i
=0; i
<a
->length
; i
++)
1460 if (a
->coeff
[i
]>max
) max
= a
->coeff
[i
];
1462 for (i
=0; i
<a
->length
; i
++)
1463 if (a
->coeff
[i
]<min
) min
= a
->coeff
[i
];
1467 for (i
=0; i
<a
->length
; i
++) {
1468 int x
= (int)((a
->coeff
[i
]-min
)*60.0/range
+0.5);
1469 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1470 for (;x
>0; x
--) av_log(log_ctx
, log_level
, " ");
1471 av_log(log_ctx
, log_level
, "|\n");
1475 #if LIBSWSCALE_VERSION_MAJOR < 1
1476 void sws_printVec(SwsVector
*a
)
1478 sws_printVec2(a
, NULL
, AV_LOG_DEBUG
);
1482 void sws_freeVec(SwsVector
*a
)
1485 av_freep(&a
->coeff
);
1490 void sws_freeFilter(SwsFilter
*filter
)
1492 if (!filter
) return;
1494 if (filter
->lumH
) sws_freeVec(filter
->lumH
);
1495 if (filter
->lumV
) sws_freeVec(filter
->lumV
);
1496 if (filter
->chrH
) sws_freeVec(filter
->chrH
);
1497 if (filter
->chrV
) sws_freeVec(filter
->chrV
);
1501 void sws_freeContext(SwsContext
*c
)
1507 for (i
=0; i
<c
->vLumBufSize
; i
++)
1508 av_freep(&c
->lumPixBuf
[i
]);
1509 av_freep(&c
->lumPixBuf
);
1513 for (i
=0; i
<c
->vChrBufSize
; i
++)
1514 av_freep(&c
->chrPixBuf
[i
]);
1515 av_freep(&c
->chrPixBuf
);
1518 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1519 for (i
=0; i
<c
->vLumBufSize
; i
++)
1520 av_freep(&c
->alpPixBuf
[i
]);
1521 av_freep(&c
->alpPixBuf
);
1524 av_freep(&c
->vLumFilter
);
1525 av_freep(&c
->vChrFilter
);
1526 av_freep(&c
->hLumFilter
);
1527 av_freep(&c
->hChrFilter
);
1528 #if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
1529 av_freep(&c
->vYCoeffsBank
);
1530 av_freep(&c
->vCCoeffsBank
);
1533 av_freep(&c
->vLumFilterPos
);
1534 av_freep(&c
->vChrFilterPos
);
1535 av_freep(&c
->hLumFilterPos
);
1536 av_freep(&c
->hChrFilterPos
);
1538 #if ARCH_X86 && CONFIG_GPL
1539 #ifdef MAP_ANONYMOUS
1540 if (c
->lumMmx2FilterCode
) munmap(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
);
1541 if (c
->chrMmx2FilterCode
) munmap(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
);
1542 #elif HAVE_VIRTUALALLOC
1543 if (c
->lumMmx2FilterCode
) VirtualFree(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, MEM_RELEASE
);
1544 if (c
->chrMmx2FilterCode
) VirtualFree(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, MEM_RELEASE
);
1546 av_free(c
->lumMmx2FilterCode
);
1547 av_free(c
->chrMmx2FilterCode
);
1549 c
->lumMmx2FilterCode
=NULL
;
1550 c
->chrMmx2FilterCode
=NULL
;
1551 #endif /* ARCH_X86 && CONFIG_GPL */
1553 av_freep(&c
->yuvTable
);
1558 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
,
1559 int srcW
, int srcH
, enum PixelFormat srcFormat
,
1560 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
1561 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
1563 static const double default_param
[2] = {SWS_PARAM_DEFAULT
, SWS_PARAM_DEFAULT
};
1566 param
= default_param
;
1569 (context
->srcW
!= srcW
||
1570 context
->srcH
!= srcH
||
1571 context
->srcFormat
!= srcFormat
||
1572 context
->dstW
!= dstW
||
1573 context
->dstH
!= dstH
||
1574 context
->dstFormat
!= dstFormat
||
1575 context
->flags
!= flags
||
1576 context
->param
[0] != param
[0] ||
1577 context
->param
[1] != param
[1])) {
1578 sws_freeContext(context
);
1583 return sws_getContext(srcW
, srcH
, srcFormat
,
1584 dstW
, dstH
, dstFormat
, flags
,
1585 srcFilter
, dstFilter
, param
);