2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
40 #include "libavutil/attributes.h"
41 #include "libavutil/avutil.h"
42 #include "libavutil/bswap.h"
43 #include "libavutil/cpu.h"
44 #include "libavutil/intreadwrite.h"
45 #include "libavutil/mathematics.h"
46 #include "libavutil/opt.h"
47 #include "libavutil/pixdesc.h"
48 #include "libavutil/x86/asm.h"
49 #include "libavutil/x86/cpu.h"
52 #include "swscale_internal.h"
54 unsigned swscale_version(void)
56 return LIBSWSCALE_VERSION_INT
;
59 const char *swscale_configuration(void)
61 return LIBAV_CONFIGURATION
;
64 const char *swscale_license(void)
66 #define LICENSE_PREFIX "libswscale license: "
67 return LICENSE_PREFIX LIBAV_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
70 #define RET 0xC3 // near return opcode for x86
/* Capability flags for one pixel format; see format_entries. */
typedef struct FormatEntry {
    int is_supported_in;  /* non-zero: reported as usable by sws_isSupportedInput()  */
    int is_supported_out; /* non-zero: reported as usable by sws_isSupportedOutput() */
} FormatEntry;
76 static const FormatEntry format_entries
[AV_PIX_FMT_NB
] = {
77 [AV_PIX_FMT_YUV420P
] = { 1, 1 },
78 [AV_PIX_FMT_YUYV422
] = { 1, 1 },
79 [AV_PIX_FMT_RGB24
] = { 1, 1 },
80 [AV_PIX_FMT_BGR24
] = { 1, 1 },
81 [AV_PIX_FMT_YUV422P
] = { 1, 1 },
82 [AV_PIX_FMT_YUV444P
] = { 1, 1 },
83 [AV_PIX_FMT_YUV410P
] = { 1, 1 },
84 [AV_PIX_FMT_YUV411P
] = { 1, 1 },
85 [AV_PIX_FMT_GRAY8
] = { 1, 1 },
86 [AV_PIX_FMT_MONOWHITE
] = { 1, 1 },
87 [AV_PIX_FMT_MONOBLACK
] = { 1, 1 },
88 [AV_PIX_FMT_PAL8
] = { 1, 0 },
89 [AV_PIX_FMT_YUVJ420P
] = { 1, 1 },
90 [AV_PIX_FMT_YUVJ422P
] = { 1, 1 },
91 [AV_PIX_FMT_YUVJ444P
] = { 1, 1 },
92 [AV_PIX_FMT_UYVY422
] = { 1, 1 },
93 [AV_PIX_FMT_UYYVYY411
] = { 0, 0 },
94 [AV_PIX_FMT_BGR8
] = { 1, 1 },
95 [AV_PIX_FMT_BGR4
] = { 0, 1 },
96 [AV_PIX_FMT_BGR4_BYTE
] = { 1, 1 },
97 [AV_PIX_FMT_RGB8
] = { 1, 1 },
98 [AV_PIX_FMT_RGB4
] = { 0, 1 },
99 [AV_PIX_FMT_RGB4_BYTE
] = { 1, 1 },
100 [AV_PIX_FMT_NV12
] = { 1, 1 },
101 [AV_PIX_FMT_NV21
] = { 1, 1 },
102 [AV_PIX_FMT_ARGB
] = { 1, 1 },
103 [AV_PIX_FMT_RGBA
] = { 1, 1 },
104 [AV_PIX_FMT_ABGR
] = { 1, 1 },
105 [AV_PIX_FMT_BGRA
] = { 1, 1 },
106 [AV_PIX_FMT_GRAY16BE
] = { 1, 1 },
107 [AV_PIX_FMT_GRAY16LE
] = { 1, 1 },
108 [AV_PIX_FMT_YUV440P
] = { 1, 1 },
109 [AV_PIX_FMT_YUVJ440P
] = { 1, 1 },
110 [AV_PIX_FMT_YUVA420P
] = { 1, 1 },
111 [AV_PIX_FMT_YUVA422P
] = { 1, 1 },
112 [AV_PIX_FMT_YUVA444P
] = { 1, 1 },
113 [AV_PIX_FMT_YUVA420P9BE
] = { 1, 1 },
114 [AV_PIX_FMT_YUVA420P9LE
] = { 1, 1 },
115 [AV_PIX_FMT_YUVA422P9BE
] = { 1, 1 },
116 [AV_PIX_FMT_YUVA422P9LE
] = { 1, 1 },
117 [AV_PIX_FMT_YUVA444P9BE
] = { 1, 1 },
118 [AV_PIX_FMT_YUVA444P9LE
] = { 1, 1 },
119 [AV_PIX_FMT_YUVA420P10BE
]= { 1, 1 },
120 [AV_PIX_FMT_YUVA420P10LE
]= { 1, 1 },
121 [AV_PIX_FMT_YUVA422P10BE
]= { 1, 1 },
122 [AV_PIX_FMT_YUVA422P10LE
]= { 1, 1 },
123 [AV_PIX_FMT_YUVA444P10BE
]= { 1, 1 },
124 [AV_PIX_FMT_YUVA444P10LE
]= { 1, 1 },
125 [AV_PIX_FMT_YUVA420P16BE
]= { 1, 1 },
126 [AV_PIX_FMT_YUVA420P16LE
]= { 1, 1 },
127 [AV_PIX_FMT_YUVA422P16BE
]= { 1, 1 },
128 [AV_PIX_FMT_YUVA422P16LE
]= { 1, 1 },
129 [AV_PIX_FMT_YUVA444P16BE
]= { 1, 1 },
130 [AV_PIX_FMT_YUVA444P16LE
]= { 1, 1 },
131 [AV_PIX_FMT_RGB48BE
] = { 1, 1 },
132 [AV_PIX_FMT_RGB48LE
] = { 1, 1 },
133 [AV_PIX_FMT_RGB565BE
] = { 1, 1 },
134 [AV_PIX_FMT_RGB565LE
] = { 1, 1 },
135 [AV_PIX_FMT_RGB555BE
] = { 1, 1 },
136 [AV_PIX_FMT_RGB555LE
] = { 1, 1 },
137 [AV_PIX_FMT_BGR565BE
] = { 1, 1 },
138 [AV_PIX_FMT_BGR565LE
] = { 1, 1 },
139 [AV_PIX_FMT_BGR555BE
] = { 1, 1 },
140 [AV_PIX_FMT_BGR555LE
] = { 1, 1 },
141 [AV_PIX_FMT_YUV420P16LE
] = { 1, 1 },
142 [AV_PIX_FMT_YUV420P16BE
] = { 1, 1 },
143 [AV_PIX_FMT_YUV422P16LE
] = { 1, 1 },
144 [AV_PIX_FMT_YUV422P16BE
] = { 1, 1 },
145 [AV_PIX_FMT_YUV444P16LE
] = { 1, 1 },
146 [AV_PIX_FMT_YUV444P16BE
] = { 1, 1 },
147 [AV_PIX_FMT_RGB444LE
] = { 1, 1 },
148 [AV_PIX_FMT_RGB444BE
] = { 1, 1 },
149 [AV_PIX_FMT_BGR444LE
] = { 1, 1 },
150 [AV_PIX_FMT_BGR444BE
] = { 1, 1 },
151 [AV_PIX_FMT_Y400A
] = { 1, 0 },
152 [AV_PIX_FMT_BGR48BE
] = { 1, 1 },
153 [AV_PIX_FMT_BGR48LE
] = { 1, 1 },
154 [AV_PIX_FMT_YUV420P9BE
] = { 1, 1 },
155 [AV_PIX_FMT_YUV420P9LE
] = { 1, 1 },
156 [AV_PIX_FMT_YUV420P10BE
] = { 1, 1 },
157 [AV_PIX_FMT_YUV420P10LE
] = { 1, 1 },
158 [AV_PIX_FMT_YUV422P9BE
] = { 1, 1 },
159 [AV_PIX_FMT_YUV422P9LE
] = { 1, 1 },
160 [AV_PIX_FMT_YUV422P10BE
] = { 1, 1 },
161 [AV_PIX_FMT_YUV422P10LE
] = { 1, 1 },
162 [AV_PIX_FMT_YUV444P9BE
] = { 1, 1 },
163 [AV_PIX_FMT_YUV444P9LE
] = { 1, 1 },
164 [AV_PIX_FMT_YUV444P10BE
] = { 1, 1 },
165 [AV_PIX_FMT_YUV444P10LE
] = { 1, 1 },
166 [AV_PIX_FMT_GBRP
] = { 1, 1 },
167 [AV_PIX_FMT_GBRP9LE
] = { 1, 1 },
168 [AV_PIX_FMT_GBRP9BE
] = { 1, 1 },
169 [AV_PIX_FMT_GBRP10LE
] = { 1, 1 },
170 [AV_PIX_FMT_GBRP10BE
] = { 1, 1 },
171 [AV_PIX_FMT_GBRP16LE
] = { 1, 0 },
172 [AV_PIX_FMT_GBRP16BE
] = { 1, 0 },
175 int sws_isSupportedInput(enum AVPixelFormat pix_fmt
)
177 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
178 format_entries
[pix_fmt
].is_supported_in
: 0;
181 int sws_isSupportedOutput(enum AVPixelFormat pix_fmt
)
183 return (unsigned)pix_fmt
< AV_PIX_FMT_NB
?
184 format_entries
[pix_fmt
].is_supported_out
: 0;
187 extern const int32_t ff_yuv2rgb_coeffs
[8][4];
189 const char *sws_format_name(enum AVPixelFormat format
)
191 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(format
);
195 return "Unknown format";
198 static double getSplineCoeff(double a
, double b
, double c
, double d
,
202 return ((d
* dist
+ c
) * dist
+ b
) * dist
+ a
;
204 return getSplineCoeff(0.0,
205 b
+ 2.0 * c
+ 3.0 * d
,
207 -b
- 3.0 * c
- 6.0 * d
,
211 static int initFilter(int16_t **outFilter
, int32_t **filterPos
,
212 int *outFilterSize
, int xInc
, int srcW
, int dstW
,
213 int filterAlign
, int one
, int flags
, int cpu_flags
,
214 SwsVector
*srcFilter
, SwsVector
*dstFilter
,
215 double param
[2], int is_horizontal
)
221 int64_t *filter
= NULL
;
222 int64_t *filter2
= NULL
;
223 const int64_t fone
= 1LL << 54;
226 emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
228 // NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
229 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+ 3) * sizeof(**filterPos
), fail
);
231 if (FFABS(xInc
- 0x10000) < 10) { // unscaled
234 FF_ALLOCZ_OR_GOTO(NULL
, filter
,
235 dstW
* sizeof(*filter
) * filterSize
, fail
);
237 for (i
= 0; i
< dstW
; i
++) {
238 filter
[i
* filterSize
] = fone
;
241 } else if (flags
& SWS_POINT
) { // lame looking point sampling mode
245 FF_ALLOC_OR_GOTO(NULL
, filter
,
246 dstW
* sizeof(*filter
) * filterSize
, fail
);
248 xDstInSrc
= xInc
/ 2 - 0x8000;
249 for (i
= 0; i
< dstW
; i
++) {
250 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
252 (*filterPos
)[i
] = xx
;
256 } else if ((xInc
<= (1 << 16) && (flags
& SWS_AREA
)) ||
257 (flags
& SWS_FAST_BILINEAR
)) { // bilinear upscale
261 FF_ALLOC_OR_GOTO(NULL
, filter
,
262 dstW
* sizeof(*filter
) * filterSize
, fail
);
264 xDstInSrc
= xInc
/ 2 - 0x8000;
265 for (i
= 0; i
< dstW
; i
++) {
266 int xx
= (xDstInSrc
- ((filterSize
- 1) << 15) + (1 << 15)) >> 16;
269 (*filterPos
)[i
] = xx
;
270 // bilinear upscale / linear interpolate / area averaging
271 for (j
= 0; j
< filterSize
; j
++) {
272 int64_t coeff
= fone
- FFABS((xx
<< 16) - xDstInSrc
) *
276 filter
[i
* filterSize
+ j
] = coeff
;
285 if (flags
& SWS_BICUBIC
)
287 else if (flags
& SWS_X
)
289 else if (flags
& SWS_AREA
)
290 sizeFactor
= 1; // downscale only, for upscale it is bilinear
291 else if (flags
& SWS_GAUSS
)
292 sizeFactor
= 8; // infinite ;)
293 else if (flags
& SWS_LANCZOS
)
294 sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2 * param
[0]) : 6;
295 else if (flags
& SWS_SINC
)
296 sizeFactor
= 20; // infinite ;)
297 else if (flags
& SWS_SPLINE
)
298 sizeFactor
= 20; // infinite ;)
299 else if (flags
& SWS_BILINEAR
)
302 sizeFactor
= 0; // GCC warning killer
307 filterSize
= 1 + sizeFactor
; // upscale
309 filterSize
= 1 + (sizeFactor
* srcW
+ dstW
- 1) / dstW
;
311 filterSize
= FFMIN(filterSize
, srcW
- 2);
312 filterSize
= FFMAX(filterSize
, 1);
314 FF_ALLOC_OR_GOTO(NULL
, filter
,
315 dstW
* sizeof(*filter
) * filterSize
, fail
);
317 xDstInSrc
= xInc
- 0x10000;
318 for (i
= 0; i
< dstW
; i
++) {
319 int xx
= (xDstInSrc
- ((filterSize
- 2) << 16)) / (1 << 17);
321 (*filterPos
)[i
] = xx
;
322 for (j
= 0; j
< filterSize
; j
++) {
323 int64_t d
= (FFABS(((int64_t)xx
<< 17) - xDstInSrc
)) << 13;
329 floatd
= d
* (1.0 / (1 << 30));
331 if (flags
& SWS_BICUBIC
) {
332 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1 << 24);
333 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1 << 24);
335 if (d
>= 1LL << 31) {
338 int64_t dd
= (d
* d
) >> 30;
339 int64_t ddd
= (dd
* d
) >> 30;
342 coeff
= (12 * (1 << 24) - 9 * B
- 6 * C
) * ddd
+
343 (-18 * (1 << 24) + 12 * B
+ 6 * C
) * dd
+
344 (6 * (1 << 24) - 2 * B
) * (1 << 30);
346 coeff
= (-B
- 6 * C
) * ddd
+
347 (6 * B
+ 30 * C
) * dd
+
348 (-12 * B
- 48 * C
) * d
+
349 (8 * B
+ 24 * C
) * (1 << 30);
351 coeff
*= fone
>> (30 + 24);
354 else if (flags
& SWS_X
) {
355 double p
= param
? param
* 0.01 : 0.3;
356 coeff
= d
? sin(d
* M_PI
) / (d
* M_PI
) : 1.0;
357 coeff
*= pow(2.0, -p
* d
* d
);
360 else if (flags
& SWS_X
) {
361 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
365 c
= cos(floatd
* M_PI
);
372 coeff
= (c
* 0.5 + 0.5) * fone
;
373 } else if (flags
& SWS_AREA
) {
374 int64_t d2
= d
- (1 << 29);
375 if (d2
* xInc
< -(1LL << (29 + 16)))
376 coeff
= 1.0 * (1LL << (30 + 16));
377 else if (d2
* xInc
< (1LL << (29 + 16)))
378 coeff
= -d2
* xInc
+ (1LL << (29 + 16));
381 coeff
*= fone
>> (30 + 16);
382 } else if (flags
& SWS_GAUSS
) {
383 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
384 coeff
= (pow(2.0, -p
* floatd
* floatd
)) * fone
;
385 } else if (flags
& SWS_SINC
) {
386 coeff
= (d
? sin(floatd
* M_PI
) / (floatd
* M_PI
) : 1.0) * fone
;
387 } else if (flags
& SWS_LANCZOS
) {
388 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
389 coeff
= (d
? sin(floatd
* M_PI
) * sin(floatd
* M_PI
/ p
) /
390 (floatd
* floatd
* M_PI
* M_PI
/ p
) : 1.0) * fone
;
393 } else if (flags
& SWS_BILINEAR
) {
394 coeff
= (1 << 30) - d
;
398 } else if (flags
& SWS_SPLINE
) {
399 double p
= -2.196152422706632;
400 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
- 1.0, floatd
) * fone
;
402 coeff
= 0.0; // GCC warning killer
406 filter
[i
* filterSize
+ j
] = coeff
;
409 xDstInSrc
+= 2 * xInc
;
413 /* apply src & dst Filter to filter -> filter2
416 assert(filterSize
> 0);
417 filter2Size
= filterSize
;
419 filter2Size
+= srcFilter
->length
- 1;
421 filter2Size
+= dstFilter
->length
- 1;
422 assert(filter2Size
> 0);
423 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
* dstW
* sizeof(*filter2
), fail
);
425 for (i
= 0; i
< dstW
; i
++) {
429 for (k
= 0; k
< srcFilter
->length
; k
++) {
430 for (j
= 0; j
< filterSize
; j
++)
431 filter2
[i
* filter2Size
+ k
+ j
] +=
432 srcFilter
->coeff
[k
] * filter
[i
* filterSize
+ j
];
435 for (j
= 0; j
< filterSize
; j
++)
436 filter2
[i
* filter2Size
+ j
] = filter
[i
* filterSize
+ j
];
440 (*filterPos
)[i
] += (filterSize
- 1) / 2 - (filter2Size
- 1) / 2;
444 /* try to reduce the filter-size (step1 find size and shift left) */
445 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
447 for (i
= dstW
- 1; i
>= 0; i
--) {
448 int min
= filter2Size
;
450 int64_t cutOff
= 0.0;
452 /* get rid of near zero elements on the left by shifting left */
453 for (j
= 0; j
< filter2Size
; j
++) {
455 cutOff
+= FFABS(filter2
[i
* filter2Size
]);
457 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
460 /* preserve monotonicity because the core can't handle the
461 * filter otherwise */
462 if (i
< dstW
- 1 && (*filterPos
)[i
] >= (*filterPos
)[i
+ 1])
465 // move filter coefficients left
466 for (k
= 1; k
< filter2Size
; k
++)
467 filter2
[i
* filter2Size
+ k
- 1] = filter2
[i
* filter2Size
+ k
];
468 filter2
[i
* filter2Size
+ k
- 1] = 0;
473 /* count near zeros on the right */
474 for (j
= filter2Size
- 1; j
> 0; j
--) {
475 cutOff
+= FFABS(filter2
[i
* filter2Size
+ j
]);
477 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
* fone
)
482 if (min
> minFilterSize
)
486 if (HAVE_ALTIVEC
&& cpu_flags
& AV_CPU_FLAG_ALTIVEC
) {
487 // we can handle the special case 4, so we don't want to go the full 8
488 if (minFilterSize
< 5)
491 /* We really don't want to waste our time doing useless computation, so
492 * fall back on the scalar C code for very small filters.
493 * Vectorizing is worth it only if you have a decent-sized vector. */
494 if (minFilterSize
< 3)
498 if (INLINE_MMX(cpu_flags
)) {
499 // special case for unscaled vertical filtering
500 if (minFilterSize
== 1 && filterAlign
== 2)
504 assert(minFilterSize
> 0);
505 filterSize
= (minFilterSize
+ (filterAlign
- 1)) & (~(filterAlign
- 1));
506 assert(filterSize
> 0);
507 filter
= av_malloc(filterSize
* dstW
* sizeof(*filter
));
508 if (filterSize
>= MAX_FILTER_SIZE
* 16 /
509 ((flags
& SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
511 *outFilterSize
= filterSize
;
513 if (flags
& SWS_PRINT_INFO
)
514 av_log(NULL
, AV_LOG_VERBOSE
,
515 "SwScaler: reducing / aligning filtersize %d -> %d\n",
516 filter2Size
, filterSize
);
517 /* try to reduce the filter-size (step2 reduce it) */
518 for (i
= 0; i
< dstW
; i
++) {
521 for (j
= 0; j
< filterSize
; j
++) {
522 if (j
>= filter2Size
)
523 filter
[i
* filterSize
+ j
] = 0;
525 filter
[i
* filterSize
+ j
] = filter2
[i
* filter2Size
+ j
];
526 if ((flags
& SWS_BITEXACT
) && j
>= minFilterSize
)
527 filter
[i
* filterSize
+ j
] = 0;
531 // FIXME try to align filterPos if possible
535 for (i
= 0; i
< dstW
; i
++) {
537 if ((*filterPos
)[i
] < 0) {
538 // move filter coefficients left to compensate for filterPos
539 for (j
= 1; j
< filterSize
; j
++) {
540 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
541 filter
[i
* filterSize
+ left
] += filter
[i
* filterSize
+ j
];
542 filter
[i
* filterSize
+ j
] = 0;
547 if ((*filterPos
)[i
] + filterSize
> srcW
) {
548 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
549 // move filter coefficients right to compensate for filterPos
550 for (j
= filterSize
- 2; j
>= 0; j
--) {
551 int right
= FFMIN(j
+ shift
, filterSize
- 1);
552 filter
[i
* filterSize
+ right
] += filter
[i
* filterSize
+ j
];
553 filter
[i
* filterSize
+ j
] = 0;
555 (*filterPos
)[i
] = srcW
- filterSize
;
560 // Note the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
561 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
562 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
,
563 *outFilterSize
* (dstW
+ 3) * sizeof(int16_t), fail
);
565 /* normalize & store in outFilter */
566 for (i
= 0; i
< dstW
; i
++) {
571 for (j
= 0; j
< filterSize
; j
++) {
572 sum
+= filter
[i
* filterSize
+ j
];
574 sum
= (sum
+ one
/ 2) / one
;
575 for (j
= 0; j
< *outFilterSize
; j
++) {
576 int64_t v
= filter
[i
* filterSize
+ j
] + error
;
577 int intV
= ROUNDED_DIV(v
, sum
);
578 (*outFilter
)[i
* (*outFilterSize
) + j
] = intV
;
579 error
= v
- intV
* sum
;
583 (*filterPos
)[dstW
+ 0] =
584 (*filterPos
)[dstW
+ 1] =
585 (*filterPos
)[dstW
+ 2] = (*filterPos
)[dstW
- 1]; /* the MMX/SSE scaler will
586 * read over the end */
587 for (i
= 0; i
< *outFilterSize
; i
++) {
588 int k
= (dstW
- 1) * (*outFilterSize
) + i
;
589 (*outFilter
)[k
+ 1 * (*outFilterSize
)] =
590 (*outFilter
)[k
+ 2 * (*outFilterSize
)] =
591 (*outFilter
)[k
+ 3 * (*outFilterSize
)] = (*outFilter
)[k
];
602 #if HAVE_MMXEXT_INLINE
603 static int init_hscaler_mmxext(int dstW
, int xInc
, uint8_t *filterCode
,
604 int16_t *filter
, int32_t *filterPos
,
608 x86_reg imm8OfPShufW1A
;
609 x86_reg imm8OfPShufW2A
;
610 x86_reg fragmentLengthA
;
612 x86_reg imm8OfPShufW1B
;
613 x86_reg imm8OfPShufW2B
;
614 x86_reg fragmentLengthB
;
619 // create an optimized horizontal scaling routine
620 /* This scaler is made of runtime-generated MMXEXT code using specially tuned
621 * pshufw instructions. For every four output pixels, if four input pixels
622 * are enough for the fast bilinear scaling, then a chunk of fragmentB is
623 * used. If five input pixels are needed, then a chunk of fragmentA is used.
632 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
633 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
634 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
635 "punpcklbw %%mm7, %%mm1 \n\t"
636 "punpcklbw %%mm7, %%mm0 \n\t"
637 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
639 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
641 "psubw %%mm1, %%mm0 \n\t"
642 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
643 "pmullw %%mm3, %%mm0 \n\t"
644 "psllw $7, %%mm1 \n\t"
645 "paddw %%mm1, %%mm0 \n\t"
647 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
649 "add $8, %%"REG_a
" \n\t"
653 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
654 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
655 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
660 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
664 : "=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
665 "=r" (fragmentLengthA
)
672 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
673 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
674 "punpcklbw %%mm7, %%mm0 \n\t"
675 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
677 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
679 "psubw %%mm1, %%mm0 \n\t"
680 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
681 "pmullw %%mm3, %%mm0 \n\t"
682 "psllw $7, %%mm1 \n\t"
683 "paddw %%mm1, %%mm0 \n\t"
685 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
687 "add $8, %%"REG_a
" \n\t"
691 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
692 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
693 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
698 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
702 : "=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
703 "=r" (fragmentLengthB
)
706 xpos
= 0; // lumXInc/2 - 0x8000; // difference between pixel centers
709 for (i
= 0; i
< dstW
/ numSplits
; i
++) {
714 int b
= ((xpos
+ xInc
) >> 16) - xx
;
715 int c
= ((xpos
+ xInc
* 2) >> 16) - xx
;
716 int d
= ((xpos
+ xInc
* 3) >> 16) - xx
;
717 int inc
= (d
+ 1 < 4);
718 uint8_t *fragment
= (d
+ 1 < 4) ? fragmentB
: fragmentA
;
719 x86_reg imm8OfPShufW1
= (d
+ 1 < 4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
720 x86_reg imm8OfPShufW2
= (d
+ 1 < 4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
721 x86_reg fragmentLength
= (d
+ 1 < 4) ? fragmentLengthB
: fragmentLengthA
;
722 int maxShift
= 3 - (d
+ inc
);
726 filter
[i
] = ((xpos
& 0xFFFF) ^ 0xFFFF) >> 9;
727 filter
[i
+ 1] = (((xpos
+ xInc
) & 0xFFFF) ^ 0xFFFF) >> 9;
728 filter
[i
+ 2] = (((xpos
+ xInc
* 2) & 0xFFFF) ^ 0xFFFF) >> 9;
729 filter
[i
+ 3] = (((xpos
+ xInc
* 3) & 0xFFFF) ^ 0xFFFF) >> 9;
730 filterPos
[i
/ 2] = xx
;
732 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
734 filterCode
[fragmentPos
+ imm8OfPShufW1
] = (a
+ inc
) |
738 filterCode
[fragmentPos
+ imm8OfPShufW2
] = a
| (b
<< 2) |
742 if (i
+ 4 - inc
>= dstW
)
743 shift
= maxShift
; // avoid overread
744 else if ((filterPos
[i
/ 2] & 3) <= maxShift
)
745 shift
= filterPos
[i
/ 2] & 3; // align
747 if (shift
&& i
>= shift
) {
748 filterCode
[fragmentPos
+ imm8OfPShufW1
] += 0x55 * shift
;
749 filterCode
[fragmentPos
+ imm8OfPShufW2
] += 0x55 * shift
;
750 filterPos
[i
/ 2] -= shift
;
754 fragmentPos
+= fragmentLength
;
757 filterCode
[fragmentPos
] = RET
;
762 filterPos
[((i
/ 2) + 1) & (~1)] = xpos
>> 16; // needed to jump to the next part
764 return fragmentPos
+ 1;
766 #endif /* HAVE_MMXEXT_INLINE */
768 static void getSubSampleFactors(int *h
, int *v
, enum AVPixelFormat format
)
770 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(format
);
771 *h
= desc
->log2_chroma_w
;
772 *v
= desc
->log2_chroma_h
;
775 int sws_setColorspaceDetails(struct SwsContext
*c
, const int inv_table
[4],
776 int srcRange
, const int table
[4], int dstRange
,
777 int brightness
, int contrast
, int saturation
)
779 const AVPixFmtDescriptor
*desc_dst
= av_pix_fmt_desc_get(c
->dstFormat
);
780 const AVPixFmtDescriptor
*desc_src
= av_pix_fmt_desc_get(c
->srcFormat
);
781 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int) * 4);
782 memcpy(c
->dstColorspaceTable
, table
, sizeof(int) * 4);
784 c
->brightness
= brightness
;
785 c
->contrast
= contrast
;
786 c
->saturation
= saturation
;
787 c
->srcRange
= srcRange
;
788 c
->dstRange
= dstRange
;
789 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
))
792 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
793 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
795 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
,
796 contrast
, saturation
);
799 if (HAVE_ALTIVEC
&& av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC
)
800 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
,
801 contrast
, saturation
);
805 int sws_getColorspaceDetails(struct SwsContext
*c
, int **inv_table
,
806 int *srcRange
, int **table
, int *dstRange
,
807 int *brightness
, int *contrast
, int *saturation
)
809 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
))
812 *inv_table
= c
->srcColorspaceTable
;
813 *table
= c
->dstColorspaceTable
;
814 *srcRange
= c
->srcRange
;
815 *dstRange
= c
->dstRange
;
816 *brightness
= c
->brightness
;
817 *contrast
= c
->contrast
;
818 *saturation
= c
->saturation
;
823 static int handle_jpeg(enum AVPixelFormat
*format
)
826 case AV_PIX_FMT_YUVJ420P
:
827 *format
= AV_PIX_FMT_YUV420P
;
829 case AV_PIX_FMT_YUVJ422P
:
830 *format
= AV_PIX_FMT_YUV422P
;
832 case AV_PIX_FMT_YUVJ444P
:
833 *format
= AV_PIX_FMT_YUV444P
;
835 case AV_PIX_FMT_YUVJ440P
:
836 *format
= AV_PIX_FMT_YUV440P
;
843 SwsContext
*sws_alloc_context(void)
845 SwsContext
*c
= av_mallocz(sizeof(SwsContext
));
848 c
->av_class
= &sws_context_class
;
849 av_opt_set_defaults(c
);
855 av_cold
int sws_init_context(SwsContext
*c
, SwsFilter
*srcFilter
,
856 SwsFilter
*dstFilter
)
859 int usesVFilter
, usesHFilter
;
861 SwsFilter dummyFilter
= { NULL
, NULL
, NULL
, NULL
};
866 int dst_stride
= FFALIGN(dstW
* sizeof(int16_t) + 16, 16);
867 int dst_stride_px
= dst_stride
>> 1;
868 int flags
, cpu_flags
;
869 enum AVPixelFormat srcFormat
= c
->srcFormat
;
870 enum AVPixelFormat dstFormat
= c
->dstFormat
;
871 const AVPixFmtDescriptor
*desc_src
= av_pix_fmt_desc_get(srcFormat
);
872 const AVPixFmtDescriptor
*desc_dst
= av_pix_fmt_desc_get(dstFormat
);
874 cpu_flags
= av_get_cpu_flags();
880 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
882 if (!sws_isSupportedInput(srcFormat
)) {
883 av_log(c
, AV_LOG_ERROR
, "%s is not supported as input pixel format\n",
884 sws_format_name(srcFormat
));
885 return AVERROR(EINVAL
);
887 if (!sws_isSupportedOutput(dstFormat
)) {
888 av_log(c
, AV_LOG_ERROR
, "%s is not supported as output pixel format\n",
889 sws_format_name(dstFormat
));
890 return AVERROR(EINVAL
);
893 i
= flags
& (SWS_POINT
|
904 if (!i
|| (i
& (i
- 1))) {
905 av_log(c
, AV_LOG_ERROR
,
906 "Exactly one scaler algorithm must be chosen\n");
907 return AVERROR(EINVAL
);
910 if (srcW
< 4 || srcH
< 1 || dstW
< 8 || dstH
< 1) {
911 /* FIXME check if these are enough and try to lower them after
912 * fixing the relevant parts of the code */
913 av_log(c
, AV_LOG_ERROR
, "%dx%d -> %dx%d is invalid scaling dimension\n",
914 srcW
, srcH
, dstW
, dstH
);
915 return AVERROR(EINVAL
);
919 dstFilter
= &dummyFilter
;
921 srcFilter
= &dummyFilter
;
923 c
->lumXInc
= (((int64_t)srcW
<< 16) + (dstW
>> 1)) / dstW
;
924 c
->lumYInc
= (((int64_t)srcH
<< 16) + (dstH
>> 1)) / dstH
;
925 c
->dstFormatBpp
= av_get_bits_per_pixel(desc_dst
);
926 c
->srcFormatBpp
= av_get_bits_per_pixel(desc_src
);
927 c
->vRounder
= 4 * 0x0001000100010001ULL
;
929 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
> 1) ||
930 (srcFilter
->chrV
&& srcFilter
->chrV
->length
> 1) ||
931 (dstFilter
->lumV
&& dstFilter
->lumV
->length
> 1) ||
932 (dstFilter
->chrV
&& dstFilter
->chrV
->length
> 1);
933 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
> 1) ||
934 (srcFilter
->chrH
&& srcFilter
->chrH
->length
> 1) ||
935 (dstFilter
->lumH
&& dstFilter
->lumH
->length
> 1) ||
936 (dstFilter
->chrH
&& dstFilter
->chrH
->length
> 1);
938 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
939 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
941 if (isPlanarRGB(dstFormat
)) {
942 if (!(flags
& SWS_FULL_CHR_H_INT
)) {
943 av_log(c
, AV_LOG_DEBUG
,
944 "%s output is not supported with half chroma resolution, switching to full\n",
945 av_get_pix_fmt_name(dstFormat
));
946 flags
|= SWS_FULL_CHR_H_INT
;
951 /* reuse chroma for 2 pixels RGB/BGR unless user wants full
952 * chroma interpolation */
953 if (flags
& SWS_FULL_CHR_H_INT
&&
954 isAnyRGB(dstFormat
) &&
955 !isPlanarRGB(dstFormat
) &&
956 dstFormat
!= AV_PIX_FMT_RGBA
&&
957 dstFormat
!= AV_PIX_FMT_ARGB
&&
958 dstFormat
!= AV_PIX_FMT_BGRA
&&
959 dstFormat
!= AV_PIX_FMT_ABGR
&&
960 dstFormat
!= AV_PIX_FMT_RGB24
&&
961 dstFormat
!= AV_PIX_FMT_BGR24
) {
962 av_log(c
, AV_LOG_ERROR
,
963 "full chroma interpolation for destination format '%s' not yet implemented\n",
964 sws_format_name(dstFormat
));
965 flags
&= ~SWS_FULL_CHR_H_INT
;
968 if (isAnyRGB(dstFormat
) && !(flags
& SWS_FULL_CHR_H_INT
))
969 c
->chrDstHSubSample
= 1;
971 // drop some chroma lines if the user wants it
972 c
->vChrDrop
= (flags
& SWS_SRC_V_CHR_DROP_MASK
) >>
973 SWS_SRC_V_CHR_DROP_SHIFT
;
974 c
->chrSrcVSubSample
+= c
->vChrDrop
;
976 /* drop every other pixel for chroma calculation unless user
977 * wants full chroma */
978 if (isAnyRGB(srcFormat
) && !(flags
& SWS_FULL_CHR_H_INP
) &&
979 srcFormat
!= AV_PIX_FMT_RGB8
&& srcFormat
!= AV_PIX_FMT_BGR8
&&
980 srcFormat
!= AV_PIX_FMT_RGB4
&& srcFormat
!= AV_PIX_FMT_BGR4
&&
981 srcFormat
!= AV_PIX_FMT_RGB4_BYTE
&& srcFormat
!= AV_PIX_FMT_BGR4_BYTE
&&
982 srcFormat
!= AV_PIX_FMT_GBRP9BE
&& srcFormat
!= AV_PIX_FMT_GBRP9LE
&&
983 srcFormat
!= AV_PIX_FMT_GBRP10BE
&& srcFormat
!= AV_PIX_FMT_GBRP10LE
&&
984 srcFormat
!= AV_PIX_FMT_GBRP16BE
&& srcFormat
!= AV_PIX_FMT_GBRP16LE
&&
985 ((dstW
>> c
->chrDstHSubSample
) <= (srcW
>> 1) ||
986 (flags
& SWS_FAST_BILINEAR
)))
987 c
->chrSrcHSubSample
= 1;
989 // Note the -((-x)>>y) is so that we always round toward +inf.
990 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
991 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
992 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
993 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
995 /* unscaled special cases */
996 if (unscaled
&& !usesHFilter
&& !usesVFilter
&&
997 (c
->srcRange
== c
->dstRange
|| isAnyRGB(dstFormat
))) {
998 ff_get_unscaled_swscale(c
);
1001 if (flags
& SWS_PRINT_INFO
)
1002 av_log(c
, AV_LOG_INFO
,
1003 "using unscaled %s -> %s special converter\n",
1004 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
1009 c
->srcBpc
= 1 + desc_src
->comp
[0].depth_minus1
;
1012 c
->dstBpc
= 1 + desc_dst
->comp
[0].depth_minus1
;
1015 if (c
->dstBpc
== 16)
1017 FF_ALLOC_OR_GOTO(c
, c
->formatConvBuffer
,
1018 (FFALIGN(srcW
, 16) * 2 * FFALIGN(c
->srcBpc
, 8) >> 3) + 16,
1020 if (INLINE_MMXEXT(cpu_flags
) && c
->srcBpc
== 8 && c
->dstBpc
<= 10) {
1021 c
->canMMXEXTBeUsed
= (dstW
>= srcW
&& (dstW
& 31) == 0 &&
1022 (srcW
& 15) == 0) ? 1 : 0;
1023 if (!c
->canMMXEXTBeUsed
&& dstW
>= srcW
&& (srcW
& 15) == 0
1024 && (flags
& SWS_FAST_BILINEAR
)) {
1025 if (flags
& SWS_PRINT_INFO
)
1026 av_log(c
, AV_LOG_INFO
,
1027 "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1030 c
->canMMXEXTBeUsed
= 0;
1032 c
->canMMXEXTBeUsed
= 0;
1034 c
->chrXInc
= (((int64_t)c
->chrSrcW
<< 16) + (c
->chrDstW
>> 1)) / c
->chrDstW
;
1035 c
->chrYInc
= (((int64_t)c
->chrSrcH
<< 16) + (c
->chrDstH
>> 1)) / c
->chrDstH
;
1037 /* Match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src
1038 * to pixel n-2 of dst, but only for the FAST_BILINEAR mode otherwise do
1040 * n-2 is the last chrominance sample available.
1041 * This is not perfect, but no one should notice the difference, the more
1042 * correct variant would be like the vertical one, but that would require
1043 * some special code for the first and last pixel */
1044 if (flags
& SWS_FAST_BILINEAR
) {
1045 if (c
->canMMXEXTBeUsed
) {
1049 // we don't use the x86 asm scaler if MMX is available
1050 else if (INLINE_MMX(cpu_flags
)) {
1051 c
->lumXInc
= ((int64_t)(srcW
- 2) << 16) / (dstW
- 2) - 20;
1052 c
->chrXInc
= ((int64_t)(c
->chrSrcW
- 2) << 16) / (c
->chrDstW
- 2) - 20;
1056 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
1058 /* precalculate horizontal scaler filter coefficients */
1060 #if HAVE_MMXEXT_INLINE
1061 // can't downscale !!!
1062 if (c
->canMMXEXTBeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
1063 c
->lumMmxextFilterCodeSize
= init_hscaler_mmxext(dstW
, c
->lumXInc
, NULL
,
1065 c
->chrMmxextFilterCodeSize
= init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
,
1066 NULL
, NULL
, NULL
, 4);
1069 c
->lumMmxextFilterCode
= mmap(NULL
, c
->lumMmxextFilterCodeSize
,
1070 PROT_READ
| PROT_WRITE
,
1071 MAP_PRIVATE
| MAP_ANONYMOUS
,
1073 c
->chrMmxextFilterCode
= mmap(NULL
, c
->chrMmxextFilterCodeSize
,
1074 PROT_READ
| PROT_WRITE
,
1075 MAP_PRIVATE
| MAP_ANONYMOUS
,
1077 #elif HAVE_VIRTUALALLOC
1078 c
->lumMmxextFilterCode
= VirtualAlloc(NULL
,
1079 c
->lumMmxextFilterCodeSize
,
1081 PAGE_EXECUTE_READWRITE
);
1082 c
->chrMmxextFilterCode
= VirtualAlloc(NULL
,
1083 c
->chrMmxextFilterCodeSize
,
1085 PAGE_EXECUTE_READWRITE
);
1087 c
->lumMmxextFilterCode
= av_malloc(c
->lumMmxextFilterCodeSize
);
1088 c
->chrMmxextFilterCode
= av_malloc(c
->chrMmxextFilterCodeSize
);
1091 if (!c
->lumMmxextFilterCode
|| !c
->chrMmxextFilterCode
)
1092 return AVERROR(ENOMEM
);
1093 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/ 8 + 8) * sizeof(int16_t), fail
);
1094 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/ 4 + 8) * sizeof(int16_t), fail
);
1095 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/ 2 / 8 + 8) * sizeof(int32_t), fail
);
1096 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/ 2 / 4 + 8) * sizeof(int32_t), fail
);
1098 init_hscaler_mmxext(dstW
, c
->lumXInc
, c
->lumMmxextFilterCode
,
1099 c
->hLumFilter
, c
->hLumFilterPos
, 8);
1100 init_hscaler_mmxext(c
->chrDstW
, c
->chrXInc
, c
->chrMmxextFilterCode
,
1101 c
->hChrFilter
, c
->hChrFilterPos
, 4);
1104 mprotect(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
);
1105 mprotect(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
, PROT_EXEC
| PROT_READ
);
1108 #endif /* HAVE_MMXEXT_INLINE */
1110 const int filterAlign
=
1111 (HAVE_MMX
&& cpu_flags
& AV_CPU_FLAG_MMX
) ? 4 :
1112 (HAVE_ALTIVEC
&& cpu_flags
& AV_CPU_FLAG_ALTIVEC
) ? 8 :
1115 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
,
1116 &c
->hLumFilterSize
, c
->lumXInc
,
1117 srcW
, dstW
, filterAlign
, 1 << 14,
1118 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1119 cpu_flags
, srcFilter
->lumH
, dstFilter
->lumH
,
1122 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
,
1123 &c
->hChrFilterSize
, c
->chrXInc
,
1124 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1 << 14,
1125 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1126 cpu_flags
, srcFilter
->chrH
, dstFilter
->chrH
,
1130 } // initialize horizontal stuff
1132 /* precalculate vertical scaler filter coefficients */
1134 const int filterAlign
=
1135 (HAVE_MMX
&& cpu_flags
& AV_CPU_FLAG_MMX
) ? 2 :
1136 (HAVE_ALTIVEC
&& cpu_flags
& AV_CPU_FLAG_ALTIVEC
) ? 8 :
1139 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
,
1140 c
->lumYInc
, srcH
, dstH
, filterAlign
, (1 << 12),
1141 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BICUBIC
) : flags
,
1142 cpu_flags
, srcFilter
->lumV
, dstFilter
->lumV
,
1145 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
,
1146 c
->chrYInc
, c
->chrSrcH
, c
->chrDstH
,
1147 filterAlign
, (1 << 12),
1148 (flags
& SWS_BICUBLIN
) ? (flags
| SWS_BILINEAR
) : flags
,
1149 cpu_flags
, srcFilter
->chrV
, dstFilter
->chrV
,
1154 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof(vector
signed short) * c
->vLumFilterSize
* c
->dstH
, fail
);
1155 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof(vector
signed short) * c
->vChrFilterSize
* c
->chrDstH
, fail
);
1157 for (i
= 0; i
< c
->vLumFilterSize
* c
->dstH
; i
++) {
1159 short *p
= (short *)&c
->vYCoeffsBank
[i
];
1160 for (j
= 0; j
< 8; j
++)
1161 p
[j
] = c
->vLumFilter
[i
];
1164 for (i
= 0; i
< c
->vChrFilterSize
* c
->chrDstH
; i
++) {
1166 short *p
= (short *)&c
->vCCoeffsBank
[i
];
1167 for (j
= 0; j
< 8; j
++)
1168 p
[j
] = c
->vChrFilter
[i
];
1173 // calculate buffer sizes so that they won't run out while handling these damn slices
1174 c
->vLumBufSize
= c
->vLumFilterSize
;
1175 c
->vChrBufSize
= c
->vChrFilterSize
;
1176 for (i
= 0; i
< dstH
; i
++) {
1177 int chrI
= (int64_t)i
* c
->chrDstH
/ dstH
;
1178 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
1179 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)
1180 << c
->chrSrcVSubSample
));
1182 nextSlice
>>= c
->chrSrcVSubSample
;
1183 nextSlice
<<= c
->chrSrcVSubSample
;
1184 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1185 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1186 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
<
1187 (nextSlice
>> c
->chrSrcVSubSample
))
1188 c
->vChrBufSize
= (nextSlice
>> c
->chrSrcVSubSample
) -
1189 c
->vChrFilterPos
[chrI
];
1192 /* Allocate pixbufs (we use dynamic allocation because otherwise we would
1193 * need to allocate several megabytes to handle all possible cases) */
1194 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1195 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1196 FF_ALLOC_OR_GOTO(c
, c
->chrVPixBuf
, c
->vChrBufSize
* 3 * sizeof(int16_t *), fail
);
1197 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1198 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
* 3 * sizeof(int16_t *), fail
);
1199 /* Note we need at least one pixel more at the end because of the MMX code
1200 * (just in case someone wants to replace the 4000/8000). */
1201 /* align at 16 bytes for AltiVec */
1202 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1203 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+ c
->vLumBufSize
],
1204 dst_stride
+ 16, fail
);
1205 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+ c
->vLumBufSize
];
1207 // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
1208 c
->uv_off_px
= dst_stride_px
+ 64 / (c
->dstBpc
& ~7);
1209 c
->uv_off_byte
= dst_stride
+ 16;
1210 for (i
= 0; i
< c
->vChrBufSize
; i
++) {
1211 FF_ALLOC_OR_GOTO(c
, c
->chrUPixBuf
[i
+ c
->vChrBufSize
],
1212 dst_stride
* 2 + 32, fail
);
1213 c
->chrUPixBuf
[i
] = c
->chrUPixBuf
[i
+ c
->vChrBufSize
];
1214 c
->chrVPixBuf
[i
] = c
->chrVPixBuf
[i
+ c
->vChrBufSize
]
1215 = c
->chrUPixBuf
[i
] + (dst_stride
>> 1) + 8;
1217 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1218 for (i
= 0; i
< c
->vLumBufSize
; i
++) {
1219 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+ c
->vLumBufSize
],
1220 dst_stride
+ 16, fail
);
1221 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+ c
->vLumBufSize
];
1224 // try to avoid drawing green stuff between the right end and the stride end
1225 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1226 memset(c
->chrUPixBuf
[i
], 64, dst_stride
* 2 + 1);
1228 assert(c
->chrDstH
<= dstH
);
1230 if (flags
& SWS_PRINT_INFO
) {
1231 if (flags
& SWS_FAST_BILINEAR
)
1232 av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1233 else if (flags
& SWS_BILINEAR
)
1234 av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1235 else if (flags
& SWS_BICUBIC
)
1236 av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1237 else if (flags
& SWS_X
)
1238 av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1239 else if (flags
& SWS_POINT
)
1240 av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1241 else if (flags
& SWS_AREA
)
1242 av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1243 else if (flags
& SWS_BICUBLIN
)
1244 av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1245 else if (flags
& SWS_GAUSS
)
1246 av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1247 else if (flags
& SWS_SINC
)
1248 av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1249 else if (flags
& SWS_LANCZOS
)
1250 av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1251 else if (flags
& SWS_SPLINE
)
1252 av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1254 av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1256 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1257 sws_format_name(srcFormat
),
1259 dstFormat
== AV_PIX_FMT_BGR555
|| dstFormat
== AV_PIX_FMT_BGR565
||
1260 dstFormat
== AV_PIX_FMT_RGB444BE
|| dstFormat
== AV_PIX_FMT_RGB444LE
||
1261 dstFormat
== AV_PIX_FMT_BGR444BE
|| dstFormat
== AV_PIX_FMT_BGR444LE
?
1266 sws_format_name(dstFormat
));
1268 if (INLINE_MMXEXT(cpu_flags
))
1269 av_log(c
, AV_LOG_INFO
, "using MMXEXT\n");
1270 else if (INLINE_AMD3DNOW(cpu_flags
))
1271 av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1272 else if (INLINE_MMX(cpu_flags
))
1273 av_log(c
, AV_LOG_INFO
, "using MMX\n");
1274 else if (HAVE_ALTIVEC
&& cpu_flags
& AV_CPU_FLAG_ALTIVEC
)
1275 av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1277 av_log(c
, AV_LOG_INFO
, "using C\n");
1279 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1280 av_log(c
, AV_LOG_DEBUG
,
1281 "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1282 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1283 av_log(c
, AV_LOG_DEBUG
,
1284 "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1285 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
,
1286 c
->chrXInc
, c
->chrYInc
);
1289 c
->swScale
= ff_getSwsFunc(c
);
1291 fail
: // FIXME replace things by appropriate error codes
#if FF_API_SWS_GETCONTEXT
/**
 * Allocate a scaler context, fill it from the arguments and initialize it.
 *
 * @param srcW,srcH   source picture dimensions
 * @param srcFormat   source pixel format
 * @param dstW,dstH   destination picture dimensions
 * @param dstFormat   destination pixel format
 * @param flags       scaler selection / option flags (SWS_*)
 * @param srcFilter   filter handed to sws_init_context() for the source side
 * @param dstFilter   filter handed to sws_init_context() for the destination side
 * @param param       optional pair of scaler tuning values; may be NULL
 * @return the initialized context, or NULL if allocation or initialization
 *         failed
 */
SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
                           int dstW, int dstH, enum AVPixelFormat dstFormat,
                           int flags, SwsFilter *srcFilter,
                           SwsFilter *dstFilter, const double *param)
{
    SwsContext *ctx = sws_alloc_context();

    if (!ctx)
        return NULL;

    ctx->flags = flags;
    ctx->srcW  = srcW;
    ctx->srcH  = srcH;
    ctx->dstW  = dstW;
    ctx->dstH  = dstH;

    /* handle_jpeg() is given the format by address, so the format is only
     * stored in the context after the call. */
    ctx->srcRange  = handle_jpeg(&srcFormat);
    ctx->dstRange  = handle_jpeg(&dstFormat);
    ctx->srcFormat = srcFormat;
    ctx->dstFormat = dstFormat;

    if (param) {
        ctx->param[0] = param[0];
        ctx->param[1] = param[1];
    }

    sws_setColorspaceDetails(ctx,
                             ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], ctx->srcRange,
                             ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
                             ctx->dstRange, 0, 1 << 16, 1 << 16);

    if (sws_init_context(ctx, srcFilter, dstFilter) >= 0)
        return ctx;

    /* Initialization failed: release everything allocated so far. */
    sws_freeContext(ctx);
    return NULL;
}
#endif
1333 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1334 float lumaSharpen
, float chromaSharpen
,
1335 float chromaHShift
, float chromaVShift
,
1338 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1342 if (lumaGBlur
!= 0.0) {
1343 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1344 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1346 filter
->lumH
= sws_getIdentityVec();
1347 filter
->lumV
= sws_getIdentityVec();
1350 if (chromaGBlur
!= 0.0) {
1351 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1352 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1354 filter
->chrH
= sws_getIdentityVec();
1355 filter
->chrV
= sws_getIdentityVec();
1358 if (chromaSharpen
!= 0.0) {
1359 SwsVector
*id
= sws_getIdentityVec();
1360 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1361 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1362 sws_addVec(filter
->chrH
, id
);
1363 sws_addVec(filter
->chrV
, id
);
1367 if (lumaSharpen
!= 0.0) {
1368 SwsVector
*id
= sws_getIdentityVec();
1369 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1370 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1371 sws_addVec(filter
->lumH
, id
);
1372 sws_addVec(filter
->lumV
, id
);
1376 if (chromaHShift
!= 0.0)
1377 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+ 0.5));
1379 if (chromaVShift
!= 0.0)
1380 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+ 0.5));
1382 sws_normalizeVec(filter
->chrH
, 1.0);
1383 sws_normalizeVec(filter
->chrV
, 1.0);
1384 sws_normalizeVec(filter
->lumH
, 1.0);
1385 sws_normalizeVec(filter
->lumV
, 1.0);
1388 sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1390 sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1395 SwsVector
*sws_allocVec(int length
)
1397 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1400 vec
->length
= length
;
1401 vec
->coeff
= av_malloc(sizeof(double) * length
);
1407 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1409 const int length
= (int)(variance
* quality
+ 0.5) | 1;
1411 double middle
= (length
- 1) * 0.5;
1412 SwsVector
*vec
= sws_allocVec(length
);
1417 for (i
= 0; i
< length
; i
++) {
1418 double dist
= i
- middle
;
1419 vec
->coeff
[i
] = exp(-dist
* dist
/ (2 * variance
* variance
)) /
1420 sqrt(2 * variance
* M_PI
);
1423 sws_normalizeVec(vec
, 1.0);
1428 SwsVector
*sws_getConstVec(double c
, int length
)
1431 SwsVector
*vec
= sws_allocVec(length
);
1436 for (i
= 0; i
< length
; i
++)
1442 SwsVector
*sws_getIdentityVec(void)
1444 return sws_getConstVec(1.0, 1);
1447 static double sws_dcVec(SwsVector
*a
)
1452 for (i
= 0; i
< a
->length
; i
++)
1458 void sws_scaleVec(SwsVector
*a
, double scalar
)
1462 for (i
= 0; i
< a
->length
; i
++)
1463 a
->coeff
[i
] *= scalar
;
1466 void sws_normalizeVec(SwsVector
*a
, double height
)
1468 sws_scaleVec(a
, height
/ sws_dcVec(a
));
1471 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1473 int length
= a
->length
+ b
->length
- 1;
1475 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1480 for (i
= 0; i
< a
->length
; i
++) {
1481 for (j
= 0; j
< b
->length
; j
++) {
1482 vec
->coeff
[i
+ j
] += a
->coeff
[i
] * b
->coeff
[j
];
1489 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1491 int length
= FFMAX(a
->length
, b
->length
);
1493 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1498 for (i
= 0; i
< a
->length
; i
++)
1499 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
1500 for (i
= 0; i
< b
->length
; i
++)
1501 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] += b
->coeff
[i
];
1506 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1508 int length
= FFMAX(a
->length
, b
->length
);
1510 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1515 for (i
= 0; i
< a
->length
; i
++)
1516 vec
->coeff
[i
+ (length
- 1) / 2 - (a
->length
- 1) / 2] += a
->coeff
[i
];
1517 for (i
= 0; i
< b
->length
; i
++)
1518 vec
->coeff
[i
+ (length
- 1) / 2 - (b
->length
- 1) / 2] -= b
->coeff
[i
];
1523 /* shift left / or right if "shift" is negative */
1524 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1526 int length
= a
->length
+ FFABS(shift
) * 2;
1528 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1533 for (i
= 0; i
< a
->length
; i
++) {
1534 vec
->coeff
[i
+ (length
- 1) / 2 -
1535 (a
->length
- 1) / 2 - shift
] = a
->coeff
[i
];
1541 void sws_shiftVec(SwsVector
*a
, int shift
)
1543 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1545 a
->coeff
= shifted
->coeff
;
1546 a
->length
= shifted
->length
;
1550 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1552 SwsVector
*sum
= sws_sumVec(a
, b
);
1554 a
->coeff
= sum
->coeff
;
1555 a
->length
= sum
->length
;
1559 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1561 SwsVector
*diff
= sws_diffVec(a
, b
);
1563 a
->coeff
= diff
->coeff
;
1564 a
->length
= diff
->length
;
1568 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1570 SwsVector
*conv
= sws_getConvVec(a
, b
);
1572 a
->coeff
= conv
->coeff
;
1573 a
->length
= conv
->length
;
1577 SwsVector
*sws_cloneVec(SwsVector
*a
)
1580 SwsVector
*vec
= sws_allocVec(a
->length
);
1585 for (i
= 0; i
< a
->length
; i
++)
1586 vec
->coeff
[i
] = a
->coeff
[i
];
1591 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1598 for (i
= 0; i
< a
->length
; i
++)
1599 if (a
->coeff
[i
] > max
)
1602 for (i
= 0; i
< a
->length
; i
++)
1603 if (a
->coeff
[i
] < min
)
1608 for (i
= 0; i
< a
->length
; i
++) {
1609 int x
= (int)((a
->coeff
[i
] - min
) * 60.0 / range
+ 0.5);
1610 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1612 av_log(log_ctx
, log_level
, " ");
1613 av_log(log_ctx
, log_level
, "|\n");
1617 void sws_freeVec(SwsVector
*a
)
1621 av_freep(&a
->coeff
);
1626 void sws_freeFilter(SwsFilter
*filter
)
1632 sws_freeVec(filter
->lumH
);
1634 sws_freeVec(filter
->lumV
);
1636 sws_freeVec(filter
->chrH
);
1638 sws_freeVec(filter
->chrV
);
1642 void sws_freeContext(SwsContext
*c
)
1649 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1650 av_freep(&c
->lumPixBuf
[i
]);
1651 av_freep(&c
->lumPixBuf
);
1654 if (c
->chrUPixBuf
) {
1655 for (i
= 0; i
< c
->vChrBufSize
; i
++)
1656 av_freep(&c
->chrUPixBuf
[i
]);
1657 av_freep(&c
->chrUPixBuf
);
1658 av_freep(&c
->chrVPixBuf
);
1661 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1662 for (i
= 0; i
< c
->vLumBufSize
; i
++)
1663 av_freep(&c
->alpPixBuf
[i
]);
1664 av_freep(&c
->alpPixBuf
);
1667 av_freep(&c
->vLumFilter
);
1668 av_freep(&c
->vChrFilter
);
1669 av_freep(&c
->hLumFilter
);
1670 av_freep(&c
->hChrFilter
);
1672 av_freep(&c
->vYCoeffsBank
);
1673 av_freep(&c
->vCCoeffsBank
);
1676 av_freep(&c
->vLumFilterPos
);
1677 av_freep(&c
->vChrFilterPos
);
1678 av_freep(&c
->hLumFilterPos
);
1679 av_freep(&c
->hChrFilterPos
);
1683 if (c
->lumMmxextFilterCode
)
1684 munmap(c
->lumMmxextFilterCode
, c
->lumMmxextFilterCodeSize
);
1685 if (c
->chrMmxextFilterCode
)
1686 munmap(c
->chrMmxextFilterCode
, c
->chrMmxextFilterCodeSize
);
1687 #elif HAVE_VIRTUALALLOC
1688 if (c
->lumMmxextFilterCode
)
1689 VirtualFree(c
->lumMmxextFilterCode
, 0, MEM_RELEASE
);
1690 if (c
->chrMmxextFilterCode
)
1691 VirtualFree(c
->chrMmxextFilterCode
, 0, MEM_RELEASE
);
1693 av_free(c
->lumMmxextFilterCode
);
1694 av_free(c
->chrMmxextFilterCode
);
1696 c
->lumMmxextFilterCode
= NULL
;
1697 c
->chrMmxextFilterCode
= NULL
;
1698 #endif /* HAVE_MMX_INLINE */
1700 av_freep(&c
->yuvTable
);
1701 av_free(c
->formatConvBuffer
);
1706 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
, int srcW
,
1707 int srcH
, enum AVPixelFormat srcFormat
,
1709 enum AVPixelFormat dstFormat
, int flags
,
1710 SwsFilter
*srcFilter
,
1711 SwsFilter
*dstFilter
,
1712 const double *param
)
1714 static const double default_param
[2] = { SWS_PARAM_DEFAULT
,
1715 SWS_PARAM_DEFAULT
};
1718 param
= default_param
;
1721 (context
->srcW
!= srcW
||
1722 context
->srcH
!= srcH
||
1723 context
->srcFormat
!= srcFormat
||
1724 context
->dstW
!= dstW
||
1725 context
->dstH
!= dstH
||
1726 context
->dstFormat
!= dstFormat
||
1727 context
->flags
!= flags
||
1728 context
->param
[0] != param
[0] ||
1729 context
->param
[1] != param
[1])) {
1730 sws_freeContext(context
);
1735 if (!(context
= sws_alloc_context()))
1737 context
->srcW
= srcW
;
1738 context
->srcH
= srcH
;
1739 context
->srcRange
= handle_jpeg(&srcFormat
);
1740 context
->srcFormat
= srcFormat
;
1741 context
->dstW
= dstW
;
1742 context
->dstH
= dstH
;
1743 context
->dstRange
= handle_jpeg(&dstFormat
);
1744 context
->dstFormat
= dstFormat
;
1745 context
->flags
= flags
;
1746 context
->param
[0] = param
[0];
1747 context
->param
[1] = param
[1];
1748 sws_setColorspaceDetails(context
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
],
1750 ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
] /* FIXME*/,
1751 context
->dstRange
, 0, 1 << 16, 1 << 16);
1752 if (sws_init_context(context
, srcFilter
, dstFilter
) < 0) {
1753 sws_freeContext(context
);