3 * Copyright (c) 2000, 2001 Fabrice Bellard.
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
32 #include "simple_idct.h"
39 void ff_spatial_dwt(int *buffer
, int width
, int height
, int stride
, int type
, int decomposition_count
);
42 void vorbis_inverse_coupling(float *mag
, float *ang
, int blocksize
);
45 void ff_ac3_downmix_c(float (*samples
)[256], float (*matrix
)[2], int out_ch
, int in_ch
, int len
);
48 void ff_flac_compute_autocorr(const int32_t *data
, int len
, int lag
, double *autoc
);
51 void ff_add_png_paeth_prediction(uint8_t *dst
, uint8_t *src
, uint8_t *top
, int w
, int bpp
);
54 void ff_ea_idct_put_c(uint8_t *dest
, int linesize
, DCTELEM
*block
);
/* Clipping lookup table: indexed with an offset of MAX_NEG_CROP so that
 * out-of-range intermediate values clamp to 0/255.
 * NOTE(review): zero-initialized here; presumably filled at init time
 * elsewhere in the file — confirm against dsputil_static_init. */
56 uint8_t ff_cropTbl
[256 + 2 * MAX_NEG_CROP
] = {0, };
/* Square lookup table: ff_squareTbl[256 + x] == x*x for -256 <= x < 256.
 * NOTE(review): also zero here; filled at init time — confirm. */
57 uint32_t ff_squareTbl
[512] = {0, };
59 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
/* ~0UL/255 is 0x0101...01, i.e. a 1 replicated into every byte of an
 * unsigned long; multiplying by the byte value broadcasts it to all
 * bytes (SWAR byte-parallel constant). */
60 #define pb_7f (~0UL/255 * 0x7f)
61 #define pb_80 (~0UL/255 * 0x80)
/* Classic JPEG/MPEG zigzag scan order for an 8x8 coefficient block.
 * (Restored the closing brace missing from the truncated initializer.) */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields.
   (Restored the closing brace missing from the truncated initializer.) */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
87 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* NOTE(review): zero-initialized here; appears to be populated at init
 * time (inv_zigzag_direct16[zigzag_direct[i]] = i + 1) — confirm against
 * the MMX quantizer init code. 8-byte alignment required by MMX loads. */
88 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16
[64]) = {0, };
/* Alternate horizontal scan order (used e.g. by MPEG-4 interlaced coding).
 * (Restored the closing brace missing from the truncated initializer.) */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
/* Alternate vertical scan order (used e.g. by MPEG-4 interlaced coding).
 * (Restored the closing brace missing from the truncated initializer.) */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 * Reciprocal table for division by a constant via multiply + shift.
 * (Restored the closing brace missing from the truncated initializer.) */
const uint32_t ff_inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
/* Input permutation for the simple_idct_mmx
 * (Restored the closing brace missing from the truncated initializer.) */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
/* Row processing order for the SSE2 IDCT: even rows first interleaved
 * with odd rows (0,4 / 1,5 / 2,6 / 3,7). */
160 static const uint8_t idct_sse2_row_perm
[8] = {0, 4, 1, 5, 2, 6, 3, 7};
162 void ff_init_scantable(uint8_t *permutation
, ScanTable
*st
, const uint8_t *src_scantable
){
166 st
->scantable
= src_scantable
;
170 j
= src_scantable
[i
];
171 st
->permutated
[i
] = permutation
[j
];
180 j
= st
->permutated
[i
];
182 st
->raster_end
[i
]= end
;
186 static int pix_sum_c(uint8_t * pix
, int line_size
)
191 for (i
= 0; i
< 16; i
++) {
192 for (j
= 0; j
< 16; j
+= 8) {
203 pix
+= line_size
- 16;
208 static int pix_norm1_c(uint8_t * pix
, int line_size
)
211 uint32_t *sq
= ff_squareTbl
+ 256;
214 for (i
= 0; i
< 16; i
++) {
215 for (j
= 0; j
< 16; j
+= 8) {
226 #if LONG_MAX > 2147483647
227 register uint64_t x
=*(uint64_t*)pix
;
229 s
+= sq
[(x
>>8)&0xff];
230 s
+= sq
[(x
>>16)&0xff];
231 s
+= sq
[(x
>>24)&0xff];
232 s
+= sq
[(x
>>32)&0xff];
233 s
+= sq
[(x
>>40)&0xff];
234 s
+= sq
[(x
>>48)&0xff];
235 s
+= sq
[(x
>>56)&0xff];
237 register uint32_t x
=*(uint32_t*)pix
;
239 s
+= sq
[(x
>>8)&0xff];
240 s
+= sq
[(x
>>16)&0xff];
241 s
+= sq
[(x
>>24)&0xff];
242 x
=*(uint32_t*)(pix
+4);
244 s
+= sq
[(x
>>8)&0xff];
245 s
+= sq
[(x
>>16)&0xff];
246 s
+= sq
[(x
>>24)&0xff];
251 pix
+= line_size
- 16;
256 static void bswap_buf(uint32_t *dst
, const uint32_t *src
, int w
){
259 for(i
=0; i
+8<=w
; i
+=8){
260 dst
[i
+0]= bswap_32(src
[i
+0]);
261 dst
[i
+1]= bswap_32(src
[i
+1]);
262 dst
[i
+2]= bswap_32(src
[i
+2]);
263 dst
[i
+3]= bswap_32(src
[i
+3]);
264 dst
[i
+4]= bswap_32(src
[i
+4]);
265 dst
[i
+5]= bswap_32(src
[i
+5]);
266 dst
[i
+6]= bswap_32(src
[i
+6]);
267 dst
[i
+7]= bswap_32(src
[i
+7]);
270 dst
[i
+0]= bswap_32(src
[i
+0]);
274 static int sse4_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
)
277 uint32_t *sq
= ff_squareTbl
+ 256;
280 for (i
= 0; i
< h
; i
++) {
281 s
+= sq
[pix1
[0] - pix2
[0]];
282 s
+= sq
[pix1
[1] - pix2
[1]];
283 s
+= sq
[pix1
[2] - pix2
[2]];
284 s
+= sq
[pix1
[3] - pix2
[3]];
291 static int sse8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
)
294 uint32_t *sq
= ff_squareTbl
+ 256;
297 for (i
= 0; i
< h
; i
++) {
298 s
+= sq
[pix1
[0] - pix2
[0]];
299 s
+= sq
[pix1
[1] - pix2
[1]];
300 s
+= sq
[pix1
[2] - pix2
[2]];
301 s
+= sq
[pix1
[3] - pix2
[3]];
302 s
+= sq
[pix1
[4] - pix2
[4]];
303 s
+= sq
[pix1
[5] - pix2
[5]];
304 s
+= sq
[pix1
[6] - pix2
[6]];
305 s
+= sq
[pix1
[7] - pix2
[7]];
312 static int sse16_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
315 uint32_t *sq
= ff_squareTbl
+ 256;
318 for (i
= 0; i
< h
; i
++) {
319 s
+= sq
[pix1
[ 0] - pix2
[ 0]];
320 s
+= sq
[pix1
[ 1] - pix2
[ 1]];
321 s
+= sq
[pix1
[ 2] - pix2
[ 2]];
322 s
+= sq
[pix1
[ 3] - pix2
[ 3]];
323 s
+= sq
[pix1
[ 4] - pix2
[ 4]];
324 s
+= sq
[pix1
[ 5] - pix2
[ 5]];
325 s
+= sq
[pix1
[ 6] - pix2
[ 6]];
326 s
+= sq
[pix1
[ 7] - pix2
[ 7]];
327 s
+= sq
[pix1
[ 8] - pix2
[ 8]];
328 s
+= sq
[pix1
[ 9] - pix2
[ 9]];
329 s
+= sq
[pix1
[10] - pix2
[10]];
330 s
+= sq
[pix1
[11] - pix2
[11]];
331 s
+= sq
[pix1
[12] - pix2
[12]];
332 s
+= sq
[pix1
[13] - pix2
[13]];
333 s
+= sq
[pix1
[14] - pix2
[14]];
334 s
+= sq
[pix1
[15] - pix2
[15]];
343 #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
344 static inline int w_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int w
, int h
, int type
){
346 const int dec_count
= w
==8 ? 3 : 4;
349 static const int scale
[2][2][4][4]={
353 {268, 239, 239, 213},
357 // 9/7 16x16 or 32x32 dec=4
358 {344, 310, 310, 280},
366 {275, 245, 245, 218},
370 // 5/3 16x16 or 32x32 dec=4
371 {352, 317, 317, 286},
379 for (i
= 0; i
< h
; i
++) {
380 for (j
= 0; j
< w
; j
+=4) {
381 tmp
[32*i
+j
+0] = (pix1
[j
+0] - pix2
[j
+0])<<4;
382 tmp
[32*i
+j
+1] = (pix1
[j
+1] - pix2
[j
+1])<<4;
383 tmp
[32*i
+j
+2] = (pix1
[j
+2] - pix2
[j
+2])<<4;
384 tmp
[32*i
+j
+3] = (pix1
[j
+3] - pix2
[j
+3])<<4;
390 ff_spatial_dwt(tmp
, w
, h
, 32, type
, dec_count
);
394 for(level
=0; level
<dec_count
; level
++){
395 for(ori
= level
? 1 : 0; ori
<4; ori
++){
396 int size
= w
>>(dec_count
-level
);
397 int sx
= (ori
&1) ? size
: 0;
398 int stride
= 32<<(dec_count
-level
);
399 int sy
= (ori
&2) ? stride
>>1 : 0;
401 for(i
=0; i
<size
; i
++){
402 for(j
=0; j
<size
; j
++){
403 int v
= tmp
[sx
+ sy
+ i
*stride
+ j
] * scale
[type
][dec_count
-3][level
][ori
];
413 static int w53_8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
414 return w_c(v
, pix1
, pix2
, line_size
, 8, h
, 1);
417 static int w97_8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
418 return w_c(v
, pix1
, pix2
, line_size
, 8, h
, 0);
421 static int w53_16_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
422 return w_c(v
, pix1
, pix2
, line_size
, 16, h
, 1);
425 static int w97_16_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
426 return w_c(v
, pix1
, pix2
, line_size
, 16, h
, 0);
429 int w53_32_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
430 return w_c(v
, pix1
, pix2
, line_size
, 32, h
, 1);
433 int w97_32_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
434 return w_c(v
, pix1
, pix2
, line_size
, 32, h
, 0);
438 /* draw the edges of width 'w' of an image of size width, height */
439 //FIXME check that this is ok for mpeg4 interlaced
440 static void draw_edges_c(uint8_t *buf
, int wrap
, int width
, int height
, int w
)
442 uint8_t *ptr
, *last_line
;
445 last_line
= buf
+ (height
- 1) * wrap
;
448 memcpy(buf
- (i
+ 1) * wrap
, buf
, width
);
449 memcpy(last_line
+ (i
+ 1) * wrap
, last_line
, width
);
453 for(i
=0;i
<height
;i
++) {
454 memset(ptr
- w
, ptr
[0], w
);
455 memset(ptr
+ width
, ptr
[width
-1], w
);
460 memset(buf
- (i
+ 1) * wrap
- w
, buf
[0], w
); /* top left */
461 memset(buf
- (i
+ 1) * wrap
+ width
, buf
[width
-1], w
); /* top right */
462 memset(last_line
+ (i
+ 1) * wrap
- w
, last_line
[0], w
); /* top left */
463 memset(last_line
+ (i
+ 1) * wrap
+ width
, last_line
[width
-1], w
); /* top right */
468 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
469 * @param buf destination buffer
470 * @param src source buffer
471 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
472 * @param block_w width of block
473 * @param block_h height of block
474 * @param src_x x coordinate of the top left sample of the block in the source buffer
475 * @param src_y y coordinate of the top left sample of the block in the source buffer
476 * @param w width of the source buffer
477 * @param h height of the source buffer
479 void ff_emulated_edge_mc(uint8_t *buf
, uint8_t *src
, int linesize
, int block_w
, int block_h
,
480 int src_x
, int src_y
, int w
, int h
){
482 int start_y
, start_x
, end_y
, end_x
;
485 src
+= (h
-1-src_y
)*linesize
;
487 }else if(src_y
<=-block_h
){
488 src
+= (1-block_h
-src_y
)*linesize
;
494 }else if(src_x
<=-block_w
){
495 src
+= (1-block_w
-src_x
);
499 start_y
= FFMAX(0, -src_y
);
500 start_x
= FFMAX(0, -src_x
);
501 end_y
= FFMIN(block_h
, h
-src_y
);
502 end_x
= FFMIN(block_w
, w
-src_x
);
504 // copy existing part
505 for(y
=start_y
; y
<end_y
; y
++){
506 for(x
=start_x
; x
<end_x
; x
++){
507 buf
[x
+ y
*linesize
]= src
[x
+ y
*linesize
];
512 for(y
=0; y
<start_y
; y
++){
513 for(x
=start_x
; x
<end_x
; x
++){
514 buf
[x
+ y
*linesize
]= buf
[x
+ start_y
*linesize
];
519 for(y
=end_y
; y
<block_h
; y
++){
520 for(x
=start_x
; x
<end_x
; x
++){
521 buf
[x
+ y
*linesize
]= buf
[x
+ (end_y
-1)*linesize
];
525 for(y
=0; y
<block_h
; y
++){
527 for(x
=0; x
<start_x
; x
++){
528 buf
[x
+ y
*linesize
]= buf
[start_x
+ y
*linesize
];
532 for(x
=end_x
; x
<block_w
; x
++){
533 buf
[x
+ y
*linesize
]= buf
[end_x
- 1 + y
*linesize
];
538 static void get_pixels_c(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
542 /* read the pixels */
544 block
[0] = pixels
[0];
545 block
[1] = pixels
[1];
546 block
[2] = pixels
[2];
547 block
[3] = pixels
[3];
548 block
[4] = pixels
[4];
549 block
[5] = pixels
[5];
550 block
[6] = pixels
[6];
551 block
[7] = pixels
[7];
557 static void diff_pixels_c(DCTELEM
*restrict block
, const uint8_t *s1
,
558 const uint8_t *s2
, int stride
){
561 /* read the pixels */
563 block
[0] = s1
[0] - s2
[0];
564 block
[1] = s1
[1] - s2
[1];
565 block
[2] = s1
[2] - s2
[2];
566 block
[3] = s1
[3] - s2
[3];
567 block
[4] = s1
[4] - s2
[4];
568 block
[5] = s1
[5] - s2
[5];
569 block
[6] = s1
[6] - s2
[6];
570 block
[7] = s1
[7] - s2
[7];
578 static void put_pixels_clamped_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
582 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
584 /* read the pixels */
586 pixels
[0] = cm
[block
[0]];
587 pixels
[1] = cm
[block
[1]];
588 pixels
[2] = cm
[block
[2]];
589 pixels
[3] = cm
[block
[3]];
590 pixels
[4] = cm
[block
[4]];
591 pixels
[5] = cm
[block
[5]];
592 pixels
[6] = cm
[block
[6]];
593 pixels
[7] = cm
[block
[7]];
600 static void put_pixels_clamped4_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
604 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
606 /* read the pixels */
608 pixels
[0] = cm
[block
[0]];
609 pixels
[1] = cm
[block
[1]];
610 pixels
[2] = cm
[block
[2]];
611 pixels
[3] = cm
[block
[3]];
618 static void put_pixels_clamped2_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
622 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
624 /* read the pixels */
626 pixels
[0] = cm
[block
[0]];
627 pixels
[1] = cm
[block
[1]];
634 static void put_signed_pixels_clamped_c(const DCTELEM
*block
,
635 uint8_t *restrict pixels
,
640 for (i
= 0; i
< 8; i
++) {
641 for (j
= 0; j
< 8; j
++) {
644 else if (*block
> 127)
647 *pixels
= (uint8_t)(*block
+ 128);
651 pixels
+= (line_size
- 8);
655 static void add_pixels_clamped_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
659 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
661 /* read the pixels */
663 pixels
[0] = cm
[pixels
[0] + block
[0]];
664 pixels
[1] = cm
[pixels
[1] + block
[1]];
665 pixels
[2] = cm
[pixels
[2] + block
[2]];
666 pixels
[3] = cm
[pixels
[3] + block
[3]];
667 pixels
[4] = cm
[pixels
[4] + block
[4]];
668 pixels
[5] = cm
[pixels
[5] + block
[5]];
669 pixels
[6] = cm
[pixels
[6] + block
[6]];
670 pixels
[7] = cm
[pixels
[7] + block
[7]];
676 static void add_pixels_clamped4_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
680 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
682 /* read the pixels */
684 pixels
[0] = cm
[pixels
[0] + block
[0]];
685 pixels
[1] = cm
[pixels
[1] + block
[1]];
686 pixels
[2] = cm
[pixels
[2] + block
[2]];
687 pixels
[3] = cm
[pixels
[3] + block
[3]];
693 static void add_pixels_clamped2_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
697 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
699 /* read the pixels */
701 pixels
[0] = cm
[pixels
[0] + block
[0]];
702 pixels
[1] = cm
[pixels
[1] + block
[1]];
708 static void add_pixels8_c(uint8_t *restrict pixels
, DCTELEM
*block
, int line_size
)
712 pixels
[0] += block
[0];
713 pixels
[1] += block
[1];
714 pixels
[2] += block
[2];
715 pixels
[3] += block
[3];
716 pixels
[4] += block
[4];
717 pixels
[5] += block
[5];
718 pixels
[6] += block
[6];
719 pixels
[7] += block
[7];
725 static void add_pixels4_c(uint8_t *restrict pixels
, DCTELEM
*block
, int line_size
)
729 pixels
[0] += block
[0];
730 pixels
[1] += block
[1];
731 pixels
[2] += block
[2];
732 pixels
[3] += block
[3];
738 static int sum_abs_dctelem_c(DCTELEM
*block
)
742 sum
+= FFABS(block
[i
]);
748 #define PIXOP2(OPNAME, OP) \
749 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
753 OP(*((uint64_t*)block), AV_RN64(pixels));\
759 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
763 const uint64_t a= AV_RN64(pixels );\
764 const uint64_t b= AV_RN64(pixels+1);\
765 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
771 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
775 const uint64_t a= AV_RN64(pixels );\
776 const uint64_t b= AV_RN64(pixels+1);\
777 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
783 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
787 const uint64_t a= AV_RN64(pixels );\
788 const uint64_t b= AV_RN64(pixels+line_size);\
789 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
795 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
799 const uint64_t a= AV_RN64(pixels );\
800 const uint64_t b= AV_RN64(pixels+line_size);\
801 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
807 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
810 const uint64_t a= AV_RN64(pixels );\
811 const uint64_t b= AV_RN64(pixels+1);\
812 uint64_t l0= (a&0x0303030303030303ULL)\
813 + (b&0x0303030303030303ULL)\
814 + 0x0202020202020202ULL;\
815 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
816 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
820 for(i=0; i<h; i+=2){\
821 uint64_t a= AV_RN64(pixels );\
822 uint64_t b= AV_RN64(pixels+1);\
823 l1= (a&0x0303030303030303ULL)\
824 + (b&0x0303030303030303ULL);\
825 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
826 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
827 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
830 a= AV_RN64(pixels );\
831 b= AV_RN64(pixels+1);\
832 l0= (a&0x0303030303030303ULL)\
833 + (b&0x0303030303030303ULL)\
834 + 0x0202020202020202ULL;\
835 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
836 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
837 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
843 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
846 const uint64_t a= AV_RN64(pixels );\
847 const uint64_t b= AV_RN64(pixels+1);\
848 uint64_t l0= (a&0x0303030303030303ULL)\
849 + (b&0x0303030303030303ULL)\
850 + 0x0101010101010101ULL;\
851 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
852 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
856 for(i=0; i<h; i+=2){\
857 uint64_t a= AV_RN64(pixels );\
858 uint64_t b= AV_RN64(pixels+1);\
859 l1= (a&0x0303030303030303ULL)\
860 + (b&0x0303030303030303ULL);\
861 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
862 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
863 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
866 a= AV_RN64(pixels );\
867 b= AV_RN64(pixels+1);\
868 l0= (a&0x0303030303030303ULL)\
869 + (b&0x0303030303030303ULL)\
870 + 0x0101010101010101ULL;\
871 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
872 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
873 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
879 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
880 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
881 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
882 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
883 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
884 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
885 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
887 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
888 #else // 64 bit variant
890 #define PIXOP2(OPNAME, OP) \
891 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
894 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
899 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
902 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
907 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
910 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
911 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
916 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
917 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
920 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
921 int src_stride1, int src_stride2, int h){\
925 a= AV_RN32(&src1[i*src_stride1 ]);\
926 b= AV_RN32(&src2[i*src_stride2 ]);\
927 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
928 a= AV_RN32(&src1[i*src_stride1+4]);\
929 b= AV_RN32(&src2[i*src_stride2+4]);\
930 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
934 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
935 int src_stride1, int src_stride2, int h){\
939 a= AV_RN32(&src1[i*src_stride1 ]);\
940 b= AV_RN32(&src2[i*src_stride2 ]);\
941 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
942 a= AV_RN32(&src1[i*src_stride1+4]);\
943 b= AV_RN32(&src2[i*src_stride2+4]);\
944 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
948 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
949 int src_stride1, int src_stride2, int h){\
953 a= AV_RN32(&src1[i*src_stride1 ]);\
954 b= AV_RN32(&src2[i*src_stride2 ]);\
955 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
959 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
960 int src_stride1, int src_stride2, int h){\
964 a= AV_RN16(&src1[i*src_stride1 ]);\
965 b= AV_RN16(&src2[i*src_stride2 ]);\
966 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
970 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
971 int src_stride1, int src_stride2, int h){\
972 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
973 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
976 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
977 int src_stride1, int src_stride2, int h){\
978 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
979 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
982 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
983 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
986 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
987 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
990 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
991 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
994 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
995 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
998 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
999 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1001 for(i=0; i<h; i++){\
1002 uint32_t a, b, c, d, l0, l1, h0, h1;\
1003 a= AV_RN32(&src1[i*src_stride1]);\
1004 b= AV_RN32(&src2[i*src_stride2]);\
1005 c= AV_RN32(&src3[i*src_stride3]);\
1006 d= AV_RN32(&src4[i*src_stride4]);\
1007 l0= (a&0x03030303UL)\
1010 h0= ((a&0xFCFCFCFCUL)>>2)\
1011 + ((b&0xFCFCFCFCUL)>>2);\
1012 l1= (c&0x03030303UL)\
1013 + (d&0x03030303UL);\
1014 h1= ((c&0xFCFCFCFCUL)>>2)\
1015 + ((d&0xFCFCFCFCUL)>>2);\
1016 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1017 a= AV_RN32(&src1[i*src_stride1+4]);\
1018 b= AV_RN32(&src2[i*src_stride2+4]);\
1019 c= AV_RN32(&src3[i*src_stride3+4]);\
1020 d= AV_RN32(&src4[i*src_stride4+4]);\
1021 l0= (a&0x03030303UL)\
1024 h0= ((a&0xFCFCFCFCUL)>>2)\
1025 + ((b&0xFCFCFCFCUL)>>2);\
1026 l1= (c&0x03030303UL)\
1027 + (d&0x03030303UL);\
1028 h1= ((c&0xFCFCFCFCUL)>>2)\
1029 + ((d&0xFCFCFCFCUL)>>2);\
1030 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1034 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1035 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1038 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1039 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1042 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1043 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
1046 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
1047 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
1050 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1051 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1053 for(i=0; i<h; i++){\
1054 uint32_t a, b, c, d, l0, l1, h0, h1;\
1055 a= AV_RN32(&src1[i*src_stride1]);\
1056 b= AV_RN32(&src2[i*src_stride2]);\
1057 c= AV_RN32(&src3[i*src_stride3]);\
1058 d= AV_RN32(&src4[i*src_stride4]);\
1059 l0= (a&0x03030303UL)\
1062 h0= ((a&0xFCFCFCFCUL)>>2)\
1063 + ((b&0xFCFCFCFCUL)>>2);\
1064 l1= (c&0x03030303UL)\
1065 + (d&0x03030303UL);\
1066 h1= ((c&0xFCFCFCFCUL)>>2)\
1067 + ((d&0xFCFCFCFCUL)>>2);\
1068 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1069 a= AV_RN32(&src1[i*src_stride1+4]);\
1070 b= AV_RN32(&src2[i*src_stride2+4]);\
1071 c= AV_RN32(&src3[i*src_stride3+4]);\
1072 d= AV_RN32(&src4[i*src_stride4+4]);\
1073 l0= (a&0x03030303UL)\
1076 h0= ((a&0xFCFCFCFCUL)>>2)\
1077 + ((b&0xFCFCFCFCUL)>>2);\
1078 l1= (c&0x03030303UL)\
1079 + (d&0x03030303UL);\
1080 h1= ((c&0xFCFCFCFCUL)>>2)\
1081 + ((d&0xFCFCFCFCUL)>>2);\
1082 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1085 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1086 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1087 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1088 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1090 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
1091 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1092 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1093 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1096 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1098 int i, a0, b0, a1, b1;\
1105 for(i=0; i<h; i+=2){\
1111 block[0]= (a1+a0)>>2; /* FIXME non put */\
1112 block[1]= (b1+b0)>>2;\
1122 block[0]= (a1+a0)>>2;\
1123 block[1]= (b1+b0)>>2;\
1129 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1132 const uint32_t a= AV_RN32(pixels );\
1133 const uint32_t b= AV_RN32(pixels+1);\
1134 uint32_t l0= (a&0x03030303UL)\
1137 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1138 + ((b&0xFCFCFCFCUL)>>2);\
1142 for(i=0; i<h; i+=2){\
1143 uint32_t a= AV_RN32(pixels );\
1144 uint32_t b= AV_RN32(pixels+1);\
1145 l1= (a&0x03030303UL)\
1146 + (b&0x03030303UL);\
1147 h1= ((a&0xFCFCFCFCUL)>>2)\
1148 + ((b&0xFCFCFCFCUL)>>2);\
1149 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1152 a= AV_RN32(pixels );\
1153 b= AV_RN32(pixels+1);\
1154 l0= (a&0x03030303UL)\
1157 h0= ((a&0xFCFCFCFCUL)>>2)\
1158 + ((b&0xFCFCFCFCUL)>>2);\
1159 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1165 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1168 for(j=0; j<2; j++){\
1170 const uint32_t a= AV_RN32(pixels );\
1171 const uint32_t b= AV_RN32(pixels+1);\
1172 uint32_t l0= (a&0x03030303UL)\
1175 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1176 + ((b&0xFCFCFCFCUL)>>2);\
1180 for(i=0; i<h; i+=2){\
1181 uint32_t a= AV_RN32(pixels );\
1182 uint32_t b= AV_RN32(pixels+1);\
1183 l1= (a&0x03030303UL)\
1184 + (b&0x03030303UL);\
1185 h1= ((a&0xFCFCFCFCUL)>>2)\
1186 + ((b&0xFCFCFCFCUL)>>2);\
1187 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1190 a= AV_RN32(pixels );\
1191 b= AV_RN32(pixels+1);\
1192 l0= (a&0x03030303UL)\
1195 h0= ((a&0xFCFCFCFCUL)>>2)\
1196 + ((b&0xFCFCFCFCUL)>>2);\
1197 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1201 pixels+=4-line_size*(h+1);\
1202 block +=4-line_size*h;\
1206 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1209 for(j=0; j<2; j++){\
1211 const uint32_t a= AV_RN32(pixels );\
1212 const uint32_t b= AV_RN32(pixels+1);\
1213 uint32_t l0= (a&0x03030303UL)\
1216 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1217 + ((b&0xFCFCFCFCUL)>>2);\
1221 for(i=0; i<h; i+=2){\
1222 uint32_t a= AV_RN32(pixels );\
1223 uint32_t b= AV_RN32(pixels+1);\
1224 l1= (a&0x03030303UL)\
1225 + (b&0x03030303UL);\
1226 h1= ((a&0xFCFCFCFCUL)>>2)\
1227 + ((b&0xFCFCFCFCUL)>>2);\
1228 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1231 a= AV_RN32(pixels );\
1232 b= AV_RN32(pixels+1);\
1233 l0= (a&0x03030303UL)\
1236 h0= ((a&0xFCFCFCFCUL)>>2)\
1237 + ((b&0xFCFCFCFCUL)>>2);\
1238 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1242 pixels+=4-line_size*(h+1);\
1243 block +=4-line_size*h;\
1247 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1248 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1249 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1250 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1251 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1252 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1253 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1254 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
/* Pixel-op plumbing for the hpel macros above:
 *   op_avg: rounding 4-pixel-at-a-time average (used by the avg_* functions)
 *   op_put: plain store
 * avg2/avg4: scalar rounding averages of 2 resp. 4 samples.
 * Fix: avg2/avg4 now parenthesize every macro argument so expression
 * arguments (e.g. conditionals) cannot be re-associated by precedence. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#define op_put(a, b) a = b
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
/* Adapter: average two 16-wide sources into dst, all sharing one stride. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
/* Adapter: average two 8-wide sources into dst, all sharing one stride. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
/* One-warp-point GMC: bilinear interpolation of an 8-pixel-wide block with
 * 1/16-pel fractional offsets (x16, y16).  The four corner weights sum to
 * 256, so the result is normalised with ">> 8" after adding `rounder`. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row;

    for (row = 0; row < h; row++) {
        int col;
        for (col = 0; col < 8; col++) {
            dst[col] = (A*src[col]        + B*src[col+1] +
                        C*src[stride+col] + D*src[stride+col+1] + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
/* General GMC (sprite warping): for each of the 8 pixels per output row,
 * an affine source coordinate (vx, vy) in 16.16 fixed point is stepped by
 * (dxx, dyx) horizontally and (dxy, dyy) vertically.  Inside the picture a
 * full bilinear tap is used; at the borders the coordinate is clamped with
 * av_clip and the interpolation degenerates to 1-D or nearest sample.
 * NOTE(review): the vx/vy loop bookkeeping was reconstructed from the
 * standard MPEG-4 GMC pattern — verify against upstream before merging. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s = 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx = ox;
        vy = oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x = vx>>16;
            src_y = vy>>16;
            frac_x = src_x&(s-1);
            frac_y = src_y&(s-1);
            src_x >>= shift;
            src_y >>= shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear tap */
                    index = src_x + src_y*stride;
                    dst[y*stride + x] = ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, horizontal-only tap */
                    index = src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x] = ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, vertical-only tap */
                    index = av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x] = ( (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both outside: clamp both, nearest sample */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
/* Thirdpel full-pel copy: dispatch on block width to the fixed-width copy helpers. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2:  put_pixels2_c (dst, src, stride, height); break;
    case 4:  put_pixels4_c (dst, src, stride, height); break;
    case 8:  put_pixels8_c (dst, src, stride, height); break;
    case 16: put_pixels16_c(dst, src, stride, height); break;
    }
}
/* Thirdpel MC, horizontal phase 1/3: round((2*a + b)/3), in fixed point (683*x)>>11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (683*(2*src[col] + src[col+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, horizontal phase 2/3: round((a + 2*b)/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, vertical phase 1/3: round((2*top + bottom)/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, diagonal (1/3, 1/3): bilinear weights 4/3/3/2 over 12, via (2731*x)>>15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, phase (1/3, 2/3): weights 3/2/4/3 over 12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, vertical phase 2/3: round((top + 2*bottom)/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, phase (2/3, 1/3): weights 3/4/2/3 over 12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, diagonal (2/3, 2/3): weights 2/3/3/4 over 12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/* Thirdpel full-pel averaging copy: dispatch on width to the fixed-width avg helpers. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2:  avg_pixels2_c (dst, src, stride, height); break;
    case 4:  avg_pixels4_c (dst, src, stride, height); break;
    case 8:  avg_pixels8_c (dst, src, stride, height); break;
    case 16: avg_pixels16_c(dst, src, stride, height); break;
    }
}
/* Averaging thirdpel MC, horizontal phase 1/3: rounding-average the
 * interpolated value with the existing dst pixel. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((683*(2*src[col] + src[col+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, horizontal phase 2/3. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((683*(src[col] + 2*src[col+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, vertical phase 1/3. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((683*(2*src[col] + src[col+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, diagonal (1/3, 1/3). */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((2731*(4*src[col] + 3*src[col+1] + 3*src[col+stride] + 2*src[col+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, phase (1/3, 2/3). */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((2731*(3*src[col] + 2*src[col+1] + 4*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, vertical phase 2/3. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((683*(src[col] + 2*src[col+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, phase (2/3, 1/3). */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((2731*(3*src[col] + 4*src[col+1] + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Averaging thirdpel MC, diagonal (2/3, 2/3). */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = (dst[col] + ((2731*(2*src[col] + 3*src[col+1] + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/* Instantiate fixed-width thirdpel wrappers around the variable-width
 * helpers above.  FIX(review): each wrapper body was
 * "void put_tpel_pixels_mcXX_c(dst, ...);" — with the leading "void" that
 * is a K&R-style local function *declaration*, not a call, so every
 * generated wrapper was a no-op.  The stray "void" is removed so the
 * wrappers actually forward to the interpolation helpers. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
/* H.264 chroma MC (eighth-pel bilinear) for 2-, 4- and 8-wide blocks.
 * A/B/C/D are the four corner weights (they sum to 64).  When D==0 the
 * interpolation is purely 1-D, so the fast path uses a single weight
 * E=B+C and a step of `stride` (vertical) or 1 (horizontal). */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
/* Pixel ops for H264_CHROMA_MC: the interpolation sum carries a 6-bit
 * fraction, so both ops add the rounding bias 32 and shift down by 6.
 * op_avg additionally rounding-averages with the existing dst value. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
1676 H264_CHROMA_MC(put_
, op_put
)
1677 H264_CHROMA_MC(avg_
, op_avg
)
/* H.264 8-wide chroma MC variant with "no rounding": the bias is 32-4=28
 * instead of 32, biasing the result slightly downward (used by VC-1/WMV3). */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int row;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        int col;
        for (col = 0; col < 8; col++) {
            dst[col] = (A*src[col]        + B*src[col+1] +
                        C*src[stride+col] + D*src[stride+col+1] + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
1705 #define QPEL_MC(r, OPNAME, RND, OP) \
1706 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1707 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1711 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1712 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1713 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1714 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1715 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1716 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1717 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1718 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1724 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1726 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1730 const int src0= src[0*srcStride];\
1731 const int src1= src[1*srcStride];\
1732 const int src2= src[2*srcStride];\
1733 const int src3= src[3*srcStride];\
1734 const int src4= src[4*srcStride];\
1735 const int src5= src[5*srcStride];\
1736 const int src6= src[6*srcStride];\
1737 const int src7= src[7*srcStride];\
1738 const int src8= src[8*srcStride];\
1739 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1740 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1741 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1742 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1743 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1744 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1745 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1746 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1752 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1753 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1758 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1759 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1760 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1761 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1762 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1763 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1764 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1765 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1766 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1767 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1768 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1769 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1770 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1771 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1772 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1773 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1779 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1780 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1785 const int src0= src[0*srcStride];\
1786 const int src1= src[1*srcStride];\
1787 const int src2= src[2*srcStride];\
1788 const int src3= src[3*srcStride];\
1789 const int src4= src[4*srcStride];\
1790 const int src5= src[5*srcStride];\
1791 const int src6= src[6*srcStride];\
1792 const int src7= src[7*srcStride];\
1793 const int src8= src[8*srcStride];\
1794 const int src9= src[9*srcStride];\
1795 const int src10= src[10*srcStride];\
1796 const int src11= src[11*srcStride];\
1797 const int src12= src[12*srcStride];\
1798 const int src13= src[13*srcStride];\
1799 const int src14= src[14*srcStride];\
1800 const int src15= src[15*srcStride];\
1801 const int src16= src[16*srcStride];\
1802 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1803 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1804 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1805 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1806 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1807 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1808 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1809 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1810 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1811 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1812 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1813 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1814 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1815 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1816 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1817 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1823 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1824 OPNAME ## pixels8_c(dst, src, stride, 8);\
1827 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1829 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1830 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1833 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1834 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1837 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1839 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1840 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1843 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1844 uint8_t full[16*9];\
1846 copy_block9(full, src, 16, stride, 9);\
1847 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1848 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1851 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1852 uint8_t full[16*9];\
1853 copy_block9(full, src, 16, stride, 9);\
1854 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1857 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1858 uint8_t full[16*9];\
1860 copy_block9(full, src, 16, stride, 9);\
1861 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1862 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1864 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1865 uint8_t full[16*9];\
1868 uint8_t halfHV[64];\
1869 copy_block9(full, src, 16, stride, 9);\
1870 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1871 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1872 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1873 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1875 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1876 uint8_t full[16*9];\
1878 uint8_t halfHV[64];\
1879 copy_block9(full, src, 16, stride, 9);\
1880 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1881 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1882 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1883 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1885 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1886 uint8_t full[16*9];\
1889 uint8_t halfHV[64];\
1890 copy_block9(full, src, 16, stride, 9);\
1891 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1892 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1893 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1894 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1896 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1897 uint8_t full[16*9];\
1899 uint8_t halfHV[64];\
1900 copy_block9(full, src, 16, stride, 9);\
1901 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1902 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1903 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1904 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1906 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1907 uint8_t full[16*9];\
1910 uint8_t halfHV[64];\
1911 copy_block9(full, src, 16, stride, 9);\
1912 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1913 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1914 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1915 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1917 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1918 uint8_t full[16*9];\
1920 uint8_t halfHV[64];\
1921 copy_block9(full, src, 16, stride, 9);\
1922 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1923 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1924 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1925 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1927 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1928 uint8_t full[16*9];\
1931 uint8_t halfHV[64];\
1932 copy_block9(full, src, 16, stride, 9);\
1933 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1934 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1935 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1936 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1938 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1939 uint8_t full[16*9];\
1941 uint8_t halfHV[64];\
1942 copy_block9(full, src, 16, stride, 9);\
1943 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1944 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1945 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1946 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1948 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1950 uint8_t halfHV[64];\
1951 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1952 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1953 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1955 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1957 uint8_t halfHV[64];\
1958 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1959 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1960 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1962 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1963 uint8_t full[16*9];\
1966 uint8_t halfHV[64];\
1967 copy_block9(full, src, 16, stride, 9);\
1968 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1969 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1970 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1971 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1973 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1974 uint8_t full[16*9];\
1976 copy_block9(full, src, 16, stride, 9);\
1977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1978 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1979 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1981 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1982 uint8_t full[16*9];\
1985 uint8_t halfHV[64];\
1986 copy_block9(full, src, 16, stride, 9);\
1987 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1988 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1989 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1990 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1992 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1993 uint8_t full[16*9];\
1995 copy_block9(full, src, 16, stride, 9);\
1996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1997 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1998 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2000 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2002 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2003 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2005 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2006 OPNAME ## pixels16_c(dst, src, stride, 16);\
2009 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2011 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2012 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2015 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2016 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
2019 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2021 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2022 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2025 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2026 uint8_t full[24*17];\
2028 copy_block17(full, src, 24, stride, 17);\
2029 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2030 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2033 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2034 uint8_t full[24*17];\
2035 copy_block17(full, src, 24, stride, 17);\
2036 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
2039 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2040 uint8_t full[24*17];\
2042 copy_block17(full, src, 24, stride, 17);\
2043 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2044 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2046 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2047 uint8_t full[24*17];\
2048 uint8_t halfH[272];\
2049 uint8_t halfV[256];\
2050 uint8_t halfHV[256];\
2051 copy_block17(full, src, 24, stride, 17);\
2052 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2053 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2054 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2055 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2057 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2058 uint8_t full[24*17];\
2059 uint8_t halfH[272];\
2060 uint8_t halfHV[256];\
2061 copy_block17(full, src, 24, stride, 17);\
2062 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2063 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2064 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2065 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2067 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2068 uint8_t full[24*17];\
2069 uint8_t halfH[272];\
2070 uint8_t halfV[256];\
2071 uint8_t halfHV[256];\
2072 copy_block17(full, src, 24, stride, 17);\
2073 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2074 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2075 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2076 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2078 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2079 uint8_t full[24*17];\
2080 uint8_t halfH[272];\
2081 uint8_t halfHV[256];\
2082 copy_block17(full, src, 24, stride, 17);\
2083 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2084 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2085 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2086 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2088 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2089 uint8_t full[24*17];\
2090 uint8_t halfH[272];\
2091 uint8_t halfV[256];\
2092 uint8_t halfHV[256];\
2093 copy_block17(full, src, 24, stride, 17);\
2094 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2095 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2096 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2097 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2099 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2100 uint8_t full[24*17];\
2101 uint8_t halfH[272];\
2102 uint8_t halfHV[256];\
2103 copy_block17(full, src, 24, stride, 17);\
2104 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2105 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2106 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2107 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2109 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2110 uint8_t full[24*17];\
2111 uint8_t halfH[272];\
2112 uint8_t halfV[256];\
2113 uint8_t halfHV[256];\
2114 copy_block17(full, src, 24, stride, 17);\
2115 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
2116 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2117 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2118 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2120 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2121 uint8_t full[24*17];\
2122 uint8_t halfH[272];\
2123 uint8_t halfHV[256];\
2124 copy_block17(full, src, 24, stride, 17);\
2125 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2126 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2127 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2128 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2130 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2131 uint8_t halfH[272];\
2132 uint8_t halfHV[256];\
2133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2134 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2135 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2137 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2138 uint8_t halfH[272];\
2139 uint8_t halfHV[256];\
2140 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2141 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2142 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2144 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2145 uint8_t full[24*17];\
2146 uint8_t halfH[272];\
2147 uint8_t halfV[256];\
2148 uint8_t halfHV[256];\
2149 copy_block17(full, src, 24, stride, 17);\
2150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2151 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2152 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2153 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2155 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2156 uint8_t full[24*17];\
2157 uint8_t halfH[272];\
2158 copy_block17(full, src, 24, stride, 17);\
2159 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2160 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2161 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2163 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2164 uint8_t full[24*17];\
2165 uint8_t halfH[272];\
2166 uint8_t halfV[256];\
2167 uint8_t halfHV[256];\
2168 copy_block17(full, src, 24, stride, 17);\
2169 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2170 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2171 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2172 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2174 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2175 uint8_t full[24*17];\
2176 uint8_t halfH[272];\
2177 copy_block17(full, src, 24, stride, 17);\
2178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2179 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2180 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2182 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2183 uint8_t halfH[272];\
2184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2185 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2188 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2189 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2190 #define op_put(a, b) a = cm[((b) + 16)>>5]
2191 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2193 QPEL_MC(0, put_
, _
, op_put
)
2194 QPEL_MC(1, put_no_rnd_
, _no_rnd_
, op_put_no_rnd
)
2195 QPEL_MC(0, avg_
, _
, op_avg
)
2196 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
2198 #undef op_avg_no_rnd
2200 #undef op_put_no_rnd
/* H.264 6-tap (1,-5,20,20,-5,1) luma interpolation primitives for block
 * widths 2/4/8/16.  OP stores with 8-bit rounding ((b)+16)>>5; OP2 is used
 * on the hv (2-D) path where intermediates are 16-bit, rounding by
 * ((b)+512)>>10.  The 16-wide versions are composed from four 8-wide calls. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}
/* Builds the 16 quarter-pel MC functions (mcXY for X,Y in 0..3) for one
 * block SIZE from the _h/_v/_hv lowpass primitives above.  Half-pel
 * intermediates always use the put_ (rounded) variants; OPNAME only
 * selects how the final result is stored (put vs avg). */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
2604 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2605 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2606 #define op_put(a, b) a = cm[((b) + 16)>>5]
2607 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2608 #define op2_put(a, b) a = cm[((b) + 512)>>10]
2610 H264_LOWPASS(put_
, op_put
, op2_put
)
2611 H264_LOWPASS(avg_
, op_avg
, op2_avg
)
/* H.264 weighted prediction (explicit/implicit weighting, §8.4.2.3). */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}
2696 static void wmv2_mspel8_h_lowpass(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int h
){
2697 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
2701 dst
[0]= cm
[(9*(src
[0] + src
[1]) - (src
[-1] + src
[2]) + 8)>>4];
2702 dst
[1]= cm
[(9*(src
[1] + src
[2]) - (src
[ 0] + src
[3]) + 8)>>4];
2703 dst
[2]= cm
[(9*(src
[2] + src
[3]) - (src
[ 1] + src
[4]) + 8)>>4];
2704 dst
[3]= cm
[(9*(src
[3] + src
[4]) - (src
[ 2] + src
[5]) + 8)>>4];
2705 dst
[4]= cm
[(9*(src
[4] + src
[5]) - (src
[ 3] + src
[6]) + 8)>>4];
2706 dst
[5]= cm
[(9*(src
[5] + src
[6]) - (src
[ 4] + src
[7]) + 8)>>4];
2707 dst
[6]= cm
[(9*(src
[6] + src
[7]) - (src
[ 5] + src
[8]) + 8)>>4];
2708 dst
[7]= cm
[(9*(src
[7] + src
[8]) - (src
[ 6] + src
[9]) + 8)>>4];
#ifdef CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel (mc00) CAVS copy/average wrappers over the generic pixel ops. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
/* VC-1 specific */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel VC-1 copy: the rounding flag is irrelevant at integer positions. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
2741 void ff_intrax8dsp_init(DSPContext
* c
, AVCodecContext
*avctx
);
2744 void ff_h264dspenc_init(DSPContext
* c
, AVCodecContext
*avctx
);
2746 static void wmv2_mspel8_v_lowpass(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int w
){
2747 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
2751 const int src_1
= src
[ -srcStride
];
2752 const int src0
= src
[0 ];
2753 const int src1
= src
[ srcStride
];
2754 const int src2
= src
[2*srcStride
];
2755 const int src3
= src
[3*srcStride
];
2756 const int src4
= src
[4*srcStride
];
2757 const int src5
= src
[5*srcStride
];
2758 const int src6
= src
[6*srcStride
];
2759 const int src7
= src
[7*srcStride
];
2760 const int src8
= src
[8*srcStride
];
2761 const int src9
= src
[9*srcStride
];
2762 dst
[0*dstStride
]= cm
[(9*(src0
+ src1
) - (src_1
+ src2
) + 8)>>4];
2763 dst
[1*dstStride
]= cm
[(9*(src1
+ src2
) - (src0
+ src3
) + 8)>>4];
2764 dst
[2*dstStride
]= cm
[(9*(src2
+ src3
) - (src1
+ src4
) + 8)>>4];
2765 dst
[3*dstStride
]= cm
[(9*(src3
+ src4
) - (src2
+ src5
) + 8)>>4];
2766 dst
[4*dstStride
]= cm
[(9*(src4
+ src5
) - (src3
+ src6
) + 8)>>4];
2767 dst
[5*dstStride
]= cm
[(9*(src5
+ src6
) - (src4
+ src7
) + 8)>>4];
2768 dst
[6*dstStride
]= cm
[(9*(src6
+ src7
) - (src5
+ src8
) + 8)>>4];
2769 dst
[7*dstStride
]= cm
[(9*(src7
+ src8
) - (src6
+ src9
) + 8)>>4];
/* WMV2 mspel full-pel position: plain 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
/* WMV2 mspel quarter-pel left: average of src and its horizontal half-pel. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
/* WMV2 mspel horizontal half-pel position. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
/* WMV2 mspel quarter-pel right: average of src+1 and the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
/* WMV2 mspel vertical half-pel position. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
/* WMV2 mspel (1,2): average of the vertical half-pel and the hv half-pel.
 * halfH needs 11 rows (8 + filter margin) hence 88 bytes. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel (3,2): like mc12 but the vertical half-pel is taken at src+1. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel centre (2,2): horizontal then vertical half-pel filtering. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];

    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2823 static void h263_v_loop_filter_c(uint8_t *src
, int stride
, int qscale
){
2824 if(ENABLE_ANY_H263
) {
2826 const int strength
= ff_h263_loop_filter_strength
[qscale
];
2830 int p0
= src
[x
-2*stride
];
2831 int p1
= src
[x
-1*stride
];
2832 int p2
= src
[x
+0*stride
];
2833 int p3
= src
[x
+1*stride
];
2834 int d
= (p0
- p3
+ 4*(p2
- p1
)) / 8;
2836 if (d
<-2*strength
) d1
= 0;
2837 else if(d
<- strength
) d1
=-2*strength
- d
;
2838 else if(d
< strength
) d1
= d
;
2839 else if(d
< 2*strength
) d1
= 2*strength
- d
;
2844 if(p1
&256) p1
= ~(p1
>>31);
2845 if(p2
&256) p2
= ~(p2
>>31);
2847 src
[x
-1*stride
] = p1
;
2848 src
[x
+0*stride
] = p2
;
2852 d2
= av_clip((p0
-p3
)/4, -ad1
, ad1
);
2854 src
[x
-2*stride
] = p0
- d2
;
2855 src
[x
+ stride
] = p3
+ d2
;
2860 static void h263_h_loop_filter_c(uint8_t *src
, int stride
, int qscale
){
2861 if(ENABLE_ANY_H263
) {
2863 const int strength
= ff_h263_loop_filter_strength
[qscale
];
2867 int p0
= src
[y
*stride
-2];
2868 int p1
= src
[y
*stride
-1];
2869 int p2
= src
[y
*stride
+0];
2870 int p3
= src
[y
*stride
+1];
2871 int d
= (p0
- p3
+ 4*(p2
- p1
)) / 8;
2873 if (d
<-2*strength
) d1
= 0;
2874 else if(d
<- strength
) d1
=-2*strength
- d
;
2875 else if(d
< strength
) d1
= d
;
2876 else if(d
< 2*strength
) d1
= 2*strength
- d
;
2881 if(p1
&256) p1
= ~(p1
>>31);
2882 if(p2
&256) p2
= ~(p2
>>31);
2884 src
[y
*stride
-1] = p1
;
2885 src
[y
*stride
+0] = p2
;
2889 d2
= av_clip((p0
-p3
)/4, -ad1
, ad1
);
2891 src
[y
*stride
-2] = p0
- d2
;
2892 src
[y
*stride
+1] = p3
+ d2
;
/* H.261 in-loop filter: separable (1,2,1)/4 smoothing of an 8x8 block,
 * with the border rows/columns passed through unfiltered. */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    /* vertical (1,2,1) pass into temp[], scaled by 4; copy top/bottom rows */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* horizontal (1,2,1) pass back into src, with rounding; copy edges */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
/* H.264 normal (bS<4) luma deblocking of one 16-sample edge, split into
 * four 4-sample groups, each with its own tc0 threshold (tc0[i]<0 means
 * skip the group).  xstride steps across the edge, ystride along it. */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                /* optional p1/q1 filtering widens the clip range via tc++ */
                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical filtering of a horizontal luma edge. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/** Filter a horizontal-edge-crossing (vertical) luma boundary: step across
 *  the edge is 1 pixel, step along the edge is the row stride. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
/**
 * H.264 deblocking filter, normal-strength chroma edge (one 8-pixel edge
 * split into 4 segments of 2 lines each).
 * Parameters as in h264_loop_filter_luma_c; a tc0[i] <= 0 entry skips the
 * corresponding 2-line segment.
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;   /* segment disabled */
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/** Vertical-edge chroma deblocking: across-edge step = stride, along = 1. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
/** Horizontal-edge chroma deblocking: across-edge step = 1, along = stride. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
/**
 * H.264 deblocking filter, intra (strong) chroma edge: unconditionally
 * averages p0/q0 from their neighbors when the activity thresholds pass.
 * No tc clipping is used on intra chroma edges.
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
/** Vertical-edge intra chroma deblocking wrapper. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
/** Horizontal-edge intra chroma deblocking wrapper. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
/**
 * Sum of absolute differences (SAD) of a 16-wide block.
 * @param v         unused context pointer (me_cmp_func signature)
 * @param pix1,pix2 blocks to compare
 * @param line_size row stride of both blocks
 * @param h         number of rows
 * @return sum over h rows of |pix1[x]-pix2[x]| for x = 0..15
 *
 * The row is hand-unrolled as in the rest of this file.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0; i<h; i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
3066 static int pix_abs16_x2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3072 s
+= abs(pix1
[0] - avg2(pix2
[0], pix2
[1]));
3073 s
+= abs(pix1
[1] - avg2(pix2
[1], pix2
[2]));
3074 s
+= abs(pix1
[2] - avg2(pix2
[2], pix2
[3]));
3075 s
+= abs(pix1
[3] - avg2(pix2
[3], pix2
[4]));
3076 s
+= abs(pix1
[4] - avg2(pix2
[4], pix2
[5]));
3077 s
+= abs(pix1
[5] - avg2(pix2
[5], pix2
[6]));
3078 s
+= abs(pix1
[6] - avg2(pix2
[6], pix2
[7]));
3079 s
+= abs(pix1
[7] - avg2(pix2
[7], pix2
[8]));
3080 s
+= abs(pix1
[8] - avg2(pix2
[8], pix2
[9]));
3081 s
+= abs(pix1
[9] - avg2(pix2
[9], pix2
[10]));
3082 s
+= abs(pix1
[10] - avg2(pix2
[10], pix2
[11]));
3083 s
+= abs(pix1
[11] - avg2(pix2
[11], pix2
[12]));
3084 s
+= abs(pix1
[12] - avg2(pix2
[12], pix2
[13]));
3085 s
+= abs(pix1
[13] - avg2(pix2
[13], pix2
[14]));
3086 s
+= abs(pix1
[14] - avg2(pix2
[14], pix2
[15]));
3087 s
+= abs(pix1
[15] - avg2(pix2
[15], pix2
[16]));
3094 static int pix_abs16_y2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3097 uint8_t *pix3
= pix2
+ line_size
;
3101 s
+= abs(pix1
[0] - avg2(pix2
[0], pix3
[0]));
3102 s
+= abs(pix1
[1] - avg2(pix2
[1], pix3
[1]));
3103 s
+= abs(pix1
[2] - avg2(pix2
[2], pix3
[2]));
3104 s
+= abs(pix1
[3] - avg2(pix2
[3], pix3
[3]));
3105 s
+= abs(pix1
[4] - avg2(pix2
[4], pix3
[4]));
3106 s
+= abs(pix1
[5] - avg2(pix2
[5], pix3
[5]));
3107 s
+= abs(pix1
[6] - avg2(pix2
[6], pix3
[6]));
3108 s
+= abs(pix1
[7] - avg2(pix2
[7], pix3
[7]));
3109 s
+= abs(pix1
[8] - avg2(pix2
[8], pix3
[8]));
3110 s
+= abs(pix1
[9] - avg2(pix2
[9], pix3
[9]));
3111 s
+= abs(pix1
[10] - avg2(pix2
[10], pix3
[10]));
3112 s
+= abs(pix1
[11] - avg2(pix2
[11], pix3
[11]));
3113 s
+= abs(pix1
[12] - avg2(pix2
[12], pix3
[12]));
3114 s
+= abs(pix1
[13] - avg2(pix2
[13], pix3
[13]));
3115 s
+= abs(pix1
[14] - avg2(pix2
[14], pix3
[14]));
3116 s
+= abs(pix1
[15] - avg2(pix2
[15], pix3
[15]));
3124 static int pix_abs16_xy2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3127 uint8_t *pix3
= pix2
+ line_size
;
3131 s
+= abs(pix1
[0] - avg4(pix2
[0], pix2
[1], pix3
[0], pix3
[1]));
3132 s
+= abs(pix1
[1] - avg4(pix2
[1], pix2
[2], pix3
[1], pix3
[2]));
3133 s
+= abs(pix1
[2] - avg4(pix2
[2], pix2
[3], pix3
[2], pix3
[3]));
3134 s
+= abs(pix1
[3] - avg4(pix2
[3], pix2
[4], pix3
[3], pix3
[4]));
3135 s
+= abs(pix1
[4] - avg4(pix2
[4], pix2
[5], pix3
[4], pix3
[5]));
3136 s
+= abs(pix1
[5] - avg4(pix2
[5], pix2
[6], pix3
[5], pix3
[6]));
3137 s
+= abs(pix1
[6] - avg4(pix2
[6], pix2
[7], pix3
[6], pix3
[7]));
3138 s
+= abs(pix1
[7] - avg4(pix2
[7], pix2
[8], pix3
[7], pix3
[8]));
3139 s
+= abs(pix1
[8] - avg4(pix2
[8], pix2
[9], pix3
[8], pix3
[9]));
3140 s
+= abs(pix1
[9] - avg4(pix2
[9], pix2
[10], pix3
[9], pix3
[10]));
3141 s
+= abs(pix1
[10] - avg4(pix2
[10], pix2
[11], pix3
[10], pix3
[11]));
3142 s
+= abs(pix1
[11] - avg4(pix2
[11], pix2
[12], pix3
[11], pix3
[12]));
3143 s
+= abs(pix1
[12] - avg4(pix2
[12], pix2
[13], pix3
[12], pix3
[13]));
3144 s
+= abs(pix1
[13] - avg4(pix2
[13], pix2
[14], pix3
[13], pix3
[14]));
3145 s
+= abs(pix1
[14] - avg4(pix2
[14], pix2
[15], pix3
[14], pix3
[15]));
3146 s
+= abs(pix1
[15] - avg4(pix2
[15], pix2
[16], pix3
[15], pix3
[16]));
/**
 * Sum of absolute differences (SAD) of an 8-wide block.
 * Same contract as pix_abs16_c, but 8 columns per row.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0; i<h; i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
3174 static int pix_abs8_x2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3180 s
+= abs(pix1
[0] - avg2(pix2
[0], pix2
[1]));
3181 s
+= abs(pix1
[1] - avg2(pix2
[1], pix2
[2]));
3182 s
+= abs(pix1
[2] - avg2(pix2
[2], pix2
[3]));
3183 s
+= abs(pix1
[3] - avg2(pix2
[3], pix2
[4]));
3184 s
+= abs(pix1
[4] - avg2(pix2
[4], pix2
[5]));
3185 s
+= abs(pix1
[5] - avg2(pix2
[5], pix2
[6]));
3186 s
+= abs(pix1
[6] - avg2(pix2
[6], pix2
[7]));
3187 s
+= abs(pix1
[7] - avg2(pix2
[7], pix2
[8]));
3194 static int pix_abs8_y2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3197 uint8_t *pix3
= pix2
+ line_size
;
3201 s
+= abs(pix1
[0] - avg2(pix2
[0], pix3
[0]));
3202 s
+= abs(pix1
[1] - avg2(pix2
[1], pix3
[1]));
3203 s
+= abs(pix1
[2] - avg2(pix2
[2], pix3
[2]));
3204 s
+= abs(pix1
[3] - avg2(pix2
[3], pix3
[3]));
3205 s
+= abs(pix1
[4] - avg2(pix2
[4], pix3
[4]));
3206 s
+= abs(pix1
[5] - avg2(pix2
[5], pix3
[5]));
3207 s
+= abs(pix1
[6] - avg2(pix2
[6], pix3
[6]));
3208 s
+= abs(pix1
[7] - avg2(pix2
[7], pix3
[7]));
3216 static int pix_abs8_xy2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
3219 uint8_t *pix3
= pix2
+ line_size
;
3223 s
+= abs(pix1
[0] - avg4(pix2
[0], pix2
[1], pix3
[0], pix3
[1]));
3224 s
+= abs(pix1
[1] - avg4(pix2
[1], pix2
[2], pix3
[1], pix3
[2]));
3225 s
+= abs(pix1
[2] - avg4(pix2
[2], pix2
[3], pix3
[2], pix3
[3]));
3226 s
+= abs(pix1
[3] - avg4(pix2
[3], pix2
[4], pix3
[3], pix3
[4]));
3227 s
+= abs(pix1
[4] - avg4(pix2
[4], pix2
[5], pix3
[4], pix3
[5]));
3228 s
+= abs(pix1
[5] - avg4(pix2
[5], pix2
[6], pix3
[5], pix3
[6]));
3229 s
+= abs(pix1
[6] - avg4(pix2
[6], pix2
[7], pix3
[6], pix3
[7]));
3230 s
+= abs(pix1
[7] - avg4(pix2
[7], pix2
[8], pix3
[7], pix3
[8]));
3238 static int nsse16_c(void *v
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
3239 MpegEncContext
*c
= v
;
3245 for(x
=0; x
<16; x
++){
3246 score1
+= (s1
[x
] - s2
[x
])*(s1
[x
] - s2
[x
]);
3249 for(x
=0; x
<15; x
++){
3250 score2
+= FFABS( s1
[x
] - s1
[x
+stride
]
3251 - s1
[x
+1] + s1
[x
+1+stride
])
3252 -FFABS( s2
[x
] - s2
[x
+stride
]
3253 - s2
[x
+1] + s2
[x
+1+stride
]);
3260 if(c
) return score1
+ FFABS(score2
)*c
->avctx
->nsse_weight
;
3261 else return score1
+ FFABS(score2
)*8;
3264 static int nsse8_c(void *v
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
3265 MpegEncContext
*c
= v
;
3272 score1
+= (s1
[x
] - s2
[x
])*(s1
[x
] - s2
[x
]);
3276 score2
+= FFABS( s1
[x
] - s1
[x
+stride
]
3277 - s1
[x
+1] + s1
[x
+1+stride
])
3278 -FFABS( s2
[x
] - s2
[x
+stride
]
3279 - s2
[x
+1] + s2
[x
+1+stride
]);
3286 if(c
) return score1
+ FFABS(score2
)*c
->avctx
->nsse_weight
;
3287 else return score1
+ FFABS(score2
)*8;
3290 static int try_8x8basis_c(int16_t rem
[64], int16_t weight
[64], int16_t basis
[64], int scale
){
3294 for(i
=0; i
<8*8; i
++){
3295 int b
= rem
[i
] + ((basis
[i
]*scale
+ (1<<(BASIS_SHIFT
- RECON_SHIFT
-1)))>>(BASIS_SHIFT
- RECON_SHIFT
));
3298 assert(-512<b
&& b
<512);
3300 sum
+= (w
*b
)*(w
*b
)>>4;
3305 static void add_8x8basis_c(int16_t rem
[64], int16_t basis
[64], int scale
){
3308 for(i
=0; i
<8*8; i
++){
3309 rem
[i
] += (basis
[i
]*scale
+ (1<<(BASIS_SHIFT
- RECON_SHIFT
-1)))>>(BASIS_SHIFT
- RECON_SHIFT
);
3314 * permutes an 8x8 block.
3315 * @param block the block which will be permuted according to the given permutation vector
3316 * @param permutation the permutation vector
3317 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
3318 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
3319 * (inverse) permutated to scantable order!
3321 void ff_block_permute(DCTELEM
*block
, uint8_t *permutation
, const uint8_t *scantable
, int last
)
3327 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
3329 for(i
=0; i
<=last
; i
++){
3330 const int j
= scantable
[i
];
3335 for(i
=0; i
<=last
; i
++){
3336 const int j
= scantable
[i
];
3337 const int perm_j
= permutation
[j
];
3338 block
[perm_j
]= temp
[j
];
/** Dummy me_cmp_func that reports zero cost for every block pair; used as a
 *  placeholder comparison function. All parameters are ignored. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3346 void ff_set_cmp(DSPContext
* c
, me_cmp_func
*cmp
, int type
){
3349 memset(cmp
, 0, sizeof(void*)*5);
3357 cmp
[i
]= c
->hadamard8_diff
[i
];
3363 cmp
[i
]= c
->dct_sad
[i
];
3366 cmp
[i
]= c
->dct264_sad
[i
];
3369 cmp
[i
]= c
->dct_max
[i
];
3372 cmp
[i
]= c
->quant_psnr
[i
];
3392 #ifdef CONFIG_SNOW_ENCODER
3401 av_log(NULL
, AV_LOG_ERROR
,"internal error in cmp function selection\n");
3407 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3409 static void clear_blocks_c(DCTELEM
*blocks
)
3411 memset(blocks
, 0, sizeof(DCTELEM
)*6*64);
#ifndef pb_7f
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
#endif
/**
 * dst[i] += src[i] (mod 256) for i = 0..w-1, using a SWAR word-at-a-time
 * addition for the bulk and a byte loop for the tail.
 *
 * Fix: the loop bound was "i <= w - sizeof(long)"; since sizeof() is size_t,
 * a w smaller than sizeof(long) wrapped to a huge unsigned value and the
 * word loop read past the buffers. Casting to int keeps the comparison
 * signed, so small w falls through to the byte tail.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        /* per-byte add without carry propagation between lanes */
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
#ifndef pb_7f
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
#endif
/**
 * dst[i] = src1[i] + src2[i] (mod 256) for i = 0..w-1; SWAR bulk loop plus
 * byte tail. Same signed/unsigned loop-bound fix as add_bytes_c: cast
 * sizeof(long) to int so w < sizeof(long) skips the word loop.
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
#ifndef pb_7f
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
#endif
/**
 * dst[i] = src1[i] - src2[i] (mod 256) for i = 0..w-1.
 * On targets without fast unaligned loads an unaligned src2 takes a
 * byte-unrolled path; otherwise a SWAR word loop handles the bulk and a
 * byte loop the tail. Same signed/unsigned loop-bound fix as add_bytes_c
 * ((int)sizeof(long)) so small w cannot wrap the comparison.
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
#ifndef HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    {
        for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
            long a = *(long*)(src1+i);
            long b = *(long*)(src2+i);
            /* per-byte subtract without borrow propagation between lanes */
            *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
        }
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
3461 static void sub_hfyu_median_prediction_c(uint8_t *dst
, uint8_t *src1
, uint8_t *src2
, int w
, int *left
, int *left_top
){
3469 const int pred
= mid_pred(l
, src1
[i
], (l
+ src1
[i
] - lt
)&0xFF);
3479 #define BUTTERFLY2(o1,o2,i1,i2) \
3483 #define BUTTERFLY1(x,y) \
3492 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
3494 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s
, uint8_t *dst
, uint8_t *src
, int stride
, int h
){
3502 //FIXME try pointer walks
3503 BUTTERFLY2(temp
[8*i
+0], temp
[8*i
+1], src
[stride
*i
+0]-dst
[stride
*i
+0],src
[stride
*i
+1]-dst
[stride
*i
+1]);
3504 BUTTERFLY2(temp
[8*i
+2], temp
[8*i
+3], src
[stride
*i
+2]-dst
[stride
*i
+2],src
[stride
*i
+3]-dst
[stride
*i
+3]);
3505 BUTTERFLY2(temp
[8*i
+4], temp
[8*i
+5], src
[stride
*i
+4]-dst
[stride
*i
+4],src
[stride
*i
+5]-dst
[stride
*i
+5]);
3506 BUTTERFLY2(temp
[8*i
+6], temp
[8*i
+7], src
[stride
*i
+6]-dst
[stride
*i
+6],src
[stride
*i
+7]-dst
[stride
*i
+7]);
3508 BUTTERFLY1(temp
[8*i
+0], temp
[8*i
+2]);
3509 BUTTERFLY1(temp
[8*i
+1], temp
[8*i
+3]);
3510 BUTTERFLY1(temp
[8*i
+4], temp
[8*i
+6]);
3511 BUTTERFLY1(temp
[8*i
+5], temp
[8*i
+7]);
3513 BUTTERFLY1(temp
[8*i
+0], temp
[8*i
+4]);
3514 BUTTERFLY1(temp
[8*i
+1], temp
[8*i
+5]);
3515 BUTTERFLY1(temp
[8*i
+2], temp
[8*i
+6]);
3516 BUTTERFLY1(temp
[8*i
+3], temp
[8*i
+7]);
3520 BUTTERFLY1(temp
[8*0+i
], temp
[8*1+i
]);
3521 BUTTERFLY1(temp
[8*2+i
], temp
[8*3+i
]);
3522 BUTTERFLY1(temp
[8*4+i
], temp
[8*5+i
]);
3523 BUTTERFLY1(temp
[8*6+i
], temp
[8*7+i
]);
3525 BUTTERFLY1(temp
[8*0+i
], temp
[8*2+i
]);
3526 BUTTERFLY1(temp
[8*1+i
], temp
[8*3+i
]);
3527 BUTTERFLY1(temp
[8*4+i
], temp
[8*6+i
]);
3528 BUTTERFLY1(temp
[8*5+i
], temp
[8*7+i
]);
3531 BUTTERFLYA(temp
[8*0+i
], temp
[8*4+i
])
3532 +BUTTERFLYA(temp
[8*1+i
], temp
[8*5+i
])
3533 +BUTTERFLYA(temp
[8*2+i
], temp
[8*6+i
])
3534 +BUTTERFLYA(temp
[8*3+i
], temp
[8*7+i
]);
3540 printf("MAX:%d\n", maxi
);
3546 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s
, uint8_t *src
, uint8_t *dummy
, int stride
, int h
){
3554 //FIXME try pointer walks
3555 BUTTERFLY2(temp
[8*i
+0], temp
[8*i
+1], src
[stride
*i
+0],src
[stride
*i
+1]);
3556 BUTTERFLY2(temp
[8*i
+2], temp
[8*i
+3], src
[stride
*i
+2],src
[stride
*i
+3]);
3557 BUTTERFLY2(temp
[8*i
+4], temp
[8*i
+5], src
[stride
*i
+4],src
[stride
*i
+5]);
3558 BUTTERFLY2(temp
[8*i
+6], temp
[8*i
+7], src
[stride
*i
+6],src
[stride
*i
+7]);
3560 BUTTERFLY1(temp
[8*i
+0], temp
[8*i
+2]);
3561 BUTTERFLY1(temp
[8*i
+1], temp
[8*i
+3]);
3562 BUTTERFLY1(temp
[8*i
+4], temp
[8*i
+6]);
3563 BUTTERFLY1(temp
[8*i
+5], temp
[8*i
+7]);
3565 BUTTERFLY1(temp
[8*i
+0], temp
[8*i
+4]);
3566 BUTTERFLY1(temp
[8*i
+1], temp
[8*i
+5]);
3567 BUTTERFLY1(temp
[8*i
+2], temp
[8*i
+6]);
3568 BUTTERFLY1(temp
[8*i
+3], temp
[8*i
+7]);
3572 BUTTERFLY1(temp
[8*0+i
], temp
[8*1+i
]);
3573 BUTTERFLY1(temp
[8*2+i
], temp
[8*3+i
]);
3574 BUTTERFLY1(temp
[8*4+i
], temp
[8*5+i
]);
3575 BUTTERFLY1(temp
[8*6+i
], temp
[8*7+i
]);
3577 BUTTERFLY1(temp
[8*0+i
], temp
[8*2+i
]);
3578 BUTTERFLY1(temp
[8*1+i
], temp
[8*3+i
]);
3579 BUTTERFLY1(temp
[8*4+i
], temp
[8*6+i
]);
3580 BUTTERFLY1(temp
[8*5+i
], temp
[8*7+i
]);
3583 BUTTERFLYA(temp
[8*0+i
], temp
[8*4+i
])
3584 +BUTTERFLYA(temp
[8*1+i
], temp
[8*5+i
])
3585 +BUTTERFLYA(temp
[8*2+i
], temp
[8*6+i
])
3586 +BUTTERFLYA(temp
[8*3+i
], temp
[8*7+i
]);
3589 sum
-= FFABS(temp
[8*0] + temp
[8*4]); // -mean
3594 static int dct_sad8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3595 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3596 DECLARE_ALIGNED_16(uint64_t, aligned_temp
[sizeof(DCTELEM
)*64/8]);
3597 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3601 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3603 return s
->dsp
.sum_abs_dctelem(temp
);
3608 const int s07 = SRC(0) + SRC(7);\
3609 const int s16 = SRC(1) + SRC(6);\
3610 const int s25 = SRC(2) + SRC(5);\
3611 const int s34 = SRC(3) + SRC(4);\
3612 const int a0 = s07 + s34;\
3613 const int a1 = s16 + s25;\
3614 const int a2 = s07 - s34;\
3615 const int a3 = s16 - s25;\
3616 const int d07 = SRC(0) - SRC(7);\
3617 const int d16 = SRC(1) - SRC(6);\
3618 const int d25 = SRC(2) - SRC(5);\
3619 const int d34 = SRC(3) - SRC(4);\
3620 const int a4 = d16 + d25 + (d07 + (d07>>1));\
3621 const int a5 = d07 - d34 - (d25 + (d25>>1));\
3622 const int a6 = d07 + d34 - (d16 + (d16>>1));\
3623 const int a7 = d16 - d25 + (d34 + (d34>>1));\
3625 DST(1, a4 + (a7>>2)) ;\
3626 DST(2, a2 + (a3>>1)) ;\
3627 DST(3, a5 + (a6>>2)) ;\
3629 DST(5, a6 - (a5>>2)) ;\
3630 DST(6, (a2>>1) - a3 ) ;\
3631 DST(7, (a4>>2) - a7 ) ;\
3634 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3635 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3640 s
->dsp
.diff_pixels(dct
[0], src1
, src2
, stride
);
3642 #define SRC(x) dct[i][x]
3643 #define DST(x,v) dct[i][x]= v
3644 for( i
= 0; i
< 8; i
++ )
3649 #define SRC(x) dct[x][i]
3650 #define DST(x,v) sum += FFABS(v)
3651 for( i
= 0; i
< 8; i
++ )
3659 static int dct_max8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3660 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3661 DECLARE_ALIGNED_8(uint64_t, aligned_temp
[sizeof(DCTELEM
)*64/8]);
3662 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3667 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3671 sum
= FFMAX(sum
, FFABS(temp
[i
]));
3676 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3677 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3678 DECLARE_ALIGNED_8 (uint64_t, aligned_temp
[sizeof(DCTELEM
)*64*2/8]);
3679 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3680 DCTELEM
* const bak
= ((DCTELEM
*)aligned_temp
)+64;
3686 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3688 memcpy(bak
, temp
, 64*sizeof(DCTELEM
));
3690 s
->block_last_index
[0/*FIXME*/]= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3691 s
->dct_unquantize_inter(s
, temp
, 0, s
->qscale
);
3692 ff_simple_idct(temp
); //FIXME
3695 sum
+= (temp
[i
]-bak
[i
])*(temp
[i
]-bak
[i
]);
3700 static int rd8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3701 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3702 const uint8_t *scantable
= s
->intra_scantable
.permutated
;
3703 DECLARE_ALIGNED_8 (uint64_t, aligned_temp
[sizeof(DCTELEM
)*64/8]);
3704 DECLARE_ALIGNED_8 (uint64_t, aligned_bak
[stride
]);
3705 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3706 uint8_t * const bak
= (uint8_t*)aligned_bak
;
3707 int i
, last
, run
, bits
, level
, distortion
, start_i
;
3708 const int esc_length
= s
->ac_esc_length
;
3710 uint8_t * last_length
;
3715 ((uint32_t*)(bak
+ i
*stride
))[0]= ((uint32_t*)(src2
+ i
*stride
))[0];
3716 ((uint32_t*)(bak
+ i
*stride
))[1]= ((uint32_t*)(src2
+ i
*stride
))[1];
3719 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3721 s
->block_last_index
[0/*FIXME*/]= last
= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3727 length
= s
->intra_ac_vlc_length
;
3728 last_length
= s
->intra_ac_vlc_last_length
;
3729 bits
+= s
->luma_dc_vlc_length
[temp
[0] + 256]; //FIXME chroma
3732 length
= s
->inter_ac_vlc_length
;
3733 last_length
= s
->inter_ac_vlc_last_length
;
3738 for(i
=start_i
; i
<last
; i
++){
3739 int j
= scantable
[i
];
3744 if((level
&(~127)) == 0){
3745 bits
+= length
[UNI_AC_ENC_INDEX(run
, level
)];
3754 level
= temp
[i
] + 64;
3758 if((level
&(~127)) == 0){
3759 bits
+= last_length
[UNI_AC_ENC_INDEX(run
, level
)];
3767 s
->dct_unquantize_intra(s
, temp
, 0, s
->qscale
);
3769 s
->dct_unquantize_inter(s
, temp
, 0, s
->qscale
);
3772 s
->dsp
.idct_add(bak
, stride
, temp
);
3774 distortion
= s
->dsp
.sse
[1](NULL
, bak
, src1
, stride
, 8);
3776 return distortion
+ ((bits
*s
->qscale
*s
->qscale
*109 + 64)>>7);
3779 static int bit8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3780 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3781 const uint8_t *scantable
= s
->intra_scantable
.permutated
;
3782 DECLARE_ALIGNED_8 (uint64_t, aligned_temp
[sizeof(DCTELEM
)*64/8]);
3783 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3784 int i
, last
, run
, bits
, level
, start_i
;
3785 const int esc_length
= s
->ac_esc_length
;
3787 uint8_t * last_length
;
3791 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3793 s
->block_last_index
[0/*FIXME*/]= last
= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3799 length
= s
->intra_ac_vlc_length
;
3800 last_length
= s
->intra_ac_vlc_last_length
;
3801 bits
+= s
->luma_dc_vlc_length
[temp
[0] + 256]; //FIXME chroma
3804 length
= s
->inter_ac_vlc_length
;
3805 last_length
= s
->inter_ac_vlc_last_length
;
3810 for(i
=start_i
; i
<last
; i
++){
3811 int j
= scantable
[i
];
3816 if((level
&(~127)) == 0){
3817 bits
+= length
[UNI_AC_ENC_INDEX(run
, level
)];
3826 level
= temp
[i
] + 64;
3830 if((level
&(~127)) == 0){
3831 bits
+= last_length
[UNI_AC_ENC_INDEX(run
, level
)];
3839 static int vsad_intra16_c(/*MpegEncContext*/ void *c
, uint8_t *s
, uint8_t *dummy
, int stride
, int h
){
3844 for(x
=0; x
<16; x
+=4){
3845 score
+= FFABS(s
[x
] - s
[x
+stride
]) + FFABS(s
[x
+1] - s
[x
+1+stride
])
3846 +FFABS(s
[x
+2] - s
[x
+2+stride
]) + FFABS(s
[x
+3] - s
[x
+3+stride
]);
3854 static int vsad16_c(/*MpegEncContext*/ void *c
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
3859 for(x
=0; x
<16; x
++){
3860 score
+= FFABS(s1
[x
] - s2
[x
] - s1
[x
+stride
] + s2
[x
+stride
]);
3869 #define SQ(a) ((a)*(a))
3870 static int vsse_intra16_c(/*MpegEncContext*/ void *c
, uint8_t *s
, uint8_t *dummy
, int stride
, int h
){
3875 for(x
=0; x
<16; x
+=4){
3876 score
+= SQ(s
[x
] - s
[x
+stride
]) + SQ(s
[x
+1] - s
[x
+1+stride
])
3877 +SQ(s
[x
+2] - s
[x
+2+stride
]) + SQ(s
[x
+3] - s
[x
+3+stride
]);
3885 static int vsse16_c(/*MpegEncContext*/ void *c
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
3890 for(x
=0; x
<16; x
++){
3891 score
+= SQ(s1
[x
] - s2
[x
] - s1
[x
+stride
] + s2
[x
+stride
]);
/**
 * Sum of squared differences between an int8 array and an int16 array.
 * @param pix1 first operand (8-bit signed samples)
 * @param pix2 second operand (16-bit signed samples)
 * @param size number of elements compared
 * @return sum over i of (pix1[i]-pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
    return score;
}
3909 WRAPPER8_16_SQ(hadamard8_diff8x8_c
, hadamard8_diff16_c
)
3910 WRAPPER8_16_SQ(hadamard8_intra8x8_c
, hadamard8_intra16_c
)
3911 WRAPPER8_16_SQ(dct_sad8x8_c
, dct_sad16_c
)
3913 WRAPPER8_16_SQ(dct264_sad8x8_c
, dct264_sad16_c
)
3915 WRAPPER8_16_SQ(dct_max8x8_c
, dct_max16_c
)
3916 WRAPPER8_16_SQ(quant_psnr8x8_c
, quant_psnr16_c
)
3917 WRAPPER8_16_SQ(rd8x8_c
, rd16_c
)
3918 WRAPPER8_16_SQ(bit8x8_c
, bit16_c
)
/** In-place element-wise multiply: dst[i] *= src[i] for i = 0..len-1. */
static void vector_fmul_c(float *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] *= src[i];
}
/**
 * Element-wise multiply with the second operand reversed:
 * dst[i] = src0[i] * src1[len-1-i] for i = 0..len-1.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1;          /* walk src1 backwards via negative index */
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}
/**
 * dst[i*step] = src0[i]*src1[i] + src2[i] + src3 for i = 0..len-1.
 * @param src3 integer bias added to every product
 * @param step spacing (in elements) between consecutive outputs in dst
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    for(i=0; i<len; i++)
        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
}
3939 void ff_vector_fmul_window_c(float *dst
, const float *src0
, const float *src1
, const float *win
, float add_bias
, int len
){
3944 for(i
=-len
, j
=len
-1; i
<0; i
++, j
--) {
3949 dst
[i
] = s0
*wj
- s1
*wi
+ add_bias
;
3950 dst
[j
] = s0
*wi
+ s1
*wj
+ add_bias
;
/** Convert int samples to float with a scalar gain: dst[i] = src[i] * mul. */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src[i] * mul;
}
3960 static av_always_inline
int float_to_int16_one(const float *src
){
3961 int_fast32_t tmp
= *(const int32_t*)src
;
3963 tmp
= (0x43c0ffff - tmp
)>>31;
3964 // is this faster on some gcc/cpu combinations?
3965 // if(tmp > 0x43c0ffff) tmp = 0xFFFF;
3968 return tmp
- 0x8000;
3971 void ff_float_to_int16_c(int16_t *dst
, const float *src
, long len
){
3973 for(i
=0; i
<len
; i
++)
3974 dst
[i
] = float_to_int16_one(src
+i
);
3977 void ff_float_to_int16_interleave_c(int16_t *dst
, const float **src
, long len
, int channels
){
3980 for(i
=0; i
<len
; i
++){
3981 dst
[2*i
] = float_to_int16_one(src
[0]+i
);
3982 dst
[2*i
+1] = float_to_int16_one(src
[1]+i
);
3985 for(c
=0; c
<channels
; c
++)
3986 for(i
=0, j
=c
; i
<len
; i
++, j
+=channels
)
3987 dst
[j
] = float_to_int16_one(src
[c
]+i
);
3991 static void add_int16_c(int16_t * v1
, int16_t * v2
, int order
)
3997 static void sub_int16_c(int16_t * v1
, int16_t * v2
, int order
)
/**
 * Scalar product of two int16 vectors with per-term right shift.
 * @param order number of elements
 * @param shift arithmetic right shift applied to each product before summing
 * @return sum over i of (v1[i]*v2[i]) >> shift
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int res = 0;

    while (order--)
        res += (*v1++ * *v2++) >> shift;

    return res;
}
4014 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
4015 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
4016 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
4017 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
4018 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
4019 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
4020 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
#ifndef W0
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7  565 /* 2048*sqrt (2)*cos (7*pi/16) */
#endif
/**
 * One row (8 contiguous coefficients) of the WMV2 inverse DCT, in place.
 * Fixed-point butterflies scaled by 2048 (the W* constants); the final
 * stage renormalizes with rounding via (x + (1<<7)) >> 8.
 */
static void wmv2_idct_row(short * b)
{
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;
    /* step 1: odd/even butterflies */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /* step 2: 181/256 ~= 1/sqrt(2) rotations */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /* step 3: recombine and renormalize */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
#ifndef W0
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7  565 /* 2048*sqrt (2)*cos (7*pi/16) */
#endif
/**
 * One column (stride-8 coefficients) of the WMV2 inverse DCT, in place.
 * Same structure as wmv2_idct_row but with a >>3 pre-shift in step 1 for
 * extended precision and a final (x + (1<<13)) >> 14 renormalization.
 */
static void wmv2_idct_col(short * b)
{
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;
    /* step 1, with extended precision */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /* step 2 */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /* step 3 */
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/** Full 8x8 WMV2 inverse DCT, in place: row pass then column pass. */
void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0; i<64; i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0; i<8; i++){
        wmv2_idct_col(block+i);
    }
}
4085 /* XXX: those functions should be suppressed ASAP when all IDCTs are
4087 static void ff_wmv2_idct_put_c(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4089 ff_wmv2_idct_c(block
);
4090 put_pixels_clamped_c(block
, dest
, line_size
);
4092 static void ff_wmv2_idct_add_c(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4094 ff_wmv2_idct_c(block
);
4095 add_pixels_clamped_c(block
, dest
, line_size
);
4097 static void ff_jref_idct_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4100 put_pixels_clamped_c(block
, dest
, line_size
);
4102 static void ff_jref_idct_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4105 add_pixels_clamped_c(block
, dest
, line_size
);
4108 static void ff_jref_idct4_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4111 put_pixels_clamped4_c(block
, dest
, line_size
);
4113 static void ff_jref_idct4_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4116 add_pixels_clamped4_c(block
, dest
, line_size
);
4119 static void ff_jref_idct2_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4122 put_pixels_clamped2_c(block
, dest
, line_size
);
4124 static void ff_jref_idct2_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4127 add_pixels_clamped2_c(block
, dest
, line_size
);
4130 static void ff_jref_idct1_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4132 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
4134 dest
[0] = cm
[(block
[0] + 4)>>3];
4136 static void ff_jref_idct1_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
4138 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
4140 dest
[0] = cm
[dest
[0] + ((block
[0] + 4)>>3)];
4143 static void just_return(void *mem av_unused
, int stride av_unused
, int h av_unused
) { return; }
4145 /* init static data */
4146 void dsputil_static_init(void)
4150 for(i
=0;i
<256;i
++) ff_cropTbl
[i
+ MAX_NEG_CROP
] = i
;
4151 for(i
=0;i
<MAX_NEG_CROP
;i
++) {
4153 ff_cropTbl
[i
+ MAX_NEG_CROP
+ 256] = 255;
4156 for(i
=0;i
<512;i
++) {
4157 ff_squareTbl
[i
] = (i
- 256) * (i
- 256);
4160 for(i
=0; i
<64; i
++) inv_zigzag_direct16
[ff_zigzag_direct
[i
]]= i
+1;
4163 int ff_check_alignment(void){
4164 static int did_fail
=0;
4165 DECLARE_ALIGNED_16(int, aligned
);
4167 if((long)&aligned
& 15){
4169 #if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
4170 av_log(NULL
, AV_LOG_ERROR
,
4171 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
4172 "and may be very slow or crash. This is not a bug in libavcodec,\n"
4173 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
4174 "Do not report crashes to FFmpeg developers.\n");
4183 void dsputil_init(DSPContext
* c
, AVCodecContext
*avctx
)
4187 ff_check_alignment();
4189 #ifdef CONFIG_ENCODERS
4190 if(avctx
->dct_algo
==FF_DCT_FASTINT
) {
4191 c
->fdct
= fdct_ifast
;
4192 c
->fdct248
= fdct_ifast248
;
4194 else if(avctx
->dct_algo
==FF_DCT_FAAN
) {
4195 c
->fdct
= ff_faandct
;
4196 c
->fdct248
= ff_faandct248
;
4199 c
->fdct
= ff_jpeg_fdct_islow
; //slow/accurate/default
4200 c
->fdct248
= ff_fdct248_islow
;
4202 #endif //CONFIG_ENCODERS
4204 if(avctx
->lowres
==1){
4205 if(avctx
->idct_algo
==FF_IDCT_INT
|| avctx
->idct_algo
==FF_IDCT_AUTO
|| !ENABLE_H264_DECODER
){
4206 c
->idct_put
= ff_jref_idct4_put
;
4207 c
->idct_add
= ff_jref_idct4_add
;
4209 c
->idct_put
= ff_h264_lowres_idct_put_c
;
4210 c
->idct_add
= ff_h264_lowres_idct_add_c
;
4212 c
->idct
= j_rev_dct4
;
4213 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4214 }else if(avctx
->lowres
==2){
4215 c
->idct_put
= ff_jref_idct2_put
;
4216 c
->idct_add
= ff_jref_idct2_add
;
4217 c
->idct
= j_rev_dct2
;
4218 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4219 }else if(avctx
->lowres
==3){
4220 c
->idct_put
= ff_jref_idct1_put
;
4221 c
->idct_add
= ff_jref_idct1_add
;
4222 c
->idct
= j_rev_dct1
;
4223 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4225 if(avctx
->idct_algo
==FF_IDCT_INT
){
4226 c
->idct_put
= ff_jref_idct_put
;
4227 c
->idct_add
= ff_jref_idct_add
;
4228 c
->idct
= j_rev_dct
;
4229 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
4230 }else if((ENABLE_VP3_DECODER
|| ENABLE_VP5_DECODER
|| ENABLE_VP6_DECODER
|| ENABLE_THEORA_DECODER
) &&
4231 avctx
->idct_algo
==FF_IDCT_VP3
){
4232 c
->idct_put
= ff_vp3_idct_put_c
;
4233 c
->idct_add
= ff_vp3_idct_add_c
;
4234 c
->idct
= ff_vp3_idct_c
;
4235 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4236 }else if(avctx
->idct_algo
==FF_IDCT_WMV2
){
4237 c
->idct_put
= ff_wmv2_idct_put_c
;
4238 c
->idct_add
= ff_wmv2_idct_add_c
;
4239 c
->idct
= ff_wmv2_idct_c
;
4240 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4241 }else if(avctx
->idct_algo
==FF_IDCT_FAAN
){
4242 c
->idct_put
= ff_faanidct_put
;
4243 c
->idct_add
= ff_faanidct_add
;
4244 c
->idct
= ff_faanidct
;
4245 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4246 }else if(ENABLE_EATGQ_DECODER
&& avctx
->idct_algo
==FF_IDCT_EA
) {
4247 c
->idct_put
= ff_ea_idct_put_c
;
4248 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4249 }else{ //accurate/default
4250 c
->idct_put
= ff_simple_idct_put
;
4251 c
->idct_add
= ff_simple_idct_add
;
4252 c
->idct
= ff_simple_idct
;
4253 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
4257 if (ENABLE_H264_DECODER
) {
4258 c
->h264_idct_add
= ff_h264_idct_add_c
;
4259 c
->h264_idct8_add
= ff_h264_idct8_add_c
;
4260 c
->h264_idct_dc_add
= ff_h264_idct_dc_add_c
;
4261 c
->h264_idct8_dc_add
= ff_h264_idct8_dc_add_c
;
4264 c
->get_pixels
= get_pixels_c
;
4265 c
->diff_pixels
= diff_pixels_c
;
4266 c
->put_pixels_clamped
= put_pixels_clamped_c
;
4267 c
->put_signed_pixels_clamped
= put_signed_pixels_clamped_c
;
4268 c
->add_pixels_clamped
= add_pixels_clamped_c
;
4269 c
->add_pixels8
= add_pixels8_c
;
4270 c
->add_pixels4
= add_pixels4_c
;
4271 c
->sum_abs_dctelem
= sum_abs_dctelem_c
;
4274 c
->clear_blocks
= clear_blocks_c
;
4275 c
->pix_sum
= pix_sum_c
;
4276 c
->pix_norm1
= pix_norm1_c
;
4278 /* TODO [0] 16 [1] 8 */
4279 c
->pix_abs
[0][0] = pix_abs16_c
;
4280 c
->pix_abs
[0][1] = pix_abs16_x2_c
;
4281 c
->pix_abs
[0][2] = pix_abs16_y2_c
;
4282 c
->pix_abs
[0][3] = pix_abs16_xy2_c
;
4283 c
->pix_abs
[1][0] = pix_abs8_c
;
4284 c
->pix_abs
[1][1] = pix_abs8_x2_c
;
4285 c
->pix_abs
[1][2] = pix_abs8_y2_c
;
4286 c
->pix_abs
[1][3] = pix_abs8_xy2_c
;
4288 #define dspfunc(PFX, IDX, NUM) \
4289 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
4290 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
4291 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
4292 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
4294 dspfunc(put
, 0, 16);
4295 dspfunc(put_no_rnd
, 0, 16);
4297 dspfunc(put_no_rnd
, 1, 8);
4301 dspfunc(avg
, 0, 16);
4302 dspfunc(avg_no_rnd
, 0, 16);
4304 dspfunc(avg_no_rnd
, 1, 8);
4309 c
->put_no_rnd_pixels_l2
[0]= put_no_rnd_pixels16_l2_c
;
4310 c
->put_no_rnd_pixels_l2
[1]= put_no_rnd_pixels8_l2_c
;
4312 c
->put_tpel_pixels_tab
[ 0] = put_tpel_pixels_mc00_c
;
4313 c
->put_tpel_pixels_tab
[ 1] = put_tpel_pixels_mc10_c
;
4314 c
->put_tpel_pixels_tab
[ 2] = put_tpel_pixels_mc20_c
;
4315 c
->put_tpel_pixels_tab
[ 4] = put_tpel_pixels_mc01_c
;
4316 c
->put_tpel_pixels_tab
[ 5] = put_tpel_pixels_mc11_c
;
4317 c
->put_tpel_pixels_tab
[ 6] = put_tpel_pixels_mc21_c
;
4318 c
->put_tpel_pixels_tab
[ 8] = put_tpel_pixels_mc02_c
;
4319 c
->put_tpel_pixels_tab
[ 9] = put_tpel_pixels_mc12_c
;
4320 c
->put_tpel_pixels_tab
[10] = put_tpel_pixels_mc22_c
;
4322 c
->avg_tpel_pixels_tab
[ 0] = avg_tpel_pixels_mc00_c
;
4323 c
->avg_tpel_pixels_tab
[ 1] = avg_tpel_pixels_mc10_c
;
4324 c
->avg_tpel_pixels_tab
[ 2] = avg_tpel_pixels_mc20_c
;
4325 c
->avg_tpel_pixels_tab
[ 4] = avg_tpel_pixels_mc01_c
;
4326 c
->avg_tpel_pixels_tab
[ 5] = avg_tpel_pixels_mc11_c
;
4327 c
->avg_tpel_pixels_tab
[ 6] = avg_tpel_pixels_mc21_c
;
4328 c
->avg_tpel_pixels_tab
[ 8] = avg_tpel_pixels_mc02_c
;
4329 c
->avg_tpel_pixels_tab
[ 9] = avg_tpel_pixels_mc12_c
;
4330 c
->avg_tpel_pixels_tab
[10] = avg_tpel_pixels_mc22_c
;
4332 #define dspfunc(PFX, IDX, NUM) \
4333 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
4334 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
4335 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
4336 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
4337 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
4338 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
4339 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
4340 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
4341 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
4342 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
4343 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
4344 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
4345 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
4346 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
4347 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
4348 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
4350 dspfunc(put_qpel
, 0, 16);
4351 dspfunc(put_no_rnd_qpel
, 0, 16);
4353 dspfunc(avg_qpel
, 0, 16);
4354 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
4356 dspfunc(put_qpel
, 1, 8);
4357 dspfunc(put_no_rnd_qpel
, 1, 8);
4359 dspfunc(avg_qpel
, 1, 8);
4360 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
4362 dspfunc(put_h264_qpel
, 0, 16);
4363 dspfunc(put_h264_qpel
, 1, 8);
4364 dspfunc(put_h264_qpel
, 2, 4);
4365 dspfunc(put_h264_qpel
, 3, 2);
4366 dspfunc(avg_h264_qpel
, 0, 16);
4367 dspfunc(avg_h264_qpel
, 1, 8);
4368 dspfunc(avg_h264_qpel
, 2, 4);
4371 c
->put_h264_chroma_pixels_tab
[0]= put_h264_chroma_mc8_c
;
4372 c
->put_h264_chroma_pixels_tab
[1]= put_h264_chroma_mc4_c
;
4373 c
->put_h264_chroma_pixels_tab
[2]= put_h264_chroma_mc2_c
;
4374 c
->avg_h264_chroma_pixels_tab
[0]= avg_h264_chroma_mc8_c
;
4375 c
->avg_h264_chroma_pixels_tab
[1]= avg_h264_chroma_mc4_c
;
4376 c
->avg_h264_chroma_pixels_tab
[2]= avg_h264_chroma_mc2_c
;
4377 c
->put_no_rnd_h264_chroma_pixels_tab
[0]= put_no_rnd_h264_chroma_mc8_c
;
4379 c
->weight_h264_pixels_tab
[0]= weight_h264_pixels16x16_c
;
4380 c
->weight_h264_pixels_tab
[1]= weight_h264_pixels16x8_c
;
4381 c
->weight_h264_pixels_tab
[2]= weight_h264_pixels8x16_c
;
4382 c
->weight_h264_pixels_tab
[3]= weight_h264_pixels8x8_c
;
4383 c
->weight_h264_pixels_tab
[4]= weight_h264_pixels8x4_c
;
4384 c
->weight_h264_pixels_tab
[5]= weight_h264_pixels4x8_c
;
4385 c
->weight_h264_pixels_tab
[6]= weight_h264_pixels4x4_c
;
4386 c
->weight_h264_pixels_tab
[7]= weight_h264_pixels4x2_c
;
4387 c
->weight_h264_pixels_tab
[8]= weight_h264_pixels2x4_c
;
4388 c
->weight_h264_pixels_tab
[9]= weight_h264_pixels2x2_c
;
4389 c
->biweight_h264_pixels_tab
[0]= biweight_h264_pixels16x16_c
;
4390 c
->biweight_h264_pixels_tab
[1]= biweight_h264_pixels16x8_c
;
4391 c
->biweight_h264_pixels_tab
[2]= biweight_h264_pixels8x16_c
;
4392 c
->biweight_h264_pixels_tab
[3]= biweight_h264_pixels8x8_c
;
4393 c
->biweight_h264_pixels_tab
[4]= biweight_h264_pixels8x4_c
;
4394 c
->biweight_h264_pixels_tab
[5]= biweight_h264_pixels4x8_c
;
4395 c
->biweight_h264_pixels_tab
[6]= biweight_h264_pixels4x4_c
;
4396 c
->biweight_h264_pixels_tab
[7]= biweight_h264_pixels4x2_c
;
4397 c
->biweight_h264_pixels_tab
[8]= biweight_h264_pixels2x4_c
;
4398 c
->biweight_h264_pixels_tab
[9]= biweight_h264_pixels2x2_c
;
4400 c
->draw_edges
= draw_edges_c
;
4402 #ifdef CONFIG_CAVS_DECODER
4403 ff_cavsdsp_init(c
,avctx
);
4405 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
4406 ff_vc1dsp_init(c
,avctx
);
4408 #if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
4409 ff_intrax8dsp_init(c
,avctx
);
4411 #if defined(CONFIG_H264_ENCODER)
4412 ff_h264dspenc_init(c
,avctx
);
4415 c
->put_mspel_pixels_tab
[0]= put_mspel8_mc00_c
;
4416 c
->put_mspel_pixels_tab
[1]= put_mspel8_mc10_c
;
4417 c
->put_mspel_pixels_tab
[2]= put_mspel8_mc20_c
;
4418 c
->put_mspel_pixels_tab
[3]= put_mspel8_mc30_c
;
4419 c
->put_mspel_pixels_tab
[4]= put_mspel8_mc02_c
;
4420 c
->put_mspel_pixels_tab
[5]= put_mspel8_mc12_c
;
4421 c
->put_mspel_pixels_tab
[6]= put_mspel8_mc22_c
;
4422 c
->put_mspel_pixels_tab
[7]= put_mspel8_mc32_c
;
4424 #define SET_CMP_FUNC(name) \
4425 c->name[0]= name ## 16_c;\
4426 c->name[1]= name ## 8x8_c;
4428 SET_CMP_FUNC(hadamard8_diff
)
4429 c
->hadamard8_diff
[4]= hadamard8_intra16_c
;
4430 SET_CMP_FUNC(dct_sad
)
4431 SET_CMP_FUNC(dct_max
)
4433 SET_CMP_FUNC(dct264_sad
)
4435 c
->sad
[0]= pix_abs16_c
;
4436 c
->sad
[1]= pix_abs8_c
;
4440 SET_CMP_FUNC(quant_psnr
)
4443 c
->vsad
[0]= vsad16_c
;
4444 c
->vsad
[4]= vsad_intra16_c
;
4445 c
->vsse
[0]= vsse16_c
;
4446 c
->vsse
[4]= vsse_intra16_c
;
4447 c
->nsse
[0]= nsse16_c
;
4448 c
->nsse
[1]= nsse8_c
;
4449 #ifdef CONFIG_SNOW_ENCODER
4450 c
->w53
[0]= w53_16_c
;
4452 c
->w97
[0]= w97_16_c
;
4456 c
->ssd_int8_vs_int16
= ssd_int8_vs_int16_c
;
4458 c
->add_bytes
= add_bytes_c
;
4459 c
->add_bytes_l2
= add_bytes_l2_c
;
4460 c
->diff_bytes
= diff_bytes_c
;
4461 c
->sub_hfyu_median_prediction
= sub_hfyu_median_prediction_c
;
4462 c
->bswap_buf
= bswap_buf
;
4463 #ifdef CONFIG_PNG_DECODER
4464 c
->add_png_paeth_prediction
= ff_add_png_paeth_prediction
;
4467 c
->h264_v_loop_filter_luma
= h264_v_loop_filter_luma_c
;
4468 c
->h264_h_loop_filter_luma
= h264_h_loop_filter_luma_c
;
4469 c
->h264_v_loop_filter_chroma
= h264_v_loop_filter_chroma_c
;
4470 c
->h264_h_loop_filter_chroma
= h264_h_loop_filter_chroma_c
;
4471 c
->h264_v_loop_filter_chroma_intra
= h264_v_loop_filter_chroma_intra_c
;
4472 c
->h264_h_loop_filter_chroma_intra
= h264_h_loop_filter_chroma_intra_c
;
4473 c
->h264_loop_filter_strength
= NULL
;
4475 if (ENABLE_ANY_H263
) {
4476 c
->h263_h_loop_filter
= h263_h_loop_filter_c
;
4477 c
->h263_v_loop_filter
= h263_v_loop_filter_c
;
4480 if (ENABLE_VP3_DECODER
|| ENABLE_THEORA_DECODER
) {
4481 c
->vp3_h_loop_filter
= ff_vp3_h_loop_filter_c
;
4482 c
->vp3_v_loop_filter
= ff_vp3_v_loop_filter_c
;
4485 c
->h261_loop_filter
= h261_loop_filter_c
;
4487 c
->try_8x8basis
= try_8x8basis_c
;
4488 c
->add_8x8basis
= add_8x8basis_c
;
4490 #ifdef CONFIG_SNOW_DECODER
4491 c
->vertical_compose97i
= ff_snow_vertical_compose97i
;
4492 c
->horizontal_compose97i
= ff_snow_horizontal_compose97i
;
4493 c
->inner_add_yblock
= ff_snow_inner_add_yblock
;
4496 #ifdef CONFIG_VORBIS_DECODER
4497 c
->vorbis_inverse_coupling
= vorbis_inverse_coupling
;
4499 #ifdef CONFIG_AC3_DECODER
4500 c
->ac3_downmix
= ff_ac3_downmix_c
;
4502 #ifdef CONFIG_FLAC_ENCODER
4503 c
->flac_compute_autocorr
= ff_flac_compute_autocorr
;
4505 c
->vector_fmul
= vector_fmul_c
;
4506 c
->vector_fmul_reverse
= vector_fmul_reverse_c
;
4507 c
->vector_fmul_add_add
= ff_vector_fmul_add_add_c
;
4508 c
->vector_fmul_window
= ff_vector_fmul_window_c
;
4509 c
->int32_to_float_fmul_scalar
= int32_to_float_fmul_scalar_c
;
4510 c
->float_to_int16
= ff_float_to_int16_c
;
4511 c
->float_to_int16_interleave
= ff_float_to_int16_interleave_c
;
4512 c
->add_int16
= add_int16_c
;
4513 c
->sub_int16
= sub_int16_c
;
4514 c
->scalarproduct_int16
= scalarproduct_int16_c
;
4516 c
->shrink
[0]= ff_img_copy_plane
;
4517 c
->shrink
[1]= ff_shrink22
;
4518 c
->shrink
[2]= ff_shrink44
;
4519 c
->shrink
[3]= ff_shrink88
;
4521 c
->prefetch
= just_return
;
4523 memset(c
->put_2tap_qpel_pixels_tab
, 0, sizeof(c
->put_2tap_qpel_pixels_tab
));
4524 memset(c
->avg_2tap_qpel_pixels_tab
, 0, sizeof(c
->avg_2tap_qpel_pixels_tab
));
4526 if (ENABLE_MMX
) dsputil_init_mmx (c
, avctx
);
4527 if (ENABLE_ARMV4L
) dsputil_init_armv4l(c
, avctx
);
4528 if (ENABLE_MLIB
) dsputil_init_mlib (c
, avctx
);
4529 if (ENABLE_VIS
) dsputil_init_vis (c
, avctx
);
4530 if (ENABLE_ALPHA
) dsputil_init_alpha (c
, avctx
);
4531 if (ENABLE_POWERPC
) dsputil_init_ppc (c
, avctx
);
4532 if (ENABLE_MMI
) dsputil_init_mmi (c
, avctx
);
4533 if (ENABLE_SH4
) dsputil_init_sh4 (c
, avctx
);
4534 if (ENABLE_BFIN
) dsputil_init_bfin (c
, avctx
);
4536 for(i
=0; i
<64; i
++){
4537 if(!c
->put_2tap_qpel_pixels_tab
[0][i
])
4538 c
->put_2tap_qpel_pixels_tab
[0][i
]= c
->put_h264_qpel_pixels_tab
[0][i
];
4539 if(!c
->avg_2tap_qpel_pixels_tab
[0][i
])
4540 c
->avg_2tap_qpel_pixels_tab
[0][i
]= c
->avg_h264_qpel_pixels_tab
[0][i
];
4543 switch(c
->idct_permutation_type
){
4544 case FF_NO_IDCT_PERM
:
4546 c
->idct_permutation
[i
]= i
;
4548 case FF_LIBMPEG2_IDCT_PERM
:
4550 c
->idct_permutation
[i
]= (i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2);
4552 case FF_SIMPLE_IDCT_PERM
:
4554 c
->idct_permutation
[i
]= simple_mmx_permutation
[i
];
4556 case FF_TRANSPOSE_IDCT_PERM
:
4558 c
->idct_permutation
[i
]= ((i
&7)<<3) | (i
>>3);
4560 case FF_PARTTRANS_IDCT_PERM
:
4562 c
->idct_permutation
[i
]= (i
&0x24) | ((i
&3)<<3) | ((i
>>3)&3);
4564 case FF_SSE2_IDCT_PERM
:
4566 c
->idct_permutation
[i
]= (i
&0x38) | idct_sse2_row_perm
[i
&7];
4569 av_log(avctx
, AV_LOG_ERROR
, "Internal error, IDCT permutation not set\n");