/*
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "simple_idct.h"

void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);

void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);

void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);

void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);

void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
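/* ~0UL/255 is the all-ones word divided by 255, i.e. 0x01...01 (one 0x01 per
 * byte of the native word), so multiplying it by a byte constant replicates
 * that byte into every lane: on a 64-bit build, pb_7f == 0x7f7f7f7f7f7f7f7f
 * and pb_80 == 0x8080808080808080. */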
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* not permuted inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
const uint32_t ff_inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
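/* A minimal usage sketch of the identity quoted above (the helper name below
 * is ours for illustration, not an FFmpeg API): the table replaces a division
 * by a small constant with a 32x32->64 bit multiply and a shift, which is
 * what e.g. the MMX quantizer relies on. */
static inline unsigned div_by_inverse_example(unsigned a, unsigned b)
{
    /* valid for 0 <= a <= 65536 and 2 <= b <= 255, per the table's comment */
    return (unsigned)(((uint64_t)a * ff_inverse[b]) >> 32);
}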
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= bswap_32(src[i+0]);
        dst[i+1]= bswap_32(src[i+1]);
        dst[i+2]= bswap_32(src[i+2]);
        dst[i+3]= bswap_32(src[i+3]);
        dst[i+4]= bswap_32(src[i+4]);
        dst[i+5]= bswap_32(src[i+5]);
        dst[i+6]= bswap_32(src[i+6]);
        dst[i+7]= bswap_32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= bswap_32(src[i+0]);
    }
}
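/* Note for the sse*_c functions below: ff_squareTbl is initialized elsewhere
 * so that entry 256+d holds d*d for d in [-256, 255]; biasing the pointer by
 * +256 (as in pix_norm1_c above) lets the possibly negative byte difference
 * pix1[k] - pix2[k] be used directly as an index. */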
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
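/* w_c below scores the difference between pix1 and pix2 in the wavelet
 * domain: the residual is transformed with ff_spatial_dwt and the absolute
 * subband coefficients are summed with per-subband weights from the scale
 * table, giving a 5/3- or 9/7-wavelet distortion metric for the snow
 * encoder (type selects the wavelet). */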
#ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        { // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            /* remaining rows elided in this excerpt */
        },{ // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            /* remaining rows elided in this excerpt */
        }
      },{
        { // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            /* remaining rows elided in this excerpt */
        },{ // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            /* remaining rows elided in this excerpt */
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    return s>>9;
}

static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
#endif
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    /* top and bottom */
    for(i=0;i<w;i++) {
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
        //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
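/* Typical use (a sketch, not a prescription): when a motion vector makes a
 * block read outside the picture, the caller copies the overlapping part
 * into a scratch buffer with ff_emulated_edge_mc, which replicates the
 * nearest edge samples into the out-of-picture region, and then runs the
 * ordinary MC code on that buffer. */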
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
}
static int sum_abs_dctelem_c(DCTELEM *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}
#if 0 /* 64-bit PIXOP2 variant; the original preprocessor condition is elided in this excerpt */

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
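/* The identities behind the bit tricks above (per byte lane, with the
 * 0xFE.. mask keeping carries from crossing lanes):
 *   (a|b) - (((a^b)&0xFE..)>>1) == (a+b+1)>>1   (rounded average)
 *   (a&b) + (((a^b)&0xFE..)>>1) == (a+b)>>1     (truncated "no_rnd" average)
 * The xy2 variants split each byte into its low 2 bits (l0/l1) and its high
 * 6 bits pre-shifted down by 2 (h0/h1) so four neighbours can be averaged
 * without overflow; the 0x02.. term is the +2 rounding of (a+b+c+d+2)>>2. */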
#else // 64 bit variant

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)

#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
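/* avg2 rounds the halfpel average up on ties (avg2(1,2)==2) and avg4 adds 2
 * before the shift so the four-sample average rounds to nearest:
 * avg4(0,1,1,1) == (3+2)>>2 == 1. */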
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
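/* The four bilinear weights above always sum to 16*16 == 256, so the >>8
 * renormalizes exactly; rounder supplies the rounding constant (128 gives
 * round-to-nearest, and the caller passes a slightly smaller value in
 * MPEG-4's no-rounding mode). */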
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, x;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int vx= ox;
        int vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index  ]*(s-frac_x)
                                          + src[index+1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
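/* ff_gmc_c samples an affine warp: (ox,oy) is the fixed-point source position
 * of the block's top-left corner, advanced by (dxx,dyx) per output column and
 * (dxy,dyy) per output row; s = 1<<shift is the subpel resolution and r the
 * rounding constant of the bilinear interpolation, with clipped fetches once
 * the warp leaves the width x height source area. */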
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
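/* The thirdpel constants above are fixed-point reciprocals: 683 ==
 * round(2^11/3), so (683*(2*a + b + 1))>>11 approximates (2*a + b + 1)/3,
 * and 2731 == round(2^15/12) pairs with the (4,3,3,2)-weighted four-sample
 * averages (weights summing to 12, +6 for rounding). */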
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
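/* A+B+C+D == 8*8 == 64 for any (x,y), so op_put's (b+32)>>6 is a
 * round-to-nearest renormalization of the bilinear sum, and op_avg
 * additionally averages with the existing destination pixel, again with
 * rounding. */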
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
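/* The "+ 32 - 4" above biases the renormalization downward relative to the
 * rounded (+32)>>6 of op_put, implementing a no-rounding chroma mode.
 *
 * The QPEL_MC macro that follows builds the MPEG-4 quarter-pel functions
 * around a (20,-6,3,-1) lowpass whose coefficient sum is 20*2-6*2+3*2-1*2
 * == 32, i.e. the filtered value is renormalized by >>5 (with clamping via
 * the cm table) inside the OP macros. */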
1702 #define QPEL_MC(r, OPNAME, RND, OP) \
1703 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1704 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1708 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1709 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1710 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1711 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1712 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1713 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1714 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1715 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1721 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1723 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1727 const int src0= src[0*srcStride];\
1728 const int src1= src[1*srcStride];\
1729 const int src2= src[2*srcStride];\
1730 const int src3= src[3*srcStride];\
1731 const int src4= src[4*srcStride];\
1732 const int src5= src[5*srcStride];\
1733 const int src6= src[6*srcStride];\
1734 const int src7= src[7*srcStride];\
1735 const int src8= src[8*srcStride];\
1736 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1737 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1738 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1739 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1740 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1741 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1742 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1743 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1749 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1750 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1755 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1756 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1757 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1758 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1759 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1760 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1761 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1762 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1763 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1764 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1765 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1766 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1767 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1768 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1769 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1770 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1776 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1777 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1782 const int src0= src[0*srcStride];\
1783 const int src1= src[1*srcStride];\
1784 const int src2= src[2*srcStride];\
1785 const int src3= src[3*srcStride];\
1786 const int src4= src[4*srcStride];\
1787 const int src5= src[5*srcStride];\
1788 const int src6= src[6*srcStride];\
1789 const int src7= src[7*srcStride];\
1790 const int src8= src[8*srcStride];\
1791 const int src9= src[9*srcStride];\
1792 const int src10= src[10*srcStride];\
1793 const int src11= src[11*srcStride];\
1794 const int src12= src[12*srcStride];\
1795 const int src13= src[13*srcStride];\
1796 const int src14= src[14*srcStride];\
1797 const int src15= src[15*srcStride];\
1798 const int src16= src[16*srcStride];\
1799 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1800 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1801 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1802 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1803 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1804 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1805 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1806 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1807 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1808 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1809 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1810 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1811 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1812 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1813 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1814 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
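/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The half-pel taps used by QPEL_MC form the
 * MPEG-4 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1)/32: op_put's
 * "(b + 16) >> 5" is the round-to-nearest divide by 32, and indexing the
 * cm[] crop table clips the result to 0..255. For one interior sample: */
#if 0
static uint8_t mpeg4_qpel_half_sample(const uint8_t *s)
{
    /* s[0] and s[1] are the two center taps of the 8-tap window */
    int v = (s[0]+s[1])*20 - (s[-1]+s[2])*6 + (s[-2]+s[3])*3 - (s[-3]+s[4]);
    return av_clip_uint8((v + 16) >> 5);    /* round, then clip to 8 bits */
}
#endif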
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)

H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
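/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. H264_LOWPASS uses the H.264 6-tap kernel
 * (1, -5, 20, 20, -5, 1). A single separable pass rounds once by 32
 * (op_put, "+16 >> 5"); the hv path keeps unrounded 16-bit intermediates
 * in tmp[] and rounds the combined two passes only once by 1024
 * (op2_put, "+512 >> 10"), which is why it needs a distinct OP2. */
#if 0
static uint8_t h264_half_sample_h(const uint8_t *s)
{
    /* s[0] and s[1] are the two center taps */
    int v = (s[0]+s[1])*20 - (s[-1]+s[2])*5 + (s[-2]+s[3]);
    return av_clip_uint8((v + 16) >> 5);    /* single-pass rounding */
}
#endif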
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
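/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. op_scale1 implements H.264 explicit weighted
 * prediction, p = clip255((x*weight + offset') >> log2_denom), with the
 * offset pre-scaled and pre-rounded as in the macro above; op_scale2 is
 * the bidirectional variant averaging two weighted references with one
 * extra shift. */
#if 0
static uint8_t weight_one_sample(uint8_t x, int weight, int offset, int log2_denom)
{
    offset <<= log2_denom;
    if (log2_denom) offset += 1 << (log2_denom - 1);    /* round to nearest */
    return av_clip_uint8((x*weight + offset) >> log2_denom);
}
#endif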
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}
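/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The WMV2 half-pel kernel above is the 4-tap
 * (-1, 9, 9, -1)/16 filter; "+8 >> 4" rounds to nearest and the cm[] crop
 * table clips to 0..255. */
#if 0
static uint8_t wmv2_half_sample(const uint8_t *s)
{
    return av_clip_uint8((9*(s[0] + s[1]) - (s[-1] + s[2]) + 8) >> 4);
}
#endif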
#ifdef CONFIG_CAVS_DECODER
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
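/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The H.263 deblocking offset d1 follows a
 * ramp with a dead zone: it equals d for small |d|, ramps back down to 0
 * between strength and 2*strength, and is 0 beyond that, so genuine image
 * edges (large d) are left untouched while blocking artifacts are smoothed. */
#if 0
static int h263_filter_ramp(int d, int strength)
{
    if (d < -2*strength) return 0;
    if (d < -  strength) return -2*strength - d;
    if (d <    strength) return d;
    if (d <  2*strength) return 2*strength - d;
    return 0;
}
#endif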
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
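/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The wrappers above exploit the xstride/ystride
 * split: xstride steps across the edge (through p2..q2), ystride steps along
 * it, so swapping the two strides turns the vertical-edge kernel into the
 * horizontal-edge one without duplicating the filter body. */
#if 0
static void filter_both_luma_edges(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);  /* vertical edge   */
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);  /* horizontal edge */
}
#endif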
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
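/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. pix_abs16_c is the SAD (sum of absolute
 * differences) of a 16-wide block; the unrolled body above is equivalent
 * to this loop form, unrolling just removes the inner-loop overhead. */
#if 0
static int sad16(const uint8_t *a, const uint8_t *b, int line_size, int h)
{
    int s = 0, x, y;
    for (y = 0; y < h; y++, a += line_size, b += line_size)
        for (x = 0; x < 16; x++)
            s += abs(a[x] - b[x]);
    return s;
}
#endif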
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x] - s2[x])*(s1[x] - s2[x]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x] - s2[x])*(s1[x] - s2[x]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
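/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. NSSE ("noise preserving SSE") mixes plain SSE
 * (score1) with the difference in local 2x2 gradient energy between source
 * and reconstruction (score2): a reconstruction that smooths away texture
 * or grain changes the gradient term and is penalized, and nsse_weight
 * scales how strongly. */
#if 0
static int nsse_combine(int sse, int gradient_diff, int nsse_weight)
{
    return sse + FFABS(gradient_diff) * nsse_weight;
}
#endif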
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
/**
 * permutes an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 *                  (inverse) permutated to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
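/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The pb_7f/pb_80 expression is a SWAR
 * (SIMD-within-a-register) byte add: the low 7 bits of every byte are
 * summed with carries confined by the &pb_7f masks, then the top bit of
 * each byte is restored with XOR, so one native-word add processes 4 or 8
 * pixels without carries leaking between bytes. */
#if 0
static long swar_add_bytes(long a, long b)
{
    return ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
}
#endif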
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
#ifndef HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    {
        for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
            long a = *(long*)(src1+i);
            long b = *(long*)(src2+i);
            *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
        }
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
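/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. The HuffYUV median predictor picks the median
 * of left, top and the gradient estimate left + top - topleft; mid_pred()
 * returns the middle of its three arguments and the &0xFF keeps the
 * gradient term in byte range. */
#if 0
static uint8_t hfyu_predict(uint8_t left, uint8_t top, uint8_t topleft)
{
    return mid_pred(left, top, (left + top - topleft) & 0xFF);
}
#endif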
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
    {
        static int maxi=0;
        if(sum>maxi){
            maxi=sum;
            printf("MAX:%d\n", maxi);
        }
    }
#endif
    return sum;
}
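/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. hadamard8_diff applies an 8x8 Hadamard
 * transform (three butterfly stages per dimension, all adds/subtracts)
 * to the block difference and sums the absolute coefficients: the classic
 * SATD metric, which tracks transform-coding cost better than plain SAD.
 * A 4-point 1-D stage looks like this: */
#if 0
static void hadamard4_1d(int v[4])
{
    int a = v[0]+v[1], b = v[0]-v[1], c = v[2]+v[3], d = v[2]-v[3];
    v[0] = a+c; v[1] = b+d; v[2] = a-c; v[3] = b-d;
}
#endif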
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
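/* Illustrative sketch (compiled out), not part of the original file; the
 * helper name below is ours. DCT8_1D is the one-dimensional step of the
 * H.264 high-profile 8x8 integer transform: shifts replace the irrational
 * DCT factors so it runs in exact integer arithmetic. dct264_sad8x8_c
 * below rebinds SRC/DST to sweep it first over rows, then over columns
 * while accumulating FFABS(v) instead of storing coefficients. Bound to
 * a plain array it would read: */
#if 0
#define SRC(x) row[x]
#define DST(x,v) row[x] = (v)
static void dct264_1d_row(int row[8]) DCT8_1D
#undef SRC
#undef DST
#endif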
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp); //FIXME

        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * last_length;

        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits += s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma

        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;

    for(i=start_i; i<last; i++){
        int j= scantable[i];

            if((level&(~127)) == 0){
                bits+= length[UNI_AC_ENC_INDEX(run, level)];

        level= temp[i] + 64;

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];

        s->dct_unquantize_intra(s, temp, 0, s->qscale);

        s->dct_unquantize_inter(s, temp, 0, s->qscale);

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
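
/* Note (added): the return above is a rate-distortion cost D + lambda*R with
 * lambda ~= qscale^2 * 109/128. Worked example: with qscale=4 and bits=100,
 * (100*4*4*109 + 64)>>7 = 174464>>7 = 1363 is added to the SSE distortion. */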
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * last_length;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits += s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma

        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;

    for(i=start_i; i<last; i++){
        int j= scantable[i];

            if((level&(~127)) == 0){
                bits+= length[UNI_AC_ENC_INDEX(run, level)];

        level= temp[i] + 64;

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
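
/* Note (added): UNI_AC_ENC_INDEX(run, level) flattens a (run, level) pair
 * into a single VLC-length-table index (run*128 + level in mpegvideo.h),
 * which is why level is biased by +64 and range-checked with (level&~127)
 * before the lookups above. */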
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    for(x=0; x<16; x+=4){
        score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])
               +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    for(x=0; x<16; x++){
        score+= FFABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
#define SQ(a) ((a)*(a))

static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    for(x=0; x<16; x+=4){
        score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
               +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    for(x=0; x<16; x++){
        score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
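
/* Note (added): the vsad*/vsse* pairs measure vertical activity of a block;
 * vsad sums |d| (an L1 norm) and vsse sums d*d (an L2 norm) of the
 * row-to-row difference d. The *_intra variants difference a block against
 * itself shifted one row down, the inter variants compare the row-to-row
 * change of two blocks. */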
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
static void vector_fmul_c(float *dst, const float *src, int len){
    for(i=0; i<len; i++)
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
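
/* Note (added): the src1[-i] indexing works because an elided line above the
 * loop advances src1 to its last element, so the second operand is read in
 * reverse order. */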
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    for(i=0; i<len; i++)
        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    for(i=-len, j=len-1; i<0; i++, j--) {
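        /* (elided here: s0/s1 load src0[i]/src1[j] and wi/wj load win[i]/win[j]) */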
        dst[i] = s0*wj - s1*wi + add_bias;
        dst[j] = s0*wi + s1*wj + add_bias;
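
/* Note (added): this is the symmetric overlap-add windowing used by the
 * float audio decoders; the output pair applies the window forwards and
 * backwards in a single pass, and add_bias pre-biases the samples (e.g. by
 * 385.0) for the float_to_int16 bit trick below. */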
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    for(i=0; i<len; i++)
        dst[i] = src[i] * mul;
static av_always_inline int float_to_int16_one(const float *src){
    int_fast32_t tmp = *(const int32_t*)src;

        tmp = (0x43c0ffff - tmp)>>31;
        // is this faster on some gcc/cpu combinations?
//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;

    return tmp - 0x8000;
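
/* Note (added): this reads the float's IEEE-754 bit pattern directly. The
 * caller is expected to pre-bias samples by +385.0f so valid values lie in
 * [384.0, 386.0), whose bit patterns span 0x43C00000..0x43C0FFFF: the low
 * 16 bits then encode the sample, and the final -0x8000 recenters it. */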
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
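        /* (elided: the unrolled pair above is the channels==2 fast path;
         * the generic loop below handles any channel count) */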
        for(c=0; c<channels; c++)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[c]+i);
static void add_int16_c(int16_t * v1, int16_t * v2, int order)

static void sub_int16_c(int16_t * v1, int16_t * v2, int order)

static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)

        res += (*v1++ * *v2++) >> shift;
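
/* Note (added): each product is shifted individually before accumulation,
 * so with v1={1,1}, v2={1,1}, shift=1 the result is (1>>1)+(1>>1) = 0,
 * not (1+1)>>1 = 1. */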
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
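
/* Note (added): these are round(2048*sqrt(2)*cos(k*pi/16)); e.g. for k=1,
 * 2048 * 1.414214 * 0.980785 ~= 2840.6, which rounds to W1 = 2841. */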
static void wmv2_idct_row(short * b)

    int a0,a1,a2,a3,a4,a5,a6,a7;

    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6+s1 + (1<<7))>>8;
    b[2] = (a4-a6+s2 + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6-s2 + (1<<7))>>8;
    b[6] = (a4+a6-s1 + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
static void wmv2_idct_col(short * b)

    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]     )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]     )>>3;

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6+s1 + (1<<13))>>14;
    b[8*2] = (a4-a6+s2 + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6-s2 + (1<<13))>>14;
    b[8*6] = (a4+a6-s1 + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
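
/* Note (added): together the two passes form a separable fixed-point 8x8
 * IDCT: wmv2_idct_row scales by ~2048*sqrt(2) and rounds with (1<<7) and
 * >>8, while the column pass keeps 3 extra fraction bits in "step 1" (the
 * >>3) before the final (1<<13) and >>14 rounding returns pixel-range
 * values. */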
void ff_wmv2_idct_c(short * block){

        wmv2_idct_row(block+i);

        wmv2_idct_col(block+i);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);

static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
    put_pixels_clamped_c(block, dest, line_size);

static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
    add_pixels_clamped_c(block, dest, line_size);

static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
    put_pixels_clamped4_c(block, dest, line_size);

static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
    add_pixels_clamped4_c(block, dest, line_size);

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
    put_pixels_clamped2_c(block, dest, line_size);

static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
    add_pixels_clamped2_c(block, dest, line_size);

static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];

static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];

static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
/* init static data */
void dsputil_static_init(void)

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
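
/* Sketch (added, hypothetical helper): how the biased ff_squareTbl
 * initialized above turns a signed byte difference into its square with a
 * single lookup: */
static int sse_row_example(const uint8_t *a, const uint8_t *b, int n)
{
    int i, sum = 0;
    for (i = 0; i < n; i++)
        sum += ff_squareTbl[a[i] - b[i] + 256]; /* (a[i]-b[i])^2, since
                                                   ff_squareTbl[i]=(i-256)^2 */
    return sum;
}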
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
        av_log(NULL, AV_LOG_ERROR,
            "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
            "and may be very slow or crash. This is not a bug in libavcodec,\n"
            "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
            "Do not report crashes to FFmpeg developers.\n");
void dsputil_init(DSPContext* c, AVCodecContext *avctx)

    ff_check_alignment();

#ifdef CONFIG_ENCODERS
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct    = fdct_ifast;
        c->fdct248 = fdct_ifast248;

    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;

        c->fdct    = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;

#endif //CONFIG_ENCODERS
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;

            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;

        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
    if (ENABLE_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;

    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);

    dspfunc(put_no_rnd, 1, 8);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);

    dspfunc(avg_no_rnd, 1, 8);
    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;
    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    c->draw_edges = draw_edges_c;
#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);

#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);

#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_intrax8dsp_init(c,avctx);

#if defined(CONFIG_H264_ENCODER)
    ff_h264dspenc_init(c,avctx);
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)

    SET_CMP_FUNC(dct264_sad)

    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;

    SET_CMP_FUNC(quant_psnr)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#ifdef CONFIG_SNOW_ENCODER
    c->w53[0]= w53_16_c;

    c->w97[0]= w97_16_c;
    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;
#ifdef CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
    c->h264_loop_filter_strength= NULL;

    if (ENABLE_ANY_H263) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;

    c->h261_loop_filter= h261_loop_filter_c;
    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#ifdef CONFIG_SNOW_DECODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;

#ifdef CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;

#ifdef CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;

#ifdef CONFIG_FLAC_ENCODER
    c->flac_compute_autocorr = ff_flac_compute_autocorr;
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->vector_fmul_window = ff_vector_fmul_window_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
    c->add_int16 = add_int16_c;
    c->sub_int16 = sub_int16_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
    if (ENABLE_ARMV4L)   dsputil_init_armv4l(c, avctx);
    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);
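
    /* Note (added): the ENABLE_* symbols are 0/1 compile-time constants, so
     * the calls for architectures that were not configured are removed as
     * dead code rather than tested at run time. */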
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
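
    /* Note (added): any 2-tap qpel slot an arch-specific init left at NULL
     * (after the memset above) falls back here to the matching H.264 qpel
     * function. */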
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
            c->idct_permutation[i]= i;

    case FF_LIBMPEG2_IDCT_PERM:
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);

    case FF_SIMPLE_IDCT_PERM:
            c->idct_permutation[i]= simple_mmx_permutation[i];

    case FF_TRANSPOSE_IDCT_PERM:
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
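            /* Note (added): this is an 8x8 transpose of the scan position,
             * e.g. i=10 (row 1, col 2) maps to ((10&7)<<3)|(10>>3) = 17
             * (row 2, col 1). */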
    case FF_PARTTRANS_IDCT_PERM:
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);

    case FF_SSE2_IDCT_PERM:
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];

        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");