2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264pred.c
24 * H.264 / AVC / MPEG4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
29 #include "mpegvideo.h"
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    /* Vertical prediction: replicate the 4 pixels directly above the
     * block into every one of the 4 rows.  The row is moved as one
     * 32-bit word, matching the file-wide convention. */
    const uint32_t top = ((uint32_t*)(src - stride))[0];
    int y;

    for(y = 0; y < 4; y++)
        ((uint32_t*)(src + y*stride))[0] = top;
}
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int i;
    /* Horizontal prediction: fill each row with its left neighbour,
     * splatted to 4 bytes.
     * The splat constant must be unsigned: src[-1+i*stride] can be 255,
     * and 255*0x01010101 does not fit in a signed int — signed overflow
     * is undefined behaviour in C.  0x01010101U makes the multiply
     * well-defined modulo 2^32 with the same bit pattern. */
    for(i = 0; i < 4; i++)
        ((uint32_t*)(src + i*stride))[0] = src[-1 + i*stride] * 0x01010101U;
}
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC prediction: the whole 4x4 block is filled with the rounded
     * average of the 4 top neighbours and the 4 left neighbours. */
    const int dc = (  src[-stride]      + src[1-stride]     + src[2-stride]     + src[3-stride]
                    + src[-1+0*stride]  + src[-1+1*stride]  + src[-1+2*stride]  + src[-1+3*stride] + 4) >> 3;
    int i;

    /* Splat with an unsigned constant: dc can be 255, and
     * 255*0x01010101 overflows a signed int (undefined behaviour). */
    for(i = 0; i < 4; i++)
        ((uint32_t*)(src + i*stride))[0] = dc * 0x01010101U;
}
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC prediction when only the left neighbours are available:
     * fill the block with the rounded average of the 4 left pixels. */
    const int dc = (  src[-1+0*stride] + src[-1+1*stride]
                    + src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
    int i;

    /* Unsigned splat constant: dc may be 255, and 255*0x01010101
     * overflows a signed int (undefined behaviour). */
    for(i = 0; i < 4; i++)
        ((uint32_t*)(src + i*stride))[0] = dc * 0x01010101U;
}
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC prediction when only the top neighbours are available:
     * fill the block with the rounded average of the 4 top pixels. */
    const int dc = (  src[-stride]   + src[1-stride]
                    + src[2-stride]  + src[3-stride] + 2) >> 2;
    int i;

    /* Unsigned splat constant: dc may be 255, and 255*0x01010101
     * overflows a signed int (undefined behaviour). */
    for(i = 0; i < 4; i++)
        ((uint32_t*)(src + i*stride))[0] = dc * 0x01010101U;
}
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* No neighbours available: fill the 4x4 block with the mid-grey
     * value 128 (one 32-bit splat per row). */
    int y;

    for(y = 0; y < 4; y++)
        ((uint32_t*)(src + y*stride))[0] = 128U * 0x01010101U;
}
83 #define LOAD_TOP_RIGHT_EDGE\
84 const int av_unused t4= topright[0];\
85 const int av_unused t5= topright[1];\
86 const int av_unused t6= topright[2];\
87 const int av_unused t7= topright[3];\
89 #define LOAD_DOWN_LEFT_EDGE\
90 const int av_unused l4= src[-1+4*stride];\
91 const int av_unused l5= src[-1+5*stride];\
92 const int av_unused l6= src[-1+6*stride];\
93 const int av_unused l7= src[-1+7*stride];\
95 #define LOAD_LEFT_EDGE\
96 const int av_unused l0= src[-1+0*stride];\
97 const int av_unused l1= src[-1+1*stride];\
98 const int av_unused l2= src[-1+2*stride];\
99 const int av_unused l3= src[-1+3*stride];\
101 #define LOAD_TOP_EDGE\
102 const int av_unused t0= src[ 0-1*stride];\
103 const int av_unused t1= src[ 1-1*stride];\
104 const int av_unused t2= src[ 2-1*stride];\
105 const int av_unused t3= src[ 3-1*stride];\
107 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
108 const int lt
= src
[-1-1*stride
];
112 src
[0+3*stride
]=(l3
+ 2*l2
+ l1
+ 2)>>2;
114 src
[1+3*stride
]=(l2
+ 2*l1
+ l0
+ 2)>>2;
117 src
[2+3*stride
]=(l1
+ 2*l0
+ lt
+ 2)>>2;
121 src
[3+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
124 src
[3+2*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
126 src
[3+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
127 src
[3+0*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
130 static void pred4x4_down_left_c(uint8_t *src
, uint8_t *topright
, int stride
){
135 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2)>>2;
137 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2)>>2;
140 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2)>>2;
144 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2)>>2;
147 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2)>>2;
149 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2)>>2;
150 src
[3+3*stride
]=(t6
+ 3*t7
+ 2)>>2;
153 static void pred4x4_down_left_svq3_c(uint8_t *src
, uint8_t *topright
, int stride
){
156 const av_unused
int unu0
= t0
;
157 const av_unused
int unu1
= l0
;
159 src
[0+0*stride
]=(l1
+ t1
)>>1;
161 src
[0+1*stride
]=(l2
+ t2
)>>1;
174 src
[3+3*stride
]=(l3
+ t3
)>>1;
177 static void pred4x4_down_left_rv40_c(uint8_t *src
, uint8_t *topright
, int stride
){
183 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2 + l0
+ l2
+ 2*l1
+ 2)>>3;
185 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2 + l1
+ l3
+ 2*l2
+ 2)>>3;
188 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2 + l2
+ l4
+ 2*l3
+ 2)>>3;
192 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2 + l3
+ l5
+ 2*l4
+ 2)>>3;
195 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2 + l4
+ l6
+ 2*l5
+ 2)>>3;
197 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2 + l5
+ l7
+ 2*l6
+ 2)>>3;
198 src
[3+3*stride
]=(t6
+ t7
+ 1 + l6
+ l7
+ 1)>>2;
201 static void pred4x4_down_left_rv40_nodown_c(uint8_t *src
, uint8_t *topright
, int stride
){
206 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2 + l0
+ l2
+ 2*l1
+ 2)>>3;
208 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2 + l1
+ l3
+ 2*l2
+ 2)>>3;
211 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2 + l2
+ 3*l3
+ 2)>>3;
215 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2 + l3
*4 + 2)>>3;
218 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2 + l3
*4 + 2)>>3;
220 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2 + l3
*4 + 2)>>3;
221 src
[3+3*stride
]=(t6
+ t7
+ 1 + 2*l3
+ 1)>>2;
224 static void pred4x4_vertical_right_c(uint8_t *src
, uint8_t *topright
, int stride
){
225 const int lt
= src
[-1-1*stride
];
230 src
[1+2*stride
]=(lt
+ t0
+ 1)>>1;
232 src
[2+2*stride
]=(t0
+ t1
+ 1)>>1;
234 src
[3+2*stride
]=(t1
+ t2
+ 1)>>1;
235 src
[3+0*stride
]=(t2
+ t3
+ 1)>>1;
237 src
[1+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
239 src
[2+3*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
241 src
[3+3*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
242 src
[3+1*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
243 src
[0+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
244 src
[0+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
247 static void pred4x4_vertical_left_c(uint8_t *src
, uint8_t *topright
, int stride
){
251 src
[0+0*stride
]=(t0
+ t1
+ 1)>>1;
253 src
[0+2*stride
]=(t1
+ t2
+ 1)>>1;
255 src
[1+2*stride
]=(t2
+ t3
+ 1)>>1;
257 src
[2+2*stride
]=(t3
+ t4
+ 1)>>1;
258 src
[3+2*stride
]=(t4
+ t5
+ 1)>>1;
259 src
[0+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
261 src
[0+3*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
263 src
[1+3*stride
]=(t2
+ 2*t3
+ t4
+ 2)>>2;
265 src
[2+3*stride
]=(t3
+ 2*t4
+ t5
+ 2)>>2;
266 src
[3+3*stride
]=(t4
+ 2*t5
+ t6
+ 2)>>2;
269 static void pred4x4_vertical_left_rv40(uint8_t *src
, uint8_t *topright
, int stride
,
270 const int l0
, const int l1
, const int l2
, const int l3
, const int l4
){
274 src
[0+0*stride
]=(2*t0
+ 2*t1
+ l1
+ 2*l2
+ l3
+ 4)>>3;
276 src
[0+2*stride
]=(t1
+ t2
+ 1)>>1;
278 src
[1+2*stride
]=(t2
+ t3
+ 1)>>1;
280 src
[2+2*stride
]=(t3
+ t4
+ 1)>>1;
281 src
[3+2*stride
]=(t4
+ t5
+ 1)>>1;
282 src
[0+1*stride
]=(t0
+ 2*t1
+ t2
+ l2
+ 2*l3
+ l4
+ 4)>>3;
284 src
[0+3*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
286 src
[1+3*stride
]=(t2
+ 2*t3
+ t4
+ 2)>>2;
288 src
[2+3*stride
]=(t3
+ 2*t4
+ t5
+ 2)>>2;
289 src
[3+3*stride
]=(t4
+ 2*t5
+ t6
+ 2)>>2;
292 static void pred4x4_vertical_left_rv40_c(uint8_t *src
, uint8_t *topright
, int stride
){
296 pred4x4_vertical_left_rv40(src
, topright
, stride
, l0
, l1
, l2
, l3
, l4
);
299 static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src
, uint8_t *topright
, int stride
){
302 pred4x4_vertical_left_rv40(src
, topright
, stride
, l0
, l1
, l2
, l3
, l3
);
305 static void pred4x4_horizontal_up_c(uint8_t *src
, uint8_t *topright
, int stride
){
308 src
[0+0*stride
]=(l0
+ l1
+ 1)>>1;
309 src
[1+0*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
311 src
[0+1*stride
]=(l1
+ l2
+ 1)>>1;
313 src
[1+1*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
315 src
[0+2*stride
]=(l2
+ l3
+ 1)>>1;
317 src
[1+2*stride
]=(l2
+ 2*l3
+ l3
+ 2)>>2;
326 static void pred4x4_horizontal_up_rv40_c(uint8_t *src
, uint8_t *topright
, int stride
){
332 src
[0+0*stride
]=(t1
+ 2*t2
+ t3
+ 2*l0
+ 2*l1
+ 4)>>3;
333 src
[1+0*stride
]=(t2
+ 2*t3
+ t4
+ l0
+ 2*l1
+ l2
+ 4)>>3;
335 src
[0+1*stride
]=(t3
+ 2*t4
+ t5
+ 2*l1
+ 2*l2
+ 4)>>3;
337 src
[1+1*stride
]=(t4
+ 2*t5
+ t6
+ l1
+ 2*l2
+ l3
+ 4)>>3;
339 src
[0+2*stride
]=(t5
+ 2*t6
+ t7
+ 2*l2
+ 2*l3
+ 4)>>3;
341 src
[1+2*stride
]=(t6
+ 3*t7
+ l2
+ 3*l3
+ 4)>>3;
343 src
[1+3*stride
]=(l3
+ 2*l4
+ l5
+ 2)>>2;
345 src
[2+2*stride
]=(t6
+ t7
+ l3
+ l4
+ 2)>>2;
346 src
[2+3*stride
]=(l4
+ l5
+ 1)>>1;
347 src
[3+3*stride
]=(l4
+ 2*l5
+ l6
+ 2)>>2;
350 static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src
, uint8_t *topright
, int stride
){
355 src
[0+0*stride
]=(t1
+ 2*t2
+ t3
+ 2*l0
+ 2*l1
+ 4)>>3;
356 src
[1+0*stride
]=(t2
+ 2*t3
+ t4
+ l0
+ 2*l1
+ l2
+ 4)>>3;
358 src
[0+1*stride
]=(t3
+ 2*t4
+ t5
+ 2*l1
+ 2*l2
+ 4)>>3;
360 src
[1+1*stride
]=(t4
+ 2*t5
+ t6
+ l1
+ 2*l2
+ l3
+ 4)>>3;
362 src
[0+2*stride
]=(t5
+ 2*t6
+ t7
+ 2*l2
+ 2*l3
+ 4)>>3;
364 src
[1+2*stride
]=(t6
+ 3*t7
+ l2
+ 3*l3
+ 4)>>3;
368 src
[2+2*stride
]=(t6
+ t7
+ 2*l3
+ 2)>>2;
373 static void pred4x4_horizontal_down_c(uint8_t *src
, uint8_t *topright
, int stride
){
374 const int lt
= src
[-1-1*stride
];
379 src
[2+1*stride
]=(lt
+ l0
+ 1)>>1;
381 src
[3+1*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
382 src
[2+0*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
383 src
[3+0*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
385 src
[2+2*stride
]=(l0
+ l1
+ 1)>>1;
387 src
[3+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
389 src
[2+3*stride
]=(l1
+ l2
+ 1)>>1;
391 src
[3+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
392 src
[0+3*stride
]=(l2
+ l3
+ 1)>>1;
393 src
[1+3*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
static void pred16x16_vertical_c(uint8_t *src, int stride){
    /* Vertical prediction for a 16x16 block: every row repeats the
     * 16 pixels of the row immediately above the block. */
    const uint8_t *top = src - stride;
    int y;

    for(y = 0; y < 16; y++)
        memcpy(src + y*stride, top, 16);
}
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;
    /* Horizontal prediction for a 16x16 block: each row is filled with
     * its left-neighbour pixel, splatted across four 32-bit words.
     * The splat constant must be unsigned: a left pixel of 255 makes
     * 255*0x01010101 overflow a signed int (undefined behaviour). */
    for(i = 0; i < 16; i++){
        ((uint32_t*)(src + i*stride))[0] =
        ((uint32_t*)(src + i*stride))[1] =
        ((uint32_t*)(src + i*stride))[2] =
        ((uint32_t*)(src + i*stride))[3] = src[-1 + i*stride] * 0x01010101U;
    }
}
422 static void pred16x16_dc_c(uint8_t *src
, int stride
){
426 dc
+= src
[-1+i
*stride
];
433 dc
= 0x01010101*((dc
+ 16)>>5);
436 ((uint32_t*)(src
+i
*stride
))[0]=
437 ((uint32_t*)(src
+i
*stride
))[1]=
438 ((uint32_t*)(src
+i
*stride
))[2]=
439 ((uint32_t*)(src
+i
*stride
))[3]= dc
;
443 static void pred16x16_left_dc_c(uint8_t *src
, int stride
){
447 dc
+= src
[-1+i
*stride
];
450 dc
= 0x01010101*((dc
+ 8)>>4);
453 ((uint32_t*)(src
+i
*stride
))[0]=
454 ((uint32_t*)(src
+i
*stride
))[1]=
455 ((uint32_t*)(src
+i
*stride
))[2]=
456 ((uint32_t*)(src
+i
*stride
))[3]= dc
;
460 static void pred16x16_top_dc_c(uint8_t *src
, int stride
){
466 dc
= 0x01010101*((dc
+ 8)>>4);
469 ((uint32_t*)(src
+i
*stride
))[0]=
470 ((uint32_t*)(src
+i
*stride
))[1]=
471 ((uint32_t*)(src
+i
*stride
))[2]=
472 ((uint32_t*)(src
+i
*stride
))[3]= dc
;
476 static void pred16x16_128_dc_c(uint8_t *src
, int stride
){
480 ((uint32_t*)(src
+i
*stride
))[0]=
481 ((uint32_t*)(src
+i
*stride
))[1]=
482 ((uint32_t*)(src
+i
*stride
))[2]=
483 ((uint32_t*)(src
+i
*stride
))[3]= 0x01010101U
*128U;
487 static inline void pred16x16_plane_compat_c(uint8_t *src
, int stride
, const int svq3
, const int rv40
){
490 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
491 const uint8_t * const src0
= src
+7-stride
;
492 const uint8_t *src1
= src
+8*stride
-1;
493 const uint8_t *src2
= src1
-2*stride
; // == src+6*stride-1;
494 int H
= src0
[1] - src0
[-1];
495 int V
= src1
[0] - src2
[ 0];
496 for(k
=2; k
<=8; ++k
) {
497 src1
+= stride
; src2
-= stride
;
498 H
+= k
*(src0
[k
] - src0
[-k
]);
499 V
+= k
*(src1
[0] - src2
[ 0]);
502 H
= ( 5*(H
/4) ) / 16;
503 V
= ( 5*(V
/4) ) / 16;
505 /* required for 100% accuracy */
508 H
= ( H
+ (H
>>2) ) >> 4;
509 V
= ( V
+ (V
>>2) ) >> 4;
515 a
= 16*(src1
[0] + src2
[16] + 1) - 7*(V
+H
);
516 for(j
=16; j
>0; --j
) {
519 for(i
=-16; i
<0; i
+=4) {
520 src
[16+i
] = cm
[ (b
) >> 5 ];
521 src
[17+i
] = cm
[ (b
+ H
) >> 5 ];
522 src
[18+i
] = cm
[ (b
+2*H
) >> 5 ];
523 src
[19+i
] = cm
[ (b
+3*H
) >> 5 ];
530 static void pred16x16_plane_c(uint8_t *src
, int stride
){
531 pred16x16_plane_compat_c(src
, stride
, 0, 0);
534 static void pred16x16_plane_svq3_c(uint8_t *src
, int stride
){
535 pred16x16_plane_compat_c(src
, stride
, 1, 0);
538 static void pred16x16_plane_rv40_c(uint8_t *src
, int stride
){
539 pred16x16_plane_compat_c(src
, stride
, 0, 1);
static void pred8x8_vertical_c(uint8_t *src, int stride){
    /* Vertical prediction for an 8x8 (chroma) block: every row repeats
     * the 8 pixels of the row immediately above the block. */
    const uint8_t *top = src - stride;
    int y;

    for(y = 0; y < 8; y++)
        memcpy(src + y*stride, top, 8);
}
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;
    /* Horizontal prediction for an 8x8 (chroma) block: each row is
     * filled with its left-neighbour pixel, splatted across two
     * 32-bit words.  Unsigned splat constant: a left pixel of 255
     * makes 255*0x01010101 overflow a signed int (undefined
     * behaviour). */
    for(i = 0; i < 8; i++){
        ((uint32_t*)(src + i*stride))[0] =
        ((uint32_t*)(src + i*stride))[1] = src[-1 + i*stride] * 0x01010101U;
    }
}
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    /* No usable neighbours: fill the 8x8 block with mid-grey (128). */
    int y;

    for(y = 0; y < 8; y++)
        memset(src + y*stride, 128, 8);
}
571 static void pred8x8_left_dc_c(uint8_t *src
, int stride
){
577 dc0
+= src
[-1+i
*stride
];
578 dc2
+= src
[-1+(i
+4)*stride
];
580 dc0
= 0x01010101*((dc0
+ 2)>>2);
581 dc2
= 0x01010101*((dc2
+ 2)>>2);
584 ((uint32_t*)(src
+i
*stride
))[0]=
585 ((uint32_t*)(src
+i
*stride
))[1]= dc0
;
588 ((uint32_t*)(src
+i
*stride
))[0]=
589 ((uint32_t*)(src
+i
*stride
))[1]= dc2
;
593 static void pred8x8_left_dc_rv40_c(uint8_t *src
, int stride
){
599 dc0
+= src
[-1+i
*stride
];
600 dc0
= 0x01010101*((dc0
+ 4)>>3);
603 ((uint32_t*)(src
+i
*stride
))[0]=
604 ((uint32_t*)(src
+i
*stride
))[1]= dc0
;
608 static void pred8x8_top_dc_c(uint8_t *src
, int stride
){
615 dc1
+= src
[4+i
-stride
];
617 dc0
= 0x01010101*((dc0
+ 2)>>2);
618 dc1
= 0x01010101*((dc1
+ 2)>>2);
621 ((uint32_t*)(src
+i
*stride
))[0]= dc0
;
622 ((uint32_t*)(src
+i
*stride
))[1]= dc1
;
625 ((uint32_t*)(src
+i
*stride
))[0]= dc0
;
626 ((uint32_t*)(src
+i
*stride
))[1]= dc1
;
630 static void pred8x8_top_dc_rv40_c(uint8_t *src
, int stride
){
637 dc0
= 0x01010101*((dc0
+ 4)>>3);
640 ((uint32_t*)(src
+i
*stride
))[0]=
641 ((uint32_t*)(src
+i
*stride
))[1]= dc0
;
646 static void pred8x8_dc_c(uint8_t *src
, int stride
){
648 int dc0
, dc1
, dc2
, dc3
;
652 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
653 dc1
+= src
[4+i
-stride
];
654 dc2
+= src
[-1+(i
+4)*stride
];
656 dc3
= 0x01010101*((dc1
+ dc2
+ 4)>>3);
657 dc0
= 0x01010101*((dc0
+ 4)>>3);
658 dc1
= 0x01010101*((dc1
+ 2)>>2);
659 dc2
= 0x01010101*((dc2
+ 2)>>2);
662 ((uint32_t*)(src
+i
*stride
))[0]= dc0
;
663 ((uint32_t*)(src
+i
*stride
))[1]= dc1
;
666 ((uint32_t*)(src
+i
*stride
))[0]= dc2
;
667 ((uint32_t*)(src
+i
*stride
))[1]= dc3
;
671 //the following 4 function should not be optimized!
672 static void pred8x8_mad_cow_dc_l0t(uint8_t *src
, int stride
){
673 pred8x8_top_dc_c(src
, stride
);
674 pred4x4_dc_c(src
, NULL
, stride
);
677 static void pred8x8_mad_cow_dc_0lt(uint8_t *src
, int stride
){
678 pred8x8_dc_c(src
, stride
);
679 pred4x4_top_dc_c(src
, NULL
, stride
);
682 static void pred8x8_mad_cow_dc_l00(uint8_t *src
, int stride
){
683 pred8x8_left_dc_c(src
, stride
);
684 pred4x4_128_dc_c(src
+ 4*stride
, NULL
, stride
);
685 pred4x4_128_dc_c(src
+ 4*stride
+ 4, NULL
, stride
);
688 static void pred8x8_mad_cow_dc_0l0(uint8_t *src
, int stride
){
689 pred8x8_left_dc_c(src
, stride
);
690 pred4x4_128_dc_c(src
, NULL
, stride
);
691 pred4x4_128_dc_c(src
+ 4, NULL
, stride
);
694 static void pred8x8_dc_rv40_c(uint8_t *src
, int stride
){
699 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
700 dc0
+= src
[4+i
-stride
];
701 dc0
+= src
[-1+(i
+4)*stride
];
703 dc0
= 0x01010101*((dc0
+ 8)>>4);
706 ((uint32_t*)(src
+i
*stride
))[0]= dc0
;
707 ((uint32_t*)(src
+i
*stride
))[1]= dc0
;
710 ((uint32_t*)(src
+i
*stride
))[0]= dc0
;
711 ((uint32_t*)(src
+i
*stride
))[1]= dc0
;
715 static void pred8x8_plane_c(uint8_t *src
, int stride
){
718 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
719 const uint8_t * const src0
= src
+3-stride
;
720 const uint8_t *src1
= src
+4*stride
-1;
721 const uint8_t *src2
= src1
-2*stride
; // == src+2*stride-1;
722 int H
= src0
[1] - src0
[-1];
723 int V
= src1
[0] - src2
[ 0];
724 for(k
=2; k
<=4; ++k
) {
725 src1
+= stride
; src2
-= stride
;
726 H
+= k
*(src0
[k
] - src0
[-k
]);
727 V
+= k
*(src1
[0] - src2
[ 0]);
729 H
= ( 17*H
+16 ) >> 5;
730 V
= ( 17*V
+16 ) >> 5;
732 a
= 16*(src1
[0] + src2
[8]+1) - 3*(V
+H
);
736 src
[0] = cm
[ (b
) >> 5 ];
737 src
[1] = cm
[ (b
+ H
) >> 5 ];
738 src
[2] = cm
[ (b
+2*H
) >> 5 ];
739 src
[3] = cm
[ (b
+3*H
) >> 5 ];
740 src
[4] = cm
[ (b
+4*H
) >> 5 ];
741 src
[5] = cm
[ (b
+5*H
) >> 5 ];
742 src
[6] = cm
[ (b
+6*H
) >> 5 ];
743 src
[7] = cm
[ (b
+7*H
) >> 5 ];
748 #define SRC(x,y) src[(x)+(y)*stride]
750 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
751 #define PREDICT_8x8_LOAD_LEFT \
752 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
753 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
754 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
755 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
758 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
759 #define PREDICT_8x8_LOAD_TOP \
760 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
761 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
762 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
763 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
764 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
767 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
768 #define PREDICT_8x8_LOAD_TOPRIGHT \
769 int t8, t9, t10, t11, t12, t13, t14, t15; \
771 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
772 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
773 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
775 #define PREDICT_8x8_LOAD_TOPLEFT \
776 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
778 #define PREDICT_8x8_DC(v) \
780 for( y = 0; y < 8; y++ ) { \
781 ((uint32_t*)src)[0] = \
782 ((uint32_t*)src)[1] = v; \
786 static void pred8x8l_128_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
788 PREDICT_8x8_DC(0x80808080);
790 static void pred8x8l_left_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
792 PREDICT_8x8_LOAD_LEFT
;
793 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3) * 0x01010101;
796 static void pred8x8l_top_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
798 PREDICT_8x8_LOAD_TOP
;
799 const uint32_t dc
= ((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3) * 0x01010101;
802 static void pred8x8l_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
804 PREDICT_8x8_LOAD_LEFT
;
805 PREDICT_8x8_LOAD_TOP
;
806 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
807 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4) * 0x01010101;
810 static void pred8x8l_horizontal_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
812 PREDICT_8x8_LOAD_LEFT
;
813 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
814 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
815 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
818 static void pred8x8l_vertical_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
821 PREDICT_8x8_LOAD_TOP
;
830 for( y
= 1; y
< 8; y
++ )
831 *(uint64_t*)(src
+y
*stride
) = *(uint64_t*)src
;
833 static void pred8x8l_down_left_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
835 PREDICT_8x8_LOAD_TOP
;
836 PREDICT_8x8_LOAD_TOPRIGHT
;
837 SRC(0,0)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
838 SRC(0,1)=SRC(1,0)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
839 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
840 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
841 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
842 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
843 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
844 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
845 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
846 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
847 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
848 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11
+ 2*t12
+ t13
+ 2) >> 2;
849 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12
+ 2*t13
+ t14
+ 2) >> 2;
850 SRC(6,7)=SRC(7,6)= (t13
+ 2*t14
+ t15
+ 2) >> 2;
851 SRC(7,7)= (t14
+ 3*t15
+ 2) >> 2;
853 static void pred8x8l_down_right_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
855 PREDICT_8x8_LOAD_TOP
;
856 PREDICT_8x8_LOAD_LEFT
;
857 PREDICT_8x8_LOAD_TOPLEFT
;
858 SRC(0,7)= (l7
+ 2*l6
+ l5
+ 2) >> 2;
859 SRC(0,6)=SRC(1,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
860 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
861 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
862 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
863 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
864 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
865 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
866 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
867 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
868 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
869 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
870 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
871 SRC(6,0)=SRC(7,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
872 SRC(7,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
875 static void pred8x8l_vertical_right_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
877 PREDICT_8x8_LOAD_TOP
;
878 PREDICT_8x8_LOAD_LEFT
;
879 PREDICT_8x8_LOAD_TOPLEFT
;
880 SRC(0,6)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
881 SRC(0,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
882 SRC(0,4)=SRC(1,6)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
883 SRC(0,5)=SRC(1,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
884 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
885 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
886 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
887 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt
+ t0
+ 1) >> 1;
888 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
889 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0
+ t1
+ 1) >> 1;
890 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
891 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1
+ t2
+ 1) >> 1;
892 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
893 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2
+ t3
+ 1) >> 1;
894 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
895 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3
+ t4
+ 1) >> 1;
896 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
897 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4
+ t5
+ 1) >> 1;
898 SRC(6,1)=SRC(7,3)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
899 SRC(6,0)=SRC(7,2)= (t5
+ t6
+ 1) >> 1;
900 SRC(7,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
901 SRC(7,0)= (t6
+ t7
+ 1) >> 1;
903 static void pred8x8l_horizontal_down_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
905 PREDICT_8x8_LOAD_TOP
;
906 PREDICT_8x8_LOAD_LEFT
;
907 PREDICT_8x8_LOAD_TOPLEFT
;
908 SRC(0,7)= (l6
+ l7
+ 1) >> 1;
909 SRC(1,7)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
910 SRC(0,6)=SRC(2,7)= (l5
+ l6
+ 1) >> 1;
911 SRC(1,6)=SRC(3,7)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
912 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4
+ l5
+ 1) >> 1;
913 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
914 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3
+ l4
+ 1) >> 1;
915 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
916 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2
+ l3
+ 1) >> 1;
917 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
918 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1
+ l2
+ 1) >> 1;
919 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
920 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0
+ l1
+ 1) >> 1;
921 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt
+ 2*l0
+ l1
+ 2) >> 2;
922 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt
+ l0
+ 1) >> 1;
923 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
924 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1
+ 2*t0
+ lt
+ 2) >> 2;
925 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2
+ 2*t1
+ t0
+ 2) >> 2;
926 SRC(4,0)=SRC(6,1)= (t3
+ 2*t2
+ t1
+ 2) >> 2;
927 SRC(5,0)=SRC(7,1)= (t4
+ 2*t3
+ t2
+ 2) >> 2;
928 SRC(6,0)= (t5
+ 2*t4
+ t3
+ 2) >> 2;
929 SRC(7,0)= (t6
+ 2*t5
+ t4
+ 2) >> 2;
931 static void pred8x8l_vertical_left_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
933 PREDICT_8x8_LOAD_TOP
;
934 PREDICT_8x8_LOAD_TOPRIGHT
;
935 SRC(0,0)= (t0
+ t1
+ 1) >> 1;
936 SRC(0,1)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
937 SRC(0,2)=SRC(1,0)= (t1
+ t2
+ 1) >> 1;
938 SRC(0,3)=SRC(1,1)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
939 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2
+ t3
+ 1) >> 1;
940 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
941 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3
+ t4
+ 1) >> 1;
942 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
943 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4
+ t5
+ 1) >> 1;
944 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
945 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5
+ t6
+ 1) >> 1;
946 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
947 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6
+ t7
+ 1) >> 1;
948 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
949 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7
+ t8
+ 1) >> 1;
950 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
951 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8
+ t9
+ 1) >> 1;
952 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
953 SRC(6,6)=SRC(7,4)= (t9
+ t10
+ 1) >> 1;
954 SRC(6,7)=SRC(7,5)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
955 SRC(7,6)= (t10
+ t11
+ 1) >> 1;
956 SRC(7,7)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
958 static void pred8x8l_horizontal_up_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
960 PREDICT_8x8_LOAD_LEFT
;
961 SRC(0,0)= (l0
+ l1
+ 1) >> 1;
962 SRC(1,0)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
963 SRC(0,1)=SRC(2,0)= (l1
+ l2
+ 1) >> 1;
964 SRC(1,1)=SRC(3,0)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
965 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2
+ l3
+ 1) >> 1;
966 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
967 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3
+ l4
+ 1) >> 1;
968 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
969 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4
+ l5
+ 1) >> 1;
970 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
971 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5
+ l6
+ 1) >> 1;
972 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
973 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6
+ l7
+ 1) >> 1;
974 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6
+ 3*l7
+ 2) >> 2;
975 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
976 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
977 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
978 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7
;
980 #undef PREDICT_8x8_LOAD_LEFT
981 #undef PREDICT_8x8_LOAD_TOP
982 #undef PREDICT_8x8_LOAD_TOPLEFT
983 #undef PREDICT_8x8_LOAD_TOPRIGHT
984 #undef PREDICT_8x8_DC
990 static void pred4x4_vertical_add_c(uint8_t *pix
, const DCTELEM
*block
, int stride
){
995 pix
[1*stride
]= v
+= block
[0];
996 pix
[2*stride
]= v
+= block
[4];
997 pix
[3*stride
]= v
+= block
[8];
998 pix
[4*stride
]= v
+ block
[12];
1004 static void pred4x4_horizontal_add_c(uint8_t *pix
, const DCTELEM
*block
, int stride
){
1007 uint8_t v
= pix
[-1];
1008 pix
[0]= v
+= block
[0];
1009 pix
[1]= v
+= block
[1];
1010 pix
[2]= v
+= block
[2];
1011 pix
[3]= v
+ block
[3];
1017 static void pred8x8l_vertical_add_c(uint8_t *pix
, const DCTELEM
*block
, int stride
){
1022 pix
[1*stride
]= v
+= block
[0];
1023 pix
[2*stride
]= v
+= block
[8];
1024 pix
[3*stride
]= v
+= block
[16];
1025 pix
[4*stride
]= v
+= block
[24];
1026 pix
[5*stride
]= v
+= block
[32];
1027 pix
[6*stride
]= v
+= block
[40];
1028 pix
[7*stride
]= v
+= block
[48];
1029 pix
[8*stride
]= v
+ block
[56];
1035 static void pred8x8l_horizontal_add_c(uint8_t *pix
, const DCTELEM
*block
, int stride
){
1038 uint8_t v
= pix
[-1];
1039 pix
[0]= v
+= block
[0];
1040 pix
[1]= v
+= block
[1];
1041 pix
[2]= v
+= block
[2];
1042 pix
[3]= v
+= block
[3];
1043 pix
[4]= v
+= block
[4];
1044 pix
[5]= v
+= block
[5];
1045 pix
[6]= v
+= block
[6];
1046 pix
[7]= v
+ block
[7];
1052 static void pred16x16_vertical_add_c(uint8_t *pix
, const int *block_offset
, const DCTELEM
*block
, int stride
){
1055 pred4x4_vertical_add_c(pix
+ block_offset
[i
], block
+ i
*16, stride
);
1058 static void pred16x16_horizontal_add_c(uint8_t *pix
, const int *block_offset
, const DCTELEM
*block
, int stride
){
1061 pred4x4_horizontal_add_c(pix
+ block_offset
[i
], block
+ i
*16, stride
);
1064 static void pred8x8_vertical_add_c(uint8_t *pix
, const int *block_offset
, const DCTELEM
*block
, int stride
){
1067 pred4x4_vertical_add_c(pix
+ block_offset
[i
], block
+ i
*16, stride
);
1070 static void pred8x8_horizontal_add_c(uint8_t *pix
, const int *block_offset
, const DCTELEM
*block
, int stride
){
1073 pred4x4_horizontal_add_c(pix
+ block_offset
[i
], block
+ i
*16, stride
);
1078 * Sets the intra prediction function pointers.
1080 void ff_h264_pred_init(H264PredContext
*h
, int codec_id
){
1081 // MpegEncContext * const s = &h->s;
1083 if(codec_id
!= CODEC_ID_RV40
){
1084 h
->pred4x4
[VERT_PRED
]= pred4x4_vertical_c
;
1085 h
->pred4x4
[HOR_PRED
]= pred4x4_horizontal_c
;
1086 h
->pred4x4
[DC_PRED
]= pred4x4_dc_c
;
1087 if(codec_id
== CODEC_ID_SVQ3
)
1088 h
->pred4x4
[DIAG_DOWN_LEFT_PRED
]= pred4x4_down_left_svq3_c
;
1090 h
->pred4x4
[DIAG_DOWN_LEFT_PRED
]= pred4x4_down_left_c
;
1091 h
->pred4x4
[DIAG_DOWN_RIGHT_PRED
]= pred4x4_down_right_c
;
1092 h
->pred4x4
[VERT_RIGHT_PRED
]= pred4x4_vertical_right_c
;
1093 h
->pred4x4
[HOR_DOWN_PRED
]= pred4x4_horizontal_down_c
;
1094 h
->pred4x4
[VERT_LEFT_PRED
]= pred4x4_vertical_left_c
;
1095 h
->pred4x4
[HOR_UP_PRED
]= pred4x4_horizontal_up_c
;
1096 h
->pred4x4
[LEFT_DC_PRED
]= pred4x4_left_dc_c
;
1097 h
->pred4x4
[TOP_DC_PRED
]= pred4x4_top_dc_c
;
1098 h
->pred4x4
[DC_128_PRED
]= pred4x4_128_dc_c
;
1100 h
->pred4x4
[VERT_PRED
]= pred4x4_vertical_c
;
1101 h
->pred4x4
[HOR_PRED
]= pred4x4_horizontal_c
;
1102 h
->pred4x4
[DC_PRED
]= pred4x4_dc_c
;
1103 h
->pred4x4
[DIAG_DOWN_LEFT_PRED
]= pred4x4_down_left_rv40_c
;
1104 h
->pred4x4
[DIAG_DOWN_RIGHT_PRED
]= pred4x4_down_right_c
;
1105 h
->pred4x4
[VERT_RIGHT_PRED
]= pred4x4_vertical_right_c
;
1106 h
->pred4x4
[HOR_DOWN_PRED
]= pred4x4_horizontal_down_c
;
1107 h
->pred4x4
[VERT_LEFT_PRED
]= pred4x4_vertical_left_rv40_c
;
1108 h
->pred4x4
[HOR_UP_PRED
]= pred4x4_horizontal_up_rv40_c
;
1109 h
->pred4x4
[LEFT_DC_PRED
]= pred4x4_left_dc_c
;
1110 h
->pred4x4
[TOP_DC_PRED
]= pred4x4_top_dc_c
;
1111 h
->pred4x4
[DC_128_PRED
]= pred4x4_128_dc_c
;
1112 h
->pred4x4
[DIAG_DOWN_LEFT_PRED_RV40_NODOWN
]= pred4x4_down_left_rv40_nodown_c
;
1113 h
->pred4x4
[HOR_UP_PRED_RV40_NODOWN
]= pred4x4_horizontal_up_rv40_nodown_c
;
1114 h
->pred4x4
[VERT_LEFT_PRED_RV40_NODOWN
]= pred4x4_vertical_left_rv40_nodown_c
;
1117 h
->pred8x8l
[VERT_PRED
]= pred8x8l_vertical_c
;
1118 h
->pred8x8l
[HOR_PRED
]= pred8x8l_horizontal_c
;
1119 h
->pred8x8l
[DC_PRED
]= pred8x8l_dc_c
;
1120 h
->pred8x8l
[DIAG_DOWN_LEFT_PRED
]= pred8x8l_down_left_c
;
1121 h
->pred8x8l
[DIAG_DOWN_RIGHT_PRED
]= pred8x8l_down_right_c
;
1122 h
->pred8x8l
[VERT_RIGHT_PRED
]= pred8x8l_vertical_right_c
;
1123 h
->pred8x8l
[HOR_DOWN_PRED
]= pred8x8l_horizontal_down_c
;
1124 h
->pred8x8l
[VERT_LEFT_PRED
]= pred8x8l_vertical_left_c
;
1125 h
->pred8x8l
[HOR_UP_PRED
]= pred8x8l_horizontal_up_c
;
1126 h
->pred8x8l
[LEFT_DC_PRED
]= pred8x8l_left_dc_c
;
1127 h
->pred8x8l
[TOP_DC_PRED
]= pred8x8l_top_dc_c
;
1128 h
->pred8x8l
[DC_128_PRED
]= pred8x8l_128_dc_c
;
1130 h
->pred8x8
[VERT_PRED8x8
]= pred8x8_vertical_c
;
1131 h
->pred8x8
[HOR_PRED8x8
]= pred8x8_horizontal_c
;
1132 h
->pred8x8
[PLANE_PRED8x8
]= pred8x8_plane_c
;
1133 if(codec_id
!= CODEC_ID_RV40
){
1134 h
->pred8x8
[DC_PRED8x8
]= pred8x8_dc_c
;
1135 h
->pred8x8
[LEFT_DC_PRED8x8
]= pred8x8_left_dc_c
;
1136 h
->pred8x8
[TOP_DC_PRED8x8
]= pred8x8_top_dc_c
;
1137 h
->pred8x8
[ALZHEIMER_DC_L0T_PRED8x8
]= pred8x8_mad_cow_dc_l0t
;
1138 h
->pred8x8
[ALZHEIMER_DC_0LT_PRED8x8
]= pred8x8_mad_cow_dc_0lt
;
1139 h
->pred8x8
[ALZHEIMER_DC_L00_PRED8x8
]= pred8x8_mad_cow_dc_l00
;
1140 h
->pred8x8
[ALZHEIMER_DC_0L0_PRED8x8
]= pred8x8_mad_cow_dc_0l0
;
1142 h
->pred8x8
[DC_PRED8x8
]= pred8x8_dc_rv40_c
;
1143 h
->pred8x8
[LEFT_DC_PRED8x8
]= pred8x8_left_dc_rv40_c
;
1144 h
->pred8x8
[TOP_DC_PRED8x8
]= pred8x8_top_dc_rv40_c
;
1146 h
->pred8x8
[DC_128_PRED8x8
]= pred8x8_128_dc_c
;
1148 h
->pred16x16
[DC_PRED8x8
]= pred16x16_dc_c
;
1149 h
->pred16x16
[VERT_PRED8x8
]= pred16x16_vertical_c
;
1150 h
->pred16x16
[HOR_PRED8x8
]= pred16x16_horizontal_c
;
1151 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_c
;
1154 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_svq3_c
;
1157 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_rv40_c
;
1160 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_c
;
1162 h
->pred16x16
[LEFT_DC_PRED8x8
]= pred16x16_left_dc_c
;
1163 h
->pred16x16
[TOP_DC_PRED8x8
]= pred16x16_top_dc_c
;
1164 h
->pred16x16
[DC_128_PRED8x8
]= pred16x16_128_dc_c
;
1166 //special lossless h/v prediction for h264
1167 h
->pred4x4_add
[VERT_PRED
]= pred4x4_vertical_add_c
;
1168 h
->pred4x4_add
[ HOR_PRED
]= pred4x4_horizontal_add_c
;
1169 h
->pred8x8l_add
[VERT_PRED
]= pred8x8l_vertical_add_c
;
1170 h
->pred8x8l_add
[ HOR_PRED
]= pred8x8l_horizontal_add_c
;
1171 h
->pred8x8_add
[VERT_PRED8x8
]= pred8x8_vertical_add_c
;
1172 h
->pred8x8_add
[ HOR_PRED8x8
]= pred8x8_horizontal_add_c
;
1173 h
->pred16x16_add
[VERT_PRED8x8
]= pred16x16_vertical_add_c
;
1174 h
->pred16x16_add
[ HOR_PRED8x8
]= pred16x16_horizontal_add_c
;