2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 prediction functions.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "bit_depth_template.c"
32 static void FUNCC(pred4x4_vertical
)(uint8_t *_src
, const uint8_t *topright
,
35 pixel
*src
= (pixel
*)_src
;
36 int stride
= _stride
/sizeof(pixel
);
37 const pixel4 a
= AV_RN4PA(src
-stride
);
39 AV_WN4PA(src
+0*stride
, a
);
40 AV_WN4PA(src
+1*stride
, a
);
41 AV_WN4PA(src
+2*stride
, a
);
42 AV_WN4PA(src
+3*stride
, a
);
45 static void FUNCC(pred4x4_horizontal
)(uint8_t *_src
, const uint8_t *topright
,
48 pixel
*src
= (pixel
*)_src
;
49 int stride
= _stride
/sizeof(pixel
);
50 AV_WN4PA(src
+0*stride
, PIXEL_SPLAT_X4(src
[-1+0*stride
]));
51 AV_WN4PA(src
+1*stride
, PIXEL_SPLAT_X4(src
[-1+1*stride
]));
52 AV_WN4PA(src
+2*stride
, PIXEL_SPLAT_X4(src
[-1+2*stride
]));
53 AV_WN4PA(src
+3*stride
, PIXEL_SPLAT_X4(src
[-1+3*stride
]));
56 static void FUNCC(pred4x4_dc
)(uint8_t *_src
, const uint8_t *topright
,
59 pixel
*src
= (pixel
*)_src
;
60 int stride
= _stride
/sizeof(pixel
);
61 const int dc
= ( src
[-stride
] + src
[1-stride
] + src
[2-stride
] + src
[3-stride
]
62 + src
[-1+0*stride
] + src
[-1+1*stride
] + src
[-1+2*stride
] + src
[-1+3*stride
] + 4) >>3;
63 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
65 AV_WN4PA(src
+0*stride
, a
);
66 AV_WN4PA(src
+1*stride
, a
);
67 AV_WN4PA(src
+2*stride
, a
);
68 AV_WN4PA(src
+3*stride
, a
);
71 static void FUNCC(pred4x4_left_dc
)(uint8_t *_src
, const uint8_t *topright
,
74 pixel
*src
= (pixel
*)_src
;
75 int stride
= _stride
/sizeof(pixel
);
76 const int dc
= ( src
[-1+0*stride
] + src
[-1+1*stride
] + src
[-1+2*stride
] + src
[-1+3*stride
] + 2) >>2;
77 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
79 AV_WN4PA(src
+0*stride
, a
);
80 AV_WN4PA(src
+1*stride
, a
);
81 AV_WN4PA(src
+2*stride
, a
);
82 AV_WN4PA(src
+3*stride
, a
);
85 static void FUNCC(pred4x4_top_dc
)(uint8_t *_src
, const uint8_t *topright
,
88 pixel
*src
= (pixel
*)_src
;
89 int stride
= _stride
/sizeof(pixel
);
90 const int dc
= ( src
[-stride
] + src
[1-stride
] + src
[2-stride
] + src
[3-stride
] + 2) >>2;
91 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
93 AV_WN4PA(src
+0*stride
, a
);
94 AV_WN4PA(src
+1*stride
, a
);
95 AV_WN4PA(src
+2*stride
, a
);
96 AV_WN4PA(src
+3*stride
, a
);
99 static void FUNCC(pred4x4_128_dc
)(uint8_t *_src
, const uint8_t *topright
,
102 pixel
*src
= (pixel
*)_src
;
103 int stride
= _stride
/sizeof(pixel
);
104 const pixel4 a
= PIXEL_SPLAT_X4(1<<(BIT_DEPTH
-1));
106 AV_WN4PA(src
+0*stride
, a
);
107 AV_WN4PA(src
+1*stride
, a
);
108 AV_WN4PA(src
+2*stride
, a
);
109 AV_WN4PA(src
+3*stride
, a
);
112 static void FUNCC(pred4x4_127_dc
)(uint8_t *_src
, const uint8_t *topright
,
115 pixel
*src
= (pixel
*)_src
;
116 int stride
= _stride
/sizeof(pixel
);
117 const pixel4 a
= PIXEL_SPLAT_X4((1<<(BIT_DEPTH
-1))-1);
119 AV_WN4PA(src
+0*stride
, a
);
120 AV_WN4PA(src
+1*stride
, a
);
121 AV_WN4PA(src
+2*stride
, a
);
122 AV_WN4PA(src
+3*stride
, a
);
125 static void FUNCC(pred4x4_129_dc
)(uint8_t *_src
, const uint8_t *topright
,
128 pixel
*src
= (pixel
*)_src
;
129 int stride
= _stride
/sizeof(pixel
);
130 const pixel4 a
= PIXEL_SPLAT_X4((1<<(BIT_DEPTH
-1))+1);
132 AV_WN4PA(src
+0*stride
, a
);
133 AV_WN4PA(src
+1*stride
, a
);
134 AV_WN4PA(src
+2*stride
, a
);
135 AV_WN4PA(src
+3*stride
, a
);
/* Neighbour-loading helpers for the directional 4x4 predictors.
 * Each expands to local declarations; expects 'src', 'stride' and (for
 * LOAD_TOP_RIGHT_EDGE) 'topright' in scope.  The final lines deliberately
 * carry no trailing backslash so each macro ends where it should. */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];

#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];

#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];

#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];
163 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
166 pixel
*src
= (pixel
*)_src
;
167 int stride
= _stride
/sizeof(pixel
);
168 const int lt
= src
[-1-1*stride
];
172 src
[0+3*stride
]=(l3
+ 2*l2
+ l1
+ 2)>>2;
174 src
[1+3*stride
]=(l2
+ 2*l1
+ l0
+ 2)>>2;
177 src
[2+3*stride
]=(l1
+ 2*l0
+ lt
+ 2)>>2;
181 src
[3+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
184 src
[3+2*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
186 src
[3+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
187 src
[3+0*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
190 static void FUNCC(pred4x4_down_left
)(uint8_t *_src
, const uint8_t *_topright
,
193 pixel
*src
= (pixel
*)_src
;
194 const pixel
*topright
= (const pixel
*)_topright
;
195 int stride
= _stride
/sizeof(pixel
);
200 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2)>>2;
202 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2)>>2;
205 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2)>>2;
209 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2)>>2;
212 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2)>>2;
214 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2)>>2;
215 src
[3+3*stride
]=(t6
+ 3*t7
+ 2)>>2;
218 static void FUNCC(pred4x4_vertical_right
)(uint8_t *_src
,
219 const uint8_t *topright
,
222 pixel
*src
= (pixel
*)_src
;
223 int stride
= _stride
/sizeof(pixel
);
224 const int lt
= src
[-1-1*stride
];
229 src
[1+2*stride
]=(lt
+ t0
+ 1)>>1;
231 src
[2+2*stride
]=(t0
+ t1
+ 1)>>1;
233 src
[3+2*stride
]=(t1
+ t2
+ 1)>>1;
234 src
[3+0*stride
]=(t2
+ t3
+ 1)>>1;
236 src
[1+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
238 src
[2+3*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
240 src
[3+3*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
241 src
[3+1*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
242 src
[0+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
243 src
[0+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
246 static void FUNCC(pred4x4_vertical_left
)(uint8_t *_src
,
247 const uint8_t *_topright
,
250 pixel
*src
= (pixel
*)_src
;
251 const pixel
*topright
= (const pixel
*)_topright
;
252 int stride
= _stride
/sizeof(pixel
);
256 src
[0+0*stride
]=(t0
+ t1
+ 1)>>1;
258 src
[0+2*stride
]=(t1
+ t2
+ 1)>>1;
260 src
[1+2*stride
]=(t2
+ t3
+ 1)>>1;
262 src
[2+2*stride
]=(t3
+ t4
+ 1)>>1;
263 src
[3+2*stride
]=(t4
+ t5
+ 1)>>1;
264 src
[0+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
266 src
[0+3*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
268 src
[1+3*stride
]=(t2
+ 2*t3
+ t4
+ 2)>>2;
270 src
[2+3*stride
]=(t3
+ 2*t4
+ t5
+ 2)>>2;
271 src
[3+3*stride
]=(t4
+ 2*t5
+ t6
+ 2)>>2;
274 static void FUNCC(pred4x4_horizontal_up
)(uint8_t *_src
, const uint8_t *topright
,
277 pixel
*src
= (pixel
*)_src
;
278 int stride
= _stride
/sizeof(pixel
);
281 src
[0+0*stride
]=(l0
+ l1
+ 1)>>1;
282 src
[1+0*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
284 src
[0+1*stride
]=(l1
+ l2
+ 1)>>1;
286 src
[1+1*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
288 src
[0+2*stride
]=(l2
+ l3
+ 1)>>1;
290 src
[1+2*stride
]=(l2
+ 2*l3
+ l3
+ 2)>>2;
299 static void FUNCC(pred4x4_horizontal_down
)(uint8_t *_src
,
300 const uint8_t *topright
,
303 pixel
*src
= (pixel
*)_src
;
304 int stride
= _stride
/sizeof(pixel
);
305 const int lt
= src
[-1-1*stride
];
310 src
[2+1*stride
]=(lt
+ l0
+ 1)>>1;
312 src
[3+1*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
313 src
[2+0*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
314 src
[3+0*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
316 src
[2+2*stride
]=(l0
+ l1
+ 1)>>1;
318 src
[3+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
320 src
[2+3*stride
]=(l1
+ l2
+ 1)>>1;
322 src
[3+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
323 src
[0+3*stride
]=(l2
+ l3
+ 1)>>1;
324 src
[1+3*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
327 static void FUNCC(pred16x16_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
330 pixel
*src
= (pixel
*)_src
;
331 int stride
= _stride
/sizeof(pixel
);
332 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
333 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
334 const pixel4 c
= AV_RN4PA(((pixel4
*)(src
-stride
))+2);
335 const pixel4 d
= AV_RN4PA(((pixel4
*)(src
-stride
))+3);
338 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
339 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
340 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+2, c
);
341 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+3, d
);
345 static void FUNCC(pred16x16_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
348 pixel
*src
= (pixel
*)_src
;
349 stride
/= sizeof(pixel
);
352 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
354 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
355 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
356 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+2, a
);
357 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+3, a
);
/* Fill a 16x16 block with the splatted DC value v; expects 'i', 'src' and
 * 'stride' in scope and advances src by 16 rows. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }
370 static void FUNCC(pred16x16_dc
)(uint8_t *_src
, ptrdiff_t stride
)
373 pixel
*src
= (pixel
*)_src
;
375 stride
/= sizeof(pixel
);
378 dc
+= src
[-1+i
*stride
];
385 dcsplat
= PIXEL_SPLAT_X4((dc
+16)>>5);
386 PREDICT_16x16_DC(dcsplat
);
389 static void FUNCC(pred16x16_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
392 pixel
*src
= (pixel
*)_src
;
394 stride
/= sizeof(pixel
);
397 dc
+= src
[-1+i
*stride
];
400 dcsplat
= PIXEL_SPLAT_X4((dc
+8)>>4);
401 PREDICT_16x16_DC(dcsplat
);
404 static void FUNCC(pred16x16_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
407 pixel
*src
= (pixel
*)_src
;
409 stride
/= sizeof(pixel
);
415 dcsplat
= PIXEL_SPLAT_X4((dc
+8)>>4);
416 PREDICT_16x16_DC(dcsplat
);
419 #define PRED16x16_X(n, v) \
420 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
423 pixel *src = (pixel*)_src;\
424 stride /= sizeof(pixel);\
425 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
428 PRED16x16_X(127, (1<<(BIT_DEPTH
-1))-1)
429 PRED16x16_X(128, (1<<(BIT_DEPTH
-1))+0)
430 PRED16x16_X(129, (1<<(BIT_DEPTH
-1))+1)
432 static inline void FUNCC(pred16x16_plane_compat
)(uint8_t *_src
,
440 pixel
*src
= (pixel
*)_src
;
441 int stride
= _stride
/sizeof(pixel
);
442 const pixel
* const src0
= src
+7-stride
;
443 const pixel
* src1
= src
+8*stride
-1;
444 const pixel
* src2
= src1
-2*stride
; // == src+6*stride-1;
445 int H
= src0
[1] - src0
[-1];
446 int V
= src1
[0] - src2
[ 0];
447 for(k
=2; k
<=8; ++k
) {
448 src1
+= stride
; src2
-= stride
;
449 H
+= k
*(src0
[k
] - src0
[-k
]);
450 V
+= k
*(src1
[0] - src2
[ 0]);
453 H
= ( 5*(H
/4) ) / 16;
454 V
= ( 5*(V
/4) ) / 16;
456 /* required for 100% accuracy */
459 H
= ( H
+ (H
>>2) ) >> 4;
460 V
= ( V
+ (V
>>2) ) >> 4;
466 a
= 16*(src1
[0] + src2
[16] + 1) - 7*(V
+H
);
467 for(j
=16; j
>0; --j
) {
470 for(i
=-16; i
<0; i
+=4) {
471 src
[16+i
] = CLIP((b
) >> 5);
472 src
[17+i
] = CLIP((b
+ H
) >> 5);
473 src
[18+i
] = CLIP((b
+2*H
) >> 5);
474 src
[19+i
] = CLIP((b
+3*H
) >> 5);
481 static void FUNCC(pred16x16_plane
)(uint8_t *src
, ptrdiff_t stride
)
483 FUNCC(pred16x16_plane_compat
)(src
, stride
, 0, 0);
486 static void FUNCC(pred8x8_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
489 pixel
*src
= (pixel
*)_src
;
490 int stride
= _stride
/sizeof(pixel
);
491 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
492 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
495 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
496 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
500 static void FUNCC(pred8x16_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
503 pixel
*src
= (pixel
*)_src
;
504 int stride
= _stride
>>(sizeof(pixel
)-1);
505 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
506 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
509 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
510 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
514 static void FUNCC(pred8x8_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
517 pixel
*src
= (pixel
*)_src
;
518 stride
/= sizeof(pixel
);
521 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
522 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
523 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
527 static void FUNCC(pred8x16_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
530 pixel
*src
= (pixel
*)_src
;
531 stride
>>= sizeof(pixel
)-1;
533 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
534 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
535 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
539 #define PRED8x8_X(n, v)\
540 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
543 const pixel4 a = PIXEL_SPLAT_X4(v);\
544 pixel *src = (pixel*)_src;\
545 stride /= sizeof(pixel);\
547 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
548 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
552 PRED8x8_X(127, (1<<(BIT_DEPTH
-1))-1)
553 PRED8x8_X(128, (1<<(BIT_DEPTH
-1))+0)
554 PRED8x8_X(129, (1<<(BIT_DEPTH
-1))+1)
556 static void FUNCC(pred8x16_128_dc
)(uint8_t *_src
, ptrdiff_t stride
)
558 FUNCC(pred8x8_128_dc
)(_src
, stride
);
559 FUNCC(pred8x8_128_dc
)(_src
+8*stride
, stride
);
562 static void FUNCC(pred8x8_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
566 pixel4 dc0splat
, dc2splat
;
567 pixel
*src
= (pixel
*)_src
;
568 stride
/= sizeof(pixel
);
572 dc0
+= src
[-1+i
*stride
];
573 dc2
+= src
[-1+(i
+4)*stride
];
575 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
576 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
579 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
580 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc0splat
);
583 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
584 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc2splat
);
588 static void FUNCC(pred8x16_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
590 FUNCC(pred8x8_left_dc
)(_src
, stride
);
591 FUNCC(pred8x8_left_dc
)(_src
+8*stride
, stride
);
594 static void FUNCC(pred8x8_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
598 pixel4 dc0splat
, dc1splat
;
599 pixel
*src
= (pixel
*)_src
;
600 stride
/= sizeof(pixel
);
605 dc1
+= src
[4+i
-stride
];
607 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
608 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
611 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
612 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
615 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
616 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
620 static void FUNCC(pred8x16_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
624 pixel4 dc0splat
, dc1splat
;
625 pixel
*src
= (pixel
*)_src
;
626 stride
>>= sizeof(pixel
)-1;
631 dc1
+= src
[4+i
-stride
];
633 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
634 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
637 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
638 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
642 static void FUNCC(pred8x8_dc
)(uint8_t *_src
, ptrdiff_t stride
)
646 pixel4 dc0splat
, dc1splat
, dc2splat
, dc3splat
;
647 pixel
*src
= (pixel
*)_src
;
648 stride
/= sizeof(pixel
);
652 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
653 dc1
+= src
[4+i
-stride
];
654 dc2
+= src
[-1+(i
+4)*stride
];
656 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 4)>>3);
657 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
658 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
659 dc3splat
= PIXEL_SPLAT_X4((dc1
+ dc2
+ 4)>>3);
662 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
663 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
666 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
667 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc3splat
);
671 static void FUNCC(pred8x16_dc
)(uint8_t *_src
, ptrdiff_t stride
)
674 int dc0
, dc1
, dc2
, dc3
, dc4
;
675 pixel4 dc0splat
, dc1splat
, dc2splat
, dc3splat
, dc4splat
, dc5splat
, dc6splat
, dc7splat
;
676 pixel
*src
= (pixel
*)_src
;
677 stride
>>= sizeof(pixel
)-1;
679 dc0
=dc1
=dc2
=dc3
=dc4
=0;
681 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
682 dc1
+= src
[4+i
-stride
];
683 dc2
+= src
[-1+(i
+4)*stride
];
684 dc3
+= src
[-1+(i
+8)*stride
];
685 dc4
+= src
[-1+(i
+12)*stride
];
687 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 4)>>3);
688 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
689 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
690 dc3splat
= PIXEL_SPLAT_X4((dc1
+ dc2
+ 4)>>3);
691 dc4splat
= PIXEL_SPLAT_X4((dc3
+ 2)>>2);
692 dc5splat
= PIXEL_SPLAT_X4((dc1
+ dc3
+ 4)>>3);
693 dc6splat
= PIXEL_SPLAT_X4((dc4
+ 2)>>2);
694 dc7splat
= PIXEL_SPLAT_X4((dc1
+ dc4
+ 4)>>3);
697 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
698 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
701 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
702 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc3splat
);
705 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc4splat
);
706 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc5splat
);
708 for(i
=12; i
<16; i
++){
709 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc6splat
);
710 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc7splat
);
714 static void FUNC(pred8x8_mad_cow_dc_l0t
)(uint8_t *src
, ptrdiff_t stride
)
716 FUNCC(pred8x8_top_dc
)(src
, stride
);
717 FUNCC(pred4x4_dc
)(src
, NULL
, stride
);
720 static void FUNC(pred8x16_mad_cow_dc_l0t
)(uint8_t *src
, ptrdiff_t stride
)
722 FUNCC(pred8x16_top_dc
)(src
, stride
);
723 FUNCC(pred4x4_dc
)(src
, NULL
, stride
);
726 static void FUNC(pred8x8_mad_cow_dc_0lt
)(uint8_t *src
, ptrdiff_t stride
)
728 FUNCC(pred8x8_dc
)(src
, stride
);
729 FUNCC(pred4x4_top_dc
)(src
, NULL
, stride
);
732 static void FUNC(pred8x16_mad_cow_dc_0lt
)(uint8_t *src
, ptrdiff_t stride
)
734 FUNCC(pred8x16_dc
)(src
, stride
);
735 FUNCC(pred4x4_top_dc
)(src
, NULL
, stride
);
738 static void FUNC(pred8x8_mad_cow_dc_l00
)(uint8_t *src
, ptrdiff_t stride
)
740 FUNCC(pred8x8_left_dc
)(src
, stride
);
741 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
, NULL
, stride
);
742 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
+ 4*sizeof(pixel
), NULL
, stride
);
745 static void FUNC(pred8x16_mad_cow_dc_l00
)(uint8_t *src
, ptrdiff_t stride
)
747 FUNCC(pred8x16_left_dc
)(src
, stride
);
748 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
, NULL
, stride
);
749 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
+ 4*sizeof(pixel
), NULL
, stride
);
752 static void FUNC(pred8x8_mad_cow_dc_0l0
)(uint8_t *src
, ptrdiff_t stride
)
754 FUNCC(pred8x8_left_dc
)(src
, stride
);
755 FUNCC(pred4x4_128_dc
)(src
, NULL
, stride
);
756 FUNCC(pred4x4_128_dc
)(src
+ 4*sizeof(pixel
), NULL
, stride
);
759 static void FUNC(pred8x16_mad_cow_dc_0l0
)(uint8_t *src
, ptrdiff_t stride
)
761 FUNCC(pred8x16_left_dc
)(src
, stride
);
762 FUNCC(pred4x4_128_dc
)(src
, NULL
, stride
);
763 FUNCC(pred4x4_128_dc
)(src
+ 4*sizeof(pixel
), NULL
, stride
);
766 static void FUNCC(pred8x8_plane
)(uint8_t *_src
, ptrdiff_t _stride
)
771 pixel
*src
= (pixel
*)_src
;
772 int stride
= _stride
/sizeof(pixel
);
773 const pixel
* const src0
= src
+3-stride
;
774 const pixel
* src1
= src
+4*stride
-1;
775 const pixel
* src2
= src1
-2*stride
; // == src+2*stride-1;
776 int H
= src0
[1] - src0
[-1];
777 int V
= src1
[0] - src2
[ 0];
778 for(k
=2; k
<=4; ++k
) {
779 src1
+= stride
; src2
-= stride
;
780 H
+= k
*(src0
[k
] - src0
[-k
]);
781 V
+= k
*(src1
[0] - src2
[ 0]);
783 H
= ( 17*H
+16 ) >> 5;
784 V
= ( 17*V
+16 ) >> 5;
786 a
= 16*(src1
[0] + src2
[8]+1) - 3*(V
+H
);
790 src
[0] = CLIP((b
) >> 5);
791 src
[1] = CLIP((b
+ H
) >> 5);
792 src
[2] = CLIP((b
+2*H
) >> 5);
793 src
[3] = CLIP((b
+3*H
) >> 5);
794 src
[4] = CLIP((b
+4*H
) >> 5);
795 src
[5] = CLIP((b
+5*H
) >> 5);
796 src
[6] = CLIP((b
+6*H
) >> 5);
797 src
[7] = CLIP((b
+7*H
) >> 5);
802 static void FUNCC(pred8x16_plane
)(uint8_t *_src
, ptrdiff_t _stride
)
807 pixel
*src
= (pixel
*)_src
;
808 int stride
= _stride
>>(sizeof(pixel
)-1);
809 const pixel
* const src0
= src
+3-stride
;
810 const pixel
* src1
= src
+8*stride
-1;
811 const pixel
* src2
= src1
-2*stride
; // == src+6*stride-1;
812 int H
= src0
[1] - src0
[-1];
813 int V
= src1
[0] - src2
[ 0];
815 for (k
= 2; k
<= 4; ++k
) {
816 src1
+= stride
; src2
-= stride
;
817 H
+= k
*(src0
[k
] - src0
[-k
]);
818 V
+= k
*(src1
[0] - src2
[ 0]);
820 for (; k
<= 8; ++k
) {
821 src1
+= stride
; src2
-= stride
;
822 V
+= k
*(src1
[0] - src2
[0]);
828 a
= 16*(src1
[0] + src2
[8] + 1) - 7*V
- 3*H
;
829 for(j
=16; j
>0; --j
) {
832 src
[0] = CLIP((b
) >> 5);
833 src
[1] = CLIP((b
+ H
) >> 5);
834 src
[2] = CLIP((b
+2*H
) >> 5);
835 src
[3] = CLIP((b
+3*H
) >> 5);
836 src
[4] = CLIP((b
+4*H
) >> 5);
837 src
[5] = CLIP((b
+5*H
) >> 5);
838 src
[6] = CLIP((b
+6*H
) >> 5);
839 src
[7] = CLIP((b
+7*H
) >> 5);
/* Neighbour-loading helpers for the 8x8 luma (8x8l) predictors.  All apply
 * the H.264 3-tap reference-sample filter; has_topleft / has_topright select
 * the fallback sample when a corner neighbour is unavailable. */
#define SRC(x,y) src[(x)+(y)*stride]

#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if (has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
882 static void FUNCC(pred8x8l_128_dc
)(uint8_t *_src
, int has_topleft
,
883 int has_topright
, ptrdiff_t _stride
)
885 pixel
*src
= (pixel
*)_src
;
886 int stride
= _stride
/sizeof(pixel
);
888 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH
-1)));
890 static void FUNCC(pred8x8l_left_dc
)(uint8_t *_src
, int has_topleft
,
891 int has_topright
, ptrdiff_t _stride
)
893 pixel
*src
= (pixel
*)_src
;
894 int stride
= _stride
/sizeof(pixel
);
896 PREDICT_8x8_LOAD_LEFT
;
897 const pixel4 dc
= PIXEL_SPLAT_X4((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3);
900 static void FUNCC(pred8x8l_top_dc
)(uint8_t *_src
, int has_topleft
,
901 int has_topright
, ptrdiff_t _stride
)
903 pixel
*src
= (pixel
*)_src
;
904 int stride
= _stride
/sizeof(pixel
);
906 PREDICT_8x8_LOAD_TOP
;
907 const pixel4 dc
= PIXEL_SPLAT_X4((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3);
910 static void FUNCC(pred8x8l_dc
)(uint8_t *_src
, int has_topleft
,
911 int has_topright
, ptrdiff_t _stride
)
913 pixel
*src
= (pixel
*)_src
;
914 int stride
= _stride
/sizeof(pixel
);
916 PREDICT_8x8_LOAD_LEFT
;
917 PREDICT_8x8_LOAD_TOP
;
918 const pixel4 dc
= PIXEL_SPLAT_X4((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
919 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4);
922 static void FUNCC(pred8x8l_horizontal
)(uint8_t *_src
, int has_topleft
,
923 int has_topright
, ptrdiff_t _stride
)
925 pixel
*src
= (pixel
*)_src
;
926 int stride
= _stride
/sizeof(pixel
);
929 PREDICT_8x8_LOAD_LEFT
;
930 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
931 AV_WN4PA(src+y*stride, a); \
932 AV_WN4PA(src+y*stride+4, a);
933 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
936 static void FUNCC(pred8x8l_vertical
)(uint8_t *_src
, int has_topleft
,
937 int has_topright
, ptrdiff_t _stride
)
940 pixel
*src
= (pixel
*)_src
;
941 int stride
= _stride
/sizeof(pixel
);
944 PREDICT_8x8_LOAD_TOP
;
953 a
= AV_RN4PA(((pixel4
*)src
)+0);
954 b
= AV_RN4PA(((pixel4
*)src
)+1);
955 for( y
= 1; y
< 8; y
++ ) {
956 AV_WN4PA(((pixel4
*)(src
+y
*stride
))+0, a
);
957 AV_WN4PA(((pixel4
*)(src
+y
*stride
))+1, b
);
960 static void FUNCC(pred8x8l_down_left
)(uint8_t *_src
, int has_topleft
,
961 int has_topright
, ptrdiff_t _stride
)
963 pixel
*src
= (pixel
*)_src
;
964 int stride
= _stride
/sizeof(pixel
);
965 PREDICT_8x8_LOAD_TOP
;
966 PREDICT_8x8_LOAD_TOPRIGHT
;
967 SRC(0,0)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
968 SRC(0,1)=SRC(1,0)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
969 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
970 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
971 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
972 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
973 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
974 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
975 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
976 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
977 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
978 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11
+ 2*t12
+ t13
+ 2) >> 2;
979 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12
+ 2*t13
+ t14
+ 2) >> 2;
980 SRC(6,7)=SRC(7,6)= (t13
+ 2*t14
+ t15
+ 2) >> 2;
981 SRC(7,7)= (t14
+ 3*t15
+ 2) >> 2;
983 static void FUNCC(pred8x8l_down_right
)(uint8_t *_src
, int has_topleft
,
984 int has_topright
, ptrdiff_t _stride
)
986 pixel
*src
= (pixel
*)_src
;
987 int stride
= _stride
/sizeof(pixel
);
988 PREDICT_8x8_LOAD_TOP
;
989 PREDICT_8x8_LOAD_LEFT
;
990 PREDICT_8x8_LOAD_TOPLEFT
;
991 SRC(0,7)= (l7
+ 2*l6
+ l5
+ 2) >> 2;
992 SRC(0,6)=SRC(1,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
993 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
994 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
995 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
996 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
997 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
998 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
999 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
1000 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1001 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1002 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1003 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1004 SRC(6,0)=SRC(7,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1005 SRC(7,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1007 static void FUNCC(pred8x8l_vertical_right
)(uint8_t *_src
, int has_topleft
,
1008 int has_topright
, ptrdiff_t _stride
)
1010 pixel
*src
= (pixel
*)_src
;
1011 int stride
= _stride
/sizeof(pixel
);
1012 PREDICT_8x8_LOAD_TOP
;
1013 PREDICT_8x8_LOAD_LEFT
;
1014 PREDICT_8x8_LOAD_TOPLEFT
;
1015 SRC(0,6)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
1016 SRC(0,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
1017 SRC(0,4)=SRC(1,6)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
1018 SRC(0,5)=SRC(1,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
1019 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
1020 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
1021 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
1022 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt
+ t0
+ 1) >> 1;
1023 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
1024 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0
+ t1
+ 1) >> 1;
1025 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1026 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1
+ t2
+ 1) >> 1;
1027 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1028 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2
+ t3
+ 1) >> 1;
1029 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1030 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3
+ t4
+ 1) >> 1;
1031 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1032 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4
+ t5
+ 1) >> 1;
1033 SRC(6,1)=SRC(7,3)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1034 SRC(6,0)=SRC(7,2)= (t5
+ t6
+ 1) >> 1;
1035 SRC(7,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1036 SRC(7,0)= (t6
+ t7
+ 1) >> 1;
1038 static void FUNCC(pred8x8l_horizontal_down
)(uint8_t *_src
, int has_topleft
,
1039 int has_topright
, ptrdiff_t _stride
)
1041 pixel
*src
= (pixel
*)_src
;
1042 int stride
= _stride
/sizeof(pixel
);
1043 PREDICT_8x8_LOAD_TOP
;
1044 PREDICT_8x8_LOAD_LEFT
;
1045 PREDICT_8x8_LOAD_TOPLEFT
;
1046 SRC(0,7)= (l6
+ l7
+ 1) >> 1;
1047 SRC(1,7)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
1048 SRC(0,6)=SRC(2,7)= (l5
+ l6
+ 1) >> 1;
1049 SRC(1,6)=SRC(3,7)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
1050 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4
+ l5
+ 1) >> 1;
1051 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
1052 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3
+ l4
+ 1) >> 1;
1053 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
1054 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2
+ l3
+ 1) >> 1;
1055 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
1056 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1
+ l2
+ 1) >> 1;
1057 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
1058 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0
+ l1
+ 1) >> 1;
1059 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt
+ 2*l0
+ l1
+ 2) >> 2;
1060 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt
+ l0
+ 1) >> 1;
1061 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
1062 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1
+ 2*t0
+ lt
+ 2) >> 2;
1063 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2
+ 2*t1
+ t0
+ 2) >> 2;
1064 SRC(4,0)=SRC(6,1)= (t3
+ 2*t2
+ t1
+ 2) >> 2;
1065 SRC(5,0)=SRC(7,1)= (t4
+ 2*t3
+ t2
+ 2) >> 2;
1066 SRC(6,0)= (t5
+ 2*t4
+ t3
+ 2) >> 2;
1067 SRC(7,0)= (t6
+ 2*t5
+ t4
+ 2) >> 2;
1069 static void FUNCC(pred8x8l_vertical_left
)(uint8_t *_src
, int has_topleft
,
1070 int has_topright
, ptrdiff_t _stride
)
1072 pixel
*src
= (pixel
*)_src
;
1073 int stride
= _stride
/sizeof(pixel
);
1074 PREDICT_8x8_LOAD_TOP
;
1075 PREDICT_8x8_LOAD_TOPRIGHT
;
1076 SRC(0,0)= (t0
+ t1
+ 1) >> 1;
1077 SRC(0,1)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1078 SRC(0,2)=SRC(1,0)= (t1
+ t2
+ 1) >> 1;
1079 SRC(0,3)=SRC(1,1)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1080 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2
+ t3
+ 1) >> 1;
1081 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1082 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3
+ t4
+ 1) >> 1;
1083 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1084 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4
+ t5
+ 1) >> 1;
1085 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1086 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5
+ t6
+ 1) >> 1;
1087 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1088 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6
+ t7
+ 1) >> 1;
1089 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
1090 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7
+ t8
+ 1) >> 1;
1091 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
1092 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8
+ t9
+ 1) >> 1;
1093 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
1094 SRC(6,6)=SRC(7,4)= (t9
+ t10
+ 1) >> 1;
1095 SRC(6,7)=SRC(7,5)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
1096 SRC(7,6)= (t10
+ t11
+ 1) >> 1;
1097 SRC(7,7)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
1099 static void FUNCC(pred8x8l_horizontal_up
)(uint8_t *_src
, int has_topleft
,
1100 int has_topright
, ptrdiff_t _stride
)
1102 pixel
*src
= (pixel
*)_src
;
1103 int stride
= _stride
/sizeof(pixel
);
1104 PREDICT_8x8_LOAD_LEFT
;
1105 SRC(0,0)= (l0
+ l1
+ 1) >> 1;
1106 SRC(1,0)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
1107 SRC(0,1)=SRC(2,0)= (l1
+ l2
+ 1) >> 1;
1108 SRC(1,1)=SRC(3,0)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
1109 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2
+ l3
+ 1) >> 1;
1110 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
1111 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3
+ l4
+ 1) >> 1;
1112 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
1113 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4
+ l5
+ 1) >> 1;
1114 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
1115 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5
+ l6
+ 1) >> 1;
1116 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
1117 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6
+ l7
+ 1) >> 1;
1118 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6
+ 3*l7
+ 2) >> 2;
1119 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1120 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1121 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1122 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7
;
1124 #undef PREDICT_8x8_LOAD_LEFT
1125 #undef PREDICT_8x8_LOAD_TOP
1126 #undef PREDICT_8x8_LOAD_TOPLEFT
1127 #undef PREDICT_8x8_LOAD_TOPRIGHT
1128 #undef PREDICT_8x8_DC
1134 static void FUNCC(pred4x4_vertical_add
)(uint8_t *_pix
, const int16_t *_block
,
1138 pixel
*pix
= (pixel
*)_pix
;
1139 const dctcoef
*block
= (const dctcoef
*)_block
;
1140 stride
/= sizeof(pixel
);
1144 pix
[1*stride
]= v
+= block
[0];
1145 pix
[2*stride
]= v
+= block
[4];
1146 pix
[3*stride
]= v
+= block
[8];
1147 pix
[4*stride
]= v
+ block
[12];
1153 static void FUNCC(pred4x4_horizontal_add
)(uint8_t *_pix
, const int16_t *_block
,
1157 pixel
*pix
= (pixel
*)_pix
;
1158 const dctcoef
*block
= (const dctcoef
*)_block
;
1159 stride
/= sizeof(pixel
);
1162 pix
[0]= v
+= block
[0];
1163 pix
[1]= v
+= block
[1];
1164 pix
[2]= v
+= block
[2];
1165 pix
[3]= v
+ block
[3];
1171 static void FUNCC(pred8x8l_vertical_add
)(uint8_t *_pix
, const int16_t *_block
,
1175 pixel
*pix
= (pixel
*)_pix
;
1176 const dctcoef
*block
= (const dctcoef
*)_block
;
1177 stride
/= sizeof(pixel
);
1181 pix
[1*stride
]= v
+= block
[0];
1182 pix
[2*stride
]= v
+= block
[8];
1183 pix
[3*stride
]= v
+= block
[16];
1184 pix
[4*stride
]= v
+= block
[24];
1185 pix
[5*stride
]= v
+= block
[32];
1186 pix
[6*stride
]= v
+= block
[40];
1187 pix
[7*stride
]= v
+= block
[48];
1188 pix
[8*stride
]= v
+ block
[56];
1194 static void FUNCC(pred8x8l_horizontal_add
)(uint8_t *_pix
, const int16_t *_block
,
1198 pixel
*pix
= (pixel
*)_pix
;
1199 const dctcoef
*block
= (const dctcoef
*)_block
;
1200 stride
/= sizeof(pixel
);
1203 pix
[0]= v
+= block
[0];
1204 pix
[1]= v
+= block
[1];
1205 pix
[2]= v
+= block
[2];
1206 pix
[3]= v
+= block
[3];
1207 pix
[4]= v
+= block
[4];
1208 pix
[5]= v
+= block
[5];
1209 pix
[6]= v
+= block
[6];
1210 pix
[7]= v
+ block
[7];
1216 static void FUNCC(pred16x16_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1217 const int16_t *block
,
1222 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1225 static void FUNCC(pred16x16_horizontal_add
)(uint8_t *pix
,
1226 const int *block_offset
,
1227 const int16_t *block
,
1232 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1235 static void FUNCC(pred8x8_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1236 const int16_t *block
, ptrdiff_t stride
)
1240 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1243 static void FUNCC(pred8x16_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1244 const int16_t *block
, ptrdiff_t stride
)
1248 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1250 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
1253 static void FUNCC(pred8x8_horizontal_add
)(uint8_t *pix
, const int *block_offset
,
1254 const int16_t *block
,
1259 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1262 static void FUNCC(pred8x16_horizontal_add
)(uint8_t *pix
,
1263 const int *block_offset
,
1264 const int16_t *block
, ptrdiff_t stride
)
1268 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1270 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);