2 * VC-1 and WMV3 decoder - DSP functions
3 * Copyright (c) 2006 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/vc1dsp.c
24 * VC-1 and WMV3 decoder
31 /** Apply overlap transform to horizontal edge
33 static void vc1_v_overlap_c(uint8_t* src
, int stride
)
39 for(i
= 0; i
< 8; i
++) {
44 d1
= (a
- d
+ 3 + rnd
) >> 3;
45 d2
= (a
- d
+ b
- c
+ 4 - rnd
) >> 3;
47 src
[-2*stride
] = a
- d1
;
48 src
[-stride
] = av_clip_uint8(b
- d2
);
49 src
[0] = av_clip_uint8(c
+ d2
);
56 /** Apply overlap transform to vertical edge
58 static void vc1_h_overlap_c(uint8_t* src
, int stride
)
64 for(i
= 0; i
< 8; i
++) {
69 d1
= (a
- d
+ 3 + rnd
) >> 3;
70 d2
= (a
- d
+ b
- c
+ 4 - rnd
) >> 3;
73 src
[-1] = av_clip_uint8(b
- d2
);
74 src
[0] = av_clip_uint8(c
+ d2
);
82 * VC-1 in-loop deblocking filter for one line
83 * @param src source block type
84 * @param stride block stride
85 * @param pq block quantizer
86 * @return whether other 3 pairs should be filtered or not
89 static av_always_inline
int vc1_filter_line(uint8_t* src
, int stride
, int pq
){
90 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
92 int a0
= (2*(src
[-2*stride
] - src
[ 1*stride
]) - 5*(src
[-1*stride
] - src
[ 0*stride
]) + 4) >> 3;
93 int a0_sign
= a0
>> 31; /* Store sign */
94 a0
= (a0
^ a0_sign
) - a0_sign
; /* a0 = FFABS(a0); */
96 int a1
= FFABS((2*(src
[-4*stride
] - src
[-1*stride
]) - 5*(src
[-3*stride
] - src
[-2*stride
]) + 4) >> 3);
97 int a2
= FFABS((2*(src
[ 0*stride
] - src
[ 3*stride
]) - 5*(src
[ 1*stride
] - src
[ 2*stride
]) + 4) >> 3);
98 if(a1
< a0
|| a2
< a0
){
99 int clip
= src
[-1*stride
] - src
[ 0*stride
];
100 int clip_sign
= clip
>> 31;
101 clip
= ((clip
^ clip_sign
) - clip_sign
)>>1;
103 int a3
= FFMIN(a1
, a2
);
104 int d
= 5 * (a3
- a0
);
105 int d_sign
= (d
>> 31);
106 d
= ((d
^ d_sign
) - d_sign
) >> 3;
109 if( d_sign
^ clip_sign
)
113 d
= (d
^ d_sign
) - d_sign
; /* Restore sign */
114 src
[-1*stride
] = cm
[src
[-1*stride
] - d
];
115 src
[ 0*stride
] = cm
[src
[ 0*stride
] + d
];
125 * VC-1 in-loop deblocking filter
126 * @param src source block type
127 * @param step distance between horizontally adjacent elements
128 * @param stride distance between vertically adjacent elements
129 * @param len edge length to filter (4 or 8 pixels)
130 * @param pq block quantizer
133 static inline void vc1_loop_filter(uint8_t* src
, int step
, int stride
, int len
, int pq
)
138 for(i
= 0; i
< len
; i
+= 4){
139 filt3
= vc1_filter_line(src
+ 2*step
, stride
, pq
);
141 vc1_filter_line(src
+ 0*step
, stride
, pq
);
142 vc1_filter_line(src
+ 1*step
, stride
, pq
);
143 vc1_filter_line(src
+ 3*step
, stride
, pq
);
149 static void vc1_v_loop_filter4_c(uint8_t *src
, int stride
, int pq
)
151 vc1_loop_filter(src
, 1, stride
, 4, pq
);
154 static void vc1_h_loop_filter4_c(uint8_t *src
, int stride
, int pq
)
156 vc1_loop_filter(src
, stride
, 1, 4, pq
);
159 static void vc1_v_loop_filter8_c(uint8_t *src
, int stride
, int pq
)
161 vc1_loop_filter(src
, 1, stride
, 8, pq
);
164 static void vc1_h_loop_filter8_c(uint8_t *src
, int stride
, int pq
)
166 vc1_loop_filter(src
, stride
, 1, 8, pq
);
169 static void vc1_v_loop_filter16_c(uint8_t *src
, int stride
, int pq
)
171 vc1_loop_filter(src
, 1, stride
, 16, pq
);
174 static void vc1_h_loop_filter16_c(uint8_t *src
, int stride
, int pq
)
176 vc1_loop_filter(src
, stride
, 1, 16, pq
);
179 /** Do inverse transform on 8x8 block
181 static void vc1_inv_trans_8x8_c(DCTELEM block
[64])
184 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
189 for(i
= 0; i
< 8; i
++){
190 t1
= 12 * (src
[0] + src
[4]) + 4;
191 t2
= 12 * (src
[0] - src
[4]) + 4;
192 t3
= 16 * src
[2] + 6 * src
[6];
193 t4
= 6 * src
[2] - 16 * src
[6];
200 t1
= 16 * src
[1] + 15 * src
[3] + 9 * src
[5] + 4 * src
[7];
201 t2
= 15 * src
[1] - 4 * src
[3] - 16 * src
[5] - 9 * src
[7];
202 t3
= 9 * src
[1] - 16 * src
[3] + 4 * src
[5] + 15 * src
[7];
203 t4
= 4 * src
[1] - 9 * src
[3] + 15 * src
[5] - 16 * src
[7];
205 dst
[0] = (t5
+ t1
) >> 3;
206 dst
[1] = (t6
+ t2
) >> 3;
207 dst
[2] = (t7
+ t3
) >> 3;
208 dst
[3] = (t8
+ t4
) >> 3;
209 dst
[4] = (t8
- t4
) >> 3;
210 dst
[5] = (t7
- t3
) >> 3;
211 dst
[6] = (t6
- t2
) >> 3;
212 dst
[7] = (t5
- t1
) >> 3;
220 for(i
= 0; i
< 8; i
++){
221 t1
= 12 * (src
[ 0] + src
[32]) + 64;
222 t2
= 12 * (src
[ 0] - src
[32]) + 64;
223 t3
= 16 * src
[16] + 6 * src
[48];
224 t4
= 6 * src
[16] - 16 * src
[48];
231 t1
= 16 * src
[ 8] + 15 * src
[24] + 9 * src
[40] + 4 * src
[56];
232 t2
= 15 * src
[ 8] - 4 * src
[24] - 16 * src
[40] - 9 * src
[56];
233 t3
= 9 * src
[ 8] - 16 * src
[24] + 4 * src
[40] + 15 * src
[56];
234 t4
= 4 * src
[ 8] - 9 * src
[24] + 15 * src
[40] - 16 * src
[56];
236 dst
[ 0] = (t5
+ t1
) >> 7;
237 dst
[ 8] = (t6
+ t2
) >> 7;
238 dst
[16] = (t7
+ t3
) >> 7;
239 dst
[24] = (t8
+ t4
) >> 7;
240 dst
[32] = (t8
- t4
+ 1) >> 7;
241 dst
[40] = (t7
- t3
+ 1) >> 7;
242 dst
[48] = (t6
- t2
+ 1) >> 7;
243 dst
[56] = (t5
- t1
+ 1) >> 7;
250 /** Do inverse transform on 8x4 part of block
252 static void vc1_inv_trans_8x4_c(uint8_t *dest
, int linesize
, DCTELEM
*block
)
255 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
257 const uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
261 for(i
= 0; i
< 4; i
++){
262 t1
= 12 * (src
[0] + src
[4]) + 4;
263 t2
= 12 * (src
[0] - src
[4]) + 4;
264 t3
= 16 * src
[2] + 6 * src
[6];
265 t4
= 6 * src
[2] - 16 * src
[6];
272 t1
= 16 * src
[1] + 15 * src
[3] + 9 * src
[5] + 4 * src
[7];
273 t2
= 15 * src
[1] - 4 * src
[3] - 16 * src
[5] - 9 * src
[7];
274 t3
= 9 * src
[1] - 16 * src
[3] + 4 * src
[5] + 15 * src
[7];
275 t4
= 4 * src
[1] - 9 * src
[3] + 15 * src
[5] - 16 * src
[7];
277 dst
[0] = (t5
+ t1
) >> 3;
278 dst
[1] = (t6
+ t2
) >> 3;
279 dst
[2] = (t7
+ t3
) >> 3;
280 dst
[3] = (t8
+ t4
) >> 3;
281 dst
[4] = (t8
- t4
) >> 3;
282 dst
[5] = (t7
- t3
) >> 3;
283 dst
[6] = (t6
- t2
) >> 3;
284 dst
[7] = (t5
- t1
) >> 3;
291 for(i
= 0; i
< 8; i
++){
292 t1
= 17 * (src
[ 0] + src
[16]) + 64;
293 t2
= 17 * (src
[ 0] - src
[16]) + 64;
294 t3
= 22 * src
[ 8] + 10 * src
[24];
295 t4
= 22 * src
[24] - 10 * src
[ 8];
297 dest
[0*linesize
] = cm
[dest
[0*linesize
] + ((t1
+ t3
) >> 7)];
298 dest
[1*linesize
] = cm
[dest
[1*linesize
] + ((t2
- t4
) >> 7)];
299 dest
[2*linesize
] = cm
[dest
[2*linesize
] + ((t2
+ t4
) >> 7)];
300 dest
[3*linesize
] = cm
[dest
[3*linesize
] + ((t1
- t3
) >> 7)];
307 /** Do inverse transform on 4x8 parts of block
309 static void vc1_inv_trans_4x8_c(uint8_t *dest
, int linesize
, DCTELEM
*block
)
312 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
314 const uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
318 for(i
= 0; i
< 8; i
++){
319 t1
= 17 * (src
[0] + src
[2]) + 4;
320 t2
= 17 * (src
[0] - src
[2]) + 4;
321 t3
= 22 * src
[1] + 10 * src
[3];
322 t4
= 22 * src
[3] - 10 * src
[1];
324 dst
[0] = (t1
+ t3
) >> 3;
325 dst
[1] = (t2
- t4
) >> 3;
326 dst
[2] = (t2
+ t4
) >> 3;
327 dst
[3] = (t1
- t3
) >> 3;
334 for(i
= 0; i
< 4; i
++){
335 t1
= 12 * (src
[ 0] + src
[32]) + 64;
336 t2
= 12 * (src
[ 0] - src
[32]) + 64;
337 t3
= 16 * src
[16] + 6 * src
[48];
338 t4
= 6 * src
[16] - 16 * src
[48];
345 t1
= 16 * src
[ 8] + 15 * src
[24] + 9 * src
[40] + 4 * src
[56];
346 t2
= 15 * src
[ 8] - 4 * src
[24] - 16 * src
[40] - 9 * src
[56];
347 t3
= 9 * src
[ 8] - 16 * src
[24] + 4 * src
[40] + 15 * src
[56];
348 t4
= 4 * src
[ 8] - 9 * src
[24] + 15 * src
[40] - 16 * src
[56];
350 dest
[0*linesize
] = cm
[dest
[0*linesize
] + ((t5
+ t1
) >> 7)];
351 dest
[1*linesize
] = cm
[dest
[1*linesize
] + ((t6
+ t2
) >> 7)];
352 dest
[2*linesize
] = cm
[dest
[2*linesize
] + ((t7
+ t3
) >> 7)];
353 dest
[3*linesize
] = cm
[dest
[3*linesize
] + ((t8
+ t4
) >> 7)];
354 dest
[4*linesize
] = cm
[dest
[4*linesize
] + ((t8
- t4
+ 1) >> 7)];
355 dest
[5*linesize
] = cm
[dest
[5*linesize
] + ((t7
- t3
+ 1) >> 7)];
356 dest
[6*linesize
] = cm
[dest
[6*linesize
] + ((t6
- t2
+ 1) >> 7)];
357 dest
[7*linesize
] = cm
[dest
[7*linesize
] + ((t5
- t1
+ 1) >> 7)];
364 /** Do inverse transform on 4x4 part of block
366 static void vc1_inv_trans_4x4_c(uint8_t *dest
, int linesize
, DCTELEM
*block
)
369 register int t1
,t2
,t3
,t4
;
371 const uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
375 for(i
= 0; i
< 4; i
++){
376 t1
= 17 * (src
[0] + src
[2]) + 4;
377 t2
= 17 * (src
[0] - src
[2]) + 4;
378 t3
= 22 * src
[1] + 10 * src
[3];
379 t4
= 22 * src
[3] - 10 * src
[1];
381 dst
[0] = (t1
+ t3
) >> 3;
382 dst
[1] = (t2
- t4
) >> 3;
383 dst
[2] = (t2
+ t4
) >> 3;
384 dst
[3] = (t1
- t3
) >> 3;
391 for(i
= 0; i
< 4; i
++){
392 t1
= 17 * (src
[ 0] + src
[16]) + 64;
393 t2
= 17 * (src
[ 0] - src
[16]) + 64;
394 t3
= 22 * src
[ 8] + 10 * src
[24];
395 t4
= 22 * src
[24] - 10 * src
[ 8];
397 dest
[0*linesize
] = cm
[dest
[0*linesize
] + ((t1
+ t3
) >> 7)];
398 dest
[1*linesize
] = cm
[dest
[1*linesize
] + ((t2
- t4
) >> 7)];
399 dest
[2*linesize
] = cm
[dest
[2*linesize
] + ((t2
+ t4
) >> 7)];
400 dest
[3*linesize
] = cm
[dest
[3*linesize
] + ((t1
- t3
) >> 7)];
407 /* motion compensation functions */
408 /** Filter in case of 2 filters */
409 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
410 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
413 case 0: /* no shift - should not occur */ \
415 case 1: /* 1/4 shift */ \
416 return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
417 case 2: /* 1/2 shift */ \
418 return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
419 case 3: /* 3/4 shift */ \
420 return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
422 return 0; /* should not occur */ \
425 VC1_MSPEL_FILTER_16B(ver
, uint8_t);
426 VC1_MSPEL_FILTER_16B(hor
, int16_t);
429 /** Filter used to interpolate fractional pel values
431 static av_always_inline
int vc1_mspel_filter(const uint8_t *src
, int stride
, int mode
, int r
)
437 return (-4*src
[-stride
] + 53*src
[0] + 18*src
[stride
] - 3*src
[stride
*2] + 32 - r
) >> 6;
439 return (-src
[-stride
] + 9*src
[0] + 9*src
[stride
] - src
[stride
*2] + 8 - r
) >> 4;
441 return (-3*src
[-stride
] + 18*src
[0] + 53*src
[stride
] - 4*src
[stride
*2] + 32 - r
) >> 6;
443 return 0; //should not occur
446 /** Function used to do motion compensation with bicubic interpolation
448 #define VC1_MSPEL_MC(OP, OPNAME)\
449 static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
453 if (vmode) { /* Horizontal filter to apply */\
456 if (hmode) { /* Vertical filter to apply, output to tmp */\
457 static const int shift_value[] = { 0, 5, 1, 5 };\
458 int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
459 int16_t tmp[11*8], *tptr = tmp;\
461 r = (1<<(shift-1)) + rnd-1;\
464 for(j = 0; j < 8; j++) {\
465 for(i = 0; i < 11; i++)\
466 tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
473 for(j = 0; j < 8; j++) {\
474 for(i = 0; i < 8; i++)\
475 OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
482 else { /* No horizontal filter, output 8 lines to dst */\
485 for(j = 0; j < 8; j++) {\
486 for(i = 0; i < 8; i++)\
487 OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
495 /* Horizontal mode with no vertical mode */\
496 for(j = 0; j < 8; j++) {\
497 for(i = 0; i < 8; i++)\
498 OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
504 #define op_put(a, b) a = av_clip_uint8(b)
505 #define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
507 VC1_MSPEL_MC(op_put
, put_
)
508 VC1_MSPEL_MC(op_avg
, avg_
)
510 /* pixel functions - really are entry points to vc1_mspel_mc */
512 /* this one is defined in dsputil.c */
513 void ff_put_vc1_mspel_mc00_c(uint8_t *dst
, const uint8_t *src
, int stride
, int rnd
);
514 void ff_avg_vc1_mspel_mc00_c(uint8_t *dst
, const uint8_t *src
, int stride
, int rnd
);
516 #define PUT_VC1_MSPEL(a, b)\
517 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
518 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
520 static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
521 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
543 void ff_vc1dsp_init(DSPContext
* dsp
, AVCodecContext
*avctx
) {
544 dsp
->vc1_inv_trans_8x8
= vc1_inv_trans_8x8_c
;
545 dsp
->vc1_inv_trans_4x8
= vc1_inv_trans_4x8_c
;
546 dsp
->vc1_inv_trans_8x4
= vc1_inv_trans_8x4_c
;
547 dsp
->vc1_inv_trans_4x4
= vc1_inv_trans_4x4_c
;
548 dsp
->vc1_h_overlap
= vc1_h_overlap_c
;
549 dsp
->vc1_v_overlap
= vc1_v_overlap_c
;
550 dsp
->vc1_v_loop_filter4
= vc1_v_loop_filter4_c
;
551 dsp
->vc1_h_loop_filter4
= vc1_h_loop_filter4_c
;
552 dsp
->vc1_v_loop_filter8
= vc1_v_loop_filter8_c
;
553 dsp
->vc1_h_loop_filter8
= vc1_h_loop_filter8_c
;
554 dsp
->vc1_v_loop_filter16
= vc1_v_loop_filter16_c
;
555 dsp
->vc1_h_loop_filter16
= vc1_h_loop_filter16_c
;
557 dsp
->put_vc1_mspel_pixels_tab
[ 0] = ff_put_vc1_mspel_mc00_c
;
558 dsp
->put_vc1_mspel_pixels_tab
[ 1] = put_vc1_mspel_mc10_c
;
559 dsp
->put_vc1_mspel_pixels_tab
[ 2] = put_vc1_mspel_mc20_c
;
560 dsp
->put_vc1_mspel_pixels_tab
[ 3] = put_vc1_mspel_mc30_c
;
561 dsp
->put_vc1_mspel_pixels_tab
[ 4] = put_vc1_mspel_mc01_c
;
562 dsp
->put_vc1_mspel_pixels_tab
[ 5] = put_vc1_mspel_mc11_c
;
563 dsp
->put_vc1_mspel_pixels_tab
[ 6] = put_vc1_mspel_mc21_c
;
564 dsp
->put_vc1_mspel_pixels_tab
[ 7] = put_vc1_mspel_mc31_c
;
565 dsp
->put_vc1_mspel_pixels_tab
[ 8] = put_vc1_mspel_mc02_c
;
566 dsp
->put_vc1_mspel_pixels_tab
[ 9] = put_vc1_mspel_mc12_c
;
567 dsp
->put_vc1_mspel_pixels_tab
[10] = put_vc1_mspel_mc22_c
;
568 dsp
->put_vc1_mspel_pixels_tab
[11] = put_vc1_mspel_mc32_c
;
569 dsp
->put_vc1_mspel_pixels_tab
[12] = put_vc1_mspel_mc03_c
;
570 dsp
->put_vc1_mspel_pixels_tab
[13] = put_vc1_mspel_mc13_c
;
571 dsp
->put_vc1_mspel_pixels_tab
[14] = put_vc1_mspel_mc23_c
;
572 dsp
->put_vc1_mspel_pixels_tab
[15] = put_vc1_mspel_mc33_c
;
574 dsp
->avg_vc1_mspel_pixels_tab
[ 0] = ff_avg_vc1_mspel_mc00_c
;
575 dsp
->avg_vc1_mspel_pixels_tab
[ 1] = avg_vc1_mspel_mc10_c
;
576 dsp
->avg_vc1_mspel_pixels_tab
[ 2] = avg_vc1_mspel_mc20_c
;
577 dsp
->avg_vc1_mspel_pixels_tab
[ 3] = avg_vc1_mspel_mc30_c
;
578 dsp
->avg_vc1_mspel_pixels_tab
[ 4] = avg_vc1_mspel_mc01_c
;
579 dsp
->avg_vc1_mspel_pixels_tab
[ 5] = avg_vc1_mspel_mc11_c
;
580 dsp
->avg_vc1_mspel_pixels_tab
[ 6] = avg_vc1_mspel_mc21_c
;
581 dsp
->avg_vc1_mspel_pixels_tab
[ 7] = avg_vc1_mspel_mc31_c
;
582 dsp
->avg_vc1_mspel_pixels_tab
[ 8] = avg_vc1_mspel_mc02_c
;
583 dsp
->avg_vc1_mspel_pixels_tab
[ 9] = avg_vc1_mspel_mc12_c
;
584 dsp
->avg_vc1_mspel_pixels_tab
[10] = avg_vc1_mspel_mc22_c
;
585 dsp
->avg_vc1_mspel_pixels_tab
[11] = avg_vc1_mspel_mc32_c
;
586 dsp
->avg_vc1_mspel_pixels_tab
[12] = avg_vc1_mspel_mc03_c
;
587 dsp
->avg_vc1_mspel_pixels_tab
[13] = avg_vc1_mspel_mc13_c
;
588 dsp
->avg_vc1_mspel_pixels_tab
[14] = avg_vc1_mspel_mc23_c
;
589 dsp
->avg_vc1_mspel_pixels_tab
[15] = avg_vc1_mspel_mc33_c
;