2 * VC-1 and WMV3 decoder - DSP functions
3 * Copyright (c) 2006 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * VC-1 and WMV3 decoder
31 /** Apply overlap transform to horizontal edge
33 static void vc1_v_overlap_c(uint8_t* src
, int stride
)
39 for(i
= 0; i
< 8; i
++) {
44 d1
= (a
- d
+ 3 + rnd
) >> 3;
45 d2
= (a
- d
+ b
- c
+ 4 - rnd
) >> 3;
47 src
[-2*stride
] = a
- d1
;
48 src
[-stride
] = b
- d2
;
56 /** Apply overlap transform to vertical edge
58 static void vc1_h_overlap_c(uint8_t* src
, int stride
)
64 for(i
= 0; i
< 8; i
++) {
69 d1
= (a
- d
+ 3 + rnd
) >> 3;
70 d2
= (a
- d
+ b
- c
+ 4 - rnd
) >> 3;
82 /** Do inverse transform on 8x8 block
84 static void vc1_inv_trans_8x8_c(DCTELEM block
[64])
87 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
92 for(i
= 0; i
< 8; i
++){
93 t1
= 12 * (src
[0] + src
[4]);
94 t2
= 12 * (src
[0] - src
[4]);
95 t3
= 16 * src
[2] + 6 * src
[6];
96 t4
= 6 * src
[2] - 16 * src
[6];
103 t1
= 16 * src
[1] + 15 * src
[3] + 9 * src
[5] + 4 * src
[7];
104 t2
= 15 * src
[1] - 4 * src
[3] - 16 * src
[5] - 9 * src
[7];
105 t3
= 9 * src
[1] - 16 * src
[3] + 4 * src
[5] + 15 * src
[7];
106 t4
= 4 * src
[1] - 9 * src
[3] + 15 * src
[5] - 16 * src
[7];
108 dst
[0] = (t5
+ t1
+ 4) >> 3;
109 dst
[1] = (t6
+ t2
+ 4) >> 3;
110 dst
[2] = (t7
+ t3
+ 4) >> 3;
111 dst
[3] = (t8
+ t4
+ 4) >> 3;
112 dst
[4] = (t8
- t4
+ 4) >> 3;
113 dst
[5] = (t7
- t3
+ 4) >> 3;
114 dst
[6] = (t6
- t2
+ 4) >> 3;
115 dst
[7] = (t5
- t1
+ 4) >> 3;
123 for(i
= 0; i
< 8; i
++){
124 t1
= 12 * (src
[ 0] + src
[32]);
125 t2
= 12 * (src
[ 0] - src
[32]);
126 t3
= 16 * src
[16] + 6 * src
[48];
127 t4
= 6 * src
[16] - 16 * src
[48];
134 t1
= 16 * src
[ 8] + 15 * src
[24] + 9 * src
[40] + 4 * src
[56];
135 t2
= 15 * src
[ 8] - 4 * src
[24] - 16 * src
[40] - 9 * src
[56];
136 t3
= 9 * src
[ 8] - 16 * src
[24] + 4 * src
[40] + 15 * src
[56];
137 t4
= 4 * src
[ 8] - 9 * src
[24] + 15 * src
[40] - 16 * src
[56];
139 dst
[ 0] = (t5
+ t1
+ 64) >> 7;
140 dst
[ 8] = (t6
+ t2
+ 64) >> 7;
141 dst
[16] = (t7
+ t3
+ 64) >> 7;
142 dst
[24] = (t8
+ t4
+ 64) >> 7;
143 dst
[32] = (t8
- t4
+ 64 + 1) >> 7;
144 dst
[40] = (t7
- t3
+ 64 + 1) >> 7;
145 dst
[48] = (t6
- t2
+ 64 + 1) >> 7;
146 dst
[56] = (t5
- t1
+ 64 + 1) >> 7;
153 /** Do inverse transform on 8x4 part of block
155 static void vc1_inv_trans_8x4_c(DCTELEM block
[64], int n
)
158 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
165 for(i
= 0; i
< 4; i
++){
166 t1
= 12 * (src
[0] + src
[4]);
167 t2
= 12 * (src
[0] - src
[4]);
168 t3
= 16 * src
[2] + 6 * src
[6];
169 t4
= 6 * src
[2] - 16 * src
[6];
176 t1
= 16 * src
[1] + 15 * src
[3] + 9 * src
[5] + 4 * src
[7];
177 t2
= 15 * src
[1] - 4 * src
[3] - 16 * src
[5] - 9 * src
[7];
178 t3
= 9 * src
[1] - 16 * src
[3] + 4 * src
[5] + 15 * src
[7];
179 t4
= 4 * src
[1] - 9 * src
[3] + 15 * src
[5] - 16 * src
[7];
181 dst
[0] = (t5
+ t1
+ 4) >> 3;
182 dst
[1] = (t6
+ t2
+ 4) >> 3;
183 dst
[2] = (t7
+ t3
+ 4) >> 3;
184 dst
[3] = (t8
+ t4
+ 4) >> 3;
185 dst
[4] = (t8
- t4
+ 4) >> 3;
186 dst
[5] = (t7
- t3
+ 4) >> 3;
187 dst
[6] = (t6
- t2
+ 4) >> 3;
188 dst
[7] = (t5
- t1
+ 4) >> 3;
196 for(i
= 0; i
< 8; i
++){
197 t1
= 17 * (src
[ 0] + src
[16]);
198 t2
= 17 * (src
[ 0] - src
[16]);
204 dst
[ 0] = (t1
+ t3
+ t6
+ 64) >> 7;
205 dst
[ 8] = (t2
- t4
+ t5
+ 64) >> 7;
206 dst
[16] = (t2
+ t4
- t5
+ 64) >> 7;
207 dst
[24] = (t1
- t3
- t6
+ 64) >> 7;
214 /** Do inverse transform on 4x8 parts of block
216 static void vc1_inv_trans_4x8_c(DCTELEM block
[64], int n
)
219 register int t1
,t2
,t3
,t4
,t5
,t6
,t7
,t8
;
226 for(i
= 0; i
< 8; i
++){
227 t1
= 17 * (src
[0] + src
[2]);
228 t2
= 17 * (src
[0] - src
[2]);
234 dst
[0] = (t1
+ t3
+ t6
+ 4) >> 3;
235 dst
[1] = (t2
- t4
+ t5
+ 4) >> 3;
236 dst
[2] = (t2
+ t4
- t5
+ 4) >> 3;
237 dst
[3] = (t1
- t3
- t6
+ 4) >> 3;
245 for(i
= 0; i
< 4; i
++){
246 t1
= 12 * (src
[ 0] + src
[32]);
247 t2
= 12 * (src
[ 0] - src
[32]);
248 t3
= 16 * src
[16] + 6 * src
[48];
249 t4
= 6 * src
[16] - 16 * src
[48];
256 t1
= 16 * src
[ 8] + 15 * src
[24] + 9 * src
[40] + 4 * src
[56];
257 t2
= 15 * src
[ 8] - 4 * src
[24] - 16 * src
[40] - 9 * src
[56];
258 t3
= 9 * src
[ 8] - 16 * src
[24] + 4 * src
[40] + 15 * src
[56];
259 t4
= 4 * src
[ 8] - 9 * src
[24] + 15 * src
[40] - 16 * src
[56];
261 dst
[ 0] = (t5
+ t1
+ 64) >> 7;
262 dst
[ 8] = (t6
+ t2
+ 64) >> 7;
263 dst
[16] = (t7
+ t3
+ 64) >> 7;
264 dst
[24] = (t8
+ t4
+ 64) >> 7;
265 dst
[32] = (t8
- t4
+ 64 + 1) >> 7;
266 dst
[40] = (t7
- t3
+ 64 + 1) >> 7;
267 dst
[48] = (t6
- t2
+ 64 + 1) >> 7;
268 dst
[56] = (t5
- t1
+ 64 + 1) >> 7;
275 /** Do inverse transform on 4x4 part of block
277 static void vc1_inv_trans_4x4_c(DCTELEM block
[64], int n
)
280 register int t1
,t2
,t3
,t4
,t5
,t6
;
284 off
= (n
&1) * 4 + (n
&2) * 16;
287 for(i
= 0; i
< 4; i
++){
288 t1
= 17 * (src
[0] + src
[2]);
289 t2
= 17 * (src
[0] - src
[2]);
295 dst
[0] = (t1
+ t3
+ t6
+ 4) >> 3;
296 dst
[1] = (t2
- t4
+ t5
+ 4) >> 3;
297 dst
[2] = (t2
+ t4
- t5
+ 4) >> 3;
298 dst
[3] = (t1
- t3
- t6
+ 4) >> 3;
306 for(i
= 0; i
< 4; i
++){
307 t1
= 17 * (src
[ 0] + src
[16]);
308 t2
= 17 * (src
[ 0] - src
[16]);
314 dst
[ 0] = (t1
+ t3
+ t6
+ 64) >> 7;
315 dst
[ 8] = (t2
- t4
+ t5
+ 64) >> 7;
316 dst
[16] = (t2
+ t4
- t5
+ 64) >> 7;
317 dst
[24] = (t1
- t3
- t6
+ 64) >> 7;
324 /* motion compensation functions */
326 /** Filter used to interpolate fractional pel values
328 static av_always_inline
int vc1_mspel_filter(const uint8_t *src
, int stride
, int mode
, int r
)
334 return (-4*src
[-stride
] + 53*src
[0] + 18*src
[stride
] - 3*src
[stride
*2] + 32 - r
) >> 6;
336 return (-src
[-stride
] + 9*src
[0] + 9*src
[stride
] - src
[stride
*2] + 8 - r
) >> 4;
338 return (-3*src
[-stride
] + 18*src
[0] + 53*src
[stride
] - 4*src
[stride
*2] + 32 - r
) >> 6;
340 return 0; //should not occur
343 /** Function used to do motion compensation with bicubic interpolation
345 static void vc1_mspel_mc(uint8_t *dst
, const uint8_t *src
, int stride
, int hmode
, int vmode
, int rnd
)
348 uint8_t tmp
[8*11], *tptr
;
354 for(j
= 0; j
< 11; j
++) {
355 for(i
= 0; i
< 8; i
++)
356 tptr
[i
] = av_clip_uint8(vc1_mspel_filter(src
+ i
, 1, hmode
, r
));
363 for(j
= 0; j
< 8; j
++) {
364 for(i
= 0; i
< 8; i
++)
365 dst
[i
] = av_clip_uint8(vc1_mspel_filter(tptr
+ i
, 8, vmode
, r
));
371 /* pixel functions - really are entry points to vc1_mspel_mc */
373 /* this one is defined in dsputil.c */
374 void ff_put_vc1_mspel_mc00_c(uint8_t *dst
, const uint8_t *src
, int stride
, int rnd
);
376 #define PUT_VC1_MSPEL(a, b)\
377 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
378 vc1_mspel_mc(dst, src, stride, a, b, rnd); \
400 void ff_vc1dsp_init(DSPContext
* dsp
, AVCodecContext
*avctx
) {
401 dsp
->vc1_inv_trans_8x8
= vc1_inv_trans_8x8_c
;
402 dsp
->vc1_inv_trans_4x8
= vc1_inv_trans_4x8_c
;
403 dsp
->vc1_inv_trans_8x4
= vc1_inv_trans_8x4_c
;
404 dsp
->vc1_inv_trans_4x4
= vc1_inv_trans_4x4_c
;
405 dsp
->vc1_h_overlap
= vc1_h_overlap_c
;
406 dsp
->vc1_v_overlap
= vc1_v_overlap_c
;
408 dsp
->put_vc1_mspel_pixels_tab
[ 0] = ff_put_vc1_mspel_mc00_c
;
409 dsp
->put_vc1_mspel_pixels_tab
[ 1] = put_vc1_mspel_mc10_c
;
410 dsp
->put_vc1_mspel_pixels_tab
[ 2] = put_vc1_mspel_mc20_c
;
411 dsp
->put_vc1_mspel_pixels_tab
[ 3] = put_vc1_mspel_mc30_c
;
412 dsp
->put_vc1_mspel_pixels_tab
[ 4] = put_vc1_mspel_mc01_c
;
413 dsp
->put_vc1_mspel_pixels_tab
[ 5] = put_vc1_mspel_mc11_c
;
414 dsp
->put_vc1_mspel_pixels_tab
[ 6] = put_vc1_mspel_mc21_c
;
415 dsp
->put_vc1_mspel_pixels_tab
[ 7] = put_vc1_mspel_mc31_c
;
416 dsp
->put_vc1_mspel_pixels_tab
[ 8] = put_vc1_mspel_mc02_c
;
417 dsp
->put_vc1_mspel_pixels_tab
[ 9] = put_vc1_mspel_mc12_c
;
418 dsp
->put_vc1_mspel_pixels_tab
[10] = put_vc1_mspel_mc22_c
;
419 dsp
->put_vc1_mspel_pixels_tab
[11] = put_vc1_mspel_mc32_c
;
420 dsp
->put_vc1_mspel_pixels_tab
[12] = put_vc1_mspel_mc03_c
;
421 dsp
->put_vc1_mspel_pixels_tab
[13] = put_vc1_mspel_mc13_c
;
422 dsp
->put_vc1_mspel_pixels_tab
[14] = put_vc1_mspel_mc23_c
;
423 dsp
->put_vc1_mspel_pixels_tab
[15] = put_vc1_mspel_mc33_c
;