2 * Copyright (C) 2004 the ffmpeg project
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * Standard C DSP-oriented functions cribbed from the original VP3
/*
 * Rounding term for the DC-only shortcuts in this file: it is shifted left
 * by 16 and added to the 16.16 product before that product is shifted
 * right by 20.
 */
#define IdctAdjustBeforeShift 8

/*
 * Fixed-point multiply: full product of a and b with the low 16 bits
 * shifted out. Both arguments are parenthesised, so expressions may be
 * passed, but each is evaluated exactly once only because it appears once.
 * The result for negative products relies on the compiler's (implementation-
 * defined) right shift of negative values.
 */
#define M(a,b) (((a) * (b))>>16)
/*
 * Shared 8x8 inverse-DCT worker used by the three ff_vp3_idct*_c entry
 * points, which pass type 0 (with dst == NULL), 1 and 2 respectively.
 *
 * dst    - output pixels, addressed as dst[k*stride] in the column pass
 * stride - distance between output rows
 * input  - the coefficient block; the ip[] accesses below read 8 values
 *          per row and ip[k*8] per column, i.e. a row-major 8x8 layout
 * type   - variant selector
 *
 * NOTE(review): this listing is incomplete. The declarations of i and ip,
 * the assignments to Cd, Dd, Ed, Gd, Add, Bdd, Fd and Hd, the stores that
 * end the row pass, the conditionals that choose between the three store
 * groups in the column pass, and the closing braces are not present here.
 * Restore them from the pristine file before relying on anything below.
 */
41 static av_always_inline
void idct(uint8_t *dst
, int stride
, int16_t *input
, int type
)
/*
 * cm is ff_cropTbl offset by MAX_NEG_CROP and is indexed with computed
 * sample values in the dst stores below. Presumably a clamp-to-byte
 * table whose offset makes negative indices legal - confirm.
 */
44 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
/* Butterfly temporaries shared by the row pass and the column pass. */
46 int A
, B
, C
, D
, Ad
, Bd
, Cd
, Dd
, E
, F
, G
, H
;
47 int Ed
, Gd
, Add
, Bdd
, Fd
, Hd
;
51 /* Inverse DCT on the rows now */
52 for (i
= 0; i
< 8; i
++) {
53 /* Check for non-zero values */
/* Bitwise OR of all eight inputs: the row is transformed only if any is non-zero. */
54 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] | ip
[4] | ip
[5] | ip
[6] | ip
[7] ) {
/* A..D are built from the odd-indexed inputs 1, 7, 3 and 5. */
55 A
= M(xC1S7
, ip
[1]) + M(xC7S1
, ip
[7]);
56 B
= M(xC7S1
, ip
[1]) - M(xC1S7
, ip
[7]);
57 C
= M(xC3S5
, ip
[3]) + M(xC5S3
, ip
[5]);
58 D
= M(xC3S5
, ip
[5]) - M(xC5S3
, ip
[3]);
/* Differences of the odd terms, scaled by xC4S4. */
60 Ad
= M(xC4S4
, (A
- C
));
61 Bd
= M(xC4S4
, (B
- D
));
/* E and F come from inputs 0 and 4; G and H from inputs 2 and 6. */
66 E
= M(xC4S4
, (ip
[0] + ip
[4]));
67 F
= M(xC4S4
, (ip
[0] - ip
[4]));
69 G
= M(xC2S6
, ip
[2]) + M(xC6S2
, ip
[6]);
70 H
= M(xC6S2
, ip
[2]) - M(xC2S6
, ip
[6]);
/* NOTE(review): the row-pass stores announced by the next comment are missing from this listing. */
81 /* Final sequence of operations over-write original inputs. */
95 ip
+= 8; /* next row */
/*
 * Second pass: same arithmetic, but walking down a column, so the
 * inputs are ip[k*8] instead of ip[k].
 */
100 for ( i
= 0; i
< 8; i
++) {
101 /* Check for non-zero values (bitwise or faster than ||) */
/* ip[0*8] is left out of this test; the stores near the end of the loop use ip[0*8] alone. */
102 if ( ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] |
103 ip
[4 * 8] | ip
[5 * 8] | ip
[6 * 8] | ip
[7 * 8] ) {
105 A
= M(xC1S7
, ip
[1*8]) + M(xC7S1
, ip
[7*8]);
106 B
= M(xC7S1
, ip
[1*8]) - M(xC1S7
, ip
[7*8]);
107 C
= M(xC3S5
, ip
[3*8]) + M(xC5S3
, ip
[5*8]);
108 D
= M(xC3S5
, ip
[5*8]) - M(xC5S3
, ip
[3*8]);
110 Ad
= M(xC4S4
, (A
- C
));
111 Bd
= M(xC4S4
, (B
- D
));
/* Unlike the row pass, E and F carry an extra +8; presumably the rounding term for the >> 4 in the stores below - confirm. */
116 E
= M(xC4S4
, (ip
[0*8] + ip
[4*8])) + 8;
117 F
= M(xC4S4
, (ip
[0*8] - ip
[4*8])) + 8;
124 G
= M(xC2S6
, ip
[2*8]) + M(xC6S2
, ip
[6*8]);
125 H
= M(xC6S2
, ip
[2*8]) - M(xC2S6
, ip
[6*8]);
136 /* Final sequence of operations over-write original inputs. */
/*
 * Three alternative store groups follow, all using the same eight
 * sums/differences shifted right by 4. NOTE(review): the conditionals
 * selecting one group are not in this listing; presumably they test
 * type (0, 1, 2 in this order) - confirm.
 *
 * Group 1: write the results back into the coefficient block.
 */
138 ip
[0*8] = (Gd
+ Cd
) >> 4;
139 ip
[7*8] = (Gd
- Cd
) >> 4;
141 ip
[1*8] = (Add
+ Hd
) >> 4;
142 ip
[2*8] = (Add
- Hd
) >> 4;
144 ip
[3*8] = (Ed
+ Dd
) >> 4;
145 ip
[4*8] = (Ed
- Dd
) >> 4;
147 ip
[5*8] = (Fd
+ Bdd
) >> 4;
148 ip
[6*8] = (Fd
- Bdd
) >> 4;
/* Group 2: overwrite the destination column with the results looked up through cm. */
150 dst
[0*stride
] = cm
[(Gd
+ Cd
) >> 4];
151 dst
[7*stride
] = cm
[(Gd
- Cd
) >> 4];
153 dst
[1*stride
] = cm
[(Add
+ Hd
) >> 4];
154 dst
[2*stride
] = cm
[(Add
- Hd
) >> 4];
156 dst
[3*stride
] = cm
[(Ed
+ Dd
) >> 4];
157 dst
[4*stride
] = cm
[(Ed
- Dd
) >> 4];
159 dst
[5*stride
] = cm
[(Fd
+ Bdd
) >> 4];
160 dst
[6*stride
] = cm
[(Fd
- Bdd
) >> 4];
/* Group 3: add the results to the existing destination pixels, again through cm. */
162 dst
[0*stride
] = cm
[dst
[0*stride
] + ((Gd
+ Cd
) >> 4)];
163 dst
[7*stride
] = cm
[dst
[7*stride
] + ((Gd
- Cd
) >> 4)];
165 dst
[1*stride
] = cm
[dst
[1*stride
] + ((Add
+ Hd
) >> 4)];
166 dst
[2*stride
] = cm
[dst
[2*stride
] + ((Add
- Hd
) >> 4)];
168 dst
[3*stride
] = cm
[dst
[3*stride
] + ((Ed
+ Dd
) >> 4)];
169 dst
[4*stride
] = cm
[dst
[4*stride
] + ((Ed
- Dd
) >> 4)];
171 dst
[5*stride
] = cm
[dst
[5*stride
] + ((Fd
+ Bdd
) >> 4)];
172 dst
[6*stride
] = cm
[dst
[6*stride
] + ((Fd
- Bdd
) >> 4)];
/*
 * The remaining stores depend on ip[0*8] only: one value
 * (xC4S4 * ip[0*8] + (IdctAdjustBeforeShift << 16)) >> 20 is stored,
 * stored with a +128 offset through cm, or added to dst through cm.
 * NOTE(review): presumably the else-branch of the non-zero test above,
 * with the first seven targets of each chained store missing from this
 * listing - confirm.
 */
184 ip
[7*8] = ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
193 dst
[7*stride
]= cm
[128 + ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20)];
196 int v
= ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
197 dst
[0*stride
] = cm
[dst
[0*stride
] + v
];
198 dst
[1*stride
] = cm
[dst
[1*stride
] + v
];
199 dst
[2*stride
] = cm
[dst
[2*stride
] + v
];
200 dst
[3*stride
] = cm
[dst
[3*stride
] + v
];
201 dst
[4*stride
] = cm
[dst
[4*stride
] + v
];
202 dst
[5*stride
] = cm
[dst
[5*stride
] + v
];
203 dst
[6*stride
] = cm
[dst
[6*stride
] + v
];
204 dst
[7*stride
] = cm
[dst
[7*stride
] + v
];
/* NOTE(review): no matching advance of dst is visible in this listing - confirm against the pristine file. */
209 ip
++; /* next column */
214 void ff_vp3_idct_c(DCTELEM
*block
/* align 16*/){
215 idct(NULL
, 0, block
, 0);
218 void ff_vp3_idct_put_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
219 idct(dest
, line_size
, block
, 1);
222 void ff_vp3_idct_add_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
223 idct(dest
, line_size
, block
, 2);
226 void ff_vp3_idct_dc_add_c(uint8_t *dest
/*align 8*/, int line_size
, const DCTELEM
*block
/*align 16*/){
227 const uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
228 int i
, dc
= block
[0];
230 dc
= (46341*dc
+ (8<<16))>>20;
232 for(i
= 0; i
< 8; i
++){
233 dest
[0] = cm
[dest
[0]+dc
];
234 dest
[1] = cm
[dest
[1]+dc
];
235 dest
[2] = cm
[dest
[2]+dc
];
236 dest
[3] = cm
[dest
[3]+dc
];
237 dest
[4] = cm
[dest
[4]+dc
];
238 dest
[5] = cm
[dest
[5]+dc
];
239 dest
[6] = cm
[dest
[6]+dc
];
240 dest
[7] = cm
[dest
[7]+dc
];
/**
 * Filter across a horizontal block edge, for 8 adjacent columns.
 *
 * For each column the filter reads the two pixels above the edge
 * (first_pixel[-2*stride], first_pixel[-stride]) and the two pixels at and
 * below it (first_pixel[0], first_pixel[stride]), then adjusts only the
 * two pixels that touch the edge.
 *
 * @param first_pixel     leftmost pixel of the row just below the edge
 * @param stride          distance between consecutive picture rows
 * @param bounding_values lookup table mapping the raw filter response to
 *                        the correction that is applied. The index
 *                        (response + 4) >> 3 can be negative, so this
 *                        presumably points into the interior of a larger
 *                        array - confirm against the caller.
 */
void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
{
    const int nstride = -stride;
    uint8_t *const end = first_pixel + 8;

    for (; first_pixel < end; first_pixel++) {
        /* Raw response: outer-pixel difference plus three times the edge-pixel difference. */
        int filter_value = (first_pixel[2 * nstride] - first_pixel[stride])
                         + 3 * (first_pixel[0] - first_pixel[nstride]);

        filter_value = bounding_values[(filter_value + 4) >> 3];

        /* Apply the correction with opposite signs to the two edge pixels. */
        first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
        first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
    }
}
/**
 * Filter across a vertical block edge, for 8 consecutive rows.
 *
 * For each row the filter reads the two pixels left of the edge
 * (first_pixel[-2], first_pixel[-1]) and the two pixels at and right of it
 * (first_pixel[0], first_pixel[1]), then adjusts only the two pixels that
 * touch the edge.
 *
 * @param first_pixel     pixel just right of the edge in the first row
 * @param stride          distance between consecutive picture rows
 * @param bounding_values lookup table mapping the raw filter response to
 *                        the correction that is applied. The index
 *                        (response + 4) >> 3 can be negative, so this
 *                        presumably points into the interior of a larger
 *                        array - confirm against the caller.
 */
void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
{
    uint8_t *const end = first_pixel + 8 * stride;

    /* Inequality test on purpose: it also terminates for a negative stride. */
    for (; first_pixel != end; first_pixel += stride) {
        /* Raw response: outer-pixel difference plus three times the edge-pixel difference. */
        int filter_value = (first_pixel[-2] - first_pixel[1])
                         + 3 * (first_pixel[0] - first_pixel[-1]);

        filter_value = bounding_values[(filter_value + 4) >> 3];

        /* Apply the correction with opposite signs to the two edge pixels. */
        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
        first_pixel[0]  = av_clip_uint8(first_pixel[0] - filter_value);
    }
}