2 * Copyright (C) 2004 the ffmpeg project
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * Standard C DSP-oriented functions cribbed from the original VP3
/* Rounding bias for the column pass of idct(): it is added to the terms that
 * are then shifted right by 4 (8 is half of 1 << 4).  The paths that use only
 * ip[0*8] apply it as (IdctAdjustBeforeShift << 16) ahead of a >> 20. */
29 #define IdctAdjustBeforeShift 8
/*
 * Shared 8x8 inverse DCT body: a pass over the 8 rows followed by a pass over
 * the 8 columns, each guarded by a test for non-zero coefficients.
 *
 * dst    - destination pixels; the column pass addresses them as dst[k*stride]
 * stride - distance between successive destination rows
 * input  - coefficient block (presumably what `ip` walks; the declaration of
 *          `ip` is not visible in this excerpt -- TODO confirm)
 * type   - output variant; the wrappers in this file pass 0, 1 and 2
 *
 * NOTE(review): this listing is an excerpt.  Braces, the declarations of
 * ip/i/t1/t2, the assignments that combine t1/t2 into A_.._Hd, and the
 * conditionals that choose between the output variants are not shown, so the
 * comments below describe only what is visible.
 */
38 static always_inline
void idct(uint8_t *dst
, int stride
, int16_t *input
, int type
)
/* cm points MAX_NEG_CROP entries into cropTbl; presumably a clamping lookup
 * table that tolerates negative indices -- confirm against cropTbl. */
41 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
/* Intermediate terms of the transform; most of their assignments are not
 * visible in this excerpt. */
43 int A_
, B_
, C_
, D_
, _Ad
, _Bd
, _Cd
, _Dd
, E_
, F_
, G_
, H_
;
44 int _Ed
, _Gd
, _Add
, _Bdd
, _Fd
, _Hd
;
49 /* Inverse DCT on the rows now */
50 for (i
= 0; i
< 8; i
++) {
51 /* Check for non-zero values */
52 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] | ip
[4] | ip
[5] | ip
[6] | ip
[7] ) {
/* Products of the odd-indexed inputs (1, 7, 3, 5) with the xCnSm constants.
 * t1/t2 are reused for each pair; how they are combined afterwards is not
 * visible here. */
53 t1
= (int32_t)(xC1S7
* ip
[1]);
54 t2
= (int32_t)(xC7S1
* ip
[7]);
59 t1
= (int32_t)(xC7S1
* ip
[1]);
60 t2
= (int32_t)(xC1S7
* ip
[7]);
65 t1
= (int32_t)(xC3S5
* ip
[3]);
66 t2
= (int32_t)(xC5S3
* ip
[5]);
71 t1
= (int32_t)(xC3S5
* ip
[5]);
72 t2
= (int32_t)(xC5S3
* ip
[3]);
/* xC4S4 applied to differences of earlier terms, then to the sum and
 * difference of inputs 0 and 4. */
78 t1
= (int32_t)(xC4S4
* (A_
- C_
));
82 t1
= (int32_t)(xC4S4
* (B_
- D_
));
90 t1
= (int32_t)(xC4S4
* (ip
[0] + ip
[4]));
94 t1
= (int32_t)(xC4S4
* (ip
[0] - ip
[4]));
/* Inputs 2 and 6 with the xC2S6 / xC6S2 constants. */
98 t1
= (int32_t)(xC2S6
* ip
[2]);
99 t2
= (int32_t)(xC6S2
* ip
[6]);
104 t1
= (int32_t)(xC6S2
* ip
[2]);
105 t2
= (int32_t)(xC2S6
* ip
[6]);
120 /* Final sequence of operations over-write original inputs. */
/* Advance by 8 elements, i.e. one row of the block. */
135 ip
+= 8; /* next row */
/* Column pass: the same products as the row pass, but with elements taken
 * 8 apart (ip[k*8]). */
140 for ( i
= 0; i
< 8; i
++) {
141 /* Check for non-zero values (bitwise or faster than ||) */
/* ip[0*8] is not part of this test; the shortcut code further down reads
 * only ip[0*8]. */
142 if ( ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] |
143 ip
[4 * 8] | ip
[5 * 8] | ip
[6 * 8] | ip
[7 * 8] ) {
145 t1
= (int32_t)(xC1S7
* ip
[1*8]);
146 t2
= (int32_t)(xC7S1
* ip
[7*8]);
151 t1
= (int32_t)(xC7S1
* ip
[1*8]);
152 t2
= (int32_t)(xC1S7
* ip
[7*8]);
157 t1
= (int32_t)(xC3S5
* ip
[3*8]);
158 t2
= (int32_t)(xC5S3
* ip
[5*8]);
163 t1
= (int32_t)(xC3S5
* ip
[5*8]);
164 t2
= (int32_t)(xC5S3
* ip
[3*8]);
170 t1
= (int32_t)(xC4S4
* (A_
- C_
));
174 t1
= (int32_t)(xC4S4
* (B_
- D_
));
182 t1
= (int32_t)(xC4S4
* (ip
[0*8] + ip
[4*8]));
186 t1
= (int32_t)(xC4S4
* (ip
[0*8] - ip
[4*8]));
190 t1
= (int32_t)(xC2S6
* ip
[2*8]);
191 t2
= (int32_t)(xC6S2
* ip
[6*8]);
196 t1
= (int32_t)(xC6S2
* ip
[2*8]);
197 t2
= (int32_t)(xC2S6
* ip
[6*8]);
/* Rounding bias: 8 is half of the 16 divided out by the >> 4 below.  Only
 * the four terms that feed every output sum/difference need it. */
218 _Gd
+= IdctAdjustBeforeShift
;
219 _Add
+= IdctAdjustBeforeShift
;
220 _Ed
+= IdctAdjustBeforeShift
;
221 _Fd
+= IdctAdjustBeforeShift
;
223 /* Final sequence of operations over-write original inputs. */
/* Output variant 1 of 3: store the results back into the coefficient block.
 * (Which `type` selects each variant is not visible in this excerpt.) */
225 ip
[0*8] = (_Gd
+ _Cd
) >> 4;
226 ip
[7*8] = (_Gd
- _Cd
) >> 4;
228 ip
[1*8] = (_Add
+ _Hd
) >> 4;
229 ip
[2*8] = (_Add
- _Hd
) >> 4;
231 ip
[3*8] = (_Ed
+ _Dd
) >> 4;
232 ip
[4*8] = (_Ed
- _Dd
) >> 4;
234 ip
[5*8] = (_Fd
+ _Bdd
) >> 4;
235 ip
[6*8] = (_Fd
- _Bdd
) >> 4;
/* Output variant 2 of 3: write the results to dst, one per destination row,
 * looked up through cm. */
237 dst
[0*stride
] = cm
[(_Gd
+ _Cd
) >> 4];
238 dst
[7*stride
] = cm
[(_Gd
- _Cd
) >> 4];
240 dst
[1*stride
] = cm
[(_Add
+ _Hd
) >> 4];
241 dst
[2*stride
] = cm
[(_Add
- _Hd
) >> 4];
243 dst
[3*stride
] = cm
[(_Ed
+ _Dd
) >> 4];
244 dst
[4*stride
] = cm
[(_Ed
- _Dd
) >> 4];
246 dst
[5*stride
] = cm
[(_Fd
+ _Bdd
) >> 4];
247 dst
[6*stride
] = cm
[(_Fd
- _Bdd
) >> 4];
/* Output variant 3 of 3: add the results to the pixels already in dst, with
 * the sum looked up through cm. */
249 dst
[0*stride
] = cm
[dst
[0*stride
] + ((_Gd
+ _Cd
) >> 4)];
250 dst
[7*stride
] = cm
[dst
[7*stride
] + ((_Gd
- _Cd
) >> 4)];
252 dst
[1*stride
] = cm
[dst
[1*stride
] + ((_Add
+ _Hd
) >> 4)];
253 dst
[2*stride
] = cm
[dst
[2*stride
] + ((_Add
- _Hd
) >> 4)];
255 dst
[3*stride
] = cm
[dst
[3*stride
] + ((_Ed
+ _Dd
) >> 4)];
256 dst
[4*stride
] = cm
[dst
[4*stride
] + ((_Ed
- _Dd
) >> 4)];
258 dst
[5*stride
] = cm
[dst
[5*stride
] + ((_Fd
+ _Bdd
) >> 4)];
259 dst
[6*stride
] = cm
[dst
[6*stride
] + ((_Fd
- _Bdd
) >> 4)];
/* Shortcut that uses only ip[0*8]: one value, xC4S4 * ip[0*8], with the
 * rounding bias moved up 16 bits to match the single >> 20.  Presumably the
 * branch taken when the non-zero test above fails -- TODO confirm; only the
 * last store of the first two variants is visible here. */
271 ip
[7*8] = ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* dst-writing form of the shortcut; note the explicit 128 offset and that
 * this store does not go through cm. */
280 dst
[7*stride
]= 128 + ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* dst-adding form of the shortcut: the same value v is added to all eight
 * pixels of the column, each sum looked up through cm. */
283 int v
= ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
284 dst
[0*stride
] = cm
[dst
[0*stride
] + v
];
285 dst
[1*stride
] = cm
[dst
[1*stride
] + v
];
286 dst
[2*stride
] = cm
[dst
[2*stride
] + v
];
287 dst
[3*stride
] = cm
[dst
[3*stride
] + v
];
288 dst
[4*stride
] = cm
[dst
[4*stride
] + v
];
289 dst
[5*stride
] = cm
[dst
[5*stride
] + v
];
290 dst
[6*stride
] = cm
[dst
[6*stride
] + v
];
291 dst
[7*stride
] = cm
[dst
[7*stride
] + v
];
296 ip
++; /* next column */
/* Runs idct() on `block` with no destination (dst = NULL, stride = 0) and
 * type 0.  Presumably the variant that leaves its result in `block` itself;
 * the type dispatch inside idct() is not visible here -- TODO confirm. */
301 void ff_vp3_idct_c(DCTELEM
*block
/* align 16*/){
302 idct(NULL
, 0, block
, 0);
/* Runs idct() on `block` with `dest`/`line_size` as destination and type 1.
 * Going by the name, presumably the variant that overwrites the destination
 * pixels with the transform result -- TODO confirm against idct(). */
305 void ff_vp3_idct_put_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
306 idct(dest
, line_size
, block
, 1);
/* Runs idct() on `block` with `dest`/`line_size` as destination and type 2.
 * Going by the name, presumably the variant that adds the transform result
 * to the pixels already in `dest` -- TODO confirm against idct(). */
309 void ff_vp3_idct_add_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
310 idct(dest
, line_size
, block
, 2);