2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
15 #include "third_party/googletest/src/include/gtest/gtest.h"
16 #include "vpx_ports/mem.h"
19 #include "vp9/common/vp9_entropy.h"
// Local prototype for the C 16x16 inverse-transform-and-add routine.  The
// tests in this file call it with a coefficient block, a destination pixel
// buffer and a pitch value.
21 void vp9_short_idct16x16_add_c(short *input
, uint8_t *output
, int pitch
);
24 #include "acm_random.h"
25 #include "vpx/vpx_integer.h"
27 using libvpx_test::ACMRandom
;
// Rounds x to the nearest integer, with halfway cases rounded away from zero
// (2.5 -> 3, -2.5 -> -3).
//
// Equivalent to ceil(x - 0.5) for negative x and floor(x + 0.5) otherwise:
// after biasing by half a unit away from zero, the conversion to int
// truncates toward zero, which is ceil for negative values and floor for
// non-negative ones.  The result must be representable as an int.
static int round(double x) {
  const double biased = (x < 0) ? x - 0.5 : x + 0.5;
  return static_cast<int>(biased);
}
// Approximation of pi used by reference2_16x16_idct_2d below.
40 const double PI
= 3.1415926535898;
// Floating-point reference for a 2-D 16x16 inverse DCT (per its name).  For
// each (l, k) pair the inner loops visit every input coefficient (i, j) and
// evaluate the separable cosine term
//   cos(PI*j*(l+0.5)/16) * cos(PI*i*(k+0.5)/16) * input[i*16+j] / 256.
// NOTE(review): the declaration of x, the accumulation of these terms and the
// store into output are not visible in this view of the file.
41 void reference2_16x16_idct_2d(double *input
, double *output
) {
43 for (int l
= 0; l
< 16; ++l
) {
44 for (int k
= 0; k
< 16; ++k
) {
46 for (int i
= 0; i
< 16; ++i
) {
47 for (int j
= 0; j
< 16; ++j
) {
48 x
=cos(PI
*j
*(l
+0.5)/16.0)*cos(PI
*i
*(k
+0.5)/16.0)*input
[i
*16+j
]/256;
// Cosine constants used by butterfly_16x16_dct_1d.  The values match
// Ck = cos(k * pi / 32), e.g. C8 = cos(pi/4) and C4 = cos(pi/8).
62 static const double C1
= 0.995184726672197;
63 static const double C2
= 0.98078528040323;
64 static const double C3
= 0.956940335732209;
65 static const double C4
= 0.923879532511287;
66 static const double C5
= 0.881921264348355;
67 static const double C6
= 0.831469612302545;
68 static const double C7
= 0.773010453362737;
69 static const double C8
= 0.707106781186548;
70 static const double C9
= 0.634393284163646;
71 static const double C10
= 0.555570233019602;
72 static const double C11
= 0.471396736825998;
73 static const double C12
= 0.38268343236509;
74 static const double C13
= 0.290284677254462;
75 static const double C14
= 0.195090322016128;
76 static const double C15
= 0.098017140329561;
// 1-D 16-point DCT (per its name) of input[] into output[], computed with a
// staged butterfly network rather than a direct cosine sum.  output[] doubles
// as scratch storage between stages, and every output index is assigned
// exactly once in the final stage.
// NOTE(review): the declarations of step/temp1/temp2, several temp1/temp2
// assignments and the closing brace are not visible in this view of the file.
78 static void butterfly_16x16_dct_1d(double input
[16], double output
[16]) {
80 double intermediate
[16];
// Stage 1: fold the input about its midpoint -- sums of mirrored samples go
// to step[0..7], differences to step[8..15].
84 step
[ 0] = input
[0] + input
[15];
85 step
[ 1] = input
[1] + input
[14];
86 step
[ 2] = input
[2] + input
[13];
87 step
[ 3] = input
[3] + input
[12];
88 step
[ 4] = input
[4] + input
[11];
89 step
[ 5] = input
[5] + input
[10];
90 step
[ 6] = input
[6] + input
[ 9];
91 step
[ 7] = input
[7] + input
[ 8];
92 step
[ 8] = input
[7] - input
[ 8];
93 step
[ 9] = input
[6] - input
[ 9];
94 step
[10] = input
[5] - input
[10];
95 step
[11] = input
[4] - input
[11];
96 step
[12] = input
[3] - input
[12];
97 step
[13] = input
[2] - input
[13];
98 step
[14] = input
[1] - input
[14];
99 step
[15] = input
[0] - input
[15];
// Stage 2: the same fold applied to step[0..7], written to output[0..7].
102 output
[0] = step
[0] + step
[7];
103 output
[1] = step
[1] + step
[6];
104 output
[2] = step
[2] + step
[5];
105 output
[3] = step
[3] + step
[4];
106 output
[4] = step
[3] - step
[4];
107 output
[5] = step
[2] - step
[5];
108 output
[6] = step
[1] - step
[6];
109 output
[7] = step
[0] - step
[7];
// Stage 2 (continued): output[8..15] are sums/differences of step[8..15]
// entries scaled by the Ck cosine constants.
113 output
[ 8] = temp1
+ temp2
;
115 temp1
= step
[ 9]*C11
;
117 output
[ 9] = temp1
- temp2
;
120 temp2
= step
[13]*C13
;
121 output
[10] = temp1
+ temp2
;
123 temp1
= step
[11]*C15
;
125 output
[11] = temp1
- temp2
;
128 temp2
= step
[12]*C15
;
129 output
[12] = temp2
+ temp1
;
131 temp1
= step
[10]*C13
;
133 output
[13] = temp2
- temp1
;
136 temp2
= step
[14]*C11
;
137 output
[14] = temp2
+ temp1
;
141 output
[15] = temp2
- temp1
;
// Stage 3: butterflies on output[0..3] into step[0..3]; step[4..7] from
// scaled pairs of output[4..7]; butterflies within output[8..11] and within
// output[12..15].
144 step
[ 0] = output
[0] + output
[3];
145 step
[ 1] = output
[1] + output
[2];
146 step
[ 2] = output
[1] - output
[2];
147 step
[ 3] = output
[0] - output
[3];
149 temp1
= output
[4]*C14
;
150 temp2
= output
[7]*C2
;
151 step
[ 4] = temp1
+ temp2
;
153 temp1
= output
[5]*C10
;
154 temp2
= output
[6]*C6
;
155 step
[ 5] = temp1
+ temp2
;
157 temp1
= output
[5]*C6
;
158 temp2
= output
[6]*C10
;
159 step
[ 6] = temp2
- temp1
;
161 temp1
= output
[4]*C2
;
162 temp2
= output
[7]*C14
;
163 step
[ 7] = temp2
- temp1
;
165 step
[ 8] = output
[ 8] + output
[11];
166 step
[ 9] = output
[ 9] + output
[10];
167 step
[10] = output
[ 9] - output
[10];
168 step
[11] = output
[ 8] - output
[11];
170 step
[12] = output
[12] + output
[15];
171 step
[13] = output
[13] + output
[14];
172 step
[14] = output
[13] - output
[14];
173 step
[15] = output
[12] - output
[15];
// Stage 4: produce the final coefficients from step[] (and intermediate[]
// for the odd-indexed outputs).
176 output
[ 0] = (step
[ 0] + step
[ 1]);
177 output
[ 8] = (step
[ 0] - step
[ 1]);
181 temp1
= temp1
+ temp2
;
182 output
[ 4] = 2*(temp1
*C8
);
186 temp1
= temp2
- temp1
;
187 output
[12] = 2*(temp1
*C8
);
189 output
[ 2] = 2*((step
[4] + step
[ 5])*C8
);
190 output
[14] = 2*((step
[7] - step
[ 6])*C8
);
192 temp1
= step
[4] - step
[5];
193 temp2
= step
[6] + step
[7];
194 output
[ 6] = (temp1
+ temp2
);
195 output
[10] = (temp1
- temp2
);
197 intermediate
[8] = step
[8] + step
[14];
198 intermediate
[9] = step
[9] + step
[15];
200 temp1
= intermediate
[8]*C12
;
201 temp2
= intermediate
[9]*C4
;
202 temp1
= temp1
- temp2
;
203 output
[3] = 2*(temp1
*C8
);
205 temp1
= intermediate
[8]*C4
;
206 temp2
= intermediate
[9]*C12
;
207 temp1
= temp2
+ temp1
;
208 output
[13] = 2*(temp1
*C8
);
210 output
[ 9] = 2*((step
[10] + step
[11])*C8
);
212 intermediate
[11] = step
[10] - step
[11];
213 intermediate
[12] = step
[12] + step
[13];
214 intermediate
[13] = step
[12] - step
[13];
215 intermediate
[14] = step
[ 8] - step
[14];
216 intermediate
[15] = step
[ 9] - step
[15];
218 output
[15] = (intermediate
[11] + intermediate
[12]);
219 output
[ 1] = -(intermediate
[11] - intermediate
[12]);
221 output
[ 7] = 2*(intermediate
[13]*C8
);
223 temp1
= intermediate
[14]*C12
;
224 temp2
= intermediate
[15]*C4
;
225 temp1
= temp1
- temp2
;
226 output
[11] = -2*(temp1
*C8
);
228 temp1
= intermediate
[14]*C4
;
229 temp2
= intermediate
[15]*C12
;
230 temp1
= temp2
+ temp1
;
231 output
[ 5] = 2*(temp1
*C8
);
// Direct (O(N^2)) reference implementation of a 16-point 1-D DCT-II.
//
//   out[k] = s(k) * sum_{n=0}^{15} in[n] * cos(pi * (2n + 1) * k / 32)
//
// where s(0) = 1/sqrt(2) and s(k) = 1 for k > 0.
//
// in:  16 input samples (read only).
// out: receives the 16 coefficients; any previous contents are overwritten.
static void reference_16x16_dct_1d(double in[16], double out[16]) {
  const double kPi = 3.141592653589793238462643383279502884;
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 16; k++) {
    // Accumulate locally so the result never depends on what out[] held
    // before the call.
    double sum = 0.0;
    for (int n = 0; n < 16; n++)
      sum += in[n] * cos(kPi * (2 * n + 1) * k / 32.0);
    // Only the DC coefficient carries the 1/sqrt(2) normalization.
    if (k == 0)
      sum *= kInvSqrt2;
    out[k] = sum;
  }
}
246 void reference_16x16_dct_2d(int16_t input
[16*16], double output
[16*16]) {
247 // First transform columns
248 for (int i
= 0; i
< 16; ++i
) {
249 double temp_in
[16], temp_out
[16];
250 for (int j
= 0; j
< 16; ++j
)
251 temp_in
[j
] = input
[j
*16 + i
];
252 butterfly_16x16_dct_1d(temp_in
, temp_out
);
253 for (int j
= 0; j
< 16; ++j
)
254 output
[j
*16 + i
] = temp_out
[j
];
256 // Then transform rows
257 for (int i
= 0; i
< 16; ++i
) {
258 double temp_in
[16], temp_out
[16];
259 for (int j
= 0; j
< 16; ++j
)
260 temp_in
[j
] = output
[j
+ i
*16];
261 butterfly_16x16_dct_1d(temp_in
, temp_out
);
262 // Scale by some magic number
263 for (int j
= 0; j
< 16; ++j
)
264 output
[j
+ i
*16] = temp_out
[j
]/2;
268 void fdct16x16(int16_t *in
, int16_t *out
, uint8_t* /*dst*/,
269 int stride
, int /*tx_type*/) {
270 vp9_short_fdct16x16_c(in
, out
, stride
);
272 void idct16x16_add(int16_t* /*in*/, int16_t *out
, uint8_t *dst
,
273 int stride
, int /*tx_type*/) {
274 vp9_short_idct16x16_add_c(out
, dst
, stride
>> 1);
// Forward hybrid-transform adapter with the (in, out, dst, stride, tx_type)
// signature used by the FwdTrans16x16Test function pointers.  Forwards in,
// out, half of stride and tx_type; the destination buffer is ignored.
// NOTE(review): calls to both the SSE2 and the C implementation appear below;
// any preprocessor guard selecting between them is not visible in this view
// of the file -- confirm which call is actually compiled.
276 void fht16x16(int16_t *in
, int16_t *out
, uint8_t* /*dst*/,
277 int stride
, int tx_type
) {
278 // FIXME(jingning): need to test both SSE2 and c
280 vp9_short_fht16x16_sse2(in
, out
, stride
>> 1, tx_type
);
282 vp9_short_fht16x16_c(in
, out
, stride
>> 1, tx_type
);
285 void iht16x16_add(int16_t* /*in*/, int16_t *out
, uint8_t *dst
,
286 int stride
, int tx_type
) {
287 vp9_short_iht16x16_add_c(out
, dst
, stride
>> 1, tx_type
);
// Value-parameterized fixture for the 16x16 forward/inverse transform tests.
// SetUp() stores the int test parameter in tx_type_ and assigns the forward
// and inverse transform function pointers; RunFwdTxfm/RunInvTxfm call through
// those pointers.
// NOTE(review): the access specifiers, the condition that chooses between the
// DCT adapters (fdct16x16 / idct16x16_add) and the hybrid-transform adapters,
// the forward hybrid-transform assignment and the tx_type_ member declaration
// are not visible in this view of the file.
290 class FwdTrans16x16Test
: public ::testing::TestWithParam
<int> {
292 virtual ~FwdTrans16x16Test() {}
294 virtual void SetUp() {
295 tx_type_
= GetParam();
297 fwd_txfm
= fdct16x16
;
298 inv_txfm
= idct16x16_add
;
301 inv_txfm
= iht16x16_add
;
// Calls the selected forward transform with the arguments unchanged.
306 void RunFwdTxfm(int16_t *in
, int16_t *out
, uint8_t *dst
,
307 int stride
, int tx_type
) {
308 (*fwd_txfm
)(in
, out
, dst
, stride
, tx_type
);
// Calls the selected inverse transform with the arguments unchanged.
310 void RunInvTxfm(int16_t *in
, int16_t *out
, uint8_t *dst
,
311 int stride
, int tx_type
) {
312 (*inv_txfm
)(in
, out
, dst
, stride
, tx_type
);
// Transform entry points, both taking (in, out, dst, stride, tx_type).
316 void (*fwd_txfm
)(int16_t*, int16_t*, uint8_t*, int, int);
317 void (*inv_txfm
)(int16_t*, int16_t*, uint8_t*, int, int);
// Round-trip accuracy test.  For each of 10000 random blocks: builds the
// residual src - dst, forward-transforms it, runs the inverse transform that
// adds into dst, and compares the reconstructed dst against src.  Expects no
// single squared pixel error above 1 and a total squared error no larger than
// the number of blocks.
// NOTE(review): the declaration of max_error and the statement that updates
// it are not visible in this view of the file.
320 TEST_P(FwdTrans16x16Test
, AccuracyCheck
) {
321 ACMRandom
rnd(ACMRandom::DeterministicSeed());
323 double total_error
= 0;
324 const int count_test_block
= 10000;
325 for (int i
= 0; i
< count_test_block
; ++i
) {
326 DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block
, 256);
327 DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block
, 256);
328 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst
, 256);
329 DECLARE_ALIGNED_ARRAY(16, uint8_t, src
, 256);
331 for (int j
= 0; j
< 256; ++j
) {
332 src
[j
] = rnd
.Rand8();
333 dst
[j
] = rnd
.Rand8();
334 // Initialize a test block with input range [-255, 255].
335 test_input_block
[j
] = src
[j
] - dst
[j
];
// Forward transform into test_temp_block, then inverse-transform and add the
// result onto dst so that dst should end up close to src.
338 const int pitch
= 32;
339 RunFwdTxfm(test_input_block
, test_temp_block
, dst
, pitch
, tx_type_
);
340 RunInvTxfm(test_input_block
, test_temp_block
, dst
, pitch
, tx_type_
);
342 for (int j
= 0; j
< 256; ++j
) {
343 const int diff
= dst
[j
] - src
[j
];
344 const int error
= diff
* diff
;
345 if (max_error
< error
)
347 total_error
+= error
;
351 EXPECT_GE(1, max_error
)
352 << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
354 EXPECT_GE(count_test_block
, total_error
)
355 << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
// Coefficient range test.  For each of 1000 iterations, forward-transforms a
// random block and an "extreme" block whose entries are +255 or -255, and
// expects every output coefficient magnitude to be at most 4 * DCT_MAX_VALUE.
// NOTE(review): the loop that sets every extreme entry to 255 appears below
// without a visible guard; any condition restricting it to particular
// iterations is not visible in this view of the file.
358 TEST_P(FwdTrans16x16Test
, CoeffSizeCheck
) {
359 ACMRandom
rnd(ACMRandom::DeterministicSeed());
360 const int count_test_block
= 1000;
361 for (int i
= 0; i
< count_test_block
; ++i
) {
362 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block
, 256);
363 DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block
, 256);
364 DECLARE_ALIGNED_ARRAY(16, int16_t, output_block
, 256);
365 DECLARE_ALIGNED_ARRAY(16, int16_t, output_extreme_block
, 256);
366 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst
, 256);
368 // Initialize a test block with input range [-255, 255].
369 for (int j
= 0; j
< 256; ++j
) {
370 input_block
[j
] = rnd
.Rand8() - rnd
.Rand8();
371 input_extreme_block
[j
] = rnd
.Rand8() % 2 ? 255 : -255;
374 for (int j
= 0; j
< 256; ++j
)
375 input_extreme_block
[j
] = 255;
377 const int pitch
= 32;
378 RunFwdTxfm(input_block
, output_block
, dst
, pitch
, tx_type_
);
379 RunFwdTxfm(input_extreme_block
, output_extreme_block
, dst
, pitch
, tx_type_
);
381 // The minimum quant value is 4.
382 for (int j
= 0; j
< 256; ++j
) {
383 EXPECT_GE(4 * DCT_MAX_VALUE
, abs(output_block
[j
]))
384 << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
385 EXPECT_GE(4 * DCT_MAX_VALUE
, abs(output_extreme_block
[j
]))
386 << "Error: 16x16 FDCT extreme has coefficient larger "
387 << "than 4*DCT_MAX_VALUE";
// Instantiates every FwdTrans16x16Test case once per parameter value 0, 1, 2
// and 3 (::testing::Range excludes its end value); SetUp() stores the value
// in tx_type_.
392 INSTANTIATE_TEST_CASE_P(VP9
, FwdTrans16x16Test
, ::testing::Range(0, 4));
// Inverse-DCT accuracy test against the floating-point reference.  For each
// of 1000 random blocks: builds the residual in = src - dst, computes its
// reference forward DCT, rounds the coefficients to integers, runs
// vp9_short_idct16x16_add_c to add the inverse transform onto dst, and
// compares dst with src pixel by pixel.
// NOTE(review): the declaration of out_r and the assertion macro that the
// trailing "<<" message belongs to are not visible in this view of the file.
394 TEST(VP9Idct16x16Test
, AccuracyCheck
) {
395 ACMRandom
rnd(ACMRandom::DeterministicSeed());
396 const int count_test_block
= 1000;
397 for (int i
= 0; i
< count_test_block
; ++i
) {
398 int16_t in
[256], coeff
[256];
399 uint8_t dst
[256], src
[256];
402 for (int j
= 0; j
< 256; ++j
) {
403 src
[j
] = rnd
.Rand8();
404 dst
[j
] = rnd
.Rand8();
406 // Initialize a test block with input range [-255, 255].
407 for (int j
= 0; j
< 256; ++j
)
408 in
[j
] = src
[j
] - dst
[j
];
410 reference_16x16_dct_2d(in
, out_r
);
411 for (int j
= 0; j
< 256; j
++)
412 coeff
[j
] = round(out_r
[j
]);
413 vp9_short_idct16x16_add_c(coeff
, dst
, 16);
414 for (int j
= 0; j
< 256; ++j
) {
415 const int diff
= dst
[j
] - src
[j
];
416 const int error
= diff
* diff
;
418 << "Error: 16x16 IDCT has error " << error
419 << " at index " << j
;