/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <math.h>
#include <stdlib.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

using libvpx_test::ACMRandom;
// Fallback for toolchains whose <math.h> does not provide round()
// (Visual Studio releases before 2013). Rounds half away from zero, which
// matches the C99 round() that every other toolchain picks up from <math.h>.
// The guard keeps this definition from clashing with the library's
// `double round(double)` where that one exists.
#if defined(_MSC_VER) && (_MSC_VER < 1800)
static int round(double x) {
  if (x < 0)
    return static_cast<int>(ceil(x - 0.5));
  else
    return static_cast<int>(floor(x + 0.5));
}
#endif  // defined(_MSC_VER) && (_MSC_VER < 1800)
// Number of samples / coefficients in one 32x32 block.
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;

// Double-precision reference for a 32-point DCT-II.
//
//   out[k] = c(k) * sum_{n=0}^{31} in[n] * cos(pi * (2n + 1) * k / 64)
//
// with c(0) = 1/sqrt(2) and c(k) = 1 otherwise. `in` and `out` must not
// alias; every element of `out` is overwritten.
void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 32; k++) {
    // The accumulator must start from zero: callers pass uninitialized
    // scratch arrays.
    out[k] = 0.0;
    for (int n = 0; n < 32; n++)
      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
    // Only the DC term carries the extra 1/sqrt(2) normalization.
    if (k == 0)
      out[k] = out[k] * kInvSqrt2;
  }
}
54 void reference_32x32_dct_2d(const int16_t input
[kNumCoeffs
],
55 double output
[kNumCoeffs
]) {
56 // First transform columns
57 for (int i
= 0; i
< 32; ++i
) {
58 double temp_in
[32], temp_out
[32];
59 for (int j
= 0; j
< 32; ++j
)
60 temp_in
[j
] = input
[j
*32 + i
];
61 reference_32x32_dct_1d(temp_in
, temp_out
);
62 for (int j
= 0; j
< 32; ++j
)
63 output
[j
* 32 + i
] = temp_out
[j
];
65 // Then transform rows
66 for (int i
= 0; i
< 32; ++i
) {
67 double temp_in
[32], temp_out
[32];
68 for (int j
= 0; j
< 32; ++j
)
69 temp_in
[j
] = output
[j
+ i
*32];
70 reference_32x32_dct_1d(temp_in
, temp_out
);
71 // Scale by some magic number
72 for (int j
= 0; j
< 32; ++j
)
73 output
[j
+ i
* 32] = temp_out
[j
] / 4;
77 typedef void (*FwdTxfmFunc
)(const int16_t *in
, tran_low_t
*out
, int stride
);
78 typedef void (*InvTxfmFunc
)(const tran_low_t
*in
, uint8_t *out
, int stride
);
80 typedef std::tr1::tuple
<FwdTxfmFunc
, InvTxfmFunc
, int, vpx_bit_depth_t
>
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters that bind the bit-depth argument of the high-bit-depth C inverse
// transform so it fits the InvTxfmFunc signature used by the tests.
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
}

void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
93 class Trans32x32Test
: public ::testing::TestWithParam
<Trans32x32Param
> {
95 virtual ~Trans32x32Test() {}
96 virtual void SetUp() {
97 fwd_txfm_
= GET_PARAM(0);
98 inv_txfm_
= GET_PARAM(1);
99 version_
= GET_PARAM(2); // 0: high precision forward transform
100 // 1: low precision version for rd loop
101 bit_depth_
= GET_PARAM(3);
102 mask_
= (1 << bit_depth_
) - 1;
105 virtual void TearDown() { libvpx_test::ClearSystemState(); }
109 vpx_bit_depth_t bit_depth_
;
111 FwdTxfmFunc fwd_txfm_
;
112 InvTxfmFunc inv_txfm_
;
115 TEST_P(Trans32x32Test
, AccuracyCheck
) {
116 ACMRandom
rnd(ACMRandom::DeterministicSeed());
117 uint32_t max_error
= 0;
118 int64_t total_error
= 0;
119 const int count_test_block
= 10000;
120 DECLARE_ALIGNED(16, int16_t, test_input_block
[kNumCoeffs
]);
121 DECLARE_ALIGNED(16, tran_low_t
, test_temp_block
[kNumCoeffs
]);
122 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
123 DECLARE_ALIGNED(16, uint8_t, src
[kNumCoeffs
]);
124 #if CONFIG_VP9_HIGHBITDEPTH
125 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
126 DECLARE_ALIGNED(16, uint16_t, src16
[kNumCoeffs
]);
129 for (int i
= 0; i
< count_test_block
; ++i
) {
130 // Initialize a test block with input range [-mask_, mask_].
131 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
132 if (bit_depth_
== VPX_BITS_8
) {
133 src
[j
] = rnd
.Rand8();
134 dst
[j
] = rnd
.Rand8();
135 test_input_block
[j
] = src
[j
] - dst
[j
];
136 #if CONFIG_VP9_HIGHBITDEPTH
138 src16
[j
] = rnd
.Rand16() & mask_
;
139 dst16
[j
] = rnd
.Rand16() & mask_
;
140 test_input_block
[j
] = src16
[j
] - dst16
[j
];
145 ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block
, test_temp_block
, 32));
146 if (bit_depth_
== VPX_BITS_8
) {
147 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block
, dst
, 32));
148 #if CONFIG_VP9_HIGHBITDEPTH
150 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block
,
151 CONVERT_TO_BYTEPTR(dst16
), 32));
155 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
156 #if CONFIG_VP9_HIGHBITDEPTH
157 const uint32_t diff
=
158 bit_depth_
== VPX_BITS_8
? dst
[j
] - src
[j
] : dst16
[j
] - src16
[j
];
160 const uint32_t diff
= dst
[j
] - src
[j
];
162 const uint32_t error
= diff
* diff
;
163 if (max_error
< error
)
165 total_error
+= error
;
174 EXPECT_GE(1u << 2 * (bit_depth_
- 8), max_error
)
175 << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
177 EXPECT_GE(count_test_block
<< 2 * (bit_depth_
- 8), total_error
)
178 << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
181 TEST_P(Trans32x32Test
, CoeffCheck
) {
182 ACMRandom
rnd(ACMRandom::DeterministicSeed());
183 const int count_test_block
= 1000;
185 DECLARE_ALIGNED(16, int16_t, input_block
[kNumCoeffs
]);
186 DECLARE_ALIGNED(16, tran_low_t
, output_ref_block
[kNumCoeffs
]);
187 DECLARE_ALIGNED(16, tran_low_t
, output_block
[kNumCoeffs
]);
189 for (int i
= 0; i
< count_test_block
; ++i
) {
190 for (int j
= 0; j
< kNumCoeffs
; ++j
)
191 input_block
[j
] = (rnd
.Rand16() & mask_
) - (rnd
.Rand16() & mask_
);
193 const int stride
= 32;
194 vpx_fdct32x32_c(input_block
, output_ref_block
, stride
);
195 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block
, output_block
, stride
));
198 for (int j
= 0; j
< kNumCoeffs
; ++j
)
199 EXPECT_EQ(output_block
[j
], output_ref_block
[j
])
200 << "Error: 32x32 FDCT versions have mismatched coefficients";
202 for (int j
= 0; j
< kNumCoeffs
; ++j
)
203 EXPECT_GE(6, abs(output_block
[j
] - output_ref_block
[j
]))
204 << "Error: 32x32 FDCT rd has mismatched coefficients";
209 TEST_P(Trans32x32Test
, MemCheck
) {
210 ACMRandom
rnd(ACMRandom::DeterministicSeed());
211 const int count_test_block
= 2000;
213 DECLARE_ALIGNED(16, int16_t, input_extreme_block
[kNumCoeffs
]);
214 DECLARE_ALIGNED(16, tran_low_t
, output_ref_block
[kNumCoeffs
]);
215 DECLARE_ALIGNED(16, tran_low_t
, output_block
[kNumCoeffs
]);
217 for (int i
= 0; i
< count_test_block
; ++i
) {
218 // Initialize a test block with input range [-mask_, mask_].
219 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
220 input_extreme_block
[j
] = rnd
.Rand8() & 1 ? mask_
: -mask_
;
223 for (int j
= 0; j
< kNumCoeffs
; ++j
)
224 input_extreme_block
[j
] = mask_
;
226 for (int j
= 0; j
< kNumCoeffs
; ++j
)
227 input_extreme_block
[j
] = -mask_
;
230 const int stride
= 32;
231 vpx_fdct32x32_c(input_extreme_block
, output_ref_block
, stride
);
232 ASM_REGISTER_STATE_CHECK(
233 fwd_txfm_(input_extreme_block
, output_block
, stride
));
235 // The minimum quant value is 4.
236 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
238 EXPECT_EQ(output_block
[j
], output_ref_block
[j
])
239 << "Error: 32x32 FDCT versions have mismatched coefficients";
241 EXPECT_GE(6, abs(output_block
[j
] - output_ref_block
[j
]))
242 << "Error: 32x32 FDCT rd has mismatched coefficients";
244 EXPECT_GE(4 * DCT_MAX_VALUE
<< (bit_depth_
- 8), abs(output_ref_block
[j
]))
245 << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
246 EXPECT_GE(4 * DCT_MAX_VALUE
<< (bit_depth_
- 8), abs(output_block
[j
]))
247 << "Error: 32x32 FDCT has coefficient larger than "
248 << "4*DCT_MAX_VALUE";
253 TEST_P(Trans32x32Test
, InverseAccuracy
) {
254 ACMRandom
rnd(ACMRandom::DeterministicSeed());
255 const int count_test_block
= 1000;
256 DECLARE_ALIGNED(16, int16_t, in
[kNumCoeffs
]);
257 DECLARE_ALIGNED(16, tran_low_t
, coeff
[kNumCoeffs
]);
258 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
259 DECLARE_ALIGNED(16, uint8_t, src
[kNumCoeffs
]);
260 #if CONFIG_VP9_HIGHBITDEPTH
261 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
262 DECLARE_ALIGNED(16, uint16_t, src16
[kNumCoeffs
]);
265 for (int i
= 0; i
< count_test_block
; ++i
) {
266 double out_r
[kNumCoeffs
];
268 // Initialize a test block with input range [-255, 255]
269 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
270 if (bit_depth_
== VPX_BITS_8
) {
271 src
[j
] = rnd
.Rand8();
272 dst
[j
] = rnd
.Rand8();
273 in
[j
] = src
[j
] - dst
[j
];
274 #if CONFIG_VP9_HIGHBITDEPTH
276 src16
[j
] = rnd
.Rand16() & mask_
;
277 dst16
[j
] = rnd
.Rand16() & mask_
;
278 in
[j
] = src16
[j
] - dst16
[j
];
283 reference_32x32_dct_2d(in
, out_r
);
284 for (int j
= 0; j
< kNumCoeffs
; ++j
)
285 coeff
[j
] = static_cast<tran_low_t
>(round(out_r
[j
]));
286 if (bit_depth_
== VPX_BITS_8
) {
287 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff
, dst
, 32));
288 #if CONFIG_VP9_HIGHBITDEPTH
290 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff
, CONVERT_TO_BYTEPTR(dst16
), 32));
293 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
294 #if CONFIG_VP9_HIGHBITDEPTH
296 bit_depth_
== VPX_BITS_8
? dst
[j
] - src
[j
] : dst16
[j
] - src16
[j
];
298 const int diff
= dst
[j
] - src
[j
];
300 const int error
= diff
* diff
;
302 << "Error: 32x32 IDCT has error " << error
303 << " at index " << j
;
308 class PartialTrans32x32Test
309 : public ::testing::TestWithParam
<
310 std::tr1::tuple
<FwdTxfmFunc
, vpx_bit_depth_t
> > {
312 virtual ~PartialTrans32x32Test() {}
313 virtual void SetUp() {
314 fwd_txfm_
= GET_PARAM(0);
315 bit_depth_
= GET_PARAM(1);
318 virtual void TearDown() { libvpx_test::ClearSystemState(); }
321 vpx_bit_depth_t bit_depth_
;
322 FwdTxfmFunc fwd_txfm_
;
325 TEST_P(PartialTrans32x32Test
, Extremes
) {
326 #if CONFIG_VP9_HIGHBITDEPTH
327 const int16_t maxval
=
328 static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_
));
330 const int16_t maxval
= 255;
332 const int minval
= -maxval
;
333 DECLARE_ALIGNED(16, int16_t, input
[kNumCoeffs
]);
334 DECLARE_ALIGNED(16, tran_low_t
, output
[kNumCoeffs
]);
336 for (int i
= 0; i
< kNumCoeffs
; ++i
) input
[i
] = maxval
;
338 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input
, output
, 32));
339 EXPECT_EQ((maxval
* kNumCoeffs
) >> 3, output
[0]);
341 for (int i
= 0; i
< kNumCoeffs
; ++i
) input
[i
] = minval
;
343 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input
, output
, 32));
344 EXPECT_EQ((minval
* kNumCoeffs
) >> 3, output
[0]);
347 TEST_P(PartialTrans32x32Test
, Random
) {
348 #if CONFIG_VP9_HIGHBITDEPTH
349 const int16_t maxval
=
350 static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_
));
352 const int16_t maxval
= 255;
354 DECLARE_ALIGNED(16, int16_t, input
[kNumCoeffs
]);
355 DECLARE_ALIGNED(16, tran_low_t
, output
[kNumCoeffs
]);
356 ACMRandom
rnd(ACMRandom::DeterministicSeed());
359 for (int i
= 0; i
< kNumCoeffs
; ++i
) {
360 const int val
= (i
& 1) ? -rnd(maxval
+ 1) : rnd(maxval
+ 1);
365 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input
, output
, 32));
366 EXPECT_EQ(sum
>> 3, output
[0]);
369 using std::tr1::make_tuple
;
371 #if CONFIG_VP9_HIGHBITDEPTH
372 INSTANTIATE_TEST_CASE_P(
375 make_tuple(&vpx_highbd_fdct32x32_c
,
376 &idct32x32_10
, 0, VPX_BITS_10
),
377 make_tuple(&vpx_highbd_fdct32x32_rd_c
,
378 &idct32x32_10
, 1, VPX_BITS_10
),
379 make_tuple(&vpx_highbd_fdct32x32_c
,
380 &idct32x32_12
, 0, VPX_BITS_12
),
381 make_tuple(&vpx_highbd_fdct32x32_rd_c
,
382 &idct32x32_12
, 1, VPX_BITS_12
),
383 make_tuple(&vpx_fdct32x32_c
,
384 &vpx_idct32x32_1024_add_c
, 0, VPX_BITS_8
),
385 make_tuple(&vpx_fdct32x32_rd_c
,
386 &vpx_idct32x32_1024_add_c
, 1, VPX_BITS_8
)));
387 INSTANTIATE_TEST_CASE_P(
388 C
, PartialTrans32x32Test
,
389 ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c
, VPX_BITS_8
),
390 make_tuple(&vpx_highbd_fdct32x32_1_c
, VPX_BITS_10
),
391 make_tuple(&vpx_highbd_fdct32x32_1_c
, VPX_BITS_12
)));
393 INSTANTIATE_TEST_CASE_P(
396 make_tuple(&vpx_fdct32x32_c
,
397 &vpx_idct32x32_1024_add_c
, 0, VPX_BITS_8
),
398 make_tuple(&vpx_fdct32x32_rd_c
,
399 &vpx_idct32x32_1024_add_c
, 1, VPX_BITS_8
)));
400 INSTANTIATE_TEST_CASE_P(C
, PartialTrans32x32Test
,
401 ::testing::Values(make_tuple(&vpx_fdct32x32_1_c
,
403 #endif // CONFIG_VP9_HIGHBITDEPTH
// NEON: only the inverse transform is NEON here; it is paired with the C
// forward transforms.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c,
                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_c,
                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2, 8-bit builds: SSE2 forward and inverse transforms.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2,
                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2,
                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2, high-bit-depth builds: SSE2 forward transforms paired with the C
// inverse transforms.
// NOTE(review): the trailing bit-depth argument of several tuples was not
// recoverable from the damaged text; each one is reconstructed to match the
// inverse transform it is paired with - confirm upstream.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// AVX2: AVX2 forward transforms paired with the SSE2 inverse transform.
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2,
                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2,
                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA: MSA forward and inverse transforms.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa,
                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa,
                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
                                                     VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE