Merge "remove mmx variance functions"
[aom.git] / test / dct32x32_test.cc
blob1cbac5c636ce28158e63c918d3f6bea3e279e80d
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
15 #include "third_party/googletest/src/include/gtest/gtest.h"
17 #include "./vp9_rtcd.h"
18 #include "./vpx_config.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vpx/vpx_codec.h"
26 #include "vpx/vpx_integer.h"
27 #include "vpx_ports/mem.h"
29 using libvpx_test::ACMRandom;
31 namespace {
#ifdef _MSC_VER
// Fallback round() used only when building with MSVC (presumably because the
// targeted MSVC toolchains do not ship a C99 round() -- confirm before
// removing). Rounds to the nearest integer with halfway cases going away from
// zero, and returns the result as an int.
static int round(double x) {
  if (x < 0)
    return static_cast<int>(ceil(x - 0.5));
  else
    return static_cast<int>(floor(x + 0.5));
}
#endif
// Number of coefficients in one 32x32 block.
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;

// Double-precision reference for a 32-point 1-D DCT.
//
//   out[k] = sum_{n=0}^{31} in[n] * cos(pi * (2n + 1) * k / 64)
//
// The k == 0 (DC) term is additionally multiplied by 1/sqrt(2). No other
// normalisation is applied here; callers scale the result themselves.
//
// in:  32 input samples.
// out: receives the 32 transform coefficients (fully overwritten).
void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 32; k++) {
    out[k] = 0.0;
    for (int n = 0; n < 32; n++)
      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
    if (k == 0)
      out[k] = out[k] * kInvSqrt2;
  }
}
54 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
55 double output[kNumCoeffs]) {
56 // First transform columns
57 for (int i = 0; i < 32; ++i) {
58 double temp_in[32], temp_out[32];
59 for (int j = 0; j < 32; ++j)
60 temp_in[j] = input[j*32 + i];
61 reference_32x32_dct_1d(temp_in, temp_out);
62 for (int j = 0; j < 32; ++j)
63 output[j * 32 + i] = temp_out[j];
65 // Then transform rows
66 for (int i = 0; i < 32; ++i) {
67 double temp_in[32], temp_out[32];
68 for (int j = 0; j < 32; ++j)
69 temp_in[j] = output[j + i*32];
70 reference_32x32_dct_1d(temp_in, temp_out);
71 // Scale by some magic number
72 for (int j = 0; j < 32; ++j)
73 output[j + i * 32] = temp_out[j] / 4;
77 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
78 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
80 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
81 Trans32x32Param;
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters that bind the bit-depth argument of the high-bit-depth C inverse
// transform so it fits the three-argument InvTxfmFunc signature.

// 10-bit variant.
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
}

// 12-bit variant.
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
93 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
94 public:
95 virtual ~Trans32x32Test() {}
96 virtual void SetUp() {
97 fwd_txfm_ = GET_PARAM(0);
98 inv_txfm_ = GET_PARAM(1);
99 version_ = GET_PARAM(2); // 0: high precision forward transform
100 // 1: low precision version for rd loop
101 bit_depth_ = GET_PARAM(3);
102 mask_ = (1 << bit_depth_) - 1;
105 virtual void TearDown() { libvpx_test::ClearSystemState(); }
107 protected:
108 int version_;
109 vpx_bit_depth_t bit_depth_;
110 int mask_;
111 FwdTxfmFunc fwd_txfm_;
112 InvTxfmFunc inv_txfm_;
115 TEST_P(Trans32x32Test, AccuracyCheck) {
116 ACMRandom rnd(ACMRandom::DeterministicSeed());
117 uint32_t max_error = 0;
118 int64_t total_error = 0;
119 const int count_test_block = 10000;
120 DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
121 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
122 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
123 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
124 #if CONFIG_VP9_HIGHBITDEPTH
125 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
126 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
127 #endif
129 for (int i = 0; i < count_test_block; ++i) {
130 // Initialize a test block with input range [-mask_, mask_].
131 for (int j = 0; j < kNumCoeffs; ++j) {
132 if (bit_depth_ == VPX_BITS_8) {
133 src[j] = rnd.Rand8();
134 dst[j] = rnd.Rand8();
135 test_input_block[j] = src[j] - dst[j];
136 #if CONFIG_VP9_HIGHBITDEPTH
137 } else {
138 src16[j] = rnd.Rand16() & mask_;
139 dst16[j] = rnd.Rand16() & mask_;
140 test_input_block[j] = src16[j] - dst16[j];
141 #endif
145 ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
146 if (bit_depth_ == VPX_BITS_8) {
147 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
148 #if CONFIG_VP9_HIGHBITDEPTH
149 } else {
150 ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
151 CONVERT_TO_BYTEPTR(dst16), 32));
152 #endif
155 for (int j = 0; j < kNumCoeffs; ++j) {
156 #if CONFIG_VP9_HIGHBITDEPTH
157 const uint32_t diff =
158 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
159 #else
160 const uint32_t diff = dst[j] - src[j];
161 #endif
162 const uint32_t error = diff * diff;
163 if (max_error < error)
164 max_error = error;
165 total_error += error;
169 if (version_ == 1) {
170 max_error /= 2;
171 total_error /= 45;
174 EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
175 << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
177 EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
178 << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
181 TEST_P(Trans32x32Test, CoeffCheck) {
182 ACMRandom rnd(ACMRandom::DeterministicSeed());
183 const int count_test_block = 1000;
185 DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
186 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
187 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
189 for (int i = 0; i < count_test_block; ++i) {
190 for (int j = 0; j < kNumCoeffs; ++j)
191 input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
193 const int stride = 32;
194 vpx_fdct32x32_c(input_block, output_ref_block, stride);
195 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
197 if (version_ == 0) {
198 for (int j = 0; j < kNumCoeffs; ++j)
199 EXPECT_EQ(output_block[j], output_ref_block[j])
200 << "Error: 32x32 FDCT versions have mismatched coefficients";
201 } else {
202 for (int j = 0; j < kNumCoeffs; ++j)
203 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
204 << "Error: 32x32 FDCT rd has mismatched coefficients";
209 TEST_P(Trans32x32Test, MemCheck) {
210 ACMRandom rnd(ACMRandom::DeterministicSeed());
211 const int count_test_block = 2000;
213 DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
214 DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
215 DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
217 for (int i = 0; i < count_test_block; ++i) {
218 // Initialize a test block with input range [-mask_, mask_].
219 for (int j = 0; j < kNumCoeffs; ++j) {
220 input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
222 if (i == 0) {
223 for (int j = 0; j < kNumCoeffs; ++j)
224 input_extreme_block[j] = mask_;
225 } else if (i == 1) {
226 for (int j = 0; j < kNumCoeffs; ++j)
227 input_extreme_block[j] = -mask_;
230 const int stride = 32;
231 vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
232 ASM_REGISTER_STATE_CHECK(
233 fwd_txfm_(input_extreme_block, output_block, stride));
235 // The minimum quant value is 4.
236 for (int j = 0; j < kNumCoeffs; ++j) {
237 if (version_ == 0) {
238 EXPECT_EQ(output_block[j], output_ref_block[j])
239 << "Error: 32x32 FDCT versions have mismatched coefficients";
240 } else {
241 EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
242 << "Error: 32x32 FDCT rd has mismatched coefficients";
244 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
245 << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
246 EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
247 << "Error: 32x32 FDCT has coefficient larger than "
248 << "4*DCT_MAX_VALUE";
253 TEST_P(Trans32x32Test, InverseAccuracy) {
254 ACMRandom rnd(ACMRandom::DeterministicSeed());
255 const int count_test_block = 1000;
256 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
257 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
258 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
259 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
260 #if CONFIG_VP9_HIGHBITDEPTH
261 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
262 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
263 #endif
265 for (int i = 0; i < count_test_block; ++i) {
266 double out_r[kNumCoeffs];
268 // Initialize a test block with input range [-255, 255]
269 for (int j = 0; j < kNumCoeffs; ++j) {
270 if (bit_depth_ == VPX_BITS_8) {
271 src[j] = rnd.Rand8();
272 dst[j] = rnd.Rand8();
273 in[j] = src[j] - dst[j];
274 #if CONFIG_VP9_HIGHBITDEPTH
275 } else {
276 src16[j] = rnd.Rand16() & mask_;
277 dst16[j] = rnd.Rand16() & mask_;
278 in[j] = src16[j] - dst16[j];
279 #endif
283 reference_32x32_dct_2d(in, out_r);
284 for (int j = 0; j < kNumCoeffs; ++j)
285 coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
286 if (bit_depth_ == VPX_BITS_8) {
287 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
288 #if CONFIG_VP9_HIGHBITDEPTH
289 } else {
290 ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
291 #endif
293 for (int j = 0; j < kNumCoeffs; ++j) {
294 #if CONFIG_VP9_HIGHBITDEPTH
295 const int diff =
296 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
297 #else
298 const int diff = dst[j] - src[j];
299 #endif
300 const int error = diff * diff;
301 EXPECT_GE(1, error)
302 << "Error: 32x32 IDCT has error " << error
303 << " at index " << j;
308 class PartialTrans32x32Test
309 : public ::testing::TestWithParam<
310 std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
311 public:
312 virtual ~PartialTrans32x32Test() {}
313 virtual void SetUp() {
314 fwd_txfm_ = GET_PARAM(0);
315 bit_depth_ = GET_PARAM(1);
318 virtual void TearDown() { libvpx_test::ClearSystemState(); }
320 protected:
321 vpx_bit_depth_t bit_depth_;
322 FwdTxfmFunc fwd_txfm_;
325 TEST_P(PartialTrans32x32Test, Extremes) {
326 #if CONFIG_VP9_HIGHBITDEPTH
327 const int16_t maxval =
328 static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
329 #else
330 const int16_t maxval = 255;
331 #endif
332 const int minval = -maxval;
333 DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
334 DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
336 for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
337 output[0] = 0;
338 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
339 EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
341 for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
342 output[0] = 0;
343 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
344 EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
347 TEST_P(PartialTrans32x32Test, Random) {
348 #if CONFIG_VP9_HIGHBITDEPTH
349 const int16_t maxval =
350 static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
351 #else
352 const int16_t maxval = 255;
353 #endif
354 DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
355 DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
356 ACMRandom rnd(ACMRandom::DeterministicSeed());
358 int sum = 0;
359 for (int i = 0; i < kNumCoeffs; ++i) {
360 const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
361 input[i] = val;
362 sum += val;
364 output[0] = 0;
365 ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
366 EXPECT_EQ(sum >> 3, output[0]);
369 using std::tr1::make_tuple;
371 #if CONFIG_VP9_HIGHBITDEPTH
372 INSTANTIATE_TEST_CASE_P(
373 C, Trans32x32Test,
374 ::testing::Values(
375 make_tuple(&vpx_highbd_fdct32x32_c,
376 &idct32x32_10, 0, VPX_BITS_10),
377 make_tuple(&vpx_highbd_fdct32x32_rd_c,
378 &idct32x32_10, 1, VPX_BITS_10),
379 make_tuple(&vpx_highbd_fdct32x32_c,
380 &idct32x32_12, 0, VPX_BITS_12),
381 make_tuple(&vpx_highbd_fdct32x32_rd_c,
382 &idct32x32_12, 1, VPX_BITS_12),
383 make_tuple(&vpx_fdct32x32_c,
384 &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
385 make_tuple(&vpx_fdct32x32_rd_c,
386 &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
387 INSTANTIATE_TEST_CASE_P(
388 C, PartialTrans32x32Test,
389 ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
390 make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
391 make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
392 #else
393 INSTANTIATE_TEST_CASE_P(
394 C, Trans32x32Test,
395 ::testing::Values(
396 make_tuple(&vpx_fdct32x32_c,
397 &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
398 make_tuple(&vpx_fdct32x32_rd_c,
399 &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
400 INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
401 ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
402 VPX_BITS_8)));
403 #endif // CONFIG_VP9_HIGHBITDEPTH
// NEON: only the inverse transform is the NEON version; it is paired with the
// C forward transforms.
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c,
                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_c,
                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2, 8-bit-only build: SSE2 forward and inverse transforms.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2,
                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2,
                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2, high-bit-depth build: SSE2 forward transforms paired with the C
// inverse transforms (the bit-depth-bound adapters for 10/12-bit).
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
                                                     VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// AVX2: AVX2 forward transforms paired with the SSE2 inverse transform.
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2,
                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2,
                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA: MSA forward and inverse transforms.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa,
                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa,
                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
                                                     VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
469 } // namespace