test/dct32x32_test.cc

   1 /*
   2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <math.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14
  15 #include "third_party/googletest/src/include/gtest/gtest.h"
  16
  17 #include "./vp9_rtcd.h"
  18 #include "./vpx_config.h"
  19 #include "./vpx_dsp_rtcd.h"
  20 #include "test/acm_random.h"
  21 #include "test/clear_system_state.h"
  22 #include "test/register_state_check.h"
  23 #include "test/util.h"
  24 #include "vp9/common/vp9_entropy.h"
  25 #include "vpx/vpx_codec.h"
  26 #include "vpx/vpx_integer.h"
  27 #include "vpx_ports/mem.h"
  28
  29 using libvpx_test::ACMRandom;
  30
  31 namespace {
  32 #ifdef _MSC_VER
  33 static int round(double x) {
  34   if (x < 0)
  35     return static_cast<int>(ceil(x - 0.5));
  36   else
  37     return static_cast<int>(floor(x + 0.5));
  38 }
  39 #endif
  40
  41 const int kNumCoeffs = 1024;
  42 const double kPi = 3.141592653589793238462643383279502884;
  43 void reference_32x32_dct_1d(const double in[32], double out[32]) {
  44   const double kInvSqrt2 = 0.707106781186547524400844362104;
  45   for (int k = 0; k < 32; k++) {
  46     out[k] = 0.0;
  47     for (int n = 0; n < 32; n++)
  48       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  49     if (k == 0)
  50       out[k] = out[k] * kInvSqrt2;
  51   }
  52 }
  53
  54 void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  55                             double output[kNumCoeffs]) {
  56   // First transform columns
  57   for (int i = 0; i < 32; ++i) {
  58     double temp_in[32], temp_out[32];
  59     for (int j = 0; j < 32; ++j)
  60       temp_in[j] = input[j*32 + i];
  61     reference_32x32_dct_1d(temp_in, temp_out);
  62     for (int j = 0; j < 32; ++j)
  63       output[j * 32 + i] = temp_out[j];
  64   }
  65   // Then transform rows
  66   for (int i = 0; i < 32; ++i) {
  67     double temp_in[32], temp_out[32];
  68     for (int j = 0; j < 32; ++j)
  69       temp_in[j] = output[j + i*32];
  70     reference_32x32_dct_1d(temp_in, temp_out);
  71     // Scale by some magic number
  72     for (int j = 0; j < 32; ++j)
  73       output[j + i * 32] = temp_out[j] / 4;
  74   }
  75 }
  76
  77 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
  78 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
  79
  80 typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
  81     Trans32x32Param;
  82
  83 #if CONFIG_VP9_HIGHBITDEPTH
  84 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  85   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
  86 }
  87
  88 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  89   vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
  90 }
  91 #endif  // CONFIG_VP9_HIGHBITDEPTH
  92
  93 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
  94  public:
  95   virtual ~Trans32x32Test() {}
  96   virtual void SetUp() {
  97     fwd_txfm_ = GET_PARAM(0);
  98     inv_txfm_ = GET_PARAM(1);
  99     version_  = GET_PARAM(2);  // 0: high precision forward transform
 100                                // 1: low precision version for rd loop
 101     bit_depth_ = GET_PARAM(3);
 102     mask_ = (1 << bit_depth_) - 1;
 103   }
 104
 105   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 106
 107  protected:
 108   int version_;
 109   vpx_bit_depth_t bit_depth_;
 110   int mask_;
 111   FwdTxfmFunc fwd_txfm_;
 112   InvTxfmFunc inv_txfm_;
 113 };
 114
 115 TEST_P(Trans32x32Test, AccuracyCheck) {
 116   ACMRandom rnd(ACMRandom::DeterministicSeed());
 117   uint32_t max_error = 0;
 118   int64_t total_error = 0;
 119   const int count_test_block = 10000;
 120   DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
 121   DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
 122   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 123   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 124 #if CONFIG_VP9_HIGHBITDEPTH
 125   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 126   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 127 #endif
 128
 129   for (int i = 0; i < count_test_block; ++i) {
 130     // Initialize a test block with input range [-mask_, mask_].
 131     for (int j = 0; j < kNumCoeffs; ++j) {
 132       if (bit_depth_ == VPX_BITS_8) {
 133         src[j] = rnd.Rand8();
 134         dst[j] = rnd.Rand8();
 135         test_input_block[j] = src[j] - dst[j];
 136 #if CONFIG_VP9_HIGHBITDEPTH
 137       } else {
 138         src16[j] = rnd.Rand16() & mask_;
 139         dst16[j] = rnd.Rand16() & mask_;
 140         test_input_block[j] = src16[j] - dst16[j];
 141 #endif
 142       }
 143     }
 144
 145     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
 146     if (bit_depth_ == VPX_BITS_8) {
 147       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 148 #if CONFIG_VP9_HIGHBITDEPTH
 149     } else {
 150       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block,
 151                                          CONVERT_TO_BYTEPTR(dst16), 32));
 152 #endif
 153     }
 154
 155     for (int j = 0; j < kNumCoeffs; ++j) {
 156 #if CONFIG_VP9_HIGHBITDEPTH
 157       const uint32_t diff =
 158           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 159 #else
 160       const uint32_t diff = dst[j] - src[j];
 161 #endif
 162       const uint32_t error = diff * diff;
 163       if (max_error < error)
 164         max_error = error;
 165       total_error += error;
 166     }
 167   }
 168
 169   if (version_ == 1) {
 170     max_error /= 2;
 171     total_error /= 45;
 172   }
 173
 174   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
 175       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
 176
 177   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
 178       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
 179 }
 180
 181 TEST_P(Trans32x32Test, CoeffCheck) {
 182   ACMRandom rnd(ACMRandom::DeterministicSeed());
 183   const int count_test_block = 1000;
 184
 185   DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
 186   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 187   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 188
 189   for (int i = 0; i < count_test_block; ++i) {
 190     for (int j = 0; j < kNumCoeffs; ++j)
 191       input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
 192
 193     const int stride = 32;
 194     vpx_fdct32x32_c(input_block, output_ref_block, stride);
 195     ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 196
 197     if (version_ == 0) {
 198       for (int j = 0; j < kNumCoeffs; ++j)
 199         EXPECT_EQ(output_block[j], output_ref_block[j])
 200             << "Error: 32x32 FDCT versions have mismatched coefficients";
 201     } else {
 202       for (int j = 0; j < kNumCoeffs; ++j)
 203         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 204             << "Error: 32x32 FDCT rd has mismatched coefficients";
 205     }
 206   }
 207 }
 208
 209 TEST_P(Trans32x32Test, MemCheck) {
 210   ACMRandom rnd(ACMRandom::DeterministicSeed());
 211   const int count_test_block = 2000;
 212
 213   DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
 214   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
 215   DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
 216
 217   for (int i = 0; i < count_test_block; ++i) {
 218     // Initialize a test block with input range [-mask_, mask_].
 219     for (int j = 0; j < kNumCoeffs; ++j) {
 220       input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
 221     }
 222     if (i == 0) {
 223       for (int j = 0; j < kNumCoeffs; ++j)
 224         input_extreme_block[j] = mask_;
 225     } else if (i == 1) {
 226       for (int j = 0; j < kNumCoeffs; ++j)
 227         input_extreme_block[j] = -mask_;
 228     }
 229
 230     const int stride = 32;
 231     vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
 232     ASM_REGISTER_STATE_CHECK(
 233         fwd_txfm_(input_extreme_block, output_block, stride));
 234
 235     // The minimum quant value is 4.
 236     for (int j = 0; j < kNumCoeffs; ++j) {
 237       if (version_ == 0) {
 238         EXPECT_EQ(output_block[j], output_ref_block[j])
 239             << "Error: 32x32 FDCT versions have mismatched coefficients";
 240       } else {
 241         EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
 242             << "Error: 32x32 FDCT rd has mismatched coefficients";
 243       }
 244       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
 245           << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
 246       EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
 247           << "Error: 32x32 FDCT has coefficient larger than "
 248           << "4*DCT_MAX_VALUE";
 249     }
 250   }
 251 }
 252
 253 TEST_P(Trans32x32Test, InverseAccuracy) {
 254   ACMRandom rnd(ACMRandom::DeterministicSeed());
 255   const int count_test_block = 1000;
 256   DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
 257   DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
 258   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
 259   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 260 #if CONFIG_VP9_HIGHBITDEPTH
 261   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 262   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 263 #endif
 264
 265   for (int i = 0; i < count_test_block; ++i) {
 266     double out_r[kNumCoeffs];
 267
 268     // Initialize a test block with input range [-255, 255]
 269     for (int j = 0; j < kNumCoeffs; ++j) {
 270       if (bit_depth_ == VPX_BITS_8) {
 271         src[j] = rnd.Rand8();
 272         dst[j] = rnd.Rand8();
 273         in[j] = src[j] - dst[j];
 274 #if CONFIG_VP9_HIGHBITDEPTH
 275       } else {
 276         src16[j] = rnd.Rand16() & mask_;
 277         dst16[j] = rnd.Rand16() & mask_;
 278         in[j] = src16[j] - dst16[j];
 279 #endif
 280       }
 281     }
 282
 283     reference_32x32_dct_2d(in, out_r);
 284     for (int j = 0; j < kNumCoeffs; ++j)
 285       coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
 286     if (bit_depth_ == VPX_BITS_8) {
 287       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 288 #if CONFIG_VP9_HIGHBITDEPTH
 289     } else {
 290       ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 291 #endif
 292     }
 293     for (int j = 0; j < kNumCoeffs; ++j) {
 294 #if CONFIG_VP9_HIGHBITDEPTH
 295       const int diff =
 296           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 297 #else
 298       const int diff = dst[j] - src[j];
 299 #endif
 300       const int error = diff * diff;
 301       EXPECT_GE(1, error)
 302           << "Error: 32x32 IDCT has error " << error
 303           << " at index " << j;
 304     }
 305   }
 306 }
 307
 308 class PartialTrans32x32Test
 309     : public ::testing::TestWithParam<
 310           std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
 311  public:
 312   virtual ~PartialTrans32x32Test() {}
 313   virtual void SetUp() {
 314     fwd_txfm_ = GET_PARAM(0);
 315     bit_depth_ = GET_PARAM(1);
 316   }
 317
 318   virtual void TearDown() { libvpx_test::ClearSystemState(); }
 319
 320  protected:
 321   vpx_bit_depth_t bit_depth_;
 322   FwdTxfmFunc fwd_txfm_;
 323 };
 324
 325 TEST_P(PartialTrans32x32Test, Extremes) {
 326 #if CONFIG_VP9_HIGHBITDEPTH
 327   const int16_t maxval =
 328       static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 329 #else
 330   const int16_t maxval = 255;
 331 #endif
 332   const int minval = -maxval;
 333   DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
 334   DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
 335
 336   for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
 337   output[0] = 0;
 338   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 339   EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
 340
 341   for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
 342   output[0] = 0;
 343   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 344   EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
 345 }
 346
 347 TEST_P(PartialTrans32x32Test, Random) {
 348 #if CONFIG_VP9_HIGHBITDEPTH
 349   const int16_t maxval =
 350       static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
 351 #else
 352   const int16_t maxval = 255;
 353 #endif
 354   DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
 355   DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
 356   ACMRandom rnd(ACMRandom::DeterministicSeed());
 357
 358   int sum = 0;
 359   for (int i = 0; i < kNumCoeffs; ++i) {
 360     const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
 361     input[i] = val;
 362     sum += val;
 363   }
 364   output[0] = 0;
 365   ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
 366   EXPECT_EQ(sum >> 3, output[0]);
 367 }
 368
 369 using std::tr1::make_tuple;
 370
 371 #if CONFIG_VP9_HIGHBITDEPTH
 372 INSTANTIATE_TEST_CASE_P(
 373     C, Trans32x32Test,
 374     ::testing::Values(
 375         make_tuple(&vpx_highbd_fdct32x32_c,
 376                    &idct32x32_10, 0, VPX_BITS_10),
 377         make_tuple(&vpx_highbd_fdct32x32_rd_c,
 378                    &idct32x32_10, 1, VPX_BITS_10),
 379         make_tuple(&vpx_highbd_fdct32x32_c,
 380                    &idct32x32_12, 0, VPX_BITS_12),
 381         make_tuple(&vpx_highbd_fdct32x32_rd_c,
 382                    &idct32x32_12, 1, VPX_BITS_12),
 383         make_tuple(&vpx_fdct32x32_c,
 384                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
 385         make_tuple(&vpx_fdct32x32_rd_c,
 386                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 387 INSTANTIATE_TEST_CASE_P(
 388     C, PartialTrans32x32Test,
 389     ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
 390                       make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
 391                       make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 392 #else
 393 INSTANTIATE_TEST_CASE_P(
 394     C, Trans32x32Test,
 395     ::testing::Values(
 396         make_tuple(&vpx_fdct32x32_c,
 397                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
 398         make_tuple(&vpx_fdct32x32_rd_c,
 399                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 400 INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
 401                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
 402                                                      VPX_BITS_8)));
 403 #endif  // CONFIG_VP9_HIGHBITDEPTH
 404
 405 #if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 406 INSTANTIATE_TEST_CASE_P(
 407     NEON, Trans32x32Test,
 408     ::testing::Values(
 409         make_tuple(&vpx_fdct32x32_c,
 410                    &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
 411         make_tuple(&vpx_fdct32x32_rd_c,
 412                    &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
 413 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 414
 415 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 416 INSTANTIATE_TEST_CASE_P(
 417     SSE2, Trans32x32Test,
 418     ::testing::Values(
 419         make_tuple(&vpx_fdct32x32_sse2,
 420                    &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 421         make_tuple(&vpx_fdct32x32_rd_sse2,
 422                    &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 423 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
 424                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
 425                                                      VPX_BITS_8)));
 426 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 427
 428 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 429 INSTANTIATE_TEST_CASE_P(
 430     SSE2, Trans32x32Test,
 431     ::testing::Values(
 432         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
 433         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
 434                    VPX_BITS_10),
 435         make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
 436         make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
 437                    VPX_BITS_12),
 438         make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
 439                    VPX_BITS_8),
 440         make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
 441                    VPX_BITS_8)));
 442 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
 443                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
 444                                                      VPX_BITS_8)));
 445 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 446
 447 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 448 INSTANTIATE_TEST_CASE_P(
 449     AVX2, Trans32x32Test,
 450     ::testing::Values(
 451         make_tuple(&vpx_fdct32x32_avx2,
 452                    &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
 453         make_tuple(&vpx_fdct32x32_rd_avx2,
 454                    &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
 455 #endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 456
 457 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 458 INSTANTIATE_TEST_CASE_P(
 459     MSA, Trans32x32Test,
 460     ::testing::Values(
 461         make_tuple(&vpx_fdct32x32_msa,
 462                    &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
 463         make_tuple(&vpx_fdct32x32_rd_msa,
 464                    &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
 465 INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
 466                         ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
 467                                                      VPX_BITS_8)));
 468 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 469 }  // namespace