test/av1_convolve_scale_test.cc

   1 /*
   2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11
  12 #include <vector>
  13
  14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
  15
  16 #include "./av1_rtcd.h"
  17 #include "aom_ports/aom_timer.h"
  18 #include "test/acm_random.h"
  19 #include "test/clear_system_state.h"
  20 #include "test/register_state_check.h"
  21 #include "test/util.h"
  22
  23 #if CONFIG_JNT_COMP
  24 #include "av1/common/common_data.h"
  25 #endif
  26
  27 namespace {
// Number of randomized iterations run by each correctness test.
const int kTestIters = 10;
// Number of calls timed per implementation in the speed test.
const int kPerfIters = 1000;

// Border kept around every test block: kVPad rows above and below, kHPad
// columns to the left and right.
const int kVPad = 32;
const int kHPad = 32;
// Fixed step sizes passed to the convolve functions as x_step_qn and
// y_step_qn.
const int kXStepQn = 16;
const int kYStepQn = 20;
  35
  36 using std::tr1::tuple;
  37 using std::tr1::make_tuple;
  38 using libaom_test::ACMRandom;
  39
  40 enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
  41 int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; }
  42
// A 16-bit filter with a configurable number of taps.
//
// set() fills coeffs_ and points params_.filter_ptr at its first element, so
// params_ is only meaningful after set() has been called. Because params_
// holds a raw pointer into coeffs_, a copy of a TestFilter would keep
// pointing at the source object's storage until set() is called on the copy.
class TestFilter {
 public:
  // (Re)builds the coefficient table for the given tap count and refreshes
  // params_. |backwards| selects a decreasing rather than increasing ramp.
  void set(NTaps ntaps, bool backwards);

  // Filter description handed to the convolve functions.
  InterpFilterParams params_;

 private:
  // Backing storage referenced by params_.filter_ptr.
  std::vector<int16_t> coeffs_;
};
  53
  54 void TestFilter::set(NTaps ntaps, bool backwards) {
  55   const int n = NTapsToInt(ntaps);
  56   assert(n >= 8 && n <= 12);
  57
  58   // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
  59   // elements at the end so that convolutions can read off the end safely.
  60   coeffs_.resize(n * SUBPEL_SHIFTS + 8);
  61
  62   // The coefficients are pretty much arbitrary, but convolutions shouldn't
  63   // over or underflow. For the first filter (subpels = 0), we use an
  64   // increasing or decreasing ramp (depending on the backwards parameter). We
  65   // don't want any zero coefficients, so we make it have an x-intercept at -1
  66   // or n. To ensure absence of under/overflow, we normalise the area under the
  67   // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
  68   // gives the identity).
  69   //
  70   // When increasing, the function has the form:
  71   //
  72   //   f(x) = A * (x + 1)
  73   //
  74   // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
  75   // filter is reversed, we have the same A but with formula
  76   //
  77   //   g(x) = A * (n - x)
  78   const int I = 1 << FILTER_BITS;
  79   const float A = 2.f * I / (n * (n + 1.f));
  80   for (int i = 0; i < n; ++i) {
  81     coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
  82   }
  83
  84   // For the other filters, make them slightly different by swapping two
  85   // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
  86   const size_t filter_size = sizeof(coeffs_[0] * n);
  87   int16_t *const filter0 = &coeffs_[0];
  88   for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
  89     int16_t *filterk = &coeffs_[k * n];
  90     memcpy(filterk, filter0, filter_size);
  91
  92     const int idx0 = k % n;
  93     const int idx1 = (7 * k) % n;
  94
  95     const int16_t tmp = filterk[idx0];
  96     filterk[idx0] = filterk[idx1];
  97     filterk[idx1] = tmp;
  98   }
  99
 100   // Finally, write some rubbish at the end to make sure we don't use it.
 101   for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
 102
 103   // Fill in params
 104   params_.filter_ptr = &coeffs_[0];
 105   params_.taps = n;
 106   // These are ignored by the functions being tested. Set them to whatever.
 107   params_.subpel_shifts = SUBPEL_SHIFTS;
 108   params_.interp_filter = EIGHTTAP_REGULAR;
 109 }
 110
 111 template <typename SrcPixel>
 112 class TestImage {
 113  public:
 114   TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
 115     assert(bd < 16);
 116     assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
 117
 118     // Pad width by 2*kHPad and then round up to the next multiple of 16
 119     // to get src_stride_. Add another 16 for dst_stride_ (to make sure
 120     // something goes wrong if we use the wrong one)
 121     src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
 122     dst_stride_ = src_stride_ + 16;
 123
 124     // Allocate image data
 125     src_data_.resize(2 * src_block_size());
 126     dst_data_.resize(2 * dst_block_size());
 127   }
 128
 129   void Initialize(ACMRandom *rnd);
 130   void Check() const;
 131
 132   int src_stride() const { return src_stride_; }
 133   int dst_stride() const { return dst_stride_; }
 134
 135   int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
 136   int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
 137
 138   const SrcPixel *GetSrcData(bool ref, bool borders) const {
 139     const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
 140     return borders ? block : block + kHPad + src_stride_ * kVPad;
 141   }
 142
 143   int32_t *GetDstData(bool ref, bool borders) {
 144     int32_t *block = &dst_data_[ref ? 0 : dst_block_size()];
 145     return borders ? block : block + kHPad + dst_stride_ * kVPad;
 146   }
 147
 148  private:
 149   int w_, h_, bd_;
 150   int src_stride_, dst_stride_;
 151
 152   std::vector<SrcPixel> src_data_;
 153   std::vector<int32_t> dst_data_;
 154 };
 155
 156 template <typename Pixel>
 157 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
 158   if (!trash) {
 159     memset(data, 0, sizeof(*data) * num_pixels);
 160     return;
 161   }
 162   const Pixel mask = (1 << bd) - 1;
 163   for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
 164 }
 165
 166 template <typename Pixel>
 167 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
 168                  bool trash_edges, Pixel *data) {
 169   assert(rnd);
 170   const Pixel mask = (1 << bd) - 1;
 171
 172   // Fill in the first buffer with random data
 173   // Top border
 174   FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
 175   for (int r = 0; r < h; ++r) {
 176     Pixel *row_data = data + (kVPad + r) * stride;
 177     // Left border, contents, right border
 178     FillEdge(rnd, kHPad, bd, trash_edges, row_data);
 179     for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
 180     FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
 181   }
 182   // Bottom border
 183   FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
 184
 185   const int bpp = sizeof(*data);
 186   const int block_elts = stride * (h + 2 * kVPad);
 187   const int block_size = bpp * block_elts;
 188
 189   // Now copy that to the second buffer
 190   memcpy(data + block_elts, data, block_size);
 191 }
 192
 193 template <typename SrcPixel>
 194 void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
 195   PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
 196   PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
 197 }
 198
 199 template <typename SrcPixel>
 200 void TestImage<SrcPixel>::Check() const {
 201   // If memcmp returns 0, there's nothing to do.
 202   const int num_pixels = dst_block_size();
 203   const int32_t *ref_dst = &dst_data_[0];
 204   const int32_t *tst_dst = &dst_data_[num_pixels];
 205
 206   if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
 207
 208   // Otherwise, iterate through the buffer looking for differences (including
 209   // the edges)
 210   const int stride = dst_stride_;
 211   for (int r = 0; r < h_ + 2 * kVPad; ++r) {
 212     for (int c = 0; c < w_ + 2 * kHPad; ++c) {
 213       const int32_t ref_value = ref_dst[r * stride + c];
 214       const int32_t tst_value = tst_dst[r * stride + c];
 215
 216       EXPECT_EQ(tst_value, ref_value)
 217           << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
 218     }
 219   }
 220 }
 221
 222 typedef tuple<int, int> BlockDimension;
 223
 224 struct BaseParams {
 225   BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg)
 226       : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {}
 227
 228   BlockDimension dims;
 229   NTaps ntaps_x, ntaps_y;
 230   bool avg;
 231 };
 232
 233 template <typename SrcPixel>
 234 class ConvolveScaleTestBase : public ::testing::Test {
 235  public:
 236   ConvolveScaleTestBase() : image_(NULL) {}
 237   virtual ~ConvolveScaleTestBase() { delete image_; }
 238   virtual void TearDown() { libaom_test::ClearSystemState(); }
 239
 240   // Implemented by subclasses (SetUp depends on the parameters passed
 241   // in and RunOne depends on the function to be tested. These can't
 242   // be templated for low/high bit depths because they have different
 243   // numbers of parameters)
 244   virtual void SetUp() = 0;
 245   virtual void RunOne(bool ref) = 0;
 246
 247  protected:
 248   void SetParams(const BaseParams &params, int bd) {
 249     width_ = std::tr1::get<0>(params.dims);
 250     height_ = std::tr1::get<1>(params.dims);
 251     ntaps_x_ = params.ntaps_x;
 252     ntaps_y_ = params.ntaps_y;
 253     bd_ = bd;
 254     avg_ = params.avg;
 255
 256     filter_x_.set(ntaps_x_, false);
 257     filter_y_.set(ntaps_y_, true);
 258     convolve_params_ =
 259         get_conv_params_no_round(0, avg_ != false, 0, NULL, 0, 1);
 260
 261     delete image_;
 262     image_ = new TestImage<SrcPixel>(width_, height_, bd_);
 263   }
 264
 265 #if CONFIG_JNT_COMP
 266   void SetConvParamOffset(int i, int j) {
 267     if (i == -1 && j == -1) {
 268       convolve_params_.use_jnt_comp_avg = 0;
 269     } else {
 270       convolve_params_.use_jnt_comp_avg = 1;
 271       convolve_params_.fwd_offset = quant_dist_lookup_table[i][j][0];
 272       convolve_params_.bck_offset = quant_dist_lookup_table[i][j][1];
 273     }
 274   }
 275 #endif  // CONFIG_JNT_COMP
 276
 277   void Run() {
 278     ACMRandom rnd(ACMRandom::DeterministicSeed());
 279     for (int i = 0; i < kTestIters; ++i) {
 280 #if CONFIG_JNT_COMP
 281       SetConvParamOffset(-1, -1);
 282       Prep(&rnd);
 283       RunOne(true);
 284       RunOne(false);
 285       image_->Check();
 286
 287       for (int j = 0; j < 2; ++j) {
 288         for (int k = 0; k < 4; ++k) {
 289           SetConvParamOffset(j, k);
 290           Prep(&rnd);
 291           RunOne(true);
 292           RunOne(false);
 293           image_->Check();
 294         }
 295       }
 296 #else
 297       Prep(&rnd);
 298       RunOne(true);
 299       RunOne(false);
 300       image_->Check();
 301 #endif  // CONFIG_JNT_COMP
 302     }
 303   }
 304
 305   void SpeedTest() {
 306     ACMRandom rnd(ACMRandom::DeterministicSeed());
 307     Prep(&rnd);
 308
 309     aom_usec_timer ref_timer;
 310     aom_usec_timer_start(&ref_timer);
 311     for (int i = 0; i < kPerfIters; ++i) RunOne(true);
 312     aom_usec_timer_mark(&ref_timer);
 313     const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
 314
 315     aom_usec_timer tst_timer;
 316     aom_usec_timer_start(&tst_timer);
 317     for (int i = 0; i < kPerfIters; ++i) RunOne(false);
 318     aom_usec_timer_mark(&tst_timer);
 319     const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
 320
 321     std::cout << "[          ] C time = " << ref_time / 1000
 322               << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
 323
 324     EXPECT_GT(ref_time, tst_time)
 325         << "Error: CDEFSpeedTest, SIMD slower than C.\n"
 326         << "C time: " << ref_time << " us\n"
 327         << "SIMD time: " << tst_time << " us\n";
 328   }
 329
 330   static int RandomSubpel(ACMRandom *rnd) {
 331     const uint8_t subpel_mode = rnd->Rand8();
 332     if ((subpel_mode & 7) == 0) {
 333       return 0;
 334     } else if ((subpel_mode & 7) == 1) {
 335       return SCALE_SUBPEL_SHIFTS - 1;
 336     } else {
 337       return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
 338     }
 339   }
 340
 341   void Prep(ACMRandom *rnd) {
 342     assert(rnd);
 343
 344     // Choose subpel_x_ and subpel_y_. They should be less than
 345     // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
 346     // values: 0 and SCALE_SUBPEL_SHIFTS - 1
 347     subpel_x_ = RandomSubpel(rnd);
 348     subpel_y_ = RandomSubpel(rnd);
 349
 350     image_->Initialize(rnd);
 351   }
 352
 353   int width_, height_, bd_;
 354   NTaps ntaps_x_, ntaps_y_;
 355   bool avg_;
 356   int subpel_x_, subpel_y_;
 357   TestFilter filter_x_, filter_y_;
 358   TestImage<SrcPixel> *image_;
 359   ConvolveParams convolve_params_;
 360 };
 361
 362 typedef tuple<int, int> BlockDimension;
 363
 364 typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
 365                                   int32_t *dst, int dst_stride, int w, int h,
 366                                   InterpFilterParams *filter_params_x,
 367                                   InterpFilterParams *filter_params_y,
 368                                   const int subpel_x_qn, const int x_step_qn,
 369                                   const int subpel_y_qn, const int y_step_qn,
 370                                   ConvolveParams *conv_params);
 371
 372 // Test parameter list:
 373 //  <tst_fun, dims, ntaps_x, ntaps_y, avg>
 374 typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
 375     LowBDParams;
 376
 377 class LowBDConvolveScaleTest
 378     : public ConvolveScaleTestBase<uint8_t>,
 379       public ::testing::WithParamInterface<LowBDParams> {
 380  public:
 381   virtual ~LowBDConvolveScaleTest() {}
 382
 383   void SetUp() {
 384     tst_fun_ = GET_PARAM(0);
 385
 386     const BlockDimension &block = GET_PARAM(1);
 387     const NTaps ntaps_x = GET_PARAM(2);
 388     const NTaps ntaps_y = GET_PARAM(3);
 389     const int bd = 8;
 390     const bool avg = GET_PARAM(4);
 391
 392     SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
 393   }
 394
 395   void RunOne(bool ref) {
 396     const uint8_t *src = image_->GetSrcData(ref, false);
 397     CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
 398     const int src_stride = image_->src_stride();
 399     const int dst_stride = image_->dst_stride();
 400
 401     if (ref) {
 402       av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
 403                               &filter_x_.params_, &filter_y_.params_, subpel_x_,
 404                               kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
 405     } else {
 406       tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
 407                &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
 408                subpel_y_, kYStepQn, &convolve_params_);
 409     }
 410   }
 411
 412  private:
 413   LowbdConvolveFunc tst_fun_;
 414 };
 415
// Block sizes, as (width, height), exercised by both the low and the high
// bit-depth tests.
const BlockDimension kBlockDim[] = {
  make_tuple(2, 2),    make_tuple(2, 4),    make_tuple(4, 4),
  make_tuple(4, 8),    make_tuple(8, 4),    make_tuple(8, 8),
  make_tuple(8, 16),   make_tuple(16, 8),   make_tuple(16, 16),
  make_tuple(16, 32),  make_tuple(32, 16),  make_tuple(32, 32),
  make_tuple(32, 64),  make_tuple(64, 32),  make_tuple(64, 64),
  make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
};

// Tap counts tried for both the horizontal and the vertical filter.
const NTaps kNTaps[] = { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
 426
// Compares the tested function against the C reference on random input.
TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
// Timing comparison; not run by default because of the DISABLED_ prefix.
TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }

// Runs av1_convolve_2d_scale_sse4_1 over every combination of block size,
// horizontal tap count, vertical tap count and avg flag.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, LowBDConvolveScaleTest,
    ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
                       ::testing::ValuesIn(kBlockDim),
                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
                       ::testing::Bool()));
 436
// Signature of the high bit-depth scaled 2D convolve functions exercised by
// HighBDConvolveScaleTest. Compared with the low bit-depth variant, the
// source is 16 bits wide and the bit depth is passed as a final argument.
typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
                                   int32_t *dst, int dst_stride, int w, int h,
                                   InterpFilterParams *filter_params_x,
                                   InterpFilterParams *filter_params_y,
                                   const int subpel_x_qn, const int x_step_qn,
                                   const int subpel_y_qn, const int y_step_qn,
                                   ConvolveParams *conv_params, int bd);

// Test parameter list:
//  <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
    HighBDParams;
 449
 450 class HighBDConvolveScaleTest
 451     : public ConvolveScaleTestBase<uint16_t>,
 452       public ::testing::WithParamInterface<HighBDParams> {
 453  public:
 454   virtual ~HighBDConvolveScaleTest() {}
 455
 456   void SetUp() {
 457     tst_fun_ = GET_PARAM(0);
 458
 459     const BlockDimension &block = GET_PARAM(1);
 460     const NTaps ntaps_x = GET_PARAM(2);
 461     const NTaps ntaps_y = GET_PARAM(3);
 462     const bool avg = GET_PARAM(4);
 463     const int bd = GET_PARAM(5);
 464
 465     SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
 466   }
 467
 468   void RunOne(bool ref) {
 469     const uint16_t *src = image_->GetSrcData(ref, false);
 470     CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
 471     const int src_stride = image_->src_stride();
 472     const int dst_stride = image_->dst_stride();
 473
 474     if (ref) {
 475       av1_highbd_convolve_2d_scale_c(
 476           src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
 477           &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
 478           &convolve_params_, bd_);
 479     } else {
 480       tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
 481                &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
 482                subpel_y_, kYStepQn, &convolve_params_, bd_);
 483     }
 484   }
 485
 486  private:
 487   HighbdConvolveFunc tst_fun_;
 488 };
 489
// Bit depths exercised by the high bit-depth test.
const int kBDs[] = { 8, 10, 12 };

// Compares the tested function against the C reference on random input.
TEST_P(HighBDConvolveScaleTest, Check) { Run(); }
// Timing comparison; not run by default because of the DISABLED_ prefix.
TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }

// Runs av1_highbd_convolve_2d_scale_sse4_1 over every combination of block
// size, horizontal tap count, vertical tap count, avg flag and bit depth.
INSTANTIATE_TEST_CASE_P(
    SSE4_1, HighBDConvolveScaleTest,
    ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
                       ::testing::ValuesIn(kBlockDim),
                       ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
                       ::testing::Bool(), ::testing::ValuesIn(kBDs)));
 501 }  // namespace