third_party/aom/test/hiprec_convolve_test_util.cc

   1 /*
   2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11
#include "test/hiprec_convolve_test_util.h"

#include <vector>

#include "av1/common/restoration.h"
  15
  16 using ::testing::make_tuple;
  17 using ::testing::tuple;
  18
  19 namespace libaom_test {
  20
  21 // Generate a random pair of filter kernels, using the ranges
  22 // of possible values from the loop-restoration experiment
  23 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
  24                              InterpKernel vkernel) {
  25   hkernel[0] = hkernel[6] =
  26       WIENER_FILT_TAP0_MINV +
  27       rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
  28   hkernel[1] = hkernel[5] =
  29       WIENER_FILT_TAP1_MINV +
  30       rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
  31   hkernel[2] = hkernel[4] =
  32       WIENER_FILT_TAP2_MINV +
  33       rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
  34   hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]);
  35   hkernel[7] = 0;
  36
  37   vkernel[0] = vkernel[6] =
  38       WIENER_FILT_TAP0_MINV +
  39       rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
  40   vkernel[1] = vkernel[5] =
  41       WIENER_FILT_TAP1_MINV +
  42       rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
  43   vkernel[2] = vkernel[4] =
  44       WIENER_FILT_TAP2_MINV +
  45       rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
  46   vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]);
  47   vkernel[7] = 0;
  48 }
  49
  50 namespace AV1HiprecConvolve {
  51
  52 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
  53     hiprec_convolve_func filter) {
  54   const HiprecConvolveParam params[] = {
  55     make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
  56     make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
  57     make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
  58     make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
  59     make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
  60     make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
  61     make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
  62   };
  63   return ::testing::ValuesIn(params);
  64 }
  65
  66 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
  67 void AV1HiprecConvolveTest::SetUp() {
  68   rnd_.Reset(ACMRandom::DeterministicSeed());
  69 }
  70
  71 void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
  72
  73 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
  74   const int w = 128, h = 128;
  75   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
  76   const int num_iters = GET_PARAM(2);
  77   int i, j;
  78   const ConvolveParams conv_params = get_conv_params_wiener(8);
  79
  80   uint8_t *input_ = new uint8_t[h * w];
  81   uint8_t *input = input_;
  82
  83   // The AVX2 convolve functions always write rows with widths that are
  84   // multiples of 16. So to avoid a buffer overflow, we may need to pad
  85   // rows to a multiple of 16.
  86   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
  87   uint8_t *output = new uint8_t[output_n];
  88   uint8_t *output2 = new uint8_t[output_n];
  89
  90   // Generate random filter kernels
  91   DECLARE_ALIGNED(16, InterpKernel, hkernel);
  92   DECLARE_ALIGNED(16, InterpKernel, vkernel);
  93
  94   generate_kernels(&rnd_, hkernel, vkernel);
  95
  96   for (i = 0; i < h; ++i)
  97     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
  98
  99   for (i = 0; i < num_iters; ++i) {
 100     // Choose random locations within the source block
 101     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
 102     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
 103     av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
 104                                   out_w, hkernel, 16, vkernel, 16, out_w, out_h,
 105                                   &conv_params);
 106     test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
 107               vkernel, 16, out_w, out_h, &conv_params);
 108
 109     for (j = 0; j < out_w * out_h; ++j)
 110       ASSERT_EQ(output[j], output2[j])
 111           << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
 112           << (j / out_w) << ") on iteration " << i;
 113   }
 114   delete[] input_;
 115   delete[] output;
 116   delete[] output2;
 117 }
 118
 119 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
 120   const int w = 128, h = 128;
 121   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
 122   const int num_iters = GET_PARAM(2) / 500;
 123   int i, j, k;
 124   const ConvolveParams conv_params = get_conv_params_wiener(8);
 125
 126   uint8_t *input_ = new uint8_t[h * w];
 127   uint8_t *input = input_;
 128
 129   // The AVX2 convolve functions always write rows with widths that are
 130   // multiples of 16. So to avoid a buffer overflow, we may need to pad
 131   // rows to a multiple of 16.
 132   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
 133   uint8_t *output = new uint8_t[output_n];
 134   uint8_t *output2 = new uint8_t[output_n];
 135
 136   // Generate random filter kernels
 137   DECLARE_ALIGNED(16, InterpKernel, hkernel);
 138   DECLARE_ALIGNED(16, InterpKernel, vkernel);
 139
 140   generate_kernels(&rnd_, hkernel, vkernel);
 141
 142   for (i = 0; i < h; ++i)
 143     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
 144
 145   aom_usec_timer ref_timer;
 146   aom_usec_timer_start(&ref_timer);
 147   for (i = 0; i < num_iters; ++i) {
 148     for (j = 3; j < h - out_h - 4; j++) {
 149       for (k = 3; k < w - out_w - 4; k++) {
 150         av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
 151                                       hkernel, 16, vkernel, 16, out_w, out_h,
 152                                       &conv_params);
 153       }
 154     }
 155   }
 156   aom_usec_timer_mark(&ref_timer);
 157   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
 158
 159   aom_usec_timer tst_timer;
 160   aom_usec_timer_start(&tst_timer);
 161   for (i = 0; i < num_iters; ++i) {
 162     for (j = 3; j < h - out_h - 4; j++) {
 163       for (k = 3; k < w - out_w - 4; k++) {
 164         test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
 165                   16, out_w, out_h, &conv_params);
 166       }
 167     }
 168   }
 169   aom_usec_timer_mark(&tst_timer);
 170   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
 171
 172   std::cout << "[          ] C time = " << ref_time / 1000
 173             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
 174
 175   EXPECT_GT(ref_time, tst_time)
 176       << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
 177       << "C time: " << ref_time << " us\n"
 178       << "SIMD time: " << tst_time << " us\n";
 179
 180   delete[] input_;
 181   delete[] output;
 182   delete[] output2;
 183 }
 184 }  // namespace AV1HiprecConvolve
 185
 186 namespace AV1HighbdHiprecConvolve {
 187
 188 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
 189     highbd_hiprec_convolve_func filter) {
 190   const HighbdHiprecConvolveParam params[] = {
 191     make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
 192     make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
 193     make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
 194     make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
 195     make_tuple(32, 8, 10000, 12, filter),
 196   };
 197   return ::testing::ValuesIn(params);
 198 }
 199
 200 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
 201 void AV1HighbdHiprecConvolveTest::SetUp() {
 202   rnd_.Reset(ACMRandom::DeterministicSeed());
 203 }
 204
 205 void AV1HighbdHiprecConvolveTest::TearDown() {
 206   libaom_test::ClearSystemState();
 207 }
 208
 209 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
 210     highbd_hiprec_convolve_func test_impl) {
 211   const int w = 128, h = 128;
 212   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
 213   const int num_iters = GET_PARAM(2);
 214   const int bd = GET_PARAM(3);
 215   int i, j;
 216   const ConvolveParams conv_params = get_conv_params_wiener(bd);
 217
 218   uint16_t *input = new uint16_t[h * w];
 219
 220   // The AVX2 convolve functions always write rows with widths that are
 221   // multiples of 16. So to avoid a buffer overflow, we may need to pad
 222   // rows to a multiple of 16.
 223   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
 224   uint16_t *output = new uint16_t[output_n];
 225   uint16_t *output2 = new uint16_t[output_n];
 226
 227   // Generate random filter kernels
 228   DECLARE_ALIGNED(16, InterpKernel, hkernel);
 229   DECLARE_ALIGNED(16, InterpKernel, vkernel);
 230
 231   generate_kernels(&rnd_, hkernel, vkernel);
 232
 233   for (i = 0; i < h; ++i)
 234     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
 235
 236   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
 237   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
 238   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
 239
 240   for (i = 0; i < num_iters; ++i) {
 241     // Choose random locations within the source block
 242     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
 243     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
 244     av1_highbd_wiener_convolve_add_src_c(
 245         input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
 246         vkernel, 16, out_w, out_h, &conv_params, bd);
 247     test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
 248               hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
 249
 250     for (j = 0; j < out_w * out_h; ++j)
 251       ASSERT_EQ(output[j], output2[j])
 252           << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
 253           << (j / out_w) << ") on iteration " << i;
 254   }
 255   delete[] input;
 256   delete[] output;
 257   delete[] output2;
 258 }
 259
 260 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
 261     highbd_hiprec_convolve_func test_impl) {
 262   const int w = 128, h = 128;
 263   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
 264   const int num_iters = GET_PARAM(2) / 500;
 265   const int bd = GET_PARAM(3);
 266   int i, j, k;
 267   const ConvolveParams conv_params = get_conv_params_wiener(bd);
 268
 269   uint16_t *input = new uint16_t[h * w];
 270
 271   // The AVX2 convolve functions always write rows with widths that are
 272   // multiples of 16. So to avoid a buffer overflow, we may need to pad
 273   // rows to a multiple of 16.
 274   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
 275   uint16_t *output = new uint16_t[output_n];
 276   uint16_t *output2 = new uint16_t[output_n];
 277
 278   // Generate random filter kernels
 279   DECLARE_ALIGNED(16, InterpKernel, hkernel);
 280   DECLARE_ALIGNED(16, InterpKernel, vkernel);
 281
 282   generate_kernels(&rnd_, hkernel, vkernel);
 283
 284   for (i = 0; i < h; ++i)
 285     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
 286
 287   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
 288   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
 289   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
 290
 291   aom_usec_timer ref_timer;
 292   aom_usec_timer_start(&ref_timer);
 293   for (i = 0; i < num_iters; ++i) {
 294     for (j = 3; j < h - out_h - 4; j++) {
 295       for (k = 3; k < w - out_w - 4; k++) {
 296         av1_highbd_wiener_convolve_add_src_c(
 297             input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
 298             16, out_w, out_h, &conv_params, bd);
 299       }
 300     }
 301   }
 302   aom_usec_timer_mark(&ref_timer);
 303   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
 304
 305   aom_usec_timer tst_timer;
 306   aom_usec_timer_start(&tst_timer);
 307   for (i = 0; i < num_iters; ++i) {
 308     for (j = 3; j < h - out_h - 4; j++) {
 309       for (k = 3; k < w - out_w - 4; k++) {
 310         test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
 311                   vkernel, 16, out_w, out_h, &conv_params, bd);
 312       }
 313     }
 314   }
 315   aom_usec_timer_mark(&tst_timer);
 316   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
 317
 318   std::cout << "[          ] C time = " << ref_time / 1000
 319             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
 320
 321   EXPECT_GT(ref_time, tst_time)
 322       << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
 323       << "C time: " << ref_time << " us\n"
 324       << "SIMD time: " << tst_time << " us\n";
 325
 326   delete[] input;
 327   delete[] output;
 328   delete[] output2;
 329 }
 330 }  // namespace AV1HighbdHiprecConvolve
 331 }  // namespace libaom_test