test/convolve_test.cc

   1 /*
   2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11
  12 #include <string.h>
  13 #include <tuple>
  14
  15 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
  16
  17 #include "config/aom_config.h"
  18 #include "config/aom_dsp_rtcd.h"
  19
  20 #include "aom_dsp/aom_dsp_common.h"
  21 #include "aom_dsp/aom_filter.h"
  22 #include "aom_mem/aom_mem.h"
  23 #include "aom_ports/aom_timer.h"
  24 #include "aom_ports/mem.h"
  25 #include "av1/common/filter.h"
  26 #include "test/acm_random.h"
  27 #include "test/register_state_check.h"
  28 #include "test/util.h"
  29
  30 namespace {
  31
// Upper bound on the block width and height handled by the reference filters
// in this file; it sizes their scratch buffers and the test fixture's buffers.
static const unsigned int kMaxDimension = MAX_SB_SIZE;
  33
  34 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
  35                              uint8_t *dst, ptrdiff_t dst_stride,
  36                              const int16_t *filter_x, int filter_x_stride,
  37                              const int16_t *filter_y, int filter_y_stride,
  38                              int w, int h);
  39
  40 struct ConvolveFunctions {
  41   ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd)
  42       : h8_(h8), v8_(v8), use_highbd_(bd) {}
  43
  44   ConvolveFunc h8_;
  45   ConvolveFunc v8_;
  46   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
  47 };
  48
  49 typedef std::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
  50
// Expands to a comma-separated list of (width, height, &convolve_fn) tuples
// covering the block sizes from 4x4 up to 64x64.
#define ALL_SIZES_64(convolve_fn)                                         \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// Same as ALL_SIZES_64, preceded by the 128x64, 64x128 and 128x128 sizes.
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
  63
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// The reference filters below work in fixed point: each sum of tap products
// is rounded by adding AV1_FILTER_WEIGHT >> 1 and then shifted right by
// AV1_FILTER_SHIFT.
#define AV1_FILTER_WEIGHT 128
#define AV1_FILTER_SHIFT 7
  67 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
  68
  69 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
  70                         const int16_t *HFilter, const int16_t *VFilter,
  71                         uint8_t *dst_ptr, unsigned int dst_stride,
  72                         unsigned int output_width, unsigned int output_height) {
  73   // Between passes, we use an intermediate buffer whose height is extended to
  74   // have enough horizontally filtered values as input for the vertical pass.
  75   // This buffer is allocated to be big enough for the largest block type we
  76   // support.
  77   const int kInterp_Extend = 4;
  78   const unsigned int intermediate_height =
  79       (kInterp_Extend - 1) + output_height + kInterp_Extend;
  80   unsigned int i, j;
  81
  82   assert(intermediate_height > 7);
  83
  84   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  85   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  86   //                                 + kInterp_Extend
  87   //                               = 3 + 16 + 4
  88   //                               = 23
  89   // and filter_max_width          = 16
  90   //
  91   uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
  92   const int intermediate_next_stride =
  93       1 - static_cast<int>(intermediate_height * output_width);
  94
  95   // Horizontal pass (src -> transposed intermediate).
  96   uint8_t *output_ptr = intermediate_buffer;
  97   const int src_next_row_stride = src_stride - output_width;
  98   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  99   for (i = 0; i < intermediate_height; ++i) {
 100     for (j = 0; j < output_width; ++j) {
 101       // Apply filter...
 102       const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
 103                        (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
 104                        (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
 105                        (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
 106                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
 107
 108       // Normalize back to 0-255...
 109       *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
 110       ++src_ptr;
 111       output_ptr += intermediate_height;
 112     }
 113     src_ptr += src_next_row_stride;
 114     output_ptr += intermediate_next_stride;
 115   }
 116
 117   // Vertical pass (transposed intermediate -> dst).
 118   src_ptr = intermediate_buffer;
 119   const int dst_next_row_stride = dst_stride - output_width;
 120   for (i = 0; i < output_height; ++i) {
 121     for (j = 0; j < output_width; ++j) {
 122       // Apply filter...
 123       const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
 124                        (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
 125                        (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
 126                        (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
 127                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
 128
 129       // Normalize back to 0-255...
 130       *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
 131       src_ptr += intermediate_height;
 132     }
 133     src_ptr += intermediate_next_stride;
 134     dst_ptr += dst_next_row_stride;
 135   }
 136 }
 137
 138 void block2d_average_c(uint8_t *src, unsigned int src_stride,
 139                        uint8_t *output_ptr, unsigned int output_stride,
 140                        unsigned int output_width, unsigned int output_height) {
 141   unsigned int i, j;
 142   for (i = 0; i < output_height; ++i) {
 143     for (j = 0; j < output_width; ++j) {
 144       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 145     }
 146     output_ptr += output_stride;
 147   }
 148 }
 149
 150 void filter_average_block2d_8_c(const uint8_t *src_ptr,
 151                                 const unsigned int src_stride,
 152                                 const int16_t *HFilter, const int16_t *VFilter,
 153                                 uint8_t *dst_ptr, unsigned int dst_stride,
 154                                 unsigned int output_width,
 155                                 unsigned int output_height) {
 156   uint8_t tmp[kMaxDimension * kMaxDimension];
 157
 158   assert(output_width <= kMaxDimension);
 159   assert(output_height <= kMaxDimension);
 160   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
 161                      output_width, output_height);
 162   block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
 163                     output_height);
 164 }
 165
 166 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
 167                                const unsigned int src_stride,
 168                                const int16_t *HFilter, const int16_t *VFilter,
 169                                uint16_t *dst_ptr, unsigned int dst_stride,
 170                                unsigned int output_width,
 171                                unsigned int output_height, int bd) {
 172   // Between passes, we use an intermediate buffer whose height is extended to
 173   // have enough horizontally filtered values as input for the vertical pass.
 174   // This buffer is allocated to be big enough for the largest block type we
 175   // support.
 176   const int kInterp_Extend = 4;
 177   const unsigned int intermediate_height =
 178       (kInterp_Extend - 1) + output_height + kInterp_Extend;
 179
 180   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
 181    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
 182    *                                 + kInterp_Extend
 183    *                               = 3 + 16 + 4
 184    *                               = 23
 185    * and filter_max_width = 16
 186    */
 187   uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
 188   const int intermediate_next_stride =
 189       1 - static_cast<int>(intermediate_height * output_width);
 190
 191   // Horizontal pass (src -> transposed intermediate).
 192   {
 193     uint16_t *output_ptr = intermediate_buffer;
 194     const int src_next_row_stride = src_stride - output_width;
 195     unsigned int i, j;
 196     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
 197     for (i = 0; i < intermediate_height; ++i) {
 198       for (j = 0; j < output_width; ++j) {
 199         // Apply filter...
 200         const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
 201                          (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
 202                          (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
 203                          (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
 204                          (AV1_FILTER_WEIGHT >> 1);  // Rounding
 205
 206         // Normalize back to 0-255...
 207         *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
 208         ++src_ptr;
 209         output_ptr += intermediate_height;
 210       }
 211       src_ptr += src_next_row_stride;
 212       output_ptr += intermediate_next_stride;
 213     }
 214   }
 215
 216   // Vertical pass (transposed intermediate -> dst).
 217   {
 218     const uint16_t *interm_ptr = intermediate_buffer;
 219     const int dst_next_row_stride = dst_stride - output_width;
 220     unsigned int i, j;
 221     for (i = 0; i < output_height; ++i) {
 222       for (j = 0; j < output_width; ++j) {
 223         // Apply filter...
 224         const int temp =
 225             (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
 226             (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
 227             (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
 228             (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
 229             (AV1_FILTER_WEIGHT >> 1);  // Rounding
 230
 231         // Normalize back to 0-255...
 232         *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
 233         interm_ptr += intermediate_height;
 234       }
 235       interm_ptr += intermediate_next_stride;
 236       dst_ptr += dst_next_row_stride;
 237     }
 238   }
 239 }
 240
 241 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
 242                               uint16_t *output_ptr, unsigned int output_stride,
 243                               unsigned int output_width,
 244                               unsigned int output_height) {
 245   unsigned int i, j;
 246   for (i = 0; i < output_height; ++i) {
 247     for (j = 0; j < output_width; ++j) {
 248       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 249     }
 250     output_ptr += output_stride;
 251   }
 252 }
 253
 254 void highbd_filter_average_block2d_8_c(
 255     const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 256     const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
 257     unsigned int output_width, unsigned int output_height, int bd) {
 258   uint16_t tmp[kMaxDimension * kMaxDimension];
 259
 260   assert(output_width <= kMaxDimension);
 261   assert(output_height <= kMaxDimension);
 262   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
 263                             kMaxDimension, output_width, output_height, bd);
 264   highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
 265                            output_width, output_height);
 266 }
 267
 268 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 269  public:
 270   static void SetUpTestSuite() {
 271     // Force input_ to be unaligned, output to be 16 byte aligned.
 272     input_ = reinterpret_cast<uint8_t *>(
 273                  aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
 274              1;
 275     ASSERT_NE(input_, nullptr);
 276     ref8_ = reinterpret_cast<uint8_t *>(
 277         aom_memalign(kDataAlignment, kOutputStride * kMaxDimension));
 278     ASSERT_NE(ref8_, nullptr);
 279     output_ = reinterpret_cast<uint8_t *>(
 280         aom_memalign(kDataAlignment, kOutputBufferSize));
 281     ASSERT_NE(output_, nullptr);
 282     output_ref_ = reinterpret_cast<uint8_t *>(
 283         aom_memalign(kDataAlignment, kOutputBufferSize));
 284     ASSERT_NE(output_ref_, nullptr);
 285     input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
 286                    kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
 287                1;
 288     ASSERT_NE(input16_, nullptr);
 289     ref16_ = reinterpret_cast<uint16_t *>(aom_memalign(
 290         kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t)));
 291     ASSERT_NE(ref16_, nullptr);
 292     output16_ = reinterpret_cast<uint16_t *>(
 293         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 294     ASSERT_NE(output16_, nullptr);
 295     output16_ref_ = reinterpret_cast<uint16_t *>(
 296         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 297     ASSERT_NE(output16_ref_, nullptr);
 298   }
 299
 300   virtual void TearDown() {}
 301
 302   static void TearDownTestSuite() {
 303     aom_free(input_ - 1);
 304     input_ = NULL;
 305     aom_free(ref8_);
 306     ref8_ = NULL;
 307     aom_free(output_);
 308     output_ = NULL;
 309     aom_free(output_ref_);
 310     output_ref_ = NULL;
 311     aom_free(input16_ - 1);
 312     input16_ = NULL;
 313     aom_free(ref16_);
 314     ref16_ = NULL;
 315     aom_free(output16_);
 316     output16_ = NULL;
 317     aom_free(output16_ref_);
 318     output16_ref_ = NULL;
 319   }
 320
 321  protected:
 322   static const int kDataAlignment = 16;
 323   static const int kOuterBlockSize = 4 * kMaxDimension;
 324   static const int kInputStride = kOuterBlockSize;
 325   static const int kOutputStride = kOuterBlockSize;
 326   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
 327   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
 328
 329   int Width() const { return GET_PARAM(0); }
 330   int Height() const { return GET_PARAM(1); }
 331   int BorderLeft() const {
 332     const int center = (kOuterBlockSize - Width()) / 2;
 333     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
 334   }
 335   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
 336
 337   bool IsIndexInBorder(int i) {
 338     return (i < BorderTop() * kOuterBlockSize ||
 339             i >= (BorderTop() + Height()) * kOuterBlockSize ||
 340             i % kOuterBlockSize < BorderLeft() ||
 341             i % kOuterBlockSize >= (BorderLeft() + Width()));
 342   }
 343
 344   virtual void SetUp() {
 345     UUT_ = GET_PARAM(2);
 346     if (UUT_->use_highbd_ != 0)
 347       mask_ = (1 << UUT_->use_highbd_) - 1;
 348     else
 349       mask_ = 255;
 350     /* Set up guard blocks for an inner block centered in the outer block */
 351     for (int i = 0; i < kOutputBufferSize; ++i) {
 352       if (IsIndexInBorder(i)) {
 353         output_[i] = 255;
 354         output16_[i] = mask_;
 355       } else {
 356         output_[i] = 0;
 357         output16_[i] = 0;
 358       }
 359     }
 360
 361     ::libaom_test::ACMRandom prng;
 362     for (int i = 0; i < kInputBufferSize; ++i) {
 363       if (i & 1) {
 364         input_[i] = 255;
 365         input16_[i] = mask_;
 366       } else {
 367         input_[i] = prng.Rand8Extremes();
 368         input16_[i] = prng.Rand16() & mask_;
 369       }
 370     }
 371   }
 372
 373   void SetConstantInput(int value) {
 374     memset(input_, value, kInputBufferSize);
 375     aom_memset16(input16_, value, kInputBufferSize);
 376   }
 377
 378   void CopyOutputToRef() {
 379     memcpy(output_ref_, output_, kOutputBufferSize);
 380     // Copy 16-bit pixels values. The effective number of bytes is double.
 381     memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
 382   }
 383
 384   void CheckGuardBlocks() {
 385     for (int i = 0; i < kOutputBufferSize; ++i) {
 386       if (IsIndexInBorder(i)) {
 387         EXPECT_EQ(255, output_[i]);
 388       }
 389     }
 390   }
 391
 392   uint8_t *input() const {
 393     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 394     if (UUT_->use_highbd_ == 0) {
 395       return input_ + offset;
 396     } else {
 397       return CONVERT_TO_BYTEPTR(input16_) + offset;
 398     }
 399   }
 400
 401   uint8_t *output() const {
 402     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 403     if (UUT_->use_highbd_ == 0) {
 404       return output_ + offset;
 405     } else {
 406       return CONVERT_TO_BYTEPTR(output16_) + offset;
 407     }
 408   }
 409
 410   uint8_t *output_ref() const {
 411     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 412     if (UUT_->use_highbd_ == 0) {
 413       return output_ref_ + offset;
 414     } else {
 415       return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
 416     }
 417   }
 418
 419   uint16_t lookup(uint8_t *list, int index) const {
 420     if (UUT_->use_highbd_ == 0) {
 421       return list[index];
 422     } else {
 423       return CONVERT_TO_SHORTPTR(list)[index];
 424     }
 425   }
 426
 427   void assign_val(uint8_t *list, int index, uint16_t val) const {
 428     if (UUT_->use_highbd_ == 0) {
 429       list[index] = (uint8_t)val;
 430     } else {
 431       CONVERT_TO_SHORTPTR(list)[index] = val;
 432     }
 433   }
 434
 435   void wrapper_filter_average_block2d_8_c(
 436       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 437       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
 438       unsigned int output_width, unsigned int output_height) {
 439     if (UUT_->use_highbd_ == 0) {
 440       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
 441                                  dst_stride, output_width, output_height);
 442     } else {
 443       highbd_filter_average_block2d_8_c(
 444           CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
 445           CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
 446           UUT_->use_highbd_);
 447     }
 448   }
 449
 450   void wrapper_filter_block2d_8_c(
 451       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 452       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
 453       unsigned int output_width, unsigned int output_height) {
 454     if (UUT_->use_highbd_ == 0) {
 455       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
 456                          dst_stride, output_width, output_height);
 457     } else {
 458       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
 459                                 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
 460                                 dst_stride, output_width, output_height,
 461                                 UUT_->use_highbd_);
 462     }
 463   }
 464
 465   const ConvolveFunctions *UUT_;
 466   static uint8_t *input_;
 467   static uint8_t *ref8_;
 468   static uint8_t *output_;
 469   static uint8_t *output_ref_;
 470   static uint16_t *input16_;
 471   static uint16_t *ref16_;
 472   static uint16_t *output16_;
 473   static uint16_t *output16_ref_;
 474   int mask_;
 475 };
 476
 477 uint8_t *ConvolveTest::input_ = NULL;
 478 uint8_t *ConvolveTest::ref8_ = NULL;
 479 uint8_t *ConvolveTest::output_ = NULL;
 480 uint8_t *ConvolveTest::output_ref_ = NULL;
 481 uint16_t *ConvolveTest::input16_ = NULL;
 482 uint16_t *ConvolveTest::ref16_ = NULL;
 483 uint16_t *ConvolveTest::output16_ = NULL;
 484 uint16_t *ConvolveTest::output16_ref_ = NULL;
 485
 486 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
 487
// Number of interpolation filter types iterated over by the tests.
const int kNumFilterBanks = SWITCHABLE_FILTERS;
// Number of kernels read from each filter bank.
// NOTE(review): presumably the number of subpel positions per bank -- confirm
// against av1/common/filter.h.
const int kNumFilters = 16;
 490
 491 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
 492   int subpel_search;
 493   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
 494        ++subpel_search) {
 495     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 496       const InterpFilter filter = (InterpFilter)filter_bank;
 497       const InterpKernel *filters =
 498           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
 499                                                              subpel_search);
 500       for (int i = 0; i < kNumFilters; i++) {
 501         const int p0 = filters[i][0] + filters[i][1];
 502         const int p1 = filters[i][2] + filters[i][3];
 503         const int p2 = filters[i][4] + filters[i][5];
 504         const int p3 = filters[i][6] + filters[i][7];
 505         EXPECT_LE(p0, 128);
 506         EXPECT_LE(p1, 128);
 507         EXPECT_LE(p2, 128);
 508         EXPECT_LE(p3, 128);
 509         EXPECT_LE(p0 + p3, 128);
 510         EXPECT_LE(p0 + p3 + p1, 128);
 511         EXPECT_LE(p0 + p3 + p1 + p2, 128);
 512         EXPECT_EQ(p0 + p1 + p2 + p3, 128);
 513       }
 514     }
 515   }
 516 }
 517
// All-zero taps passed as the filter for the direction a 1-D kernel is not
// being tested in (filter_x for v8_, filter_y for h8_).
// NOTE(review): presumably chosen so that a kernel which wrongly applies it
// produces a mismatch -- confirm.
const int16_t kInvalidFilter[8] = { 0 };
 519
 520 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
 521   uint8_t *const in = input();
 522   uint8_t *const out = output();
 523   uint8_t *ref;
 524   if (UUT_->use_highbd_ == 0) {
 525     ref = ref8_;
 526   } else {
 527     ref = CONVERT_TO_BYTEPTR(ref16_);
 528   }
 529   int subpel_search;
 530   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
 531        ++subpel_search) {
 532     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 533       const InterpFilter filter = (InterpFilter)filter_bank;
 534       const InterpKernel *filters =
 535           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
 536                                                              subpel_search);
 537       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 538         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 539           wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
 540                                      filters[filter_y], ref, kOutputStride,
 541                                      Width(), Height());
 542
 543           if (filter_x && filter_y)
 544             continue;
 545           else if (filter_y)
 546             API_REGISTER_STATE_CHECK(
 547                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
 548                           16, filters[filter_y], 16, Width(), Height()));
 549           else if (filter_x)
 550             API_REGISTER_STATE_CHECK(UUT_->h8_(
 551                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
 552                 kInvalidFilter, 16, Width(), Height()));
 553           else
 554             continue;
 555
 556           CheckGuardBlocks();
 557
 558           for (int y = 0; y < Height(); ++y)
 559             for (int x = 0; x < Width(); ++x)
 560               ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 561                         lookup(out, y * kOutputStride + x))
 562                   << "mismatch at (" << x << "," << y << "), "
 563                   << "filters (" << filter_bank << "," << filter_x << ","
 564                   << filter_y << ")";
 565         }
 566       }
 567     }
 568   }
 569 }
 570
 571 TEST_P(ConvolveTest, FilterExtremes) {
 572   uint8_t *const in = input();
 573   uint8_t *const out = output();
 574   uint8_t *ref;
 575   if (UUT_->use_highbd_ == 0) {
 576     ref = ref8_;
 577   } else {
 578     ref = CONVERT_TO_BYTEPTR(ref16_);
 579   }
 580
 581   // Populate ref and out with some random data
 582   ::libaom_test::ACMRandom prng;
 583   for (int y = 0; y < Height(); ++y) {
 584     for (int x = 0; x < Width(); ++x) {
 585       uint16_t r;
 586       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 587         r = prng.Rand8Extremes();
 588       } else {
 589         r = prng.Rand16() & mask_;
 590       }
 591       assign_val(out, y * kOutputStride + x, r);
 592       assign_val(ref, y * kOutputStride + x, r);
 593     }
 594   }
 595
 596   for (int axis = 0; axis < 2; axis++) {
 597     int seed_val = 0;
 598     while (seed_val < 256) {
 599       for (int y = 0; y < 8; ++y) {
 600         for (int x = 0; x < 8; ++x) {
 601           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
 602                      ((seed_val >> (axis ? y : x)) & 1) * mask_);
 603           if (axis) seed_val++;
 604         }
 605         if (axis)
 606           seed_val -= 8;
 607         else
 608           seed_val++;
 609       }
 610       if (axis) seed_val += 8;
 611       int subpel_search;
 612       for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
 613            ++subpel_search) {
 614         for (int filter_bank = 0; filter_bank < kNumFilterBanks;
 615              ++filter_bank) {
 616           const InterpFilter filter = (InterpFilter)filter_bank;
 617           const InterpKernel *filters =
 618               (const InterpKernel *)av1_get_interp_filter_kernel(filter,
 619                                                                  subpel_search);
 620           for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 621             for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 622               wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
 623                                          filters[filter_y], ref, kOutputStride,
 624                                          Width(), Height());
 625               if (filter_x && filter_y)
 626                 continue;
 627               else if (filter_y)
 628                 API_REGISTER_STATE_CHECK(UUT_->v8_(
 629                     in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
 630                     filters[filter_y], 16, Width(), Height()));
 631               else if (filter_x)
 632                 API_REGISTER_STATE_CHECK(UUT_->h8_(
 633                     in, kInputStride, out, kOutputStride, filters[filter_x], 16,
 634                     kInvalidFilter, 16, Width(), Height()));
 635               else
 636                 continue;
 637
 638               for (int y = 0; y < Height(); ++y)
 639                 for (int x = 0; x < Width(); ++x)
 640                   ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 641                             lookup(out, y * kOutputStride + x))
 642                       << "mismatch at (" << x << "," << y << "), "
 643                       << "filters (" << filter_bank << "," << filter_x << ","
 644                       << filter_y << ")";
 645             }
 646           }
 647         }
 648       }
 649     }
 650   }
 651 }
 652
 653 TEST_P(ConvolveTest, DISABLED_Speed) {
 654   uint8_t *const in = input();
 655   uint8_t *const out = output();
 656   uint8_t *ref;
 657   if (UUT_->use_highbd_ == 0) {
 658     ref = ref8_;
 659   } else {
 660     ref = CONVERT_TO_BYTEPTR(ref16_);
 661   }
 662
 663   // Populate ref and out with some random data
 664   ::libaom_test::ACMRandom prng;
 665   for (int y = 0; y < Height(); ++y) {
 666     for (int x = 0; x < Width(); ++x) {
 667       uint16_t r;
 668       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 669         r = prng.Rand8Extremes();
 670       } else {
 671         r = prng.Rand16() & mask_;
 672       }
 673       assign_val(out, y * kOutputStride + x, r);
 674       assign_val(ref, y * kOutputStride + x, r);
 675     }
 676   }
 677
 678   const InterpFilter filter = (InterpFilter)1;
 679   const InterpKernel *filters =
 680       (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS);
 681   wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
 682                                      out, kOutputStride, Width(), Height());
 683
 684   aom_usec_timer timer;
 685   int tests_num = 1000;
 686
 687   aom_usec_timer_start(&timer);
 688   while (tests_num > 0) {
 689     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 690       const InterpFilter filter = (InterpFilter)filter_bank;
 691       const InterpKernel *filters =
 692           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
 693                                                              USE_8_TAPS);
 694       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 695         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 696           if (filter_x && filter_y) continue;
 697           if (filter_y)
 698             API_REGISTER_STATE_CHECK(
 699                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
 700                           16, filters[filter_y], 16, Width(), Height()));
 701           else if (filter_x)
 702             API_REGISTER_STATE_CHECK(UUT_->h8_(
 703                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
 704                 kInvalidFilter, 16, Width(), Height()));
 705         }
 706       }
 707     }
 708     tests_num--;
 709   }
 710   aom_usec_timer_mark(&timer);
 711
 712   const int elapsed_time =
 713       static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
 714   printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
 715          UUT_->use_highbd_, elapsed_time);
 716 }
 717
 718 using std::make_tuple;
 719
// WRAP macro is only used for high bitdepth build.
#if CONFIG_AV1_HIGHBITDEPTH
// Defines wrap_<func>_<bd>(), a function with the ConvolveFunc signature that
// forwards its arguments to aom_highbd_<func>() with the bit depth `bd`
// appended as the last argument.
#define WRAP(func, bd)                                                       \
  static void wrap_##func##_##bd(                                            \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
// SSE2 wrappers for bit depths 8, 10 and 12.
#if HAVE_SSE2 && ARCH_X86_64
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

// C wrappers for bit depths 8, 10 and 12.
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_vert_c, 12)

// AVX2 wrappers for bit depths 8, 10 and 12.
#if HAVE_AVX2
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)

WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)

WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
#endif  // HAVE_AVX2
#endif  // CONFIG_AV1_HIGHBITDEPTH

#undef WRAP
 759
 760 #if CONFIG_AV1_HIGHBITDEPTH
 761 const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8,
 762                                          wrap_convolve8_vert_c_8, 8);
 763 const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10,
 764                                           wrap_convolve8_vert_c_10, 10);
 765 const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12,
 766                                           wrap_convolve8_vert_c_12, 12);
 767 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(wrap_convolve8_c),
 768                                            ALL_SIZES(wrap_convolve10_c),
 769                                            ALL_SIZES(wrap_convolve12_c) };
 770 #else
 771 const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c,
 772                                     0);
 773 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
 774 #endif
 775
 776 INSTANTIATE_TEST_SUITE_P(C, ConvolveTest,
 777                          ::testing::ValuesIn(kArrayConvolve_c));
 778
 779 #if HAVE_SSE2 && ARCH_X86_64
 780 #if CONFIG_AV1_HIGHBITDEPTH
 781 const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
 782                                             wrap_convolve8_vert_sse2_8, 8);
 783 const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10,
 784                                              wrap_convolve8_vert_sse2_10, 10);
 785 const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12,
 786                                              wrap_convolve8_vert_sse2_12, 12);
 787 const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(wrap_convolve8_sse2),
 788                                               ALL_SIZES(wrap_convolve10_sse2),
 789                                               ALL_SIZES(wrap_convolve12_sse2) };
 790 #else
 791 const ConvolveFunctions convolve8_sse2(aom_convolve8_horiz_sse2,
 792                                        aom_convolve8_vert_sse2, 0);
 793 const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
 794 #endif
 795 INSTANTIATE_TEST_SUITE_P(SSE2, ConvolveTest,
 796                          ::testing::ValuesIn(kArrayConvolve_sse2));
 797 #endif
 798
 799 #if HAVE_SSSE3
 800 const ConvolveFunctions convolve8_ssse3(aom_convolve8_horiz_ssse3,
 801                                         aom_convolve8_vert_ssse3, 0);
 802
 803 const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
 804 INSTANTIATE_TEST_SUITE_P(SSSE3, ConvolveTest,
 805                          ::testing::ValuesIn(kArrayConvolve8_ssse3));
 806 #endif
 807
 808 #if HAVE_AVX2
 809 #if CONFIG_AV1_HIGHBITDEPTH
 810 const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8,
 811                                             wrap_convolve8_vert_avx2_8, 8);
 812 const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10,
 813                                              wrap_convolve8_vert_avx2_10, 10);
 814 const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12,
 815                                              wrap_convolve8_vert_avx2_12, 12);
 816 const ConvolveParam kArray_Convolve8_avx2[] = {
 817   ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2),
 818   ALL_SIZES_64(wrap_convolve12_avx2)
 819 };
 820 #else
 821 const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2,
 822                                        aom_convolve8_vert_avx2, 0);
 823 const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
 824 #endif
 825
 826 INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveTest,
 827                          ::testing::ValuesIn(kArray_Convolve8_avx2));
 828 #endif  // HAVE_AVX2
 829
 830 }  // namespace