test/convolve_test.cc

   1 /*
   2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11
  12 #include <string.h>
  13
  14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
  15
  16 #include "config/aom_config.h"
  17 #include "config/aom_dsp_rtcd.h"
  18
  19 #include "aom_dsp/aom_dsp_common.h"
  20 #include "aom_dsp/aom_filter.h"
  21 #include "aom_mem/aom_mem.h"
  22 #include "aom_ports/aom_timer.h"
  23 #include "aom_ports/mem.h"
  24 #include "av1/common/filter.h"
  25 #include "test/acm_random.h"
  26 #include "test/clear_system_state.h"
  27 #include "test/register_state_check.h"
  28 #include "test/util.h"
  29
  30 namespace {
  31
  32 static const unsigned int kMaxDimension = MAX_SB_SIZE;
  33
  34 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
  35                              uint8_t *dst, ptrdiff_t dst_stride,
  36                              const int16_t *filter_x, int filter_x_stride,
  37                              const int16_t *filter_y, int filter_y_stride,
  38                              int w, int h);
  39
  40 struct ConvolveFunctions {
  41   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc h8, ConvolveFunc v8, int bd)
  42       : copy_(copy), h8_(h8), v8_(v8), use_highbd_(bd) {}
  43
  44   ConvolveFunc copy_;
  45   ConvolveFunc h8_;
  46   ConvolveFunc v8_;
  47   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
  48 };
  49
  50 typedef ::testing::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
  51
  52 #define ALL_SIZES_64(convolve_fn)                                         \
  53   make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
  54       make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
  55       make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
  56       make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
  57       make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
  58       make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
  59       make_tuple(64, 64, &convolve_fn)
  60
  61 #define ALL_SIZES(convolve_fn)                                          \
  62   make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
  63       make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
  64
  65 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
  66 #define AV1_FILTER_WEIGHT 128
  67 #define AV1_FILTER_SHIFT 7
  68 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
  69
  70 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
  71                         const int16_t *HFilter, const int16_t *VFilter,
  72                         uint8_t *dst_ptr, unsigned int dst_stride,
  73                         unsigned int output_width, unsigned int output_height) {
  74   // Between passes, we use an intermediate buffer whose height is extended to
  75   // have enough horizontally filtered values as input for the vertical pass.
  76   // This buffer is allocated to be big enough for the largest block type we
  77   // support.
  78   const int kInterp_Extend = 4;
  79   const unsigned int intermediate_height =
  80       (kInterp_Extend - 1) + output_height + kInterp_Extend;
  81   unsigned int i, j;
  82
  83   assert(intermediate_height > 7);
  84
  85   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  86   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  87   //                                 + kInterp_Extend
  88   //                               = 3 + 16 + 4
  89   //                               = 23
  90   // and filter_max_width          = 16
  91   //
  92   uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
  93   const int intermediate_next_stride =
  94       1 - static_cast<int>(intermediate_height * output_width);
  95
  96   // Horizontal pass (src -> transposed intermediate).
  97   uint8_t *output_ptr = intermediate_buffer;
  98   const int src_next_row_stride = src_stride - output_width;
  99   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
 100   for (i = 0; i < intermediate_height; ++i) {
 101     for (j = 0; j < output_width; ++j) {
 102       // Apply filter...
 103       const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
 104                        (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
 105                        (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
 106                        (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
 107                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
 108
 109       // Normalize back to 0-255...
 110       *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
 111       ++src_ptr;
 112       output_ptr += intermediate_height;
 113     }
 114     src_ptr += src_next_row_stride;
 115     output_ptr += intermediate_next_stride;
 116   }
 117
 118   // Vertical pass (transposed intermediate -> dst).
 119   src_ptr = intermediate_buffer;
 120   const int dst_next_row_stride = dst_stride - output_width;
 121   for (i = 0; i < output_height; ++i) {
 122     for (j = 0; j < output_width; ++j) {
 123       // Apply filter...
 124       const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
 125                        (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
 126                        (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
 127                        (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
 128                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
 129
 130       // Normalize back to 0-255...
 131       *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
 132       src_ptr += intermediate_height;
 133     }
 134     src_ptr += intermediate_next_stride;
 135     dst_ptr += dst_next_row_stride;
 136   }
 137 }
 138
 139 void block2d_average_c(uint8_t *src, unsigned int src_stride,
 140                        uint8_t *output_ptr, unsigned int output_stride,
 141                        unsigned int output_width, unsigned int output_height) {
 142   unsigned int i, j;
 143   for (i = 0; i < output_height; ++i) {
 144     for (j = 0; j < output_width; ++j) {
 145       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 146     }
 147     output_ptr += output_stride;
 148   }
 149 }
 150
 151 void filter_average_block2d_8_c(const uint8_t *src_ptr,
 152                                 const unsigned int src_stride,
 153                                 const int16_t *HFilter, const int16_t *VFilter,
 154                                 uint8_t *dst_ptr, unsigned int dst_stride,
 155                                 unsigned int output_width,
 156                                 unsigned int output_height) {
 157   uint8_t tmp[kMaxDimension * kMaxDimension];
 158
 159   assert(output_width <= kMaxDimension);
 160   assert(output_height <= kMaxDimension);
 161   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
 162                      output_width, output_height);
 163   block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
 164                     output_height);
 165 }
 166
 167 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
 168                                const unsigned int src_stride,
 169                                const int16_t *HFilter, const int16_t *VFilter,
 170                                uint16_t *dst_ptr, unsigned int dst_stride,
 171                                unsigned int output_width,
 172                                unsigned int output_height, int bd) {
 173   // Between passes, we use an intermediate buffer whose height is extended to
 174   // have enough horizontally filtered values as input for the vertical pass.
 175   // This buffer is allocated to be big enough for the largest block type we
 176   // support.
 177   const int kInterp_Extend = 4;
 178   const unsigned int intermediate_height =
 179       (kInterp_Extend - 1) + output_height + kInterp_Extend;
 180
 181   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
 182    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
 183    *                                 + kInterp_Extend
 184    *                               = 3 + 16 + 4
 185    *                               = 23
 186    * and filter_max_width = 16
 187    */
 188   uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
 189   const int intermediate_next_stride =
 190       1 - static_cast<int>(intermediate_height * output_width);
 191
 192   // Horizontal pass (src -> transposed intermediate).
 193   {
 194     uint16_t *output_ptr = intermediate_buffer;
 195     const int src_next_row_stride = src_stride - output_width;
 196     unsigned int i, j;
 197     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
 198     for (i = 0; i < intermediate_height; ++i) {
 199       for (j = 0; j < output_width; ++j) {
 200         // Apply filter...
 201         const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
 202                          (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
 203                          (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
 204                          (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
 205                          (AV1_FILTER_WEIGHT >> 1);  // Rounding
 206
 207         // Normalize back to 0-255...
 208         *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
 209         ++src_ptr;
 210         output_ptr += intermediate_height;
 211       }
 212       src_ptr += src_next_row_stride;
 213       output_ptr += intermediate_next_stride;
 214     }
 215   }
 216
 217   // Vertical pass (transposed intermediate -> dst).
 218   {
 219     const uint16_t *interm_ptr = intermediate_buffer;
 220     const int dst_next_row_stride = dst_stride - output_width;
 221     unsigned int i, j;
 222     for (i = 0; i < output_height; ++i) {
 223       for (j = 0; j < output_width; ++j) {
 224         // Apply filter...
 225         const int temp =
 226             (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
 227             (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
 228             (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
 229             (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
 230             (AV1_FILTER_WEIGHT >> 1);  // Rounding
 231
 232         // Normalize back to 0-255...
 233         *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
 234         interm_ptr += intermediate_height;
 235       }
 236       interm_ptr += intermediate_next_stride;
 237       dst_ptr += dst_next_row_stride;
 238     }
 239   }
 240 }
 241
 242 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
 243                               uint16_t *output_ptr, unsigned int output_stride,
 244                               unsigned int output_width,
 245                               unsigned int output_height) {
 246   unsigned int i, j;
 247   for (i = 0; i < output_height; ++i) {
 248     for (j = 0; j < output_width; ++j) {
 249       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
 250     }
 251     output_ptr += output_stride;
 252   }
 253 }
 254
 255 void highbd_filter_average_block2d_8_c(
 256     const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 257     const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
 258     unsigned int output_width, unsigned int output_height, int bd) {
 259   uint16_t tmp[kMaxDimension * kMaxDimension];
 260
 261   assert(output_width <= kMaxDimension);
 262   assert(output_height <= kMaxDimension);
 263   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
 264                             kMaxDimension, output_width, output_height, bd);
 265   highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
 266                            output_width, output_height);
 267 }
 268
 269 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 270  public:
 271   static void SetUpTestCase() {
 272     // Force input_ to be unaligned, output to be 16 byte aligned.
 273     input_ = reinterpret_cast<uint8_t *>(
 274                  aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
 275              1;
 276     output_ = reinterpret_cast<uint8_t *>(
 277         aom_memalign(kDataAlignment, kOutputBufferSize));
 278     output_ref_ = reinterpret_cast<uint8_t *>(
 279         aom_memalign(kDataAlignment, kOutputBufferSize));
 280     input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
 281                    kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
 282                1;
 283     output16_ = reinterpret_cast<uint16_t *>(
 284         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 285     output16_ref_ = reinterpret_cast<uint16_t *>(
 286         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 287   }
 288
 289   virtual void TearDown() { libaom_test::ClearSystemState(); }
 290
 291   static void TearDownTestCase() {
 292     aom_free(input_ - 1);
 293     input_ = NULL;
 294     aom_free(output_);
 295     output_ = NULL;
 296     aom_free(output_ref_);
 297     output_ref_ = NULL;
 298     aom_free(input16_ - 1);
 299     input16_ = NULL;
 300     aom_free(output16_);
 301     output16_ = NULL;
 302     aom_free(output16_ref_);
 303     output16_ref_ = NULL;
 304   }
 305
 306  protected:
 307   static const int kDataAlignment = 16;
 308   static const int kOuterBlockSize = 4 * kMaxDimension;
 309   static const int kInputStride = kOuterBlockSize;
 310   static const int kOutputStride = kOuterBlockSize;
 311   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
 312   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
 313
 314   int Width() const { return GET_PARAM(0); }
 315   int Height() const { return GET_PARAM(1); }
 316   int BorderLeft() const {
 317     const int center = (kOuterBlockSize - Width()) / 2;
 318     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
 319   }
 320   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
 321
 322   bool IsIndexInBorder(int i) {
 323     return (i < BorderTop() * kOuterBlockSize ||
 324             i >= (BorderTop() + Height()) * kOuterBlockSize ||
 325             i % kOuterBlockSize < BorderLeft() ||
 326             i % kOuterBlockSize >= (BorderLeft() + Width()));
 327   }
 328
 329   virtual void SetUp() {
 330     UUT_ = GET_PARAM(2);
 331     if (UUT_->use_highbd_ != 0)
 332       mask_ = (1 << UUT_->use_highbd_) - 1;
 333     else
 334       mask_ = 255;
 335     /* Set up guard blocks for an inner block centered in the outer block */
 336     for (int i = 0; i < kOutputBufferSize; ++i) {
 337       if (IsIndexInBorder(i)) {
 338         output_[i] = 255;
 339         output16_[i] = mask_;
 340       } else {
 341         output_[i] = 0;
 342         output16_[i] = 0;
 343       }
 344     }
 345
 346     ::libaom_test::ACMRandom prng;
 347     for (int i = 0; i < kInputBufferSize; ++i) {
 348       if (i & 1) {
 349         input_[i] = 255;
 350         input16_[i] = mask_;
 351       } else {
 352         input_[i] = prng.Rand8Extremes();
 353         input16_[i] = prng.Rand16() & mask_;
 354       }
 355     }
 356   }
 357
 358   void SetConstantInput(int value) {
 359     memset(input_, value, kInputBufferSize);
 360     aom_memset16(input16_, value, kInputBufferSize);
 361   }
 362
 363   void CopyOutputToRef() {
 364     memcpy(output_ref_, output_, kOutputBufferSize);
 365     // Copy 16-bit pixels values. The effective number of bytes is double.
 366     memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
 367   }
 368
 369   void CheckGuardBlocks() {
 370     for (int i = 0; i < kOutputBufferSize; ++i) {
 371       if (IsIndexInBorder(i)) {
 372         EXPECT_EQ(255, output_[i]);
 373       }
 374     }
 375   }
 376
 377   uint8_t *input() const {
 378     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 379     if (UUT_->use_highbd_ == 0) {
 380       return input_ + offset;
 381     } else {
 382       return CONVERT_TO_BYTEPTR(input16_) + offset;
 383     }
 384   }
 385
 386   uint8_t *output() const {
 387     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 388     if (UUT_->use_highbd_ == 0) {
 389       return output_ + offset;
 390     } else {
 391       return CONVERT_TO_BYTEPTR(output16_) + offset;
 392     }
 393   }
 394
 395   uint8_t *output_ref() const {
 396     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
 397     if (UUT_->use_highbd_ == 0) {
 398       return output_ref_ + offset;
 399     } else {
 400       return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
 401     }
 402   }
 403
 404   uint16_t lookup(uint8_t *list, int index) const {
 405     if (UUT_->use_highbd_ == 0) {
 406       return list[index];
 407     } else {
 408       return CONVERT_TO_SHORTPTR(list)[index];
 409     }
 410   }
 411
 412   void assign_val(uint8_t *list, int index, uint16_t val) const {
 413     if (UUT_->use_highbd_ == 0) {
 414       list[index] = (uint8_t)val;
 415     } else {
 416       CONVERT_TO_SHORTPTR(list)[index] = val;
 417     }
 418   }
 419
 420   void wrapper_filter_average_block2d_8_c(
 421       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 422       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
 423       unsigned int output_width, unsigned int output_height) {
 424     if (UUT_->use_highbd_ == 0) {
 425       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
 426                                  dst_stride, output_width, output_height);
 427     } else {
 428       highbd_filter_average_block2d_8_c(
 429           CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
 430           CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
 431           UUT_->use_highbd_);
 432     }
 433   }
 434
 435   void wrapper_filter_block2d_8_c(
 436       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
 437       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
 438       unsigned int output_width, unsigned int output_height) {
 439     if (UUT_->use_highbd_ == 0) {
 440       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
 441                          dst_stride, output_width, output_height);
 442     } else {
 443       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
 444                                 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
 445                                 dst_stride, output_width, output_height,
 446                                 UUT_->use_highbd_);
 447     }
 448   }
 449
 450   const ConvolveFunctions *UUT_;
 451   static uint8_t *input_;
 452   static uint8_t *output_;
 453   static uint8_t *output_ref_;
 454   static uint16_t *input16_;
 455   static uint16_t *output16_;
 456   static uint16_t *output16_ref_;
 457   int mask_;
 458 };
 459
 460 uint8_t *ConvolveTest::input_ = NULL;
 461 uint8_t *ConvolveTest::output_ = NULL;
 462 uint8_t *ConvolveTest::output_ref_ = NULL;
 463 uint16_t *ConvolveTest::input16_ = NULL;
 464 uint16_t *ConvolveTest::output16_ = NULL;
 465 uint16_t *ConvolveTest::output16_ref_ = NULL;
 466
 467 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
 468
 469 TEST_P(ConvolveTest, Copy) {
 470   uint8_t *const in = input();
 471   uint8_t *const out = output();
 472
 473   ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
 474                                        NULL, 0, NULL, 0, Width(), Height()));
 475
 476   CheckGuardBlocks();
 477
 478   for (int y = 0; y < Height(); ++y)
 479     for (int x = 0; x < Width(); ++x)
 480       ASSERT_EQ(lookup(out, y * kOutputStride + x),
 481                 lookup(in, y * kInputStride + x))
 482           << "(" << x << "," << y << ")";
 483 }
 484
 485 const int kNumFilterBanks = SWITCHABLE_FILTERS;
 486 const int kNumFilters = 16;
 487
 488 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
 489   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 490     const InterpFilter filter = (InterpFilter)filter_bank;
 491     const InterpKernel *filters =
 492         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
 493     const InterpFilterParams filter_params =
 494         av1_get_interp_filter_params_with_block_size(filter, 8);
 495     if (filter_params.taps != SUBPEL_TAPS) continue;
 496     for (int i = 0; i < kNumFilters; i++) {
 497       const int p0 = filters[i][0] + filters[i][1];
 498       const int p1 = filters[i][2] + filters[i][3];
 499       const int p2 = filters[i][4] + filters[i][5];
 500       const int p3 = filters[i][6] + filters[i][7];
 501       EXPECT_LE(p0, 128);
 502       EXPECT_LE(p1, 128);
 503       EXPECT_LE(p2, 128);
 504       EXPECT_LE(p3, 128);
 505       EXPECT_LE(p0 + p3, 128);
 506       EXPECT_LE(p0 + p3 + p1, 128);
 507       EXPECT_LE(p0 + p3 + p1 + p2, 128);
 508       EXPECT_EQ(p0 + p1 + p2 + p3, 128);
 509     }
 510   }
 511 }
 512
 513 const int16_t kInvalidFilter[8] = { 0 };
 514
 515 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
 516   uint8_t *const in = input();
 517   uint8_t *const out = output();
 518   uint8_t ref8[kOutputStride * kMaxDimension];
 519   uint16_t ref16[kOutputStride * kMaxDimension];
 520   uint8_t *ref;
 521   if (UUT_->use_highbd_ == 0) {
 522     ref = ref8;
 523   } else {
 524     ref = CONVERT_TO_BYTEPTR(ref16);
 525   }
 526
 527   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 528     const InterpFilter filter = (InterpFilter)filter_bank;
 529     const InterpKernel *filters =
 530         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
 531     const InterpFilterParams filter_params =
 532         av1_get_interp_filter_params_with_block_size(filter, 8);
 533     if (filter_params.taps != SUBPEL_TAPS) continue;
 534
 535     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 536       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 537         wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
 538                                    filters[filter_y], ref, kOutputStride,
 539                                    Width(), Height());
 540
 541         if (filter_x && filter_y)
 542           continue;
 543         else if (filter_y)
 544           ASM_REGISTER_STATE_CHECK(
 545               UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
 546                         16, filters[filter_y], 16, Width(), Height()));
 547         else if (filter_x)
 548           ASM_REGISTER_STATE_CHECK(
 549               UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
 550                         16, kInvalidFilter, 16, Width(), Height()));
 551         else
 552           ASM_REGISTER_STATE_CHECK(
 553               UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
 554                           0, kInvalidFilter, 0, Width(), Height()));
 555
 556         CheckGuardBlocks();
 557
 558         for (int y = 0; y < Height(); ++y)
 559           for (int x = 0; x < Width(); ++x)
 560             ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 561                       lookup(out, y * kOutputStride + x))
 562                 << "mismatch at (" << x << "," << y << "), "
 563                 << "filters (" << filter_bank << "," << filter_x << ","
 564                 << filter_y << ")";
 565       }
 566     }
 567   }
 568 }
 569
 570 TEST_P(ConvolveTest, FilterExtremes) {
 571   uint8_t *const in = input();
 572   uint8_t *const out = output();
 573   uint8_t ref8[kOutputStride * kMaxDimension];
 574   uint16_t ref16[kOutputStride * kMaxDimension];
 575   uint8_t *ref;
 576   if (UUT_->use_highbd_ == 0) {
 577     ref = ref8;
 578   } else {
 579     ref = CONVERT_TO_BYTEPTR(ref16);
 580   }
 581
 582   // Populate ref and out with some random data
 583   ::libaom_test::ACMRandom prng;
 584   for (int y = 0; y < Height(); ++y) {
 585     for (int x = 0; x < Width(); ++x) {
 586       uint16_t r;
 587       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 588         r = prng.Rand8Extremes();
 589       } else {
 590         r = prng.Rand16() & mask_;
 591       }
 592       assign_val(out, y * kOutputStride + x, r);
 593       assign_val(ref, y * kOutputStride + x, r);
 594     }
 595   }
 596
 597   for (int axis = 0; axis < 2; axis++) {
 598     int seed_val = 0;
 599     while (seed_val < 256) {
 600       for (int y = 0; y < 8; ++y) {
 601         for (int x = 0; x < 8; ++x) {
 602           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
 603                      ((seed_val >> (axis ? y : x)) & 1) * mask_);
 604           if (axis) seed_val++;
 605         }
 606         if (axis)
 607           seed_val -= 8;
 608         else
 609           seed_val++;
 610       }
 611       if (axis) seed_val += 8;
 612
 613       for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 614         const InterpFilter filter = (InterpFilter)filter_bank;
 615         const InterpKernel *filters =
 616             (const InterpKernel *)av1_get_interp_filter_kernel(filter);
 617         const InterpFilterParams filter_params =
 618             av1_get_interp_filter_params_with_block_size(filter, 8);
 619         if (filter_params.taps != SUBPEL_TAPS) continue;
 620         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 621           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 622             wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
 623                                        filters[filter_y], ref, kOutputStride,
 624                                        Width(), Height());
 625             if (filter_x && filter_y)
 626               continue;
 627             else if (filter_y)
 628               ASM_REGISTER_STATE_CHECK(UUT_->v8_(
 629                   in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
 630                   filters[filter_y], 16, Width(), Height()));
 631             else if (filter_x)
 632               ASM_REGISTER_STATE_CHECK(UUT_->h8_(
 633                   in, kInputStride, out, kOutputStride, filters[filter_x], 16,
 634                   kInvalidFilter, 16, Width(), Height()));
 635             else
 636               ASM_REGISTER_STATE_CHECK(UUT_->copy_(
 637                   in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
 638                   kInvalidFilter, 0, Width(), Height()));
 639
 640             for (int y = 0; y < Height(); ++y)
 641               for (int x = 0; x < Width(); ++x)
 642                 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
 643                           lookup(out, y * kOutputStride + x))
 644                     << "mismatch at (" << x << "," << y << "), "
 645                     << "filters (" << filter_bank << "," << filter_x << ","
 646                     << filter_y << ")";
 647           }
 648         }
 649       }
 650     }
 651   }
 652 }
 653
 654 TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
 655   const uint8_t *const in = input();
 656   uint8_t *const out = output();
 657   const int kNumTests = 5000000;
 658   const int width = Width();
 659   const int height = Height();
 660   aom_usec_timer timer;
 661
 662   aom_usec_timer_start(&timer);
 663   for (int n = 0; n < kNumTests; ++n) {
 664     UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
 665                 height);
 666   }
 667   aom_usec_timer_mark(&timer);
 668
 669   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
 670   printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
 671          UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
 672 }
 673
 674 TEST_P(ConvolveTest, DISABLED_Speed) {
 675   uint8_t *const in = input();
 676   uint8_t *const out = output();
 677   uint8_t ref8[kOutputStride * kMaxDimension];
 678   uint16_t ref16[kOutputStride * kMaxDimension];
 679   uint8_t *ref;
 680   if (UUT_->use_highbd_ == 0) {
 681     ref = ref8;
 682   } else {
 683     ref = CONVERT_TO_BYTEPTR(ref16);
 684   }
 685
 686   // Populate ref and out with some random data
 687   ::libaom_test::ACMRandom prng;
 688   for (int y = 0; y < Height(); ++y) {
 689     for (int x = 0; x < Width(); ++x) {
 690       uint16_t r;
 691       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
 692         r = prng.Rand8Extremes();
 693       } else {
 694         r = prng.Rand16() & mask_;
 695       }
 696       assign_val(out, y * kOutputStride + x, r);
 697       assign_val(ref, y * kOutputStride + x, r);
 698     }
 699   }
 700
 701   const InterpFilter filter = (InterpFilter)1;
 702   const InterpKernel *filters =
 703       (const InterpKernel *)av1_get_interp_filter_kernel(filter);
 704   wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
 705                                      out, kOutputStride, Width(), Height());
 706
 707   aom_usec_timer timer;
 708   int tests_num = 1000;
 709
 710   aom_usec_timer_start(&timer);
 711   while (tests_num > 0) {
 712     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
 713       const InterpFilter filter = (InterpFilter)filter_bank;
 714       const InterpKernel *filters =
 715           (const InterpKernel *)av1_get_interp_filter_kernel(filter);
 716       const InterpFilterParams filter_params =
 717           av1_get_interp_filter_params_with_block_size(filter, 8);
 718       if (filter_params.taps != SUBPEL_TAPS) continue;
 719
 720       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
 721         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
 722           if (filter_x && filter_y) continue;
 723           if (filter_y)
 724             ASM_REGISTER_STATE_CHECK(
 725                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
 726                           16, filters[filter_y], 16, Width(), Height()));
 727           else if (filter_x)
 728             ASM_REGISTER_STATE_CHECK(UUT_->h8_(
 729                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
 730                 kInvalidFilter, 16, Width(), Height()));
 731         }
 732       }
 733     }
 734     tests_num--;
 735   }
 736   aom_usec_timer_mark(&timer);
 737
 738   const int elapsed_time =
 739       static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
 740   printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
 741          UUT_->use_highbd_, elapsed_time);
 742 }
 743
 744 using ::testing::make_tuple;
 745
 // Adapter generator. WRAP(name, bits) defines a file-local function named
 // wrap_<name>_<bits> whose ten-argument signature matches ConvolveFunc, and
 // which forwards every argument unchanged to aom_highbd_<name>() with `bits`
 // appended as the trailing argument. This lets functions that take an extra
 // trailing parameter be stored in a ConvolveFunctions table.
 #define WRAP(name, bits)                                                      \
   static void wrap_##name##_##bits(                                           \
       const uint8_t *src_buf, ptrdiff_t src_pitch, uint8_t *dst_buf,          \
       ptrdiff_t dst_pitch, const int16_t *kernel_x, int kernel_x_step,        \
       const int16_t *kernel_y, int kernel_y_step, int width, int height) {    \
     aom_highbd_##name(src_buf, src_pitch, dst_buf, dst_pitch, kernel_x,       \
                       kernel_x_step, kernel_y, kernel_y_step, width, height,  \
                       bits);                                                  \
   }
 754 #if HAVE_SSE2 && ARCH_X86_64
 755 WRAP(convolve_copy_sse2, 8)
 756 WRAP(convolve_copy_sse2, 10)
 757 WRAP(convolve_copy_sse2, 12)
 758 WRAP(convolve8_horiz_sse2, 8)
 759 WRAP(convolve8_vert_sse2, 8)
 760 WRAP(convolve8_horiz_sse2, 10)
 761 WRAP(convolve8_vert_sse2, 10)
 762 WRAP(convolve8_horiz_sse2, 12)
 763 WRAP(convolve8_vert_sse2, 12)
 764 #endif  // HAVE_SSE2 && ARCH_X86_64
 765
 766 WRAP(convolve_copy_c, 8)
 767 WRAP(convolve8_horiz_c, 8)
 768 WRAP(convolve8_vert_c, 8)
 769 WRAP(convolve_copy_c, 10)
 770 WRAP(convolve8_horiz_c, 10)
 771 WRAP(convolve8_vert_c, 10)
 772 WRAP(convolve_copy_c, 12)
 773 WRAP(convolve8_horiz_c, 12)
 774 WRAP(convolve8_vert_c, 12)
 775
 776 #if HAVE_AVX2
 777 WRAP(convolve_copy_avx2, 8)
 778 WRAP(convolve8_horiz_avx2, 8)
 779 WRAP(convolve8_vert_avx2, 8)
 780
 781 WRAP(convolve_copy_avx2, 10)
 782 WRAP(convolve8_horiz_avx2, 10)
 783 WRAP(convolve8_vert_avx2, 10)
 784
 785 WRAP(convolve_copy_avx2, 12)
 786 WRAP(convolve8_horiz_avx2, 12)
 787 WRAP(convolve8_vert_avx2, 12)
 788 #endif  // HAVE_AVX2
 789
 790 #undef WRAP
 791
 792 const ConvolveFunctions convolve8_c(wrap_convolve_copy_c_8,
 793                                     wrap_convolve8_horiz_c_8,
 794                                     wrap_convolve8_vert_c_8, 8);
 795 const ConvolveFunctions convolve10_c(wrap_convolve_copy_c_10,
 796                                      wrap_convolve8_horiz_c_10,
 797                                      wrap_convolve8_vert_c_10, 10);
 798 const ConvolveFunctions convolve12_c(wrap_convolve_copy_c_12,
 799                                      wrap_convolve8_horiz_c_12,
 800                                      wrap_convolve8_vert_c_12, 12);
 801 const ConvolveParam kArrayConvolve_c[] = {
 802   ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
 803 };
 804
 805 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
 806
 #if HAVE_SSE2 && ARCH_X86_64
 // Function tables built from the `_sse2` wrappers, one per bit depth. The
 // last constructor argument is stored in use_highbd_ (the actual bit depth).
 const ConvolveFunctions convolve8_sse2(
     /*copy=*/wrap_convolve_copy_sse2_8,
     /*h8=*/wrap_convolve8_horiz_sse2_8,
     /*v8=*/wrap_convolve8_vert_sse2_8,
     /*bd=*/8);
 const ConvolveFunctions convolve10_sse2(
     /*copy=*/wrap_convolve_copy_sse2_10,
     /*h8=*/wrap_convolve8_horiz_sse2_10,
     /*v8=*/wrap_convolve8_vert_sse2_10,
     /*bd=*/10);
 const ConvolveFunctions convolve12_sse2(
     /*copy=*/wrap_convolve_copy_sse2_12,
     /*h8=*/wrap_convolve8_horiz_sse2_12,
     /*v8=*/wrap_convolve8_vert_sse2_12,
     /*bd=*/12);

 // Test parameters for the "SSE2" instantiation, in 8 / 10 / 12-bit order.
 const ConvolveParam kArrayConvolve_sse2[] = {
   ALL_SIZES(convolve8_sse2),
   ALL_SIZES(convolve10_sse2),
   ALL_SIZES(convolve12_sse2)
 };

 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve_sse2));
 #endif  // HAVE_SSE2 && ARCH_X86_64
 823
 #if HAVE_SSSE3
 // Table for the SSSE3 horizontal/vertical functions. They are stored
 // directly (no wrap_* adapter) and use_highbd_ is 0, i.e. high bitdepth is
 // not used. The copy slot is filled with the C function aom_convolve_copy_c.
 const ConvolveFunctions convolve8_ssse3(
     /*copy=*/aom_convolve_copy_c,
     /*h8=*/aom_convolve8_horiz_ssse3,
     /*v8=*/aom_convolve8_vert_ssse3,
     /*bd=*/0);

 // Test parameters for the "SSSE3" instantiation.
 const ConvolveParam kArrayConvolve8_ssse3[] = {
   ALL_SIZES(convolve8_ssse3)
 };

 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve8_ssse3));
 #endif  // HAVE_SSSE3
 833
 #if HAVE_AVX2
 // Function tables built from the `_avx2` wrappers, one per bit depth. The
 // last constructor argument is stored in use_highbd_ (the actual bit depth).
 const ConvolveFunctions convolve8_avx2(
     /*copy=*/wrap_convolve_copy_avx2_8,
     /*h8=*/wrap_convolve8_horiz_avx2_8,
     /*v8=*/wrap_convolve8_vert_avx2_8,
     /*bd=*/8);
 const ConvolveFunctions convolve10_avx2(
     /*copy=*/wrap_convolve_copy_avx2_10,
     /*h8=*/wrap_convolve8_horiz_avx2_10,
     /*v8=*/wrap_convolve8_vert_avx2_10,
     /*bd=*/10);
 const ConvolveFunctions convolve12_avx2(
     /*copy=*/wrap_convolve_copy_avx2_12,
     /*h8=*/wrap_convolve8_horiz_avx2_12,
     /*v8=*/wrap_convolve8_vert_avx2_12,
     /*bd=*/12);

 // Test parameters for the "AVX2" instantiation. Unlike the other
 // instantiations this one uses ALL_SIZES_64 rather than ALL_SIZES.
 // NOTE(review): ALL_SIZES_64 is defined outside this part of the file;
 // presumably it restricts the block sizes covered -- confirm there.
 const ConvolveParam kArrayConvolve8_avx2[] = {
   ALL_SIZES_64(convolve8_avx2),
   ALL_SIZES_64(convolve10_avx2),
   ALL_SIZES_64(convolve12_avx2)
 };

 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve8_avx2));
 #endif  // HAVE_AVX2
 850
 851 }  // namespace