test/cfl_test.cc

   1 /*
   2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
   3  *
   4  * This source code is subject to the terms of the BSD 2 Clause License and
   5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
   6  * was not distributed with this source code in the LICENSE file, you can
   7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
   8  * Media Patent License 1.0 was not distributed with this source code in the
   9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  10  */
  11 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
  12
  13 #include "aom_ports/aom_timer.h"
  14 #include "./av1_rtcd.h"
  15 #include "test/util.h"
  16 #include "test/acm_random.h"
  17
  18 using ::testing::make_tuple;
  19
  20 using libaom_test::ACMRandom;
  21
  22 #define NUM_ITERATIONS (100)
  23 #define NUM_ITERATIONS_SPEED (INT16_MAX)
  24
  25 #define ALL_CFL_TX_SIZES(function)                                     \
  26   make_tuple(TX_4X4, &function), make_tuple(TX_4X8, &function),        \
  27       make_tuple(TX_4X16, &function), make_tuple(TX_8X4, &function),   \
  28       make_tuple(TX_8X8, &function), make_tuple(TX_8X16, &function),   \
  29       make_tuple(TX_8X32, &function), make_tuple(TX_16X4, &function),  \
  30       make_tuple(TX_16X8, &function), make_tuple(TX_16X16, &function), \
  31       make_tuple(TX_16X32, &function), make_tuple(TX_32X8, &function), \
  32       make_tuple(TX_32X16, &function), make_tuple(TX_32X32, &function)
  33
  34 namespace {
  35 typedef cfl_subsample_lbd_fn (*get_subsample_fn)(TX_SIZE tx_size);
  36
  37 typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
  38
  39 typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
  40
  41 typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
  42
  43 typedef ::testing::tuple<TX_SIZE, get_subsample_fn> subsample_param;
  44
  45 typedef ::testing::tuple<TX_SIZE, get_predict_fn> predict_param;
  46
  47 typedef ::testing::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
  48
  49 typedef ::testing::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
  50
  51 template <typename A>
  52 static void assert_eq(const A *a, const A *b, int width, int height) {
  53   for (int j = 0; j < height; j++) {
  54     for (int i = 0; i < width; i++) {
  55       ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
  56     }
  57   }
  58 }
  59
  60 static void assertFaster(int ref_elapsed_time, int elapsed_time) {
  61   EXPECT_GT(ref_elapsed_time, elapsed_time)
  62       << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
  63       << "C time: " << ref_elapsed_time << " us" << std::endl
  64       << "SIMD time: " << elapsed_time << " us" << std::endl;
  65 }
  66
  67 static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
  68                        int height) {
  69   std::cout.precision(2);
  70   std::cout << "[          ] " << width << "x" << height
  71             << ": C time = " << ref_elapsed_time
  72             << " us, SIMD time = " << elapsed_time << " us"
  73             << " (~" << ref_elapsed_time / (double)elapsed_time << "x) "
  74             << std::endl;
  75 }
  76
  77 template <typename F>
  78 class CFLTest
  79     : public ::testing::TestWithParam< ::testing::tuple<TX_SIZE, F> > {
  80  public:
  81   virtual ~CFLTest() {}
  82   virtual void SetUp() {
  83     tx_size = ::testing::get<0>(this->GetParam());
  84     width = tx_size_wide[tx_size];
  85     height = tx_size_high[tx_size];
  86     fun_under_test = ::testing::get<1>(this->GetParam());
  87     rnd(ACMRandom::DeterministicSeed());
  88   }
  89
  90  protected:
  91   ACMRandom rnd;
  92   F fun_under_test;
  93   TX_SIZE tx_size;
  94   int width;
  95   int height;
  96 };
  97
  98 template <typename F, typename I>
  99 class CFLTestWithData : public CFLTest<F> {
 100  public:
 101   virtual ~CFLTestWithData() {}
 102
 103  protected:
 104   I data[CFL_BUF_SQUARE];
 105   I data_ref[CFL_BUF_SQUARE];
 106
 107   void init(I (ACMRandom::*random)()) {
 108     for (int j = 0; j < this->height; j++) {
 109       for (int i = 0; i < this->width; i++) {
 110         const I d = (this->rnd.*random)();
 111         data[j * CFL_BUF_LINE + i] = d;
 112         data_ref[j * CFL_BUF_LINE + i] = d;
 113       }
 114     }
 115   }
 116 };
 117
 118 template <typename F, typename I>
 119 class CFLTestWithAlignedData : public CFLTest<F> {
 120  public:
 121   virtual ~CFLTestWithAlignedData() {}
 122   virtual void SetUp() {
 123     CFLTest<F>::SetUp();
 124     chroma_pels_ref =
 125         reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
 126     chroma_pels =
 127         reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
 128     sub_luma_pels_ref = reinterpret_cast<int16_t *>(
 129         aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
 130     sub_luma_pels = reinterpret_cast<int16_t *>(
 131         aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
 132     memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
 133     memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
 134     memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
 135     memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
 136   }
 137
 138   virtual void TearDown() {
 139     aom_free(chroma_pels_ref);
 140     aom_free(sub_luma_pels_ref);
 141     aom_free(chroma_pels);
 142     aom_free(sub_luma_pels);
 143   }
 144
 145  protected:
 146   I *chroma_pels_ref;
 147   I *chroma_pels;
 148   int16_t *sub_luma_pels_ref;
 149   int16_t *sub_luma_pels;
 150   int alpha_q3;
 151   I dc;
 152   void init(int bd) {
 153     alpha_q3 = this->rnd(33) - 16;
 154     dc = this->rnd(1 << bd);
 155     for (int j = 0; j < this->height; j++) {
 156       for (int i = 0; i < this->width; i++) {
 157         chroma_pels[j * CFL_BUF_LINE + i] = dc;
 158         chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
 159         sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
 160             sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd.Rand15Signed();
 161       }
 162     }
 163   }
 164 };
 165
 166 class CFLSubAvgTest : public CFLTestWithData<sub_avg_fn, int16_t> {
 167  public:
 168   virtual ~CFLSubAvgTest() {}
 169 };
 170
 171 class CFLSubsampleTest : public CFLTestWithData<get_subsample_fn, uint8_t> {
 172  public:
 173   virtual ~CFLSubsampleTest() {}
 174 };
 175
 176 class CFLPredictTest : public CFLTestWithAlignedData<get_predict_fn, uint8_t> {
 177  public:
 178   virtual ~CFLPredictTest() {}
 179 };
 180
 181 class CFLPredictHBDTest
 182     : public CFLTestWithAlignedData<get_predict_fn_hbd, uint16_t> {
 183  public:
 184   virtual ~CFLPredictHBDTest() {}
 185 };
 186
 187 TEST_P(CFLSubAvgTest, SubAvgTest) {
 188   const cfl_subtract_average_fn ref_sub = get_subtract_average_fn_c(tx_size);
 189   const cfl_subtract_average_fn sub = fun_under_test(tx_size);
 190   for (int it = 0; it < NUM_ITERATIONS; it++) {
 191     init(&ACMRandom::Rand15Signed);
 192     sub(data);
 193     ref_sub(data_ref);
 194     assert_eq<int16_t>(data, data_ref, width, height);
 195   }
 196 }
 197
 198 TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
 199   const cfl_subtract_average_fn ref_sub = get_subtract_average_fn_c(tx_size);
 200   const cfl_subtract_average_fn sub = fun_under_test(tx_size);
 201
 202   aom_usec_timer ref_timer;
 203   aom_usec_timer timer;
 204
 205   init(&ACMRandom::Rand15Signed);
 206   aom_usec_timer_start(&ref_timer);
 207   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 208     ref_sub(data_ref);
 209   }
 210   aom_usec_timer_mark(&ref_timer);
 211   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
 212
 213   aom_usec_timer_start(&timer);
 214   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 215     sub(data);
 216   }
 217   aom_usec_timer_mark(&timer);
 218   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 219
 220   printSpeed(ref_elapsed_time, elapsed_time, width, height);
 221   assertFaster(ref_elapsed_time, elapsed_time);
 222 }
 223
 224 TEST_P(CFLSubsampleTest, SubsampleTest) {
 225   int16_t sub_luma_pels[CFL_BUF_SQUARE];
 226   int16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
 227   const int sub_width = width >> 1;
 228   const int sub_height = height >> 1;
 229
 230   for (int it = 0; it < NUM_ITERATIONS; it++) {
 231     init(&ACMRandom::Rand8);
 232     fun_under_test(tx_size)(data, CFL_BUF_LINE, sub_luma_pels);
 233     cfl_get_luma_subsampling_420_lbd_c(tx_size)(data_ref, CFL_BUF_LINE,
 234                                                 sub_luma_pels_ref);
 235     assert_eq<int16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width, sub_height);
 236   }
 237 }
 238
 239 TEST_P(CFLSubsampleTest, DISABLED_SubsampleSpeedTest) {
 240   int16_t sub_luma_pels[CFL_BUF_SQUARE];
 241   int16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
 242   cfl_subsample_lbd_fn subsample = fun_under_test(tx_size);
 243   cfl_subsample_lbd_fn subsample_ref =
 244       cfl_get_luma_subsampling_420_lbd_c(tx_size);
 245   aom_usec_timer ref_timer;
 246   aom_usec_timer timer;
 247
 248   init(&ACMRandom::Rand8);
 249   aom_usec_timer_start(&ref_timer);
 250   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 251     subsample_ref(data_ref, CFL_BUF_LINE, sub_luma_pels);
 252   }
 253   aom_usec_timer_mark(&ref_timer);
 254   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
 255
 256   aom_usec_timer_start(&timer);
 257   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 258     subsample(data, CFL_BUF_LINE, sub_luma_pels_ref);
 259   }
 260   aom_usec_timer_mark(&timer);
 261   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 262
 263   printSpeed(ref_elapsed_time, elapsed_time, width, height);
 264   assertFaster(ref_elapsed_time, elapsed_time);
 265 }
 266
 267 TEST_P(CFLPredictTest, PredictTest) {
 268   for (int it = 0; it < NUM_ITERATIONS; it++) {
 269     init(8);
 270     fun_under_test(tx_size)(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
 271     get_predict_lbd_fn_c(tx_size)(sub_luma_pels_ref, chroma_pels_ref,
 272                                   CFL_BUF_LINE, alpha_q3);
 273
 274     assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
 275   }
 276 }
 277
 278 TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
 279   aom_usec_timer ref_timer;
 280   aom_usec_timer timer;
 281
 282   init(8);
 283   cfl_predict_lbd_fn predict_impl = get_predict_lbd_fn_c(tx_size);
 284   aom_usec_timer_start(&ref_timer);
 285
 286   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 287     predict_impl(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
 288   }
 289   aom_usec_timer_mark(&ref_timer);
 290   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
 291
 292   predict_impl = fun_under_test(tx_size);
 293   aom_usec_timer_start(&timer);
 294   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 295     predict_impl(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
 296   }
 297   aom_usec_timer_mark(&timer);
 298   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 299
 300   printSpeed(ref_elapsed_time, elapsed_time, width, height);
 301   assertFaster(ref_elapsed_time, elapsed_time);
 302 }
 303
 304 TEST_P(CFLPredictHBDTest, PredictHBDTest) {
 305   int bd = 12;
 306   for (int it = 0; it < NUM_ITERATIONS; it++) {
 307     init(bd);
 308     fun_under_test(tx_size)(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3,
 309                             bd);
 310     get_predict_hbd_fn_c(tx_size)(sub_luma_pels_ref, chroma_pels_ref,
 311                                   CFL_BUF_LINE, alpha_q3, bd);
 312
 313     assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
 314   }
 315 }
 316
 317 TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
 318   aom_usec_timer ref_timer;
 319   aom_usec_timer timer;
 320   int bd = 12;
 321   init(bd);
 322   cfl_predict_hbd_fn predict_impl = get_predict_hbd_fn_c(tx_size);
 323   aom_usec_timer_start(&ref_timer);
 324
 325   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 326     predict_impl(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
 327                  bd);
 328   }
 329   aom_usec_timer_mark(&ref_timer);
 330   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
 331
 332   predict_impl = fun_under_test(tx_size);
 333   aom_usec_timer_start(&timer);
 334   for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
 335     predict_impl(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
 336   }
 337   aom_usec_timer_mark(&timer);
 338   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 339
 340   printSpeed(ref_elapsed_time, elapsed_time, width, height);
 341   assertFaster(ref_elapsed_time, elapsed_time);
 342 }
 343
 344 #if HAVE_SSE2
 345 const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
 346     get_subtract_average_fn_sse2) };
 347
 348 INSTANTIATE_TEST_CASE_P(SSE2, CFLSubAvgTest,
 349                         ::testing::ValuesIn(sub_avg_sizes_sse2));
 350
 351 #endif
 352
 353 #if HAVE_SSSE3
 354
 355 const subsample_param subsample_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
 356     cfl_get_luma_subsampling_420_lbd_ssse3) };
 357
 358 const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
 359     get_predict_lbd_fn_ssse3) };
 360
 361 const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
 362     get_predict_hbd_fn_ssse3) };
 363
 364 INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleTest,
 365                         ::testing::ValuesIn(subsample_sizes_ssse3));
 366
 367 INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictTest,
 368                         ::testing::ValuesIn(predict_sizes_ssse3));
 369
 370 INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictHBDTest,
 371                         ::testing::ValuesIn(predict_sizes_hbd_ssse3));
 372 #endif
 373
 374 #if HAVE_AVX2
 375 const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
 376     get_subtract_average_fn_avx2) };
 377
 378 const subsample_param subsample_sizes_avx2[] = { ALL_CFL_TX_SIZES(
 379     cfl_get_luma_subsampling_420_lbd_avx2) };
 380
 381 const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
 382     get_predict_lbd_fn_avx2) };
 383
 384 const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
 385     get_predict_hbd_fn_avx2) };
 386
 387 INSTANTIATE_TEST_CASE_P(AVX2, CFLSubAvgTest,
 388                         ::testing::ValuesIn(sub_avg_sizes_avx2));
 389
 390 INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleTest,
 391                         ::testing::ValuesIn(subsample_sizes_avx2));
 392
 393 INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictTest,
 394                         ::testing::ValuesIn(predict_sizes_avx2));
 395
 396 INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictHBDTest,
 397                         ::testing::ValuesIn(predict_sizes_hbd_avx2));
 398 #endif
 399
 400 #if HAVE_NEON
 401 const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
 402     get_subtract_average_fn_neon) };
 403
 404 const subsample_param subsample_sizes_neon[] = { ALL_CFL_TX_SIZES(
 405     cfl_get_luma_subsampling_420_lbd_neon) };
 406
 407 INSTANTIATE_TEST_CASE_P(NEON, CFLSubAvgTest,
 408                         ::testing::ValuesIn(sub_avg_sizes_neon));
 409
 410 INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleTest,
 411                         ::testing::ValuesIn(subsample_sizes_neon));
 412
 413 #endif
 414 }  // namespace