av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / av1_convolve_scale_test.cc
blob78ec988a13599059deeb49a7cf0b1618afd22346
1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include <vector>
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "./av1_rtcd.h"
17 #include "aom_ports/aom_timer.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
23 #if CONFIG_JNT_COMP
24 #include "av1/common/common_data.h"
25 #endif
27 namespace {
// Iteration counts: kTestIters randomized rounds per correctness test, and
// kPerfIters timed calls per implementation in the speed test.
const int kTestIters = 10;
const int kPerfIters = 1000;

// Border, in pixels, kept above/below (kVPad) and left/right (kHPad) of the
// block inside every test buffer.
const int kVPad = 32;
const int kHPad = 32;

// Values passed to the convolve functions as x_step_qn and y_step_qn.
const int kXStepQn = 16;
const int kYStepQn = 20;
36 using std::tr1::tuple;
37 using std::tr1::make_tuple;
38 using libaom_test::ACMRandom;
// Supported filter lengths. The enumerators are consecutive from zero so that
// NTapsToInt() can map them arithmetically onto 8, 10 and 12.
enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };

// Returns the number of taps that the given enumerator stands for.
int NTapsToInt(NTaps ntaps) {
  const int index = static_cast<int>(ntaps);
  return 2 * (index + 4);
}
43 // A 16-bit filter with a configurable number of taps.
44 class TestFilter {
45 public:
46 void set(NTaps ntaps, bool backwards);
48 InterpFilterParams params_;
50 private:
51 std::vector<int16_t> coeffs_;
54 void TestFilter::set(NTaps ntaps, bool backwards) {
55 const int n = NTapsToInt(ntaps);
56 assert(n >= 8 && n <= 12);
58 // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
59 // elements at the end so that convolutions can read off the end safely.
60 coeffs_.resize(n * SUBPEL_SHIFTS + 8);
62 // The coefficients are pretty much arbitrary, but convolutions shouldn't
63 // over or underflow. For the first filter (subpels = 0), we use an
64 // increasing or decreasing ramp (depending on the backwards parameter). We
65 // don't want any zero coefficients, so we make it have an x-intercept at -1
66 // or n. To ensure absence of under/overflow, we normalise the area under the
67 // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
68 // gives the identity).
70 // When increasing, the function has the form:
72 // f(x) = A * (x + 1)
74 // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
75 // filter is reversed, we have the same A but with formula
77 // g(x) = A * (n - x)
78 const int I = 1 << FILTER_BITS;
79 const float A = 2.f * I / (n * (n + 1.f));
80 for (int i = 0; i < n; ++i) {
81 coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
84 // For the other filters, make them slightly different by swapping two
85 // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
86 const size_t filter_size = sizeof(coeffs_[0] * n);
87 int16_t *const filter0 = &coeffs_[0];
88 for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
89 int16_t *filterk = &coeffs_[k * n];
90 memcpy(filterk, filter0, filter_size);
92 const int idx0 = k % n;
93 const int idx1 = (7 * k) % n;
95 const int16_t tmp = filterk[idx0];
96 filterk[idx0] = filterk[idx1];
97 filterk[idx1] = tmp;
100 // Finally, write some rubbish at the end to make sure we don't use it.
101 for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
103 // Fill in params
104 params_.filter_ptr = &coeffs_[0];
105 params_.taps = n;
106 // These are ignored by the functions being tested. Set them to whatever.
107 params_.subpel_shifts = SUBPEL_SHIFTS;
108 params_.interp_filter = EIGHTTAP_REGULAR;
111 template <typename SrcPixel>
112 class TestImage {
113 public:
114 TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
115 assert(bd < 16);
116 assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
118 // Pad width by 2*kHPad and then round up to the next multiple of 16
119 // to get src_stride_. Add another 16 for dst_stride_ (to make sure
120 // something goes wrong if we use the wrong one)
121 src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
122 dst_stride_ = src_stride_ + 16;
124 // Allocate image data
125 src_data_.resize(2 * src_block_size());
126 dst_data_.resize(2 * dst_block_size());
129 void Initialize(ACMRandom *rnd);
130 void Check() const;
132 int src_stride() const { return src_stride_; }
133 int dst_stride() const { return dst_stride_; }
135 int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
136 int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
138 const SrcPixel *GetSrcData(bool ref, bool borders) const {
139 const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
140 return borders ? block : block + kHPad + src_stride_ * kVPad;
143 int32_t *GetDstData(bool ref, bool borders) {
144 int32_t *block = &dst_data_[ref ? 0 : dst_block_size()];
145 return borders ? block : block + kHPad + dst_stride_ * kVPad;
148 private:
149 int w_, h_, bd_;
150 int src_stride_, dst_stride_;
152 std::vector<SrcPixel> src_data_;
153 std::vector<int32_t> dst_data_;
156 template <typename Pixel>
157 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
158 if (!trash) {
159 memset(data, 0, sizeof(*data) * num_pixels);
160 return;
162 const Pixel mask = (1 << bd) - 1;
163 for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
166 template <typename Pixel>
167 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
168 bool trash_edges, Pixel *data) {
169 assert(rnd);
170 const Pixel mask = (1 << bd) - 1;
172 // Fill in the first buffer with random data
173 // Top border
174 FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
175 for (int r = 0; r < h; ++r) {
176 Pixel *row_data = data + (kVPad + r) * stride;
177 // Left border, contents, right border
178 FillEdge(rnd, kHPad, bd, trash_edges, row_data);
179 for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
180 FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
182 // Bottom border
183 FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
185 const int bpp = sizeof(*data);
186 const int block_elts = stride * (h + 2 * kVPad);
187 const int block_size = bpp * block_elts;
189 // Now copy that to the second buffer
190 memcpy(data + block_elts, data, block_size);
193 template <typename SrcPixel>
194 void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
195 PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
196 PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
199 template <typename SrcPixel>
200 void TestImage<SrcPixel>::Check() const {
201 // If memcmp returns 0, there's nothing to do.
202 const int num_pixels = dst_block_size();
203 const int32_t *ref_dst = &dst_data_[0];
204 const int32_t *tst_dst = &dst_data_[num_pixels];
206 if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
208 // Otherwise, iterate through the buffer looking for differences (including
209 // the edges)
210 const int stride = dst_stride_;
211 for (int r = 0; r < h_ + 2 * kVPad; ++r) {
212 for (int c = 0; c < w_ + 2 * kHPad; ++c) {
213 const int32_t ref_value = ref_dst[r * stride + c];
214 const int32_t tst_value = tst_dst[r * stride + c];
216 EXPECT_EQ(tst_value, ref_value)
217 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
222 typedef tuple<int, int> BlockDimension;
224 struct BaseParams {
225 BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg)
226 : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {}
228 BlockDimension dims;
229 NTaps ntaps_x, ntaps_y;
230 bool avg;
233 template <typename SrcPixel>
234 class ConvolveScaleTestBase : public ::testing::Test {
235 public:
236 ConvolveScaleTestBase() : image_(NULL) {}
237 virtual ~ConvolveScaleTestBase() { delete image_; }
238 virtual void TearDown() { libaom_test::ClearSystemState(); }
240 // Implemented by subclasses (SetUp depends on the parameters passed
241 // in and RunOne depends on the function to be tested. These can't
242 // be templated for low/high bit depths because they have different
243 // numbers of parameters)
244 virtual void SetUp() = 0;
245 virtual void RunOne(bool ref) = 0;
247 protected:
248 void SetParams(const BaseParams &params, int bd) {
249 width_ = std::tr1::get<0>(params.dims);
250 height_ = std::tr1::get<1>(params.dims);
251 ntaps_x_ = params.ntaps_x;
252 ntaps_y_ = params.ntaps_y;
253 bd_ = bd;
254 avg_ = params.avg;
256 filter_x_.set(ntaps_x_, false);
257 filter_y_.set(ntaps_y_, true);
258 convolve_params_ =
259 get_conv_params_no_round(0, avg_ != false, 0, NULL, 0, 1);
261 delete image_;
262 image_ = new TestImage<SrcPixel>(width_, height_, bd_);
265 #if CONFIG_JNT_COMP
266 void SetConvParamOffset(int i, int j) {
267 if (i == -1 && j == -1) {
268 convolve_params_.use_jnt_comp_avg = 0;
269 } else {
270 convolve_params_.use_jnt_comp_avg = 1;
271 convolve_params_.fwd_offset = quant_dist_lookup_table[i][j][0];
272 convolve_params_.bck_offset = quant_dist_lookup_table[i][j][1];
275 #endif // CONFIG_JNT_COMP
277 void Run() {
278 ACMRandom rnd(ACMRandom::DeterministicSeed());
279 for (int i = 0; i < kTestIters; ++i) {
280 #if CONFIG_JNT_COMP
281 SetConvParamOffset(-1, -1);
282 Prep(&rnd);
283 RunOne(true);
284 RunOne(false);
285 image_->Check();
287 for (int j = 0; j < 2; ++j) {
288 for (int k = 0; k < 4; ++k) {
289 SetConvParamOffset(j, k);
290 Prep(&rnd);
291 RunOne(true);
292 RunOne(false);
293 image_->Check();
296 #else
297 Prep(&rnd);
298 RunOne(true);
299 RunOne(false);
300 image_->Check();
301 #endif // CONFIG_JNT_COMP
305 void SpeedTest() {
306 ACMRandom rnd(ACMRandom::DeterministicSeed());
307 Prep(&rnd);
309 aom_usec_timer ref_timer;
310 aom_usec_timer_start(&ref_timer);
311 for (int i = 0; i < kPerfIters; ++i) RunOne(true);
312 aom_usec_timer_mark(&ref_timer);
313 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
315 aom_usec_timer tst_timer;
316 aom_usec_timer_start(&tst_timer);
317 for (int i = 0; i < kPerfIters; ++i) RunOne(false);
318 aom_usec_timer_mark(&tst_timer);
319 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
321 std::cout << "[ ] C time = " << ref_time / 1000
322 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
324 EXPECT_GT(ref_time, tst_time)
325 << "Error: CDEFSpeedTest, SIMD slower than C.\n"
326 << "C time: " << ref_time << " us\n"
327 << "SIMD time: " << tst_time << " us\n";
330 static int RandomSubpel(ACMRandom *rnd) {
331 const uint8_t subpel_mode = rnd->Rand8();
332 if ((subpel_mode & 7) == 0) {
333 return 0;
334 } else if ((subpel_mode & 7) == 1) {
335 return SCALE_SUBPEL_SHIFTS - 1;
336 } else {
337 return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
341 void Prep(ACMRandom *rnd) {
342 assert(rnd);
344 // Choose subpel_x_ and subpel_y_. They should be less than
345 // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
346 // values: 0 and SCALE_SUBPEL_SHIFTS - 1
347 subpel_x_ = RandomSubpel(rnd);
348 subpel_y_ = RandomSubpel(rnd);
350 image_->Initialize(rnd);
353 int width_, height_, bd_;
354 NTaps ntaps_x_, ntaps_y_;
355 bool avg_;
356 int subpel_x_, subpel_y_;
357 TestFilter filter_x_, filter_y_;
358 TestImage<SrcPixel> *image_;
359 ConvolveParams convolve_params_;
362 typedef tuple<int, int> BlockDimension;
364 typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
365 int32_t *dst, int dst_stride, int w, int h,
366 InterpFilterParams *filter_params_x,
367 InterpFilterParams *filter_params_y,
368 const int subpel_x_qn, const int x_step_qn,
369 const int subpel_y_qn, const int y_step_qn,
370 ConvolveParams *conv_params);
372 // Test parameter list:
373 // <tst_fun, dims, ntaps_x, ntaps_y, avg>
374 typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
375 LowBDParams;
377 class LowBDConvolveScaleTest
378 : public ConvolveScaleTestBase<uint8_t>,
379 public ::testing::WithParamInterface<LowBDParams> {
380 public:
381 virtual ~LowBDConvolveScaleTest() {}
383 void SetUp() {
384 tst_fun_ = GET_PARAM(0);
386 const BlockDimension &block = GET_PARAM(1);
387 const NTaps ntaps_x = GET_PARAM(2);
388 const NTaps ntaps_y = GET_PARAM(3);
389 const int bd = 8;
390 const bool avg = GET_PARAM(4);
392 SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
395 void RunOne(bool ref) {
396 const uint8_t *src = image_->GetSrcData(ref, false);
397 CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
398 const int src_stride = image_->src_stride();
399 const int dst_stride = image_->dst_stride();
401 if (ref) {
402 av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
403 &filter_x_.params_, &filter_y_.params_, subpel_x_,
404 kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
405 } else {
406 tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
407 &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
408 subpel_y_, kYStepQn, &convolve_params_);
412 private:
413 LowbdConvolveFunc tst_fun_;
416 const BlockDimension kBlockDim[] = {
417 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
418 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
419 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
420 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
421 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
422 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
425 const NTaps kNTaps[] = { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
427 TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
428 TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
430 INSTANTIATE_TEST_CASE_P(
431 SSE4_1, LowBDConvolveScaleTest,
432 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
433 ::testing::ValuesIn(kBlockDim),
434 ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
435 ::testing::Bool()));
437 typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
438 int32_t *dst, int dst_stride, int w, int h,
439 InterpFilterParams *filter_params_x,
440 InterpFilterParams *filter_params_y,
441 const int subpel_x_qn, const int x_step_qn,
442 const int subpel_y_qn, const int y_step_qn,
443 ConvolveParams *conv_params, int bd);
445 // Test parameter list:
446 // <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
447 typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
448 HighBDParams;
450 class HighBDConvolveScaleTest
451 : public ConvolveScaleTestBase<uint16_t>,
452 public ::testing::WithParamInterface<HighBDParams> {
453 public:
454 virtual ~HighBDConvolveScaleTest() {}
456 void SetUp() {
457 tst_fun_ = GET_PARAM(0);
459 const BlockDimension &block = GET_PARAM(1);
460 const NTaps ntaps_x = GET_PARAM(2);
461 const NTaps ntaps_y = GET_PARAM(3);
462 const bool avg = GET_PARAM(4);
463 const int bd = GET_PARAM(5);
465 SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
468 void RunOne(bool ref) {
469 const uint16_t *src = image_->GetSrcData(ref, false);
470 CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
471 const int src_stride = image_->src_stride();
472 const int dst_stride = image_->dst_stride();
474 if (ref) {
475 av1_highbd_convolve_2d_scale_c(
476 src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
477 &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
478 &convolve_params_, bd_);
479 } else {
480 tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
481 &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
482 subpel_y_, kYStepQn, &convolve_params_, bd_);
486 private:
487 HighbdConvolveFunc tst_fun_;
490 const int kBDs[] = { 8, 10, 12 };
492 TEST_P(HighBDConvolveScaleTest, Check) { Run(); }
493 TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
495 INSTANTIATE_TEST_CASE_P(
496 SSE4_1, HighBDConvolveScaleTest,
497 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
498 ::testing::ValuesIn(kBlockDim),
499 ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
500 ::testing::Bool(), ::testing::ValuesIn(kBDs)));
501 } // namespace